| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 1680, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0035714285714285713, |
| "grad_norm": 0.3871030807495117, |
| "learning_rate": 1.1904761904761906e-07, |
| "loss": 1.9294867515563965, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.007142857142857143, |
| "grad_norm": 0.34607651829719543, |
| "learning_rate": 3.5714285714285716e-07, |
| "loss": 1.931689739227295, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.010714285714285714, |
| "grad_norm": 0.3077217638492584, |
| "learning_rate": 5.952380952380953e-07, |
| "loss": 1.859986662864685, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.014285714285714285, |
| "grad_norm": 0.26033300161361694, |
| "learning_rate": 8.333333333333333e-07, |
| "loss": 1.8296231031417847, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.017857142857142856, |
| "grad_norm": 0.3577536344528198, |
| "learning_rate": 1.0714285714285714e-06, |
| "loss": 1.840135097503662, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.02142857142857143, |
| "grad_norm": 0.35149258375167847, |
| "learning_rate": 1.3095238095238096e-06, |
| "loss": 1.718151330947876, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.025, |
| "grad_norm": 0.3105311691761017, |
| "learning_rate": 1.5476190476190479e-06, |
| "loss": 1.8123761415481567, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.02857142857142857, |
| "grad_norm": 0.3541400134563446, |
| "learning_rate": 1.7857142857142859e-06, |
| "loss": 1.801349401473999, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.03214285714285714, |
| "grad_norm": 0.32876938581466675, |
| "learning_rate": 2.023809523809524e-06, |
| "loss": 1.8854210376739502, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.03571428571428571, |
| "grad_norm": 0.9392958283424377, |
| "learning_rate": 2.261904761904762e-06, |
| "loss": 1.7024314403533936, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.039285714285714285, |
| "grad_norm": 0.6484195590019226, |
| "learning_rate": 2.5e-06, |
| "loss": 1.9459373950958252, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.04285714285714286, |
| "grad_norm": 0.36433079838752747, |
| "learning_rate": 2.7380952380952387e-06, |
| "loss": 1.9512709379196167, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.04642857142857143, |
| "grad_norm": 0.4358835220336914, |
| "learning_rate": 2.9761904761904763e-06, |
| "loss": 1.7940953969955444, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 0.506097137928009, |
| "learning_rate": 3.2142857142857147e-06, |
| "loss": 1.7999926805496216, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.05357142857142857, |
| "grad_norm": 0.5315778255462646, |
| "learning_rate": 3.4523809523809528e-06, |
| "loss": 1.7870306968688965, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.05714285714285714, |
| "grad_norm": 0.30482104420661926, |
| "learning_rate": 3.690476190476191e-06, |
| "loss": 1.8913555145263672, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.060714285714285714, |
| "grad_norm": 0.8241702318191528, |
| "learning_rate": 3.928571428571429e-06, |
| "loss": 1.9280858039855957, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.06428571428571428, |
| "grad_norm": 0.39840635657310486, |
| "learning_rate": 4.166666666666667e-06, |
| "loss": 1.9256908893585205, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.06785714285714285, |
| "grad_norm": 0.33251017332077026, |
| "learning_rate": 4.404761904761905e-06, |
| "loss": 1.8829214572906494, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.07142857142857142, |
| "grad_norm": 0.49388226866722107, |
| "learning_rate": 4.642857142857144e-06, |
| "loss": 1.8666248321533203, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.075, |
| "grad_norm": 0.28926795721054077, |
| "learning_rate": 4.880952380952381e-06, |
| "loss": 1.8469940423965454, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.07857142857142857, |
| "grad_norm": 0.317127525806427, |
| "learning_rate": 5.119047619047619e-06, |
| "loss": 1.892695426940918, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.08214285714285714, |
| "grad_norm": 0.8169130682945251, |
| "learning_rate": 5.357142857142857e-06, |
| "loss": 1.893534541130066, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.08571428571428572, |
| "grad_norm": 0.27684587240219116, |
| "learning_rate": 5.595238095238096e-06, |
| "loss": 1.5699528455734253, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.08928571428571429, |
| "grad_norm": 0.5231921076774597, |
| "learning_rate": 5.833333333333334e-06, |
| "loss": 1.6496429443359375, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.09285714285714286, |
| "grad_norm": 0.5755372643470764, |
| "learning_rate": 6.071428571428571e-06, |
| "loss": 1.6312464475631714, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.09642857142857143, |
| "grad_norm": 0.40994322299957275, |
| "learning_rate": 6.30952380952381e-06, |
| "loss": 1.8703556060791016, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 0.6402392983436584, |
| "learning_rate": 6.547619047619048e-06, |
| "loss": 1.7122882604599, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.10357142857142858, |
| "grad_norm": 0.4092760980129242, |
| "learning_rate": 6.785714285714287e-06, |
| "loss": 1.7604010105133057, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.10714285714285714, |
| "grad_norm": 0.41417962312698364, |
| "learning_rate": 7.023809523809524e-06, |
| "loss": 2.1781420707702637, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.11071428571428571, |
| "grad_norm": 0.6246824264526367, |
| "learning_rate": 7.261904761904762e-06, |
| "loss": 1.7982336282730103, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.11428571428571428, |
| "grad_norm": 0.26309502124786377, |
| "learning_rate": 7.500000000000001e-06, |
| "loss": 1.696463704109192, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.11785714285714285, |
| "grad_norm": 0.9458585381507874, |
| "learning_rate": 7.738095238095238e-06, |
| "loss": 1.7728084325790405, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.12142857142857143, |
| "grad_norm": 0.22862379252910614, |
| "learning_rate": 7.976190476190477e-06, |
| "loss": 1.6821340322494507, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.125, |
| "grad_norm": 0.236324280500412, |
| "learning_rate": 8.214285714285714e-06, |
| "loss": 1.7681533098220825, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.12857142857142856, |
| "grad_norm": 0.2597522735595703, |
| "learning_rate": 8.452380952380953e-06, |
| "loss": 1.8034054040908813, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.13214285714285715, |
| "grad_norm": 0.24487343430519104, |
| "learning_rate": 8.690476190476192e-06, |
| "loss": 1.7554086446762085, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.1357142857142857, |
| "grad_norm": 0.22543826699256897, |
| "learning_rate": 8.92857142857143e-06, |
| "loss": 1.7456854581832886, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.1392857142857143, |
| "grad_norm": 0.2380058914422989, |
| "learning_rate": 9.166666666666666e-06, |
| "loss": 1.7143663167953491, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.14285714285714285, |
| "grad_norm": 0.26500657200813293, |
| "learning_rate": 9.404761904761905e-06, |
| "loss": 1.7059998512268066, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.14642857142857144, |
| "grad_norm": 0.2978551387786865, |
| "learning_rate": 9.642857142857144e-06, |
| "loss": 1.7792344093322754, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 0.2930593490600586, |
| "learning_rate": 9.880952380952381e-06, |
| "loss": 1.6987429857254028, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.15357142857142858, |
| "grad_norm": 0.4046596884727478, |
| "learning_rate": 9.999991282010348e-06, |
| "loss": 1.7894960641860962, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.15714285714285714, |
| "grad_norm": 0.23502953350543976, |
| "learning_rate": 9.999921538295799e-06, |
| "loss": 1.749454379081726, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.16071428571428573, |
| "grad_norm": 0.22283266484737396, |
| "learning_rate": 9.999782051947632e-06, |
| "loss": 1.686018943786621, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.16428571428571428, |
| "grad_norm": 0.24027639627456665, |
| "learning_rate": 9.999572825127696e-06, |
| "loss": 1.480033040046692, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.16785714285714284, |
| "grad_norm": 0.5684676766395569, |
| "learning_rate": 9.99929386107872e-06, |
| "loss": 1.675416350364685, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.17142857142857143, |
| "grad_norm": 1.138840675354004, |
| "learning_rate": 9.998945164124268e-06, |
| "loss": 1.7155344486236572, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.175, |
| "grad_norm": 0.2664114534854889, |
| "learning_rate": 9.998526739668664e-06, |
| "loss": 1.6043933629989624, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.17857142857142858, |
| "grad_norm": 0.28691864013671875, |
| "learning_rate": 9.998038594196913e-06, |
| "loss": 1.6187028884887695, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.18214285714285713, |
| "grad_norm": 0.31850922107696533, |
| "learning_rate": 9.997480735274608e-06, |
| "loss": 1.5820776224136353, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.18571428571428572, |
| "grad_norm": 0.23401758074760437, |
| "learning_rate": 9.996853171547794e-06, |
| "loss": 1.5967426300048828, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.18928571428571428, |
| "grad_norm": 0.23440219461917877, |
| "learning_rate": 9.996155912742856e-06, |
| "loss": 1.6334154605865479, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.19285714285714287, |
| "grad_norm": 0.7341821193695068, |
| "learning_rate": 9.995388969666348e-06, |
| "loss": 1.598835825920105, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.19642857142857142, |
| "grad_norm": 0.6320663094520569, |
| "learning_rate": 9.994552354204844e-06, |
| "loss": 1.6243830919265747, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 0.5404586791992188, |
| "learning_rate": 9.993646079324738e-06, |
| "loss": 1.566571831703186, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.20357142857142857, |
| "grad_norm": 0.5022917985916138, |
| "learning_rate": 9.992670159072052e-06, |
| "loss": 1.6408634185791016, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.20714285714285716, |
| "grad_norm": 1.1218639612197876, |
| "learning_rate": 9.991624608572215e-06, |
| "loss": 1.7009669542312622, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.21071428571428572, |
| "grad_norm": 0.5181306004524231, |
| "learning_rate": 9.990509444029833e-06, |
| "loss": 1.7996366024017334, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.21428571428571427, |
| "grad_norm": 0.43997907638549805, |
| "learning_rate": 9.98932468272843e-06, |
| "loss": 1.7554632425308228, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.21785714285714286, |
| "grad_norm": 0.3227292597293854, |
| "learning_rate": 9.98807034303019e-06, |
| "loss": 1.473575472831726, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.22142857142857142, |
| "grad_norm": 0.3611178398132324, |
| "learning_rate": 9.98674644437566e-06, |
| "loss": 1.594710350036621, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.225, |
| "grad_norm": 0.9151387214660645, |
| "learning_rate": 9.985353007283464e-06, |
| "loss": 1.6291745901107788, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.22857142857142856, |
| "grad_norm": 0.2581241726875305, |
| "learning_rate": 9.983890053349969e-06, |
| "loss": 1.2893997430801392, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.23214285714285715, |
| "grad_norm": 0.5861591100692749, |
| "learning_rate": 9.982357605248963e-06, |
| "loss": 1.0495647192001343, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.2357142857142857, |
| "grad_norm": 0.46270960569381714, |
| "learning_rate": 9.980755686731296e-06, |
| "loss": 1.3306972980499268, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.2392857142857143, |
| "grad_norm": 0.36067521572113037, |
| "learning_rate": 9.979084322624518e-06, |
| "loss": 1.5336247682571411, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.24285714285714285, |
| "grad_norm": 0.9336586594581604, |
| "learning_rate": 9.977343538832486e-06, |
| "loss": 1.7042999267578125, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.24642857142857144, |
| "grad_norm": 0.387260377407074, |
| "learning_rate": 9.97553336233497e-06, |
| "loss": 1.396690011024475, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.4640398919582367, |
| "learning_rate": 9.973653821187233e-06, |
| "loss": 1.3623416423797607, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.25357142857142856, |
| "grad_norm": 0.33568593859672546, |
| "learning_rate": 9.971704944519593e-06, |
| "loss": 1.3992865085601807, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.2571428571428571, |
| "grad_norm": 0.24492622911930084, |
| "learning_rate": 9.969686762536973e-06, |
| "loss": 1.444324016571045, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.26071428571428573, |
| "grad_norm": 0.8304792642593384, |
| "learning_rate": 9.967599306518438e-06, |
| "loss": 1.3990877866744995, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.2642857142857143, |
| "grad_norm": 0.623303234577179, |
| "learning_rate": 9.965442608816704e-06, |
| "loss": 1.4563076496124268, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.26785714285714285, |
| "grad_norm": 0.31338322162628174, |
| "learning_rate": 9.963216702857635e-06, |
| "loss": 1.6392706632614136, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.2714285714285714, |
| "grad_norm": 0.3007861375808716, |
| "learning_rate": 9.96092162313973e-06, |
| "loss": 1.5057697296142578, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.275, |
| "grad_norm": 0.15701599419116974, |
| "learning_rate": 9.958557405233593e-06, |
| "loss": 1.4597502946853638, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.2785714285714286, |
| "grad_norm": 0.2882039248943329, |
| "learning_rate": 9.956124085781366e-06, |
| "loss": 1.3839119672775269, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.28214285714285714, |
| "grad_norm": 0.3199823498725891, |
| "learning_rate": 9.953621702496178e-06, |
| "loss": 1.6068451404571533, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.2857142857142857, |
| "grad_norm": 0.24365948140621185, |
| "learning_rate": 9.951050294161548e-06, |
| "loss": 1.7299036979675293, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.2892857142857143, |
| "grad_norm": 0.46145617961883545, |
| "learning_rate": 9.948409900630787e-06, |
| "loss": 1.3489717245101929, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.29285714285714287, |
| "grad_norm": 0.15912453830242157, |
| "learning_rate": 9.945700562826394e-06, |
| "loss": 1.5043880939483643, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.29642857142857143, |
| "grad_norm": 0.1797444075345993, |
| "learning_rate": 9.942922322739395e-06, |
| "loss": 1.1060163974761963, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 0.2711468040943146, |
| "learning_rate": 9.940075223428718e-06, |
| "loss": 1.8668510913848877, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.30357142857142855, |
| "grad_norm": 0.1544935554265976, |
| "learning_rate": 9.93715930902051e-06, |
| "loss": 1.2808830738067627, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.30714285714285716, |
| "grad_norm": 0.37536972761154175, |
| "learning_rate": 9.934174624707459e-06, |
| "loss": 1.4175796508789062, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.3107142857142857, |
| "grad_norm": 0.22495543956756592, |
| "learning_rate": 9.931121216748092e-06, |
| "loss": 1.6282312870025635, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.3142857142857143, |
| "grad_norm": 0.3511154055595398, |
| "learning_rate": 9.927999132466059e-06, |
| "loss": 1.635170340538025, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.31785714285714284, |
| "grad_norm": 0.47558754682540894, |
| "learning_rate": 9.924808420249404e-06, |
| "loss": 1.563542127609253, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.32142857142857145, |
| "grad_norm": 0.6490837335586548, |
| "learning_rate": 9.921549129549799e-06, |
| "loss": 2.066225290298462, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.325, |
| "grad_norm": 0.37414857745170593, |
| "learning_rate": 9.918221310881797e-06, |
| "loss": 1.2984635829925537, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.32857142857142857, |
| "grad_norm": 0.24293118715286255, |
| "learning_rate": 9.91482501582204e-06, |
| "loss": 1.560595989227295, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.33214285714285713, |
| "grad_norm": 1.0313069820404053, |
| "learning_rate": 9.91136029700846e-06, |
| "loss": 1.6615456342697144, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.3357142857142857, |
| "grad_norm": 0.2365736961364746, |
| "learning_rate": 9.907827208139462e-06, |
| "loss": 1.4550660848617554, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.3392857142857143, |
| "grad_norm": 0.48783159255981445, |
| "learning_rate": 9.904225803973095e-06, |
| "loss": 1.1695599555969238, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.34285714285714286, |
| "grad_norm": 0.18840378522872925, |
| "learning_rate": 9.900556140326203e-06, |
| "loss": 1.175657033920288, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.3464285714285714, |
| "grad_norm": 0.33193379640579224, |
| "learning_rate": 9.896818274073555e-06, |
| "loss": 1.5140769481658936, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 0.23811382055282593, |
| "learning_rate": 9.893012263146971e-06, |
| "loss": 1.5519834756851196, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.3535714285714286, |
| "grad_norm": 0.3118128478527069, |
| "learning_rate": 9.889138166534416e-06, |
| "loss": 1.6215221881866455, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.35714285714285715, |
| "grad_norm": 0.20358364284038544, |
| "learning_rate": 9.885196044279095e-06, |
| "loss": 1.4951940774917603, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.3607142857142857, |
| "grad_norm": 0.33988499641418457, |
| "learning_rate": 9.881185957478514e-06, |
| "loss": 1.5101016759872437, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.36428571428571427, |
| "grad_norm": 0.2685701251029968, |
| "learning_rate": 9.877107968283538e-06, |
| "loss": 1.3352025747299194, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.3678571428571429, |
| "grad_norm": 0.24243606626987457, |
| "learning_rate": 9.872962139897426e-06, |
| "loss": 1.6032525300979614, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.37142857142857144, |
| "grad_norm": 0.2574315369129181, |
| "learning_rate": 9.86874853657485e-06, |
| "loss": 1.4210426807403564, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.375, |
| "grad_norm": 0.30428075790405273, |
| "learning_rate": 9.864467223620908e-06, |
| "loss": 1.4541680812835693, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.37857142857142856, |
| "grad_norm": 0.27129194140434265, |
| "learning_rate": 9.860118267390092e-06, |
| "loss": 1.5869474411010742, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.3821428571428571, |
| "grad_norm": 0.2757408022880554, |
| "learning_rate": 9.855701735285285e-06, |
| "loss": 1.4132391214370728, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.38571428571428573, |
| "grad_norm": 0.8043122291564941, |
| "learning_rate": 9.851217695756694e-06, |
| "loss": 1.3423351049423218, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.3892857142857143, |
| "grad_norm": 0.3778972327709198, |
| "learning_rate": 9.846666218300808e-06, |
| "loss": 1.4252076148986816, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.39285714285714285, |
| "grad_norm": 0.30076470971107483, |
| "learning_rate": 9.842047373459305e-06, |
| "loss": 1.477191686630249, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.3964285714285714, |
| "grad_norm": 0.2551064193248749, |
| "learning_rate": 9.837361232817964e-06, |
| "loss": 1.4160501956939697, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.3507143557071686, |
| "learning_rate": 9.832607869005565e-06, |
| "loss": 1.3853830099105835, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.4035714285714286, |
| "grad_norm": 0.2576788663864136, |
| "learning_rate": 9.827787355692749e-06, |
| "loss": 1.5642895698547363, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.40714285714285714, |
| "grad_norm": 0.6864569187164307, |
| "learning_rate": 9.822899767590884e-06, |
| "loss": 1.5939396619796753, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.4107142857142857, |
| "grad_norm": 0.290575236082077, |
| "learning_rate": 9.817945180450902e-06, |
| "loss": 1.692050814628601, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.4142857142857143, |
| "grad_norm": 0.2678094506263733, |
| "learning_rate": 9.812923671062139e-06, |
| "loss": 1.5078585147857666, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.41785714285714287, |
| "grad_norm": 0.2580035924911499, |
| "learning_rate": 9.80783531725112e-06, |
| "loss": 1.5001014471054077, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.42142857142857143, |
| "grad_norm": 0.48962509632110596, |
| "learning_rate": 9.80268019788038e-06, |
| "loss": 1.30159592628479, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.425, |
| "grad_norm": 0.3317374587059021, |
| "learning_rate": 9.79745839284722e-06, |
| "loss": 1.3308159112930298, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.42857142857142855, |
| "grad_norm": 0.40648195147514343, |
| "learning_rate": 9.792169983082484e-06, |
| "loss": 1.2273372411727905, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.43214285714285716, |
| "grad_norm": 0.23908843100070953, |
| "learning_rate": 9.786815050549295e-06, |
| "loss": 1.3610113859176636, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.4357142857142857, |
| "grad_norm": 0.30702945590019226, |
| "learning_rate": 9.781393678241787e-06, |
| "loss": 1.526265025138855, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.4392857142857143, |
| "grad_norm": 0.3442508578300476, |
| "learning_rate": 9.775905950183821e-06, |
| "loss": 1.4831691980361938, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.44285714285714284, |
| "grad_norm": 0.2286010980606079, |
| "learning_rate": 9.770351951427684e-06, |
| "loss": 1.5686728954315186, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.44642857142857145, |
| "grad_norm": 0.42278486490249634, |
| "learning_rate": 9.764731768052762e-06, |
| "loss": 1.583655834197998, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 0.2032240778207779, |
| "learning_rate": 9.75904548716422e-06, |
| "loss": 1.7357392311096191, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.45357142857142857, |
| "grad_norm": 0.6682279706001282, |
| "learning_rate": 9.753293196891639e-06, |
| "loss": 1.4943958520889282, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.45714285714285713, |
| "grad_norm": 0.3326359689235687, |
| "learning_rate": 9.747474986387655e-06, |
| "loss": 1.4537054300308228, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.4607142857142857, |
| "grad_norm": 0.31363770365715027, |
| "learning_rate": 9.74159094582658e-06, |
| "loss": 1.4956998825073242, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.4642857142857143, |
| "grad_norm": 0.6148827075958252, |
| "learning_rate": 9.735641166402998e-06, |
| "loss": 1.3277488946914673, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.46785714285714286, |
| "grad_norm": 0.31903398036956787, |
| "learning_rate": 9.729625740330363e-06, |
| "loss": 1.49782133102417, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.4714285714285714, |
| "grad_norm": 0.8235952258110046, |
| "learning_rate": 9.723544760839555e-06, |
| "loss": 1.460282325744629, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.475, |
| "grad_norm": 0.3165692090988159, |
| "learning_rate": 9.717398322177442e-06, |
| "loss": 1.5261378288269043, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.4785714285714286, |
| "grad_norm": 1.6001724004745483, |
| "learning_rate": 9.71118651960543e-06, |
| "loss": 1.7769297361373901, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.48214285714285715, |
| "grad_norm": 0.26702550053596497, |
| "learning_rate": 9.704909449397962e-06, |
| "loss": 1.3878670930862427, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.4857142857142857, |
| "grad_norm": 0.18522176146507263, |
| "learning_rate": 9.69856720884105e-06, |
| "loss": 1.2690881490707397, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.48928571428571427, |
| "grad_norm": 0.40137895941734314, |
| "learning_rate": 9.692159896230757e-06, |
| "loss": 1.3622859716415405, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.4928571428571429, |
| "grad_norm": 0.34499719738960266, |
| "learning_rate": 9.685687610871666e-06, |
| "loss": 1.6427959203720093, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.49642857142857144, |
| "grad_norm": 0.3400484621524811, |
| "learning_rate": 9.679150453075357e-06, |
| "loss": 1.3161296844482422, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.3539294898509979, |
| "learning_rate": 9.67254852415884e-06, |
| "loss": 1.2691534757614136, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.5035714285714286, |
| "grad_norm": 0.2683607339859009, |
| "learning_rate": 9.665881926442994e-06, |
| "loss": 1.5461015701293945, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.5071428571428571, |
| "grad_norm": 0.3605668246746063, |
| "learning_rate": 9.659150763250966e-06, |
| "loss": 1.6314688920974731, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.5107142857142857, |
| "grad_norm": 0.3184402585029602, |
| "learning_rate": 9.652355138906591e-06, |
| "loss": 1.518629789352417, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.5142857142857142, |
| "grad_norm": 0.870186984539032, |
| "learning_rate": 9.645495158732755e-06, |
| "loss": 1.143850564956665, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.5178571428571429, |
| "grad_norm": 0.33421170711517334, |
| "learning_rate": 9.638570929049776e-06, |
| "loss": 1.0234707593917847, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.5214285714285715, |
| "grad_norm": 0.1717844307422638, |
| "learning_rate": 9.631582557173751e-06, |
| "loss": 1.4672911167144775, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.525, |
| "grad_norm": 0.9301527738571167, |
| "learning_rate": 9.624530151414894e-06, |
| "loss": 1.2730239629745483, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.5285714285714286, |
| "grad_norm": 0.38581225275993347, |
| "learning_rate": 9.617413821075852e-06, |
| "loss": 1.2601397037506104, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.5321428571428571, |
| "grad_norm": 0.23976172506809235, |
| "learning_rate": 9.61023367645002e-06, |
| "loss": 1.2101945877075195, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.5357142857142857, |
| "grad_norm": 0.22152353823184967, |
| "learning_rate": 9.602989828819829e-06, |
| "loss": 1.542162537574768, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.5392857142857143, |
| "grad_norm": 0.4408532381057739, |
| "learning_rate": 9.595682390455015e-06, |
| "loss": 1.3136895895004272, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.5428571428571428, |
| "grad_norm": 0.4480395019054413, |
| "learning_rate": 9.588311474610888e-06, |
| "loss": 1.1242649555206299, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.5464285714285714, |
| "grad_norm": 0.7074999213218689, |
| "learning_rate": 9.580877195526564e-06, |
| "loss": 1.6407079696655273, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 0.3410518765449524, |
| "learning_rate": 9.573379668423209e-06, |
| "loss": 1.3072420358657837, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.5535714285714286, |
| "grad_norm": 0.47393250465393066, |
| "learning_rate": 9.56581900950225e-06, |
| "loss": 1.4017832279205322, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.5571428571428572, |
| "grad_norm": 0.6089979410171509, |
| "learning_rate": 9.558195335943566e-06, |
| "loss": 1.5297354459762573, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.5607142857142857, |
| "grad_norm": 0.2799089848995209, |
| "learning_rate": 9.550508765903672e-06, |
| "loss": 1.043546199798584, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.5642857142857143, |
| "grad_norm": 0.2620464563369751, |
| "learning_rate": 9.542759418513906e-06, |
| "loss": 1.6538763046264648, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.5678571428571428, |
| "grad_norm": 0.5144315958023071, |
| "learning_rate": 9.534947413878556e-06, |
| "loss": 1.5541188716888428, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.5714285714285714, |
| "grad_norm": 0.29509297013282776, |
| "learning_rate": 9.52707287307302e-06, |
| "loss": 1.1971598863601685, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.575, |
| "grad_norm": 0.1929909884929657, |
| "learning_rate": 9.519135918141913e-06, |
| "loss": 1.1823662519454956, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.5785714285714286, |
| "grad_norm": 0.48544377088546753, |
| "learning_rate": 9.511136672097194e-06, |
| "loss": 1.3313523530960083, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.5821428571428572, |
| "grad_norm": 0.3510501980781555, |
| "learning_rate": 9.503075258916241e-06, |
| "loss": 1.3195650577545166, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.5857142857142857, |
| "grad_norm": 0.2727429270744324, |
| "learning_rate": 9.494951803539942e-06, |
| "loss": 1.2425987720489502, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.5892857142857143, |
| "grad_norm": 15.424638748168945, |
| "learning_rate": 9.486766431870752e-06, |
| "loss": 1.2101187705993652, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.5928571428571429, |
| "grad_norm": 0.2866066098213196, |
| "learning_rate": 9.478519270770746e-06, |
| "loss": 1.2784419059753418, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.5964285714285714, |
| "grad_norm": 0.4156343638896942, |
| "learning_rate": 9.470210448059645e-06, |
| "loss": 1.583785057067871, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 0.45487159490585327, |
| "learning_rate": 9.46184009251285e-06, |
| "loss": 1.3652830123901367, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.6035714285714285, |
| "grad_norm": 0.32525262236595154, |
| "learning_rate": 9.453408333859427e-06, |
| "loss": 1.4697949886322021, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.6071428571428571, |
| "grad_norm": 0.5784197449684143, |
| "learning_rate": 9.444915302780117e-06, |
| "loss": 1.3824127912521362, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.6107142857142858, |
| "grad_norm": 0.26421067118644714, |
| "learning_rate": 9.436361130905288e-06, |
| "loss": 1.42073655128479, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.6142857142857143, |
| "grad_norm": 0.2380143105983734, |
| "learning_rate": 9.427745950812917e-06, |
| "loss": 1.3658424615859985, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.6178571428571429, |
| "grad_norm": 0.43499693274497986, |
| "learning_rate": 9.41906989602652e-06, |
| "loss": 1.461742639541626, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.6214285714285714, |
| "grad_norm": 0.400419145822525, |
| "learning_rate": 9.410333101013086e-06, |
| "loss": 1.3119421005249023, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.625, |
| "grad_norm": 0.4901754856109619, |
| "learning_rate": 9.401535701180998e-06, |
| "loss": 1.2844195365905762, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.6285714285714286, |
| "grad_norm": 0.48951858282089233, |
| "learning_rate": 9.392677832877932e-06, |
| "loss": 1.568238615989685, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.6321428571428571, |
| "grad_norm": 0.2112666368484497, |
| "learning_rate": 9.383759633388737e-06, |
| "loss": 1.5015143156051636, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.6357142857142857, |
| "grad_norm": 0.2615770101547241, |
| "learning_rate": 9.374781240933316e-06, |
| "loss": 1.5211448669433594, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.6392857142857142, |
| "grad_norm": 0.2031329870223999, |
| "learning_rate": 9.365742794664484e-06, |
| "loss": 1.3461060523986816, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.6428571428571429, |
| "grad_norm": 0.36589643359184265, |
| "learning_rate": 9.356644434665804e-06, |
| "loss": 1.3849568367004395, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.6464285714285715, |
| "grad_norm": 0.3387724757194519, |
| "learning_rate": 9.347486301949417e-06, |
| "loss": 1.558565616607666, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 0.29686594009399414, |
| "learning_rate": 9.33826853845387e-06, |
| "loss": 1.3272876739501953, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.6535714285714286, |
| "grad_norm": 0.8538780808448792, |
| "learning_rate": 9.328991287041892e-06, |
| "loss": 1.3049917221069336, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.6571428571428571, |
| "grad_norm": 0.2655990719795227, |
| "learning_rate": 9.319654691498205e-06, |
| "loss": 1.2668689489364624, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.6607142857142857, |
| "grad_norm": 0.28246378898620605, |
| "learning_rate": 9.31025889652728e-06, |
| "loss": 1.4307278394699097, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.6642857142857143, |
| "grad_norm": 0.33245643973350525, |
| "learning_rate": 9.300804047751093e-06, |
| "loss": 1.3824753761291504, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.6678571428571428, |
| "grad_norm": 0.21639856696128845, |
| "learning_rate": 9.291290291706881e-06, |
| "loss": 1.3373095989227295, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.6714285714285714, |
| "grad_norm": 0.42471569776535034, |
| "learning_rate": 9.281717775844857e-06, |
| "loss": 1.2794650793075562, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.675, |
| "grad_norm": 0.5068492293357849, |
| "learning_rate": 9.272086648525937e-06, |
| "loss": 1.3947125673294067, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.6785714285714286, |
| "grad_norm": 0.42456403374671936, |
| "learning_rate": 9.26239705901943e-06, |
| "loss": 1.4322527647018433, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.6821428571428572, |
| "grad_norm": 0.7322901487350464, |
| "learning_rate": 9.25264915750073e-06, |
| "loss": 1.427004337310791, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.6857142857142857, |
| "grad_norm": 0.2247505635023117, |
| "learning_rate": 9.242843095048987e-06, |
| "loss": 1.2980873584747314, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.6892857142857143, |
| "grad_norm": 0.28208163380622864, |
| "learning_rate": 9.232979023644768e-06, |
| "loss": 1.466817855834961, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.6928571428571428, |
| "grad_norm": 0.4910048544406891, |
| "learning_rate": 9.223057096167696e-06, |
| "loss": 1.4608205556869507, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.6964285714285714, |
| "grad_norm": 0.5288735032081604, |
| "learning_rate": 9.213077466394088e-06, |
| "loss": 1.3513166904449463, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 0.304855078458786, |
| "learning_rate": 9.203040288994566e-06, |
| "loss": 1.464281678199768, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.7035714285714286, |
| "grad_norm": 0.28660398721694946, |
| "learning_rate": 9.192945719531662e-06, |
| "loss": 1.3084968328475952, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.7071428571428572, |
| "grad_norm": 0.5636733770370483, |
| "learning_rate": 9.182793914457402e-06, |
| "loss": 1.2844712734222412, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.7107142857142857, |
| "grad_norm": 0.5751602649688721, |
| "learning_rate": 9.172585031110895e-06, |
| "loss": 1.5046448707580566, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.7142857142857143, |
| "grad_norm": 0.34700506925582886, |
| "learning_rate": 9.162319227715877e-06, |
| "loss": 1.3449612855911255, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.7178571428571429, |
| "grad_norm": 0.34046903252601624, |
| "learning_rate": 9.151996663378271e-06, |
| "loss": 1.3594465255737305, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.7214285714285714, |
| "grad_norm": 0.48511913418769836, |
| "learning_rate": 9.141617498083717e-06, |
| "loss": 1.5169265270233154, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.725, |
| "grad_norm": 0.26317858695983887, |
| "learning_rate": 9.131181892695089e-06, |
| "loss": 1.4639661312103271, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.7285714285714285, |
| "grad_norm": 0.4234665632247925, |
| "learning_rate": 9.120690008950008e-06, |
| "loss": 1.4238711595535278, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.7321428571428571, |
| "grad_norm": 0.683773934841156, |
| "learning_rate": 9.110142009458333e-06, |
| "loss": 1.2991688251495361, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.7357142857142858, |
| "grad_norm": 0.9090404510498047, |
| "learning_rate": 9.099538057699643e-06, |
| "loss": 1.4411964416503906, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.7392857142857143, |
| "grad_norm": 0.42502301931381226, |
| "learning_rate": 9.08887831802069e-06, |
| "loss": 1.3963665962219238, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.7428571428571429, |
| "grad_norm": 0.25458428263664246, |
| "learning_rate": 9.078162955632878e-06, |
| "loss": 1.3666608333587646, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.7464285714285714, |
| "grad_norm": 0.2778110206127167, |
| "learning_rate": 9.067392136609672e-06, |
| "loss": 1.4295861721038818, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 0.3574320673942566, |
| "learning_rate": 9.056566027884051e-06, |
| "loss": 1.4124993085861206, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.7535714285714286, |
| "grad_norm": 0.2570479214191437, |
| "learning_rate": 9.045684797245902e-06, |
| "loss": 1.3560070991516113, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.7571428571428571, |
| "grad_norm": 0.3374227285385132, |
| "learning_rate": 9.034748613339427e-06, |
| "loss": 1.360439658164978, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.7607142857142857, |
| "grad_norm": 0.25365766882896423, |
| "learning_rate": 9.023757645660531e-06, |
| "loss": 1.3708235025405884, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.7642857142857142, |
| "grad_norm": 0.2227737158536911, |
| "learning_rate": 9.01271206455419e-06, |
| "loss": 1.3818211555480957, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.7678571428571429, |
| "grad_norm": 0.21550701558589935, |
| "learning_rate": 9.001612041211817e-06, |
| "loss": 1.3254315853118896, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.7714285714285715, |
| "grad_norm": 0.5434844493865967, |
| "learning_rate": 8.9904577476686e-06, |
| "loss": 1.3340120315551758, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.775, |
| "grad_norm": 0.2289412021636963, |
| "learning_rate": 8.979249356800846e-06, |
| "loss": 1.2770015001296997, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.7785714285714286, |
| "grad_norm": 0.25175049901008606, |
| "learning_rate": 8.967987042323293e-06, |
| "loss": 1.3385746479034424, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.7821428571428571, |
| "grad_norm": 0.27297094464302063, |
| "learning_rate": 8.956670978786423e-06, |
| "loss": 1.2522022724151611, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.7857142857142857, |
| "grad_norm": 0.2057066559791565, |
| "learning_rate": 8.945301341573757e-06, |
| "loss": 1.3175703287124634, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.7892857142857143, |
| "grad_norm": 0.16934043169021606, |
| "learning_rate": 8.93387830689913e-06, |
| "loss": 1.2785143852233887, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.7928571428571428, |
| "grad_norm": 0.17673851549625397, |
| "learning_rate": 8.922402051803968e-06, |
| "loss": 1.311404824256897, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.7964285714285714, |
| "grad_norm": 0.36772605776786804, |
| "learning_rate": 8.91087275415454e-06, |
| "loss": 1.27708101272583, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.1414009928703308, |
| "learning_rate": 8.8992905926392e-06, |
| "loss": 1.247365117073059, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.8035714285714286, |
| "grad_norm": 0.16844603419303894, |
| "learning_rate": 8.887655746765625e-06, |
| "loss": 1.3339194059371948, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.8071428571428572, |
| "grad_norm": 0.4043944180011749, |
| "learning_rate": 8.875968396858023e-06, |
| "loss": 1.3012686967849731, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.8107142857142857, |
| "grad_norm": 0.19886070489883423, |
| "learning_rate": 8.864228724054342e-06, |
| "loss": 1.2051547765731812, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.8142857142857143, |
| "grad_norm": 0.18143871426582336, |
| "learning_rate": 8.852436910303466e-06, |
| "loss": 1.264425277709961, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.8178571428571428, |
| "grad_norm": 0.30469146370887756, |
| "learning_rate": 8.840593138362395e-06, |
| "loss": 1.2156575918197632, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.8214285714285714, |
| "grad_norm": 0.19490455090999603, |
| "learning_rate": 8.828697591793405e-06, |
| "loss": 1.2579315900802612, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.825, |
| "grad_norm": 0.22966210544109344, |
| "learning_rate": 8.816750454961206e-06, |
| "loss": 1.2265636920928955, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.8285714285714286, |
| "grad_norm": 0.4836777448654175, |
| "learning_rate": 8.804751913030095e-06, |
| "loss": 1.2515498399734497, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.8321428571428572, |
| "grad_norm": 0.22509177029132843, |
| "learning_rate": 8.792702151961074e-06, |
| "loss": 1.2572628259658813, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.8357142857142857, |
| "grad_norm": 0.4269544184207916, |
| "learning_rate": 8.780601358508966e-06, |
| "loss": 1.2433445453643799, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.8392857142857143, |
| "grad_norm": 0.19438913464546204, |
| "learning_rate": 8.768449720219533e-06, |
| "loss": 1.2479232549667358, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.8428571428571429, |
| "grad_norm": 0.695250391960144, |
| "learning_rate": 8.75624742542656e-06, |
| "loss": 1.300042748451233, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.8464285714285714, |
| "grad_norm": 0.35800135135650635, |
| "learning_rate": 8.743994663248939e-06, |
| "loss": 1.2871143817901611, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 0.20253418385982513, |
| "learning_rate": 8.73169162358774e-06, |
| "loss": 1.2776912450790405, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.8535714285714285, |
| "grad_norm": 0.203902930021286, |
| "learning_rate": 8.719338497123258e-06, |
| "loss": 1.3039164543151855, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.8571428571428571, |
| "grad_norm": 0.24306446313858032, |
| "learning_rate": 8.706935475312073e-06, |
| "loss": 1.30210542678833, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.8607142857142858, |
| "grad_norm": 0.2822311520576477, |
| "learning_rate": 8.694482750384069e-06, |
| "loss": 1.2630928754806519, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.8642857142857143, |
| "grad_norm": 0.2177450954914093, |
| "learning_rate": 8.681980515339464e-06, |
| "loss": 1.2841533422470093, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.8678571428571429, |
| "grad_norm": 0.19454443454742432, |
| "learning_rate": 8.669428963945815e-06, |
| "loss": 1.2446175813674927, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.8714285714285714, |
| "grad_norm": 0.161905437707901, |
| "learning_rate": 8.656828290735013e-06, |
| "loss": 1.2695343494415283, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.875, |
| "grad_norm": 0.19021154940128326, |
| "learning_rate": 8.644178691000272e-06, |
| "loss": 1.2780508995056152, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.8785714285714286, |
| "grad_norm": 0.3725239038467407, |
| "learning_rate": 8.631480360793095e-06, |
| "loss": 1.2979791164398193, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.8821428571428571, |
| "grad_norm": 0.5264632701873779, |
| "learning_rate": 8.61873349692025e-06, |
| "loss": 1.2810431718826294, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.8857142857142857, |
| "grad_norm": 0.26536062359809875, |
| "learning_rate": 8.605938296940702e-06, |
| "loss": 1.2166625261306763, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.8892857142857142, |
| "grad_norm": 0.4096132516860962, |
| "learning_rate": 8.593094959162565e-06, |
| "loss": 1.2420190572738647, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.8928571428571429, |
| "grad_norm": 0.4396449327468872, |
| "learning_rate": 8.58020368264002e-06, |
| "loss": 1.2754027843475342, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.8964285714285715, |
| "grad_norm": 0.15545235574245453, |
| "learning_rate": 8.567264667170232e-06, |
| "loss": 1.3059731721878052, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 0.25121352076530457, |
| "learning_rate": 8.554278113290262e-06, |
| "loss": 1.2766114473342896, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.9035714285714286, |
| "grad_norm": 0.21137557923793793, |
| "learning_rate": 8.541244222273942e-06, |
| "loss": 1.258975863456726, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.9071428571428571, |
| "grad_norm": 0.16647249460220337, |
| "learning_rate": 8.528163196128767e-06, |
| "loss": 1.2222638130187988, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.9107142857142857, |
| "grad_norm": 0.3039259910583496, |
| "learning_rate": 8.51503523759277e-06, |
| "loss": 1.257559895515442, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.9142857142857143, |
| "grad_norm": 0.32180115580558777, |
| "learning_rate": 8.501860550131361e-06, |
| "loss": 1.280539631843567, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.9178571428571428, |
| "grad_norm": 0.2822877764701843, |
| "learning_rate": 8.488639337934188e-06, |
| "loss": 1.225077509880066, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.9214285714285714, |
| "grad_norm": 0.22444438934326172, |
| "learning_rate": 8.475371805911975e-06, |
| "loss": 1.259244441986084, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.925, |
| "grad_norm": 0.17102967202663422, |
| "learning_rate": 8.462058159693332e-06, |
| "loss": 1.2512003183364868, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.9285714285714286, |
| "grad_norm": 0.9442085027694702, |
| "learning_rate": 8.44869860562158e-06, |
| "loss": 1.2956591844558716, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.9321428571428572, |
| "grad_norm": 0.31264039874076843, |
| "learning_rate": 8.435293350751545e-06, |
| "loss": 1.3134222030639648, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.9357142857142857, |
| "grad_norm": 0.20593850314617157, |
| "learning_rate": 8.421842602846362e-06, |
| "loss": 1.269896149635315, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.9392857142857143, |
| "grad_norm": 0.24257254600524902, |
| "learning_rate": 8.408346570374234e-06, |
| "loss": 1.2887259721755981, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.9428571428571428, |
| "grad_norm": 0.18374580144882202, |
| "learning_rate": 8.394805462505224e-06, |
| "loss": 1.2653754949569702, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.9464285714285714, |
| "grad_norm": 0.7440497875213623, |
| "learning_rate": 8.381219489107992e-06, |
| "loss": 1.2136163711547852, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 0.3250195384025574, |
| "learning_rate": 8.36758886074656e-06, |
| "loss": 1.233951449394226, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.9535714285714286, |
| "grad_norm": 0.2864832878112793, |
| "learning_rate": 8.353913788677036e-06, |
| "loss": 1.2546851634979248, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.9571428571428572, |
| "grad_norm": 0.22155587375164032, |
| "learning_rate": 8.34019448484435e-06, |
| "loss": 1.2355575561523438, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.9607142857142857, |
| "grad_norm": 0.19411461055278778, |
| "learning_rate": 8.326431161878957e-06, |
| "loss": 1.2437915802001953, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.9642857142857143, |
| "grad_norm": 0.26431798934936523, |
| "learning_rate": 8.312624033093555e-06, |
| "loss": 1.2899754047393799, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.9678571428571429, |
| "grad_norm": 0.3181489109992981, |
| "learning_rate": 8.298773312479767e-06, |
| "loss": 1.2769360542297363, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.9714285714285714, |
| "grad_norm": 0.2669861912727356, |
| "learning_rate": 8.284879214704834e-06, |
| "loss": 1.2913857698440552, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.975, |
| "grad_norm": 0.2932322919368744, |
| "learning_rate": 8.270941955108281e-06, |
| "loss": 1.2430675029754639, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.9785714285714285, |
| "grad_norm": 0.3006272614002228, |
| "learning_rate": 8.256961749698583e-06, |
| "loss": 1.2453312873840332, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.9821428571428571, |
| "grad_norm": 0.2196272611618042, |
| "learning_rate": 8.242938815149817e-06, |
| "loss": 1.2648967504501343, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.9857142857142858, |
| "grad_norm": 0.2562142014503479, |
| "learning_rate": 8.228873368798304e-06, |
| "loss": 1.3159946203231812, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.9892857142857143, |
| "grad_norm": 0.26237812638282776, |
| "learning_rate": 8.214765628639235e-06, |
| "loss": 1.3476945161819458, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.9928571428571429, |
| "grad_norm": 0.38732582330703735, |
| "learning_rate": 8.200615813323306e-06, |
| "loss": 1.9057130813598633, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.9964285714285714, |
| "grad_norm": 0.33351263403892517, |
| "learning_rate": 8.18642414215331e-06, |
| "loss": 1.8800382614135742, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.6058505773544312, |
| "learning_rate": 8.172190835080757e-06, |
| "loss": 1.8019236326217651, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.0035714285714286, |
| "grad_norm": 0.31470683217048645, |
| "learning_rate": 8.157916112702452e-06, |
| "loss": 1.384263277053833, |
| "step": 562 |
| }, |
| { |
| "epoch": 1.0071428571428571, |
| "grad_norm": 0.310624897480011, |
| "learning_rate": 8.143600196257086e-06, |
| "loss": 1.3995013236999512, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.0107142857142857, |
| "grad_norm": 0.20878104865550995, |
| "learning_rate": 8.129243307621791e-06, |
| "loss": 1.3525418043136597, |
| "step": 566 |
| }, |
| { |
| "epoch": 1.0142857142857142, |
| "grad_norm": 0.2683800160884857, |
| "learning_rate": 8.114845669308723e-06, |
| "loss": 1.3207361698150635, |
| "step": 568 |
| }, |
| { |
| "epoch": 1.0178571428571428, |
| "grad_norm": 0.27859288454055786, |
| "learning_rate": 8.100407504461595e-06, |
| "loss": 1.3501830101013184, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.0214285714285714, |
| "grad_norm": 0.32225877046585083, |
| "learning_rate": 8.085929036852236e-06, |
| "loss": 1.1840941905975342, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.025, |
| "grad_norm": 0.23283155262470245, |
| "learning_rate": 8.071410490877097e-06, |
| "loss": 1.2650562524795532, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.0285714285714285, |
| "grad_norm": 0.1705978810787201, |
| "learning_rate": 8.0568520915538e-06, |
| "loss": 1.2940489053726196, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.032142857142857, |
| "grad_norm": 0.23754863440990448, |
| "learning_rate": 8.042254064517642e-06, |
| "loss": 1.3267643451690674, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.0357142857142858, |
| "grad_norm": 0.46769577264785767, |
| "learning_rate": 8.027616636018085e-06, |
| "loss": 1.2288154363632202, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.0392857142857144, |
| "grad_norm": 0.233358234167099, |
| "learning_rate": 8.012940032915263e-06, |
| "loss": 1.3615669012069702, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.042857142857143, |
| "grad_norm": 0.2691819369792938, |
| "learning_rate": 7.998224482676473e-06, |
| "loss": 1.3021140098571777, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.0464285714285715, |
| "grad_norm": 0.24730414152145386, |
| "learning_rate": 7.983470213372624e-06, |
| "loss": 1.2602746486663818, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.05, |
| "grad_norm": 0.2731882929801941, |
| "learning_rate": 7.96867745367473e-06, |
| "loss": 1.2430776357650757, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.0535714285714286, |
| "grad_norm": 0.22160141170024872, |
| "learning_rate": 7.953846432850346e-06, |
| "loss": 1.2589969635009766, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.0571428571428572, |
| "grad_norm": 0.2917991280555725, |
| "learning_rate": 7.938977380760024e-06, |
| "loss": 1.408372402191162, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.0607142857142857, |
| "grad_norm": 0.23420438170433044, |
| "learning_rate": 7.92407052785375e-06, |
| "loss": 1.3381731510162354, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.0642857142857143, |
| "grad_norm": 0.19835133850574493, |
| "learning_rate": 7.909126105167373e-06, |
| "loss": 1.3641246557235718, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.0678571428571428, |
| "grad_norm": 0.21805885434150696, |
| "learning_rate": 7.894144344319015e-06, |
| "loss": 1.2766021490097046, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.0714285714285714, |
| "grad_norm": 0.3379668593406677, |
| "learning_rate": 7.879125477505495e-06, |
| "loss": 1.2909208536148071, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.075, |
| "grad_norm": 0.3864686191082001, |
| "learning_rate": 7.864069737498722e-06, |
| "loss": 1.259904146194458, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.0785714285714285, |
| "grad_norm": 0.3104611933231354, |
| "learning_rate": 7.848977357642089e-06, |
| "loss": 1.3227314949035645, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.082142857142857, |
| "grad_norm": 0.244283065199852, |
| "learning_rate": 7.833848571846855e-06, |
| "loss": 1.3027191162109375, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.0857142857142856, |
| "grad_norm": 0.19385835528373718, |
| "learning_rate": 7.818683614588523e-06, |
| "loss": 1.0396664142608643, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.0892857142857142, |
| "grad_norm": 0.2750968933105469, |
| "learning_rate": 7.803482720903206e-06, |
| "loss": 1.1102863550186157, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.092857142857143, |
| "grad_norm": 0.3333893418312073, |
| "learning_rate": 7.788246126383977e-06, |
| "loss": 1.1634554862976074, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.0964285714285715, |
| "grad_norm": 0.28989356756210327, |
| "learning_rate": 7.77297406717723e-06, |
| "loss": 1.3986788988113403, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.1, |
| "grad_norm": 0.27835774421691895, |
| "learning_rate": 7.757666779979008e-06, |
| "loss": 1.2263062000274658, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.1035714285714286, |
| "grad_norm": 0.2572242021560669, |
| "learning_rate": 7.74232450203134e-06, |
| "loss": 1.2180155515670776, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.1071428571428572, |
| "grad_norm": 0.3894072473049164, |
| "learning_rate": 7.72694747111857e-06, |
| "loss": 1.478975534439087, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.1107142857142858, |
| "grad_norm": 0.4212060868740082, |
| "learning_rate": 7.711535925563655e-06, |
| "loss": 1.3129830360412598, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.1142857142857143, |
| "grad_norm": 0.23659296333789825, |
| "learning_rate": 7.696090104224492e-06, |
| "loss": 1.229081392288208, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.1178571428571429, |
| "grad_norm": 0.254404217004776, |
| "learning_rate": 7.680610246490199e-06, |
| "loss": 1.2878901958465576, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.1214285714285714, |
| "grad_norm": 0.3570263981819153, |
| "learning_rate": 7.665096592277415e-06, |
| "loss": 1.218833088874817, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.125, |
| "grad_norm": 0.27803489565849304, |
| "learning_rate": 7.649549382026575e-06, |
| "loss": 1.274793028831482, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.1285714285714286, |
| "grad_norm": 0.2562004327774048, |
| "learning_rate": 7.633968856698192e-06, |
| "loss": 1.3318731784820557, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.1321428571428571, |
| "grad_norm": 0.19307534396648407, |
| "learning_rate": 7.618355257769111e-06, |
| "loss": 1.2363682985305786, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.1357142857142857, |
| "grad_norm": 0.5484210848808289, |
| "learning_rate": 7.602708827228779e-06, |
| "loss": 1.259455680847168, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.1392857142857142, |
| "grad_norm": 0.2351217418909073, |
| "learning_rate": 7.587029807575482e-06, |
| "loss": 1.2625541687011719, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.1428571428571428, |
| "grad_norm": 0.5461699962615967, |
| "learning_rate": 7.571318441812599e-06, |
| "loss": 1.1984379291534424, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.1464285714285714, |
| "grad_norm": 0.30940407514572144, |
| "learning_rate": 7.55557497344482e-06, |
| "loss": 1.3161015510559082, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.15, |
| "grad_norm": 0.32747605443000793, |
| "learning_rate": 7.539799646474393e-06, |
| "loss": 1.234968900680542, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.1535714285714285, |
| "grad_norm": 0.2250605821609497, |
| "learning_rate": 7.523992705397321e-06, |
| "loss": 1.3490346670150757, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.157142857142857, |
| "grad_norm": 0.3528631925582886, |
| "learning_rate": 7.508154395199592e-06, |
| "loss": 1.350324034690857, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.1607142857142858, |
| "grad_norm": 0.247028186917305, |
| "learning_rate": 7.492284961353361e-06, |
| "loss": 1.285825252532959, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.1642857142857144, |
| "grad_norm": 0.26968345046043396, |
| "learning_rate": 7.4763846498131675e-06, |
| "loss": 1.123679518699646, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.167857142857143, |
| "grad_norm": 0.23967714607715607, |
| "learning_rate": 7.460453707012107e-06, |
| "loss": 1.2702839374542236, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.1714285714285715, |
| "grad_norm": 0.9412787556648254, |
| "learning_rate": 7.444492379858021e-06, |
| "loss": 1.3307619094848633, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.175, |
| "grad_norm": 0.60057133436203, |
| "learning_rate": 7.428500915729663e-06, |
| "loss": 1.218625783920288, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.1785714285714286, |
| "grad_norm": 0.2611408829689026, |
| "learning_rate": 7.412479562472873e-06, |
| "loss": 1.1818389892578125, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.1821428571428572, |
| "grad_norm": 0.21901297569274902, |
| "learning_rate": 7.3964285683967285e-06, |
| "loss": 1.2105083465576172, |
| "step": 662 |
| }, |
| { |
| "epoch": 1.1857142857142857, |
| "grad_norm": 0.9242513179779053, |
| "learning_rate": 7.380348182269701e-06, |
| "loss": 1.2359505891799927, |
| "step": 664 |
| }, |
| { |
| "epoch": 1.1892857142857143, |
| "grad_norm": 0.24152880907058716, |
| "learning_rate": 7.364238653315795e-06, |
| "loss": 1.268753170967102, |
| "step": 666 |
| }, |
| { |
| "epoch": 1.1928571428571428, |
| "grad_norm": 2.834768533706665, |
| "learning_rate": 7.348100231210697e-06, |
| "loss": 1.2450233697891235, |
| "step": 668 |
| }, |
| { |
| "epoch": 1.1964285714285714, |
| "grad_norm": 0.7332023978233337, |
| "learning_rate": 7.331933166077886e-06, |
| "loss": 1.2236673831939697, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 0.3339300751686096, |
| "learning_rate": 7.31573770848478e-06, |
| "loss": 1.1605288982391357, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.2035714285714285, |
| "grad_norm": 0.7548586130142212, |
| "learning_rate": 7.299514109438835e-06, |
| "loss": 1.276812195777893, |
| "step": 674 |
| }, |
| { |
| "epoch": 1.207142857142857, |
| "grad_norm": 0.32066163420677185, |
| "learning_rate": 7.283262620383664e-06, |
| "loss": 1.2277733087539673, |
| "step": 676 |
| }, |
| { |
| "epoch": 1.2107142857142856, |
| "grad_norm": 0.3439161777496338, |
| "learning_rate": 7.266983493195133e-06, |
| "loss": 1.443245768547058, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.2142857142857142, |
| "grad_norm": 0.28881630301475525, |
| "learning_rate": 7.250676980177468e-06, |
| "loss": 1.3642569780349731, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.217857142857143, |
| "grad_norm": 0.3376900553703308, |
| "learning_rate": 7.2343433340593315e-06, |
| "loss": 1.1232848167419434, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.2214285714285715, |
| "grad_norm": 0.5144054293632507, |
| "learning_rate": 7.217982807989915e-06, |
| "loss": 1.2558438777923584, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.225, |
| "grad_norm": 0.25952062010765076, |
| "learning_rate": 7.201595655535011e-06, |
| "loss": 1.3395494222640991, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.2285714285714286, |
| "grad_norm": 0.3723627030849457, |
| "learning_rate": 7.1851821306730876e-06, |
| "loss": 0.9402600526809692, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.2321428571428572, |
| "grad_norm": 0.3420025110244751, |
| "learning_rate": 7.168742487791345e-06, |
| "loss": 0.7468339204788208, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.2357142857142858, |
| "grad_norm": 0.34970328211784363, |
| "learning_rate": 7.152276981681781e-06, |
| "loss": 1.0327891111373901, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.2392857142857143, |
| "grad_norm": 0.3740408420562744, |
| "learning_rate": 7.135785867537235e-06, |
| "loss": 1.267980694770813, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.2428571428571429, |
| "grad_norm": 0.5309215188026428, |
| "learning_rate": 7.119269400947437e-06, |
| "loss": 1.4097453355789185, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.2464285714285714, |
| "grad_norm": 0.3004949390888214, |
| "learning_rate": 7.1027278378950486e-06, |
| "loss": 1.2045501470565796, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 0.6161743402481079, |
| "learning_rate": 7.086161434751684e-06, |
| "loss": 1.0838185548782349, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.2535714285714286, |
| "grad_norm": 0.22701780498027802, |
| "learning_rate": 7.069570448273951e-06, |
| "loss": 1.1616631746292114, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.2571428571428571, |
| "grad_norm": 0.3208640515804291, |
| "learning_rate": 7.0529551355994686e-06, |
| "loss": 1.2447824478149414, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.2607142857142857, |
| "grad_norm": 0.7384056448936462, |
| "learning_rate": 7.03631575424287e-06, |
| "loss": 1.1175577640533447, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.2642857142857142, |
| "grad_norm": 0.5497505068778992, |
| "learning_rate": 7.019652562091826e-06, |
| "loss": 1.141535758972168, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.2678571428571428, |
| "grad_norm": 0.3330208361148834, |
| "learning_rate": 7.0029658174030425e-06, |
| "loss": 1.3164706230163574, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.2714285714285714, |
| "grad_norm": 0.4105195701122284, |
| "learning_rate": 6.986255778798253e-06, |
| "loss": 1.234831690788269, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.275, |
| "grad_norm": 0.28338423371315, |
| "learning_rate": 6.9695227052602174e-06, |
| "loss": 1.1415457725524902, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.2785714285714285, |
| "grad_norm": 0.3706303536891937, |
| "learning_rate": 6.952766856128709e-06, |
| "loss": 1.199047565460205, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.282142857142857, |
| "grad_norm": 0.3346574902534485, |
| "learning_rate": 6.9359884910964856e-06, |
| "loss": 1.4197050333023071, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.2857142857142856, |
| "grad_norm": 0.3120553195476532, |
| "learning_rate": 6.919187870205275e-06, |
| "loss": 1.5487772226333618, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.2892857142857144, |
| "grad_norm": 0.2753259837627411, |
| "learning_rate": 6.902365253841737e-06, |
| "loss": 1.177211880683899, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.292857142857143, |
| "grad_norm": 0.2185521274805069, |
| "learning_rate": 6.885520902733435e-06, |
| "loss": 1.2806293964385986, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.2964285714285715, |
| "grad_norm": 0.14865590631961823, |
| "learning_rate": 6.868655077944788e-06, |
| "loss": 0.9303812980651855, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.3, |
| "grad_norm": 0.39503300189971924, |
| "learning_rate": 6.85176804087303e-06, |
| "loss": 1.5363171100616455, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.3035714285714286, |
| "grad_norm": 0.510991632938385, |
| "learning_rate": 6.834860053244154e-06, |
| "loss": 1.1531927585601807, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.3071428571428572, |
| "grad_norm": 0.28777721524238586, |
| "learning_rate": 6.8179313771088626e-06, |
| "loss": 1.2121974229812622, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.3107142857142857, |
| "grad_norm": 0.30707836151123047, |
| "learning_rate": 6.800982274838495e-06, |
| "loss": 1.4065004587173462, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.3142857142857143, |
| "grad_norm": 0.23764309287071228, |
| "learning_rate": 6.784013009120975e-06, |
| "loss": 1.4308959245681763, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.3178571428571428, |
| "grad_norm": 0.6906368136405945, |
| "learning_rate": 6.767023842956725e-06, |
| "loss": 1.1925731897354126, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.3214285714285714, |
| "grad_norm": 0.4775388538837433, |
| "learning_rate": 6.750015039654603e-06, |
| "loss": 1.6403999328613281, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.325, |
| "grad_norm": 0.2565818727016449, |
| "learning_rate": 6.732986862827813e-06, |
| "loss": 1.0603913068771362, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.3285714285714285, |
| "grad_norm": 0.47122514247894287, |
| "learning_rate": 6.7159395763898214e-06, |
| "loss": 1.3830267190933228, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.332142857142857, |
| "grad_norm": 0.5306914448738098, |
| "learning_rate": 6.698873444550271e-06, |
| "loss": 1.2981680631637573, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.3357142857142856, |
| "grad_norm": 0.408100426197052, |
| "learning_rate": 6.68178873181088e-06, |
| "loss": 1.2487084865570068, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.3392857142857144, |
| "grad_norm": 0.33308205008506775, |
| "learning_rate": 6.664685702961344e-06, |
| "loss": 0.9980481266975403, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.342857142857143, |
| "grad_norm": 0.20474325120449066, |
| "learning_rate": 6.647564623075236e-06, |
| "loss": 0.9687408804893494, |
| "step": 752 |
| }, |
| { |
| "epoch": 1.3464285714285715, |
| "grad_norm": 0.8245405554771423, |
| "learning_rate": 6.630425757505894e-06, |
| "loss": 1.33769953250885, |
| "step": 754 |
| }, |
| { |
| "epoch": 1.35, |
| "grad_norm": 0.2982644736766815, |
| "learning_rate": 6.613269371882308e-06, |
| "loss": 1.3833491802215576, |
| "step": 756 |
| }, |
| { |
| "epoch": 1.3535714285714286, |
| "grad_norm": 0.45085495710372925, |
| "learning_rate": 6.596095732105011e-06, |
| "loss": 1.2755907773971558, |
| "step": 758 |
| }, |
| { |
| "epoch": 1.3571428571428572, |
| "grad_norm": 0.29945558309555054, |
| "learning_rate": 6.5789051043419435e-06, |
| "loss": 1.2956531047821045, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.3607142857142858, |
| "grad_norm": 0.5544592142105103, |
| "learning_rate": 6.5616977550243435e-06, |
| "loss": 1.2718784809112549, |
| "step": 762 |
| }, |
| { |
| "epoch": 1.3642857142857143, |
| "grad_norm": 0.7638172507286072, |
| "learning_rate": 6.544473950842606e-06, |
| "loss": 1.126919150352478, |
| "step": 764 |
| }, |
| { |
| "epoch": 1.3678571428571429, |
| "grad_norm": 0.4192071557044983, |
| "learning_rate": 6.527233958742154e-06, |
| "loss": 1.4331161975860596, |
| "step": 766 |
| }, |
| { |
| "epoch": 1.3714285714285714, |
| "grad_norm": 0.2737813889980316, |
| "learning_rate": 6.509978045919307e-06, |
| "loss": 1.2379997968673706, |
| "step": 768 |
| }, |
| { |
| "epoch": 1.375, |
| "grad_norm": 0.7987821102142334, |
| "learning_rate": 6.492706479817125e-06, |
| "loss": 1.278856873512268, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.3785714285714286, |
| "grad_norm": 0.30944374203681946, |
| "learning_rate": 6.475419528121279e-06, |
| "loss": 1.3922899961471558, |
| "step": 772 |
| }, |
| { |
| "epoch": 1.3821428571428571, |
| "grad_norm": 0.29533934593200684, |
| "learning_rate": 6.45811745875589e-06, |
| "loss": 1.235024094581604, |
| "step": 774 |
| }, |
| { |
| "epoch": 1.3857142857142857, |
| "grad_norm": 0.788487434387207, |
| "learning_rate": 6.440800539879392e-06, |
| "loss": 1.1024410724639893, |
| "step": 776 |
| }, |
| { |
| "epoch": 1.3892857142857142, |
| "grad_norm": 0.3519847095012665, |
| "learning_rate": 6.423469039880355e-06, |
| "loss": 1.233741283416748, |
| "step": 778 |
| }, |
| { |
| "epoch": 1.3928571428571428, |
| "grad_norm": 0.18675316870212555, |
| "learning_rate": 6.406123227373343e-06, |
| "loss": 1.3022193908691406, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.3964285714285714, |
| "grad_norm": 0.263254314661026, |
| "learning_rate": 6.388763371194741e-06, |
| "loss": 1.2517147064208984, |
| "step": 782 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 0.35091346502304077, |
| "learning_rate": 6.371389740398597e-06, |
| "loss": 1.1601366996765137, |
| "step": 784 |
| }, |
| { |
| "epoch": 1.4035714285714285, |
| "grad_norm": 0.34103208780288696, |
| "learning_rate": 6.35400260425244e-06, |
| "loss": 1.3991872072219849, |
| "step": 786 |
| }, |
| { |
| "epoch": 1.407142857142857, |
| "grad_norm": 1.0600661039352417, |
| "learning_rate": 6.336602232233116e-06, |
| "loss": 1.4128477573394775, |
| "step": 788 |
| }, |
| { |
| "epoch": 1.4107142857142856, |
| "grad_norm": 0.6274294257164001, |
| "learning_rate": 6.319188894022612e-06, |
| "loss": 1.5149511098861694, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.4142857142857144, |
| "grad_norm": 0.25083670020103455, |
| "learning_rate": 6.301762859503869e-06, |
| "loss": 1.3468106985092163, |
| "step": 792 |
| }, |
| { |
| "epoch": 1.417857142857143, |
| "grad_norm": 0.4435229003429413, |
| "learning_rate": 6.284324398756606e-06, |
| "loss": 1.3005448579788208, |
| "step": 794 |
| }, |
| { |
| "epoch": 1.4214285714285715, |
| "grad_norm": 0.5059611201286316, |
| "learning_rate": 6.266873782053131e-06, |
| "loss": 1.0667213201522827, |
| "step": 796 |
| }, |
| { |
| "epoch": 1.425, |
| "grad_norm": 0.2751584053039551, |
| "learning_rate": 6.249411279854152e-06, |
| "loss": 1.1674690246582031, |
| "step": 798 |
| }, |
| { |
| "epoch": 1.4285714285714286, |
| "grad_norm": 0.2168678641319275, |
| "learning_rate": 6.231937162804584e-06, |
| "loss": 1.0654405355453491, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.4321428571428572, |
| "grad_norm": 0.6201224327087402, |
| "learning_rate": 6.214451701729363e-06, |
| "loss": 1.1552761793136597, |
| "step": 802 |
| }, |
| { |
| "epoch": 1.4357142857142857, |
| "grad_norm": 0.4682956635951996, |
| "learning_rate": 6.196955167629236e-06, |
| "loss": 1.3353182077407837, |
| "step": 804 |
| }, |
| { |
| "epoch": 1.4392857142857143, |
| "grad_norm": 0.3534834384918213, |
| "learning_rate": 6.179447831676566e-06, |
| "loss": 1.3080209493637085, |
| "step": 806 |
| }, |
| { |
| "epoch": 1.4428571428571428, |
| "grad_norm": 0.4813729226589203, |
| "learning_rate": 6.161929965211135e-06, |
| "loss": 1.3717149496078491, |
| "step": 808 |
| }, |
| { |
| "epoch": 1.4464285714285714, |
| "grad_norm": 0.26942121982574463, |
| "learning_rate": 6.144401839735931e-06, |
| "loss": 1.4133044481277466, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.45, |
| "grad_norm": 0.30204319953918457, |
| "learning_rate": 6.12686372691294e-06, |
| "loss": 1.581753134727478, |
| "step": 812 |
| }, |
| { |
| "epoch": 1.4535714285714285, |
| "grad_norm": 1.1933614015579224, |
| "learning_rate": 6.109315898558943e-06, |
| "loss": 1.1946600675582886, |
| "step": 814 |
| }, |
| { |
| "epoch": 1.457142857142857, |
| "grad_norm": 0.651054322719574, |
| "learning_rate": 6.091758626641296e-06, |
| "loss": 1.2849314212799072, |
| "step": 816 |
| }, |
| { |
| "epoch": 1.4607142857142856, |
| "grad_norm": 0.41265299916267395, |
| "learning_rate": 6.074192183273714e-06, |
| "loss": 1.2870151996612549, |
| "step": 818 |
| }, |
| { |
| "epoch": 1.4642857142857144, |
| "grad_norm": 0.2880115807056427, |
| "learning_rate": 6.056616840712065e-06, |
| "loss": 1.156186580657959, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.467857142857143, |
| "grad_norm": 0.31380829215049744, |
| "learning_rate": 6.039032871350136e-06, |
| "loss": 1.3075363636016846, |
| "step": 822 |
| }, |
| { |
| "epoch": 1.4714285714285715, |
| "grad_norm": 0.735464334487915, |
| "learning_rate": 6.021440547715418e-06, |
| "loss": 1.2372568845748901, |
| "step": 824 |
| }, |
| { |
| "epoch": 1.475, |
| "grad_norm": 0.3404405117034912, |
| "learning_rate": 6.0038401424648866e-06, |
| "loss": 1.3656535148620605, |
| "step": 826 |
| }, |
| { |
| "epoch": 1.4785714285714286, |
| "grad_norm": 1.0161242485046387, |
| "learning_rate": 5.986231928380764e-06, |
| "loss": 1.4575047492980957, |
| "step": 828 |
| }, |
| { |
| "epoch": 1.4821428571428572, |
| "grad_norm": 0.32120001316070557, |
| "learning_rate": 5.968616178366304e-06, |
| "loss": 1.1328424215316772, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.4857142857142858, |
| "grad_norm": 0.24318258464336395, |
| "learning_rate": 5.95099316544156e-06, |
| "loss": 1.1171592473983765, |
| "step": 832 |
| }, |
| { |
| "epoch": 1.4892857142857143, |
| "grad_norm": 0.2471759170293808, |
| "learning_rate": 5.9333631627391385e-06, |
| "loss": 1.1361713409423828, |
| "step": 834 |
| }, |
| { |
| "epoch": 1.4928571428571429, |
| "grad_norm": 0.31643709540367126, |
| "learning_rate": 5.915726443499992e-06, |
| "loss": 1.4550275802612305, |
| "step": 836 |
| }, |
| { |
| "epoch": 1.4964285714285714, |
| "grad_norm": 0.2178327739238739, |
| "learning_rate": 5.89808328106916e-06, |
| "loss": 1.1423126459121704, |
| "step": 838 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 0.38720911741256714, |
| "learning_rate": 5.880433948891548e-06, |
| "loss": 1.0535848140716553, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.5035714285714286, |
| "grad_norm": 0.4030672013759613, |
| "learning_rate": 5.862778720507684e-06, |
| "loss": 1.3946490287780762, |
| "step": 842 |
| }, |
| { |
| "epoch": 1.5071428571428571, |
| "grad_norm": 0.35578665137290955, |
| "learning_rate": 5.845117869549477e-06, |
| "loss": 1.5173096656799316, |
| "step": 844 |
| }, |
| { |
| "epoch": 1.5107142857142857, |
| "grad_norm": 0.3867500126361847, |
| "learning_rate": 5.827451669735977e-06, |
| "loss": 1.352368712425232, |
| "step": 846 |
| }, |
| { |
| "epoch": 1.5142857142857142, |
| "grad_norm": 0.9219626188278198, |
| "learning_rate": 5.80978039486914e-06, |
| "loss": 0.9382961988449097, |
| "step": 848 |
| }, |
| { |
| "epoch": 1.5178571428571428, |
| "grad_norm": 0.21979399025440216, |
| "learning_rate": 5.79210431882957e-06, |
| "loss": 0.8432712554931641, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.5214285714285714, |
| "grad_norm": 0.28859761357307434, |
| "learning_rate": 5.774423715572289e-06, |
| "loss": 1.296618938446045, |
| "step": 852 |
| }, |
| { |
| "epoch": 1.525, |
| "grad_norm": 0.4942507743835449, |
| "learning_rate": 5.756738859122483e-06, |
| "loss": 1.0648285150527954, |
| "step": 854 |
| }, |
| { |
| "epoch": 1.5285714285714285, |
| "grad_norm": 0.5750854015350342, |
| "learning_rate": 5.739050023571258e-06, |
| "loss": 1.0088112354278564, |
| "step": 856 |
| }, |
| { |
| "epoch": 1.532142857142857, |
| "grad_norm": 0.20957696437835693, |
| "learning_rate": 5.721357483071386e-06, |
| "loss": 1.0590897798538208, |
| "step": 858 |
| }, |
| { |
| "epoch": 1.5357142857142856, |
| "grad_norm": 0.8381152153015137, |
| "learning_rate": 5.703661511833064e-06, |
| "loss": 1.3163901567459106, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.5392857142857141, |
| "grad_norm": 0.4364100992679596, |
| "learning_rate": 5.68596238411966e-06, |
| "loss": 1.1863445043563843, |
| "step": 862 |
| }, |
| { |
| "epoch": 1.5428571428571427, |
| "grad_norm": 0.3657117784023285, |
| "learning_rate": 5.668260374243467e-06, |
| "loss": 0.98140949010849, |
| "step": 864 |
| }, |
| { |
| "epoch": 1.5464285714285713, |
| "grad_norm": 0.6113946437835693, |
| "learning_rate": 5.650555756561439e-06, |
| "loss": 1.3584340810775757, |
| "step": 866 |
| }, |
| { |
| "epoch": 1.55, |
| "grad_norm": 0.7465829849243164, |
| "learning_rate": 5.6328488054709575e-06, |
| "loss": 1.149134874343872, |
| "step": 868 |
| }, |
| { |
| "epoch": 1.5535714285714286, |
| "grad_norm": 0.9023903608322144, |
| "learning_rate": 5.615139795405559e-06, |
| "loss": 1.2276476621627808, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.5571428571428572, |
| "grad_norm": 0.5961250066757202, |
| "learning_rate": 5.5974290008307e-06, |
| "loss": 1.3803772926330566, |
| "step": 872 |
| }, |
| { |
| "epoch": 1.5607142857142857, |
| "grad_norm": 0.31303706765174866, |
| "learning_rate": 5.579716696239486e-06, |
| "loss": 0.8974480628967285, |
| "step": 874 |
| }, |
| { |
| "epoch": 1.5642857142857143, |
| "grad_norm": 0.49465271830558777, |
| "learning_rate": 5.562003156148434e-06, |
| "loss": 1.500373125076294, |
| "step": 876 |
| }, |
| { |
| "epoch": 1.5678571428571428, |
| "grad_norm": 0.4547047019004822, |
| "learning_rate": 5.544288655093203e-06, |
| "loss": 1.3437693119049072, |
| "step": 878 |
| }, |
| { |
| "epoch": 1.5714285714285714, |
| "grad_norm": 0.2680365741252899, |
| "learning_rate": 5.526573467624351e-06, |
| "loss": 1.0480762720108032, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.575, |
| "grad_norm": 0.2553335130214691, |
| "learning_rate": 5.508857868303068e-06, |
| "loss": 1.078729271888733, |
| "step": 882 |
| }, |
| { |
| "epoch": 1.5785714285714287, |
| "grad_norm": 0.2632956802845001, |
| "learning_rate": 5.491142131696934e-06, |
| "loss": 1.16781485080719, |
| "step": 884 |
| }, |
| { |
| "epoch": 1.5821428571428573, |
| "grad_norm": 0.42439237236976624, |
| "learning_rate": 5.473426532375651e-06, |
| "loss": 1.0907145738601685, |
| "step": 886 |
| }, |
| { |
| "epoch": 1.5857142857142859, |
| "grad_norm": 0.4016067087650299, |
| "learning_rate": 5.455711344906797e-06, |
| "loss": 1.0479315519332886, |
| "step": 888 |
| }, |
| { |
| "epoch": 1.5892857142857144, |
| "grad_norm": 0.787295401096344, |
| "learning_rate": 5.437996843851567e-06, |
| "loss": 1.1056879758834839, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.592857142857143, |
| "grad_norm": 0.24893441796302795, |
| "learning_rate": 5.420283303760515e-06, |
| "loss": 1.086808443069458, |
| "step": 892 |
| }, |
| { |
| "epoch": 1.5964285714285715, |
| "grad_norm": 1.0016993284225464, |
| "learning_rate": 5.402570999169303e-06, |
| "loss": 1.4259756803512573, |
| "step": 894 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 0.5658416748046875, |
| "learning_rate": 5.384860204594442e-06, |
| "loss": 1.175308346748352, |
| "step": 896 |
| }, |
| { |
| "epoch": 1.6035714285714286, |
| "grad_norm": 0.32960644364356995, |
| "learning_rate": 5.367151194529045e-06, |
| "loss": 1.3044936656951904, |
| "step": 898 |
| }, |
| { |
| "epoch": 1.6071428571428572, |
| "grad_norm": 1.566615343093872, |
| "learning_rate": 5.349444243438563e-06, |
| "loss": 1.1787108182907104, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.6107142857142858, |
| "grad_norm": 0.3008659780025482, |
| "learning_rate": 5.331739625756535e-06, |
| "loss": 1.2578707933425903, |
| "step": 902 |
| }, |
| { |
| "epoch": 1.6142857142857143, |
| "grad_norm": 0.3048568367958069, |
| "learning_rate": 5.314037615880341e-06, |
| "loss": 1.214415192604065, |
| "step": 904 |
| }, |
| { |
| "epoch": 1.6178571428571429, |
| "grad_norm": 0.30796509981155396, |
| "learning_rate": 5.296338488166939e-06, |
| "loss": 1.2612226009368896, |
| "step": 906 |
| }, |
| { |
| "epoch": 1.6214285714285714, |
| "grad_norm": 0.3856910467147827, |
| "learning_rate": 5.278642516928617e-06, |
| "loss": 1.1769757270812988, |
| "step": 908 |
| }, |
| { |
| "epoch": 1.625, |
| "grad_norm": 0.4512476921081543, |
| "learning_rate": 5.260949976428745e-06, |
| "loss": 1.058244228363037, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.6285714285714286, |
| "grad_norm": 0.5113015174865723, |
| "learning_rate": 5.243261140877517e-06, |
| "loss": 1.3994414806365967, |
| "step": 912 |
| }, |
| { |
| "epoch": 1.6321428571428571, |
| "grad_norm": 0.24723981320858002, |
| "learning_rate": 5.225576284427712e-06, |
| "loss": 1.29803466796875, |
| "step": 914 |
| }, |
| { |
| "epoch": 1.6357142857142857, |
| "grad_norm": 0.2900439202785492, |
| "learning_rate": 5.207895681170432e-06, |
| "loss": 1.341897964477539, |
| "step": 916 |
| }, |
| { |
| "epoch": 1.6392857142857142, |
| "grad_norm": 0.2555374205112457, |
| "learning_rate": 5.190219605130863e-06, |
| "loss": 1.1864595413208008, |
| "step": 918 |
| }, |
| { |
| "epoch": 1.6428571428571428, |
| "grad_norm": 0.31760746240615845, |
| "learning_rate": 5.172548330264023e-06, |
| "loss": 1.2025091648101807, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.6464285714285714, |
| "grad_norm": 0.28426891565322876, |
| "learning_rate": 5.154882130450525e-06, |
| "loss": 1.3937333822250366, |
| "step": 922 |
| }, |
| { |
| "epoch": 1.65, |
| "grad_norm": 0.26754945516586304, |
| "learning_rate": 5.137221279492317e-06, |
| "loss": 1.1592669486999512, |
| "step": 924 |
| }, |
| { |
| "epoch": 1.6535714285714285, |
| "grad_norm": 0.398725301027298, |
| "learning_rate": 5.119566051108453e-06, |
| "loss": 1.041808009147644, |
| "step": 926 |
| }, |
| { |
| "epoch": 1.657142857142857, |
| "grad_norm": 0.24082130193710327, |
| "learning_rate": 5.10191671893084e-06, |
| "loss": 1.1113499402999878, |
| "step": 928 |
| }, |
| { |
| "epoch": 1.6607142857142856, |
| "grad_norm": 0.32985880970954895, |
| "learning_rate": 5.08427355650001e-06, |
| "loss": 1.243566632270813, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.6642857142857141, |
| "grad_norm": 0.22729991376399994, |
| "learning_rate": 5.066636837260863e-06, |
| "loss": 1.218003511428833, |
| "step": 932 |
| }, |
| { |
| "epoch": 1.6678571428571427, |
| "grad_norm": 0.20701321959495544, |
| "learning_rate": 5.049006834558443e-06, |
| "loss": 1.1665146350860596, |
| "step": 934 |
| }, |
| { |
| "epoch": 1.6714285714285713, |
| "grad_norm": 0.7482126355171204, |
| "learning_rate": 5.031383821633695e-06, |
| "loss": 1.0261443853378296, |
| "step": 936 |
| }, |
| { |
| "epoch": 1.675, |
| "grad_norm": 0.6510646939277649, |
| "learning_rate": 5.013768071619237e-06, |
| "loss": 1.1913405656814575, |
| "step": 938 |
| }, |
| { |
| "epoch": 1.6785714285714286, |
| "grad_norm": 0.3893536329269409, |
| "learning_rate": 4.996159857535116e-06, |
| "loss": 1.2498658895492554, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.6821428571428572, |
| "grad_norm": 0.33820265531539917, |
| "learning_rate": 4.9785594522845835e-06, |
| "loss": 1.2645461559295654, |
| "step": 942 |
| }, |
| { |
| "epoch": 1.6857142857142857, |
| "grad_norm": 0.2857305407524109, |
| "learning_rate": 4.9609671286498655e-06, |
| "loss": 1.1648997068405151, |
| "step": 944 |
| }, |
| { |
| "epoch": 1.6892857142857143, |
| "grad_norm": 0.35911425948143005, |
| "learning_rate": 4.943383159287936e-06, |
| "loss": 1.3200312852859497, |
| "step": 946 |
| }, |
| { |
| "epoch": 1.6928571428571428, |
| "grad_norm": 0.28003281354904175, |
| "learning_rate": 4.925807816726288e-06, |
| "loss": 1.2886927127838135, |
| "step": 948 |
| }, |
| { |
| "epoch": 1.6964285714285714, |
| "grad_norm": 0.3707423210144043, |
| "learning_rate": 4.908241373358707e-06, |
| "loss": 1.2256838083267212, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.7, |
| "grad_norm": 0.37781476974487305, |
| "learning_rate": 4.890684101441059e-06, |
| "loss": 1.261880874633789, |
| "step": 952 |
| }, |
| { |
| "epoch": 1.7035714285714287, |
| "grad_norm": 0.455138623714447, |
| "learning_rate": 4.873136273087061e-06, |
| "loss": 1.1675777435302734, |
| "step": 954 |
| }, |
| { |
| "epoch": 1.7071428571428573, |
| "grad_norm": 0.3084830641746521, |
| "learning_rate": 4.855598160264071e-06, |
| "loss": 1.0751243829727173, |
| "step": 956 |
| }, |
| { |
| "epoch": 1.7107142857142859, |
| "grad_norm": 0.33484798669815063, |
| "learning_rate": 4.838070034788865e-06, |
| "loss": 1.2969300746917725, |
| "step": 958 |
| }, |
| { |
| "epoch": 1.7142857142857144, |
| "grad_norm": 0.45519745349884033, |
| "learning_rate": 4.820552168323434e-06, |
| "loss": 1.1682568788528442, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.717857142857143, |
| "grad_norm": 0.3936917185783386, |
| "learning_rate": 4.803044832370765e-06, |
| "loss": 1.2029849290847778, |
| "step": 962 |
| }, |
| { |
| "epoch": 1.7214285714285715, |
| "grad_norm": 0.2847800850868225, |
| "learning_rate": 4.7855482982706396e-06, |
| "loss": 1.308813452720642, |
| "step": 964 |
| }, |
| { |
| "epoch": 1.725, |
| "grad_norm": 0.2914465069770813, |
| "learning_rate": 4.768062837195417e-06, |
| "loss": 1.2900055646896362, |
| "step": 966 |
| }, |
| { |
| "epoch": 1.7285714285714286, |
| "grad_norm": 0.5518858432769775, |
| "learning_rate": 4.7505887201458485e-06, |
| "loss": 1.2404606342315674, |
| "step": 968 |
| }, |
| { |
| "epoch": 1.7321428571428572, |
| "grad_norm": 0.34736767411231995, |
| "learning_rate": 4.73312621794687e-06, |
| "loss": 1.1192835569381714, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.7357142857142858, |
| "grad_norm": 0.27989068627357483, |
| "learning_rate": 4.715675601243396e-06, |
| "loss": 1.2646175622940063, |
| "step": 972 |
| }, |
| { |
| "epoch": 1.7392857142857143, |
| "grad_norm": 0.2832848131656647, |
| "learning_rate": 4.698237140496132e-06, |
| "loss": 1.2004600763320923, |
| "step": 974 |
| }, |
| { |
| "epoch": 1.7428571428571429, |
| "grad_norm": 1.7877376079559326, |
| "learning_rate": 4.68081110597739e-06, |
| "loss": 1.2224751710891724, |
| "step": 976 |
| }, |
| { |
| "epoch": 1.7464285714285714, |
| "grad_norm": 0.2644546627998352, |
| "learning_rate": 4.663397767766885e-06, |
| "loss": 1.2846026420593262, |
| "step": 978 |
| }, |
| { |
| "epoch": 1.75, |
| "grad_norm": 0.23440435528755188, |
| "learning_rate": 4.6459973957475625e-06, |
| "loss": 1.2761108875274658, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.7535714285714286, |
| "grad_norm": 0.29541414976119995, |
| "learning_rate": 4.628610259601406e-06, |
| "loss": 1.2253004312515259, |
| "step": 982 |
| }, |
| { |
| "epoch": 1.7571428571428571, |
| "grad_norm": 0.3721539378166199, |
| "learning_rate": 4.611236628805259e-06, |
| "loss": 1.217316746711731, |
| "step": 984 |
| }, |
| { |
| "epoch": 1.7607142857142857, |
| "grad_norm": 0.23486927151679993, |
| "learning_rate": 4.593876772626659e-06, |
| "loss": 1.238864779472351, |
| "step": 986 |
| }, |
| { |
| "epoch": 1.7642857142857142, |
| "grad_norm": 0.35403114557266235, |
| "learning_rate": 4.576530960119646e-06, |
| "loss": 1.2506440877914429, |
| "step": 988 |
| }, |
| { |
| "epoch": 1.7678571428571428, |
| "grad_norm": 0.24216312170028687, |
| "learning_rate": 4.55919946012061e-06, |
| "loss": 1.203848123550415, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.7714285714285714, |
| "grad_norm": 0.5742025971412659, |
| "learning_rate": 4.54188254124411e-06, |
| "loss": 1.2036422491073608, |
| "step": 992 |
| }, |
| { |
| "epoch": 1.775, |
| "grad_norm": 0.4332943260669708, |
| "learning_rate": 4.524580471878724e-06, |
| "loss": 1.1484333276748657, |
| "step": 994 |
| }, |
| { |
| "epoch": 1.7785714285714285, |
| "grad_norm": 0.2262076586484909, |
| "learning_rate": 4.507293520182877e-06, |
| "loss": 1.2005127668380737, |
| "step": 996 |
| }, |
| { |
| "epoch": 1.782142857142857, |
| "grad_norm": 0.18153786659240723, |
| "learning_rate": 4.490021954080695e-06, |
| "loss": 1.1209759712219238, |
| "step": 998 |
| }, |
| { |
| "epoch": 1.7857142857142856, |
| "grad_norm": 0.2752821147441864, |
| "learning_rate": 4.472766041257846e-06, |
| "loss": 1.1912975311279297, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.7892857142857141, |
| "grad_norm": 0.37398797273635864, |
| "learning_rate": 4.4555260491573956e-06, |
| "loss": 1.1634106636047363, |
| "step": 1002 |
| }, |
| { |
| "epoch": 1.7928571428571427, |
| "grad_norm": 0.4885188341140747, |
| "learning_rate": 4.438302244975659e-06, |
| "loss": 1.19752037525177, |
| "step": 1004 |
| }, |
| { |
| "epoch": 1.7964285714285713, |
| "grad_norm": 0.20963414013385773, |
| "learning_rate": 4.421094895658058e-06, |
| "loss": 1.1573578119277954, |
| "step": 1006 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 0.19768458604812622, |
| "learning_rate": 4.403904267894991e-06, |
| "loss": 1.1309683322906494, |
| "step": 1008 |
| }, |
| { |
| "epoch": 1.8035714285714286, |
| "grad_norm": 0.3265831470489502, |
| "learning_rate": 4.386730628117692e-06, |
| "loss": 1.210740566253662, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.8071428571428572, |
| "grad_norm": 0.24831008911132812, |
| "learning_rate": 4.369574242494108e-06, |
| "loss": 1.1857199668884277, |
| "step": 1012 |
| }, |
| { |
| "epoch": 1.8107142857142857, |
| "grad_norm": 0.24806837737560272, |
| "learning_rate": 4.3524353769247665e-06, |
| "loss": 1.0957400798797607, |
| "step": 1014 |
| }, |
| { |
| "epoch": 1.8142857142857143, |
| "grad_norm": 0.21978451311588287, |
| "learning_rate": 4.335314297038656e-06, |
| "loss": 1.1512374877929688, |
| "step": 1016 |
| }, |
| { |
| "epoch": 1.8178571428571428, |
| "grad_norm": 0.18174096941947937, |
| "learning_rate": 4.318211268189121e-06, |
| "loss": 1.1074084043502808, |
| "step": 1018 |
| }, |
| { |
| "epoch": 1.8214285714285714, |
| "grad_norm": 0.1807389110326767, |
| "learning_rate": 4.3011265554497305e-06, |
| "loss": 1.1385325193405151, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.825, |
| "grad_norm": 0.3229348063468933, |
| "learning_rate": 4.28406042361018e-06, |
| "loss": 1.119950771331787, |
| "step": 1022 |
| }, |
| { |
| "epoch": 1.8285714285714287, |
| "grad_norm": 0.21613694727420807, |
| "learning_rate": 4.267013137172189e-06, |
| "loss": 1.1364243030548096, |
| "step": 1024 |
| }, |
| { |
| "epoch": 1.8321428571428573, |
| "grad_norm": 0.5674333572387695, |
| "learning_rate": 4.249984960345399e-06, |
| "loss": 1.1446290016174316, |
| "step": 1026 |
| }, |
| { |
| "epoch": 1.8357142857142859, |
| "grad_norm": 0.19522684812545776, |
| "learning_rate": 4.232976157043277e-06, |
| "loss": 1.1350977420806885, |
| "step": 1028 |
| }, |
| { |
| "epoch": 1.8392857142857144, |
| "grad_norm": 0.22652848064899445, |
| "learning_rate": 4.2159869908790275e-06, |
| "loss": 1.1374115943908691, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.842857142857143, |
| "grad_norm": 0.20917841792106628, |
| "learning_rate": 4.199017725161505e-06, |
| "loss": 1.1824545860290527, |
| "step": 1032 |
| }, |
| { |
| "epoch": 1.8464285714285715, |
| "grad_norm": 0.2631721496582031, |
| "learning_rate": 4.182068622891139e-06, |
| "loss": 1.1770212650299072, |
| "step": 1034 |
| }, |
| { |
| "epoch": 1.85, |
| "grad_norm": 0.24983558058738708, |
| "learning_rate": 4.165139946755847e-06, |
| "loss": 1.161262035369873, |
| "step": 1036 |
| }, |
| { |
| "epoch": 1.8535714285714286, |
| "grad_norm": 0.31537604331970215, |
| "learning_rate": 4.148231959126973e-06, |
| "loss": 1.1958869695663452, |
| "step": 1038 |
| }, |
| { |
| "epoch": 1.8571428571428572, |
| "grad_norm": 0.3142789900302887, |
| "learning_rate": 4.131344922055213e-06, |
| "loss": 1.1789402961730957, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.8607142857142858, |
| "grad_norm": 0.42967483401298523, |
| "learning_rate": 4.114479097266567e-06, |
| "loss": 1.1411830186843872, |
| "step": 1042 |
| }, |
| { |
| "epoch": 1.8642857142857143, |
| "grad_norm": 0.21074344217777252, |
| "learning_rate": 4.0976347461582656e-06, |
| "loss": 1.17338228225708, |
| "step": 1044 |
| }, |
| { |
| "epoch": 1.8678571428571429, |
| "grad_norm": 0.33415719866752625, |
| "learning_rate": 4.080812129794728e-06, |
| "loss": 1.1420398950576782, |
| "step": 1046 |
| }, |
| { |
| "epoch": 1.8714285714285714, |
| "grad_norm": 0.16336952149868011, |
| "learning_rate": 4.064011508903516e-06, |
| "loss": 1.1628490686416626, |
| "step": 1048 |
| }, |
| { |
| "epoch": 1.875, |
| "grad_norm": 0.2252008020877838, |
| "learning_rate": 4.047233143871292e-06, |
| "loss": 1.173589825630188, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.8785714285714286, |
| "grad_norm": 0.33176442980766296, |
| "learning_rate": 4.030477294739783e-06, |
| "loss": 1.194374918937683, |
| "step": 1052 |
| }, |
| { |
| "epoch": 1.8821428571428571, |
| "grad_norm": 0.29097726941108704, |
| "learning_rate": 4.013744221201749e-06, |
| "loss": 1.1737301349639893, |
| "step": 1054 |
| }, |
| { |
| "epoch": 1.8857142857142857, |
| "grad_norm": 0.1832679808139801, |
| "learning_rate": 3.997034182596958e-06, |
| "loss": 1.110135793685913, |
| "step": 1056 |
| }, |
| { |
| "epoch": 1.8892857142857142, |
| "grad_norm": 0.2953426241874695, |
| "learning_rate": 3.980347437908175e-06, |
| "loss": 1.1428486108779907, |
| "step": 1058 |
| }, |
| { |
| "epoch": 1.8928571428571428, |
| "grad_norm": 0.20754416286945343, |
| "learning_rate": 3.963684245757132e-06, |
| "loss": 1.17241632938385, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.8964285714285714, |
| "grad_norm": 0.29985517263412476, |
| "learning_rate": 3.9470448644005345e-06, |
| "loss": 1.2037956714630127, |
| "step": 1062 |
| }, |
| { |
| "epoch": 1.9, |
| "grad_norm": 0.24180017411708832, |
| "learning_rate": 3.930429551726049e-06, |
| "loss": 1.1744909286499023, |
| "step": 1064 |
| }, |
| { |
| "epoch": 1.9035714285714285, |
| "grad_norm": 0.1725412905216217, |
| "learning_rate": 3.913838565248318e-06, |
| "loss": 1.1504842042922974, |
| "step": 1066 |
| }, |
| { |
| "epoch": 1.907142857142857, |
| "grad_norm": 0.19483552873134613, |
| "learning_rate": 3.8972721621049545e-06, |
| "loss": 1.1242973804473877, |
| "step": 1068 |
| }, |
| { |
| "epoch": 1.9107142857142856, |
| "grad_norm": 0.2150045484304428, |
| "learning_rate": 3.880730599052565e-06, |
| "loss": 1.1571553945541382, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.9142857142857141, |
| "grad_norm": 0.26055601239204407, |
| "learning_rate": 3.864214132462766e-06, |
| "loss": 1.1744543313980103, |
| "step": 1072 |
| }, |
| { |
| "epoch": 1.9178571428571427, |
| "grad_norm": 0.20224107801914215, |
| "learning_rate": 3.84772301831822e-06, |
| "loss": 1.129955768585205, |
| "step": 1074 |
| }, |
| { |
| "epoch": 1.9214285714285713, |
| "grad_norm": 0.21899673342704773, |
| "learning_rate": 3.831257512208657e-06, |
| "loss": 1.1564751863479614, |
| "step": 1076 |
| }, |
| { |
| "epoch": 1.925, |
| "grad_norm": 0.24604743719100952, |
| "learning_rate": 3.814817869326915e-06, |
| "loss": 1.1490484476089478, |
| "step": 1078 |
| }, |
| { |
| "epoch": 1.9285714285714286, |
| "grad_norm": 0.1920636147260666, |
| "learning_rate": 3.7984043444649898e-06, |
| "loss": 1.1944819688796997, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.9321428571428572, |
| "grad_norm": 0.2951393723487854, |
| "learning_rate": 3.782017192010087e-06, |
| "loss": 1.2130813598632812, |
| "step": 1082 |
| }, |
| { |
| "epoch": 1.9357142857142857, |
| "grad_norm": 0.38370734453201294, |
| "learning_rate": 3.76565666594067e-06, |
| "loss": 1.1711630821228027, |
| "step": 1084 |
| }, |
| { |
| "epoch": 1.9392857142857143, |
| "grad_norm": 0.7297260165214539, |
| "learning_rate": 3.749323019822534e-06, |
| "loss": 1.1901503801345825, |
| "step": 1086 |
| }, |
| { |
| "epoch": 1.9428571428571428, |
| "grad_norm": 0.22041039168834686, |
| "learning_rate": 3.7330165068048673e-06, |
| "loss": 1.1663475036621094, |
| "step": 1088 |
| }, |
| { |
| "epoch": 1.9464285714285714, |
| "grad_norm": 0.2529982626438141, |
| "learning_rate": 3.7167373796163377e-06, |
| "loss": 1.1222208738327026, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.95, |
| "grad_norm": 0.22839988768100739, |
| "learning_rate": 3.700485890561167e-06, |
| "loss": 1.1396700143814087, |
| "step": 1092 |
| }, |
| { |
| "epoch": 1.9535714285714287, |
| "grad_norm": 0.32207345962524414, |
| "learning_rate": 3.6842622915152228e-06, |
| "loss": 1.1646703481674194, |
| "step": 1094 |
| }, |
| { |
| "epoch": 1.9571428571428573, |
| "grad_norm": 0.2876273989677429, |
| "learning_rate": 3.668066833922116e-06, |
| "loss": 1.148516058921814, |
| "step": 1096 |
| }, |
| { |
| "epoch": 1.9607142857142859, |
| "grad_norm": 0.2196146845817566, |
| "learning_rate": 3.6518997687893053e-06, |
| "loss": 1.1533443927764893, |
| "step": 1098 |
| }, |
| { |
| "epoch": 1.9642857142857144, |
| "grad_norm": 0.46365395188331604, |
| "learning_rate": 3.635761346684206e-06, |
| "loss": 1.1947966814041138, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.967857142857143, |
| "grad_norm": 0.2954294681549072, |
| "learning_rate": 3.619651817730302e-06, |
| "loss": 1.1832884550094604, |
| "step": 1102 |
| }, |
| { |
| "epoch": 1.9714285714285715, |
| "grad_norm": 0.2565920650959015, |
| "learning_rate": 3.603571431603272e-06, |
| "loss": 1.1965795755386353, |
| "step": 1104 |
| }, |
| { |
| "epoch": 1.975, |
| "grad_norm": 0.2640427350997925, |
| "learning_rate": 3.587520437527128e-06, |
| "loss": 1.140123963356018, |
| "step": 1106 |
| }, |
| { |
| "epoch": 1.9785714285714286, |
| "grad_norm": 0.26683422923088074, |
| "learning_rate": 3.571499084270338e-06, |
| "loss": 1.1581156253814697, |
| "step": 1108 |
| }, |
| { |
| "epoch": 1.9821428571428572, |
| "grad_norm": 0.2290692776441574, |
| "learning_rate": 3.5555076201419816e-06, |
| "loss": 1.174959421157837, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.9857142857142858, |
| "grad_norm": 0.2061983048915863, |
| "learning_rate": 3.5395462929878945e-06, |
| "loss": 1.220007061958313, |
| "step": 1112 |
| }, |
| { |
| "epoch": 1.9892857142857143, |
| "grad_norm": 0.20125523209571838, |
| "learning_rate": 3.5236153501868343e-06, |
| "loss": 1.2462403774261475, |
| "step": 1114 |
| }, |
| { |
| "epoch": 1.9928571428571429, |
| "grad_norm": 0.29600805044174194, |
| "learning_rate": 3.5077150386466406e-06, |
| "loss": 1.2024950981140137, |
| "step": 1116 |
| }, |
| { |
| "epoch": 1.9964285714285714, |
| "grad_norm": 0.2931258976459503, |
| "learning_rate": 3.4918456048004106e-06, |
| "loss": 1.1237006187438965, |
| "step": 1118 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.4734819829463959, |
| "learning_rate": 3.4760072946026786e-06, |
| "loss": 1.1085011959075928, |
| "step": 1120 |
| }, |
| { |
| "epoch": 2.0035714285714286, |
| "grad_norm": 0.20331430435180664, |
| "learning_rate": 3.46020035352561e-06, |
| "loss": 1.2824596166610718, |
| "step": 1122 |
| }, |
| { |
| "epoch": 2.007142857142857, |
| "grad_norm": 0.46622058749198914, |
| "learning_rate": 3.444425026555182e-06, |
| "loss": 1.2747101783752441, |
| "step": 1124 |
| }, |
| { |
| "epoch": 2.0107142857142857, |
| "grad_norm": 0.19980192184448242, |
| "learning_rate": 3.4286815581874045e-06, |
| "loss": 1.2517393827438354, |
| "step": 1126 |
| }, |
| { |
| "epoch": 2.0142857142857142, |
| "grad_norm": 0.32897406816482544, |
| "learning_rate": 3.4129701924245173e-06, |
| "loss": 1.2301400899887085, |
| "step": 1128 |
| }, |
| { |
| "epoch": 2.017857142857143, |
| "grad_norm": 0.17299680411815643, |
| "learning_rate": 3.397291172771221e-06, |
| "loss": 1.2544574737548828, |
| "step": 1130 |
| }, |
| { |
| "epoch": 2.0214285714285714, |
| "grad_norm": 0.2090325653553009, |
| "learning_rate": 3.3816447422308883e-06, |
| "loss": 1.0791321992874146, |
| "step": 1132 |
| }, |
| { |
| "epoch": 2.025, |
| "grad_norm": 0.2806832790374756, |
| "learning_rate": 3.366031143301811e-06, |
| "loss": 1.1756961345672607, |
| "step": 1134 |
| }, |
| { |
| "epoch": 2.0285714285714285, |
| "grad_norm": 0.4019312858581543, |
| "learning_rate": 3.3504506179734254e-06, |
| "loss": 1.1622370481491089, |
| "step": 1136 |
| }, |
| { |
| "epoch": 2.032142857142857, |
| "grad_norm": 0.22266216576099396, |
| "learning_rate": 3.334903407722587e-06, |
| "loss": 1.234253168106079, |
| "step": 1138 |
| }, |
| { |
| "epoch": 2.0357142857142856, |
| "grad_norm": 0.29923903942108154, |
| "learning_rate": 3.319389753509803e-06, |
| "loss": 1.1241004467010498, |
| "step": 1140 |
| }, |
| { |
| "epoch": 2.039285714285714, |
| "grad_norm": 0.3284701704978943, |
| "learning_rate": 3.30390989577551e-06, |
| "loss": 1.260522723197937, |
| "step": 1142 |
| }, |
| { |
| "epoch": 2.0428571428571427, |
| "grad_norm": 0.4323379099369049, |
| "learning_rate": 3.288464074436346e-06, |
| "loss": 1.1753382682800293, |
| "step": 1144 |
| }, |
| { |
| "epoch": 2.0464285714285713, |
| "grad_norm": 0.3188895285129547, |
| "learning_rate": 3.273052528881433e-06, |
| "loss": 1.1759196519851685, |
| "step": 1146 |
| }, |
| { |
| "epoch": 2.05, |
| "grad_norm": 0.754629373550415, |
| "learning_rate": 3.257675497968661e-06, |
| "loss": 1.0839532613754272, |
| "step": 1148 |
| }, |
| { |
| "epoch": 2.0535714285714284, |
| "grad_norm": 0.261398047208786, |
| "learning_rate": 3.2423332200209946e-06, |
| "loss": 1.1668034791946411, |
| "step": 1150 |
| }, |
| { |
| "epoch": 2.057142857142857, |
| "grad_norm": 0.3192571699619293, |
| "learning_rate": 3.2270259328227703e-06, |
| "loss": 1.312312364578247, |
| "step": 1152 |
| }, |
| { |
| "epoch": 2.0607142857142855, |
| "grad_norm": 0.3842572271823883, |
| "learning_rate": 3.2117538736160235e-06, |
| "loss": 1.241450548171997, |
| "step": 1154 |
| }, |
| { |
| "epoch": 2.064285714285714, |
| "grad_norm": 0.3109821677207947, |
| "learning_rate": 3.1965172790967967e-06, |
| "loss": 1.2660008668899536, |
| "step": 1156 |
| }, |
| { |
| "epoch": 2.067857142857143, |
| "grad_norm": 0.30365416407585144, |
| "learning_rate": 3.1813163854114793e-06, |
| "loss": 1.1892515420913696, |
| "step": 1158 |
| }, |
| { |
| "epoch": 2.0714285714285716, |
| "grad_norm": 0.26805219054222107, |
| "learning_rate": 3.1661514281531464e-06, |
| "loss": 1.2073129415512085, |
| "step": 1160 |
| }, |
| { |
| "epoch": 2.075, |
| "grad_norm": 0.26900723576545715, |
| "learning_rate": 3.1510226423579127e-06, |
| "loss": 1.1416363716125488, |
| "step": 1162 |
| }, |
| { |
| "epoch": 2.0785714285714287, |
| "grad_norm": 0.3996395468711853, |
| "learning_rate": 3.135930262501279e-06, |
| "loss": 1.2287384271621704, |
| "step": 1164 |
| }, |
| { |
| "epoch": 2.0821428571428573, |
| "grad_norm": 0.3018134832382202, |
| "learning_rate": 3.120874522494506e-06, |
| "loss": 1.2006416320800781, |
| "step": 1166 |
| }, |
| { |
| "epoch": 2.085714285714286, |
| "grad_norm": 0.16339807212352753, |
| "learning_rate": 3.105855655680986e-06, |
| "loss": 0.9185248017311096, |
| "step": 1168 |
| }, |
| { |
| "epoch": 2.0892857142857144, |
| "grad_norm": 0.3090437352657318, |
| "learning_rate": 3.090873894832628e-06, |
| "loss": 0.9894356727600098, |
| "step": 1170 |
| }, |
| { |
| "epoch": 2.092857142857143, |
| "grad_norm": 0.30770227313041687, |
| "learning_rate": 3.07592947214625e-06, |
| "loss": 1.0587633848190308, |
| "step": 1172 |
| }, |
| { |
| "epoch": 2.0964285714285715, |
| "grad_norm": 0.32658347487449646, |
| "learning_rate": 3.0610226192399767e-06, |
| "loss": 1.2783530950546265, |
| "step": 1174 |
| }, |
| { |
| "epoch": 2.1, |
| "grad_norm": 0.3846922218799591, |
| "learning_rate": 3.0461535671496537e-06, |
| "loss": 1.0930966138839722, |
| "step": 1176 |
| }, |
| { |
| "epoch": 2.1035714285714286, |
| "grad_norm": 0.44550713896751404, |
| "learning_rate": 3.0313225463252716e-06, |
| "loss": 1.0916811227798462, |
| "step": 1178 |
| }, |
| { |
| "epoch": 2.107142857142857, |
| "grad_norm": 0.9442609548568726, |
| "learning_rate": 3.0165297866273766e-06, |
| "loss": 1.2753980159759521, |
| "step": 1180 |
| }, |
| { |
| "epoch": 2.1107142857142858, |
| "grad_norm": 0.2832079529762268, |
| "learning_rate": 3.0017755173235295e-06, |
| "loss": 1.195408821105957, |
| "step": 1182 |
| }, |
| { |
| "epoch": 2.1142857142857143, |
| "grad_norm": 0.27624693512916565, |
| "learning_rate": 2.9870599670847366e-06, |
| "loss": 1.137044072151184, |
| "step": 1184 |
| }, |
| { |
| "epoch": 2.117857142857143, |
| "grad_norm": 0.5313391089439392, |
| "learning_rate": 2.972383363981917e-06, |
| "loss": 1.1940035820007324, |
| "step": 1186 |
| }, |
| { |
| "epoch": 2.1214285714285714, |
| "grad_norm": 1.0065633058547974, |
| "learning_rate": 2.9577459354823602e-06, |
| "loss": 1.1326301097869873, |
| "step": 1188 |
| }, |
| { |
| "epoch": 2.125, |
| "grad_norm": 0.19776014983654022, |
| "learning_rate": 2.9431479084462013e-06, |
| "loss": 1.18599534034729, |
| "step": 1190 |
| }, |
| { |
| "epoch": 2.1285714285714286, |
| "grad_norm": 0.2414723038673401, |
| "learning_rate": 2.9285895091229042e-06, |
| "loss": 1.2466977834701538, |
| "step": 1192 |
| }, |
| { |
| "epoch": 2.132142857142857, |
| "grad_norm": 0.2931707799434662, |
| "learning_rate": 2.9140709631477666e-06, |
| "loss": 1.155306339263916, |
| "step": 1194 |
| }, |
| { |
| "epoch": 2.1357142857142857, |
| "grad_norm": 0.26033467054367065, |
| "learning_rate": 2.8995924955384048e-06, |
| "loss": 1.1785553693771362, |
| "step": 1196 |
| }, |
| { |
| "epoch": 2.1392857142857142, |
| "grad_norm": 0.24594391882419586, |
| "learning_rate": 2.885154330691278e-06, |
| "loss": 1.1734336614608765, |
| "step": 1198 |
| }, |
| { |
| "epoch": 2.142857142857143, |
| "grad_norm": 0.5041958093643188, |
| "learning_rate": 2.8707566923782105e-06, |
| "loss": 1.0410226583480835, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.1464285714285714, |
| "grad_norm": 0.25237134099006653, |
| "learning_rate": 2.856399803742916e-06, |
| "loss": 1.2042694091796875, |
| "step": 1202 |
| }, |
| { |
| "epoch": 2.15, |
| "grad_norm": 0.4853833019733429, |
| "learning_rate": 2.8420838872975482e-06, |
| "loss": 1.150026559829712, |
| "step": 1204 |
| }, |
| { |
| "epoch": 2.1535714285714285, |
| "grad_norm": 0.3172329366207123, |
| "learning_rate": 2.8278091649192443e-06, |
| "loss": 1.2379705905914307, |
| "step": 1206 |
| }, |
| { |
| "epoch": 2.157142857142857, |
| "grad_norm": 0.24962536990642548, |
| "learning_rate": 2.81357585784669e-06, |
| "loss": 1.2625255584716797, |
| "step": 1208 |
| }, |
| { |
| "epoch": 2.1607142857142856, |
| "grad_norm": 0.5905876755714417, |
| "learning_rate": 2.799384186676696e-06, |
| "loss": 1.1990773677825928, |
| "step": 1210 |
| }, |
| { |
| "epoch": 2.164285714285714, |
| "grad_norm": 0.2595714032649994, |
| "learning_rate": 2.785234371360766e-06, |
| "loss": 1.0102604627609253, |
| "step": 1212 |
| }, |
| { |
| "epoch": 2.1678571428571427, |
| "grad_norm": 0.2449759989976883, |
| "learning_rate": 2.7711266312016986e-06, |
| "loss": 1.1595333814620972, |
| "step": 1214 |
| }, |
| { |
| "epoch": 2.1714285714285713, |
| "grad_norm": 0.38237428665161133, |
| "learning_rate": 2.757061184850183e-06, |
| "loss": 1.2344083786010742, |
| "step": 1216 |
| }, |
| { |
| "epoch": 2.175, |
| "grad_norm": 0.24876584112644196, |
| "learning_rate": 2.743038250301418e-06, |
| "loss": 1.124006748199463, |
| "step": 1218 |
| }, |
| { |
| "epoch": 2.1785714285714284, |
| "grad_norm": 0.34139466285705566, |
| "learning_rate": 2.7290580448917204e-06, |
| "loss": 1.090733528137207, |
| "step": 1220 |
| }, |
| { |
| "epoch": 2.182142857142857, |
| "grad_norm": 0.22050592303276062, |
| "learning_rate": 2.7151207852951677e-06, |
| "loss": 1.1178282499313354, |
| "step": 1222 |
| }, |
| { |
| "epoch": 2.185714285714286, |
| "grad_norm": 0.26262110471725464, |
| "learning_rate": 2.701226687520235e-06, |
| "loss": 1.1468334197998047, |
| "step": 1224 |
| }, |
| { |
| "epoch": 2.189285714285714, |
| "grad_norm": 0.2389093041419983, |
| "learning_rate": 2.6873759669064474e-06, |
| "loss": 1.1655080318450928, |
| "step": 1226 |
| }, |
| { |
| "epoch": 2.192857142857143, |
| "grad_norm": 0.22899575531482697, |
| "learning_rate": 2.673568838121045e-06, |
| "loss": 1.169728398323059, |
| "step": 1228 |
| }, |
| { |
| "epoch": 2.1964285714285716, |
| "grad_norm": 0.7747792601585388, |
| "learning_rate": 2.659805515155653e-06, |
| "loss": 1.0896999835968018, |
| "step": 1230 |
| }, |
| { |
| "epoch": 2.2, |
| "grad_norm": 0.35865241289138794, |
| "learning_rate": 2.6460862113229656e-06, |
| "loss": 1.0157350301742554, |
| "step": 1232 |
| }, |
| { |
| "epoch": 2.2035714285714287, |
| "grad_norm": 0.9577608108520508, |
| "learning_rate": 2.6324111392534423e-06, |
| "loss": 1.1235113143920898, |
| "step": 1234 |
| }, |
| { |
| "epoch": 2.2071428571428573, |
| "grad_norm": 0.3065534234046936, |
| "learning_rate": 2.6187805108920104e-06, |
| "loss": 1.071955680847168, |
| "step": 1236 |
| }, |
| { |
| "epoch": 2.210714285714286, |
| "grad_norm": 0.33233603835105896, |
| "learning_rate": 2.605194537494779e-06, |
| "loss": 1.3001371622085571, |
| "step": 1238 |
| }, |
| { |
| "epoch": 2.2142857142857144, |
| "grad_norm": 0.8232606649398804, |
| "learning_rate": 2.5916534296257655e-06, |
| "loss": 1.2073559761047363, |
| "step": 1240 |
| }, |
| { |
| "epoch": 2.217857142857143, |
| "grad_norm": 0.3004189431667328, |
| "learning_rate": 2.5781573971536387e-06, |
| "loss": 0.9778292179107666, |
| "step": 1242 |
| }, |
| { |
| "epoch": 2.2214285714285715, |
| "grad_norm": 0.5353025794029236, |
| "learning_rate": 2.5647066492484564e-06, |
| "loss": 1.106062889099121, |
| "step": 1244 |
| }, |
| { |
| "epoch": 2.225, |
| "grad_norm": 0.2562118172645569, |
| "learning_rate": 2.5513013943784236e-06, |
| "loss": 1.187153935432434, |
| "step": 1246 |
| }, |
| { |
| "epoch": 2.2285714285714286, |
| "grad_norm": 0.3913024067878723, |
| "learning_rate": 2.537941840306669e-06, |
| "loss": 0.8193651437759399, |
| "step": 1248 |
| }, |
| { |
| "epoch": 2.232142857142857, |
| "grad_norm": 0.29852673411369324, |
| "learning_rate": 2.524628194088027e-06, |
| "loss": 0.5965661406517029, |
| "step": 1250 |
| }, |
| { |
| "epoch": 2.2357142857142858, |
| "grad_norm": 0.2190428078174591, |
| "learning_rate": 2.511360662065813e-06, |
| "loss": 0.9129496812820435, |
| "step": 1252 |
| }, |
| { |
| "epoch": 2.2392857142857143, |
| "grad_norm": 0.3540997803211212, |
| "learning_rate": 2.4981394498686413e-06, |
| "loss": 1.138474702835083, |
| "step": 1254 |
| }, |
| { |
| "epoch": 2.242857142857143, |
| "grad_norm": 0.9036802053451538, |
| "learning_rate": 2.484964762407232e-06, |
| "loss": 1.2528407573699951, |
| "step": 1256 |
| }, |
| { |
| "epoch": 2.2464285714285714, |
| "grad_norm": 0.4152211844921112, |
| "learning_rate": 2.471836803871233e-06, |
| "loss": 1.105533480644226, |
| "step": 1258 |
| }, |
| { |
| "epoch": 2.25, |
| "grad_norm": 0.48458918929100037, |
| "learning_rate": 2.45875577772606e-06, |
| "loss": 0.9600842595100403, |
| "step": 1260 |
| }, |
| { |
| "epoch": 2.2535714285714286, |
| "grad_norm": 0.3086172044277191, |
| "learning_rate": 2.4457218867097396e-06, |
| "loss": 1.0594391822814941, |
| "step": 1262 |
| }, |
| { |
| "epoch": 2.257142857142857, |
| "grad_norm": 0.24558311700820923, |
| "learning_rate": 2.4327353328297673e-06, |
| "loss": 1.1570055484771729, |
| "step": 1264 |
| }, |
| { |
| "epoch": 2.2607142857142857, |
| "grad_norm": 1.6706045866012573, |
| "learning_rate": 2.419796317359983e-06, |
| "loss": 0.9727555513381958, |
| "step": 1266 |
| }, |
| { |
| "epoch": 2.2642857142857142, |
| "grad_norm": 0.37175774574279785, |
| "learning_rate": 2.4069050408374376e-06, |
| "loss": 1.0557781457901, |
| "step": 1268 |
| }, |
| { |
| "epoch": 2.267857142857143, |
| "grad_norm": 0.2886607050895691, |
| "learning_rate": 2.3940617030593e-06, |
| "loss": 1.1356130838394165, |
| "step": 1270 |
| }, |
| { |
| "epoch": 2.2714285714285714, |
| "grad_norm": 0.2709295451641083, |
| "learning_rate": 2.3812665030797512e-06, |
| "loss": 1.0775344371795654, |
| "step": 1272 |
| }, |
| { |
| "epoch": 2.275, |
| "grad_norm": 0.17263904213905334, |
| "learning_rate": 2.368519639206905e-06, |
| "loss": 0.9881319999694824, |
| "step": 1274 |
| }, |
| { |
| "epoch": 2.2785714285714285, |
| "grad_norm": 0.3276418149471283, |
| "learning_rate": 2.3558213089997303e-06, |
| "loss": 1.1184488534927368, |
| "step": 1276 |
| }, |
| { |
| "epoch": 2.282142857142857, |
| "grad_norm": 0.9172634482383728, |
| "learning_rate": 2.3431717092649892e-06, |
| "loss": 1.3341600894927979, |
| "step": 1278 |
| }, |
| { |
| "epoch": 2.2857142857142856, |
| "grad_norm": 0.5521453022956848, |
| "learning_rate": 2.3305710360541857e-06, |
| "loss": 1.4648536443710327, |
| "step": 1280 |
| }, |
| { |
| "epoch": 2.289285714285714, |
| "grad_norm": 0.27971673011779785, |
| "learning_rate": 2.3180194846605367e-06, |
| "loss": 1.0912892818450928, |
| "step": 1282 |
| }, |
| { |
| "epoch": 2.2928571428571427, |
| "grad_norm": 0.38737377524375916, |
| "learning_rate": 2.3055172496159327e-06, |
| "loss": 1.1721148490905762, |
| "step": 1284 |
| }, |
| { |
| "epoch": 2.2964285714285713, |
| "grad_norm": 0.32838499546051025, |
| "learning_rate": 2.2930645246879286e-06, |
| "loss": 0.8287088871002197, |
| "step": 1286 |
| }, |
| { |
| "epoch": 2.3, |
| "grad_norm": 0.9804138541221619, |
| "learning_rate": 2.2806615028767447e-06, |
| "loss": 1.3678312301635742, |
| "step": 1288 |
| }, |
| { |
| "epoch": 2.3035714285714284, |
| "grad_norm": 0.22584359347820282, |
| "learning_rate": 2.2683083764122626e-06, |
| "loss": 1.076238989830017, |
| "step": 1290 |
| }, |
| { |
| "epoch": 2.307142857142857, |
| "grad_norm": 0.3474865257740021, |
| "learning_rate": 2.2560053367510624e-06, |
| "loss": 1.0969926118850708, |
| "step": 1292 |
| }, |
| { |
| "epoch": 2.310714285714286, |
| "grad_norm": 0.5705395340919495, |
| "learning_rate": 2.24375257457344e-06, |
| "loss": 1.2809841632843018, |
| "step": 1294 |
| }, |
| { |
| "epoch": 2.314285714285714, |
| "grad_norm": 0.4103868007659912, |
| "learning_rate": 2.2315502797804677e-06, |
| "loss": 1.329990029335022, |
| "step": 1296 |
| }, |
| { |
| "epoch": 2.317857142857143, |
| "grad_norm": 0.4318333566188812, |
| "learning_rate": 2.2193986414910347e-06, |
| "loss": 0.9738024473190308, |
| "step": 1298 |
| }, |
| { |
| "epoch": 2.3214285714285716, |
| "grad_norm": 0.6811454892158508, |
| "learning_rate": 2.2072978480389286e-06, |
| "loss": 1.3944941759109497, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.325, |
| "grad_norm": 0.2388792783021927, |
| "learning_rate": 2.195248086969904e-06, |
| "loss": 0.9329886436462402, |
| "step": 1302 |
| }, |
| { |
| "epoch": 2.3285714285714287, |
| "grad_norm": 0.316201388835907, |
| "learning_rate": 2.1832495450387934e-06, |
| "loss": 1.300463080406189, |
| "step": 1304 |
| }, |
| { |
| "epoch": 2.3321428571428573, |
| "grad_norm": 1.2377450466156006, |
| "learning_rate": 2.1713024082065965e-06, |
| "loss": 1.1311689615249634, |
| "step": 1306 |
| }, |
| { |
| "epoch": 2.335714285714286, |
| "grad_norm": 0.2693905234336853, |
| "learning_rate": 2.1594068616376056e-06, |
| "loss": 1.1664714813232422, |
| "step": 1308 |
| }, |
| { |
| "epoch": 2.3392857142857144, |
| "grad_norm": 0.2321355789899826, |
| "learning_rate": 2.1475630896965336e-06, |
| "loss": 0.9228266477584839, |
| "step": 1310 |
| }, |
| { |
| "epoch": 2.342857142857143, |
| "grad_norm": 0.2707984149456024, |
| "learning_rate": 2.1357712759456594e-06, |
| "loss": 0.8861098289489746, |
| "step": 1312 |
| }, |
| { |
| "epoch": 2.3464285714285715, |
| "grad_norm": 0.41806021332740784, |
| "learning_rate": 2.1240316031419795e-06, |
| "loss": 1.2544275522232056, |
| "step": 1314 |
| }, |
| { |
| "epoch": 2.35, |
| "grad_norm": 0.232350692152977, |
| "learning_rate": 2.112344253234377e-06, |
| "loss": 1.2989314794540405, |
| "step": 1316 |
| }, |
| { |
| "epoch": 2.3535714285714286, |
| "grad_norm": 0.32811442017555237, |
| "learning_rate": 2.1007094073607996e-06, |
| "loss": 1.0640029907226562, |
| "step": 1318 |
| }, |
| { |
| "epoch": 2.357142857142857, |
| "grad_norm": 0.4184323847293854, |
| "learning_rate": 2.0891272458454614e-06, |
| "loss": 1.1912882328033447, |
| "step": 1320 |
| }, |
| { |
| "epoch": 2.3607142857142858, |
| "grad_norm": 0.4829447865486145, |
| "learning_rate": 2.0775979481960343e-06, |
| "loss": 1.1298654079437256, |
| "step": 1322 |
| }, |
| { |
| "epoch": 2.3642857142857143, |
| "grad_norm": 0.24751894176006317, |
| "learning_rate": 2.0661216931008717e-06, |
| "loss": 1.0057674646377563, |
| "step": 1324 |
| }, |
| { |
| "epoch": 2.367857142857143, |
| "grad_norm": 0.3193625807762146, |
| "learning_rate": 2.054698658426244e-06, |
| "loss": 1.3430179357528687, |
| "step": 1326 |
| }, |
| { |
| "epoch": 2.3714285714285714, |
| "grad_norm": 1.9639981985092163, |
| "learning_rate": 2.043329021213577e-06, |
| "loss": 1.1388099193572998, |
| "step": 1328 |
| }, |
| { |
| "epoch": 2.375, |
| "grad_norm": 0.4315277636051178, |
| "learning_rate": 2.0320129576767083e-06, |
| "loss": 1.1900275945663452, |
| "step": 1330 |
| }, |
| { |
| "epoch": 2.3785714285714286, |
| "grad_norm": 0.34250909090042114, |
| "learning_rate": 2.0207506431991556e-06, |
| "loss": 1.29435396194458, |
| "step": 1332 |
| }, |
| { |
| "epoch": 2.382142857142857, |
| "grad_norm": 0.6402963399887085, |
| "learning_rate": 2.0095422523314016e-06, |
| "loss": 1.1449788808822632, |
| "step": 1334 |
| }, |
| { |
| "epoch": 2.3857142857142857, |
| "grad_norm": 0.24777400493621826, |
| "learning_rate": 1.998387958788185e-06, |
| "loss": 0.9889009594917297, |
| "step": 1336 |
| }, |
| { |
| "epoch": 2.3892857142857142, |
| "grad_norm": 0.27742165327072144, |
| "learning_rate": 1.987287935445811e-06, |
| "loss": 1.13013756275177, |
| "step": 1338 |
| }, |
| { |
| "epoch": 2.392857142857143, |
| "grad_norm": 0.6482072472572327, |
| "learning_rate": 1.976242354339471e-06, |
| "loss": 1.2214878797531128, |
| "step": 1340 |
| }, |
| { |
| "epoch": 2.3964285714285714, |
| "grad_norm": 0.41687601804733276, |
| "learning_rate": 1.965251386660575e-06, |
| "loss": 1.180694818496704, |
| "step": 1342 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 0.4969882071018219, |
| "learning_rate": 1.9543152027541003e-06, |
| "loss": 1.0276660919189453, |
| "step": 1344 |
| }, |
| { |
| "epoch": 2.4035714285714285, |
| "grad_norm": 0.25297048687934875, |
| "learning_rate": 1.9434339721159506e-06, |
| "loss": 1.3130789995193481, |
| "step": 1346 |
| }, |
| { |
| "epoch": 2.407142857142857, |
| "grad_norm": 0.3224523961544037, |
| "learning_rate": 1.932607863390329e-06, |
| "loss": 1.323912262916565, |
| "step": 1348 |
| }, |
| { |
| "epoch": 2.4107142857142856, |
| "grad_norm": 0.3630305528640747, |
| "learning_rate": 1.9218370443671232e-06, |
| "loss": 1.420185923576355, |
| "step": 1350 |
| }, |
| { |
| "epoch": 2.414285714285714, |
| "grad_norm": 0.33518993854522705, |
| "learning_rate": 1.91112168197931e-06, |
| "loss": 1.2631648778915405, |
| "step": 1352 |
| }, |
| { |
| "epoch": 2.4178571428571427, |
| "grad_norm": 0.2684813439846039, |
| "learning_rate": 1.900461942300359e-06, |
| "loss": 1.2116239070892334, |
| "step": 1354 |
| }, |
| { |
| "epoch": 2.4214285714285713, |
| "grad_norm": 1.0438412427902222, |
| "learning_rate": 1.8898579905416678e-06, |
| "loss": 0.9407988786697388, |
| "step": 1356 |
| }, |
| { |
| "epoch": 2.425, |
| "grad_norm": 0.2759835124015808, |
| "learning_rate": 1.8793099910499926e-06, |
| "loss": 1.089248776435852, |
| "step": 1358 |
| }, |
| { |
| "epoch": 2.4285714285714284, |
| "grad_norm": 0.2118200808763504, |
| "learning_rate": 1.8688181073049125e-06, |
| "loss": 0.9922888278961182, |
| "step": 1360 |
| }, |
| { |
| "epoch": 2.432142857142857, |
| "grad_norm": 0.29328909516334534, |
| "learning_rate": 1.8583825019162843e-06, |
| "loss": 1.0572453737258911, |
| "step": 1362 |
| }, |
| { |
| "epoch": 2.435714285714286, |
| "grad_norm": 0.34180185198783875, |
| "learning_rate": 1.848003336621729e-06, |
| "loss": 1.2007834911346436, |
| "step": 1364 |
| }, |
| { |
| "epoch": 2.439285714285714, |
| "grad_norm": 2.949885368347168, |
| "learning_rate": 1.8376807722841231e-06, |
| "loss": 1.2154308557510376, |
| "step": 1366 |
| }, |
| { |
| "epoch": 2.442857142857143, |
| "grad_norm": 0.29990777373313904, |
| "learning_rate": 1.8274149688891057e-06, |
| "loss": 1.2820924520492554, |
| "step": 1368 |
| }, |
| { |
| "epoch": 2.4464285714285716, |
| "grad_norm": 0.2850666046142578, |
| "learning_rate": 1.8172060855425986e-06, |
| "loss": 1.3318397998809814, |
| "step": 1370 |
| }, |
| { |
| "epoch": 2.45, |
| "grad_norm": 0.4406229555606842, |
| "learning_rate": 1.8070542804683406e-06, |
| "loss": 1.490922212600708, |
| "step": 1372 |
| }, |
| { |
| "epoch": 2.4535714285714287, |
| "grad_norm": 0.5301911234855652, |
| "learning_rate": 1.7969597110054343e-06, |
| "loss": 1.04641854763031, |
| "step": 1374 |
| }, |
| { |
| "epoch": 2.4571428571428573, |
| "grad_norm": 0.4790363013744354, |
| "learning_rate": 1.7869225336059133e-06, |
| "loss": 1.2003765106201172, |
| "step": 1376 |
| }, |
| { |
| "epoch": 2.460714285714286, |
| "grad_norm": 0.3002559542655945, |
| "learning_rate": 1.7769429038323058e-06, |
| "loss": 1.1743593215942383, |
| "step": 1378 |
| }, |
| { |
| "epoch": 2.4642857142857144, |
| "grad_norm": 0.47378110885620117, |
| "learning_rate": 1.7670209763552342e-06, |
| "loss": 1.0753716230392456, |
| "step": 1380 |
| }, |
| { |
| "epoch": 2.467857142857143, |
| "grad_norm": 0.4303780198097229, |
| "learning_rate": 1.757156904951014e-06, |
| "loss": 1.195298194885254, |
| "step": 1382 |
| }, |
| { |
| "epoch": 2.4714285714285715, |
| "grad_norm": 0.40849828720092773, |
| "learning_rate": 1.747350842499271e-06, |
| "loss": 1.0725401639938354, |
| "step": 1384 |
| }, |
| { |
| "epoch": 2.475, |
| "grad_norm": 0.4191647469997406, |
| "learning_rate": 1.7376029409805708e-06, |
| "loss": 1.2902517318725586, |
| "step": 1386 |
| }, |
| { |
| "epoch": 2.4785714285714286, |
| "grad_norm": 0.5962879657745361, |
| "learning_rate": 1.7279133514740645e-06, |
| "loss": 1.2889909744262695, |
| "step": 1388 |
| }, |
| { |
| "epoch": 2.482142857142857, |
| "grad_norm": 0.2635829448699951, |
| "learning_rate": 1.7182822241551434e-06, |
| "loss": 0.9972074627876282, |
| "step": 1390 |
| }, |
| { |
| "epoch": 2.4857142857142858, |
| "grad_norm": 0.27476590871810913, |
| "learning_rate": 1.708709708293121e-06, |
| "loss": 1.0351589918136597, |
| "step": 1392 |
| }, |
| { |
| "epoch": 2.4892857142857143, |
| "grad_norm": 0.3098399341106415, |
| "learning_rate": 1.6991959522489082e-06, |
| "loss": 1.030190110206604, |
| "step": 1394 |
| }, |
| { |
| "epoch": 2.492857142857143, |
| "grad_norm": 0.37093329429626465, |
| "learning_rate": 1.6897411034727217e-06, |
| "loss": 1.3557082414627075, |
| "step": 1396 |
| }, |
| { |
| "epoch": 2.4964285714285714, |
| "grad_norm": 0.4083240032196045, |
| "learning_rate": 1.680345308501795e-06, |
| "loss": 1.0274466276168823, |
| "step": 1398 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 0.34320634603500366, |
| "learning_rate": 1.6710087129581086e-06, |
| "loss": 0.9457365274429321, |
| "step": 1400 |
| }, |
| { |
| "epoch": 2.5035714285714286, |
| "grad_norm": 0.5619872808456421, |
| "learning_rate": 1.6617314615461325e-06, |
| "loss": 1.3013941049575806, |
| "step": 1402 |
| }, |
| { |
| "epoch": 2.507142857142857, |
| "grad_norm": 0.9764664769172668, |
| "learning_rate": 1.6525136980505835e-06, |
| "loss": 1.4310553073883057, |
| "step": 1404 |
| }, |
| { |
| "epoch": 2.5107142857142857, |
| "grad_norm": 0.548743724822998, |
| "learning_rate": 1.6433555653341976e-06, |
| "loss": 1.255396842956543, |
| "step": 1406 |
| }, |
| { |
| "epoch": 2.5142857142857142, |
| "grad_norm": 0.8014435172080994, |
| "learning_rate": 1.6342572053355166e-06, |
| "loss": 0.830237865447998, |
| "step": 1408 |
| }, |
| { |
| "epoch": 2.517857142857143, |
| "grad_norm": 0.21949461102485657, |
| "learning_rate": 1.625218759066685e-06, |
| "loss": 0.7343713641166687, |
| "step": 1410 |
| }, |
| { |
| "epoch": 2.5214285714285714, |
| "grad_norm": 0.6966763734817505, |
| "learning_rate": 1.6162403666112653e-06, |
| "loss": 1.1919779777526855, |
| "step": 1412 |
| }, |
| { |
| "epoch": 2.525, |
| "grad_norm": 0.30908581614494324, |
| "learning_rate": 1.6073221671220692e-06, |
| "loss": 0.9375178813934326, |
| "step": 1414 |
| }, |
| { |
| "epoch": 2.5285714285714285, |
| "grad_norm": 0.34836652874946594, |
| "learning_rate": 1.5984642988190022e-06, |
| "loss": 0.8665962219238281, |
| "step": 1416 |
| }, |
| { |
| "epoch": 2.532142857142857, |
| "grad_norm": 0.18187429010868073, |
| "learning_rate": 1.5896668989869151e-06, |
| "loss": 0.9749317765235901, |
| "step": 1418 |
| }, |
| { |
| "epoch": 2.5357142857142856, |
| "grad_norm": 0.2711097002029419, |
| "learning_rate": 1.5809301039734814e-06, |
| "loss": 1.1920053958892822, |
| "step": 1420 |
| }, |
| { |
| "epoch": 2.539285714285714, |
| "grad_norm": 0.35151663422584534, |
| "learning_rate": 1.5722540491870838e-06, |
| "loss": 1.1063796281814575, |
| "step": 1422 |
| }, |
| { |
| "epoch": 2.5428571428571427, |
| "grad_norm": 0.46157142519950867, |
| "learning_rate": 1.5636388690947125e-06, |
| "loss": 0.9042350649833679, |
| "step": 1424 |
| }, |
| { |
| "epoch": 2.5464285714285713, |
| "grad_norm": 0.44619572162628174, |
| "learning_rate": 1.5550846972198851e-06, |
| "loss": 1.1896483898162842, |
| "step": 1426 |
| }, |
| { |
| "epoch": 2.55, |
| "grad_norm": 0.5084243416786194, |
| "learning_rate": 1.5465916661405734e-06, |
| "loss": 1.0787028074264526, |
| "step": 1428 |
| }, |
| { |
| "epoch": 2.553571428571429, |
| "grad_norm": 0.2909405529499054, |
| "learning_rate": 1.5381599074871512e-06, |
| "loss": 1.1317380666732788, |
| "step": 1430 |
| }, |
| { |
| "epoch": 2.557142857142857, |
| "grad_norm": 0.7613154053688049, |
| "learning_rate": 1.5297895519403563e-06, |
| "loss": 1.3027656078338623, |
| "step": 1432 |
| }, |
| { |
| "epoch": 2.560714285714286, |
| "grad_norm": 0.38280853629112244, |
| "learning_rate": 1.5214807292292567e-06, |
| "loss": 0.8128288984298706, |
| "step": 1434 |
| }, |
| { |
| "epoch": 2.564285714285714, |
| "grad_norm": 0.33587777614593506, |
| "learning_rate": 1.5132335681292492e-06, |
| "loss": 1.4057202339172363, |
| "step": 1436 |
| }, |
| { |
| "epoch": 2.567857142857143, |
| "grad_norm": 0.4974580407142639, |
| "learning_rate": 1.5050481964600582e-06, |
| "loss": 1.2144535779953003, |
| "step": 1438 |
| }, |
| { |
| "epoch": 2.571428571428571, |
| "grad_norm": 0.21717508137226105, |
| "learning_rate": 1.496924741083759e-06, |
| "loss": 0.9632461667060852, |
| "step": 1440 |
| }, |
| { |
| "epoch": 2.575, |
| "grad_norm": 0.18900008499622345, |
| "learning_rate": 1.4888633279028068e-06, |
| "loss": 1.021627426147461, |
| "step": 1442 |
| }, |
| { |
| "epoch": 2.5785714285714287, |
| "grad_norm": 0.41346102952957153, |
| "learning_rate": 1.4808640818580885e-06, |
| "loss": 1.0733561515808105, |
| "step": 1444 |
| }, |
| { |
| "epoch": 2.5821428571428573, |
| "grad_norm": 0.3450411558151245, |
| "learning_rate": 1.4729271269269823e-06, |
| "loss": 1.0130958557128906, |
| "step": 1446 |
| }, |
| { |
| "epoch": 2.585714285714286, |
| "grad_norm": 0.4527641832828522, |
| "learning_rate": 1.4650525861214454e-06, |
| "loss": 0.9112399220466614, |
| "step": 1448 |
| }, |
| { |
| "epoch": 2.5892857142857144, |
| "grad_norm": 0.43975669145584106, |
| "learning_rate": 1.4572405814860954e-06, |
| "loss": 1.0099694728851318, |
| "step": 1450 |
| }, |
| { |
| "epoch": 2.592857142857143, |
| "grad_norm": 2.6724021434783936, |
| "learning_rate": 1.4494912340963286e-06, |
| "loss": 0.9879626035690308, |
| "step": 1452 |
| }, |
| { |
| "epoch": 2.5964285714285715, |
| "grad_norm": 0.33726853132247925, |
| "learning_rate": 1.441804664056437e-06, |
| "loss": 1.3339985609054565, |
| "step": 1454 |
| }, |
| { |
| "epoch": 2.6, |
| "grad_norm": 0.5543254017829895, |
| "learning_rate": 1.4341809904977511e-06, |
| "loss": 1.0636701583862305, |
| "step": 1456 |
| }, |
| { |
| "epoch": 2.6035714285714286, |
| "grad_norm": 0.35016801953315735, |
| "learning_rate": 1.4266203315767917e-06, |
| "loss": 1.2073761224746704, |
| "step": 1458 |
| }, |
| { |
| "epoch": 2.607142857142857, |
| "grad_norm": 0.37314754724502563, |
| "learning_rate": 1.4191228044734387e-06, |
| "loss": 1.067349910736084, |
| "step": 1460 |
| }, |
| { |
| "epoch": 2.6107142857142858, |
| "grad_norm": 0.27696406841278076, |
| "learning_rate": 1.4116885253891142e-06, |
| "loss": 1.1596084833145142, |
| "step": 1462 |
| }, |
| { |
| "epoch": 2.6142857142857143, |
| "grad_norm": 0.23734059929847717, |
| "learning_rate": 1.4043176095449843e-06, |
| "loss": 1.130849003791809, |
| "step": 1464 |
| }, |
| { |
| "epoch": 2.617857142857143, |
| "grad_norm": 0.451869934797287, |
| "learning_rate": 1.3970101711801712e-06, |
| "loss": 1.1519298553466797, |
| "step": 1466 |
| }, |
| { |
| "epoch": 2.6214285714285714, |
| "grad_norm": 0.367313027381897, |
| "learning_rate": 1.3897663235499797e-06, |
| "loss": 1.081532597541809, |
| "step": 1468 |
| }, |
| { |
| "epoch": 2.625, |
| "grad_norm": 1.2766571044921875, |
| "learning_rate": 1.382586178924149e-06, |
| "loss": 0.9227726459503174, |
| "step": 1470 |
| }, |
| { |
| "epoch": 2.6285714285714286, |
| "grad_norm": 1.6380170583724976, |
| "learning_rate": 1.3754698485851074e-06, |
| "loss": 1.3057407140731812, |
| "step": 1472 |
| }, |
| { |
| "epoch": 2.632142857142857, |
| "grad_norm": 0.3816126585006714, |
| "learning_rate": 1.368417442826249e-06, |
| "loss": 1.1892451047897339, |
| "step": 1474 |
| }, |
| { |
| "epoch": 2.6357142857142857, |
| "grad_norm": 0.3007228672504425, |
| "learning_rate": 1.3614290709502242e-06, |
| "loss": 1.2595423460006714, |
| "step": 1476 |
| }, |
| { |
| "epoch": 2.6392857142857142, |
| "grad_norm": 0.2307678908109665, |
| "learning_rate": 1.3545048412672459e-06, |
| "loss": 1.10439932346344, |
| "step": 1478 |
| }, |
| { |
| "epoch": 2.642857142857143, |
| "grad_norm": 0.34183934330940247, |
| "learning_rate": 1.3476448610934104e-06, |
| "loss": 1.1247930526733398, |
| "step": 1480 |
| }, |
| { |
| "epoch": 2.6464285714285714, |
| "grad_norm": 0.50603187084198, |
| "learning_rate": 1.3408492367490344e-06, |
| "loss": 1.308542013168335, |
| "step": 1482 |
| }, |
| { |
| "epoch": 2.65, |
| "grad_norm": 0.5772185921669006, |
| "learning_rate": 1.3341180735570081e-06, |
| "loss": 1.086531639099121, |
| "step": 1484 |
| }, |
| { |
| "epoch": 2.6535714285714285, |
| "grad_norm": 0.2957296073436737, |
| "learning_rate": 1.3274514758411595e-06, |
| "loss": 0.9083548784255981, |
| "step": 1486 |
| }, |
| { |
| "epoch": 2.657142857142857, |
| "grad_norm": 0.2126568704843521, |
| "learning_rate": 1.3208495469246445e-06, |
| "loss": 1.0338191986083984, |
| "step": 1488 |
| }, |
| { |
| "epoch": 2.6607142857142856, |
| "grad_norm": 0.23187443614006042, |
| "learning_rate": 1.3143123891283354e-06, |
| "loss": 1.1434146165847778, |
| "step": 1490 |
| }, |
| { |
| "epoch": 2.664285714285714, |
| "grad_norm": 0.2083001434803009, |
| "learning_rate": 1.3078401037692451e-06, |
| "loss": 1.148645281791687, |
| "step": 1492 |
| }, |
| { |
| "epoch": 2.6678571428571427, |
| "grad_norm": 0.24332857131958008, |
| "learning_rate": 1.3014327911589495e-06, |
| "loss": 1.0858982801437378, |
| "step": 1494 |
| }, |
| { |
| "epoch": 2.6714285714285713, |
| "grad_norm": 0.44840723276138306, |
| "learning_rate": 1.2950905506020383e-06, |
| "loss": 0.8910313844680786, |
| "step": 1496 |
| }, |
| { |
| "epoch": 2.675, |
| "grad_norm": 0.6759834885597229, |
| "learning_rate": 1.2888134803945713e-06, |
| "loss": 1.0723787546157837, |
| "step": 1498 |
| }, |
| { |
| "epoch": 2.678571428571429, |
| "grad_norm": 0.3571532964706421, |
| "learning_rate": 1.2826016778225578e-06, |
| "loss": 1.1453263759613037, |
| "step": 1500 |
| }, |
| { |
| "epoch": 2.682142857142857, |
| "grad_norm": 0.3260257840156555, |
| "learning_rate": 1.2764552391604468e-06, |
| "loss": 1.1897282600402832, |
| "step": 1502 |
| }, |
| { |
| "epoch": 2.685714285714286, |
| "grad_norm": 0.21461273729801178, |
| "learning_rate": 1.2703742596696383e-06, |
| "loss": 1.114097237586975, |
| "step": 1504 |
| }, |
| { |
| "epoch": 2.689285714285714, |
| "grad_norm": 0.39265140891075134, |
| "learning_rate": 1.2643588335970021e-06, |
| "loss": 1.2430890798568726, |
| "step": 1506 |
| }, |
| { |
| "epoch": 2.692857142857143, |
| "grad_norm": 0.26661592721939087, |
| "learning_rate": 1.2584090541734216e-06, |
| "loss": 1.2044790983200073, |
| "step": 1508 |
| }, |
| { |
| "epoch": 2.696428571428571, |
| "grad_norm": 0.4279651641845703, |
| "learning_rate": 1.252525013612346e-06, |
| "loss": 1.1148457527160645, |
| "step": 1510 |
| }, |
| { |
| "epoch": 2.7, |
| "grad_norm": 0.26563382148742676, |
| "learning_rate": 1.2467068031083623e-06, |
| "loss": 1.151499629020691, |
| "step": 1512 |
| }, |
| { |
| "epoch": 2.7035714285714287, |
| "grad_norm": 0.28036361932754517, |
| "learning_rate": 1.2409545128357806e-06, |
| "loss": 1.112971544265747, |
| "step": 1514 |
| }, |
| { |
| "epoch": 2.7071428571428573, |
| "grad_norm": 0.3321837782859802, |
| "learning_rate": 1.235268231947238e-06, |
| "loss": 0.9679718613624573, |
| "step": 1516 |
| }, |
| { |
| "epoch": 2.710714285714286, |
| "grad_norm": 0.7659473419189453, |
| "learning_rate": 1.229648048572317e-06, |
| "loss": 1.18712317943573, |
| "step": 1518 |
| }, |
| { |
| "epoch": 2.7142857142857144, |
| "grad_norm": 0.5400887131690979, |
| "learning_rate": 1.2240940498161797e-06, |
| "loss": 1.0840147733688354, |
| "step": 1520 |
| }, |
| { |
| "epoch": 2.717857142857143, |
| "grad_norm": 0.3426344096660614, |
| "learning_rate": 1.2186063217582144e-06, |
| "loss": 1.1307204961776733, |
| "step": 1522 |
| }, |
| { |
| "epoch": 2.7214285714285715, |
| "grad_norm": 0.39970487356185913, |
| "learning_rate": 1.213184949450706e-06, |
| "loss": 1.1921186447143555, |
| "step": 1524 |
| }, |
| { |
| "epoch": 2.725, |
| "grad_norm": 0.31394848227500916, |
| "learning_rate": 1.2078300169175158e-06, |
| "loss": 1.1872678995132446, |
| "step": 1526 |
| }, |
| { |
| "epoch": 2.7285714285714286, |
| "grad_norm": 0.7688894271850586, |
| "learning_rate": 1.20254160715278e-06, |
| "loss": 1.1403369903564453, |
| "step": 1528 |
| }, |
| { |
| "epoch": 2.732142857142857, |
| "grad_norm": 0.3478771448135376, |
| "learning_rate": 1.1973198021196207e-06, |
| "loss": 1.0353933572769165, |
| "step": 1530 |
| }, |
| { |
| "epoch": 2.7357142857142858, |
| "grad_norm": 1.663916826248169, |
| "learning_rate": 1.1921646827488807e-06, |
| "loss": 1.1801190376281738, |
| "step": 1532 |
| }, |
| { |
| "epoch": 2.7392857142857143, |
| "grad_norm": 0.9486533999443054, |
| "learning_rate": 1.187076328937863e-06, |
| "loss": 1.118172287940979, |
| "step": 1534 |
| }, |
| { |
| "epoch": 2.742857142857143, |
| "grad_norm": 0.3661729693412781, |
| "learning_rate": 1.182054819549098e-06, |
| "loss": 1.166612982749939, |
| "step": 1536 |
| }, |
| { |
| "epoch": 2.7464285714285714, |
| "grad_norm": 0.273942768573761, |
| "learning_rate": 1.1771002324091183e-06, |
| "loss": 1.219356656074524, |
| "step": 1538 |
| }, |
| { |
| "epoch": 2.75, |
| "grad_norm": 0.2943507730960846, |
| "learning_rate": 1.172212644307252e-06, |
| "loss": 1.2092581987380981, |
| "step": 1540 |
| }, |
| { |
| "epoch": 2.7535714285714286, |
| "grad_norm": 0.22103095054626465, |
| "learning_rate": 1.1673921309944356e-06, |
| "loss": 1.1635977029800415, |
| "step": 1542 |
| }, |
| { |
| "epoch": 2.757142857142857, |
| "grad_norm": 0.27992480993270874, |
| "learning_rate": 1.1626387671820363e-06, |
| "loss": 1.1578980684280396, |
| "step": 1544 |
| }, |
| { |
| "epoch": 2.7607142857142857, |
| "grad_norm": 0.1873656064271927, |
| "learning_rate": 1.1579526265406972e-06, |
| "loss": 1.1813486814498901, |
| "step": 1546 |
| }, |
| { |
| "epoch": 2.7642857142857142, |
| "grad_norm": 0.3528795838356018, |
| "learning_rate": 1.1533337816991932e-06, |
| "loss": 1.1933683156967163, |
| "step": 1548 |
| }, |
| { |
| "epoch": 2.767857142857143, |
| "grad_norm": 0.31167811155319214, |
| "learning_rate": 1.1487823042433063e-06, |
| "loss": 1.1475173234939575, |
| "step": 1550 |
| }, |
| { |
| "epoch": 2.7714285714285714, |
| "grad_norm": 1.7408783435821533, |
| "learning_rate": 1.1442982647147167e-06, |
| "loss": 1.148131251335144, |
| "step": 1552 |
| }, |
| { |
| "epoch": 2.775, |
| "grad_norm": 0.3031138777732849, |
| "learning_rate": 1.1398817326099094e-06, |
| "loss": 1.0997506380081177, |
| "step": 1554 |
| }, |
| { |
| "epoch": 2.7785714285714285, |
| "grad_norm": 0.21349631249904633, |
| "learning_rate": 1.1355327763790943e-06, |
| "loss": 1.1433438062667847, |
| "step": 1556 |
| }, |
| { |
| "epoch": 2.782142857142857, |
| "grad_norm": 0.16756878793239594, |
| "learning_rate": 1.1312514634251492e-06, |
| "loss": 1.0694825649261475, |
| "step": 1558 |
| }, |
| { |
| "epoch": 2.7857142857142856, |
| "grad_norm": 0.19285623729228973, |
| "learning_rate": 1.127037860102575e-06, |
| "loss": 1.1415499448776245, |
| "step": 1560 |
| }, |
| { |
| "epoch": 2.789285714285714, |
| "grad_norm": 0.3282257616519928, |
| "learning_rate": 1.1228920317164625e-06, |
| "loss": 1.1128462553024292, |
| "step": 1562 |
| }, |
| { |
| "epoch": 2.7928571428571427, |
| "grad_norm": 0.20754434168338776, |
| "learning_rate": 1.118814042521486e-06, |
| "loss": 1.1504778861999512, |
| "step": 1564 |
| }, |
| { |
| "epoch": 2.7964285714285713, |
| "grad_norm": 0.22546795010566711, |
| "learning_rate": 1.1148039557209057e-06, |
| "loss": 1.1107934713363647, |
| "step": 1566 |
| }, |
| { |
| "epoch": 2.8, |
| "grad_norm": 0.16394157707691193, |
| "learning_rate": 1.1108618334655843e-06, |
| "loss": 1.0830016136169434, |
| "step": 1568 |
| }, |
| { |
| "epoch": 2.803571428571429, |
| "grad_norm": 0.1953999102115631, |
| "learning_rate": 1.1069877368530303e-06, |
| "loss": 1.16024649143219, |
| "step": 1570 |
| }, |
| { |
| "epoch": 2.807142857142857, |
| "grad_norm": 0.211993008852005, |
| "learning_rate": 1.1031817259264454e-06, |
| "loss": 1.1383813619613647, |
| "step": 1572 |
| }, |
| { |
| "epoch": 2.810714285714286, |
| "grad_norm": 0.1844896823167801, |
| "learning_rate": 1.0994438596737971e-06, |
| "loss": 1.0519864559173584, |
| "step": 1574 |
| }, |
| { |
| "epoch": 2.814285714285714, |
| "grad_norm": 0.4553788900375366, |
| "learning_rate": 1.0957741960269049e-06, |
| "loss": 1.1024482250213623, |
| "step": 1576 |
| }, |
| { |
| "epoch": 2.817857142857143, |
| "grad_norm": 0.2758769989013672, |
| "learning_rate": 1.092172791860539e-06, |
| "loss": 1.0607486963272095, |
| "step": 1578 |
| }, |
| { |
| "epoch": 2.821428571428571, |
| "grad_norm": 0.28464648127555847, |
| "learning_rate": 1.0886397029915415e-06, |
| "loss": 1.0878740549087524, |
| "step": 1580 |
| }, |
| { |
| "epoch": 2.825, |
| "grad_norm": 0.2519758641719818, |
| "learning_rate": 1.0851749841779609e-06, |
| "loss": 1.0692694187164307, |
| "step": 1582 |
| }, |
| { |
| "epoch": 2.8285714285714287, |
| "grad_norm": 0.20021863281726837, |
| "learning_rate": 1.0817786891182041e-06, |
| "loss": 1.0892566442489624, |
| "step": 1584 |
| }, |
| { |
| "epoch": 2.8321428571428573, |
| "grad_norm": 0.21085211634635925, |
| "learning_rate": 1.0784508704502029e-06, |
| "loss": 1.0911756753921509, |
| "step": 1586 |
| }, |
| { |
| "epoch": 2.835714285714286, |
| "grad_norm": 0.2599065899848938, |
| "learning_rate": 1.0751915797505986e-06, |
| "loss": 1.0842504501342773, |
| "step": 1588 |
| }, |
| { |
| "epoch": 2.8392857142857144, |
| "grad_norm": 0.23683688044548035, |
| "learning_rate": 1.0720008675339403e-06, |
| "loss": 1.0852082967758179, |
| "step": 1590 |
| }, |
| { |
| "epoch": 2.842857142857143, |
| "grad_norm": 0.19538818299770355, |
| "learning_rate": 1.0688787832519085e-06, |
| "loss": 1.1298590898513794, |
| "step": 1592 |
| }, |
| { |
| "epoch": 2.8464285714285715, |
| "grad_norm": 0.5865882039070129, |
| "learning_rate": 1.0658253752925417e-06, |
| "loss": 1.122971773147583, |
| "step": 1594 |
| }, |
| { |
| "epoch": 2.85, |
| "grad_norm": 0.3269581198692322, |
| "learning_rate": 1.062840690979491e-06, |
| "loss": 1.109829068183899, |
| "step": 1596 |
| }, |
| { |
| "epoch": 2.8535714285714286, |
| "grad_norm": 0.5810469388961792, |
| "learning_rate": 1.0599247765712832e-06, |
| "loss": 1.1492294073104858, |
| "step": 1598 |
| }, |
| { |
| "epoch": 2.857142857142857, |
| "grad_norm": 0.2330639660358429, |
| "learning_rate": 1.0570776772606056e-06, |
| "loss": 1.123344898223877, |
| "step": 1600 |
| }, |
| { |
| "epoch": 2.8607142857142858, |
| "grad_norm": 0.2107606828212738, |
| "learning_rate": 1.0542994371736076e-06, |
| "loss": 1.0889390707015991, |
| "step": 1602 |
| }, |
| { |
| "epoch": 2.8642857142857143, |
| "grad_norm": 0.2753591239452362, |
| "learning_rate": 1.0515900993692128e-06, |
| "loss": 1.1300913095474243, |
| "step": 1604 |
| }, |
| { |
| "epoch": 2.867857142857143, |
| "grad_norm": 0.27015575766563416, |
| "learning_rate": 1.048949705838454e-06, |
| "loss": 1.0982666015625, |
| "step": 1606 |
| }, |
| { |
| "epoch": 2.8714285714285714, |
| "grad_norm": 0.1620846688747406, |
| "learning_rate": 1.0463782975038226e-06, |
| "loss": 1.1166629791259766, |
| "step": 1608 |
| }, |
| { |
| "epoch": 2.875, |
| "grad_norm": 0.21408753097057343, |
| "learning_rate": 1.0438759142186336e-06, |
| "loss": 1.127457857131958, |
| "step": 1610 |
| }, |
| { |
| "epoch": 2.8785714285714286, |
| "grad_norm": 0.26070085167884827, |
| "learning_rate": 1.0414425947664075e-06, |
| "loss": 1.1438779830932617, |
| "step": 1612 |
| }, |
| { |
| "epoch": 2.882142857142857, |
| "grad_norm": 0.1973988115787506, |
| "learning_rate": 1.0390783768602694e-06, |
| "loss": 1.1256788969039917, |
| "step": 1614 |
| }, |
| { |
| "epoch": 2.8857142857142857, |
| "grad_norm": 0.1865663081407547, |
| "learning_rate": 1.0367832971423664e-06, |
| "loss": 1.0647690296173096, |
| "step": 1616 |
| }, |
| { |
| "epoch": 2.8892857142857142, |
| "grad_norm": 0.40141281485557556, |
| "learning_rate": 1.0345573911832976e-06, |
| "loss": 1.0978182554244995, |
| "step": 1618 |
| }, |
| { |
| "epoch": 2.892857142857143, |
| "grad_norm": 0.19470001757144928, |
| "learning_rate": 1.0324006934815623e-06, |
| "loss": 1.1264913082122803, |
| "step": 1620 |
| }, |
| { |
| "epoch": 2.8964285714285714, |
| "grad_norm": 0.1923714429140091, |
| "learning_rate": 1.0303132374630276e-06, |
| "loss": 1.1599576473236084, |
| "step": 1622 |
| }, |
| { |
| "epoch": 2.9, |
| "grad_norm": 0.2873956561088562, |
| "learning_rate": 1.0282950554804084e-06, |
| "loss": 1.1344720125198364, |
| "step": 1624 |
| }, |
| { |
| "epoch": 2.9035714285714285, |
| "grad_norm": 0.2792896330356598, |
| "learning_rate": 1.0263461788127682e-06, |
| "loss": 1.1077191829681396, |
| "step": 1626 |
| }, |
| { |
| "epoch": 2.907142857142857, |
| "grad_norm": 0.17874673008918762, |
| "learning_rate": 1.0244666376650307e-06, |
| "loss": 1.0769405364990234, |
| "step": 1628 |
| }, |
| { |
| "epoch": 2.9107142857142856, |
| "grad_norm": 0.23230457305908203, |
| "learning_rate": 1.0226564611675146e-06, |
| "loss": 1.1149848699569702, |
| "step": 1630 |
| }, |
| { |
| "epoch": 2.914285714285714, |
| "grad_norm": 0.2538415789604187, |
| "learning_rate": 1.020915677375483e-06, |
| "loss": 1.1285921335220337, |
| "step": 1632 |
| }, |
| { |
| "epoch": 2.9178571428571427, |
| "grad_norm": 0.18281330168247223, |
| "learning_rate": 1.0192443132687039e-06, |
| "loss": 1.0885471105575562, |
| "step": 1634 |
| }, |
| { |
| "epoch": 2.9214285714285713, |
| "grad_norm": 0.27069422602653503, |
| "learning_rate": 1.0176423947510377e-06, |
| "loss": 1.1098750829696655, |
| "step": 1636 |
| }, |
| { |
| "epoch": 2.925, |
| "grad_norm": 0.24785873293876648, |
| "learning_rate": 1.016109946650032e-06, |
| "loss": 1.1053394079208374, |
| "step": 1638 |
| }, |
| { |
| "epoch": 2.928571428571429, |
| "grad_norm": 0.2786495089530945, |
| "learning_rate": 1.014646992716537e-06, |
| "loss": 1.1500390768051147, |
| "step": 1640 |
| }, |
| { |
| "epoch": 2.932142857142857, |
| "grad_norm": 0.3538748621940613, |
| "learning_rate": 1.01325355562434e-06, |
| "loss": 1.1664944887161255, |
| "step": 1642 |
| }, |
| { |
| "epoch": 2.935714285714286, |
| "grad_norm": 0.3729296326637268, |
| "learning_rate": 1.0119296569698112e-06, |
| "loss": 1.1281384229660034, |
| "step": 1644 |
| }, |
| { |
| "epoch": 2.939285714285714, |
| "grad_norm": 0.21035878360271454, |
| "learning_rate": 1.01067531727157e-06, |
| "loss": 1.1451420783996582, |
| "step": 1646 |
| }, |
| { |
| "epoch": 2.942857142857143, |
| "grad_norm": 0.3253045380115509, |
| "learning_rate": 1.0094905559701678e-06, |
| "loss": 1.1268796920776367, |
| "step": 1648 |
| }, |
| { |
| "epoch": 2.946428571428571, |
| "grad_norm": 0.20938168466091156, |
| "learning_rate": 1.0083753914277859e-06, |
| "loss": 1.0814552307128906, |
| "step": 1650 |
| }, |
| { |
| "epoch": 2.95, |
| "grad_norm": 0.24861246347427368, |
| "learning_rate": 1.007329840927949e-06, |
| "loss": 1.1016547679901123, |
| "step": 1652 |
| }, |
| { |
| "epoch": 2.9535714285714287, |
| "grad_norm": 0.26715606451034546, |
| "learning_rate": 1.006353920675263e-06, |
| "loss": 1.1287412643432617, |
| "step": 1654 |
| }, |
| { |
| "epoch": 2.9571428571428573, |
| "grad_norm": 0.20948819816112518, |
| "learning_rate": 1.0054476457951567e-06, |
| "loss": 1.11174476146698, |
| "step": 1656 |
| }, |
| { |
| "epoch": 2.960714285714286, |
| "grad_norm": 0.5076990127563477, |
| "learning_rate": 1.0046110303336519e-06, |
| "loss": 1.112143874168396, |
| "step": 1658 |
| }, |
| { |
| "epoch": 2.9642857142857144, |
| "grad_norm": 0.5603309273719788, |
| "learning_rate": 1.0038440872571456e-06, |
| "loss": 1.1545910835266113, |
| "step": 1660 |
| }, |
| { |
| "epoch": 2.967857142857143, |
| "grad_norm": 0.23968827724456787, |
| "learning_rate": 1.0031468284522063e-06, |
| "loss": 1.1435242891311646, |
| "step": 1662 |
| }, |
| { |
| "epoch": 2.9714285714285715, |
| "grad_norm": 0.26473504304885864, |
| "learning_rate": 1.0025192647253939e-06, |
| "loss": 1.1580908298492432, |
| "step": 1664 |
| }, |
| { |
| "epoch": 2.975, |
| "grad_norm": 0.6800065636634827, |
| "learning_rate": 1.0019614058030874e-06, |
| "loss": 1.1012563705444336, |
| "step": 1666 |
| }, |
| { |
| "epoch": 2.9785714285714286, |
| "grad_norm": 0.23044763505458832, |
| "learning_rate": 1.0014732603313375e-06, |
| "loss": 1.1186460256576538, |
| "step": 1668 |
| }, |
| { |
| "epoch": 2.982142857142857, |
| "grad_norm": 0.21679583191871643, |
| "learning_rate": 1.0010548358757327e-06, |
| "loss": 1.1382079124450684, |
| "step": 1670 |
| }, |
| { |
| "epoch": 2.9857142857142858, |
| "grad_norm": 0.4521788954734802, |
| "learning_rate": 1.0007061389212794e-06, |
| "loss": 1.182320475578308, |
| "step": 1672 |
| }, |
| { |
| "epoch": 2.9892857142857143, |
| "grad_norm": 0.24779334664344788, |
| "learning_rate": 1.0004271748723043e-06, |
| "loss": 1.2086482048034668, |
| "step": 1674 |
| }, |
| { |
| "epoch": 2.992857142857143, |
| "grad_norm": 0.5126925706863403, |
| "learning_rate": 1.0002179480523687e-06, |
| "loss": 0.834091067314148, |
| "step": 1676 |
| }, |
| { |
| "epoch": 2.9964285714285714, |
| "grad_norm": 0.3477499783039093, |
| "learning_rate": 1.0000784617042023e-06, |
| "loss": 0.722780168056488, |
| "step": 1678 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.47854718565940857, |
| "learning_rate": 1.0000087179896533e-06, |
| "loss": 0.7972838282585144, |
| "step": 1680 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 1680, |
| "total_flos": 2.510120369642275e+18, |
| "train_loss": 1.2744095386493774, |
| "train_runtime": 14979.881, |
| "train_samples_per_second": 1.794, |
| "train_steps_per_second": 0.112 |
| } |
| ], |
| "logging_steps": 2, |
| "max_steps": 1680, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 9999999, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.510120369642275e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|