| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 1020, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00196078431372549, |
| "grad_norm": 53193.03125, |
| "learning_rate": 0.0, |
| "loss": 12.563469886779785, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.00392156862745098, |
| "grad_norm": 65318.5546875, |
| "learning_rate": 3.2258064516129035e-07, |
| "loss": 11.076333999633789, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0058823529411764705, |
| "grad_norm": 53597.85546875, |
| "learning_rate": 6.451612903225807e-07, |
| "loss": 13.65261459350586, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.00784313725490196, |
| "grad_norm": 59171.875, |
| "learning_rate": 9.67741935483871e-07, |
| "loss": 15.897336959838867, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.00980392156862745, |
| "grad_norm": 58358.25390625, |
| "learning_rate": 1.2903225806451614e-06, |
| "loss": 13.228803634643555, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.011764705882352941, |
| "grad_norm": 33576.91796875, |
| "learning_rate": 1.6129032258064516e-06, |
| "loss": 12.043094635009766, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.013725490196078431, |
| "grad_norm": 40702.50390625, |
| "learning_rate": 1.935483870967742e-06, |
| "loss": 9.755388259887695, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.01568627450980392, |
| "grad_norm": 15023.3505859375, |
| "learning_rate": 2.2580645161290324e-06, |
| "loss": 11.674318313598633, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.01764705882352941, |
| "grad_norm": 11325.5283203125, |
| "learning_rate": 2.580645161290323e-06, |
| "loss": 10.191791534423828, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.0196078431372549, |
| "grad_norm": 10106.4345703125, |
| "learning_rate": 2.903225806451613e-06, |
| "loss": 9.150543212890625, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.021568627450980392, |
| "grad_norm": 27759.919921875, |
| "learning_rate": 3.225806451612903e-06, |
| "loss": 9.193891525268555, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.023529411764705882, |
| "grad_norm": 17614.66796875, |
| "learning_rate": 3.548387096774194e-06, |
| "loss": 8.195146560668945, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.025490196078431372, |
| "grad_norm": 69656.46875, |
| "learning_rate": 3.870967741935484e-06, |
| "loss": 9.301647186279297, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.027450980392156862, |
| "grad_norm": 12202.4873046875, |
| "learning_rate": 4.193548387096774e-06, |
| "loss": 9.306933403015137, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.029411764705882353, |
| "grad_norm": 15527.83984375, |
| "learning_rate": 4.516129032258065e-06, |
| "loss": 11.867281913757324, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.03137254901960784, |
| "grad_norm": 9547.95703125, |
| "learning_rate": 4.838709677419355e-06, |
| "loss": 7.4255876541137695, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.03333333333333333, |
| "grad_norm": 7850.044921875, |
| "learning_rate": 5.161290322580646e-06, |
| "loss": 7.318220138549805, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.03529411764705882, |
| "grad_norm": 19678.12109375, |
| "learning_rate": 5.483870967741935e-06, |
| "loss": 8.5791015625, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.03725490196078431, |
| "grad_norm": 7771.2578125, |
| "learning_rate": 5.806451612903226e-06, |
| "loss": 7.246687412261963, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.0392156862745098, |
| "grad_norm": 279435.96875, |
| "learning_rate": 6.129032258064517e-06, |
| "loss": 8.958134651184082, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.041176470588235294, |
| "grad_norm": 7213.25341796875, |
| "learning_rate": 6.451612903225806e-06, |
| "loss": 6.607357025146484, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.043137254901960784, |
| "grad_norm": 6548.99267578125, |
| "learning_rate": 6.774193548387097e-06, |
| "loss": 7.45709228515625, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.045098039215686274, |
| "grad_norm": 6222.45751953125, |
| "learning_rate": 7.096774193548388e-06, |
| "loss": 6.48307991027832, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.047058823529411764, |
| "grad_norm": 2163.545654296875, |
| "learning_rate": 7.4193548387096784e-06, |
| "loss": 6.221013069152832, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.049019607843137254, |
| "grad_norm": 2340.748779296875, |
| "learning_rate": 7.741935483870968e-06, |
| "loss": 6.952737808227539, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.050980392156862744, |
| "grad_norm": 4872.9873046875, |
| "learning_rate": 8.064516129032258e-06, |
| "loss": 7.317384719848633, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.052941176470588235, |
| "grad_norm": 1415.2637939453125, |
| "learning_rate": 8.387096774193549e-06, |
| "loss": 7.906464576721191, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.054901960784313725, |
| "grad_norm": 1782.3765869140625, |
| "learning_rate": 8.70967741935484e-06, |
| "loss": 7.230937957763672, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.056862745098039215, |
| "grad_norm": 1727.0596923828125, |
| "learning_rate": 9.03225806451613e-06, |
| "loss": 6.246130466461182, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.058823529411764705, |
| "grad_norm": 967.6840209960938, |
| "learning_rate": 9.35483870967742e-06, |
| "loss": 7.539262294769287, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.060784313725490195, |
| "grad_norm": 7889.62060546875, |
| "learning_rate": 9.67741935483871e-06, |
| "loss": 6.116678237915039, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.06274509803921569, |
| "grad_norm": 7365.2177734375, |
| "learning_rate": 1e-05, |
| "loss": 6.007411956787109, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.06470588235294118, |
| "grad_norm": 341.2566833496094, |
| "learning_rate": 9.999974774092107e-06, |
| "loss": 5.7740020751953125, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.06666666666666667, |
| "grad_norm": 1897.9654541015625, |
| "learning_rate": 9.999899096622962e-06, |
| "loss": 7.221264839172363, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.06862745098039216, |
| "grad_norm": 2535.89208984375, |
| "learning_rate": 9.999772968356182e-06, |
| "loss": 6.483910083770752, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.07058823529411765, |
| "grad_norm": 472.5924377441406, |
| "learning_rate": 9.999596390564446e-06, |
| "loss": 6.1053571701049805, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.07254901960784314, |
| "grad_norm": 744.4826049804688, |
| "learning_rate": 9.999369365029487e-06, |
| "loss": 6.1797099113464355, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.07450980392156863, |
| "grad_norm": 487.4558410644531, |
| "learning_rate": 9.999091894042077e-06, |
| "loss": 6.281297206878662, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.07647058823529412, |
| "grad_norm": 1321.0650634765625, |
| "learning_rate": 9.998763980401997e-06, |
| "loss": 5.605555534362793, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.0784313725490196, |
| "grad_norm": 1479.992919921875, |
| "learning_rate": 9.998385627418015e-06, |
| "loss": 5.528350830078125, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0803921568627451, |
| "grad_norm": 408.816650390625, |
| "learning_rate": 9.997956838907853e-06, |
| "loss": 7.773702144622803, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.08235294117647059, |
| "grad_norm": 137.47793579101562, |
| "learning_rate": 9.997477619198138e-06, |
| "loss": 5.883761405944824, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.08431372549019608, |
| "grad_norm": 613.70458984375, |
| "learning_rate": 9.996947973124372e-06, |
| "loss": 5.855252265930176, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.08627450980392157, |
| "grad_norm": 272.45184326171875, |
| "learning_rate": 9.996367906030879e-06, |
| "loss": 6.615945816040039, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.08823529411764706, |
| "grad_norm": 89.09500122070312, |
| "learning_rate": 9.995737423770746e-06, |
| "loss": 5.607078552246094, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.09019607843137255, |
| "grad_norm": 313.746337890625, |
| "learning_rate": 9.995056532705766e-06, |
| "loss": 4.953719139099121, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.09215686274509804, |
| "grad_norm": 4434.6171875, |
| "learning_rate": 9.994325239706377e-06, |
| "loss": 5.135598182678223, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.09411764705882353, |
| "grad_norm": 305.1719055175781, |
| "learning_rate": 9.993543552151594e-06, |
| "loss": 5.021300792694092, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.09607843137254903, |
| "grad_norm": 9320.9189453125, |
| "learning_rate": 9.992711477928925e-06, |
| "loss": 5.3348188400268555, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.09803921568627451, |
| "grad_norm": 4146.0498046875, |
| "learning_rate": 9.991829025434305e-06, |
| "loss": 4.429060935974121, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 15375.9609375, |
| "learning_rate": 9.990896203571994e-06, |
| "loss": 5.005645275115967, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.10196078431372549, |
| "grad_norm": 200.52828979492188, |
| "learning_rate": 9.98991302175451e-06, |
| "loss": 4.591862201690674, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.10392156862745099, |
| "grad_norm": 778.8656616210938, |
| "learning_rate": 9.98887948990251e-06, |
| "loss": 4.922955513000488, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.10588235294117647, |
| "grad_norm": 338.90618896484375, |
| "learning_rate": 9.987795618444707e-06, |
| "loss": 5.766139507293701, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.10784313725490197, |
| "grad_norm": 2074.159912109375, |
| "learning_rate": 9.986661418317759e-06, |
| "loss": 4.315250873565674, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.10980392156862745, |
| "grad_norm": 163.18319702148438, |
| "learning_rate": 9.985476900966156e-06, |
| "loss": 4.612663269042969, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.11176470588235295, |
| "grad_norm": 366.7607116699219, |
| "learning_rate": 9.984242078342108e-06, |
| "loss": 5.53734827041626, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.11372549019607843, |
| "grad_norm": 507.3059387207031, |
| "learning_rate": 9.982956962905423e-06, |
| "loss": 5.0663065910339355, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.11568627450980393, |
| "grad_norm": 203.18968200683594, |
| "learning_rate": 9.981621567623385e-06, |
| "loss": 4.972416877746582, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.11764705882352941, |
| "grad_norm": 170.353271484375, |
| "learning_rate": 9.980235905970615e-06, |
| "loss": 5.337252616882324, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.11960784313725491, |
| "grad_norm": 138.26260375976562, |
| "learning_rate": 9.978799991928945e-06, |
| "loss": 5.116921424865723, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.12156862745098039, |
| "grad_norm": 129.8578643798828, |
| "learning_rate": 9.977313839987265e-06, |
| "loss": 4.708870887756348, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.12352941176470589, |
| "grad_norm": 182.51133728027344, |
| "learning_rate": 9.975777465141391e-06, |
| "loss": 5.148445129394531, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.12549019607843137, |
| "grad_norm": 263.6011962890625, |
| "learning_rate": 9.974190882893901e-06, |
| "loss": 5.472186088562012, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.12745098039215685, |
| "grad_norm": 410.5545349121094, |
| "learning_rate": 9.972554109253988e-06, |
| "loss": 4.765022277832031, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.12941176470588237, |
| "grad_norm": 36.73834991455078, |
| "learning_rate": 9.970867160737293e-06, |
| "loss": 4.604272842407227, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.13137254901960785, |
| "grad_norm": 48809.47265625, |
| "learning_rate": 9.969130054365737e-06, |
| "loss": 5.7819061279296875, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.13333333333333333, |
| "grad_norm": 267.5246276855469, |
| "learning_rate": 9.967342807667355e-06, |
| "loss": 4.696765899658203, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.13529411764705881, |
| "grad_norm": 5235.89404296875, |
| "learning_rate": 9.965505438676115e-06, |
| "loss": 7.0500078201293945, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.13725490196078433, |
| "grad_norm": 61.30277633666992, |
| "learning_rate": 9.963617965931738e-06, |
| "loss": 5.219986915588379, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.1392156862745098, |
| "grad_norm": 86.16511535644531, |
| "learning_rate": 9.961680408479508e-06, |
| "loss": 4.936437606811523, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.1411764705882353, |
| "grad_norm": 1172.7030029296875, |
| "learning_rate": 9.959692785870086e-06, |
| "loss": 4.968964099884033, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.14313725490196078, |
| "grad_norm": 151.62570190429688, |
| "learning_rate": 9.957655118159304e-06, |
| "loss": 4.32323694229126, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.1450980392156863, |
| "grad_norm": 179.33120727539062, |
| "learning_rate": 9.955567425907968e-06, |
| "loss": 4.883161544799805, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.14705882352941177, |
| "grad_norm": 48.311744689941406, |
| "learning_rate": 9.953429730181653e-06, |
| "loss": 4.122620582580566, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.14901960784313725, |
| "grad_norm": 55.67217254638672, |
| "learning_rate": 9.951242052550487e-06, |
| "loss": 4.693333148956299, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.15098039215686274, |
| "grad_norm": 24.245948791503906, |
| "learning_rate": 9.949004415088928e-06, |
| "loss": 4.758542060852051, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.15294117647058825, |
| "grad_norm": 75.69949340820312, |
| "learning_rate": 9.946716840375552e-06, |
| "loss": 4.672502517700195, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.15490196078431373, |
| "grad_norm": 47.35036849975586, |
| "learning_rate": 9.944379351492818e-06, |
| "loss": 5.108541488647461, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.1568627450980392, |
| "grad_norm": 32.07907485961914, |
| "learning_rate": 9.941991972026839e-06, |
| "loss": 4.218465805053711, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.1588235294117647, |
| "grad_norm": 45.03062057495117, |
| "learning_rate": 9.939554726067142e-06, |
| "loss": 5.109847068786621, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.1607843137254902, |
| "grad_norm": 23.509695053100586, |
| "learning_rate": 9.937067638206418e-06, |
| "loss": 4.969597816467285, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.1627450980392157, |
| "grad_norm": 22.31194496154785, |
| "learning_rate": 9.934530733540293e-06, |
| "loss": 4.279464244842529, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.16470588235294117, |
| "grad_norm": 33.23004913330078, |
| "learning_rate": 9.931944037667056e-06, |
| "loss": 4.389366149902344, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.16666666666666666, |
| "grad_norm": 55.22507858276367, |
| "learning_rate": 9.929307576687404e-06, |
| "loss": 4.9593353271484375, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.16862745098039217, |
| "grad_norm": 18.55003547668457, |
| "learning_rate": 9.926621377204188e-06, |
| "loss": 4.743139743804932, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.17058823529411765, |
| "grad_norm": 37.464111328125, |
| "learning_rate": 9.923885466322135e-06, |
| "loss": 5.466174125671387, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.17254901960784313, |
| "grad_norm": 26.026020050048828, |
| "learning_rate": 9.921099871647582e-06, |
| "loss": 4.79066276550293, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.17450980392156862, |
| "grad_norm": 21.373525619506836, |
| "learning_rate": 9.918264621288187e-06, |
| "loss": 4.939136505126953, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.17647058823529413, |
| "grad_norm": 48.5876350402832, |
| "learning_rate": 9.91537974385266e-06, |
| "loss": 5.149735927581787, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.1784313725490196, |
| "grad_norm": 19.163881301879883, |
| "learning_rate": 9.912445268450459e-06, |
| "loss": 5.1168317794799805, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.1803921568627451, |
| "grad_norm": 79.20329284667969, |
| "learning_rate": 9.909461224691506e-06, |
| "loss": 5.10573673248291, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.18235294117647058, |
| "grad_norm": 14.64513874053955, |
| "learning_rate": 9.906427642685889e-06, |
| "loss": 4.724274635314941, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.1843137254901961, |
| "grad_norm": 114.3454818725586, |
| "learning_rate": 9.90334455304355e-06, |
| "loss": 5.02380895614624, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.18627450980392157, |
| "grad_norm": 20.076950073242188, |
| "learning_rate": 9.900211986873986e-06, |
| "loss": 4.226929187774658, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.18823529411764706, |
| "grad_norm": 34.134727478027344, |
| "learning_rate": 9.897029975785924e-06, |
| "loss": 4.3056230545043945, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.19019607843137254, |
| "grad_norm": 1793.708251953125, |
| "learning_rate": 9.89379855188701e-06, |
| "loss": 4.075126647949219, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.19215686274509805, |
| "grad_norm": 223.3547821044922, |
| "learning_rate": 9.89051774778349e-06, |
| "loss": 4.407467365264893, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.19411764705882353, |
| "grad_norm": 150.83956909179688, |
| "learning_rate": 9.887187596579865e-06, |
| "loss": 4.878414630889893, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.19607843137254902, |
| "grad_norm": 631.5261840820312, |
| "learning_rate": 9.883808131878573e-06, |
| "loss": 4.593829154968262, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.1980392156862745, |
| "grad_norm": 424.9626770019531, |
| "learning_rate": 9.880379387779637e-06, |
| "loss": 4.420987129211426, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 156.39096069335938, |
| "learning_rate": 9.87690139888033e-06, |
| "loss": 4.282057762145996, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.2019607843137255, |
| "grad_norm": 87.1181869506836, |
| "learning_rate": 9.873374200274826e-06, |
| "loss": 4.356283187866211, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.20392156862745098, |
| "grad_norm": 91.06869506835938, |
| "learning_rate": 9.869797827553837e-06, |
| "loss": 4.65322732925415, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.20588235294117646, |
| "grad_norm": 24.660625457763672, |
| "learning_rate": 9.866172316804265e-06, |
| "loss": 4.646048069000244, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.20784313725490197, |
| "grad_norm": 228.06607055664062, |
| "learning_rate": 9.862497704608829e-06, |
| "loss": 4.7434186935424805, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.20980392156862746, |
| "grad_norm": 44.946834564208984, |
| "learning_rate": 9.8587740280457e-06, |
| "loss": 4.521874904632568, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.21176470588235294, |
| "grad_norm": 70.29621887207031, |
| "learning_rate": 9.855001324688128e-06, |
| "loss": 4.990371227264404, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.21372549019607842, |
| "grad_norm": 229.05606079101562, |
| "learning_rate": 9.851179632604057e-06, |
| "loss": 4.608217239379883, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.21568627450980393, |
| "grad_norm": 27.004846572875977, |
| "learning_rate": 9.847308990355752e-06, |
| "loss": 4.623664855957031, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.21764705882352942, |
| "grad_norm": 127.99211883544922, |
| "learning_rate": 9.843389436999396e-06, |
| "loss": 4.443979263305664, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.2196078431372549, |
| "grad_norm": 17364.953125, |
| "learning_rate": 9.839421012084709e-06, |
| "loss": 4.457244396209717, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.22156862745098038, |
| "grad_norm": 374.42242431640625, |
| "learning_rate": 9.835403755654535e-06, |
| "loss": 4.446849822998047, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.2235294117647059, |
| "grad_norm": 251.22105407714844, |
| "learning_rate": 9.831337708244454e-06, |
| "loss": 4.229968547821045, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.22549019607843138, |
| "grad_norm": 49.53373718261719, |
| "learning_rate": 9.827222910882358e-06, |
| "loss": 4.880672454833984, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.22745098039215686, |
| "grad_norm": 50.32280349731445, |
| "learning_rate": 9.82305940508805e-06, |
| "loss": 4.348365783691406, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.22941176470588234, |
| "grad_norm": 140.9510040283203, |
| "learning_rate": 9.818847232872815e-06, |
| "loss": 4.777993202209473, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.23137254901960785, |
| "grad_norm": 279.43096923828125, |
| "learning_rate": 9.814586436738998e-06, |
| "loss": 4.542442321777344, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.23333333333333334, |
| "grad_norm": 67.41588592529297, |
| "learning_rate": 9.81027705967958e-06, |
| "loss": 3.9457411766052246, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.23529411764705882, |
| "grad_norm": 86.58934783935547, |
| "learning_rate": 9.805919145177741e-06, |
| "loss": 4.564214706420898, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.2372549019607843, |
| "grad_norm": 54.56572723388672, |
| "learning_rate": 9.801512737206422e-06, |
| "loss": 4.819596290588379, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.23921568627450981, |
| "grad_norm": 301.5806884765625, |
| "learning_rate": 9.797057880227878e-06, |
| "loss": 4.497692108154297, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.2411764705882353, |
| "grad_norm": 27.02676010131836, |
| "learning_rate": 9.792554619193235e-06, |
| "loss": 4.406162738800049, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.24313725490196078, |
| "grad_norm": 135.9830322265625, |
| "learning_rate": 9.78800299954203e-06, |
| "loss": 4.398698806762695, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.24509803921568626, |
| "grad_norm": 103.8440170288086, |
| "learning_rate": 9.783403067201763e-06, |
| "loss": 4.38757848739624, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.24705882352941178, |
| "grad_norm": 34.276283264160156, |
| "learning_rate": 9.778754868587414e-06, |
| "loss": 5.003340721130371, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.24901960784313726, |
| "grad_norm": 1124.2784423828125, |
| "learning_rate": 9.774058450601003e-06, |
| "loss": 4.731633186340332, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.25098039215686274, |
| "grad_norm": 226.05685424804688, |
| "learning_rate": 9.76931386063109e-06, |
| "loss": 4.437061309814453, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.2529411764705882, |
| "grad_norm": 119.51632690429688, |
| "learning_rate": 9.76452114655231e-06, |
| "loss": 4.532749176025391, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.2549019607843137, |
| "grad_norm": 243.1568145751953, |
| "learning_rate": 9.759680356724888e-06, |
| "loss": 3.9236581325531006, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.2568627450980392, |
| "grad_norm": 80.8863754272461, |
| "learning_rate": 9.754791539994153e-06, |
| "loss": 4.814586162567139, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.25882352941176473, |
| "grad_norm": 61.44620895385742, |
| "learning_rate": 9.749854745690041e-06, |
| "loss": 4.452531814575195, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.2607843137254902, |
| "grad_norm": 28.460981369018555, |
| "learning_rate": 9.744870023626598e-06, |
| "loss": 4.3587646484375, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.2627450980392157, |
| "grad_norm": 134.26856994628906, |
| "learning_rate": 9.739837424101484e-06, |
| "loss": 4.51732873916626, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.2647058823529412, |
| "grad_norm": 130.5369415283203, |
| "learning_rate": 9.73475699789545e-06, |
| "loss": 4.519060134887695, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.26666666666666666, |
| "grad_norm": 44.77864074707031, |
| "learning_rate": 9.729628796271844e-06, |
| "loss": 3.988651990890503, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.26862745098039215, |
| "grad_norm": 11.590542793273926, |
| "learning_rate": 9.724452870976084e-06, |
| "loss": 4.040298938751221, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.27058823529411763, |
| "grad_norm": 18.744321823120117, |
| "learning_rate": 9.719229274235134e-06, |
| "loss": 4.6587629318237305, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.2725490196078431, |
| "grad_norm": 20.353900909423828, |
| "learning_rate": 9.713958058756985e-06, |
| "loss": 4.333967685699463, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.27450980392156865, |
| "grad_norm": 77.0707015991211, |
| "learning_rate": 9.708639277730112e-06, |
| "loss": 4.544798851013184, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.27647058823529413, |
| "grad_norm": 66.58236694335938, |
| "learning_rate": 9.703272984822947e-06, |
| "loss": 4.844306945800781, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.2784313725490196, |
| "grad_norm": 94.8054428100586, |
| "learning_rate": 9.697859234183336e-06, |
| "loss": 4.96964168548584, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.2803921568627451, |
| "grad_norm": 20.816715240478516, |
| "learning_rate": 9.692398080437991e-06, |
| "loss": 4.7491607666015625, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.2823529411764706, |
| "grad_norm": 35.99055862426758, |
| "learning_rate": 9.68688957869193e-06, |
| "loss": 4.6900787353515625, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.28431372549019607, |
| "grad_norm": 86.11298370361328, |
| "learning_rate": 9.681333784527945e-06, |
| "loss": 4.595911979675293, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.28627450980392155, |
| "grad_norm": 60.40806198120117, |
| "learning_rate": 9.67573075400601e-06, |
| "loss": 4.306059837341309, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.28823529411764703, |
| "grad_norm": 2100.6142578125, |
| "learning_rate": 9.670080543662742e-06, |
| "loss": 4.651222229003906, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.2901960784313726, |
| "grad_norm": 27.78398323059082, |
| "learning_rate": 9.66438321051081e-06, |
| "loss": 4.409613609313965, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.29215686274509806, |
| "grad_norm": 60.483070373535156, |
| "learning_rate": 9.658638812038379e-06, |
| "loss": 4.674676418304443, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.29411764705882354, |
| "grad_norm": 32.12427520751953, |
| "learning_rate": 9.652847406208514e-06, |
| "loss": 4.888742446899414, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.296078431372549, |
| "grad_norm": 26.405981063842773, |
| "learning_rate": 9.647009051458604e-06, |
| "loss": 5.177173614501953, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.2980392156862745, |
| "grad_norm": 26.46014976501465, |
| "learning_rate": 9.641123806699769e-06, |
| "loss": 4.627013683319092, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 29.641414642333984, |
| "learning_rate": 9.635191731316262e-06, |
| "loss": 4.72336483001709, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.30196078431372547, |
| "grad_norm": 57.82429504394531, |
| "learning_rate": 9.629212885164882e-06, |
| "loss": 4.602277755737305, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.30392156862745096, |
| "grad_norm": 15.265630722045898, |
| "learning_rate": 9.623187328574357e-06, |
| "loss": 4.607016563415527, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.3058823529411765, |
| "grad_norm": 18.17546844482422, |
| "learning_rate": 9.617115122344742e-06, |
| "loss": 4.736464023590088, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.307843137254902, |
| "grad_norm": 16.221704483032227, |
| "learning_rate": 9.6109963277468e-06, |
| "loss": 4.437252998352051, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.30980392156862746, |
| "grad_norm": 178.49720764160156, |
| "learning_rate": 9.604831006521393e-06, |
| "loss": 4.21634578704834, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.31176470588235294, |
| "grad_norm": 13.691607475280762, |
| "learning_rate": 9.598619220878852e-06, |
| "loss": 4.380677700042725, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.3137254901960784, |
| "grad_norm": 20.732192993164062, |
| "learning_rate": 9.592361033498349e-06, |
| "loss": 4.700325012207031, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.3156862745098039, |
| "grad_norm": 16.656484603881836, |
| "learning_rate": 9.586056507527266e-06, |
| "loss": 4.596185684204102, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.3176470588235294, |
| "grad_norm": 23.23253631591797, |
| "learning_rate": 9.57970570658056e-06, |
| "loss": 3.866307497024536, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.3196078431372549, |
| "grad_norm": 17.419248580932617, |
| "learning_rate": 9.57330869474012e-06, |
| "loss": 4.794591903686523, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.3215686274509804, |
| "grad_norm": 16.963010787963867, |
| "learning_rate": 9.566865536554119e-06, |
| "loss": 4.635406017303467, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.3235294117647059, |
| "grad_norm": 37.14400863647461, |
| "learning_rate": 9.560376297036362e-06, |
| "loss": 4.17448091506958, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.3254901960784314, |
| "grad_norm": 75.3456039428711, |
| "learning_rate": 9.553841041665632e-06, |
| "loss": 4.513227462768555, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.32745098039215687, |
| "grad_norm": 6.637061595916748, |
| "learning_rate": 9.54725983638503e-06, |
| "loss": 4.461617469787598, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.32941176470588235, |
| "grad_norm": 50.82125473022461, |
| "learning_rate": 9.540632747601309e-06, |
| "loss": 4.77449893951416, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.33137254901960783, |
| "grad_norm": 10.49996566772461, |
| "learning_rate": 9.533959842184195e-06, |
| "loss": 4.7018327713012695, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 1079.276611328125, |
| "learning_rate": 9.527241187465735e-06, |
| "loss": 4.265623569488525, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.3352941176470588, |
| "grad_norm": 16.335859298706055, |
| "learning_rate": 9.520476851239588e-06, |
| "loss": 4.110130310058594, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.33725490196078434, |
| "grad_norm": 18.83439826965332, |
| "learning_rate": 9.513666901760368e-06, |
| "loss": 4.40615177154541, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.3392156862745098, |
| "grad_norm": 96.0332260131836, |
| "learning_rate": 9.506811407742938e-06, |
| "loss": 4.52007532119751, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.3411764705882353, |
| "grad_norm": 148.7286376953125, |
| "learning_rate": 9.49991043836172e-06, |
| "loss": 4.527442455291748, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.3431372549019608, |
| "grad_norm": 12.599474906921387, |
| "learning_rate": 9.49296406325e-06, |
| "loss": 3.911221981048584, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.34509803921568627, |
| "grad_norm": 20.757164001464844, |
| "learning_rate": 9.485972352499231e-06, |
| "loss": 3.9439802169799805, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.34705882352941175, |
| "grad_norm": 27.068387985229492, |
| "learning_rate": 9.478935376658308e-06, |
| "loss": 4.118377685546875, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.34901960784313724, |
| "grad_norm": 10.007109642028809, |
| "learning_rate": 9.471853206732875e-06, |
| "loss": 4.880275249481201, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.3509803921568627, |
| "grad_norm": 16.727500915527344, |
| "learning_rate": 9.4647259141846e-06, |
| "loss": 4.590615272521973, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.35294117647058826, |
| "grad_norm": 41.481021881103516, |
| "learning_rate": 9.457553570930451e-06, |
| "loss": 4.905158042907715, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.35490196078431374, |
| "grad_norm": 31.07561492919922, |
| "learning_rate": 9.450336249341976e-06, |
| "loss": 4.413531303405762, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.3568627450980392, |
| "grad_norm": 24.699298858642578, |
| "learning_rate": 9.443074022244573e-06, |
| "loss": 4.389023780822754, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.3588235294117647, |
| "grad_norm": 16.85030174255371, |
| "learning_rate": 9.435766962916749e-06, |
| "loss": 4.493863105773926, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.3607843137254902, |
| "grad_norm": 84.7086410522461, |
| "learning_rate": 9.428415145089385e-06, |
| "loss": 4.844029426574707, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.3627450980392157, |
| "grad_norm": 21.581398010253906, |
| "learning_rate": 9.421018642944996e-06, |
| "loss": 4.144974231719971, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.36470588235294116, |
| "grad_norm": 41.96290969848633, |
| "learning_rate": 9.413577531116973e-06, |
| "loss": 4.406426906585693, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.36666666666666664, |
| "grad_norm": 26.514503479003906, |
| "learning_rate": 9.406091884688837e-06, |
| "loss": 4.684653282165527, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.3686274509803922, |
| "grad_norm": 113.76895141601562, |
| "learning_rate": 9.398561779193477e-06, |
| "loss": 4.297077178955078, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.37058823529411766, |
| "grad_norm": 71.71430206298828, |
| "learning_rate": 9.390987290612396e-06, |
| "loss": 4.727826118469238, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.37254901960784315, |
| "grad_norm": 3643.83251953125, |
| "learning_rate": 9.38336849537493e-06, |
| "loss": 4.563501358032227, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.37450980392156863, |
| "grad_norm": 5243.2451171875, |
| "learning_rate": 9.375705470357493e-06, |
| "loss": 4.342321395874023, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.3764705882352941, |
| "grad_norm": 72.02322387695312, |
| "learning_rate": 9.367998292882789e-06, |
| "loss": 3.8581736087799072, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.3784313725490196, |
| "grad_norm": 18.841279983520508, |
| "learning_rate": 9.36024704071904e-06, |
| "loss": 3.925510883331299, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.3803921568627451, |
| "grad_norm": 17.755449295043945, |
| "learning_rate": 9.35245179207919e-06, |
| "loss": 4.235489368438721, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.38235294117647056, |
| "grad_norm": 58.992862701416016, |
| "learning_rate": 9.344612625620134e-06, |
| "loss": 4.563477516174316, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.3843137254901961, |
| "grad_norm": 507.7042236328125, |
| "learning_rate": 9.336729620441906e-06, |
| "loss": 4.757030487060547, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.3862745098039216, |
| "grad_norm": 15.296229362487793, |
| "learning_rate": 9.328802856086891e-06, |
| "loss": 4.090035915374756, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.38823529411764707, |
| "grad_norm": 18.083436965942383, |
| "learning_rate": 9.32083241253902e-06, |
| "loss": 4.428251266479492, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.39019607843137255, |
| "grad_norm": 9.689542770385742, |
| "learning_rate": 9.312818370222962e-06, |
| "loss": 4.577901840209961, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.39215686274509803, |
| "grad_norm": 787.316162109375, |
| "learning_rate": 9.304760810003318e-06, |
| "loss": 4.476050853729248, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.3941176470588235, |
| "grad_norm": 12.827642440795898, |
| "learning_rate": 9.296659813183794e-06, |
| "loss": 4.565194129943848, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.396078431372549, |
| "grad_norm": 21.77273178100586, |
| "learning_rate": 9.28851546150639e-06, |
| "loss": 4.149844169616699, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.3980392156862745, |
| "grad_norm": 19.56074333190918, |
| "learning_rate": 9.280327837150572e-06, |
| "loss": 4.48900842666626, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 99.8299560546875, |
| "learning_rate": 9.272097022732444e-06, |
| "loss": 4.36943244934082, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.4019607843137255, |
| "grad_norm": 14.808595657348633, |
| "learning_rate": 9.263823101303911e-06, |
| "loss": 3.8771262168884277, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.403921568627451, |
| "grad_norm": 57.59800338745117, |
| "learning_rate": 9.255506156351846e-06, |
| "loss": 4.419735908508301, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.40588235294117647, |
| "grad_norm": 11.623266220092773, |
| "learning_rate": 9.247146271797244e-06, |
| "loss": 4.8247294425964355, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.40784313725490196, |
| "grad_norm": 24.259164810180664, |
| "learning_rate": 9.238743531994378e-06, |
| "loss": 3.6684348583221436, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.40980392156862744, |
| "grad_norm": 6.247960090637207, |
| "learning_rate": 9.23029802172994e-06, |
| "loss": 4.1336989402771, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.4117647058823529, |
| "grad_norm": 51.79281997680664, |
| "learning_rate": 9.221809826222198e-06, |
| "loss": 4.226710319519043, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.4137254901960784, |
| "grad_norm": 7.704559326171875, |
| "learning_rate": 9.213279031120129e-06, |
| "loss": 4.430539608001709, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.41568627450980394, |
| "grad_norm": 14.719862937927246, |
| "learning_rate": 9.20470572250255e-06, |
| "loss": 4.240396499633789, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.4176470588235294, |
| "grad_norm": 181.2888946533203, |
| "learning_rate": 9.196089986877262e-06, |
| "loss": 5.368552207946777, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.4196078431372549, |
| "grad_norm": 57.88325119018555, |
| "learning_rate": 9.18743191118016e-06, |
| "loss": 4.403110980987549, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.4215686274509804, |
| "grad_norm": 7.923624038696289, |
| "learning_rate": 9.17873158277438e-06, |
| "loss": 4.505987167358398, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.4235294117647059, |
| "grad_norm": 12.484492301940918, |
| "learning_rate": 9.16998908944939e-06, |
| "loss": 4.539330005645752, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.42549019607843136, |
| "grad_norm": 17.081193923950195, |
| "learning_rate": 9.161204519420126e-06, |
| "loss": 4.4862260818481445, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.42745098039215684, |
| "grad_norm": 40.73018264770508, |
| "learning_rate": 9.152377961326085e-06, |
| "loss": 3.9755687713623047, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.4294117647058823, |
| "grad_norm": 25.14496612548828, |
| "learning_rate": 9.14350950423045e-06, |
| "loss": 4.247672080993652, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.43137254901960786, |
| "grad_norm": 42.86063003540039, |
| "learning_rate": 9.134599237619167e-06, |
| "loss": 4.861601829528809, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.43333333333333335, |
| "grad_norm": 6.335762977600098, |
| "learning_rate": 9.125647251400068e-06, |
| "loss": 4.403509140014648, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.43529411764705883, |
| "grad_norm": 15.092013359069824, |
| "learning_rate": 9.11665363590194e-06, |
| "loss": 4.2879791259765625, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.4372549019607843, |
| "grad_norm": 21.203367233276367, |
| "learning_rate": 9.107618481873632e-06, |
| "loss": 4.234829902648926, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.4392156862745098, |
| "grad_norm": 17.83528709411621, |
| "learning_rate": 9.098541880483129e-06, |
| "loss": 4.473383903503418, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.4411764705882353, |
| "grad_norm": 9.45661449432373, |
| "learning_rate": 9.089423923316636e-06, |
| "loss": 4.060793876647949, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.44313725490196076, |
| "grad_norm": 15.240864753723145, |
| "learning_rate": 9.08026470237765e-06, |
| "loss": 4.1823625564575195, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.44509803921568625, |
| "grad_norm": 9.284879684448242, |
| "learning_rate": 9.07106431008604e-06, |
| "loss": 4.482451438903809, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.4470588235294118, |
| "grad_norm": 316.4547119140625, |
| "learning_rate": 9.0618228392771e-06, |
| "loss": 4.567050933837891, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.44901960784313727, |
| "grad_norm": 29.554767608642578, |
| "learning_rate": 9.052540383200634e-06, |
| "loss": 4.180408954620361, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.45098039215686275, |
| "grad_norm": 6.75397253036499, |
| "learning_rate": 9.043217035519986e-06, |
| "loss": 4.032431125640869, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.45294117647058824, |
| "grad_norm": 52.491146087646484, |
| "learning_rate": 9.033852890311127e-06, |
| "loss": 4.559549331665039, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.4549019607843137, |
| "grad_norm": 26.352832794189453, |
| "learning_rate": 9.02444804206168e-06, |
| "loss": 4.473541736602783, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.4568627450980392, |
| "grad_norm": 55.94530487060547, |
| "learning_rate": 9.01500258566998e-06, |
| "loss": 4.33009147644043, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.4588235294117647, |
| "grad_norm": 561.2896728515625, |
| "learning_rate": 9.005516616444112e-06, |
| "loss": 4.711042404174805, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.46078431372549017, |
| "grad_norm": 8.535683631896973, |
| "learning_rate": 8.99599023010095e-06, |
| "loss": 4.379650115966797, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.4627450980392157, |
| "grad_norm": 305.4293212890625, |
| "learning_rate": 8.986423522765191e-06, |
| "loss": 4.39713191986084, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.4647058823529412, |
| "grad_norm": 33.79056167602539, |
| "learning_rate": 8.976816590968388e-06, |
| "loss": 4.3444414138793945, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.4666666666666667, |
| "grad_norm": 96.67849731445312, |
| "learning_rate": 8.967169531647971e-06, |
| "loss": 4.552453994750977, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.46862745098039216, |
| "grad_norm": 9.912099838256836, |
| "learning_rate": 8.957482442146271e-06, |
| "loss": 4.669564723968506, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.47058823529411764, |
| "grad_norm": 70.12734985351562, |
| "learning_rate": 8.947755420209541e-06, |
| "loss": 4.48826789855957, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.4725490196078431, |
| "grad_norm": 82.63882446289062, |
| "learning_rate": 8.937988563986963e-06, |
| "loss": 4.289237022399902, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.4745098039215686, |
| "grad_norm": 8.964136123657227, |
| "learning_rate": 8.928181972029664e-06, |
| "loss": 4.6587371826171875, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.4764705882352941, |
| "grad_norm": 31.586191177368164, |
| "learning_rate": 8.918335743289717e-06, |
| "loss": 4.701657295227051, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.47843137254901963, |
| "grad_norm": 11.228754043579102, |
| "learning_rate": 8.90844997711915e-06, |
| "loss": 4.612178802490234, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.4803921568627451, |
| "grad_norm": 12.46069049835205, |
| "learning_rate": 8.898524773268926e-06, |
| "loss": 4.405297756195068, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.4823529411764706, |
| "grad_norm": 10.117264747619629, |
| "learning_rate": 8.888560231887963e-06, |
| "loss": 4.3877387046813965, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.4843137254901961, |
| "grad_norm": 10.064031600952148, |
| "learning_rate": 8.8785564535221e-06, |
| "loss": 4.765181064605713, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.48627450980392156, |
| "grad_norm": 26.27206039428711, |
| "learning_rate": 8.868513539113093e-06, |
| "loss": 4.58603572845459, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.48823529411764705, |
| "grad_norm": 9.178251266479492, |
| "learning_rate": 8.858431589997597e-06, |
| "loss": 4.693767070770264, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.49019607843137253, |
| "grad_norm": 41.67184829711914, |
| "learning_rate": 8.848310707906138e-06, |
| "loss": 4.149996280670166, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.492156862745098, |
| "grad_norm": 26.0257511138916, |
| "learning_rate": 8.838150994962094e-06, |
| "loss": 3.98866605758667, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.49411764705882355, |
| "grad_norm": 11.47574234008789, |
| "learning_rate": 8.827952553680656e-06, |
| "loss": 4.481922626495361, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.49607843137254903, |
| "grad_norm": 13.316975593566895, |
| "learning_rate": 8.817715486967803e-06, |
| "loss": 4.367927551269531, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.4980392156862745, |
| "grad_norm": 25.555376052856445, |
| "learning_rate": 8.807439898119252e-06, |
| "loss": 4.406060218811035, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 19.18964195251465, |
| "learning_rate": 8.797125890819429e-06, |
| "loss": 4.455549240112305, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.5019607843137255, |
| "grad_norm": 320.9529113769531, |
| "learning_rate": 8.786773569140414e-06, |
| "loss": 4.059732913970947, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.503921568627451, |
| "grad_norm": 10.907160758972168, |
| "learning_rate": 8.776383037540888e-06, |
| "loss": 4.300107955932617, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.5058823529411764, |
| "grad_norm": 10.504913330078125, |
| "learning_rate": 8.765954400865093e-06, |
| "loss": 4.379460334777832, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.5078431372549019, |
| "grad_norm": 8.576491355895996, |
| "learning_rate": 8.755487764341756e-06, |
| "loss": 4.10643196105957, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.5098039215686274, |
| "grad_norm": 37.79690170288086, |
| "learning_rate": 8.744983233583044e-06, |
| "loss": 4.253848075866699, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.5117647058823529, |
| "grad_norm": 16.08103370666504, |
| "learning_rate": 8.734440914583486e-06, |
| "loss": 4.03438663482666, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.5137254901960784, |
| "grad_norm": 16.359207153320312, |
| "learning_rate": 8.72386091371891e-06, |
| "loss": 4.546582221984863, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.515686274509804, |
| "grad_norm": 14.165915489196777, |
| "learning_rate": 8.713243337745366e-06, |
| "loss": 4.424289226531982, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.5176470588235295, |
| "grad_norm": 102.89573669433594, |
| "learning_rate": 8.70258829379805e-06, |
| "loss": 4.413416862487793, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.5196078431372549, |
| "grad_norm": 48.94295120239258, |
| "learning_rate": 8.691895889390228e-06, |
| "loss": 4.2126665115356445, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.5215686274509804, |
| "grad_norm": 4378.3564453125, |
| "learning_rate": 8.681166232412142e-06, |
| "loss": 4.454058647155762, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.5235294117647059, |
| "grad_norm": 20.026926040649414, |
| "learning_rate": 8.670399431129926e-06, |
| "loss": 4.487159252166748, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.5254901960784314, |
| "grad_norm": 31.539865493774414, |
| "learning_rate": 8.659595594184516e-06, |
| "loss": 3.834848642349243, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.5274509803921569, |
| "grad_norm": 18.91790199279785, |
| "learning_rate": 8.648754830590552e-06, |
| "loss": 3.623075246810913, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.5294117647058824, |
| "grad_norm": 225.7623748779297, |
| "learning_rate": 8.637877249735274e-06, |
| "loss": 4.196628093719482, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.5313725490196078, |
| "grad_norm": 61.982601165771484, |
| "learning_rate": 8.626962961377423e-06, |
| "loss": 4.607351303100586, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.5333333333333333, |
| "grad_norm": 29.32758140563965, |
| "learning_rate": 8.616012075646134e-06, |
| "loss": 4.233864784240723, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.5352941176470588, |
| "grad_norm": 131.88658142089844, |
| "learning_rate": 8.605024703039817e-06, |
| "loss": 4.576198577880859, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.5372549019607843, |
| "grad_norm": 28.853824615478516, |
| "learning_rate": 8.594000954425056e-06, |
| "loss": 4.819530010223389, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.5392156862745098, |
| "grad_norm": 78.97745513916016, |
| "learning_rate": 8.582940941035476e-06, |
| "loss": 4.64754056930542, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.5411764705882353, |
| "grad_norm": 84.47677612304688, |
| "learning_rate": 8.571844774470627e-06, |
| "loss": 4.477121829986572, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.5431372549019607, |
| "grad_norm": 151.27618408203125, |
| "learning_rate": 8.560712566694863e-06, |
| "loss": 4.461421489715576, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.5450980392156862, |
| "grad_norm": 67.91239166259766, |
| "learning_rate": 8.549544430036198e-06, |
| "loss": 4.4422101974487305, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.5470588235294118, |
| "grad_norm": 27.76258087158203, |
| "learning_rate": 8.538340477185191e-06, |
| "loss": 4.3184404373168945, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.5490196078431373, |
| "grad_norm": 52.35835266113281, |
| "learning_rate": 8.527100821193797e-06, |
| "loss": 4.155409812927246, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.5509803921568628, |
| "grad_norm": 4.545929908752441, |
| "learning_rate": 8.51582557547422e-06, |
| "loss": 4.422041893005371, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.5529411764705883, |
| "grad_norm": 27.044042587280273, |
| "learning_rate": 8.504514853797789e-06, |
| "loss": 4.30333948135376, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.5549019607843138, |
| "grad_norm": 16.815105438232422, |
| "learning_rate": 8.493168770293793e-06, |
| "loss": 4.171971797943115, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.5568627450980392, |
| "grad_norm": 13.872841835021973, |
| "learning_rate": 8.481787439448332e-06, |
| "loss": 4.339047431945801, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.5588235294117647, |
| "grad_norm": 28.80792808532715, |
| "learning_rate": 8.470370976103171e-06, |
| "loss": 4.152105331420898, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.5607843137254902, |
| "grad_norm": 8.963105201721191, |
| "learning_rate": 8.458919495454567e-06, |
| "loss": 4.377296447753906, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.5627450980392157, |
| "grad_norm": 74.01875305175781, |
| "learning_rate": 8.447433113052124e-06, |
| "loss": 4.3900346755981445, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.5647058823529412, |
| "grad_norm": 58.68815612792969, |
| "learning_rate": 8.435911944797605e-06, |
| "loss": 4.081506729125977, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.5666666666666667, |
| "grad_norm": 9.66000747680664, |
| "learning_rate": 8.42435610694379e-06, |
| "loss": 4.700527191162109, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.5686274509803921, |
| "grad_norm": 856.5879516601562, |
| "learning_rate": 8.412765716093273e-06, |
| "loss": 4.371489524841309, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.5705882352941176, |
| "grad_norm": 54.936737060546875, |
| "learning_rate": 8.401140889197305e-06, |
| "loss": 4.387087821960449, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.5725490196078431, |
| "grad_norm": 47.68635559082031, |
| "learning_rate": 8.38948174355462e-06, |
| "loss": 4.089095115661621, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.5745098039215686, |
| "grad_norm": 57.09484100341797, |
| "learning_rate": 8.377788396810223e-06, |
| "loss": 4.206271171569824, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.5764705882352941, |
| "grad_norm": 112.220947265625, |
| "learning_rate": 8.366060966954235e-06, |
| "loss": 4.599027156829834, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.5784313725490197, |
| "grad_norm": 6.840386390686035, |
| "learning_rate": 8.354299572320679e-06, |
| "loss": 4.318846702575684, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.5803921568627451, |
| "grad_norm": 9.837911605834961, |
| "learning_rate": 8.342504331586298e-06, |
| "loss": 4.517173767089844, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.5823529411764706, |
| "grad_norm": 8.993966102600098, |
| "learning_rate": 8.330675363769356e-06, |
| "loss": 4.465400695800781, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.5843137254901961, |
| "grad_norm": 13.560818672180176, |
| "learning_rate": 8.318812788228434e-06, |
| "loss": 4.454391002655029, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.5862745098039216, |
| "grad_norm": 42.093650817871094, |
| "learning_rate": 8.306916724661225e-06, |
| "loss": 4.149091720581055, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.5882352941176471, |
| "grad_norm": 26.250268936157227, |
| "learning_rate": 8.294987293103334e-06, |
| "loss": 4.4396562576293945, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.5901960784313726, |
| "grad_norm": 28.954051971435547, |
| "learning_rate": 8.283024613927055e-06, |
| "loss": 4.341966152191162, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.592156862745098, |
| "grad_norm": 62.48039245605469, |
| "learning_rate": 8.271028807840164e-06, |
| "loss": 4.215608596801758, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.5941176470588235, |
| "grad_norm": 22.603797912597656, |
| "learning_rate": 8.258999995884706e-06, |
| "loss": 4.152694225311279, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.596078431372549, |
| "grad_norm": 8.716656684875488, |
| "learning_rate": 8.246938299435759e-06, |
| "loss": 4.047298431396484, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.5980392156862745, |
| "grad_norm": 18.179229736328125, |
| "learning_rate": 8.234843840200218e-06, |
| "loss": 4.296360015869141, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 56.76976013183594, |
| "learning_rate": 8.222716740215573e-06, |
| "loss": 4.411765098571777, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.6019607843137255, |
| "grad_norm": 52.26530838012695, |
| "learning_rate": 8.210557121848664e-06, |
| "loss": 4.340122222900391, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.6039215686274509, |
| "grad_norm": 266.9866638183594, |
| "learning_rate": 8.198365107794457e-06, |
| "loss": 4.172665596008301, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.6058823529411764, |
| "grad_norm": 15.979412078857422, |
| "learning_rate": 8.186140821074801e-06, |
| "loss": 4.39658260345459, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.6078431372549019, |
| "grad_norm": 11.8402738571167, |
| "learning_rate": 8.173884385037193e-06, |
| "loss": 4.514126777648926, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.6098039215686275, |
| "grad_norm": 13.766944885253906, |
| "learning_rate": 8.161595923353516e-06, |
| "loss": 4.273541450500488, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.611764705882353, |
| "grad_norm": 14.141230583190918, |
| "learning_rate": 8.149275560018816e-06, |
| "loss": 4.575261116027832, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.6137254901960785, |
| "grad_norm": 11.983506202697754, |
| "learning_rate": 8.136923419350032e-06, |
| "loss": 4.441531181335449, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.615686274509804, |
| "grad_norm": 11.801518440246582, |
| "learning_rate": 8.12453962598475e-06, |
| "loss": 4.674493789672852, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.6176470588235294, |
| "grad_norm": 10.840680122375488, |
| "learning_rate": 8.112124304879938e-06, |
| "loss": 4.433037757873535, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.6196078431372549, |
| "grad_norm": 10.749861717224121, |
| "learning_rate": 8.0996775813107e-06, |
| "loss": 4.395119667053223, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.6215686274509804, |
| "grad_norm": 14.47747802734375, |
| "learning_rate": 8.087199580868997e-06, |
| "loss": 4.807774543762207, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.6235294117647059, |
| "grad_norm": 145.16539001464844, |
| "learning_rate": 8.07469042946238e-06, |
| "loss": 4.5776777267456055, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.6254901960784314, |
| "grad_norm": 16.87470054626465, |
| "learning_rate": 8.062150253312735e-06, |
| "loss": 4.936606407165527, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.6274509803921569, |
| "grad_norm": 17.10848617553711, |
| "learning_rate": 8.04957917895499e-06, |
| "loss": 4.733745574951172, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.6294117647058823, |
| "grad_norm": 13.901424407958984, |
| "learning_rate": 8.03697733323585e-06, |
| "loss": 4.649224758148193, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.6313725490196078, |
| "grad_norm": 9.257561683654785, |
| "learning_rate": 8.024344843312517e-06, |
| "loss": 4.121101379394531, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.6333333333333333, |
| "grad_norm": 20.801715850830078, |
| "learning_rate": 8.011681836651401e-06, |
| "loss": 4.233510971069336, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.6352941176470588, |
| "grad_norm": 8.654178619384766, |
| "learning_rate": 7.99898844102684e-06, |
| "loss": 4.358521938323975, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.6372549019607843, |
| "grad_norm": 19.770313262939453, |
| "learning_rate": 7.986264784519801e-06, |
| "loss": 4.486325263977051, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.6392156862745098, |
| "grad_norm": 24.694883346557617, |
| "learning_rate": 7.973510995516603e-06, |
| "loss": 4.33124303817749, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.6411764705882353, |
| "grad_norm": 5.858190536499023, |
| "learning_rate": 7.960727202707605e-06, |
| "loss": 4.653376579284668, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.6431372549019608, |
| "grad_norm": 10.262151718139648, |
| "learning_rate": 7.947913535085925e-06, |
| "loss": 4.330365180969238, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.6450980392156863, |
| "grad_norm": 7.667881965637207, |
| "learning_rate": 7.935070121946116e-06, |
| "loss": 4.4410929679870605, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.6470588235294118, |
| "grad_norm": 8.601579666137695, |
| "learning_rate": 7.922197092882882e-06, |
| "loss": 4.40837287902832, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.6490196078431373, |
| "grad_norm": 7.12674617767334, |
| "learning_rate": 7.909294577789765e-06, |
| "loss": 4.342606544494629, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.6509803921568628, |
| "grad_norm": 10.018842697143555, |
| "learning_rate": 7.896362706857825e-06, |
| "loss": 4.35633659362793, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.6529411764705882, |
| "grad_norm": 8.465182304382324, |
| "learning_rate": 7.883401610574338e-06, |
| "loss": 4.524230480194092, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.6549019607843137, |
| "grad_norm": 11.65457534790039, |
| "learning_rate": 7.870411419721468e-06, |
| "loss": 4.69898796081543, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.6568627450980392, |
| "grad_norm": 12.852667808532715, |
| "learning_rate": 7.857392265374963e-06, |
| "loss": 4.4134087562561035, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.6588235294117647, |
| "grad_norm": 7.551708698272705, |
| "learning_rate": 7.844344278902815e-06, |
| "loss": 4.333009243011475, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.6607843137254902, |
| "grad_norm": 8.990592002868652, |
| "learning_rate": 7.83126759196395e-06, |
| "loss": 4.12913703918457, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.6627450980392157, |
| "grad_norm": 5.897281646728516, |
| "learning_rate": 7.818162336506885e-06, |
| "loss": 4.319456100463867, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.6647058823529411, |
| "grad_norm": 10.881563186645508, |
| "learning_rate": 7.805028644768407e-06, |
| "loss": 3.8857927322387695, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 11.13469409942627, |
| "learning_rate": 7.791866649272236e-06, |
| "loss": 4.694910526275635, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.6686274509803921, |
| "grad_norm": 9.245570182800293, |
| "learning_rate": 7.778676482827686e-06, |
| "loss": 4.456270694732666, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.6705882352941176, |
| "grad_norm": 9.138643264770508, |
| "learning_rate": 7.765458278528327e-06, |
| "loss": 4.281149864196777, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.6725490196078432, |
| "grad_norm": 10.280998229980469, |
| "learning_rate": 7.752212169750642e-06, |
| "loss": 3.655160665512085, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.6745098039215687, |
| "grad_norm": 12.691766738891602, |
| "learning_rate": 7.738938290152675e-06, |
| "loss": 4.443702220916748, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.6764705882352942, |
| "grad_norm": 53.1502799987793, |
| "learning_rate": 7.725636773672694e-06, |
| "loss": 4.33807897567749, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.6784313725490196, |
| "grad_norm": 32.06969451904297, |
| "learning_rate": 7.712307754527832e-06, |
| "loss": 4.4467267990112305, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.6803921568627451, |
| "grad_norm": 9.82119083404541, |
| "learning_rate": 7.69895136721273e-06, |
| "loss": 4.563485145568848, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.6823529411764706, |
| "grad_norm": 14.746912002563477, |
| "learning_rate": 7.685567746498191e-06, |
| "loss": 4.372725486755371, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.6843137254901961, |
| "grad_norm": 8.75436019897461, |
| "learning_rate": 7.672157027429803e-06, |
| "loss": 4.417882919311523, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.6862745098039216, |
| "grad_norm": 19.984905242919922, |
| "learning_rate": 7.658719345326595e-06, |
| "loss": 4.403292655944824, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.6882352941176471, |
| "grad_norm": 8.184240341186523, |
| "learning_rate": 7.645254835779657e-06, |
| "loss": 4.424741268157959, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.6901960784313725, |
| "grad_norm": 12.842850685119629, |
| "learning_rate": 7.631763634650783e-06, |
| "loss": 4.134533405303955, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.692156862745098, |
| "grad_norm": 6.699388027191162, |
| "learning_rate": 7.618245878071091e-06, |
| "loss": 4.04726505279541, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.6941176470588235, |
| "grad_norm": 105.13713073730469, |
| "learning_rate": 7.604701702439652e-06, |
| "loss": 4.773365020751953, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.696078431372549, |
| "grad_norm": 4.884748458862305, |
| "learning_rate": 7.591131244422118e-06, |
| "loss": 4.247958660125732, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.6980392156862745, |
| "grad_norm": 7.144754409790039, |
| "learning_rate": 7.57753464094934e-06, |
| "loss": 4.382415771484375, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 6.072117805480957, |
| "learning_rate": 7.563912029215983e-06, |
| "loss": 4.450734615325928, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.7019607843137254, |
| "grad_norm": 12.829042434692383, |
| "learning_rate": 7.550263546679148e-06, |
| "loss": 4.965085983276367, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.703921568627451, |
| "grad_norm": 14.615585327148438, |
| "learning_rate": 7.536589331056976e-06, |
| "loss": 5.021347999572754, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.7058823529411765, |
| "grad_norm": 20.44240379333496, |
| "learning_rate": 7.522889520327275e-06, |
| "loss": 4.897363662719727, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.707843137254902, |
| "grad_norm": 8.430086135864258, |
| "learning_rate": 7.509164252726107e-06, |
| "loss": 4.523484230041504, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.7098039215686275, |
| "grad_norm": 64.25504302978516, |
| "learning_rate": 7.495413666746406e-06, |
| "loss": 4.529187202453613, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.711764705882353, |
| "grad_norm": 7.60660982131958, |
| "learning_rate": 7.481637901136578e-06, |
| "loss": 4.3769965171813965, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.7137254901960784, |
| "grad_norm": 10.102652549743652, |
| "learning_rate": 7.467837094899104e-06, |
| "loss": 4.237904071807861, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.7156862745098039, |
| "grad_norm": 7.940488815307617, |
| "learning_rate": 7.454011387289127e-06, |
| "loss": 4.800004959106445, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.7176470588235294, |
| "grad_norm": 9.199849128723145, |
| "learning_rate": 7.440160917813059e-06, |
| "loss": 4.4133710861206055, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.7196078431372549, |
| "grad_norm": 13.037126541137695, |
| "learning_rate": 7.426285826227171e-06, |
| "loss": 3.827446699142456, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.7215686274509804, |
| "grad_norm": 16.962772369384766, |
| "learning_rate": 7.412386252536168e-06, |
| "loss": 4.1103997230529785, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.7235294117647059, |
| "grad_norm": 5.314018249511719, |
| "learning_rate": 7.398462336991802e-06, |
| "loss": 4.367024898529053, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.7254901960784313, |
| "grad_norm": 7.412561893463135, |
| "learning_rate": 7.384514220091437e-06, |
| "loss": 4.72689962387085, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.7274509803921568, |
| "grad_norm": 8.2675199508667, |
| "learning_rate": 7.370542042576635e-06, |
| "loss": 4.435096263885498, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.7294117647058823, |
| "grad_norm": 9.654603004455566, |
| "learning_rate": 7.356545945431744e-06, |
| "loss": 4.463583946228027, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.7313725490196078, |
| "grad_norm": 8.670485496520996, |
| "learning_rate": 7.342526069882465e-06, |
| "loss": 4.5643415451049805, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.7333333333333333, |
| "grad_norm": 6.766948699951172, |
| "learning_rate": 7.328482557394435e-06, |
| "loss": 4.181691646575928, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.7352941176470589, |
| "grad_norm": 10.84627914428711, |
| "learning_rate": 7.314415549671795e-06, |
| "loss": 4.422411918640137, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.7372549019607844, |
| "grad_norm": 5.880477428436279, |
| "learning_rate": 7.300325188655762e-06, |
| "loss": 4.424395561218262, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.7392156862745098, |
| "grad_norm": 20.747825622558594, |
| "learning_rate": 7.286211616523193e-06, |
| "loss": 4.11272668838501, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.7411764705882353, |
| "grad_norm": 18.401670455932617, |
| "learning_rate": 7.27207497568516e-06, |
| "loss": 3.824052333831787, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.7431372549019608, |
| "grad_norm": 8.974493980407715, |
| "learning_rate": 7.257915408785499e-06, |
| "loss": 4.54365348815918, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.7450980392156863, |
| "grad_norm": 17.942354202270508, |
| "learning_rate": 7.243733058699386e-06, |
| "loss": 4.127044677734375, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.7470588235294118, |
| "grad_norm": 25.93067169189453, |
| "learning_rate": 7.229528068531881e-06, |
| "loss": 4.338431358337402, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.7490196078431373, |
| "grad_norm": 11.005992889404297, |
| "learning_rate": 7.215300581616496e-06, |
| "loss": 4.377932548522949, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.7509803921568627, |
| "grad_norm": 37.77220916748047, |
| "learning_rate": 7.201050741513735e-06, |
| "loss": 4.601173400878906, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.7529411764705882, |
| "grad_norm": 46.073551177978516, |
| "learning_rate": 7.186778692009669e-06, |
| "loss": 4.647587776184082, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.7549019607843137, |
| "grad_norm": 20.6584529876709, |
| "learning_rate": 7.172484577114452e-06, |
| "loss": 3.963017702102661, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.7568627450980392, |
| "grad_norm": 21.113977432250977, |
| "learning_rate": 7.1581685410609e-06, |
| "loss": 4.483006477355957, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.7588235294117647, |
| "grad_norm": 8.748090744018555, |
| "learning_rate": 7.1438307283030106e-06, |
| "loss": 4.640547752380371, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.7607843137254902, |
| "grad_norm": 6.323770046234131, |
| "learning_rate": 7.129471283514525e-06, |
| "loss": 4.377452850341797, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.7627450980392156, |
| "grad_norm": 7.1361308097839355, |
| "learning_rate": 7.115090351587455e-06, |
| "loss": 4.528425216674805, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.7647058823529411, |
| "grad_norm": 7.771296977996826, |
| "learning_rate": 7.100688077630628e-06, |
| "loss": 4.031993865966797, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.7666666666666667, |
| "grad_norm": 11.560522079467773, |
| "learning_rate": 7.086264606968215e-06, |
| "loss": 4.186218738555908, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.7686274509803922, |
| "grad_norm": 7.255620002746582, |
| "learning_rate": 7.071820085138275e-06, |
| "loss": 4.786005973815918, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.7705882352941177, |
| "grad_norm": 8.37585163116455, |
| "learning_rate": 7.05735465789128e-06, |
| "loss": 4.31743860244751, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.7725490196078432, |
| "grad_norm": 5.002284049987793, |
| "learning_rate": 7.042868471188642e-06, |
| "loss": 4.3620500564575195, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.7745098039215687, |
| "grad_norm": 7.566415309906006, |
| "learning_rate": 7.028361671201245e-06, |
| "loss": 4.245545387268066, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.7764705882352941, |
| "grad_norm": 15.03036117553711, |
| "learning_rate": 7.013834404307972e-06, |
| "loss": 4.74299955368042, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.7784313725490196, |
| "grad_norm": 7.180116176605225, |
| "learning_rate": 6.9992868170942205e-06, |
| "loss": 4.2865447998046875, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.7803921568627451, |
| "grad_norm": 11.753171920776367, |
| "learning_rate": 6.9847190563504284e-06, |
| "loss": 4.315492630004883, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.7823529411764706, |
| "grad_norm": 109.72077178955078, |
| "learning_rate": 6.970131269070591e-06, |
| "loss": 4.001181602478027, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.7843137254901961, |
| "grad_norm": 25.598417282104492, |
| "learning_rate": 6.95552360245078e-06, |
| "loss": 4.539231300354004, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.7862745098039216, |
| "grad_norm": 11.810413360595703, |
| "learning_rate": 6.940896203887659e-06, |
| "loss": 4.104607105255127, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.788235294117647, |
| "grad_norm": 12.639123916625977, |
| "learning_rate": 6.926249220976988e-06, |
| "loss": 4.151772499084473, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.7901960784313725, |
| "grad_norm": 11.613636016845703, |
| "learning_rate": 6.911582801512146e-06, |
| "loss": 4.29304313659668, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.792156862745098, |
| "grad_norm": 22.361234664916992, |
| "learning_rate": 6.8968970934826296e-06, |
| "loss": 3.9035181999206543, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.7941176470588235, |
| "grad_norm": 47.97343826293945, |
| "learning_rate": 6.88219224507257e-06, |
| "loss": 4.482244491577148, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.796078431372549, |
| "grad_norm": 53.55236053466797, |
| "learning_rate": 6.867468404659222e-06, |
| "loss": 4.42404317855835, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.7980392156862746, |
| "grad_norm": 6.139882564544678, |
| "learning_rate": 6.852725720811487e-06, |
| "loss": 3.866445302963257, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 13.475299835205078, |
| "learning_rate": 6.837964342288399e-06, |
| "loss": 4.167118072509766, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.8019607843137255, |
| "grad_norm": 13.994526863098145, |
| "learning_rate": 6.823184418037625e-06, |
| "loss": 4.223340034484863, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.803921568627451, |
| "grad_norm": 21.013151168823242, |
| "learning_rate": 6.808386097193969e-06, |
| "loss": 3.9206745624542236, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.8058823529411765, |
| "grad_norm": 14.670804023742676, |
| "learning_rate": 6.793569529077864e-06, |
| "loss": 4.275432586669922, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.807843137254902, |
| "grad_norm": 7.984720230102539, |
| "learning_rate": 6.778734863193862e-06, |
| "loss": 4.327607154846191, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.8098039215686275, |
| "grad_norm": 12.322322845458984, |
| "learning_rate": 6.76388224922913e-06, |
| "loss": 4.192738056182861, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.8117647058823529, |
| "grad_norm": 14.9388427734375, |
| "learning_rate": 6.7490118370519356e-06, |
| "loss": 4.836441993713379, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.8137254901960784, |
| "grad_norm": 12.723301887512207, |
| "learning_rate": 6.7341237767101375e-06, |
| "loss": 4.762911319732666, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.8156862745098039, |
| "grad_norm": 14.065217018127441, |
| "learning_rate": 6.7192182184296725e-06, |
| "loss": 4.191904067993164, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.8176470588235294, |
| "grad_norm": 17.27406120300293, |
| "learning_rate": 6.704295312613037e-06, |
| "loss": 4.0509748458862305, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.8196078431372549, |
| "grad_norm": 6.344608783721924, |
| "learning_rate": 6.689355209837769e-06, |
| "loss": 4.776634216308594, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.8215686274509804, |
| "grad_norm": 5.471117973327637, |
| "learning_rate": 6.674398060854931e-06, |
| "loss": 4.332463264465332, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.8235294117647058, |
| "grad_norm": 36.830535888671875, |
| "learning_rate": 6.65942401658759e-06, |
| "loss": 4.420635223388672, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.8254901960784313, |
| "grad_norm": 21.876571655273438, |
| "learning_rate": 6.644433228129288e-06, |
| "loss": 4.5363006591796875, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.8274509803921568, |
| "grad_norm": 20.565128326416016, |
| "learning_rate": 6.6294258467425256e-06, |
| "loss": 4.202686309814453, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.8294117647058824, |
| "grad_norm": 20.264272689819336, |
| "learning_rate": 6.614402023857231e-06, |
| "loss": 4.074251174926758, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.8313725490196079, |
| "grad_norm": 43.58699417114258, |
| "learning_rate": 6.599361911069235e-06, |
| "loss": 4.196136474609375, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.8333333333333334, |
| "grad_norm": 339.2962951660156, |
| "learning_rate": 6.584305660138734e-06, |
| "loss": 4.362434387207031, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.8352941176470589, |
| "grad_norm": 10.97144603729248, |
| "learning_rate": 6.569233422988771e-06, |
| "loss": 4.28524923324585, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.8372549019607843, |
| "grad_norm": 14.54588508605957, |
| "learning_rate": 6.554145351703689e-06, |
| "loss": 4.445708274841309, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.8392156862745098, |
| "grad_norm": 60.744747161865234, |
| "learning_rate": 6.539041598527612e-06, |
| "loss": 4.457370281219482, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.8411764705882353, |
| "grad_norm": 200.7458038330078, |
| "learning_rate": 6.523922315862887e-06, |
| "loss": 3.9219799041748047, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.8431372549019608, |
| "grad_norm": 5.749436378479004, |
| "learning_rate": 6.508787656268573e-06, |
| "loss": 4.174837112426758, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.8450980392156863, |
| "grad_norm": 6.574963092803955, |
| "learning_rate": 6.4936377724588794e-06, |
| "loss": 4.107203960418701, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.8470588235294118, |
| "grad_norm": 14.352339744567871, |
| "learning_rate": 6.478472817301635e-06, |
| "loss": 4.814848899841309, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.8490196078431372, |
| "grad_norm": 7.782406806945801, |
| "learning_rate": 6.463292943816747e-06, |
| "loss": 4.176547050476074, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.8509803921568627, |
| "grad_norm": 62.66047668457031, |
| "learning_rate": 6.448098305174648e-06, |
| "loss": 4.225009918212891, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.8529411764705882, |
| "grad_norm": 31.651609420776367, |
| "learning_rate": 6.4328890546947645e-06, |
| "loss": 4.503094673156738, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.8549019607843137, |
| "grad_norm": 6.525542736053467, |
| "learning_rate": 6.417665345843952e-06, |
| "loss": 3.9161875247955322, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.8568627450980392, |
| "grad_norm": 7.290045738220215, |
| "learning_rate": 6.402427332234965e-06, |
| "loss": 4.532121658325195, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.8588235294117647, |
| "grad_norm": 7.032063007354736, |
| "learning_rate": 6.387175167624894e-06, |
| "loss": 4.433841705322266, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.8607843137254902, |
| "grad_norm": 8.59839916229248, |
| "learning_rate": 6.371909005913618e-06, |
| "loss": 4.42300271987915, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.8627450980392157, |
| "grad_norm": 6.988096714019775, |
| "learning_rate": 6.3566290011422515e-06, |
| "loss": 4.233433723449707, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.8647058823529412, |
| "grad_norm": 4.5652313232421875, |
| "learning_rate": 6.341335307491596e-06, |
| "loss": 3.88616943359375, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.8666666666666667, |
| "grad_norm": 4.380303382873535, |
| "learning_rate": 6.32602807928057e-06, |
| "loss": 4.275338649749756, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.8686274509803922, |
| "grad_norm": 13.967598915100098, |
| "learning_rate": 6.310707470964668e-06, |
| "loss": 4.245949745178223, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.8705882352941177, |
| "grad_norm": 6.2936506271362305, |
| "learning_rate": 6.29537363713439e-06, |
| "loss": 4.214742660522461, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.8725490196078431, |
| "grad_norm": 6.2614970207214355, |
| "learning_rate": 6.280026732513689e-06, |
| "loss": 4.165116310119629, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.8745098039215686, |
| "grad_norm": 13.735063552856445, |
| "learning_rate": 6.264666911958404e-06, |
| "loss": 4.574287414550781, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.8764705882352941, |
| "grad_norm": 13.999856948852539, |
| "learning_rate": 6.249294330454705e-06, |
| "loss": 4.115286827087402, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.8784313725490196, |
| "grad_norm": 9.688719749450684, |
| "learning_rate": 6.233909143117521e-06, |
| "loss": 4.250068664550781, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.8803921568627451, |
| "grad_norm": 32.32737350463867, |
| "learning_rate": 6.21851150518898e-06, |
| "loss": 4.899062156677246, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.8823529411764706, |
| "grad_norm": 9.234833717346191, |
| "learning_rate": 6.203101572036839e-06, |
| "loss": 4.489043235778809, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.884313725490196, |
| "grad_norm": 19.185062408447266, |
| "learning_rate": 6.18767949915292e-06, |
| "loss": 4.504437446594238, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.8862745098039215, |
| "grad_norm": 10.716368675231934, |
| "learning_rate": 6.172245442151541e-06, |
| "loss": 4.1727800369262695, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.888235294117647, |
| "grad_norm": 12.19927978515625, |
| "learning_rate": 6.156799556767941e-06, |
| "loss": 4.317794322967529, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.8901960784313725, |
| "grad_norm": 22.349681854248047, |
| "learning_rate": 6.141341998856711e-06, |
| "loss": 4.239640235900879, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.8921568627450981, |
| "grad_norm": 9.220239639282227, |
| "learning_rate": 6.125872924390226e-06, |
| "loss": 4.351978778839111, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.8941176470588236, |
| "grad_norm": 30.204912185668945, |
| "learning_rate": 6.110392489457067e-06, |
| "loss": 4.01255989074707, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.8960784313725491, |
| "grad_norm": 34.6622428894043, |
| "learning_rate": 6.094900850260439e-06, |
| "loss": 4.116291046142578, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.8980392156862745, |
| "grad_norm": 53.01099395751953, |
| "learning_rate": 6.079398163116611e-06, |
| "loss": 4.077776908874512, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 13.728572845458984, |
| "learning_rate": 6.063884584453326e-06, |
| "loss": 4.205946922302246, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.9019607843137255, |
| "grad_norm": 27.227886199951172, |
| "learning_rate": 6.048360270808226e-06, |
| "loss": 4.454074859619141, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.903921568627451, |
| "grad_norm": 1187.58203125, |
| "learning_rate": 6.032825378827273e-06, |
| "loss": 4.106019973754883, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.9058823529411765, |
| "grad_norm": 88.40968322753906, |
| "learning_rate": 6.0172800652631706e-06, |
| "loss": 4.531243324279785, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.907843137254902, |
| "grad_norm": 76.25667572021484, |
| "learning_rate": 6.001724486973774e-06, |
| "loss": 4.6429853439331055, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.9098039215686274, |
| "grad_norm": 104.32091522216797, |
| "learning_rate": 5.986158800920523e-06, |
| "loss": 4.6695685386657715, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.9117647058823529, |
| "grad_norm": 101.0859603881836, |
| "learning_rate": 5.970583164166838e-06, |
| "loss": 4.725400447845459, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.9137254901960784, |
| "grad_norm": 67.9173583984375, |
| "learning_rate": 5.954997733876552e-06, |
| "loss": 4.044053077697754, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.9156862745098039, |
| "grad_norm": 258.1031494140625, |
| "learning_rate": 5.939402667312316e-06, |
| "loss": 4.8689799308776855, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.9176470588235294, |
| "grad_norm": 36.58251953125, |
| "learning_rate": 5.923798121834016e-06, |
| "loss": 4.843733787536621, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.9196078431372549, |
| "grad_norm": 22.516658782958984, |
| "learning_rate": 5.908184254897183e-06, |
| "loss": 4.461618423461914, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.9215686274509803, |
| "grad_norm": 54.69601821899414, |
| "learning_rate": 5.892561224051403e-06, |
| "loss": 4.002848148345947, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.9235294117647059, |
| "grad_norm": 34.609493255615234, |
| "learning_rate": 5.876929186938734e-06, |
| "loss": 4.39974308013916, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.9254901960784314, |
| "grad_norm": 158.92474365234375, |
| "learning_rate": 5.861288301292103e-06, |
| "loss": 4.6093525886535645, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.9274509803921569, |
| "grad_norm": 358.99554443359375, |
| "learning_rate": 5.845638724933729e-06, |
| "loss": 4.667701721191406, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.9294117647058824, |
| "grad_norm": 52.98344039916992, |
| "learning_rate": 5.82998061577352e-06, |
| "loss": 3.9046082496643066, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.9313725490196079, |
| "grad_norm": 11.510079383850098, |
| "learning_rate": 5.814314131807486e-06, |
| "loss": 4.29728889465332, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.9333333333333333, |
| "grad_norm": 52.69283676147461, |
| "learning_rate": 5.798639431116135e-06, |
| "loss": 4.667060375213623, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.9352941176470588, |
| "grad_norm": 27.900066375732422, |
| "learning_rate": 5.782956671862895e-06, |
| "loss": 4.403017044067383, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.9372549019607843, |
| "grad_norm": 8.282060623168945, |
| "learning_rate": 5.767266012292496e-06, |
| "loss": 4.352850914001465, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.9392156862745098, |
| "grad_norm": 9.053024291992188, |
| "learning_rate": 5.751567610729398e-06, |
| "loss": 4.167609691619873, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.9411764705882353, |
| "grad_norm": 39.519447326660156, |
| "learning_rate": 5.735861625576167e-06, |
| "loss": 4.330041885375977, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.9431372549019608, |
| "grad_norm": 95.29867553710938, |
| "learning_rate": 5.720148215311902e-06, |
| "loss": 4.768982887268066, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.9450980392156862, |
| "grad_norm": 23.550722122192383, |
| "learning_rate": 5.7044275384906164e-06, |
| "loss": 4.328036308288574, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.9470588235294117, |
| "grad_norm": 30.829458236694336, |
| "learning_rate": 5.688699753739649e-06, |
| "loss": 4.415774345397949, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.9490196078431372, |
| "grad_norm": 9.180082321166992, |
| "learning_rate": 5.672965019758061e-06, |
| "loss": 4.172072410583496, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.9509803921568627, |
| "grad_norm": 15.722640037536621, |
| "learning_rate": 5.657223495315031e-06, |
| "loss": 4.710781574249268, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.9529411764705882, |
| "grad_norm": 31.03820037841797, |
| "learning_rate": 5.641475339248257e-06, |
| "loss": 4.139684200286865, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.9549019607843138, |
| "grad_norm": 2897.17333984375, |
| "learning_rate": 5.625720710462352e-06, |
| "loss": 4.369198322296143, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.9568627450980393, |
| "grad_norm": 30.0218563079834, |
| "learning_rate": 5.609959767927247e-06, |
| "loss": 4.735037326812744, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.9588235294117647, |
| "grad_norm": 16.19695281982422, |
| "learning_rate": 5.594192670676568e-06, |
| "loss": 4.406367301940918, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.9607843137254902, |
| "grad_norm": 15.989925384521484, |
| "learning_rate": 5.578419577806058e-06, |
| "loss": 4.398193836212158, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.9627450980392157, |
| "grad_norm": 30.709712982177734, |
| "learning_rate": 5.562640648471951e-06, |
| "loss": 4.407459735870361, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.9647058823529412, |
| "grad_norm": 48.51979446411133, |
| "learning_rate": 5.546856041889374e-06, |
| "loss": 4.81352424621582, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.9666666666666667, |
| "grad_norm": 12.244742393493652, |
| "learning_rate": 5.531065917330737e-06, |
| "loss": 4.10401725769043, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.9686274509803922, |
| "grad_norm": 6.017563343048096, |
| "learning_rate": 5.515270434124136e-06, |
| "loss": 4.171624183654785, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.9705882352941176, |
| "grad_norm": 5.285996437072754, |
| "learning_rate": 5.499469751651728e-06, |
| "loss": 4.021862030029297, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.9725490196078431, |
| "grad_norm": 8.994955062866211, |
| "learning_rate": 5.483664029348141e-06, |
| "loss": 4.736730575561523, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.9745098039215686, |
| "grad_norm": 5.072285175323486, |
| "learning_rate": 5.467853426698852e-06, |
| "loss": 4.152073383331299, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.9764705882352941, |
| "grad_norm": 5.994180202484131, |
| "learning_rate": 5.452038103238582e-06, |
| "loss": 4.670340538024902, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.9784313725490196, |
| "grad_norm": 10.215426445007324, |
| "learning_rate": 5.43621821854969e-06, |
| "loss": 4.869247913360596, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.9803921568627451, |
| "grad_norm": 5.956417083740234, |
| "learning_rate": 5.420393932260557e-06, |
| "loss": 4.4339447021484375, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.9823529411764705, |
| "grad_norm": 4.450964450836182, |
| "learning_rate": 5.404565404043977e-06, |
| "loss": 4.339052677154541, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.984313725490196, |
| "grad_norm": 12.46487045288086, |
| "learning_rate": 5.388732793615551e-06, |
| "loss": 4.585082054138184, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.9862745098039216, |
| "grad_norm": 10.983120918273926, |
| "learning_rate": 5.372896260732065e-06, |
| "loss": 4.340575695037842, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.9882352941176471, |
| "grad_norm": 11.863996505737305, |
| "learning_rate": 5.357055965189888e-06, |
| "loss": 4.341768264770508, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.9901960784313726, |
| "grad_norm": 19.986562728881836, |
| "learning_rate": 5.341212066823356e-06, |
| "loss": 4.160877704620361, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.9921568627450981, |
| "grad_norm": 6.726130962371826, |
| "learning_rate": 5.325364725503155e-06, |
| "loss": 5.01240348815918, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.9941176470588236, |
| "grad_norm": 17.171804428100586, |
| "learning_rate": 5.3095141011347155e-06, |
| "loss": 4.031642913818359, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.996078431372549, |
| "grad_norm": 5.7268524169921875, |
| "learning_rate": 5.2936603536565915e-06, |
| "loss": 4.291274547576904, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.9980392156862745, |
| "grad_norm": 7.725809097290039, |
| "learning_rate": 5.277803643038855e-06, |
| "loss": 4.2685546875, |
| "step": 509 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 6.054961681365967, |
| "learning_rate": 5.261944129281474e-06, |
| "loss": 4.074590682983398, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.0019607843137255, |
| "grad_norm": 7.251394748687744, |
| "learning_rate": 5.246081972412702e-06, |
| "loss": 4.44883918762207, |
| "step": 511 |
| }, |
| { |
| "epoch": 1.003921568627451, |
| "grad_norm": 60.157867431640625, |
| "learning_rate": 5.230217332487462e-06, |
| "loss": 4.181658744812012, |
| "step": 512 |
| }, |
| { |
| "epoch": 1.0058823529411764, |
| "grad_norm": 253.7903594970703, |
| "learning_rate": 5.214350369585731e-06, |
| "loss": 3.90329909324646, |
| "step": 513 |
| }, |
| { |
| "epoch": 1.007843137254902, |
| "grad_norm": 17.08834457397461, |
| "learning_rate": 5.1984812438109274e-06, |
| "loss": 4.869369983673096, |
| "step": 514 |
| }, |
| { |
| "epoch": 1.0098039215686274, |
| "grad_norm": 11.701231956481934, |
| "learning_rate": 5.182610115288296e-06, |
| "loss": 4.458084583282471, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.011764705882353, |
| "grad_norm": 7.61533784866333, |
| "learning_rate": 5.166737144163283e-06, |
| "loss": 4.643211841583252, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.0137254901960784, |
| "grad_norm": 141.76812744140625, |
| "learning_rate": 5.150862490599934e-06, |
| "loss": 4.354730606079102, |
| "step": 517 |
| }, |
| { |
| "epoch": 1.0156862745098039, |
| "grad_norm": 11.43774700164795, |
| "learning_rate": 5.134986314779269e-06, |
| "loss": 4.488615036010742, |
| "step": 518 |
| }, |
| { |
| "epoch": 1.0176470588235293, |
| "grad_norm": 5.24697732925415, |
| "learning_rate": 5.119108776897665e-06, |
| "loss": 4.506349086761475, |
| "step": 519 |
| }, |
| { |
| "epoch": 1.0196078431372548, |
| "grad_norm": 78.49759674072266, |
| "learning_rate": 5.103230037165248e-06, |
| "loss": 4.484771251678467, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.0215686274509803, |
| "grad_norm": 12.482965469360352, |
| "learning_rate": 5.0873502558042665e-06, |
| "loss": 4.667587757110596, |
| "step": 521 |
| }, |
| { |
| "epoch": 1.0235294117647058, |
| "grad_norm": 12.86687183380127, |
| "learning_rate": 5.071469593047482e-06, |
| "loss": 4.537250995635986, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.0254901960784313, |
| "grad_norm": 6.38942289352417, |
| "learning_rate": 5.055588209136548e-06, |
| "loss": 4.627140522003174, |
| "step": 523 |
| }, |
| { |
| "epoch": 1.0274509803921568, |
| "grad_norm": 50.000144958496094, |
| "learning_rate": 5.0397062643204e-06, |
| "loss": 4.401636123657227, |
| "step": 524 |
| }, |
| { |
| "epoch": 1.0294117647058822, |
| "grad_norm": 11.164356231689453, |
| "learning_rate": 5.023823918853622e-06, |
| "loss": 4.4169511795043945, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.0313725490196077, |
| "grad_norm": 102.13648986816406, |
| "learning_rate": 5.0079413329948524e-06, |
| "loss": 4.127865791320801, |
| "step": 526 |
| }, |
| { |
| "epoch": 1.0333333333333334, |
| "grad_norm": 7.493802070617676, |
| "learning_rate": 4.992058667005149e-06, |
| "loss": 4.02808952331543, |
| "step": 527 |
| }, |
| { |
| "epoch": 1.035294117647059, |
| "grad_norm": 7.781829833984375, |
| "learning_rate": 4.976176081146379e-06, |
| "loss": 4.525649070739746, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.0372549019607844, |
| "grad_norm": 9.3035888671875, |
| "learning_rate": 4.960293735679601e-06, |
| "loss": 4.452272415161133, |
| "step": 529 |
| }, |
| { |
| "epoch": 1.0392156862745099, |
| "grad_norm": 59.47039794921875, |
| "learning_rate": 4.944411790863453e-06, |
| "loss": 4.098461151123047, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.0411764705882354, |
| "grad_norm": 22.812461853027344, |
| "learning_rate": 4.928530406952521e-06, |
| "loss": 3.912886142730713, |
| "step": 531 |
| }, |
| { |
| "epoch": 1.0431372549019609, |
| "grad_norm": 20.064706802368164, |
| "learning_rate": 4.912649744195735e-06, |
| "loss": 4.186243057250977, |
| "step": 532 |
| }, |
| { |
| "epoch": 1.0450980392156863, |
| "grad_norm": 35.90742111206055, |
| "learning_rate": 4.896769962834754e-06, |
| "loss": 4.1847028732299805, |
| "step": 533 |
| }, |
| { |
| "epoch": 1.0470588235294118, |
| "grad_norm": 10.533987045288086, |
| "learning_rate": 4.880891223102337e-06, |
| "loss": 4.008520126342773, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.0490196078431373, |
| "grad_norm": 12.881206512451172, |
| "learning_rate": 4.865013685220733e-06, |
| "loss": 4.273331165313721, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.0509803921568628, |
| "grad_norm": 21.63557243347168, |
| "learning_rate": 4.8491375094000675e-06, |
| "loss": 4.49159049987793, |
| "step": 536 |
| }, |
| { |
| "epoch": 1.0529411764705883, |
| "grad_norm": 10.12979507446289, |
| "learning_rate": 4.83326285583672e-06, |
| "loss": 4.520029067993164, |
| "step": 537 |
| }, |
| { |
| "epoch": 1.0549019607843138, |
| "grad_norm": 97.30291748046875, |
| "learning_rate": 4.817389884711706e-06, |
| "loss": 4.093694686889648, |
| "step": 538 |
| }, |
| { |
| "epoch": 1.0568627450980392, |
| "grad_norm": 11.112120628356934, |
| "learning_rate": 4.801518756189074e-06, |
| "loss": 4.431877136230469, |
| "step": 539 |
| }, |
| { |
| "epoch": 1.0588235294117647, |
| "grad_norm": 16.844884872436523, |
| "learning_rate": 4.785649630414272e-06, |
| "loss": 4.923152923583984, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.0607843137254902, |
| "grad_norm": 62.37599182128906, |
| "learning_rate": 4.76978266751254e-06, |
| "loss": 4.213642120361328, |
| "step": 541 |
| }, |
| { |
| "epoch": 1.0627450980392157, |
| "grad_norm": 14.22615909576416, |
| "learning_rate": 4.7539180275873e-06, |
| "loss": 4.106534481048584, |
| "step": 542 |
| }, |
| { |
| "epoch": 1.0647058823529412, |
| "grad_norm": 16.207141876220703, |
| "learning_rate": 4.7380558707185285e-06, |
| "loss": 4.500801086425781, |
| "step": 543 |
| }, |
| { |
| "epoch": 1.0666666666666667, |
| "grad_norm": 19.56553840637207, |
| "learning_rate": 4.7221963569611454e-06, |
| "loss": 4.4018449783325195, |
| "step": 544 |
| }, |
| { |
| "epoch": 1.0686274509803921, |
| "grad_norm": 95.8540267944336, |
| "learning_rate": 4.70633964634341e-06, |
| "loss": 4.279302597045898, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.0705882352941176, |
| "grad_norm": 43.17036056518555, |
| "learning_rate": 4.690485898865288e-06, |
| "loss": 4.108880996704102, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.072549019607843, |
| "grad_norm": 52.357269287109375, |
| "learning_rate": 4.6746352744968474e-06, |
| "loss": 4.511680603027344, |
| "step": 547 |
| }, |
| { |
| "epoch": 1.0745098039215686, |
| "grad_norm": 19.50657081604004, |
| "learning_rate": 4.6587879331766465e-06, |
| "loss": 4.271141529083252, |
| "step": 548 |
| }, |
| { |
| "epoch": 1.076470588235294, |
| "grad_norm": 36.895565032958984, |
| "learning_rate": 4.642944034810113e-06, |
| "loss": 4.62483024597168, |
| "step": 549 |
| }, |
| { |
| "epoch": 1.0784313725490196, |
| "grad_norm": 54.36555862426758, |
| "learning_rate": 4.627103739267935e-06, |
| "loss": 4.458219051361084, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.080392156862745, |
| "grad_norm": 464.3047790527344, |
| "learning_rate": 4.61126720638445e-06, |
| "loss": 4.253323554992676, |
| "step": 551 |
| }, |
| { |
| "epoch": 1.0823529411764705, |
| "grad_norm": 84.94306945800781, |
| "learning_rate": 4.595434595956024e-06, |
| "loss": 4.691922187805176, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.084313725490196, |
| "grad_norm": 31.324134826660156, |
| "learning_rate": 4.579606067739445e-06, |
| "loss": 4.128964900970459, |
| "step": 553 |
| }, |
| { |
| "epoch": 1.0862745098039215, |
| "grad_norm": 12.749728202819824, |
| "learning_rate": 4.563781781450312e-06, |
| "loss": 4.291423320770264, |
| "step": 554 |
| }, |
| { |
| "epoch": 1.088235294117647, |
| "grad_norm": 997.2783813476562, |
| "learning_rate": 4.547961896761419e-06, |
| "loss": 4.028029918670654, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.0901960784313725, |
| "grad_norm": 38.891510009765625, |
| "learning_rate": 4.5321465733011495e-06, |
| "loss": 3.683849334716797, |
| "step": 556 |
| }, |
| { |
| "epoch": 1.0921568627450982, |
| "grad_norm": 75.46724700927734, |
| "learning_rate": 4.51633597065186e-06, |
| "loss": 4.8069682121276855, |
| "step": 557 |
| }, |
| { |
| "epoch": 1.0941176470588236, |
| "grad_norm": 15.80173110961914, |
| "learning_rate": 4.500530248348274e-06, |
| "loss": 4.055245399475098, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.0960784313725491, |
| "grad_norm": 15.971833229064941, |
| "learning_rate": 4.484729565875865e-06, |
| "loss": 4.495638370513916, |
| "step": 559 |
| }, |
| { |
| "epoch": 1.0980392156862746, |
| "grad_norm": 23.540477752685547, |
| "learning_rate": 4.468934082669265e-06, |
| "loss": 4.262730598449707, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.1, |
| "grad_norm": 29.687997817993164, |
| "learning_rate": 4.4531439581106295e-06, |
| "loss": 4.217746734619141, |
| "step": 561 |
| }, |
| { |
| "epoch": 1.1019607843137256, |
| "grad_norm": 13.458581924438477, |
| "learning_rate": 4.43735935152805e-06, |
| "loss": 3.923027515411377, |
| "step": 562 |
| }, |
| { |
| "epoch": 1.103921568627451, |
| "grad_norm": 64.60430908203125, |
| "learning_rate": 4.421580422193943e-06, |
| "loss": 3.8010993003845215, |
| "step": 563 |
| }, |
| { |
| "epoch": 1.1058823529411765, |
| "grad_norm": 67.64999389648438, |
| "learning_rate": 4.405807329323434e-06, |
| "loss": 4.074517250061035, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.107843137254902, |
| "grad_norm": 220.15310668945312, |
| "learning_rate": 4.390040232072756e-06, |
| "loss": 4.491154193878174, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.1098039215686275, |
| "grad_norm": 23.060487747192383, |
| "learning_rate": 4.3742792895376494e-06, |
| "loss": 4.158168792724609, |
| "step": 566 |
| }, |
| { |
| "epoch": 1.111764705882353, |
| "grad_norm": 28.182754516601562, |
| "learning_rate": 4.358524660751746e-06, |
| "loss": 4.115178108215332, |
| "step": 567 |
| }, |
| { |
| "epoch": 1.1137254901960785, |
| "grad_norm": 17.95804786682129, |
| "learning_rate": 4.3427765046849715e-06, |
| "loss": 4.1569061279296875, |
| "step": 568 |
| }, |
| { |
| "epoch": 1.115686274509804, |
| "grad_norm": 53.578575134277344, |
| "learning_rate": 4.327034980241941e-06, |
| "loss": 4.174633026123047, |
| "step": 569 |
| }, |
| { |
| "epoch": 1.1176470588235294, |
| "grad_norm": 11.721698760986328, |
| "learning_rate": 4.3113002462603525e-06, |
| "loss": 4.331512451171875, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.119607843137255, |
| "grad_norm": 18.418899536132812, |
| "learning_rate": 4.295572461509384e-06, |
| "loss": 4.3890910148620605, |
| "step": 571 |
| }, |
| { |
| "epoch": 1.1215686274509804, |
| "grad_norm": 14.938716888427734, |
| "learning_rate": 4.279851784688099e-06, |
| "loss": 4.322911262512207, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.1235294117647059, |
| "grad_norm": 32.67909622192383, |
| "learning_rate": 4.264138374423835e-06, |
| "loss": 4.3782219886779785, |
| "step": 573 |
| }, |
| { |
| "epoch": 1.1254901960784314, |
| "grad_norm": 17.3731746673584, |
| "learning_rate": 4.248432389270604e-06, |
| "loss": 4.4932708740234375, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.1274509803921569, |
| "grad_norm": 7.340451717376709, |
| "learning_rate": 4.232733987707505e-06, |
| "loss": 3.999309539794922, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.1294117647058823, |
| "grad_norm": 9.819192886352539, |
| "learning_rate": 4.2170433281371085e-06, |
| "loss": 3.6122140884399414, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.1313725490196078, |
| "grad_norm": 71.66674041748047, |
| "learning_rate": 4.2013605688838656e-06, |
| "loss": 4.058876037597656, |
| "step": 577 |
| }, |
| { |
| "epoch": 1.1333333333333333, |
| "grad_norm": 41.16455078125, |
| "learning_rate": 4.185685868192516e-06, |
| "loss": 4.322812080383301, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.1352941176470588, |
| "grad_norm": 1555.6925048828125, |
| "learning_rate": 4.170019384226482e-06, |
| "loss": 4.490335464477539, |
| "step": 579 |
| }, |
| { |
| "epoch": 1.1372549019607843, |
| "grad_norm": 57.73488998413086, |
| "learning_rate": 4.154361275066272e-06, |
| "loss": 4.762178421020508, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.1392156862745098, |
| "grad_norm": 20.134422302246094, |
| "learning_rate": 4.138711698707899e-06, |
| "loss": 4.065451145172119, |
| "step": 581 |
| }, |
| { |
| "epoch": 1.1411764705882352, |
| "grad_norm": 57.88652038574219, |
| "learning_rate": 4.123070813061269e-06, |
| "loss": 4.232456207275391, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.1431372549019607, |
| "grad_norm": 143.38563537597656, |
| "learning_rate": 4.107438775948598e-06, |
| "loss": 4.489979267120361, |
| "step": 583 |
| }, |
| { |
| "epoch": 1.1450980392156862, |
| "grad_norm": 108.81521606445312, |
| "learning_rate": 4.091815745102818e-06, |
| "loss": 4.419774532318115, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.1470588235294117, |
| "grad_norm": 10.038305282592773, |
| "learning_rate": 4.076201878165985e-06, |
| "loss": 4.142699241638184, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.1490196078431372, |
| "grad_norm": 9.179925918579102, |
| "learning_rate": 4.060597332687685e-06, |
| "loss": 4.196611404418945, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.1509803921568627, |
| "grad_norm": 11.475279808044434, |
| "learning_rate": 4.04500226612345e-06, |
| "loss": 4.304692268371582, |
| "step": 587 |
| }, |
| { |
| "epoch": 1.1529411764705881, |
| "grad_norm": 9.253667831420898, |
| "learning_rate": 4.0294168358331646e-06, |
| "loss": 4.438146591186523, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.1549019607843136, |
| "grad_norm": 14.186434745788574, |
| "learning_rate": 4.013841199079479e-06, |
| "loss": 4.354300498962402, |
| "step": 589 |
| }, |
| { |
| "epoch": 1.156862745098039, |
| "grad_norm": 14.755526542663574, |
| "learning_rate": 3.998275513026227e-06, |
| "loss": 4.734729766845703, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.1588235294117646, |
| "grad_norm": 28.638187408447266, |
| "learning_rate": 3.982719934736832e-06, |
| "loss": 4.530861854553223, |
| "step": 591 |
| }, |
| { |
| "epoch": 1.1607843137254903, |
| "grad_norm": 26.97394561767578, |
| "learning_rate": 3.967174621172728e-06, |
| "loss": 4.3430705070495605, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.1627450980392158, |
| "grad_norm": 9.441927909851074, |
| "learning_rate": 3.951639729191775e-06, |
| "loss": 4.322876930236816, |
| "step": 593 |
| }, |
| { |
| "epoch": 1.1647058823529413, |
| "grad_norm": 8.678901672363281, |
| "learning_rate": 3.936115415546676e-06, |
| "loss": 3.9634523391723633, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.1666666666666667, |
| "grad_norm": 30.527114868164062, |
| "learning_rate": 3.920601836883389e-06, |
| "loss": 4.058382034301758, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.1686274509803922, |
| "grad_norm": 108.74210357666016, |
| "learning_rate": 3.9050991497395625e-06, |
| "loss": 4.502130508422852, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.1705882352941177, |
| "grad_norm": 36.77184295654297, |
| "learning_rate": 3.889607510542936e-06, |
| "loss": 4.163606643676758, |
| "step": 597 |
| }, |
| { |
| "epoch": 1.1725490196078432, |
| "grad_norm": 15.290841102600098, |
| "learning_rate": 3.874127075609774e-06, |
| "loss": 4.36452579498291, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.1745098039215687, |
| "grad_norm": 12.470197677612305, |
| "learning_rate": 3.85865800114329e-06, |
| "loss": 4.0630950927734375, |
| "step": 599 |
| }, |
| { |
| "epoch": 1.1764705882352942, |
| "grad_norm": 17.05522918701172, |
| "learning_rate": 3.8432004432320615e-06, |
| "loss": 4.621028423309326, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.1784313725490196, |
| "grad_norm": 148.23780822753906, |
| "learning_rate": 3.82775455784846e-06, |
| "loss": 4.3958916664123535, |
| "step": 601 |
| }, |
| { |
| "epoch": 1.1803921568627451, |
| "grad_norm": 13.462661743164062, |
| "learning_rate": 3.8123205008470814e-06, |
| "loss": 4.256137847900391, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.1823529411764706, |
| "grad_norm": 8.457324028015137, |
| "learning_rate": 3.796898427963163e-06, |
| "loss": 4.346855163574219, |
| "step": 603 |
| }, |
| { |
| "epoch": 1.184313725490196, |
| "grad_norm": 15.523514747619629, |
| "learning_rate": 3.781488494811022e-06, |
| "loss": 4.284721851348877, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.1862745098039216, |
| "grad_norm": 6.213055610656738, |
| "learning_rate": 3.7660908568824805e-06, |
| "loss": 3.8340024948120117, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.188235294117647, |
| "grad_norm": 9.198994636535645, |
| "learning_rate": 3.7507056695452966e-06, |
| "loss": 4.302409648895264, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.1901960784313725, |
| "grad_norm": 9.382796287536621, |
| "learning_rate": 3.7353330880415963e-06, |
| "loss": 4.5301127433776855, |
| "step": 607 |
| }, |
| { |
| "epoch": 1.192156862745098, |
| "grad_norm": 69.99656677246094, |
| "learning_rate": 3.7199732674863126e-06, |
| "loss": 3.968989849090576, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.1941176470588235, |
| "grad_norm": 20.657978057861328, |
| "learning_rate": 3.704626362865612e-06, |
| "loss": 4.4946489334106445, |
| "step": 609 |
| }, |
| { |
| "epoch": 1.196078431372549, |
| "grad_norm": 7.722006797790527, |
| "learning_rate": 3.689292529035332e-06, |
| "loss": 4.4367170333862305, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.1980392156862745, |
| "grad_norm": 6.735897064208984, |
| "learning_rate": 3.6739719207194313e-06, |
| "loss": 3.993412733078003, |
| "step": 611 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 5.715364933013916, |
| "learning_rate": 3.6586646925084057e-06, |
| "loss": 4.139276504516602, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.2019607843137254, |
| "grad_norm": 54.25691223144531, |
| "learning_rate": 3.643370998857748e-06, |
| "loss": 4.590419292449951, |
| "step": 613 |
| }, |
| { |
| "epoch": 1.203921568627451, |
| "grad_norm": 8.458606719970703, |
| "learning_rate": 3.628090994086384e-06, |
| "loss": 4.291541576385498, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.2058823529411764, |
| "grad_norm": 37.93712615966797, |
| "learning_rate": 3.612824832375109e-06, |
| "loss": 4.776224136352539, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.2078431372549019, |
| "grad_norm": 35.82539749145508, |
| "learning_rate": 3.5975726677650352e-06, |
| "loss": 4.760282516479492, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.2098039215686274, |
| "grad_norm": 413.08538818359375, |
| "learning_rate": 3.5823346541560494e-06, |
| "loss": 4.382461071014404, |
| "step": 617 |
| }, |
| { |
| "epoch": 1.2117647058823529, |
| "grad_norm": 89.54154968261719, |
| "learning_rate": 3.5671109453052375e-06, |
| "loss": 4.182665824890137, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.2137254901960783, |
| "grad_norm": 20.636404037475586, |
| "learning_rate": 3.551901694825352e-06, |
| "loss": 4.00910758972168, |
| "step": 619 |
| }, |
| { |
| "epoch": 1.215686274509804, |
| "grad_norm": 39.8746452331543, |
| "learning_rate": 3.536707056183254e-06, |
| "loss": 4.501626491546631, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.2176470588235295, |
| "grad_norm": 8.883325576782227, |
| "learning_rate": 3.5215271826983653e-06, |
| "loss": 4.042410373687744, |
| "step": 621 |
| }, |
| { |
| "epoch": 1.219607843137255, |
| "grad_norm": 8.069144248962402, |
| "learning_rate": 3.5063622275411214e-06, |
| "loss": 4.196770668029785, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.2215686274509805, |
| "grad_norm": 202.91835021972656, |
| "learning_rate": 3.491212343731428e-06, |
| "loss": 4.373252868652344, |
| "step": 623 |
| }, |
| { |
| "epoch": 1.223529411764706, |
| "grad_norm": 16.131671905517578, |
| "learning_rate": 3.4760776841371147e-06, |
| "loss": 4.37552547454834, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.2254901960784315, |
| "grad_norm": 8.377914428710938, |
| "learning_rate": 3.460958401472391e-06, |
| "loss": 4.658670902252197, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.227450980392157, |
| "grad_norm": 25.188844680786133, |
| "learning_rate": 3.4458546482963117e-06, |
| "loss": 4.512619972229004, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.2294117647058824, |
| "grad_norm": 52.12186813354492, |
| "learning_rate": 3.430766577011231e-06, |
| "loss": 4.429440498352051, |
| "step": 627 |
| }, |
| { |
| "epoch": 1.231372549019608, |
| "grad_norm": 150.3721160888672, |
| "learning_rate": 3.415694339861266e-06, |
| "loss": 4.655492782592773, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.2333333333333334, |
| "grad_norm": 9.189016342163086, |
| "learning_rate": 3.4006380889307666e-06, |
| "loss": 4.395424842834473, |
| "step": 629 |
| }, |
| { |
| "epoch": 1.2352941176470589, |
| "grad_norm": 12.38388729095459, |
| "learning_rate": 3.3855979761427705e-06, |
| "loss": 4.4131317138671875, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.2372549019607844, |
| "grad_norm": 13.973714828491211, |
| "learning_rate": 3.3705741532574744e-06, |
| "loss": 3.924534797668457, |
| "step": 631 |
| }, |
| { |
| "epoch": 1.2392156862745098, |
| "grad_norm": 107.79778289794922, |
| "learning_rate": 3.3555667718707143e-06, |
| "loss": 4.320949554443359, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.2411764705882353, |
| "grad_norm": 11.689550399780273, |
| "learning_rate": 3.340575983412412e-06, |
| "loss": 4.405580520629883, |
| "step": 633 |
| }, |
| { |
| "epoch": 1.2431372549019608, |
| "grad_norm": 7.956257343292236, |
| "learning_rate": 3.3256019391450696e-06, |
| "loss": 4.592574596405029, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.2450980392156863, |
| "grad_norm": 9.243956565856934, |
| "learning_rate": 3.3106447901622324e-06, |
| "loss": 4.313963890075684, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.2470588235294118, |
| "grad_norm": 18.00144386291504, |
| "learning_rate": 3.2957046873869647e-06, |
| "loss": 4.268238067626953, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.2490196078431373, |
| "grad_norm": 19.959566116333008, |
| "learning_rate": 3.280781781570328e-06, |
| "loss": 4.48428201675415, |
| "step": 637 |
| }, |
| { |
| "epoch": 1.2509803921568627, |
| "grad_norm": 6.811706066131592, |
| "learning_rate": 3.2658762232898646e-06, |
| "loss": 4.208132266998291, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.2529411764705882, |
| "grad_norm": 46.339820861816406, |
| "learning_rate": 3.2509881629480674e-06, |
| "loss": 3.8477745056152344, |
| "step": 639 |
| }, |
| { |
| "epoch": 1.2549019607843137, |
| "grad_norm": 10.291128158569336, |
| "learning_rate": 3.236117750770872e-06, |
| "loss": 4.190772533416748, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.2568627450980392, |
| "grad_norm": 6.017500877380371, |
| "learning_rate": 3.221265136806139e-06, |
| "loss": 4.1459479331970215, |
| "step": 641 |
| }, |
| { |
| "epoch": 1.2588235294117647, |
| "grad_norm": 10.89311408996582, |
| "learning_rate": 3.2064304709221374e-06, |
| "loss": 4.458138465881348, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.2607843137254902, |
| "grad_norm": 17.717823028564453, |
| "learning_rate": 3.1916139028060318e-06, |
| "loss": 4.3823347091674805, |
| "step": 643 |
| }, |
| { |
| "epoch": 1.2627450980392156, |
| "grad_norm": 7.106032848358154, |
| "learning_rate": 3.176815581962377e-06, |
| "loss": 3.8996877670288086, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.2647058823529411, |
| "grad_norm": 5.9597601890563965, |
| "learning_rate": 3.162035657711604e-06, |
| "loss": 3.9395341873168945, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.2666666666666666, |
| "grad_norm": 10.907493591308594, |
| "learning_rate": 3.1472742791885126e-06, |
| "loss": 4.373560905456543, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.268627450980392, |
| "grad_norm": 15.602867126464844, |
| "learning_rate": 3.1325315953407787e-06, |
| "loss": 4.069882392883301, |
| "step": 647 |
| }, |
| { |
| "epoch": 1.2705882352941176, |
| "grad_norm": 6.501771926879883, |
| "learning_rate": 3.117807754927433e-06, |
| "loss": 4.230740547180176, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.272549019607843, |
| "grad_norm": 27.09207534790039, |
| "learning_rate": 3.103102906517371e-06, |
| "loss": 4.803955554962158, |
| "step": 649 |
| }, |
| { |
| "epoch": 1.2745098039215685, |
| "grad_norm": 20.559762954711914, |
| "learning_rate": 3.0884171984878553e-06, |
| "loss": 4.1997270584106445, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.276470588235294, |
| "grad_norm": 5.8221893310546875, |
| "learning_rate": 3.0737507790230143e-06, |
| "loss": 4.570243835449219, |
| "step": 651 |
| }, |
| { |
| "epoch": 1.2784313725490195, |
| "grad_norm": 6.412595272064209, |
| "learning_rate": 3.0591037961123414e-06, |
| "loss": 4.3107147216796875, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.280392156862745, |
| "grad_norm": 7.584194660186768, |
| "learning_rate": 3.044476397549221e-06, |
| "loss": 3.8364975452423096, |
| "step": 653 |
| }, |
| { |
| "epoch": 1.2823529411764705, |
| "grad_norm": 30.908584594726562, |
| "learning_rate": 3.0298687309294106e-06, |
| "loss": 4.192539215087891, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.284313725490196, |
| "grad_norm": 7.657871246337891, |
| "learning_rate": 3.0152809436495732e-06, |
| "loss": 4.680438995361328, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.2862745098039214, |
| "grad_norm": 7.766997337341309, |
| "learning_rate": 3.0007131829057807e-06, |
| "loss": 4.22906494140625, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.288235294117647, |
| "grad_norm": 56.03451919555664, |
| "learning_rate": 2.9861655956920286e-06, |
| "loss": 4.155778884887695, |
| "step": 657 |
| }, |
| { |
| "epoch": 1.2901960784313726, |
| "grad_norm": 30.009788513183594, |
| "learning_rate": 2.971638328798755e-06, |
| "loss": 4.029226779937744, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.2921568627450981, |
| "grad_norm": 10.63323974609375, |
| "learning_rate": 2.95713152881136e-06, |
| "loss": 3.739717721939087, |
| "step": 659 |
| }, |
| { |
| "epoch": 1.2941176470588236, |
| "grad_norm": 6.4381022453308105, |
| "learning_rate": 2.942645342108723e-06, |
| "loss": 4.115414142608643, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.296078431372549, |
| "grad_norm": 18.27284049987793, |
| "learning_rate": 2.9281799148617264e-06, |
| "loss": 4.017760276794434, |
| "step": 661 |
| }, |
| { |
| "epoch": 1.2980392156862746, |
| "grad_norm": 41.26305389404297, |
| "learning_rate": 2.913735393031786e-06, |
| "loss": 4.14659309387207, |
| "step": 662 |
| }, |
| { |
| "epoch": 1.3, |
| "grad_norm": 6.086925029754639, |
| "learning_rate": 2.8993119223693756e-06, |
| "loss": 3.9002652168273926, |
| "step": 663 |
| }, |
| { |
| "epoch": 1.3019607843137255, |
| "grad_norm": 13.28462028503418, |
| "learning_rate": 2.884909648412545e-06, |
| "loss": 4.437386512756348, |
| "step": 664 |
| }, |
| { |
| "epoch": 1.303921568627451, |
| "grad_norm": 11.695712089538574, |
| "learning_rate": 2.8705287164854755e-06, |
| "loss": 4.408797264099121, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.3058823529411765, |
| "grad_norm": 13.185002326965332, |
| "learning_rate": 2.8561692716969907e-06, |
| "loss": 4.035093307495117, |
| "step": 666 |
| }, |
| { |
| "epoch": 1.307843137254902, |
| "grad_norm": 9.32931900024414, |
| "learning_rate": 2.841831458939103e-06, |
| "loss": 4.502928256988525, |
| "step": 667 |
| }, |
| { |
| "epoch": 1.3098039215686275, |
| "grad_norm": 11394.642578125, |
| "learning_rate": 2.8275154228855495e-06, |
| "loss": 4.495089530944824, |
| "step": 668 |
| }, |
| { |
| "epoch": 1.311764705882353, |
| "grad_norm": 17.8239803314209, |
| "learning_rate": 2.8132213079903335e-06, |
| "loss": 4.13901424407959, |
| "step": 669 |
| }, |
| { |
| "epoch": 1.3137254901960784, |
| "grad_norm": 11.497566223144531, |
| "learning_rate": 2.798949258486263e-06, |
| "loss": 4.385655879974365, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.315686274509804, |
| "grad_norm": 78.63259887695312, |
| "learning_rate": 2.7846994183835073e-06, |
| "loss": 4.0058183670043945, |
| "step": 671 |
| }, |
| { |
| "epoch": 1.3176470588235294, |
| "grad_norm": 74.62110900878906, |
| "learning_rate": 2.770471931468121e-06, |
| "loss": 4.446218013763428, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.3196078431372549, |
| "grad_norm": 10.491549491882324, |
| "learning_rate": 2.756266941300615e-06, |
| "loss": 4.593997955322266, |
| "step": 673 |
| }, |
| { |
| "epoch": 1.3215686274509804, |
| "grad_norm": 18.128625869750977, |
| "learning_rate": 2.742084591214501e-06, |
| "loss": 4.071505069732666, |
| "step": 674 |
| }, |
| { |
| "epoch": 1.3235294117647058, |
| "grad_norm": 13.249919891357422, |
| "learning_rate": 2.7279250243148416e-06, |
| "loss": 4.520431041717529, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.3254901960784313, |
| "grad_norm": 7.906222820281982, |
| "learning_rate": 2.7137883834768076e-06, |
| "loss": 4.065687656402588, |
| "step": 676 |
| }, |
| { |
| "epoch": 1.3274509803921568, |
| "grad_norm": 52.9447021484375, |
| "learning_rate": 2.6996748113442397e-06, |
| "loss": 4.1635236740112305, |
| "step": 677 |
| }, |
| { |
| "epoch": 1.3294117647058823, |
| "grad_norm": 4.685762405395508, |
| "learning_rate": 2.6855844503282057e-06, |
| "loss": 4.086709499359131, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.3313725490196078, |
| "grad_norm": 8.25709342956543, |
| "learning_rate": 2.6715174426055664e-06, |
| "loss": 4.124368667602539, |
| "step": 679 |
| }, |
| { |
| "epoch": 1.3333333333333333, |
| "grad_norm": 92.40462493896484, |
| "learning_rate": 2.657473930117537e-06, |
| "loss": 4.330087661743164, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.3352941176470587, |
| "grad_norm": 7.260671615600586, |
| "learning_rate": 2.6434540545682585e-06, |
| "loss": 4.067335605621338, |
| "step": 681 |
| }, |
| { |
| "epoch": 1.3372549019607844, |
| "grad_norm": 32.30806350708008, |
| "learning_rate": 2.629457957423365e-06, |
| "loss": 4.620484828948975, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.33921568627451, |
| "grad_norm": 8.185224533081055, |
| "learning_rate": 2.6154857799085643e-06, |
| "loss": 4.824517726898193, |
| "step": 683 |
| }, |
| { |
| "epoch": 1.3411764705882354, |
| "grad_norm": 38.600643157958984, |
| "learning_rate": 2.6015376630082e-06, |
| "loss": 4.177415370941162, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.343137254901961, |
| "grad_norm": 12.150282859802246, |
| "learning_rate": 2.5876137474638323e-06, |
| "loss": 4.608708381652832, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.3450980392156864, |
| "grad_norm": 17.984052658081055, |
| "learning_rate": 2.5737141737728313e-06, |
| "loss": 4.3825225830078125, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.3470588235294119, |
| "grad_norm": 10.152755737304688, |
| "learning_rate": 2.5598390821869403e-06, |
| "loss": 4.374616622924805, |
| "step": 687 |
| }, |
| { |
| "epoch": 1.3490196078431373, |
| "grad_norm": 8.499917030334473, |
| "learning_rate": 2.5459886127108733e-06, |
| "loss": 4.4171295166015625, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.3509803921568628, |
| "grad_norm": 9.851249694824219, |
| "learning_rate": 2.532162905100898e-06, |
| "loss": 4.149330139160156, |
| "step": 689 |
| }, |
| { |
| "epoch": 1.3529411764705883, |
| "grad_norm": 45.60489273071289, |
| "learning_rate": 2.518362098863423e-06, |
| "loss": 4.130236625671387, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.3549019607843138, |
| "grad_norm": 5.697098731994629, |
| "learning_rate": 2.504586333253595e-06, |
| "loss": 4.183429718017578, |
| "step": 691 |
| }, |
| { |
| "epoch": 1.3568627450980393, |
| "grad_norm": 8.099467277526855, |
| "learning_rate": 2.490835747273896e-06, |
| "loss": 3.990645408630371, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.3588235294117648, |
| "grad_norm": 4.649440288543701, |
| "learning_rate": 2.4771104796727275e-06, |
| "loss": 4.240273952484131, |
| "step": 693 |
| }, |
| { |
| "epoch": 1.3607843137254902, |
| "grad_norm": 5.315373420715332, |
| "learning_rate": 2.4634106689430235e-06, |
| "loss": 4.3996124267578125, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.3627450980392157, |
| "grad_norm": 7.932140827178955, |
| "learning_rate": 2.449736453320854e-06, |
| "loss": 4.3941192626953125, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.3647058823529412, |
| "grad_norm": 6.591791152954102, |
| "learning_rate": 2.436087970784018e-06, |
| "loss": 4.288032531738281, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.3666666666666667, |
| "grad_norm": 6.031950950622559, |
| "learning_rate": 2.422465359050661e-06, |
| "loss": 4.261563301086426, |
| "step": 697 |
| }, |
| { |
| "epoch": 1.3686274509803922, |
| "grad_norm": 16.477630615234375, |
| "learning_rate": 2.408868755577882e-06, |
| "loss": 4.27692174911499, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.3705882352941177, |
| "grad_norm": 8.824464797973633, |
| "learning_rate": 2.3952982975603494e-06, |
| "loss": 4.507349967956543, |
| "step": 699 |
| }, |
| { |
| "epoch": 1.3725490196078431, |
| "grad_norm": 27.972156524658203, |
| "learning_rate": 2.3817541219289094e-06, |
| "loss": 4.322184085845947, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.3745098039215686, |
| "grad_norm": 31.712778091430664, |
| "learning_rate": 2.368236365349218e-06, |
| "loss": 4.359905242919922, |
| "step": 701 |
| }, |
| { |
| "epoch": 1.3764705882352941, |
| "grad_norm": 12.520115852355957, |
| "learning_rate": 2.3547451642203438e-06, |
| "loss": 3.8938419818878174, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.3784313725490196, |
| "grad_norm": 17.420684814453125, |
| "learning_rate": 2.341280654673406e-06, |
| "loss": 4.232578277587891, |
| "step": 703 |
| }, |
| { |
| "epoch": 1.380392156862745, |
| "grad_norm": 45.48374557495117, |
| "learning_rate": 2.327842972570198e-06, |
| "loss": 4.4909820556640625, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.3823529411764706, |
| "grad_norm": 56.00672912597656, |
| "learning_rate": 2.3144322535018126e-06, |
| "loss": 4.294509410858154, |
| "step": 705 |
| }, |
| { |
| "epoch": 1.384313725490196, |
| "grad_norm": 12.862080574035645, |
| "learning_rate": 2.30104863278727e-06, |
| "loss": 4.2585577964782715, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.3862745098039215, |
| "grad_norm": 18.549734115600586, |
| "learning_rate": 2.2876922454721695e-06, |
| "loss": 4.514203071594238, |
| "step": 707 |
| }, |
| { |
| "epoch": 1.388235294117647, |
| "grad_norm": 8.1556396484375, |
| "learning_rate": 2.2743632263273075e-06, |
| "loss": 4.094390392303467, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.3901960784313725, |
| "grad_norm": 128.47511291503906, |
| "learning_rate": 2.261061709847327e-06, |
| "loss": 4.239689826965332, |
| "step": 709 |
| }, |
| { |
| "epoch": 1.392156862745098, |
| "grad_norm": 9.879805564880371, |
| "learning_rate": 2.247787830249361e-06, |
| "loss": 4.478032112121582, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.3941176470588235, |
| "grad_norm": 7.040520191192627, |
| "learning_rate": 2.2345417214716745e-06, |
| "loss": 4.109457492828369, |
| "step": 711 |
| }, |
| { |
| "epoch": 1.396078431372549, |
| "grad_norm": 16.70178985595703, |
| "learning_rate": 2.2213235171723135e-06, |
| "loss": 4.442139148712158, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.3980392156862744, |
| "grad_norm": 17.51898956298828, |
| "learning_rate": 2.208133350727764e-06, |
| "loss": 4.24049186706543, |
| "step": 713 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 6.293966293334961, |
| "learning_rate": 2.194971355231595e-06, |
| "loss": 4.465924263000488, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.4019607843137254, |
| "grad_norm": 11.453479766845703, |
| "learning_rate": 2.1818376634931154e-06, |
| "loss": 4.393329620361328, |
| "step": 715 |
| }, |
| { |
| "epoch": 1.4039215686274509, |
| "grad_norm": 16.70266342163086, |
| "learning_rate": 2.1687324080360505e-06, |
| "loss": 4.209527969360352, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.4058823529411764, |
| "grad_norm": 30.97210693359375, |
| "learning_rate": 2.1556557210971845e-06, |
| "loss": 4.499849319458008, |
| "step": 717 |
| }, |
| { |
| "epoch": 1.4078431372549018, |
| "grad_norm": 8.491568565368652, |
| "learning_rate": 2.1426077346250387e-06, |
| "loss": 4.220964431762695, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.4098039215686273, |
| "grad_norm": 20.942296981811523, |
| "learning_rate": 2.1295885802785332e-06, |
| "loss": 3.943645715713501, |
| "step": 719 |
| }, |
| { |
| "epoch": 1.4117647058823528, |
| "grad_norm": 81.68138122558594, |
| "learning_rate": 2.1165983894256647e-06, |
| "loss": 4.511263847351074, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.4137254901960783, |
| "grad_norm": 10.52747917175293, |
| "learning_rate": 2.103637293142175e-06, |
| "loss": 4.577427864074707, |
| "step": 721 |
| }, |
| { |
| "epoch": 1.415686274509804, |
| "grad_norm": 16.393098831176758, |
| "learning_rate": 2.0907054222102367e-06, |
| "loss": 3.7921924591064453, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.4176470588235295, |
| "grad_norm": 5.426424503326416, |
| "learning_rate": 2.077802907117119e-06, |
| "loss": 4.059392929077148, |
| "step": 723 |
| }, |
| { |
| "epoch": 1.419607843137255, |
| "grad_norm": 56.45635986328125, |
| "learning_rate": 2.064929878053885e-06, |
| "loss": 4.6021904945373535, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.4215686274509804, |
| "grad_norm": 15.918771743774414, |
| "learning_rate": 2.0520864649140763e-06, |
| "loss": 4.29736328125, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.423529411764706, |
| "grad_norm": 30.07678985595703, |
| "learning_rate": 2.039272797292394e-06, |
| "loss": 4.57791805267334, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.4254901960784314, |
| "grad_norm": 12.101706504821777, |
| "learning_rate": 2.0264890044833995e-06, |
| "loss": 4.228304862976074, |
| "step": 727 |
| }, |
| { |
| "epoch": 1.427450980392157, |
| "grad_norm": 24.959041595458984, |
| "learning_rate": 2.0137352154801993e-06, |
| "loss": 4.017577648162842, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.4294117647058824, |
| "grad_norm": 40.462677001953125, |
| "learning_rate": 2.0010115589731614e-06, |
| "loss": 4.473653316497803, |
| "step": 729 |
| }, |
| { |
| "epoch": 1.4313725490196079, |
| "grad_norm": 10.04689884185791, |
| "learning_rate": 1.9883181633485994e-06, |
| "loss": 4.090883731842041, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.4333333333333333, |
| "grad_norm": 6.909872531890869, |
| "learning_rate": 1.9756551566874837e-06, |
| "loss": 4.207335948944092, |
| "step": 731 |
| }, |
| { |
| "epoch": 1.4352941176470588, |
| "grad_norm": 32.00148391723633, |
| "learning_rate": 1.9630226667641516e-06, |
| "loss": 4.388645172119141, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.4372549019607843, |
| "grad_norm": 10.202242851257324, |
| "learning_rate": 1.9504208210450126e-06, |
| "loss": 4.090822219848633, |
| "step": 733 |
| }, |
| { |
| "epoch": 1.4392156862745098, |
| "grad_norm": 10.043161392211914, |
| "learning_rate": 1.9378497466872657e-06, |
| "loss": 4.23163366317749, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.4411764705882353, |
| "grad_norm": 43.991817474365234, |
| "learning_rate": 1.9253095705376218e-06, |
| "loss": 4.619861602783203, |
| "step": 735 |
| }, |
| { |
| "epoch": 1.4431372549019608, |
| "grad_norm": 22.60405921936035, |
| "learning_rate": 1.9128004191310064e-06, |
| "loss": 4.524755954742432, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.4450980392156862, |
| "grad_norm": 9.140439987182617, |
| "learning_rate": 1.9003224186893e-06, |
| "loss": 4.709308624267578, |
| "step": 737 |
| }, |
| { |
| "epoch": 1.4470588235294117, |
| "grad_norm": 8.747527122497559, |
| "learning_rate": 1.8878756951200627e-06, |
| "loss": 3.7035531997680664, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.4490196078431372, |
| "grad_norm": 15.862771034240723, |
| "learning_rate": 1.8754603740152533e-06, |
| "loss": 4.485344409942627, |
| "step": 739 |
| }, |
| { |
| "epoch": 1.4509803921568627, |
| "grad_norm": 29.34657859802246, |
| "learning_rate": 1.86307658064997e-06, |
| "loss": 4.254968643188477, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.4529411764705882, |
| "grad_norm": 17.50143051147461, |
| "learning_rate": 1.8507244399811858e-06, |
| "loss": 4.25479793548584, |
| "step": 741 |
| }, |
| { |
| "epoch": 1.4549019607843137, |
| "grad_norm": 27.079608917236328, |
| "learning_rate": 1.8384040766464856e-06, |
| "loss": 4.629186630249023, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.4568627450980391, |
| "grad_norm": 48.11368942260742, |
| "learning_rate": 1.8261156149628101e-06, |
| "loss": 4.646360397338867, |
| "step": 743 |
| }, |
| { |
| "epoch": 1.4588235294117646, |
| "grad_norm": 6.203916072845459, |
| "learning_rate": 1.8138591789251997e-06, |
| "loss": 3.85965633392334, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.4607843137254901, |
| "grad_norm": 7.8767852783203125, |
| "learning_rate": 1.8016348922055448e-06, |
| "loss": 4.597146987915039, |
| "step": 745 |
| }, |
| { |
| "epoch": 1.4627450980392158, |
| "grad_norm": 13.2862548828125, |
| "learning_rate": 1.7894428781513367e-06, |
| "loss": 4.4817304611206055, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.4647058823529413, |
| "grad_norm": 80.48188018798828, |
| "learning_rate": 1.7772832597844286e-06, |
| "loss": 3.9338817596435547, |
| "step": 747 |
| }, |
| { |
| "epoch": 1.4666666666666668, |
| "grad_norm": 50.88627243041992, |
| "learning_rate": 1.7651561597997846e-06, |
| "loss": 4.4560956954956055, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.4686274509803923, |
| "grad_norm": 7.296485424041748, |
| "learning_rate": 1.7530617005642431e-06, |
| "loss": 4.159041881561279, |
| "step": 749 |
| }, |
| { |
| "epoch": 1.4705882352941178, |
| "grad_norm": 8.440339088439941, |
| "learning_rate": 1.7410000041152953e-06, |
| "loss": 3.9913253784179688, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.4725490196078432, |
| "grad_norm": 19.067867279052734, |
| "learning_rate": 1.7289711921598362e-06, |
| "loss": 4.387550354003906, |
| "step": 751 |
| }, |
| { |
| "epoch": 1.4745098039215687, |
| "grad_norm": 8.490843772888184, |
| "learning_rate": 1.716975386072947e-06, |
| "loss": 4.3244218826293945, |
| "step": 752 |
| }, |
| { |
| "epoch": 1.4764705882352942, |
| "grad_norm": 12.613040924072266, |
| "learning_rate": 1.7050127068966681e-06, |
| "loss": 4.627569198608398, |
| "step": 753 |
| }, |
| { |
| "epoch": 1.4784313725490197, |
| "grad_norm": 39.489112854003906, |
| "learning_rate": 1.6930832753387767e-06, |
| "loss": 4.65338659286499, |
| "step": 754 |
| }, |
| { |
| "epoch": 1.4803921568627452, |
| "grad_norm": 80.42972564697266, |
| "learning_rate": 1.6811872117715672e-06, |
| "loss": 4.373437881469727, |
| "step": 755 |
| }, |
| { |
| "epoch": 1.4823529411764707, |
| "grad_norm": 8.485828399658203, |
| "learning_rate": 1.6693246362306465e-06, |
| "loss": 4.04635763168335, |
| "step": 756 |
| }, |
| { |
| "epoch": 1.4843137254901961, |
| "grad_norm": 30.133621215820312, |
| "learning_rate": 1.6574956684137044e-06, |
| "loss": 4.307467460632324, |
| "step": 757 |
| }, |
| { |
| "epoch": 1.4862745098039216, |
| "grad_norm": 21.578205108642578, |
| "learning_rate": 1.6457004276793227e-06, |
| "loss": 4.381303787231445, |
| "step": 758 |
| }, |
| { |
| "epoch": 1.488235294117647, |
| "grad_norm": 9.480279922485352, |
| "learning_rate": 1.633939033045766e-06, |
| "loss": 4.092244625091553, |
| "step": 759 |
| }, |
| { |
| "epoch": 1.4901960784313726, |
| "grad_norm": 5.892380237579346, |
| "learning_rate": 1.6222116031897773e-06, |
| "loss": 4.375147819519043, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.492156862745098, |
| "grad_norm": 25.891605377197266, |
| "learning_rate": 1.610518256445382e-06, |
| "loss": 3.7811484336853027, |
| "step": 761 |
| }, |
| { |
| "epoch": 1.4941176470588236, |
| "grad_norm": 175.36309814453125, |
| "learning_rate": 1.5988591108026952e-06, |
| "loss": 4.007576942443848, |
| "step": 762 |
| }, |
| { |
| "epoch": 1.496078431372549, |
| "grad_norm": 15.441529273986816, |
| "learning_rate": 1.5872342839067305e-06, |
| "loss": 4.280129432678223, |
| "step": 763 |
| }, |
| { |
| "epoch": 1.4980392156862745, |
| "grad_norm": 18.408153533935547, |
| "learning_rate": 1.575643893056213e-06, |
| "loss": 4.2575297355651855, |
| "step": 764 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 63.444854736328125, |
| "learning_rate": 1.5640880552023957e-06, |
| "loss": 4.243289470672607, |
| "step": 765 |
| }, |
| { |
| "epoch": 1.5019607843137255, |
| "grad_norm": 8.91651725769043, |
| "learning_rate": 1.552566886947879e-06, |
| "loss": 4.302989959716797, |
| "step": 766 |
| }, |
| { |
| "epoch": 1.503921568627451, |
| "grad_norm": 128.95758056640625, |
| "learning_rate": 1.541080504545433e-06, |
| "loss": 4.355408668518066, |
| "step": 767 |
| }, |
| { |
| "epoch": 1.5058823529411764, |
| "grad_norm": 13.378387451171875, |
| "learning_rate": 1.5296290238968303e-06, |
| "loss": 4.437323570251465, |
| "step": 768 |
| }, |
| { |
| "epoch": 1.507843137254902, |
| "grad_norm": 8.28203010559082, |
| "learning_rate": 1.5182125605516706e-06, |
| "loss": 4.488485813140869, |
| "step": 769 |
| }, |
| { |
| "epoch": 1.5098039215686274, |
| "grad_norm": 10.438965797424316, |
| "learning_rate": 1.5068312297062089e-06, |
| "loss": 4.071782112121582, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.511764705882353, |
| "grad_norm": 27.31859016418457, |
| "learning_rate": 1.4954851462022118e-06, |
| "loss": 4.187992095947266, |
| "step": 771 |
| }, |
| { |
| "epoch": 1.5137254901960784, |
| "grad_norm": 136.20640563964844, |
| "learning_rate": 1.4841744245257812e-06, |
| "loss": 4.506540775299072, |
| "step": 772 |
| }, |
| { |
| "epoch": 1.5156862745098039, |
| "grad_norm": 11.443656921386719, |
| "learning_rate": 1.4728991788062052e-06, |
| "loss": 3.877199649810791, |
| "step": 773 |
| }, |
| { |
| "epoch": 1.5176470588235293, |
| "grad_norm": 32.57830047607422, |
| "learning_rate": 1.4616595228148095e-06, |
| "loss": 3.79848051071167, |
| "step": 774 |
| }, |
| { |
| "epoch": 1.5196078431372548, |
| "grad_norm": 36.45896911621094, |
| "learning_rate": 1.4504555699638034e-06, |
| "loss": 4.265812873840332, |
| "step": 775 |
| }, |
| { |
| "epoch": 1.5215686274509803, |
| "grad_norm": 38.72624969482422, |
| "learning_rate": 1.4392874333051387e-06, |
| "loss": 4.287004470825195, |
| "step": 776 |
| }, |
| { |
| "epoch": 1.5235294117647058, |
| "grad_norm": 10.811670303344727, |
| "learning_rate": 1.428155225529374e-06, |
| "loss": 4.230096340179443, |
| "step": 777 |
| }, |
| { |
| "epoch": 1.5254901960784313, |
| "grad_norm": 12.724776268005371, |
| "learning_rate": 1.4170590589645273e-06, |
| "loss": 3.8588690757751465, |
| "step": 778 |
| }, |
| { |
| "epoch": 1.5274509803921568, |
| "grad_norm": 43.33174514770508, |
| "learning_rate": 1.405999045574945e-06, |
| "loss": 3.9456539154052734, |
| "step": 779 |
| }, |
| { |
| "epoch": 1.5294117647058822, |
| "grad_norm": 8.31043815612793, |
| "learning_rate": 1.3949752969601838e-06, |
| "loss": 4.745697498321533, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.5313725490196077, |
| "grad_norm": 20.55329132080078, |
| "learning_rate": 1.383987924353868e-06, |
| "loss": 4.360608100891113, |
| "step": 781 |
| }, |
| { |
| "epoch": 1.5333333333333332, |
| "grad_norm": 12.237061500549316, |
| "learning_rate": 1.3730370386225778e-06, |
| "loss": 4.23661470413208, |
| "step": 782 |
| }, |
| { |
| "epoch": 1.5352941176470587, |
| "grad_norm": 55.55708312988281, |
| "learning_rate": 1.3621227502647272e-06, |
| "loss": 4.409275054931641, |
| "step": 783 |
| }, |
| { |
| "epoch": 1.5372549019607842, |
| "grad_norm": 10.432389259338379, |
| "learning_rate": 1.351245169409449e-06, |
| "loss": 4.187243938446045, |
| "step": 784 |
| }, |
| { |
| "epoch": 1.5392156862745097, |
| "grad_norm": 12.222931861877441, |
| "learning_rate": 1.3404044058154836e-06, |
| "loss": 4.23618221282959, |
| "step": 785 |
| }, |
| { |
| "epoch": 1.5411764705882351, |
| "grad_norm": 22.509572982788086, |
| "learning_rate": 1.3296005688700764e-06, |
| "loss": 4.735877513885498, |
| "step": 786 |
| }, |
| { |
| "epoch": 1.5431372549019606, |
| "grad_norm": 104.62260437011719, |
| "learning_rate": 1.318833767587861e-06, |
| "loss": 4.043266296386719, |
| "step": 787 |
| }, |
| { |
| "epoch": 1.5450980392156861, |
| "grad_norm": 28.126663208007812, |
| "learning_rate": 1.308104110609773e-06, |
| "loss": 4.625167369842529, |
| "step": 788 |
| }, |
| { |
| "epoch": 1.5470588235294118, |
| "grad_norm": 13.838677406311035, |
| "learning_rate": 1.2974117062019504e-06, |
| "loss": 4.585941314697266, |
| "step": 789 |
| }, |
| { |
| "epoch": 1.5490196078431373, |
| "grad_norm": 21.75575828552246, |
| "learning_rate": 1.2867566622546357e-06, |
| "loss": 4.354065895080566, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.5509803921568628, |
| "grad_norm": 12.070174217224121, |
| "learning_rate": 1.2761390862810907e-06, |
| "loss": 4.213344573974609, |
| "step": 791 |
| }, |
| { |
| "epoch": 1.5529411764705883, |
| "grad_norm": 19.135971069335938, |
| "learning_rate": 1.2655590854165146e-06, |
| "loss": 3.9310250282287598, |
| "step": 792 |
| }, |
| { |
| "epoch": 1.5549019607843138, |
| "grad_norm": 26.965707778930664, |
| "learning_rate": 1.2550167664169565e-06, |
| "loss": 4.444039344787598, |
| "step": 793 |
| }, |
| { |
| "epoch": 1.5568627450980392, |
| "grad_norm": 8.53052806854248, |
| "learning_rate": 1.244512235658245e-06, |
| "loss": 3.9653329849243164, |
| "step": 794 |
| }, |
| { |
| "epoch": 1.5588235294117647, |
| "grad_norm": 12.526801109313965, |
| "learning_rate": 1.2340455991349094e-06, |
| "loss": 4.354982376098633, |
| "step": 795 |
| }, |
| { |
| "epoch": 1.5607843137254902, |
| "grad_norm": 9.729439735412598, |
| "learning_rate": 1.2236169624591138e-06, |
| "loss": 4.293777942657471, |
| "step": 796 |
| }, |
| { |
| "epoch": 1.5627450980392157, |
| "grad_norm": 12.409886360168457, |
| "learning_rate": 1.2132264308595875e-06, |
| "loss": 3.961315631866455, |
| "step": 797 |
| }, |
| { |
| "epoch": 1.5647058823529412, |
| "grad_norm": 9.837629318237305, |
| "learning_rate": 1.2028741091805713e-06, |
| "loss": 4.551630020141602, |
| "step": 798 |
| }, |
| { |
| "epoch": 1.5666666666666667, |
| "grad_norm": 7.783680438995361, |
| "learning_rate": 1.1925601018807498e-06, |
| "loss": 4.144725799560547, |
| "step": 799 |
| }, |
| { |
| "epoch": 1.5686274509803921, |
| "grad_norm": 38.960384368896484, |
| "learning_rate": 1.182284513032198e-06, |
| "loss": 4.380014419555664, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.5705882352941176, |
| "grad_norm": 10.370161056518555, |
| "learning_rate": 1.1720474463193442e-06, |
| "loss": 4.267058849334717, |
| "step": 801 |
| }, |
| { |
| "epoch": 1.572549019607843, |
| "grad_norm": 26.05078887939453, |
| "learning_rate": 1.1618490050379073e-06, |
| "loss": 4.233059883117676, |
| "step": 802 |
| }, |
| { |
| "epoch": 1.5745098039215686, |
| "grad_norm": 52.44851303100586, |
| "learning_rate": 1.1516892920938627e-06, |
| "loss": 4.4660186767578125, |
| "step": 803 |
| }, |
| { |
| "epoch": 1.576470588235294, |
| "grad_norm": 8.192204475402832, |
| "learning_rate": 1.1415684100024043e-06, |
| "loss": 3.995473623275757, |
| "step": 804 |
| }, |
| { |
| "epoch": 1.5784313725490198, |
| "grad_norm": 16.08054542541504, |
| "learning_rate": 1.131486460886908e-06, |
| "loss": 3.990022897720337, |
| "step": 805 |
| }, |
| { |
| "epoch": 1.5803921568627453, |
| "grad_norm": 39.147544860839844, |
| "learning_rate": 1.1214435464779006e-06, |
| "loss": 4.553938388824463, |
| "step": 806 |
| }, |
| { |
| "epoch": 1.5823529411764707, |
| "grad_norm": 25.91476821899414, |
| "learning_rate": 1.1114397681120386e-06, |
| "loss": 3.794466972351074, |
| "step": 807 |
| }, |
| { |
| "epoch": 1.5843137254901962, |
| "grad_norm": 14.375138282775879, |
| "learning_rate": 1.1014752267310757e-06, |
| "loss": 4.648318290710449, |
| "step": 808 |
| }, |
| { |
| "epoch": 1.5862745098039217, |
| "grad_norm": 12.88569164276123, |
| "learning_rate": 1.0915500228808523e-06, |
| "loss": 3.7970104217529297, |
| "step": 809 |
| }, |
| { |
| "epoch": 1.5882352941176472, |
| "grad_norm": 13.928987503051758, |
| "learning_rate": 1.0816642567102832e-06, |
| "loss": 4.307559490203857, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.5901960784313727, |
| "grad_norm": 23.075586318969727, |
| "learning_rate": 1.0718180279703371e-06, |
| "loss": 4.3455023765563965, |
| "step": 811 |
| }, |
| { |
| "epoch": 1.5921568627450982, |
| "grad_norm": 35.862205505371094, |
| "learning_rate": 1.0620114360130385e-06, |
| "loss": 4.080347061157227, |
| "step": 812 |
| }, |
| { |
| "epoch": 1.5941176470588236, |
| "grad_norm": 8.66289234161377, |
| "learning_rate": 1.0522445797904608e-06, |
| "loss": 4.472708702087402, |
| "step": 813 |
| }, |
| { |
| "epoch": 1.5960784313725491, |
| "grad_norm": 14.190173149108887, |
| "learning_rate": 1.04251755785373e-06, |
| "loss": 4.201245307922363, |
| "step": 814 |
| }, |
| { |
| "epoch": 1.5980392156862746, |
| "grad_norm": 10.237516403198242, |
| "learning_rate": 1.0328304683520308e-06, |
| "loss": 4.238909721374512, |
| "step": 815 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 42.68775177001953, |
| "learning_rate": 1.0231834090316135e-06, |
| "loss": 4.410212516784668, |
| "step": 816 |
| }, |
| { |
| "epoch": 1.6019607843137256, |
| "grad_norm": 16.51223373413086, |
| "learning_rate": 1.0135764772348105e-06, |
| "loss": 4.378362655639648, |
| "step": 817 |
| }, |
| { |
| "epoch": 1.603921568627451, |
| "grad_norm": 12.874237060546875, |
| "learning_rate": 1.004009769899051e-06, |
| "loss": 4.159359455108643, |
| "step": 818 |
| }, |
| { |
| "epoch": 1.6058823529411765, |
| "grad_norm": 16.39068031311035, |
| "learning_rate": 9.944833835558886e-07, |
| "loss": 4.503432273864746, |
| "step": 819 |
| }, |
| { |
| "epoch": 1.607843137254902, |
| "grad_norm": 58.67821502685547, |
| "learning_rate": 9.849974143300216e-07, |
| "loss": 4.2892165184021, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.6098039215686275, |
| "grad_norm": 14.123628616333008, |
| "learning_rate": 9.755519579383206e-07, |
| "loss": 4.417132377624512, |
| "step": 821 |
| }, |
| { |
| "epoch": 1.611764705882353, |
| "grad_norm": 11.702075004577637, |
| "learning_rate": 9.661471096888735e-07, |
| "loss": 4.64815616607666, |
| "step": 822 |
| }, |
| { |
| "epoch": 1.6137254901960785, |
| "grad_norm": 84.42650604248047, |
| "learning_rate": 9.567829644800141e-07, |
| "loss": 4.572826385498047, |
| "step": 823 |
| }, |
| { |
| "epoch": 1.615686274509804, |
| "grad_norm": 17.071792602539062, |
| "learning_rate": 9.474596167993688e-07, |
| "loss": 4.3614501953125, |
| "step": 824 |
| }, |
| { |
| "epoch": 1.6176470588235294, |
| "grad_norm": 25.60483741760254, |
| "learning_rate": 9.381771607229001e-07, |
| "loss": 4.5185651779174805, |
| "step": 825 |
| }, |
| { |
| "epoch": 1.619607843137255, |
| "grad_norm": 15.565959930419922, |
| "learning_rate": 9.289356899139623e-07, |
| "loss": 3.915163993835449, |
| "step": 826 |
| }, |
| { |
| "epoch": 1.6215686274509804, |
| "grad_norm": 54.5858154296875, |
| "learning_rate": 9.197352976223495e-07, |
| "loss": 4.3080902099609375, |
| "step": 827 |
| }, |
| { |
| "epoch": 1.6235294117647059, |
| "grad_norm": 13.445226669311523, |
| "learning_rate": 9.10576076683366e-07, |
| "loss": 4.254460334777832, |
| "step": 828 |
| }, |
| { |
| "epoch": 1.6254901960784314, |
| "grad_norm": 87.86075592041016, |
| "learning_rate": 9.014581195168726e-07, |
| "loss": 4.4500837326049805, |
| "step": 829 |
| }, |
| { |
| "epoch": 1.6274509803921569, |
| "grad_norm": 20.077417373657227, |
| "learning_rate": 8.923815181263684e-07, |
| "loss": 4.214353084564209, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.6294117647058823, |
| "grad_norm": 10.991836547851562, |
| "learning_rate": 8.83346364098061e-07, |
| "loss": 4.4448065757751465, |
| "step": 831 |
| }, |
| { |
| "epoch": 1.6313725490196078, |
| "grad_norm": 14.013919830322266, |
| "learning_rate": 8.743527485999342e-07, |
| "loss": 4.108705997467041, |
| "step": 832 |
| }, |
| { |
| "epoch": 1.6333333333333333, |
| "grad_norm": 64.39735412597656, |
| "learning_rate": 8.654007623808335e-07, |
| "loss": 4.556203842163086, |
| "step": 833 |
| }, |
| { |
| "epoch": 1.6352941176470588, |
| "grad_norm": 26.405942916870117, |
| "learning_rate": 8.564904957695524e-07, |
| "loss": 4.103427886962891, |
| "step": 834 |
| }, |
| { |
| "epoch": 1.6372549019607843, |
| "grad_norm": 119.67282104492188, |
| "learning_rate": 8.476220386739153e-07, |
| "loss": 4.277235984802246, |
| "step": 835 |
| }, |
| { |
| "epoch": 1.6392156862745098, |
| "grad_norm": 10.289328575134277, |
| "learning_rate": 8.387954805798748e-07, |
| "loss": 4.2001848220825195, |
| "step": 836 |
| }, |
| { |
| "epoch": 1.6411764705882352, |
| "grad_norm": 10.340129852294922, |
| "learning_rate": 8.30010910550611e-07, |
| "loss": 4.292266845703125, |
| "step": 837 |
| }, |
| { |
| "epoch": 1.6431372549019607, |
| "grad_norm": 24.654218673706055, |
| "learning_rate": 8.212684172256219e-07, |
| "loss": 4.3105974197387695, |
| "step": 838 |
| }, |
| { |
| "epoch": 1.6450980392156862, |
| "grad_norm": 10.16248893737793, |
| "learning_rate": 8.125680888198395e-07, |
| "loss": 4.092971324920654, |
| "step": 839 |
| }, |
| { |
| "epoch": 1.6470588235294117, |
| "grad_norm": 14.615108489990234, |
| "learning_rate": 8.039100131227401e-07, |
| "loss": 4.30925989151001, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.6490196078431372, |
| "grad_norm": 14.718245506286621, |
| "learning_rate": 7.95294277497452e-07, |
| "loss": 4.510540008544922, |
| "step": 841 |
| }, |
| { |
| "epoch": 1.6509803921568627, |
| "grad_norm": 12.08092212677002, |
| "learning_rate": 7.867209688798722e-07, |
| "loss": 4.001991271972656, |
| "step": 842 |
| }, |
| { |
| "epoch": 1.6529411764705881, |
| "grad_norm": 13.787320137023926, |
| "learning_rate": 7.781901737778014e-07, |
| "loss": 4.218091011047363, |
| "step": 843 |
| }, |
| { |
| "epoch": 1.6549019607843136, |
| "grad_norm": 68.27767181396484, |
| "learning_rate": 7.697019782700605e-07, |
| "loss": 4.106568336486816, |
| "step": 844 |
| }, |
| { |
| "epoch": 1.656862745098039, |
| "grad_norm": 30.536418914794922, |
| "learning_rate": 7.612564680056233e-07, |
| "loss": 4.192287445068359, |
| "step": 845 |
| }, |
| { |
| "epoch": 1.6588235294117646, |
| "grad_norm": 18.520544052124023, |
| "learning_rate": 7.52853728202756e-07, |
| "loss": 4.293105602264404, |
| "step": 846 |
| }, |
| { |
| "epoch": 1.66078431372549, |
| "grad_norm": 242.49095153808594, |
| "learning_rate": 7.444938436481547e-07, |
| "loss": 4.271738052368164, |
| "step": 847 |
| }, |
| { |
| "epoch": 1.6627450980392156, |
| "grad_norm": 21.333797454833984, |
| "learning_rate": 7.361768986960893e-07, |
| "loss": 3.9610073566436768, |
| "step": 848 |
| }, |
| { |
| "epoch": 1.664705882352941, |
| "grad_norm": 56.619956970214844, |
| "learning_rate": 7.279029772675572e-07, |
| "loss": 3.83493709564209, |
| "step": 849 |
| }, |
| { |
| "epoch": 1.6666666666666665, |
| "grad_norm": 9.86070728302002, |
| "learning_rate": 7.196721628494296e-07, |
| "loss": 4.448513507843018, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.668627450980392, |
| "grad_norm": 22.179729461669922, |
| "learning_rate": 7.114845384936109e-07, |
| "loss": 3.798055648803711, |
| "step": 851 |
| }, |
| { |
| "epoch": 1.6705882352941175, |
| "grad_norm": 20.29526138305664, |
| "learning_rate": 7.033401868162071e-07, |
| "loss": 4.640086650848389, |
| "step": 852 |
| }, |
| { |
| "epoch": 1.6725490196078432, |
| "grad_norm": 16.003681182861328, |
| "learning_rate": 6.952391899966826e-07, |
| "loss": 4.355275630950928, |
| "step": 853 |
| }, |
| { |
| "epoch": 1.6745098039215687, |
| "grad_norm": 47.72160339355469, |
| "learning_rate": 6.871816297770379e-07, |
| "loss": 3.878326416015625, |
| "step": 854 |
| }, |
| { |
| "epoch": 1.6764705882352942, |
| "grad_norm": 20.697542190551758, |
| "learning_rate": 6.791675874609815e-07, |
| "loss": 3.9106733798980713, |
| "step": 855 |
| }, |
| { |
| "epoch": 1.6784313725490196, |
| "grad_norm": 23.311349868774414, |
| "learning_rate": 6.71197143913111e-07, |
| "loss": 4.485714912414551, |
| "step": 856 |
| }, |
| { |
| "epoch": 1.6803921568627451, |
| "grad_norm": 9.872264862060547, |
| "learning_rate": 6.632703795580947e-07, |
| "loss": 4.002383232116699, |
| "step": 857 |
| }, |
| { |
| "epoch": 1.6823529411764706, |
| "grad_norm": 78.83197021484375, |
| "learning_rate": 6.553873743798678e-07, |
| "loss": 4.361330032348633, |
| "step": 858 |
| }, |
| { |
| "epoch": 1.684313725490196, |
| "grad_norm": 9.547771453857422, |
| "learning_rate": 6.475482079208112e-07, |
| "loss": 4.204599380493164, |
| "step": 859 |
| }, |
| { |
| "epoch": 1.6862745098039216, |
| "grad_norm": 18.235244750976562, |
| "learning_rate": 6.397529592809615e-07, |
| "loss": 4.262663841247559, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.688235294117647, |
| "grad_norm": 13.215642929077148, |
| "learning_rate": 6.320017071172114e-07, |
| "loss": 4.304675102233887, |
| "step": 861 |
| }, |
| { |
| "epoch": 1.6901960784313725, |
| "grad_norm": 10.743204116821289, |
| "learning_rate": 6.242945296425074e-07, |
| "loss": 3.8973677158355713, |
| "step": 862 |
| }, |
| { |
| "epoch": 1.692156862745098, |
| "grad_norm": 27.937570571899414, |
| "learning_rate": 6.166315046250704e-07, |
| "loss": 4.325494766235352, |
| "step": 863 |
| }, |
| { |
| "epoch": 1.6941176470588235, |
| "grad_norm": 60.9512939453125, |
| "learning_rate": 6.090127093876058e-07, |
| "loss": 4.000061988830566, |
| "step": 864 |
| }, |
| { |
| "epoch": 1.696078431372549, |
| "grad_norm": 21.070077896118164, |
| "learning_rate": 6.014382208065234e-07, |
| "loss": 4.016087532043457, |
| "step": 865 |
| }, |
| { |
| "epoch": 1.6980392156862745, |
| "grad_norm": 19.379253387451172, |
| "learning_rate": 5.939081153111648e-07, |
| "loss": 4.325943946838379, |
| "step": 866 |
| }, |
| { |
| "epoch": 1.7, |
| "grad_norm": 14.437711715698242, |
| "learning_rate": 5.864224688830283e-07, |
| "loss": 4.5379767417907715, |
| "step": 867 |
| }, |
| { |
| "epoch": 1.7019607843137254, |
| "grad_norm": 13.36697769165039, |
| "learning_rate": 5.789813570550052e-07, |
| "loss": 4.372624397277832, |
| "step": 868 |
| }, |
| { |
| "epoch": 1.7039215686274511, |
| "grad_norm": 83.71302795410156, |
| "learning_rate": 5.715848549106146e-07, |
| "loss": 4.069514274597168, |
| "step": 869 |
| }, |
| { |
| "epoch": 1.7058823529411766, |
| "grad_norm": 54.74263000488281, |
| "learning_rate": 5.642330370832521e-07, |
| "loss": 4.284305572509766, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.707843137254902, |
| "grad_norm": 90.30606079101562, |
| "learning_rate": 5.569259777554287e-07, |
| "loss": 4.711888313293457, |
| "step": 871 |
| }, |
| { |
| "epoch": 1.7098039215686276, |
| "grad_norm": 13.181669235229492, |
| "learning_rate": 5.496637506580243e-07, |
| "loss": 3.961899995803833, |
| "step": 872 |
| }, |
| { |
| "epoch": 1.711764705882353, |
| "grad_norm": 9.022482872009277, |
| "learning_rate": 5.424464290695497e-07, |
| "loss": 3.8905348777770996, |
| "step": 873 |
| }, |
| { |
| "epoch": 1.7137254901960786, |
| "grad_norm": 13.499918937683105, |
| "learning_rate": 5.352740858154009e-07, |
| "loss": 4.252344608306885, |
| "step": 874 |
| }, |
| { |
| "epoch": 1.715686274509804, |
| "grad_norm": 102.1851577758789, |
| "learning_rate": 5.281467932671253e-07, |
| "loss": 4.311603546142578, |
| "step": 875 |
| }, |
| { |
| "epoch": 1.7176470588235295, |
| "grad_norm": 52.904563903808594, |
| "learning_rate": 5.210646233416933e-07, |
| "loss": 4.340160846710205, |
| "step": 876 |
| }, |
| { |
| "epoch": 1.719607843137255, |
| "grad_norm": 31.053617477416992, |
| "learning_rate": 5.140276475007711e-07, |
| "loss": 4.486673355102539, |
| "step": 877 |
| }, |
| { |
| "epoch": 1.7215686274509805, |
| "grad_norm": 7.201139450073242, |
| "learning_rate": 5.070359367499994e-07, |
| "loss": 4.224078178405762, |
| "step": 878 |
| }, |
| { |
| "epoch": 1.723529411764706, |
| "grad_norm": 16.317426681518555, |
| "learning_rate": 5.000895616382829e-07, |
| "loss": 4.106444358825684, |
| "step": 879 |
| }, |
| { |
| "epoch": 1.7254901960784315, |
| "grad_norm": 28.286376953125, |
| "learning_rate": 4.931885922570645e-07, |
| "loss": 4.481560230255127, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.727450980392157, |
| "grad_norm": 38.23206329345703, |
| "learning_rate": 4.86333098239632e-07, |
| "loss": 4.117794990539551, |
| "step": 881 |
| }, |
| { |
| "epoch": 1.7294117647058824, |
| "grad_norm": 32.00136184692383, |
| "learning_rate": 4.795231487604124e-07, |
| "loss": 4.470040798187256, |
| "step": 882 |
| }, |
| { |
| "epoch": 1.731372549019608, |
| "grad_norm": 19.59742546081543, |
| "learning_rate": 4.727588125342669e-07, |
| "loss": 4.034927845001221, |
| "step": 883 |
| }, |
| { |
| "epoch": 1.7333333333333334, |
| "grad_norm": 21.4352970123291, |
| "learning_rate": 4.660401578158053e-07, |
| "loss": 4.167074680328369, |
| "step": 884 |
| }, |
| { |
| "epoch": 1.7352941176470589, |
| "grad_norm": 11.949764251708984, |
| "learning_rate": 4.5936725239869364e-07, |
| "loss": 4.398115634918213, |
| "step": 885 |
| }, |
| { |
| "epoch": 1.7372549019607844, |
| "grad_norm": 22.735158920288086, |
| "learning_rate": 4.527401636149703e-07, |
| "loss": 3.787719964981079, |
| "step": 886 |
| }, |
| { |
| "epoch": 1.7392156862745098, |
| "grad_norm": 30.74224090576172, |
| "learning_rate": 4.4615895833436784e-07, |
| "loss": 4.396841049194336, |
| "step": 887 |
| }, |
| { |
| "epoch": 1.7411764705882353, |
| "grad_norm": 13.555144309997559, |
| "learning_rate": 4.396237029636385e-07, |
| "loss": 3.766833782196045, |
| "step": 888 |
| }, |
| { |
| "epoch": 1.7431372549019608, |
| "grad_norm": 12.762528419494629, |
| "learning_rate": 4.3313446344588117e-07, |
| "loss": 4.412437438964844, |
| "step": 889 |
| }, |
| { |
| "epoch": 1.7450980392156863, |
| "grad_norm": 12.25460433959961, |
| "learning_rate": 4.266913052598792e-07, |
| "loss": 4.175087928771973, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.7470588235294118, |
| "grad_norm": 11.612493515014648, |
| "learning_rate": 4.2029429341943983e-07, |
| "loss": 4.048417091369629, |
| "step": 891 |
| }, |
| { |
| "epoch": 1.7490196078431373, |
| "grad_norm": 26.58043098449707, |
| "learning_rate": 4.139434924727359e-07, |
| "loss": 3.725079298019409, |
| "step": 892 |
| }, |
| { |
| "epoch": 1.7509803921568627, |
| "grad_norm": 8.003487586975098, |
| "learning_rate": 4.0763896650165227e-07, |
| "loss": 3.8874549865722656, |
| "step": 893 |
| }, |
| { |
| "epoch": 1.7529411764705882, |
| "grad_norm": 35.708187103271484, |
| "learning_rate": 4.0138077912114824e-07, |
| "loss": 4.4934282302856445, |
| "step": 894 |
| }, |
| { |
| "epoch": 1.7549019607843137, |
| "grad_norm": 38.73770523071289, |
| "learning_rate": 3.951689934786068e-07, |
| "loss": 4.447043418884277, |
| "step": 895 |
| }, |
| { |
| "epoch": 1.7568627450980392, |
| "grad_norm": 73.32794952392578, |
| "learning_rate": 3.8900367225320036e-07, |
| "loss": 4.2515411376953125, |
| "step": 896 |
| }, |
| { |
| "epoch": 1.7588235294117647, |
| "grad_norm": 8.548192024230957, |
| "learning_rate": 3.828848776552596e-07, |
| "loss": 4.277562141418457, |
| "step": 897 |
| }, |
| { |
| "epoch": 1.7607843137254902, |
| "grad_norm": 29.69753646850586, |
| "learning_rate": 3.768126714256437e-07, |
| "loss": 4.336119651794434, |
| "step": 898 |
| }, |
| { |
| "epoch": 1.7627450980392156, |
| "grad_norm": 36.25546646118164, |
| "learning_rate": 3.7078711483511833e-07, |
| "loss": 3.8581244945526123, |
| "step": 899 |
| }, |
| { |
| "epoch": 1.7647058823529411, |
| "grad_norm": 615.1229858398438, |
| "learning_rate": 3.648082686837395e-07, |
| "loss": 3.8911027908325195, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.7666666666666666, |
| "grad_norm": 20.286361694335938, |
| "learning_rate": 3.588761933002344e-07, |
| "loss": 4.06037712097168, |
| "step": 901 |
| }, |
| { |
| "epoch": 1.768627450980392, |
| "grad_norm": 8.233824729919434, |
| "learning_rate": 3.529909485413968e-07, |
| "loss": 4.315708160400391, |
| "step": 902 |
| }, |
| { |
| "epoch": 1.7705882352941176, |
| "grad_norm": 16.410770416259766, |
| "learning_rate": 3.4715259379148656e-07, |
| "loss": 4.197503089904785, |
| "step": 903 |
| }, |
| { |
| "epoch": 1.772549019607843, |
| "grad_norm": 6.714990139007568, |
| "learning_rate": 3.413611879616219e-07, |
| "loss": 4.66876220703125, |
| "step": 904 |
| }, |
| { |
| "epoch": 1.7745098039215685, |
| "grad_norm": 9.479313850402832, |
| "learning_rate": 3.3561678948919094e-07, |
| "loss": 4.189940452575684, |
| "step": 905 |
| }, |
| { |
| "epoch": 1.776470588235294, |
| "grad_norm": 21.64149284362793, |
| "learning_rate": 3.299194563372604e-07, |
| "loss": 4.629100799560547, |
| "step": 906 |
| }, |
| { |
| "epoch": 1.7784313725490195, |
| "grad_norm": 339.67852783203125, |
| "learning_rate": 3.2426924599399056e-07, |
| "loss": 4.177216053009033, |
| "step": 907 |
| }, |
| { |
| "epoch": 1.780392156862745, |
| "grad_norm": 32.338050842285156, |
| "learning_rate": 3.186662154720549e-07, |
| "loss": 3.9354302883148193, |
| "step": 908 |
| }, |
| { |
| "epoch": 1.7823529411764705, |
| "grad_norm": 19.109886169433594, |
| "learning_rate": 3.131104213080688e-07, |
| "loss": 4.007468223571777, |
| "step": 909 |
| }, |
| { |
| "epoch": 1.784313725490196, |
| "grad_norm": 15.316507339477539, |
| "learning_rate": 3.076019195620111e-07, |
| "loss": 4.159431457519531, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.7862745098039214, |
| "grad_norm": 23.8988037109375, |
| "learning_rate": 3.0214076581666364e-07, |
| "loss": 4.315700054168701, |
| "step": 911 |
| }, |
| { |
| "epoch": 1.788235294117647, |
| "grad_norm": 132.59814453125, |
| "learning_rate": 2.9672701517705404e-07, |
| "loss": 4.334149360656738, |
| "step": 912 |
| }, |
| { |
| "epoch": 1.7901960784313724, |
| "grad_norm": 32.49243927001953, |
| "learning_rate": 2.9136072226989054e-07, |
| "loss": 4.344616889953613, |
| "step": 913 |
| }, |
| { |
| "epoch": 1.792156862745098, |
| "grad_norm": 9.036100387573242, |
| "learning_rate": 2.8604194124301654e-07, |
| "loss": 4.192841529846191, |
| "step": 914 |
| }, |
| { |
| "epoch": 1.7941176470588234, |
| "grad_norm": 24.218841552734375, |
| "learning_rate": 2.807707257648662e-07, |
| "loss": 4.057707786560059, |
| "step": 915 |
| }, |
| { |
| "epoch": 1.7960784313725489, |
| "grad_norm": 233.2913055419922, |
| "learning_rate": 2.7554712902391647e-07, |
| "loss": 3.9434196949005127, |
| "step": 916 |
| }, |
| { |
| "epoch": 1.7980392156862746, |
| "grad_norm": 395.1879577636719, |
| "learning_rate": 2.703712037281564e-07, |
| "loss": 3.940992832183838, |
| "step": 917 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 17.381824493408203, |
| "learning_rate": 2.65243002104551e-07, |
| "loss": 4.392702579498291, |
| "step": 918 |
| }, |
| { |
| "epoch": 1.8019607843137255, |
| "grad_norm": 23.06696319580078, |
| "learning_rate": 2.6016257589851825e-07, |
| "loss": 3.9656898975372314, |
| "step": 919 |
| }, |
| { |
| "epoch": 1.803921568627451, |
| "grad_norm": 71.60160064697266, |
| "learning_rate": 2.551299763734011e-07, |
| "loss": 4.245368003845215, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.8058823529411765, |
| "grad_norm": 70.6803207397461, |
| "learning_rate": 2.5014525430995915e-07, |
| "loss": 4.244998931884766, |
| "step": 921 |
| }, |
| { |
| "epoch": 1.807843137254902, |
| "grad_norm": 53.35301971435547, |
| "learning_rate": 2.4520846000584795e-07, |
| "loss": 4.061452865600586, |
| "step": 922 |
| }, |
| { |
| "epoch": 1.8098039215686275, |
| "grad_norm": 49.29062271118164, |
| "learning_rate": 2.403196432751131e-07, |
| "loss": 3.72438383102417, |
| "step": 923 |
| }, |
| { |
| "epoch": 1.811764705882353, |
| "grad_norm": 17.173606872558594, |
| "learning_rate": 2.354788534476915e-07, |
| "loss": 4.318462371826172, |
| "step": 924 |
| }, |
| { |
| "epoch": 1.8137254901960784, |
| "grad_norm": 9.25734806060791, |
| "learning_rate": 2.306861393689114e-07, |
| "loss": 4.17097282409668, |
| "step": 925 |
| }, |
| { |
| "epoch": 1.815686274509804, |
| "grad_norm": 15.367701530456543, |
| "learning_rate": 2.2594154939899805e-07, |
| "loss": 4.201047897338867, |
| "step": 926 |
| }, |
| { |
| "epoch": 1.8176470588235294, |
| "grad_norm": 20.01329231262207, |
| "learning_rate": 2.2124513141258574e-07, |
| "loss": 4.129422187805176, |
| "step": 927 |
| }, |
| { |
| "epoch": 1.8196078431372549, |
| "grad_norm": 35.99736022949219, |
| "learning_rate": 2.1659693279823923e-07, |
| "loss": 4.317249298095703, |
| "step": 928 |
| }, |
| { |
| "epoch": 1.8215686274509804, |
| "grad_norm": 17.21438217163086, |
| "learning_rate": 2.1199700045797077e-07, |
| "loss": 3.9976985454559326, |
| "step": 929 |
| }, |
| { |
| "epoch": 1.8235294117647058, |
| "grad_norm": 219.86317443847656, |
| "learning_rate": 2.0744538080676669e-07, |
| "loss": 4.170305252075195, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.8254901960784313, |
| "grad_norm": 10.848552703857422, |
| "learning_rate": 2.0294211977212318e-07, |
| "loss": 4.244472503662109, |
| "step": 931 |
| }, |
| { |
| "epoch": 1.8274509803921568, |
| "grad_norm": 53.08258819580078, |
| "learning_rate": 1.9848726279357966e-07, |
| "loss": 4.262903690338135, |
| "step": 932 |
| }, |
| { |
| "epoch": 1.8294117647058825, |
| "grad_norm": 11.896856307983398, |
| "learning_rate": 1.9408085482225946e-07, |
| "loss": 4.359714031219482, |
| "step": 933 |
| }, |
| { |
| "epoch": 1.831372549019608, |
| "grad_norm": 31.21965789794922, |
| "learning_rate": 1.8972294032042092e-07, |
| "loss": 4.58714485168457, |
| "step": 934 |
| }, |
| { |
| "epoch": 1.8333333333333335, |
| "grad_norm": 21.309465408325195, |
| "learning_rate": 1.8541356326100436e-07, |
| "loss": 4.436410903930664, |
| "step": 935 |
| }, |
| { |
| "epoch": 1.835294117647059, |
| "grad_norm": 14.893340110778809, |
| "learning_rate": 1.8115276712718622e-07, |
| "loss": 4.266641616821289, |
| "step": 936 |
| }, |
| { |
| "epoch": 1.8372549019607844, |
| "grad_norm": 14.445075988769531, |
| "learning_rate": 1.7694059491195014e-07, |
| "loss": 4.441641807556152, |
| "step": 937 |
| }, |
| { |
| "epoch": 1.83921568627451, |
| "grad_norm": 31.87690544128418, |
| "learning_rate": 1.7277708911764223e-07, |
| "loss": 4.567206859588623, |
| "step": 938 |
| }, |
| { |
| "epoch": 1.8411764705882354, |
| "grad_norm": 40.729698181152344, |
| "learning_rate": 1.686622917555475e-07, |
| "loss": 4.24213981628418, |
| "step": 939 |
| }, |
| { |
| "epoch": 1.843137254901961, |
| "grad_norm": 245.7069854736328, |
| "learning_rate": 1.645962443454663e-07, |
| "loss": 4.172698020935059, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.8450980392156864, |
| "grad_norm": 21.88043975830078, |
| "learning_rate": 1.6057898791529303e-07, |
| "loss": 4.32068395614624, |
| "step": 941 |
| }, |
| { |
| "epoch": 1.8470588235294119, |
| "grad_norm": 9.085769653320312, |
| "learning_rate": 1.5661056300060428e-07, |
| "loss": 4.478321552276611, |
| "step": 942 |
| }, |
| { |
| "epoch": 1.8490196078431373, |
| "grad_norm": 66.37223052978516, |
| "learning_rate": 1.526910096442491e-07, |
| "loss": 3.755631923675537, |
| "step": 943 |
| }, |
| { |
| "epoch": 1.8509803921568628, |
| "grad_norm": 37.71146774291992, |
| "learning_rate": 1.4882036739594374e-07, |
| "loss": 4.105227470397949, |
| "step": 944 |
| }, |
| { |
| "epoch": 1.8529411764705883, |
| "grad_norm": 25.141008377075195, |
| "learning_rate": 1.4499867531187372e-07, |
| "loss": 4.136166095733643, |
| "step": 945 |
| }, |
| { |
| "epoch": 1.8549019607843138, |
| "grad_norm": 15.668458938598633, |
| "learning_rate": 1.4122597195430077e-07, |
| "loss": 4.045654773712158, |
| "step": 946 |
| }, |
| { |
| "epoch": 1.8568627450980393, |
| "grad_norm": 2049.918701171875, |
| "learning_rate": 1.3750229539117143e-07, |
| "loss": 4.428345203399658, |
| "step": 947 |
| }, |
| { |
| "epoch": 1.8588235294117648, |
| "grad_norm": 7.641491413116455, |
| "learning_rate": 1.3382768319573525e-07, |
| "loss": 4.479464530944824, |
| "step": 948 |
| }, |
| { |
| "epoch": 1.8607843137254902, |
| "grad_norm": 22.296363830566406, |
| "learning_rate": 1.3020217244616273e-07, |
| "loss": 4.067809104919434, |
| "step": 949 |
| }, |
| { |
| "epoch": 1.8627450980392157, |
| "grad_norm": 13.92015266418457, |
| "learning_rate": 1.2662579972517463e-07, |
| "loss": 3.9653892517089844, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.8647058823529412, |
| "grad_norm": 48.54196548461914, |
| "learning_rate": 1.2309860111967053e-07, |
| "loss": 3.8806185722351074, |
| "step": 951 |
| }, |
| { |
| "epoch": 1.8666666666666667, |
| "grad_norm": 179.91099548339844, |
| "learning_rate": 1.196206122203647e-07, |
| "loss": 4.106685638427734, |
| "step": 952 |
| }, |
| { |
| "epoch": 1.8686274509803922, |
| "grad_norm": 19.38648223876953, |
| "learning_rate": 1.1619186812142858e-07, |
| "loss": 4.4032464027404785, |
| "step": 953 |
| }, |
| { |
| "epoch": 1.8705882352941177, |
| "grad_norm": 304.66949462890625, |
| "learning_rate": 1.1281240342013444e-07, |
| "loss": 4.3543620109558105, |
| "step": 954 |
| }, |
| { |
| "epoch": 1.8725490196078431, |
| "grad_norm": 15.57471752166748, |
| "learning_rate": 1.0948225221651009e-07, |
| "loss": 4.376530647277832, |
| "step": 955 |
| }, |
| { |
| "epoch": 1.8745098039215686, |
| "grad_norm": 10.722735404968262, |
| "learning_rate": 1.0620144811299027e-07, |
| "loss": 4.444626331329346, |
| "step": 956 |
| }, |
| { |
| "epoch": 1.8764705882352941, |
| "grad_norm": 33.6290397644043, |
| "learning_rate": 1.0297002421407798e-07, |
| "loss": 3.8732433319091797, |
| "step": 957 |
| }, |
| { |
| "epoch": 1.8784313725490196, |
| "grad_norm": 16.052318572998047, |
| "learning_rate": 9.978801312601538e-08, |
| "loss": 4.63797664642334, |
| "step": 958 |
| }, |
| { |
| "epoch": 1.880392156862745, |
| "grad_norm": 12.871574401855469, |
| "learning_rate": 9.665544695645013e-08, |
| "loss": 4.0264129638671875, |
| "step": 959 |
| }, |
| { |
| "epoch": 1.8823529411764706, |
| "grad_norm": 65.55164337158203, |
| "learning_rate": 9.357235731411174e-08, |
| "loss": 4.352207183837891, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.884313725490196, |
| "grad_norm": 32.963478088378906, |
| "learning_rate": 9.053877530849465e-08, |
| "loss": 4.248900413513184, |
| "step": 961 |
| }, |
| { |
| "epoch": 1.8862745098039215, |
| "grad_norm": 21.75198745727539, |
| "learning_rate": 8.755473154954342e-08, |
| "loss": 4.203939437866211, |
| "step": 962 |
| }, |
| { |
| "epoch": 1.888235294117647, |
| "grad_norm": 80.36431884765625, |
| "learning_rate": 8.462025614734193e-08, |
| "loss": 3.8435726165771484, |
| "step": 963 |
| }, |
| { |
| "epoch": 1.8901960784313725, |
| "grad_norm": 1289.361083984375, |
| "learning_rate": 8.173537871181413e-08, |
| "loss": 4.257579803466797, |
| "step": 964 |
| }, |
| { |
| "epoch": 1.892156862745098, |
| "grad_norm": 13.709322929382324, |
| "learning_rate": 7.890012835242045e-08, |
| "loss": 4.448129177093506, |
| "step": 965 |
| }, |
| { |
| "epoch": 1.8941176470588235, |
| "grad_norm": 44.33952713012695, |
| "learning_rate": 7.61145336778657e-08, |
| "loss": 4.490082740783691, |
| "step": 966 |
| }, |
| { |
| "epoch": 1.896078431372549, |
| "grad_norm": 62.72514724731445, |
| "learning_rate": 7.337862279581332e-08, |
| "loss": 4.260318756103516, |
| "step": 967 |
| }, |
| { |
| "epoch": 1.8980392156862744, |
| "grad_norm": 61.187965393066406, |
| "learning_rate": 7.069242331259719e-08, |
| "loss": 4.129931449890137, |
| "step": 968 |
| }, |
| { |
| "epoch": 1.9, |
| "grad_norm": 89.85144805908203, |
| "learning_rate": 6.805596233294576e-08, |
| "loss": 4.300049304962158, |
| "step": 969 |
| }, |
| { |
| "epoch": 1.9019607843137254, |
| "grad_norm": 33.04421615600586, |
| "learning_rate": 6.546926645970675e-08, |
| "loss": 4.332505226135254, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.9039215686274509, |
| "grad_norm": 42.08061218261719, |
| "learning_rate": 6.293236179358175e-08, |
| "loss": 4.362676620483398, |
| "step": 971 |
| }, |
| { |
| "epoch": 1.9058823529411764, |
| "grad_norm": 7.202523708343506, |
| "learning_rate": 6.044527393286037e-08, |
| "loss": 4.0280914306640625, |
| "step": 972 |
| }, |
| { |
| "epoch": 1.9078431372549018, |
| "grad_norm": 26.926128387451172, |
| "learning_rate": 5.800802797316152e-08, |
| "loss": 3.3560526371002197, |
| "step": 973 |
| }, |
| { |
| "epoch": 1.9098039215686273, |
| "grad_norm": 2451.730224609375, |
| "learning_rate": 5.5620648507182516e-08, |
| "loss": 4.243276596069336, |
| "step": 974 |
| }, |
| { |
| "epoch": 1.9117647058823528, |
| "grad_norm": 38.10729217529297, |
| "learning_rate": 5.3283159624448745e-08, |
| "loss": 4.296966552734375, |
| "step": 975 |
| }, |
| { |
| "epoch": 1.9137254901960783, |
| "grad_norm": 51.848297119140625, |
| "learning_rate": 5.09955849110727e-08, |
| "loss": 4.370020866394043, |
| "step": 976 |
| }, |
| { |
| "epoch": 1.9156862745098038, |
| "grad_norm": 22.084260940551758, |
| "learning_rate": 4.875794744951423e-08, |
| "loss": 3.9867117404937744, |
| "step": 977 |
| }, |
| { |
| "epoch": 1.9176470588235293, |
| "grad_norm": 50.75712585449219, |
| "learning_rate": 4.657026981834623e-08, |
| "loss": 4.177669048309326, |
| "step": 978 |
| }, |
| { |
| "epoch": 1.9196078431372547, |
| "grad_norm": 56.02933883666992, |
| "learning_rate": 4.443257409203206e-08, |
| "loss": 4.863016128540039, |
| "step": 979 |
| }, |
| { |
| "epoch": 1.9215686274509802, |
| "grad_norm": 10.556544303894043, |
| "learning_rate": 4.2344881840697404e-08, |
| "loss": 4.148576736450195, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.923529411764706, |
| "grad_norm": 46.10426330566406, |
| "learning_rate": 4.0307214129914896e-08, |
| "loss": 4.038660049438477, |
| "step": 981 |
| }, |
| { |
| "epoch": 1.9254901960784314, |
| "grad_norm": 12.037646293640137, |
| "learning_rate": 3.8319591520492025e-08, |
| "loss": 4.665771484375, |
| "step": 982 |
| }, |
| { |
| "epoch": 1.927450980392157, |
| "grad_norm": 15.08922290802002, |
| "learning_rate": 3.638203406826302e-08, |
| "loss": 4.220437049865723, |
| "step": 983 |
| }, |
| { |
| "epoch": 1.9294117647058824, |
| "grad_norm": 23.980785369873047, |
| "learning_rate": 3.449456132388562e-08, |
| "loss": 4.720032691955566, |
| "step": 984 |
| }, |
| { |
| "epoch": 1.9313725490196079, |
| "grad_norm": 324.36517333984375, |
| "learning_rate": 3.265719233264575e-08, |
| "loss": 4.357367038726807, |
| "step": 985 |
| }, |
| { |
| "epoch": 1.9333333333333333, |
| "grad_norm": 9.773921966552734, |
| "learning_rate": 3.086994563426371e-08, |
| "loss": 4.633083820343018, |
| "step": 986 |
| }, |
| { |
| "epoch": 1.9352941176470588, |
| "grad_norm": 21.540130615234375, |
| "learning_rate": 2.9132839262707714e-08, |
| "loss": 4.309081554412842, |
| "step": 987 |
| }, |
| { |
| "epoch": 1.9372549019607843, |
| "grad_norm": 19.026288986206055, |
| "learning_rate": 2.7445890746011782e-08, |
| "loss": 4.330454349517822, |
| "step": 988 |
| }, |
| { |
| "epoch": 1.9392156862745098, |
| "grad_norm": 82.20018768310547, |
| "learning_rate": 2.5809117106099235e-08, |
| "loss": 4.401438236236572, |
| "step": 989 |
| }, |
| { |
| "epoch": 1.9411764705882353, |
| "grad_norm": 50.91893005371094, |
| "learning_rate": 2.4222534858610036e-08, |
| "loss": 4.113171577453613, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.9431372549019608, |
| "grad_norm": 52.23246765136719, |
| "learning_rate": 2.2686160012735935e-08, |
| "loss": 4.591800212860107, |
| "step": 991 |
| }, |
| { |
| "epoch": 1.9450980392156862, |
| "grad_norm": 25.589208602905273, |
| "learning_rate": 2.120000807105671e-08, |
| "loss": 4.057560443878174, |
| "step": 992 |
| }, |
| { |
| "epoch": 1.9470588235294117, |
| "grad_norm": 41.304256439208984, |
| "learning_rate": 1.9764094029385285e-08, |
| "loss": 4.111077308654785, |
| "step": 993 |
| }, |
| { |
| "epoch": 1.9490196078431372, |
| "grad_norm": 8.818951606750488, |
| "learning_rate": 1.837843237661563e-08, |
| "loss": 3.6773688793182373, |
| "step": 994 |
| }, |
| { |
| "epoch": 1.9509803921568627, |
| "grad_norm": 19.091995239257812, |
| "learning_rate": 1.704303709457733e-08, |
| "loss": 3.89583420753479, |
| "step": 995 |
| }, |
| { |
| "epoch": 1.9529411764705882, |
| "grad_norm": 35.950077056884766, |
| "learning_rate": 1.5757921657892915e-08, |
| "loss": 4.519157409667969, |
| "step": 996 |
| }, |
| { |
| "epoch": 1.9549019607843139, |
| "grad_norm": 8.060112953186035, |
| "learning_rate": 1.4523099033845189e-08, |
| "loss": 4.286599159240723, |
| "step": 997 |
| }, |
| { |
| "epoch": 1.9568627450980394, |
| "grad_norm": 23.477190017700195, |
| "learning_rate": 1.333858168224178e-08, |
| "loss": 4.344496726989746, |
| "step": 998 |
| }, |
| { |
| "epoch": 1.9588235294117649, |
| "grad_norm": 17.623676300048828, |
| "learning_rate": 1.2204381555293021e-08, |
| "loss": 3.9677319526672363, |
| "step": 999 |
| }, |
| { |
| "epoch": 1.9607843137254903, |
| "grad_norm": 13.321854591369629, |
| "learning_rate": 1.1120510097490933e-08, |
| "loss": 4.433586120605469, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.9627450980392158, |
| "grad_norm": 13.928075790405273, |
| "learning_rate": 1.0086978245490986e-08, |
| "loss": 4.1453046798706055, |
| "step": 1001 |
| }, |
| { |
| "epoch": 1.9647058823529413, |
| "grad_norm": 36.12109375, |
| "learning_rate": 9.103796428006074e-09, |
| "loss": 4.890933990478516, |
| "step": 1002 |
| }, |
| { |
| "epoch": 1.9666666666666668, |
| "grad_norm": 26.082658767700195, |
| "learning_rate": 8.1709745656966e-09, |
| "loss": 3.7525954246520996, |
| "step": 1003 |
| }, |
| { |
| "epoch": 1.9686274509803923, |
| "grad_norm": 485.4503479003906, |
| "learning_rate": 7.288522071074999e-09, |
| "loss": 4.128078460693359, |
| "step": 1004 |
| }, |
| { |
| "epoch": 1.9705882352941178, |
| "grad_norm": 18.146099090576172, |
| "learning_rate": 6.4564478484069326e-09, |
| "loss": 4.700654029846191, |
| "step": 1005 |
| }, |
| { |
| "epoch": 1.9725490196078432, |
| "grad_norm": 38.43018341064453, |
| "learning_rate": 5.6747602936230166e-09, |
| "loss": 4.363650321960449, |
| "step": 1006 |
| }, |
| { |
| "epoch": 1.9745098039215687, |
| "grad_norm": 20.92088508605957, |
| "learning_rate": 4.9434672942355595e-09, |
| "loss": 4.32061767578125, |
| "step": 1007 |
| }, |
| { |
| "epoch": 1.9764705882352942, |
| "grad_norm": 17.95061492919922, |
| "learning_rate": 4.2625762292553e-09, |
| "loss": 3.9346466064453125, |
| "step": 1008 |
| }, |
| { |
| "epoch": 1.9784313725490197, |
| "grad_norm": 183.8118896484375, |
| "learning_rate": 3.632093969121453e-09, |
| "loss": 3.9191904067993164, |
| "step": 1009 |
| }, |
| { |
| "epoch": 1.9803921568627452, |
| "grad_norm": 17.58165168762207, |
| "learning_rate": 3.0520268756284444e-09, |
| "loss": 4.576894760131836, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.9823529411764707, |
| "grad_norm": 23.028118133544922, |
| "learning_rate": 2.522380801863733e-09, |
| "loss": 4.249538421630859, |
| "step": 1011 |
| }, |
| { |
| "epoch": 1.9843137254901961, |
| "grad_norm": 13.785447120666504, |
| "learning_rate": 2.043161092148971e-09, |
| "loss": 4.694825649261475, |
| "step": 1012 |
| }, |
| { |
| "epoch": 1.9862745098039216, |
| "grad_norm": 21.384784698486328, |
| "learning_rate": 1.6143725819850465e-09, |
| "loss": 4.729522228240967, |
| "step": 1013 |
| }, |
| { |
| "epoch": 1.988235294117647, |
| "grad_norm": 25.501951217651367, |
| "learning_rate": 1.2360195980032351e-09, |
| "loss": 4.016307830810547, |
| "step": 1014 |
| }, |
| { |
| "epoch": 1.9901960784313726, |
| "grad_norm": 23.436403274536133, |
| "learning_rate": 9.081059579235662e-10, |
| "loss": 4.203705787658691, |
| "step": 1015 |
| }, |
| { |
| "epoch": 1.992156862745098, |
| "grad_norm": 18.131078720092773, |
| "learning_rate": 6.306349705126335e-10, |
| "loss": 4.157977104187012, |
| "step": 1016 |
| }, |
| { |
| "epoch": 1.9941176470588236, |
| "grad_norm": 644.4415893554688, |
| "learning_rate": 4.036094355541753e-10, |
| "loss": 4.596647262573242, |
| "step": 1017 |
| }, |
| { |
| "epoch": 1.996078431372549, |
| "grad_norm": 34.3750114440918, |
| "learning_rate": 2.2703164381743248e-10, |
| "loss": 4.472710609436035, |
| "step": 1018 |
| }, |
| { |
| "epoch": 1.9980392156862745, |
| "grad_norm": 16.04586410522461, |
| "learning_rate": 1.0090337703771991e-10, |
| "loss": 4.400196552276611, |
| "step": 1019 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 102.54801940917969, |
| "learning_rate": 2.522590789422186e-11, |
| "loss": 4.0655059814453125, |
| "step": 1020 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1020, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.829783862596862e+18, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|