| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 510, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00196078431372549, |
| "grad_norm": 138868.515625, |
| "learning_rate": 0.0, |
| "loss": 17.658615112304688, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.00392156862745098, |
| "grad_norm": 14902.44140625, |
| "learning_rate": 3.2258064516129035e-07, |
| "loss": 15.435331344604492, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0058823529411764705, |
| "grad_norm": 18682.611328125, |
| "learning_rate": 6.451612903225807e-07, |
| "loss": 17.053977966308594, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.00784313725490196, |
| "grad_norm": 25779.986328125, |
| "learning_rate": 9.67741935483871e-07, |
| "loss": 17.117534637451172, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.00980392156862745, |
| "grad_norm": 15318.7373046875, |
| "learning_rate": 1.2903225806451614e-06, |
| "loss": 17.542219161987305, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.011764705882352941, |
| "grad_norm": 19811.609375, |
| "learning_rate": 1.6129032258064516e-06, |
| "loss": 17.963178634643555, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.013725490196078431, |
| "grad_norm": 9440.365234375, |
| "learning_rate": 1.935483870967742e-06, |
| "loss": 13.022825241088867, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.01568627450980392, |
| "grad_norm": 88075.3046875, |
| "learning_rate": 2.2580645161290324e-06, |
| "loss": 16.652803421020508, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.01764705882352941, |
| "grad_norm": 20173.8203125, |
| "learning_rate": 2.580645161290323e-06, |
| "loss": 16.681129455566406, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.0196078431372549, |
| "grad_norm": 20974.09765625, |
| "learning_rate": 2.903225806451613e-06, |
| "loss": 14.24374771118164, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.021568627450980392, |
| "grad_norm": 22168.7890625, |
| "learning_rate": 3.225806451612903e-06, |
| "loss": 13.430747985839844, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.023529411764705882, |
| "grad_norm": 6798.2158203125, |
| "learning_rate": 3.548387096774194e-06, |
| "loss": 13.947168350219727, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.025490196078431372, |
| "grad_norm": 11967.0244140625, |
| "learning_rate": 3.870967741935484e-06, |
| "loss": 15.127799034118652, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.027450980392156862, |
| "grad_norm": 11873.794921875, |
| "learning_rate": 4.193548387096774e-06, |
| "loss": 13.805913925170898, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.029411764705882353, |
| "grad_norm": 25335.591796875, |
| "learning_rate": 4.516129032258065e-06, |
| "loss": 13.47291374206543, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.03137254901960784, |
| "grad_norm": 21837.5234375, |
| "learning_rate": 4.838709677419355e-06, |
| "loss": 14.185405731201172, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.03333333333333333, |
| "grad_norm": 6198.25146484375, |
| "learning_rate": 5.161290322580646e-06, |
| "loss": 11.65322494506836, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.03529411764705882, |
| "grad_norm": 5560.4794921875, |
| "learning_rate": 5.483870967741935e-06, |
| "loss": 11.970712661743164, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.03725490196078431, |
| "grad_norm": 2361.835693359375, |
| "learning_rate": 5.806451612903226e-06, |
| "loss": 11.846226692199707, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.0392156862745098, |
| "grad_norm": 3759.865478515625, |
| "learning_rate": 6.129032258064517e-06, |
| "loss": 10.710617065429688, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.041176470588235294, |
| "grad_norm": 6874.8154296875, |
| "learning_rate": 6.451612903225806e-06, |
| "loss": 9.231167793273926, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.043137254901960784, |
| "grad_norm": 19366.40625, |
| "learning_rate": 6.774193548387097e-06, |
| "loss": 11.892879486083984, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.045098039215686274, |
| "grad_norm": 6423.419921875, |
| "learning_rate": 7.096774193548388e-06, |
| "loss": 9.792671203613281, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.047058823529411764, |
| "grad_norm": 3562.012451171875, |
| "learning_rate": 7.4193548387096784e-06, |
| "loss": 9.525957107543945, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.049019607843137254, |
| "grad_norm": 4905.87060546875, |
| "learning_rate": 7.741935483870968e-06, |
| "loss": 11.546586036682129, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.050980392156862744, |
| "grad_norm": 162666.359375, |
| "learning_rate": 8.064516129032258e-06, |
| "loss": 9.598024368286133, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.052941176470588235, |
| "grad_norm": 8230.1279296875, |
| "learning_rate": 8.387096774193549e-06, |
| "loss": 12.389551162719727, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.054901960784313725, |
| "grad_norm": 14983.3798828125, |
| "learning_rate": 8.70967741935484e-06, |
| "loss": 14.294782638549805, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.056862745098039215, |
| "grad_norm": 8721.31640625, |
| "learning_rate": 9.03225806451613e-06, |
| "loss": 10.350369453430176, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.058823529411764705, |
| "grad_norm": 2975.934814453125, |
| "learning_rate": 9.35483870967742e-06, |
| "loss": 9.85280704498291, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.060784313725490195, |
| "grad_norm": 4804.4228515625, |
| "learning_rate": 9.67741935483871e-06, |
| "loss": 8.866147994995117, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.06274509803921569, |
| "grad_norm": 3305.45361328125, |
| "learning_rate": 1e-05, |
| "loss": 10.436628341674805, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.06470588235294118, |
| "grad_norm": 11790.716796875, |
| "learning_rate": 9.999974774092107e-06, |
| "loss": 10.873075485229492, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.06666666666666667, |
| "grad_norm": 4500.14111328125, |
| "learning_rate": 9.999899096622962e-06, |
| "loss": 10.806778907775879, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.06862745098039216, |
| "grad_norm": 164.95433044433594, |
| "learning_rate": 9.999772968356182e-06, |
| "loss": 11.633756637573242, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.07058823529411765, |
| "grad_norm": 43294.8359375, |
| "learning_rate": 9.999596390564446e-06, |
| "loss": 9.791693687438965, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.07254901960784314, |
| "grad_norm": 39442.3984375, |
| "learning_rate": 9.999369365029487e-06, |
| "loss": 8.951934814453125, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.07450980392156863, |
| "grad_norm": 588.51171875, |
| "learning_rate": 9.999091894042077e-06, |
| "loss": 8.9212646484375, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.07647058823529412, |
| "grad_norm": 1923.2791748046875, |
| "learning_rate": 9.998763980401997e-06, |
| "loss": 7.225367546081543, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.0784313725490196, |
| "grad_norm": 657.7442016601562, |
| "learning_rate": 9.998385627418015e-06, |
| "loss": 6.479832172393799, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0803921568627451, |
| "grad_norm": 136.12237548828125, |
| "learning_rate": 9.997956838907853e-06, |
| "loss": 6.8389177322387695, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.08235294117647059, |
| "grad_norm": 251.0398712158203, |
| "learning_rate": 9.997477619198138e-06, |
| "loss": 6.4207072257995605, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.08431372549019608, |
| "grad_norm": 577.7664794921875, |
| "learning_rate": 9.996947973124372e-06, |
| "loss": 7.31302547454834, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.08627450980392157, |
| "grad_norm": 91.31795501708984, |
| "learning_rate": 9.996367906030879e-06, |
| "loss": 6.951511383056641, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.08823529411764706, |
| "grad_norm": 6586.15673828125, |
| "learning_rate": 9.995737423770746e-06, |
| "loss": 6.607446670532227, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.09019607843137255, |
| "grad_norm": 2296.7861328125, |
| "learning_rate": 9.995056532705766e-06, |
| "loss": 5.819401264190674, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.09215686274509804, |
| "grad_norm": 61.191524505615234, |
| "learning_rate": 9.994325239706377e-06, |
| "loss": 5.54649019241333, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.09411764705882353, |
| "grad_norm": 82.74103546142578, |
| "learning_rate": 9.993543552151594e-06, |
| "loss": 5.546056747436523, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.09607843137254903, |
| "grad_norm": 2329.645263671875, |
| "learning_rate": 9.992711477928925e-06, |
| "loss": 6.310848712921143, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.09803921568627451, |
| "grad_norm": 210.10841369628906, |
| "learning_rate": 9.991829025434305e-06, |
| "loss": 4.921277046203613, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 153.93605041503906, |
| "learning_rate": 9.990896203571994e-06, |
| "loss": 5.7787909507751465, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.10196078431372549, |
| "grad_norm": 146.04843139648438, |
| "learning_rate": 9.98991302175451e-06, |
| "loss": 5.398743629455566, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.10392156862745099, |
| "grad_norm": 84.42292022705078, |
| "learning_rate": 9.98887948990251e-06, |
| "loss": 5.348798751831055, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.10588235294117647, |
| "grad_norm": 1806.098388671875, |
| "learning_rate": 9.987795618444707e-06, |
| "loss": 5.749485969543457, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.10784313725490197, |
| "grad_norm": 19992.6953125, |
| "learning_rate": 9.986661418317759e-06, |
| "loss": 4.9936370849609375, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.10980392156862745, |
| "grad_norm": 479.07501220703125, |
| "learning_rate": 9.985476900966156e-06, |
| "loss": 5.318588733673096, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.11176470588235295, |
| "grad_norm": 161.11553955078125, |
| "learning_rate": 9.984242078342108e-06, |
| "loss": 5.936580181121826, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.11372549019607843, |
| "grad_norm": 47.58917236328125, |
| "learning_rate": 9.982956962905423e-06, |
| "loss": 5.245980739593506, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.11568627450980393, |
| "grad_norm": 64.91607666015625, |
| "learning_rate": 9.981621567623385e-06, |
| "loss": 5.656664848327637, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.11764705882352941, |
| "grad_norm": 193.08592224121094, |
| "learning_rate": 9.980235905970615e-06, |
| "loss": 5.183746337890625, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.11960784313725491, |
| "grad_norm": 197.5607452392578, |
| "learning_rate": 9.978799991928945e-06, |
| "loss": 4.836249351501465, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.12156862745098039, |
| "grad_norm": 836.5057983398438, |
| "learning_rate": 9.977313839987265e-06, |
| "loss": 4.21888542175293, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.12352941176470589, |
| "grad_norm": 4496.34228515625, |
| "learning_rate": 9.975777465141391e-06, |
| "loss": 4.858551979064941, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.12549019607843137, |
| "grad_norm": 33.644187927246094, |
| "learning_rate": 9.974190882893901e-06, |
| "loss": 4.454083442687988, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.12745098039215685, |
| "grad_norm": 389.5293273925781, |
| "learning_rate": 9.972554109253988e-06, |
| "loss": 4.7087812423706055, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.12941176470588237, |
| "grad_norm": 234.9954833984375, |
| "learning_rate": 9.970867160737293e-06, |
| "loss": 4.443643569946289, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.13137254901960785, |
| "grad_norm": 127.71284484863281, |
| "learning_rate": 9.969130054365737e-06, |
| "loss": 5.485596179962158, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.13333333333333333, |
| "grad_norm": 73.74437713623047, |
| "learning_rate": 9.967342807667355e-06, |
| "loss": 5.120403289794922, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.13529411764705881, |
| "grad_norm": 49.9027214050293, |
| "learning_rate": 9.965505438676115e-06, |
| "loss": 5.075076580047607, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.13725490196078433, |
| "grad_norm": 138.47344970703125, |
| "learning_rate": 9.963617965931738e-06, |
| "loss": 5.561940670013428, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.1392156862745098, |
| "grad_norm": 322.7532043457031, |
| "learning_rate": 9.961680408479508e-06, |
| "loss": 4.983658790588379, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.1411764705882353, |
| "grad_norm": 41682.1875, |
| "learning_rate": 9.959692785870086e-06, |
| "loss": 4.346513748168945, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.14313725490196078, |
| "grad_norm": 22.722593307495117, |
| "learning_rate": 9.957655118159304e-06, |
| "loss": 4.279910087585449, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.1450980392156863, |
| "grad_norm": 1315.896240234375, |
| "learning_rate": 9.955567425907968e-06, |
| "loss": 5.980461597442627, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.14705882352941177, |
| "grad_norm": 34.87533950805664, |
| "learning_rate": 9.953429730181653e-06, |
| "loss": 4.460562705993652, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.14901960784313725, |
| "grad_norm": 34.65178680419922, |
| "learning_rate": 9.951242052550487e-06, |
| "loss": 4.673140525817871, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.15098039215686274, |
| "grad_norm": 188.9655303955078, |
| "learning_rate": 9.949004415088928e-06, |
| "loss": 4.867604732513428, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.15294117647058825, |
| "grad_norm": 267.5862121582031, |
| "learning_rate": 9.946716840375552e-06, |
| "loss": 4.577199935913086, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.15490196078431373, |
| "grad_norm": 427.12872314453125, |
| "learning_rate": 9.944379351492818e-06, |
| "loss": 4.855893135070801, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.1568627450980392, |
| "grad_norm": 119.81890869140625, |
| "learning_rate": 9.941991972026839e-06, |
| "loss": 4.051677703857422, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.1588235294117647, |
| "grad_norm": 6054.2958984375, |
| "learning_rate": 9.939554726067142e-06, |
| "loss": 4.933249473571777, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.1607843137254902, |
| "grad_norm": 62.10055923461914, |
| "learning_rate": 9.937067638206418e-06, |
| "loss": 4.819094657897949, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.1627450980392157, |
| "grad_norm": 450.60992431640625, |
| "learning_rate": 9.934530733540293e-06, |
| "loss": 4.19674825668335, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.16470588235294117, |
| "grad_norm": 350.167724609375, |
| "learning_rate": 9.931944037667056e-06, |
| "loss": 4.2217607498168945, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.16666666666666666, |
| "grad_norm": 1959.1741943359375, |
| "learning_rate": 9.929307576687404e-06, |
| "loss": 4.780778884887695, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.16862745098039217, |
| "grad_norm": 178.00027465820312, |
| "learning_rate": 9.926621377204188e-06, |
| "loss": 4.609301567077637, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.17058823529411765, |
| "grad_norm": 1623.9786376953125, |
| "learning_rate": 9.923885466322135e-06, |
| "loss": 5.524645805358887, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.17254901960784313, |
| "grad_norm": 26.787124633789062, |
| "learning_rate": 9.921099871647582e-06, |
| "loss": 4.657105445861816, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.17450980392156862, |
| "grad_norm": 438.51654052734375, |
| "learning_rate": 9.918264621288187e-06, |
| "loss": 5.135782241821289, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.17647058823529413, |
| "grad_norm": 68.33883666992188, |
| "learning_rate": 9.91537974385266e-06, |
| "loss": 4.544101715087891, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.1784313725490196, |
| "grad_norm": 184.18646240234375, |
| "learning_rate": 9.912445268450459e-06, |
| "loss": 5.053075790405273, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.1803921568627451, |
| "grad_norm": 1090.8336181640625, |
| "learning_rate": 9.909461224691506e-06, |
| "loss": 4.8303327560424805, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.18235294117647058, |
| "grad_norm": 61.77042007446289, |
| "learning_rate": 9.906427642685889e-06, |
| "loss": 4.788046360015869, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.1843137254901961, |
| "grad_norm": 103.00730895996094, |
| "learning_rate": 9.90334455304355e-06, |
| "loss": 5.147237300872803, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.18627450980392157, |
| "grad_norm": 46.233280181884766, |
| "learning_rate": 9.900211986873986e-06, |
| "loss": 4.351109981536865, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.18823529411764706, |
| "grad_norm": 305.6100769042969, |
| "learning_rate": 9.897029975785924e-06, |
| "loss": 4.575442790985107, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.19019607843137254, |
| "grad_norm": 109.36982727050781, |
| "learning_rate": 9.89379855188701e-06, |
| "loss": 4.224271774291992, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.19215686274509805, |
| "grad_norm": 100.88874816894531, |
| "learning_rate": 9.89051774778349e-06, |
| "loss": 4.574636936187744, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.19411764705882353, |
| "grad_norm": 285.57757568359375, |
| "learning_rate": 9.887187596579865e-06, |
| "loss": 5.0750861167907715, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.19607843137254902, |
| "grad_norm": 87.83949279785156, |
| "learning_rate": 9.883808131878573e-06, |
| "loss": 4.645264148712158, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.1980392156862745, |
| "grad_norm": 414.4931335449219, |
| "learning_rate": 9.880379387779637e-06, |
| "loss": 4.512279510498047, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 607.8229370117188, |
| "learning_rate": 9.87690139888033e-06, |
| "loss": 4.384090423583984, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.2019607843137255, |
| "grad_norm": 33.75107192993164, |
| "learning_rate": 9.873374200274826e-06, |
| "loss": 4.4997639656066895, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.20392156862745098, |
| "grad_norm": 302.5324401855469, |
| "learning_rate": 9.869797827553837e-06, |
| "loss": 4.900559902191162, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.20588235294117646, |
| "grad_norm": 59.07832717895508, |
| "learning_rate": 9.866172316804265e-06, |
| "loss": 4.729743957519531, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.20784313725490197, |
| "grad_norm": 555.4403686523438, |
| "learning_rate": 9.862497704608829e-06, |
| "loss": 4.753190517425537, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.20980392156862746, |
| "grad_norm": 649.705810546875, |
| "learning_rate": 9.8587740280457e-06, |
| "loss": 4.613556861877441, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.21176470588235294, |
| "grad_norm": 414.4284973144531, |
| "learning_rate": 9.855001324688128e-06, |
| "loss": 4.940046310424805, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.21372549019607842, |
| "grad_norm": 417.20989990234375, |
| "learning_rate": 9.851179632604057e-06, |
| "loss": 4.4412970542907715, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.21568627450980393, |
| "grad_norm": 60.889366149902344, |
| "learning_rate": 9.847308990355752e-06, |
| "loss": 4.690826416015625, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.21764705882352942, |
| "grad_norm": 83.28646087646484, |
| "learning_rate": 9.843389436999396e-06, |
| "loss": 4.576815605163574, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.2196078431372549, |
| "grad_norm": 144.78509521484375, |
| "learning_rate": 9.839421012084709e-06, |
| "loss": 4.536053657531738, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.22156862745098038, |
| "grad_norm": 87.0609359741211, |
| "learning_rate": 9.835403755654535e-06, |
| "loss": 4.452672004699707, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.2235294117647059, |
| "grad_norm": 49.286476135253906, |
| "learning_rate": 9.831337708244454e-06, |
| "loss": 4.295703887939453, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.22549019607843138, |
| "grad_norm": 225.94625854492188, |
| "learning_rate": 9.827222910882358e-06, |
| "loss": 5.346158504486084, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.22745098039215686, |
| "grad_norm": 116.64401245117188, |
| "learning_rate": 9.82305940508805e-06, |
| "loss": 4.404465675354004, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.22941176470588234, |
| "grad_norm": 71.7906265258789, |
| "learning_rate": 9.818847232872815e-06, |
| "loss": 4.849125862121582, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.23137254901960785, |
| "grad_norm": 116.41288757324219, |
| "learning_rate": 9.814586436738998e-06, |
| "loss": 4.716423988342285, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.23333333333333334, |
| "grad_norm": 107.20303344726562, |
| "learning_rate": 9.81027705967958e-06, |
| "loss": 4.078630447387695, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.23529411764705882, |
| "grad_norm": 44.105812072753906, |
| "learning_rate": 9.805919145177741e-06, |
| "loss": 4.686631679534912, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.2372549019607843, |
| "grad_norm": 793.5034790039062, |
| "learning_rate": 9.801512737206422e-06, |
| "loss": 5.099960803985596, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.23921568627450981, |
| "grad_norm": 91.11273956298828, |
| "learning_rate": 9.797057880227878e-06, |
| "loss": 4.722168922424316, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.2411764705882353, |
| "grad_norm": 25.48920440673828, |
| "learning_rate": 9.792554619193235e-06, |
| "loss": 4.521475791931152, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.24313725490196078, |
| "grad_norm": 9693.837890625, |
| "learning_rate": 9.78800299954203e-06, |
| "loss": 4.789237976074219, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.24509803921568626, |
| "grad_norm": 144.1415252685547, |
| "learning_rate": 9.783403067201763e-06, |
| "loss": 4.778863906860352, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.24705882352941178, |
| "grad_norm": 11.692220687866211, |
| "learning_rate": 9.778754868587414e-06, |
| "loss": 5.121346473693848, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.24901960784313726, |
| "grad_norm": 35.05064010620117, |
| "learning_rate": 9.774058450601003e-06, |
| "loss": 5.001660346984863, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.25098039215686274, |
| "grad_norm": 137.78485107421875, |
| "learning_rate": 9.76931386063109e-06, |
| "loss": 4.57066535949707, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.2529411764705882, |
| "grad_norm": 43.179466247558594, |
| "learning_rate": 9.76452114655231e-06, |
| "loss": 4.620499610900879, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.2549019607843137, |
| "grad_norm": 33.09333038330078, |
| "learning_rate": 9.759680356724888e-06, |
| "loss": 4.002799034118652, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.2568627450980392, |
| "grad_norm": 61.24403762817383, |
| "learning_rate": 9.754791539994153e-06, |
| "loss": 5.060644149780273, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.25882352941176473, |
| "grad_norm": 8565.3466796875, |
| "learning_rate": 9.749854745690041e-06, |
| "loss": 4.671350479125977, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.2607843137254902, |
| "grad_norm": 59.654541015625, |
| "learning_rate": 9.744870023626598e-06, |
| "loss": 4.615689277648926, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.2627450980392157, |
| "grad_norm": 58.113155364990234, |
| "learning_rate": 9.739837424101484e-06, |
| "loss": 4.944394588470459, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.2647058823529412, |
| "grad_norm": 44.76533126831055, |
| "learning_rate": 9.73475699789545e-06, |
| "loss": 4.559343338012695, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.26666666666666666, |
| "grad_norm": 4049.171142578125, |
| "learning_rate": 9.729628796271844e-06, |
| "loss": 4.097330093383789, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.26862745098039215, |
| "grad_norm": 91.75375366210938, |
| "learning_rate": 9.724452870976084e-06, |
| "loss": 4.160323143005371, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.27058823529411763, |
| "grad_norm": 92.1652603149414, |
| "learning_rate": 9.719229274235134e-06, |
| "loss": 4.578685760498047, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.2725490196078431, |
| "grad_norm": 21.699565887451172, |
| "learning_rate": 9.713958058756985e-06, |
| "loss": 4.3331217765808105, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.27450980392156865, |
| "grad_norm": 2818.811279296875, |
| "learning_rate": 9.708639277730112e-06, |
| "loss": 4.388368606567383, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.27647058823529413, |
| "grad_norm": 58.03261184692383, |
| "learning_rate": 9.703272984822947e-06, |
| "loss": 4.328610897064209, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.2784313725490196, |
| "grad_norm": 85.0673599243164, |
| "learning_rate": 9.697859234183336e-06, |
| "loss": 4.389078617095947, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.2803921568627451, |
| "grad_norm": 47.508522033691406, |
| "learning_rate": 9.692398080437991e-06, |
| "loss": 4.623535633087158, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.2823529411764706, |
| "grad_norm": 26.19891357421875, |
| "learning_rate": 9.68688957869193e-06, |
| "loss": 4.690242767333984, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.28431372549019607, |
| "grad_norm": 27.067237854003906, |
| "learning_rate": 9.681333784527945e-06, |
| "loss": 4.570530414581299, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.28627450980392155, |
| "grad_norm": 241.20358276367188, |
| "learning_rate": 9.67573075400601e-06, |
| "loss": 4.467217922210693, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.28823529411764703, |
| "grad_norm": 14.455266952514648, |
| "learning_rate": 9.670080543662742e-06, |
| "loss": 4.641494274139404, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.2901960784313726, |
| "grad_norm": 31.17917251586914, |
| "learning_rate": 9.66438321051081e-06, |
| "loss": 4.5704450607299805, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.29215686274509806, |
| "grad_norm": 31.33642578125, |
| "learning_rate": 9.658638812038379e-06, |
| "loss": 4.436771869659424, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.29411764705882354, |
| "grad_norm": 44.800392150878906, |
| "learning_rate": 9.652847406208514e-06, |
| "loss": 4.712490558624268, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.296078431372549, |
| "grad_norm": 25.672563552856445, |
| "learning_rate": 9.647009051458604e-06, |
| "loss": 5.042919158935547, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.2980392156862745, |
| "grad_norm": 16.580514907836914, |
| "learning_rate": 9.641123806699769e-06, |
| "loss": 4.510254859924316, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 15.236343383789062, |
| "learning_rate": 9.635191731316262e-06, |
| "loss": 4.711069583892822, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.30196078431372547, |
| "grad_norm": 216.19012451171875, |
| "learning_rate": 9.629212885164882e-06, |
| "loss": 4.6602983474731445, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.30392156862745096, |
| "grad_norm": 23.054521560668945, |
| "learning_rate": 9.623187328574357e-06, |
| "loss": 4.656505584716797, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.3058823529411765, |
| "grad_norm": 75.67474365234375, |
| "learning_rate": 9.617115122344742e-06, |
| "loss": 4.795361042022705, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.307843137254902, |
| "grad_norm": 10.801730155944824, |
| "learning_rate": 9.6109963277468e-06, |
| "loss": 4.377931118011475, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.30980392156862746, |
| "grad_norm": 50.18651580810547, |
| "learning_rate": 9.604831006521393e-06, |
| "loss": 4.209827423095703, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.31176470588235294, |
| "grad_norm": 65.79145812988281, |
| "learning_rate": 9.598619220878852e-06, |
| "loss": 4.403324127197266, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.3137254901960784, |
| "grad_norm": 717.3584594726562, |
| "learning_rate": 9.592361033498349e-06, |
| "loss": 4.686285972595215, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.3156862745098039, |
| "grad_norm": 6.576393127441406, |
| "learning_rate": 9.586056507527266e-06, |
| "loss": 4.493882179260254, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.3176470588235294, |
| "grad_norm": 22.952342987060547, |
| "learning_rate": 9.57970570658056e-06, |
| "loss": 3.9739434719085693, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.3196078431372549, |
| "grad_norm": 19.122220993041992, |
| "learning_rate": 9.57330869474012e-06, |
| "loss": 4.901614189147949, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.3215686274509804, |
| "grad_norm": 13.962799072265625, |
| "learning_rate": 9.566865536554119e-06, |
| "loss": 4.684842109680176, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.3235294117647059, |
| "grad_norm": 588.8753051757812, |
| "learning_rate": 9.560376297036362e-06, |
| "loss": 4.2213664054870605, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.3254901960784314, |
| "grad_norm": 52.16958999633789, |
| "learning_rate": 9.553841041665632e-06, |
| "loss": 4.610918045043945, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.32745098039215687, |
| "grad_norm": 10.890721321105957, |
| "learning_rate": 9.54725983638503e-06, |
| "loss": 4.533082485198975, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.32941176470588235, |
| "grad_norm": 16.27850914001465, |
| "learning_rate": 9.540632747601309e-06, |
| "loss": 4.84617805480957, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.33137254901960783, |
| "grad_norm": 23.593048095703125, |
| "learning_rate": 9.533959842184195e-06, |
| "loss": 4.829172611236572, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 12.077108383178711, |
| "learning_rate": 9.527241187465735e-06, |
| "loss": 4.293300151824951, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.3352941176470588, |
| "grad_norm": 21.818492889404297, |
| "learning_rate": 9.520476851239588e-06, |
| "loss": 4.222914695739746, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.33725490196078434, |
| "grad_norm": 14.46260929107666, |
| "learning_rate": 9.513666901760368e-06, |
| "loss": 4.497089385986328, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.3392156862745098, |
| "grad_norm": 15.194242477416992, |
| "learning_rate": 9.506811407742938e-06, |
| "loss": 4.559511661529541, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.3411764705882353, |
| "grad_norm": 23.59486198425293, |
| "learning_rate": 9.49991043836172e-06, |
| "loss": 4.664986610412598, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.3431372549019608, |
| "grad_norm": 12.736374855041504, |
| "learning_rate": 9.49296406325e-06, |
| "loss": 3.997081756591797, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.34509803921568627, |
| "grad_norm": 32.026031494140625, |
| "learning_rate": 9.485972352499231e-06, |
| "loss": 3.9222970008850098, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.34705882352941175, |
| "grad_norm": 74.40033721923828, |
| "learning_rate": 9.478935376658308e-06, |
| "loss": 4.217952728271484, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.34901960784313724, |
| "grad_norm": 32.45917892456055, |
| "learning_rate": 9.471853206732875e-06, |
| "loss": 4.877760887145996, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.3509803921568627, |
| "grad_norm": 133.5160675048828, |
| "learning_rate": 9.4647259141846e-06, |
| "loss": 4.599568843841553, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.35294117647058826, |
| "grad_norm": 110.89315795898438, |
| "learning_rate": 9.457553570930451e-06, |
| "loss": 4.784282684326172, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.35490196078431374, |
| "grad_norm": 7.933850288391113, |
| "learning_rate": 9.450336249341976e-06, |
| "loss": 4.384489059448242, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.3568627450980392, |
| "grad_norm": 90.08252716064453, |
| "learning_rate": 9.443074022244573e-06, |
| "loss": 4.630293846130371, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.3588235294117647, |
| "grad_norm": 203.20277404785156, |
| "learning_rate": 9.435766962916749e-06, |
| "loss": 4.612138748168945, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.3607843137254902, |
| "grad_norm": 29.04180145263672, |
| "learning_rate": 9.428415145089385e-06, |
| "loss": 4.887096405029297, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.3627450980392157, |
| "grad_norm": 21.732030868530273, |
| "learning_rate": 9.421018642944996e-06, |
| "loss": 4.200204372406006, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.36470588235294116, |
| "grad_norm": 88.96598052978516, |
| "learning_rate": 9.413577531116973e-06, |
| "loss": 4.376042366027832, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.36666666666666664, |
| "grad_norm": 97.27217102050781, |
| "learning_rate": 9.406091884688837e-06, |
| "loss": 4.695228099822998, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.3686274509803922, |
| "grad_norm": 50.880985260009766, |
| "learning_rate": 9.398561779193477e-06, |
| "loss": 4.356112003326416, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.37058823529411766, |
| "grad_norm": 12.541425704956055, |
| "learning_rate": 9.390987290612396e-06, |
| "loss": 4.752440452575684, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.37254901960784315, |
| "grad_norm": 236.70387268066406, |
| "learning_rate": 9.38336849537493e-06, |
| "loss": 4.593203067779541, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.37450980392156863, |
| "grad_norm": 2993.429931640625, |
| "learning_rate": 9.375705470357493e-06, |
| "loss": 4.44484806060791, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.3764705882352941, |
| "grad_norm": 150.78634643554688, |
| "learning_rate": 9.367998292882789e-06, |
| "loss": 3.959789514541626, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.3784313725490196, |
| "grad_norm": 39.33837127685547, |
| "learning_rate": 9.36024704071904e-06, |
| "loss": 4.030791759490967, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.3803921568627451, |
| "grad_norm": 18.125226974487305, |
| "learning_rate": 9.35245179207919e-06, |
| "loss": 4.262718200683594, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.38235294117647056, |
| "grad_norm": 70.55377960205078, |
| "learning_rate": 9.344612625620134e-06, |
| "loss": 4.606302261352539, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.3843137254901961, |
| "grad_norm": 1980.358154296875, |
| "learning_rate": 9.336729620441906e-06, |
| "loss": 4.8002824783325195, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.3862745098039216, |
| "grad_norm": 31.0122013092041, |
| "learning_rate": 9.328802856086891e-06, |
| "loss": 4.136668682098389, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.38823529411764707, |
| "grad_norm": 13.777653694152832, |
| "learning_rate": 9.32083241253902e-06, |
| "loss": 4.453038692474365, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.39019607843137255, |
| "grad_norm": 15.520337104797363, |
| "learning_rate": 9.312818370222962e-06, |
| "loss": 4.666173934936523, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.39215686274509803, |
| "grad_norm": 25.686555862426758, |
| "learning_rate": 9.304760810003318e-06, |
| "loss": 4.567206859588623, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.3941176470588235, |
| "grad_norm": 34.059505462646484, |
| "learning_rate": 9.296659813183794e-06, |
| "loss": 4.661189556121826, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.396078431372549, |
| "grad_norm": 20.709781646728516, |
| "learning_rate": 9.28851546150639e-06, |
| "loss": 4.140271186828613, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.3980392156862745, |
| "grad_norm": 52.71310043334961, |
| "learning_rate": 9.280327837150572e-06, |
| "loss": 4.564424514770508, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 16.125553131103516, |
| "learning_rate": 9.272097022732444e-06, |
| "loss": 4.352408409118652, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.4019607843137255, |
| "grad_norm": 10.148744583129883, |
| "learning_rate": 9.263823101303911e-06, |
| "loss": 3.883530616760254, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.403921568627451, |
| "grad_norm": 22.48365020751953, |
| "learning_rate": 9.255506156351846e-06, |
| "loss": 4.559526443481445, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.40588235294117647, |
| "grad_norm": 1699.036865234375, |
| "learning_rate": 9.247146271797244e-06, |
| "loss": 4.905045509338379, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.40784313725490196, |
| "grad_norm": 7.313532829284668, |
| "learning_rate": 9.238743531994378e-06, |
| "loss": 3.642618417739868, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.40980392156862744, |
| "grad_norm": 35.672157287597656, |
| "learning_rate": 9.23029802172994e-06, |
| "loss": 4.230594635009766, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.4117647058823529, |
| "grad_norm": 202.3529815673828, |
| "learning_rate": 9.221809826222198e-06, |
| "loss": 4.2360124588012695, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.4137254901960784, |
| "grad_norm": 117.8387222290039, |
| "learning_rate": 9.213279031120129e-06, |
| "loss": 4.491461277008057, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.41568627450980394, |
| "grad_norm": 34.6633186340332, |
| "learning_rate": 9.20470572250255e-06, |
| "loss": 4.295816898345947, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.4176470588235294, |
| "grad_norm": 8.826363563537598, |
| "learning_rate": 9.196089986877262e-06, |
| "loss": 4.463611602783203, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.4196078431372549, |
| "grad_norm": 26.18223762512207, |
| "learning_rate": 9.18743191118016e-06, |
| "loss": 4.4809675216674805, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.4215686274509804, |
| "grad_norm": 23.7016544342041, |
| "learning_rate": 9.17873158277438e-06, |
| "loss": 4.53269100189209, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.4235294117647059, |
| "grad_norm": 147.547119140625, |
| "learning_rate": 9.16998908944939e-06, |
| "loss": 4.548085689544678, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.42549019607843136, |
| "grad_norm": 33.69279098510742, |
| "learning_rate": 9.161204519420126e-06, |
| "loss": 4.510319709777832, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.42745098039215684, |
| "grad_norm": 21.794368743896484, |
| "learning_rate": 9.152377961326085e-06, |
| "loss": 3.977487564086914, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.4294117647058823, |
| "grad_norm": 17.71551513671875, |
| "learning_rate": 9.14350950423045e-06, |
| "loss": 4.281040191650391, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.43137254901960786, |
| "grad_norm": 91.0526123046875, |
| "learning_rate": 9.134599237619167e-06, |
| "loss": 4.954435348510742, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.43333333333333335, |
| "grad_norm": 39.16487121582031, |
| "learning_rate": 9.125647251400068e-06, |
| "loss": 4.5113935470581055, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.43529411764705883, |
| "grad_norm": 304.0317687988281, |
| "learning_rate": 9.11665363590194e-06, |
| "loss": 4.286795139312744, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.4372549019607843, |
| "grad_norm": 64.62968444824219, |
| "learning_rate": 9.107618481873632e-06, |
| "loss": 4.302193641662598, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.4392156862745098, |
| "grad_norm": 9.797171592712402, |
| "learning_rate": 9.098541880483129e-06, |
| "loss": 4.519267559051514, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.4411764705882353, |
| "grad_norm": 16.81446647644043, |
| "learning_rate": 9.089423923316636e-06, |
| "loss": 4.170806884765625, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.44313725490196076, |
| "grad_norm": 97.48379516601562, |
| "learning_rate": 9.08026470237765e-06, |
| "loss": 4.171516418457031, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.44509803921568625, |
| "grad_norm": 8.226983070373535, |
| "learning_rate": 9.07106431008604e-06, |
| "loss": 4.536833763122559, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.4470588235294118, |
| "grad_norm": 17.50508689880371, |
| "learning_rate": 9.0618228392771e-06, |
| "loss": 4.602504730224609, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.44901960784313727, |
| "grad_norm": 1409.4854736328125, |
| "learning_rate": 9.052540383200634e-06, |
| "loss": 4.213375091552734, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.45098039215686275, |
| "grad_norm": 18.96872901916504, |
| "learning_rate": 9.043217035519986e-06, |
| "loss": 4.0827131271362305, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.45294117647058824, |
| "grad_norm": 16.178728103637695, |
| "learning_rate": 9.033852890311127e-06, |
| "loss": 4.6328125, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.4549019607843137, |
| "grad_norm": 10.366260528564453, |
| "learning_rate": 9.02444804206168e-06, |
| "loss": 4.5647430419921875, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.4568627450980392, |
| "grad_norm": 17.84885597229004, |
| "learning_rate": 9.01500258566998e-06, |
| "loss": 4.389437198638916, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.4588235294117647, |
| "grad_norm": 11.878860473632812, |
| "learning_rate": 9.005516616444112e-06, |
| "loss": 4.770614147186279, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.46078431372549017, |
| "grad_norm": 85.99885559082031, |
| "learning_rate": 8.99599023010095e-06, |
| "loss": 4.427285194396973, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.4627450980392157, |
| "grad_norm": 77.5919189453125, |
| "learning_rate": 8.986423522765191e-06, |
| "loss": 4.447712421417236, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.4647058823529412, |
| "grad_norm": 19.800437927246094, |
| "learning_rate": 8.976816590968388e-06, |
| "loss": 4.388566017150879, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.4666666666666667, |
| "grad_norm": 29.633106231689453, |
| "learning_rate": 8.967169531647971e-06, |
| "loss": 4.662332534790039, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.46862745098039216, |
| "grad_norm": 11.136107444763184, |
| "learning_rate": 8.957482442146271e-06, |
| "loss": 4.721919059753418, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.47058823529411764, |
| "grad_norm": 23.172388076782227, |
| "learning_rate": 8.947755420209541e-06, |
| "loss": 4.55937385559082, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.4725490196078431, |
| "grad_norm": 22.51704216003418, |
| "learning_rate": 8.937988563986963e-06, |
| "loss": 4.346927642822266, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.4745098039215686, |
| "grad_norm": 9.842615127563477, |
| "learning_rate": 8.928181972029664e-06, |
| "loss": 4.690535545349121, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.4764705882352941, |
| "grad_norm": 158.396484375, |
| "learning_rate": 8.918335743289717e-06, |
| "loss": 4.770160675048828, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.47843137254901963, |
| "grad_norm": 18.87772560119629, |
| "learning_rate": 8.90844997711915e-06, |
| "loss": 4.694735527038574, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.4803921568627451, |
| "grad_norm": 10.401981353759766, |
| "learning_rate": 8.898524773268926e-06, |
| "loss": 4.433718681335449, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.4823529411764706, |
| "grad_norm": 35.996971130371094, |
| "learning_rate": 8.888560231887963e-06, |
| "loss": 4.435983180999756, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.4843137254901961, |
| "grad_norm": 144.66212463378906, |
| "learning_rate": 8.8785564535221e-06, |
| "loss": 4.828408241271973, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.48627450980392156, |
| "grad_norm": 61.79336166381836, |
| "learning_rate": 8.868513539113093e-06, |
| "loss": 4.478762149810791, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.48823529411764705, |
| "grad_norm": 111.1070556640625, |
| "learning_rate": 8.858431589997597e-06, |
| "loss": 4.791953086853027, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.49019607843137253, |
| "grad_norm": 43.53703308105469, |
| "learning_rate": 8.848310707906138e-06, |
| "loss": 4.221644401550293, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.492156862745098, |
| "grad_norm": 30.130136489868164, |
| "learning_rate": 8.838150994962094e-06, |
| "loss": 3.9666197299957275, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.49411764705882355, |
| "grad_norm": 24.61577033996582, |
| "learning_rate": 8.827952553680656e-06, |
| "loss": 4.494099140167236, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.49607843137254903, |
| "grad_norm": 95.78424835205078, |
| "learning_rate": 8.817715486967803e-06, |
| "loss": 4.37385368347168, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.4980392156862745, |
| "grad_norm": 12.204360008239746, |
| "learning_rate": 8.807439898119252e-06, |
| "loss": 4.433926582336426, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 14.268149375915527, |
| "learning_rate": 8.797125890819429e-06, |
| "loss": 4.530971527099609, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.5019607843137255, |
| "grad_norm": 17.889156341552734, |
| "learning_rate": 8.786773569140414e-06, |
| "loss": 4.126347541809082, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.503921568627451, |
| "grad_norm": 18.556318283081055, |
| "learning_rate": 8.776383037540888e-06, |
| "loss": 4.337622165679932, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.5058823529411764, |
| "grad_norm": 679.375244140625, |
| "learning_rate": 8.765954400865093e-06, |
| "loss": 4.433990478515625, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.5078431372549019, |
| "grad_norm": 42.50425338745117, |
| "learning_rate": 8.755487764341756e-06, |
| "loss": 4.115379810333252, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.5098039215686274, |
| "grad_norm": 53.077613830566406, |
| "learning_rate": 8.744983233583044e-06, |
| "loss": 4.244760036468506, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.5117647058823529, |
| "grad_norm": 17.79299545288086, |
| "learning_rate": 8.734440914583486e-06, |
| "loss": 4.051713943481445, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.5137254901960784, |
| "grad_norm": 15.688224792480469, |
| "learning_rate": 8.72386091371891e-06, |
| "loss": 4.603087425231934, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.515686274509804, |
| "grad_norm": 55.612709045410156, |
| "learning_rate": 8.713243337745366e-06, |
| "loss": 4.455329418182373, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.5176470588235295, |
| "grad_norm": 18.093618392944336, |
| "learning_rate": 8.70258829379805e-06, |
| "loss": 4.481441497802734, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.5196078431372549, |
| "grad_norm": 290.83355712890625, |
| "learning_rate": 8.691895889390228e-06, |
| "loss": 4.285877227783203, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.5215686274509804, |
| "grad_norm": 17.16389274597168, |
| "learning_rate": 8.681166232412142e-06, |
| "loss": 4.445030212402344, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.5235294117647059, |
| "grad_norm": 26.885225296020508, |
| "learning_rate": 8.670399431129926e-06, |
| "loss": 4.563932418823242, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.5254901960784314, |
| "grad_norm": 42.71232604980469, |
| "learning_rate": 8.659595594184516e-06, |
| "loss": 3.8843421936035156, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.5274509803921569, |
| "grad_norm": 28.694578170776367, |
| "learning_rate": 8.648754830590552e-06, |
| "loss": 3.6372265815734863, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.5294117647058824, |
| "grad_norm": 43.32936477661133, |
| "learning_rate": 8.637877249735274e-06, |
| "loss": 4.222830295562744, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.5313725490196078, |
| "grad_norm": 72.35948944091797, |
| "learning_rate": 8.626962961377423e-06, |
| "loss": 4.611291408538818, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.5333333333333333, |
| "grad_norm": 15.251282691955566, |
| "learning_rate": 8.616012075646134e-06, |
| "loss": 4.276963233947754, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.5352941176470588, |
| "grad_norm": 776.6341552734375, |
| "learning_rate": 8.605024703039817e-06, |
| "loss": 4.6572585105896, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.5372549019607843, |
| "grad_norm": 22.79600715637207, |
| "learning_rate": 8.594000954425056e-06, |
| "loss": 4.917038917541504, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.5392156862745098, |
| "grad_norm": 10.690701484680176, |
| "learning_rate": 8.582940941035476e-06, |
| "loss": 4.69964599609375, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.5411764705882353, |
| "grad_norm": 86.31309509277344, |
| "learning_rate": 8.571844774470627e-06, |
| "loss": 4.586027145385742, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.5431372549019607, |
| "grad_norm": 17.10081672668457, |
| "learning_rate": 8.560712566694863e-06, |
| "loss": 4.531658172607422, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.5450980392156862, |
| "grad_norm": 15.138278007507324, |
| "learning_rate": 8.549544430036198e-06, |
| "loss": 4.515611171722412, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.5470588235294118, |
| "grad_norm": 11.231842994689941, |
| "learning_rate": 8.538340477185191e-06, |
| "loss": 4.401930809020996, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.5490196078431373, |
| "grad_norm": 9.649141311645508, |
| "learning_rate": 8.527100821193797e-06, |
| "loss": 4.2050909996032715, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.5509803921568628, |
| "grad_norm": 44.636756896972656, |
| "learning_rate": 8.51582557547422e-06, |
| "loss": 4.522353649139404, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.5529411764705883, |
| "grad_norm": 10.403969764709473, |
| "learning_rate": 8.504514853797789e-06, |
| "loss": 4.316591262817383, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.5549019607843138, |
| "grad_norm": 45.07345199584961, |
| "learning_rate": 8.493168770293793e-06, |
| "loss": 4.220366477966309, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.5568627450980392, |
| "grad_norm": 81.64495849609375, |
| "learning_rate": 8.481787439448332e-06, |
| "loss": 4.375057220458984, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.5588235294117647, |
| "grad_norm": 22.120563507080078, |
| "learning_rate": 8.470370976103171e-06, |
| "loss": 4.166134834289551, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.5607843137254902, |
| "grad_norm": 10.833823204040527, |
| "learning_rate": 8.458919495454567e-06, |
| "loss": 4.409770965576172, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.5627450980392157, |
| "grad_norm": 81.50211334228516, |
| "learning_rate": 8.447433113052124e-06, |
| "loss": 4.4287190437316895, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.5647058823529412, |
| "grad_norm": 16.984268188476562, |
| "learning_rate": 8.435911944797605e-06, |
| "loss": 4.146420955657959, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.5666666666666667, |
| "grad_norm": 85.84491729736328, |
| "learning_rate": 8.42435610694379e-06, |
| "loss": 4.802792549133301, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.5686274509803921, |
| "grad_norm": 1234.199951171875, |
| "learning_rate": 8.412765716093273e-06, |
| "loss": 4.3419270515441895, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.5705882352941176, |
| "grad_norm": 12.962360382080078, |
| "learning_rate": 8.401140889197305e-06, |
| "loss": 4.3528547286987305, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.5725490196078431, |
| "grad_norm": 26.18000030517578, |
| "learning_rate": 8.38948174355462e-06, |
| "loss": 4.065543174743652, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.5745098039215686, |
| "grad_norm": 35.139549255371094, |
| "learning_rate": 8.377788396810223e-06, |
| "loss": 4.251129150390625, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.5764705882352941, |
| "grad_norm": 37.94339370727539, |
| "learning_rate": 8.366060966954235e-06, |
| "loss": 4.624574661254883, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.5784313725490197, |
| "grad_norm": 11.896288871765137, |
| "learning_rate": 8.354299572320679e-06, |
| "loss": 4.315122604370117, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.5803921568627451, |
| "grad_norm": 41.16322708129883, |
| "learning_rate": 8.342504331586298e-06, |
| "loss": 4.489446640014648, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.5823529411764706, |
| "grad_norm": 28.843425750732422, |
| "learning_rate": 8.330675363769356e-06, |
| "loss": 4.456976890563965, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.5843137254901961, |
| "grad_norm": 14.19128704071045, |
| "learning_rate": 8.318812788228434e-06, |
| "loss": 4.4964447021484375, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.5862745098039216, |
| "grad_norm": 41.6136474609375, |
| "learning_rate": 8.306916724661225e-06, |
| "loss": 4.119976043701172, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.5882352941176471, |
| "grad_norm": 20.12372398376465, |
| "learning_rate": 8.294987293103334e-06, |
| "loss": 4.503427505493164, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.5901960784313726, |
| "grad_norm": 96.8432846069336, |
| "learning_rate": 8.283024613927055e-06, |
| "loss": 4.409475326538086, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.592156862745098, |
| "grad_norm": 24.83096694946289, |
| "learning_rate": 8.271028807840164e-06, |
| "loss": 4.263705730438232, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.5941176470588235, |
| "grad_norm": 92.04113006591797, |
| "learning_rate": 8.258999995884706e-06, |
| "loss": 4.188453674316406, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.596078431372549, |
| "grad_norm": 32.55808639526367, |
| "learning_rate": 8.246938299435759e-06, |
| "loss": 4.0705437660217285, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.5980392156862745, |
| "grad_norm": 28.917774200439453, |
| "learning_rate": 8.234843840200218e-06, |
| "loss": 4.273771286010742, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 14.942693710327148, |
| "learning_rate": 8.222716740215573e-06, |
| "loss": 4.468536376953125, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.6019607843137255, |
| "grad_norm": 76.9916763305664, |
| "learning_rate": 8.210557121848664e-06, |
| "loss": 4.393499851226807, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.6039215686274509, |
| "grad_norm": 87.08967590332031, |
| "learning_rate": 8.198365107794457e-06, |
| "loss": 4.2165422439575195, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.6058823529411764, |
| "grad_norm": 76.53693389892578, |
| "learning_rate": 8.186140821074801e-06, |
| "loss": 4.373100757598877, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.6078431372549019, |
| "grad_norm": 13.614083290100098, |
| "learning_rate": 8.173884385037193e-06, |
| "loss": 4.481573581695557, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.6098039215686275, |
| "grad_norm": 39.6025505065918, |
| "learning_rate": 8.161595923353516e-06, |
| "loss": 4.316531181335449, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.611764705882353, |
| "grad_norm": 41.38591003417969, |
| "learning_rate": 8.149275560018816e-06, |
| "loss": 4.617020130157471, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.6137254901960785, |
| "grad_norm": 156.15367126464844, |
| "learning_rate": 8.136923419350032e-06, |
| "loss": 4.4222869873046875, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.615686274509804, |
| "grad_norm": 32.320045471191406, |
| "learning_rate": 8.12453962598475e-06, |
| "loss": 4.656857967376709, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.6176470588235294, |
| "grad_norm": 17.588327407836914, |
| "learning_rate": 8.112124304879938e-06, |
| "loss": 4.441835403442383, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.6196078431372549, |
| "grad_norm": 30.43013572692871, |
| "learning_rate": 8.0996775813107e-06, |
| "loss": 4.392027854919434, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.6215686274509804, |
| "grad_norm": 21.386093139648438, |
| "learning_rate": 8.087199580868997e-06, |
| "loss": 4.848608016967773, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.6235294117647059, |
| "grad_norm": 81.71918487548828, |
| "learning_rate": 8.07469042946238e-06, |
| "loss": 4.443190097808838, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.6254901960784314, |
| "grad_norm": 20.856964111328125, |
| "learning_rate": 8.062150253312735e-06, |
| "loss": 4.9603166580200195, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.6274509803921569, |
| "grad_norm": 145.81161499023438, |
| "learning_rate": 8.04957917895499e-06, |
| "loss": 4.618847846984863, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.6294117647058823, |
| "grad_norm": 14.190864562988281, |
| "learning_rate": 8.03697733323585e-06, |
| "loss": 4.593096733093262, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.6313725490196078, |
| "grad_norm": 23.47496795654297, |
| "learning_rate": 8.024344843312517e-06, |
| "loss": 4.16273307800293, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.6333333333333333, |
| "grad_norm": 39.30678176879883, |
| "learning_rate": 8.011681836651401e-06, |
| "loss": 4.198973655700684, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.6352941176470588, |
| "grad_norm": 55.29441452026367, |
| "learning_rate": 7.99898844102684e-06, |
| "loss": 4.373217582702637, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.6372549019607843, |
| "grad_norm": 87.39408111572266, |
| "learning_rate": 7.986264784519801e-06, |
| "loss": 4.491955280303955, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.6392156862745098, |
| "grad_norm": 71.22431182861328, |
| "learning_rate": 7.973510995516603e-06, |
| "loss": 4.367103576660156, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.6411764705882353, |
| "grad_norm": 32.032073974609375, |
| "learning_rate": 7.960727202707605e-06, |
| "loss": 4.673696041107178, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.6431372549019608, |
| "grad_norm": 14.861653327941895, |
| "learning_rate": 7.947913535085925e-06, |
| "loss": 4.37457275390625, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.6450980392156863, |
| "grad_norm": 43.02954864501953, |
| "learning_rate": 7.935070121946116e-06, |
| "loss": 4.4756364822387695, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.6470588235294118, |
| "grad_norm": 40.1738395690918, |
| "learning_rate": 7.922197092882882e-06, |
| "loss": 4.393209457397461, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.6490196078431373, |
| "grad_norm": 58.01616668701172, |
| "learning_rate": 7.909294577789765e-06, |
| "loss": 4.345884323120117, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.6509803921568628, |
| "grad_norm": 14.342921257019043, |
| "learning_rate": 7.896362706857825e-06, |
| "loss": 4.297840118408203, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.6529411764705882, |
| "grad_norm": 8.965250015258789, |
| "learning_rate": 7.883401610574338e-06, |
| "loss": 4.571505546569824, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.6549019607843137, |
| "grad_norm": 21.012739181518555, |
| "learning_rate": 7.870411419721468e-06, |
| "loss": 4.734814167022705, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.6568627450980392, |
| "grad_norm": 30.89315414428711, |
| "learning_rate": 7.857392265374963e-06, |
| "loss": 4.410980701446533, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.6588235294117647, |
| "grad_norm": 9.218079566955566, |
| "learning_rate": 7.844344278902815e-06, |
| "loss": 4.341933250427246, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.6607843137254902, |
| "grad_norm": 17.145496368408203, |
| "learning_rate": 7.83126759196395e-06, |
| "loss": 4.177962779998779, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.6627450980392157, |
| "grad_norm": 16.176719665527344, |
| "learning_rate": 7.818162336506885e-06, |
| "loss": 4.377812385559082, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.6647058823529411, |
| "grad_norm": 23.922420501708984, |
| "learning_rate": 7.805028644768407e-06, |
| "loss": 3.9010050296783447, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 467.1762390136719, |
| "learning_rate": 7.791866649272236e-06, |
| "loss": 4.715754508972168, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.6686274509803921, |
| "grad_norm": 86.71304321289062, |
| "learning_rate": 7.778676482827686e-06, |
| "loss": 4.450630187988281, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.6705882352941176, |
| "grad_norm": 208.03579711914062, |
| "learning_rate": 7.765458278528327e-06, |
| "loss": 4.261456489562988, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.6725490196078432, |
| "grad_norm": 10.478469848632812, |
| "learning_rate": 7.752212169750642e-06, |
| "loss": 3.6522653102874756, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.6745098039215687, |
| "grad_norm": 109.38081359863281, |
| "learning_rate": 7.738938290152675e-06, |
| "loss": 4.505516529083252, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.6764705882352942, |
| "grad_norm": 114.52716827392578, |
| "learning_rate": 7.725636773672694e-06, |
| "loss": 4.370604038238525, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.6784313725490196, |
| "grad_norm": 14.925344467163086, |
| "learning_rate": 7.712307754527832e-06, |
| "loss": 4.476314544677734, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.6803921568627451, |
| "grad_norm": 53.75554275512695, |
| "learning_rate": 7.69895136721273e-06, |
| "loss": 4.616474151611328, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.6823529411764706, |
| "grad_norm": 42.78981399536133, |
| "learning_rate": 7.685567746498191e-06, |
| "loss": 4.426451683044434, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.6843137254901961, |
| "grad_norm": 80.6156234741211, |
| "learning_rate": 7.672157027429803e-06, |
| "loss": 4.43165397644043, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.6862745098039216, |
| "grad_norm": 46.6151123046875, |
| "learning_rate": 7.658719345326595e-06, |
| "loss": 4.440042495727539, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.6882352941176471, |
| "grad_norm": 104.17079162597656, |
| "learning_rate": 7.645254835779657e-06, |
| "loss": 4.430392265319824, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.6901960784313725, |
| "grad_norm": 502.8309020996094, |
| "learning_rate": 7.631763634650783e-06, |
| "loss": 4.152275562286377, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.692156862745098, |
| "grad_norm": 23.371397018432617, |
| "learning_rate": 7.618245878071091e-06, |
| "loss": 4.074784278869629, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.6941176470588235, |
| "grad_norm": 93.83110809326172, |
| "learning_rate": 7.604701702439652e-06, |
| "loss": 4.7920379638671875, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.696078431372549, |
| "grad_norm": 20.388896942138672, |
| "learning_rate": 7.591131244422118e-06, |
| "loss": 4.258466720581055, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.6980392156862745, |
| "grad_norm": 25.022539138793945, |
| "learning_rate": 7.57753464094934e-06, |
| "loss": 4.348616600036621, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 24.122034072875977, |
| "learning_rate": 7.563912029215983e-06, |
| "loss": 4.450387954711914, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.7019607843137254, |
| "grad_norm": 47.63944625854492, |
| "learning_rate": 7.550263546679148e-06, |
| "loss": 4.950525760650635, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.703921568627451, |
| "grad_norm": 18.94533348083496, |
| "learning_rate": 7.536589331056976e-06, |
| "loss": 5.012373924255371, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.7058823529411765, |
| "grad_norm": 96.9211196899414, |
| "learning_rate": 7.522889520327275e-06, |
| "loss": 4.925107955932617, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.707843137254902, |
| "grad_norm": 42.01844024658203, |
| "learning_rate": 7.509164252726107e-06, |
| "loss": 4.5356903076171875, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.7098039215686275, |
| "grad_norm": 59.13837814331055, |
| "learning_rate": 7.495413666746406e-06, |
| "loss": 4.559690475463867, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.711764705882353, |
| "grad_norm": 67.81266784667969, |
| "learning_rate": 7.481637901136578e-06, |
| "loss": 4.372148513793945, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.7137254901960784, |
| "grad_norm": 35.04251480102539, |
| "learning_rate": 7.467837094899104e-06, |
| "loss": 4.26740837097168, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.7156862745098039, |
| "grad_norm": 28.242380142211914, |
| "learning_rate": 7.454011387289127e-06, |
| "loss": 4.79606819152832, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.7176470588235294, |
| "grad_norm": 33.4775505065918, |
| "learning_rate": 7.440160917813059e-06, |
| "loss": 4.412802696228027, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.7196078431372549, |
| "grad_norm": 58.53046417236328, |
| "learning_rate": 7.426285826227171e-06, |
| "loss": 3.844216823577881, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.7215686274509804, |
| "grad_norm": 67.4899673461914, |
| "learning_rate": 7.412386252536168e-06, |
| "loss": 4.131912708282471, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.7235294117647059, |
| "grad_norm": 3803.90185546875, |
| "learning_rate": 7.398462336991802e-06, |
| "loss": 4.365766525268555, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.7254901960784313, |
| "grad_norm": 76.64689636230469, |
| "learning_rate": 7.384514220091437e-06, |
| "loss": 4.796448707580566, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.7274509803921568, |
| "grad_norm": 101.20423126220703, |
| "learning_rate": 7.370542042576635e-06, |
| "loss": 4.397182941436768, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.7294117647058823, |
| "grad_norm": 73.64826965332031, |
| "learning_rate": 7.356545945431744e-06, |
| "loss": 4.440197944641113, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.7313725490196078, |
| "grad_norm": 66.81968688964844, |
| "learning_rate": 7.342526069882465e-06, |
| "loss": 4.575042724609375, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.7333333333333333, |
| "grad_norm": 40.752525329589844, |
| "learning_rate": 7.328482557394435e-06, |
| "loss": 4.159431457519531, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.7352941176470589, |
| "grad_norm": 27.12578010559082, |
| "learning_rate": 7.314415549671795e-06, |
| "loss": 4.4377336502075195, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.7372549019607844, |
| "grad_norm": 231.64231872558594, |
| "learning_rate": 7.300325188655762e-06, |
| "loss": 4.438188552856445, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.7392156862745098, |
| "grad_norm": 23.71122932434082, |
| "learning_rate": 7.286211616523193e-06, |
| "loss": 4.190389633178711, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.7411764705882353, |
| "grad_norm": 277.79718017578125, |
| "learning_rate": 7.27207497568516e-06, |
| "loss": 3.8417224884033203, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.7431372549019608, |
| "grad_norm": 30.528398513793945, |
| "learning_rate": 7.257915408785499e-06, |
| "loss": 4.584486961364746, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.7450980392156863, |
| "grad_norm": 618.6641845703125, |
| "learning_rate": 7.243733058699386e-06, |
| "loss": 4.159678936004639, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.7470588235294118, |
| "grad_norm": 20.201461791992188, |
| "learning_rate": 7.229528068531881e-06, |
| "loss": 4.334630489349365, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.7490196078431373, |
| "grad_norm": 69.16433715820312, |
| "learning_rate": 7.215300581616496e-06, |
| "loss": 4.4458160400390625, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.7509803921568627, |
| "grad_norm": 50.7408332824707, |
| "learning_rate": 7.201050741513735e-06, |
| "loss": 4.584663391113281, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.7529411764705882, |
| "grad_norm": 71.0459213256836, |
| "learning_rate": 7.186778692009669e-06, |
| "loss": 4.636325359344482, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.7549019607843137, |
| "grad_norm": 38.0345344543457, |
| "learning_rate": 7.172484577114452e-06, |
| "loss": 4.060024261474609, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.7568627450980392, |
| "grad_norm": 40.320499420166016, |
| "learning_rate": 7.1581685410609e-06, |
| "loss": 4.512998580932617, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.7588235294117647, |
| "grad_norm": 33.043148040771484, |
| "learning_rate": 7.1438307283030106e-06, |
| "loss": 4.692201614379883, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.7607843137254902, |
| "grad_norm": 137.872314453125, |
| "learning_rate": 7.129471283514525e-06, |
| "loss": 4.415122985839844, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.7627450980392156, |
| "grad_norm": 76.47061157226562, |
| "learning_rate": 7.115090351587455e-06, |
| "loss": 4.573295593261719, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.7647058823529411, |
| "grad_norm": 328.7730712890625, |
| "learning_rate": 7.100688077630628e-06, |
| "loss": 4.141142845153809, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.7666666666666667, |
| "grad_norm": 43.609642028808594, |
| "learning_rate": 7.086264606968215e-06, |
| "loss": 4.211104393005371, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.7686274509803922, |
| "grad_norm": 125.07260131835938, |
| "learning_rate": 7.071820085138275e-06, |
| "loss": 4.797672271728516, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.7705882352941177, |
| "grad_norm": 61.34317398071289, |
| "learning_rate": 7.05735465789128e-06, |
| "loss": 4.348987579345703, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.7725490196078432, |
| "grad_norm": 75.8901596069336, |
| "learning_rate": 7.042868471188642e-06, |
| "loss": 4.376434803009033, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.7745098039215687, |
| "grad_norm": 172.4529571533203, |
| "learning_rate": 7.028361671201245e-06, |
| "loss": 4.280189514160156, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.7764705882352941, |
| "grad_norm": 67.70394897460938, |
| "learning_rate": 7.013834404307972e-06, |
| "loss": 4.715417861938477, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.7784313725490196, |
| "grad_norm": 17.541534423828125, |
| "learning_rate": 6.9992868170942205e-06, |
| "loss": 4.301790237426758, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.7803921568627451, |
| "grad_norm": 17.7639217376709, |
| "learning_rate": 6.9847190563504284e-06, |
| "loss": 4.332895278930664, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.7823529411764706, |
| "grad_norm": 77.33992767333984, |
| "learning_rate": 6.970131269070591e-06, |
| "loss": 4.072659492492676, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.7843137254901961, |
| "grad_norm": 30.242053985595703, |
| "learning_rate": 6.95552360245078e-06, |
| "loss": 4.5907301902771, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.7862745098039216, |
| "grad_norm": 157.5186767578125, |
| "learning_rate": 6.940896203887659e-06, |
| "loss": 4.161381721496582, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.788235294117647, |
| "grad_norm": 48.990875244140625, |
| "learning_rate": 6.926249220976988e-06, |
| "loss": 4.169566631317139, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.7901960784313725, |
| "grad_norm": 33.2501220703125, |
| "learning_rate": 6.911582801512146e-06, |
| "loss": 4.37021017074585, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.792156862745098, |
| "grad_norm": 29.51424789428711, |
| "learning_rate": 6.8968970934826296e-06, |
| "loss": 3.938095808029175, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.7941176470588235, |
| "grad_norm": 86.2677001953125, |
| "learning_rate": 6.88219224507257e-06, |
| "loss": 4.483772277832031, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.796078431372549, |
| "grad_norm": 25.195167541503906, |
| "learning_rate": 6.867468404659222e-06, |
| "loss": 4.458285331726074, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.7980392156862746, |
| "grad_norm": 22.213388442993164, |
| "learning_rate": 6.852725720811487e-06, |
| "loss": 3.883963108062744, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 60.982303619384766, |
| "learning_rate": 6.837964342288399e-06, |
| "loss": 4.16390323638916, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.8019607843137255, |
| "grad_norm": 255.90065002441406, |
| "learning_rate": 6.823184418037625e-06, |
| "loss": 4.246565818786621, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.803921568627451, |
| "grad_norm": 80.53013610839844, |
| "learning_rate": 6.808386097193969e-06, |
| "loss": 3.941505193710327, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.8058823529411765, |
| "grad_norm": 70.98387145996094, |
| "learning_rate": 6.793569529077864e-06, |
| "loss": 4.301395416259766, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.807843137254902, |
| "grad_norm": 117.46578979492188, |
| "learning_rate": 6.778734863193862e-06, |
| "loss": 4.3663835525512695, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.8098039215686275, |
| "grad_norm": 32.978851318359375, |
| "learning_rate": 6.76388224922913e-06, |
| "loss": 4.204647064208984, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.8117647058823529, |
| "grad_norm": 21.284744262695312, |
| "learning_rate": 6.7490118370519356e-06, |
| "loss": 4.878431797027588, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.8137254901960784, |
| "grad_norm": 116.35888671875, |
| "learning_rate": 6.7341237767101375e-06, |
| "loss": 4.751389503479004, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.8156862745098039, |
| "grad_norm": 58.715450286865234, |
| "learning_rate": 6.7192182184296725e-06, |
| "loss": 4.176122665405273, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.8176470588235294, |
| "grad_norm": 266.84912109375, |
| "learning_rate": 6.704295312613037e-06, |
| "loss": 4.090945243835449, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.8196078431372549, |
| "grad_norm": 282.4599609375, |
| "learning_rate": 6.689355209837769e-06, |
| "loss": 4.8022003173828125, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.8215686274509804, |
| "grad_norm": 17.034099578857422, |
| "learning_rate": 6.674398060854931e-06, |
| "loss": 4.382605075836182, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.8235294117647058, |
| "grad_norm": 45.16923904418945, |
| "learning_rate": 6.65942401658759e-06, |
| "loss": 4.5414934158325195, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.8254901960784313, |
| "grad_norm": 68.16588592529297, |
| "learning_rate": 6.644433228129288e-06, |
| "loss": 4.519162654876709, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.8274509803921568, |
| "grad_norm": 30.64187240600586, |
| "learning_rate": 6.6294258467425256e-06, |
| "loss": 4.278877258300781, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.8294117647058824, |
| "grad_norm": 50.78958511352539, |
| "learning_rate": 6.614402023857231e-06, |
| "loss": 4.127124786376953, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.8313725490196079, |
| "grad_norm": 16.406599044799805, |
| "learning_rate": 6.599361911069235e-06, |
| "loss": 4.240130424499512, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.8333333333333334, |
| "grad_norm": 8.257003784179688, |
| "learning_rate": 6.584305660138734e-06, |
| "loss": 4.398717880249023, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.8352941176470589, |
| "grad_norm": 53.47596740722656, |
| "learning_rate": 6.569233422988771e-06, |
| "loss": 4.362873554229736, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.8372549019607843, |
| "grad_norm": 163.6345672607422, |
| "learning_rate": 6.554145351703689e-06, |
| "loss": 4.455537796020508, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.8392156862745098, |
| "grad_norm": 222.5769805908203, |
| "learning_rate": 6.539041598527612e-06, |
| "loss": 4.446180820465088, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.8411764705882353, |
| "grad_norm": 193.03797912597656, |
| "learning_rate": 6.523922315862887e-06, |
| "loss": 3.9635980129241943, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.8431372549019608, |
| "grad_norm": 259.9488830566406, |
| "learning_rate": 6.508787656268573e-06, |
| "loss": 4.20033073425293, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.8450980392156863, |
| "grad_norm": 79.64652252197266, |
| "learning_rate": 6.4936377724588794e-06, |
| "loss": 4.132991790771484, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.8470588235294118, |
| "grad_norm": 89.49686431884766, |
| "learning_rate": 6.478472817301635e-06, |
| "loss": 4.8201904296875, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.8490196078431372, |
| "grad_norm": 36.88655090332031, |
| "learning_rate": 6.463292943816747e-06, |
| "loss": 4.225468635559082, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.8509803921568627, |
| "grad_norm": 167.7744140625, |
| "learning_rate": 6.448098305174648e-06, |
| "loss": 4.24064826965332, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.8529411764705882, |
| "grad_norm": 320.5234069824219, |
| "learning_rate": 6.4328890546947645e-06, |
| "loss": 4.498333930969238, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.8549019607843137, |
| "grad_norm": 108.37223815917969, |
| "learning_rate": 6.417665345843952e-06, |
| "loss": 3.9274849891662598, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.8568627450980392, |
| "grad_norm": 53.389373779296875, |
| "learning_rate": 6.402427332234965e-06, |
| "loss": 4.50510835647583, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.8588235294117647, |
| "grad_norm": 586.7931518554688, |
| "learning_rate": 6.387175167624894e-06, |
| "loss": 4.44918966293335, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.8607843137254902, |
| "grad_norm": 64.9321517944336, |
| "learning_rate": 6.371909005913618e-06, |
| "loss": 4.453424453735352, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.8627450980392157, |
| "grad_norm": 21.094820022583008, |
| "learning_rate": 6.3566290011422515e-06, |
| "loss": 4.252875328063965, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.8647058823529412, |
| "grad_norm": 302.2917785644531, |
| "learning_rate": 6.341335307491596e-06, |
| "loss": 3.92726993560791, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.8666666666666667, |
| "grad_norm": 44.52492141723633, |
| "learning_rate": 6.32602807928057e-06, |
| "loss": 4.290216445922852, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.8686274509803922, |
| "grad_norm": 48.71710205078125, |
| "learning_rate": 6.310707470964668e-06, |
| "loss": 4.29799747467041, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.8705882352941177, |
| "grad_norm": 224.1940460205078, |
| "learning_rate": 6.29537363713439e-06, |
| "loss": 4.234777450561523, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.8725490196078431, |
| "grad_norm": 198.24740600585938, |
| "learning_rate": 6.280026732513689e-06, |
| "loss": 4.185808181762695, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.8745098039215686, |
| "grad_norm": 35.172672271728516, |
| "learning_rate": 6.264666911958404e-06, |
| "loss": 4.557499885559082, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.8764705882352941, |
| "grad_norm": 63.40365219116211, |
| "learning_rate": 6.249294330454705e-06, |
| "loss": 4.098924160003662, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.8784313725490196, |
| "grad_norm": 145.4202117919922, |
| "learning_rate": 6.233909143117521e-06, |
| "loss": 4.268922805786133, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.8803921568627451, |
| "grad_norm": 149.33828735351562, |
| "learning_rate": 6.21851150518898e-06, |
| "loss": 4.938076019287109, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.8823529411764706, |
| "grad_norm": 42.62335205078125, |
| "learning_rate": 6.203101572036839e-06, |
| "loss": 4.535097599029541, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.884313725490196, |
| "grad_norm": 29.659095764160156, |
| "learning_rate": 6.18767949915292e-06, |
| "loss": 4.5162248611450195, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.8862745098039215, |
| "grad_norm": 385.17120361328125, |
| "learning_rate": 6.172245442151541e-06, |
| "loss": 4.205960273742676, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.888235294117647, |
| "grad_norm": 41.33932113647461, |
| "learning_rate": 6.156799556767941e-06, |
| "loss": 4.351683139801025, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.8901960784313725, |
| "grad_norm": 143.6595458984375, |
| "learning_rate": 6.141341998856711e-06, |
| "loss": 4.250962257385254, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.8921568627450981, |
| "grad_norm": 37.78490447998047, |
| "learning_rate": 6.125872924390226e-06, |
| "loss": 4.384239196777344, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.8941176470588236, |
| "grad_norm": 515.0045166015625, |
| "learning_rate": 6.110392489457067e-06, |
| "loss": 4.019399166107178, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.8960784313725491, |
| "grad_norm": 55.70957946777344, |
| "learning_rate": 6.094900850260439e-06, |
| "loss": 4.14704704284668, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.8980392156862745, |
| "grad_norm": 193.2394256591797, |
| "learning_rate": 6.079398163116611e-06, |
| "loss": 4.078997611999512, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 5256.82421875, |
| "learning_rate": 6.063884584453326e-06, |
| "loss": 4.191615104675293, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.9019607843137255, |
| "grad_norm": 100.2900390625, |
| "learning_rate": 6.048360270808226e-06, |
| "loss": 4.465028762817383, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.903921568627451, |
| "grad_norm": 1331.4735107421875, |
| "learning_rate": 6.032825378827273e-06, |
| "loss": 4.066887378692627, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.9058823529411765, |
| "grad_norm": 452.3879699707031, |
| "learning_rate": 6.0172800652631706e-06, |
| "loss": 4.527779579162598, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.907843137254902, |
| "grad_norm": 2017.1990966796875, |
| "learning_rate": 6.001724486973774e-06, |
| "loss": 4.606598854064941, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.9098039215686274, |
| "grad_norm": 754.1237182617188, |
| "learning_rate": 5.986158800920523e-06, |
| "loss": 4.566285133361816, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.9117647058823529, |
| "grad_norm": 384.491943359375, |
| "learning_rate": 5.970583164166838e-06, |
| "loss": 4.586278915405273, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.9137254901960784, |
| "grad_norm": 33.79523468017578, |
| "learning_rate": 5.954997733876552e-06, |
| "loss": 3.9631872177124023, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.9156862745098039, |
| "grad_norm": 66.47810363769531, |
| "learning_rate": 5.939402667312316e-06, |
| "loss": 4.857361793518066, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.9176470588235294, |
| "grad_norm": 32.95048904418945, |
| "learning_rate": 5.923798121834016e-06, |
| "loss": 4.744093418121338, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.9196078431372549, |
| "grad_norm": 66.45060729980469, |
| "learning_rate": 5.908184254897183e-06, |
| "loss": 4.424873352050781, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.9215686274509803, |
| "grad_norm": 160.2024688720703, |
| "learning_rate": 5.892561224051403e-06, |
| "loss": 4.0311994552612305, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.9235294117647059, |
| "grad_norm": 26.441673278808594, |
| "learning_rate": 5.876929186938734e-06, |
| "loss": 4.408843040466309, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.9254901960784314, |
| "grad_norm": 95.61007690429688, |
| "learning_rate": 5.861288301292103e-06, |
| "loss": 4.58468770980835, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.9274509803921569, |
| "grad_norm": 101.88788604736328, |
| "learning_rate": 5.845638724933729e-06, |
| "loss": 4.604763031005859, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.9294117647058824, |
| "grad_norm": 196.6406707763672, |
| "learning_rate": 5.82998061577352e-06, |
| "loss": 3.798020839691162, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.9313725490196079, |
| "grad_norm": 64.56034088134766, |
| "learning_rate": 5.814314131807486e-06, |
| "loss": 4.227663040161133, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.9333333333333333, |
| "grad_norm": 43.46277618408203, |
| "learning_rate": 5.798639431116135e-06, |
| "loss": 4.6275858879089355, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.9352941176470588, |
| "grad_norm": 624.3564453125, |
| "learning_rate": 5.782956671862895e-06, |
| "loss": 4.316646575927734, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.9372549019607843, |
| "grad_norm": 105.81096649169922, |
| "learning_rate": 5.767266012292496e-06, |
| "loss": 4.3191094398498535, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.9392156862745098, |
| "grad_norm": 11.598875045776367, |
| "learning_rate": 5.751567610729398e-06, |
| "loss": 4.125179767608643, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.9411764705882353, |
| "grad_norm": 80.84516143798828, |
| "learning_rate": 5.735861625576167e-06, |
| "loss": 4.346436977386475, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.9431372549019608, |
| "grad_norm": 74.3943862915039, |
| "learning_rate": 5.720148215311902e-06, |
| "loss": 4.7361907958984375, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.9450980392156862, |
| "grad_norm": 28.853347778320312, |
| "learning_rate": 5.7044275384906164e-06, |
| "loss": 4.320212364196777, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.9470588235294117, |
| "grad_norm": 12.190153121948242, |
| "learning_rate": 5.688699753739649e-06, |
| "loss": 4.433542251586914, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.9490196078431372, |
| "grad_norm": 14.953727722167969, |
| "learning_rate": 5.672965019758061e-06, |
| "loss": 4.154573440551758, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.9509803921568627, |
| "grad_norm": 45.093048095703125, |
| "learning_rate": 5.657223495315031e-06, |
| "loss": 4.646431922912598, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.9529411764705882, |
| "grad_norm": 67.42533111572266, |
| "learning_rate": 5.641475339248257e-06, |
| "loss": 4.0797953605651855, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.9549019607843138, |
| "grad_norm": 31.419174194335938, |
| "learning_rate": 5.625720710462352e-06, |
| "loss": 4.35106086730957, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.9568627450980393, |
| "grad_norm": 72.13565063476562, |
| "learning_rate": 5.609959767927247e-06, |
| "loss": 4.7546563148498535, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.9588235294117647, |
| "grad_norm": 1067.875, |
| "learning_rate": 5.594192670676568e-06, |
| "loss": 4.418883323669434, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.9607843137254902, |
| "grad_norm": 183.54017639160156, |
| "learning_rate": 5.578419577806058e-06, |
| "loss": 4.376974105834961, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.9627450980392157, |
| "grad_norm": 95.38218688964844, |
| "learning_rate": 5.562640648471951e-06, |
| "loss": 4.362179756164551, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.9647058823529412, |
| "grad_norm": 71.57412719726562, |
| "learning_rate": 5.546856041889374e-06, |
| "loss": 4.819134712219238, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.9666666666666667, |
| "grad_norm": 16.976749420166016, |
| "learning_rate": 5.531065917330737e-06, |
| "loss": 4.120271682739258, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.9686274509803922, |
| "grad_norm": 71.98576354980469, |
| "learning_rate": 5.515270434124136e-06, |
| "loss": 4.1797919273376465, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.9705882352941176, |
| "grad_norm": 4.768789768218994, |
| "learning_rate": 5.499469751651728e-06, |
| "loss": 4.024587631225586, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.9725490196078431, |
| "grad_norm": 408.6588134765625, |
| "learning_rate": 5.483664029348141e-06, |
| "loss": 4.756344795227051, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.9745098039215686, |
| "grad_norm": 10.495368957519531, |
| "learning_rate": 5.467853426698852e-06, |
| "loss": 4.134525299072266, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.9764705882352941, |
| "grad_norm": 183.8063201904297, |
| "learning_rate": 5.452038103238582e-06, |
| "loss": 4.6963725090026855, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.9784313725490196, |
| "grad_norm": 83.09385681152344, |
| "learning_rate": 5.43621821854969e-06, |
| "loss": 4.915416717529297, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.9803921568627451, |
| "grad_norm": 33.74040985107422, |
| "learning_rate": 5.420393932260557e-06, |
| "loss": 4.430484294891357, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.9823529411764705, |
| "grad_norm": 41.18917465209961, |
| "learning_rate": 5.404565404043977e-06, |
| "loss": 4.3449602127075195, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.984313725490196, |
| "grad_norm": 243.4974822998047, |
| "learning_rate": 5.388732793615551e-06, |
| "loss": 4.609969139099121, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.9862745098039216, |
| "grad_norm": 24.437667846679688, |
| "learning_rate": 5.372896260732065e-06, |
| "loss": 4.368291854858398, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.9882352941176471, |
| "grad_norm": 36.5069694519043, |
| "learning_rate": 5.357055965189888e-06, |
| "loss": 4.355335712432861, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.9901960784313726, |
| "grad_norm": 156.41387939453125, |
| "learning_rate": 5.341212066823356e-06, |
| "loss": 4.198671340942383, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.9921568627450981, |
| "grad_norm": 32.02665710449219, |
| "learning_rate": 5.325364725503155e-06, |
| "loss": 5.039112567901611, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.9941176470588236, |
| "grad_norm": 28.728458404541016, |
| "learning_rate": 5.3095141011347155e-06, |
| "loss": 4.022368907928467, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.996078431372549, |
| "grad_norm": 82.8222427368164, |
| "learning_rate": 5.2936603536565915e-06, |
| "loss": 4.288028717041016, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.9980392156862745, |
| "grad_norm": 46.44649124145508, |
| "learning_rate": 5.277803643038855e-06, |
| "loss": 4.239832401275635, |
| "step": 509 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 85.16124725341797, |
| "learning_rate": 5.261944129281474e-06, |
| "loss": 4.053654193878174, |
| "step": 510 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1020, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.411555016453128e+18, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|