| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 2055, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00291970802919708, |
| "grad_norm": 0.3534850478172302, |
| "learning_rate": 9.70873786407767e-13, |
| "loss": 1.7768380641937256, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.00583941605839416, |
| "grad_norm": 0.2643459439277649, |
| "learning_rate": 2.912621359223301e-12, |
| "loss": 1.800352931022644, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.008759124087591242, |
| "grad_norm": 0.25368738174438477, |
| "learning_rate": 4.854368932038835e-12, |
| "loss": 1.815911889076233, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.01167883211678832, |
| "grad_norm": 0.2690427601337433, |
| "learning_rate": 6.796116504854369e-12, |
| "loss": 1.8008553981781006, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.014598540145985401, |
| "grad_norm": 0.27801981568336487, |
| "learning_rate": 8.737864077669904e-12, |
| "loss": 1.7111756801605225, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.017518248175182483, |
| "grad_norm": 0.30863869190216064, |
| "learning_rate": 1.0679611650485436e-11, |
| "loss": 1.7506548166275024, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.020437956204379562, |
| "grad_norm": 0.25328800082206726, |
| "learning_rate": 1.2621359223300972e-11, |
| "loss": 1.6444569826126099, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.02335766423357664, |
| "grad_norm": 0.5986829996109009, |
| "learning_rate": 1.4563106796116506e-11, |
| "loss": 1.7497678995132446, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.026277372262773723, |
| "grad_norm": 0.20500004291534424, |
| "learning_rate": 1.6504854368932042e-11, |
| "loss": 1.5234403610229492, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.029197080291970802, |
| "grad_norm": 0.2636180520057678, |
| "learning_rate": 1.8446601941747574e-11, |
| "loss": 1.8194037675857544, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.032116788321167884, |
| "grad_norm": 0.6845226883888245, |
| "learning_rate": 2.0388349514563107e-11, |
| "loss": 1.7494784593582153, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.035036496350364967, |
| "grad_norm": 0.29200759530067444, |
| "learning_rate": 2.2330097087378642e-11, |
| "loss": 1.57957124710083, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.03795620437956204, |
| "grad_norm": 0.25369206070899963, |
| "learning_rate": 2.4271844660194175e-11, |
| "loss": 1.7397639751434326, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.040875912408759124, |
| "grad_norm": 0.6018857359886169, |
| "learning_rate": 2.621359223300971e-11, |
| "loss": 1.9077666997909546, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.043795620437956206, |
| "grad_norm": 0.35382694005966187, |
| "learning_rate": 2.8155339805825243e-11, |
| "loss": 1.7949832677841187, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.04671532846715328, |
| "grad_norm": 1.6109180450439453, |
| "learning_rate": 3.009708737864078e-11, |
| "loss": 1.751054048538208, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.049635036496350364, |
| "grad_norm": 0.19936665892601013, |
| "learning_rate": 3.203883495145632e-11, |
| "loss": 1.4549548625946045, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.052554744525547446, |
| "grad_norm": 0.31096768379211426, |
| "learning_rate": 3.398058252427185e-11, |
| "loss": 1.3109430074691772, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.05547445255474453, |
| "grad_norm": 0.24408775568008423, |
| "learning_rate": 3.592233009708738e-11, |
| "loss": 1.644413948059082, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.058394160583941604, |
| "grad_norm": 0.3106284439563751, |
| "learning_rate": 3.7864077669902915e-11, |
| "loss": 1.7961643934249878, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.061313868613138686, |
| "grad_norm": 0.32802024483680725, |
| "learning_rate": 3.980582524271845e-11, |
| "loss": 1.8175064325332642, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.06423357664233577, |
| "grad_norm": 0.3910239040851593, |
| "learning_rate": 4.174757281553398e-11, |
| "loss": 1.8361930847167969, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.06715328467153285, |
| "grad_norm": 0.5940719246864319, |
| "learning_rate": 4.368932038834951e-11, |
| "loss": 2.0858569145202637, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.07007299270072993, |
| "grad_norm": 0.2699325382709503, |
| "learning_rate": 4.563106796116505e-11, |
| "loss": 1.9244903326034546, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.072992700729927, |
| "grad_norm": 0.3519454002380371, |
| "learning_rate": 4.757281553398058e-11, |
| "loss": 1.6795828342437744, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.07591240875912408, |
| "grad_norm": 1.271136999130249, |
| "learning_rate": 4.9514563106796115e-11, |
| "loss": 1.8167542219161987, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.07883211678832117, |
| "grad_norm": 0.32721835374832153, |
| "learning_rate": 5.1456310679611654e-11, |
| "loss": 1.8619266748428345, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.08175182481751825, |
| "grad_norm": 0.35416823625564575, |
| "learning_rate": 5.339805825242719e-11, |
| "loss": 1.8907665014266968, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.08467153284671533, |
| "grad_norm": 0.1937120109796524, |
| "learning_rate": 5.533980582524272e-11, |
| "loss": 1.4969103336334229, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.08759124087591241, |
| "grad_norm": 0.2578057646751404, |
| "learning_rate": 5.728155339805825e-11, |
| "loss": 1.4486453533172607, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.0905109489051095, |
| "grad_norm": 0.23345084488391876, |
| "learning_rate": 5.922330097087378e-11, |
| "loss": 1.5223218202590942, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.09343065693430656, |
| "grad_norm": 0.23968848586082458, |
| "learning_rate": 6.116504854368932e-11, |
| "loss": 1.6385910511016846, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.09635036496350365, |
| "grad_norm": 0.23735839128494263, |
| "learning_rate": 6.310679611650486e-11, |
| "loss": 1.6215620040893555, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.09927007299270073, |
| "grad_norm": 0.274890661239624, |
| "learning_rate": 6.50485436893204e-11, |
| "loss": 1.5964994430541992, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.10218978102189781, |
| "grad_norm": 0.26082801818847656, |
| "learning_rate": 6.699029126213593e-11, |
| "loss": 1.681908369064331, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.10510948905109489, |
| "grad_norm": 0.4017055928707123, |
| "learning_rate": 6.893203883495146e-11, |
| "loss": 1.853885531425476, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.10802919708029197, |
| "grad_norm": 0.26503318548202515, |
| "learning_rate": 7.087378640776699e-11, |
| "loss": 1.830824375152588, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.11094890510948906, |
| "grad_norm": 0.4232666492462158, |
| "learning_rate": 7.281553398058252e-11, |
| "loss": 1.809700608253479, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.11386861313868613, |
| "grad_norm": 0.5037844181060791, |
| "learning_rate": 7.475728155339806e-11, |
| "loss": 1.9286326169967651, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.11678832116788321, |
| "grad_norm": 0.39150989055633545, |
| "learning_rate": 7.669902912621359e-11, |
| "loss": 1.816016435623169, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.11970802919708029, |
| "grad_norm": 0.9030010104179382, |
| "learning_rate": 7.864077669902912e-11, |
| "loss": 1.778789758682251, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.12262773722627737, |
| "grad_norm": 1.9095746278762817, |
| "learning_rate": 8.058252427184467e-11, |
| "loss": 1.9802308082580566, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.12554744525547445, |
| "grad_norm": 0.30885621905326843, |
| "learning_rate": 8.25242718446602e-11, |
| "loss": 1.7350918054580688, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.12846715328467154, |
| "grad_norm": 0.3458031415939331, |
| "learning_rate": 8.446601941747573e-11, |
| "loss": 1.5395787954330444, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.13138686131386862, |
| "grad_norm": 0.5105660557746887, |
| "learning_rate": 8.640776699029126e-11, |
| "loss": 1.821498990058899, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.1343065693430657, |
| "grad_norm": 0.23492082953453064, |
| "learning_rate": 8.834951456310681e-11, |
| "loss": 1.8252274990081787, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.13722627737226278, |
| "grad_norm": 0.36012178659439087, |
| "learning_rate": 9.029126213592234e-11, |
| "loss": 1.8968510627746582, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.14014598540145987, |
| "grad_norm": 0.4315379858016968, |
| "learning_rate": 9.223300970873787e-11, |
| "loss": 1.7809394598007202, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.14306569343065692, |
| "grad_norm": 0.2982156276702881, |
| "learning_rate": 9.41747572815534e-11, |
| "loss": 1.7060034275054932, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.145985401459854, |
| "grad_norm": 0.3450309634208679, |
| "learning_rate": 9.611650485436894e-11, |
| "loss": 1.9719310998916626, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.14890510948905109, |
| "grad_norm": 0.32693490386009216, |
| "learning_rate": 9.805825242718447e-11, |
| "loss": 1.9070788621902466, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.15182481751824817, |
| "grad_norm": 1.445264220237732, |
| "learning_rate": 1e-10, |
| "loss": 1.7498117685317993, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.15474452554744525, |
| "grad_norm": 0.3428294062614441, |
| "learning_rate": 9.999976687854062e-11, |
| "loss": 1.8611483573913574, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.15766423357664233, |
| "grad_norm": 0.3272436857223511, |
| "learning_rate": 9.999906751657785e-11, |
| "loss": 1.9175266027450562, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.16058394160583941, |
| "grad_norm": 0.909197986125946, |
| "learning_rate": 9.999790192135772e-11, |
| "loss": 1.8308496475219727, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.1635036496350365, |
| "grad_norm": 0.38203534483909607, |
| "learning_rate": 9.999627010495695e-11, |
| "loss": 1.8768818378448486, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.16642335766423358, |
| "grad_norm": 0.2889254689216614, |
| "learning_rate": 9.999417208428267e-11, |
| "loss": 1.636149287223816, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.16934306569343066, |
| "grad_norm": 0.4831934869289398, |
| "learning_rate": 9.999160788107241e-11, |
| "loss": 1.942168116569519, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.17226277372262774, |
| "grad_norm": 0.2783659100532532, |
| "learning_rate": 9.998857752189376e-11, |
| "loss": 1.8512721061706543, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.17518248175182483, |
| "grad_norm": 0.31743311882019043, |
| "learning_rate": 9.99850810381441e-11, |
| "loss": 1.9434564113616943, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.1781021897810219, |
| "grad_norm": 0.26472657918930054, |
| "learning_rate": 9.998111846605035e-11, |
| "loss": 1.8089823722839355, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.181021897810219, |
| "grad_norm": 0.47778958082199097, |
| "learning_rate": 9.997668984666856e-11, |
| "loss": 1.7395007610321045, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.18394160583941604, |
| "grad_norm": 0.3084363639354706, |
| "learning_rate": 9.99717952258834e-11, |
| "loss": 1.9149442911148071, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.18686131386861313, |
| "grad_norm": 0.381334513425827, |
| "learning_rate": 9.996643465440788e-11, |
| "loss": 1.7895768880844116, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.1897810218978102, |
| "grad_norm": 0.3909507393836975, |
| "learning_rate": 9.996060818778257e-11, |
| "loss": 1.8174935579299927, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.1927007299270073, |
| "grad_norm": 0.40029406547546387, |
| "learning_rate": 9.995431588637526e-11, |
| "loss": 1.8789606094360352, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.19562043795620437, |
| "grad_norm": 0.24144038558006287, |
| "learning_rate": 9.994755781538018e-11, |
| "loss": 1.704660177230835, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.19854014598540146, |
| "grad_norm": 0.15783090889453888, |
| "learning_rate": 9.994033404481737e-11, |
| "loss": 1.5398731231689453, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.20145985401459854, |
| "grad_norm": 0.1733558028936386, |
| "learning_rate": 9.993264464953202e-11, |
| "loss": 1.304384469985962, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.20437956204379562, |
| "grad_norm": 0.3875671923160553, |
| "learning_rate": 9.992448970919358e-11, |
| "loss": 1.786711573600769, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.2072992700729927, |
| "grad_norm": 0.294428288936615, |
| "learning_rate": 9.991586930829501e-11, |
| "loss": 1.7124841213226318, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.21021897810218979, |
| "grad_norm": 0.19950203597545624, |
| "learning_rate": 9.990678353615189e-11, |
| "loss": 1.663377285003662, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.21313868613138687, |
| "grad_norm": 0.26574021577835083, |
| "learning_rate": 9.989723248690149e-11, |
| "loss": 1.7981501817703247, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.21605839416058395, |
| "grad_norm": 0.3551180958747864, |
| "learning_rate": 9.988721625950177e-11, |
| "loss": 1.694273591041565, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.21897810218978103, |
| "grad_norm": 0.22780469059944153, |
| "learning_rate": 9.987673495773042e-11, |
| "loss": 1.8389781713485718, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.22189781021897811, |
| "grad_norm": 0.29115864634513855, |
| "learning_rate": 9.986578869018371e-11, |
| "loss": 1.8860565423965454, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.22481751824817517, |
| "grad_norm": 0.44859740138053894, |
| "learning_rate": 9.985437757027541e-11, |
| "loss": 1.9679546356201172, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.22773722627737225, |
| "grad_norm": 0.8019328117370605, |
| "learning_rate": 9.98425017162356e-11, |
| "loss": 1.8494609594345093, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.23065693430656933, |
| "grad_norm": 0.4708123505115509, |
| "learning_rate": 9.98301612511095e-11, |
| "loss": 1.9518005847930908, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.23357664233576642, |
| "grad_norm": 0.39831846952438354, |
| "learning_rate": 9.981735630275602e-11, |
| "loss": 2.0240159034729004, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.2364963503649635, |
| "grad_norm": 0.5212871432304382, |
| "learning_rate": 9.980408700384671e-11, |
| "loss": 2.01702618598938, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.23941605839416058, |
| "grad_norm": 0.9411134123802185, |
| "learning_rate": 9.979035349186414e-11, |
| "loss": 2.1166324615478516, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.24233576642335766, |
| "grad_norm": 0.5210184454917908, |
| "learning_rate": 9.977615590910056e-11, |
| "loss": 1.9885681867599487, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.24525547445255474, |
| "grad_norm": 0.8235350251197815, |
| "learning_rate": 9.976149440265651e-11, |
| "loss": 2.311216354370117, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.24817518248175183, |
| "grad_norm": 0.9275002479553223, |
| "learning_rate": 9.974636912443916e-11, |
| "loss": 1.8653641939163208, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.2510948905109489, |
| "grad_norm": 0.6768398880958557, |
| "learning_rate": 9.973078023116083e-11, |
| "loss": 1.848363995552063, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.25401459854014596, |
| "grad_norm": 0.7485382556915283, |
| "learning_rate": 9.971472788433731e-11, |
| "loss": 1.9982264041900635, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.2569343065693431, |
| "grad_norm": 4.182097911834717, |
| "learning_rate": 9.969821225028622e-11, |
| "loss": 2.407541513442993, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.25985401459854013, |
| "grad_norm": 0.42571887373924255, |
| "learning_rate": 9.968123350012531e-11, |
| "loss": 1.7301071882247925, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.26277372262773724, |
| "grad_norm": 0.5800350308418274, |
| "learning_rate": 9.966379180977059e-11, |
| "loss": 1.6739298105239868, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.2656934306569343, |
| "grad_norm": 0.34787094593048096, |
| "learning_rate": 9.964588735993461e-11, |
| "loss": 1.5040230751037598, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.2686131386861314, |
| "grad_norm": 0.7273563146591187, |
| "learning_rate": 9.962752033612457e-11, |
| "loss": 1.762050747871399, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.27153284671532846, |
| "grad_norm": 1.0910371541976929, |
| "learning_rate": 9.960869092864034e-11, |
| "loss": 1.5401787757873535, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.27445255474452557, |
| "grad_norm": 0.3126949667930603, |
| "learning_rate": 9.958939933257254e-11, |
| "loss": 1.420863389968872, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.2773722627737226, |
| "grad_norm": 0.37012311816215515, |
| "learning_rate": 9.95696457478005e-11, |
| "loss": 1.5388368368148804, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.28029197080291973, |
| "grad_norm": 0.20049013197422028, |
| "learning_rate": 9.954943037899022e-11, |
| "loss": 1.1660946607589722, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.2832116788321168, |
| "grad_norm": 0.6460405588150024, |
| "learning_rate": 9.952875343559217e-11, |
| "loss": 1.5849355459213257, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.28613138686131384, |
| "grad_norm": 0.7101665139198303, |
| "learning_rate": 9.950761513183924e-11, |
| "loss": 1.9086475372314453, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.28905109489051095, |
| "grad_norm": 0.8782652020454407, |
| "learning_rate": 9.948601568674439e-11, |
| "loss": 1.9283380508422852, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.291970802919708, |
| "grad_norm": 0.8213207125663757, |
| "learning_rate": 9.946395532409846e-11, |
| "loss": 1.6717543601989746, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.2948905109489051, |
| "grad_norm": 0.8597667217254639, |
| "learning_rate": 9.94414342724679e-11, |
| "loss": 2.2654058933258057, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.29781021897810217, |
| "grad_norm": 0.31900808215141296, |
| "learning_rate": 9.941845276519223e-11, |
| "loss": 1.2488539218902588, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.3007299270072993, |
| "grad_norm": 0.2743098735809326, |
| "learning_rate": 9.93950110403818e-11, |
| "loss": 1.5524955987930298, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.30364963503649633, |
| "grad_norm": 0.44529157876968384, |
| "learning_rate": 9.937110934091525e-11, |
| "loss": 1.9126129150390625, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.30656934306569344, |
| "grad_norm": 0.2431587129831314, |
| "learning_rate": 9.934674791443698e-11, |
| "loss": 1.6700682640075684, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.3094890510948905, |
| "grad_norm": 0.25222745537757874, |
| "learning_rate": 9.932192701335459e-11, |
| "loss": 1.792035460472107, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.3124087591240876, |
| "grad_norm": 0.29361167550086975, |
| "learning_rate": 9.929664689483631e-11, |
| "loss": 1.7590653896331787, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.31532846715328466, |
| "grad_norm": 0.8557894229888916, |
| "learning_rate": 9.927090782080824e-11, |
| "loss": 1.598631739616394, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.3182481751824818, |
| "grad_norm": 0.19789443910121918, |
| "learning_rate": 9.924471005795176e-11, |
| "loss": 1.61155104637146, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.32116788321167883, |
| "grad_norm": 1.04644775390625, |
| "learning_rate": 9.92180538777007e-11, |
| "loss": 1.526228904724121, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.32408759124087594, |
| "grad_norm": 0.22464589774608612, |
| "learning_rate": 9.919093955623848e-11, |
| "loss": 1.368972659111023, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.327007299270073, |
| "grad_norm": 0.8330907225608826, |
| "learning_rate": 9.916336737449534e-11, |
| "loss": 2.178337335586548, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.32992700729927005, |
| "grad_norm": 0.24324865639209747, |
| "learning_rate": 9.913533761814537e-11, |
| "loss": 1.7530229091644287, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.33284671532846716, |
| "grad_norm": 0.6675015091896057, |
| "learning_rate": 9.91068505776036e-11, |
| "loss": 1.697060227394104, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.3357664233576642, |
| "grad_norm": 0.5403636693954468, |
| "learning_rate": 9.907790654802293e-11, |
| "loss": 1.4822953939437866, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.3386861313868613, |
| "grad_norm": 0.7010171413421631, |
| "learning_rate": 9.90485058292911e-11, |
| "loss": 1.9493117332458496, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.3416058394160584, |
| "grad_norm": 0.38018572330474854, |
| "learning_rate": 9.901864872602762e-11, |
| "loss": 1.9160114526748657, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.3445255474452555, |
| "grad_norm": 1.252315640449524, |
| "learning_rate": 9.898833554758054e-11, |
| "loss": 1.9603207111358643, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.34744525547445254, |
| "grad_norm": 0.41888001561164856, |
| "learning_rate": 9.895756660802331e-11, |
| "loss": 1.890695571899414, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.35036496350364965, |
| "grad_norm": 0.4101668894290924, |
| "learning_rate": 9.892634222615147e-11, |
| "loss": 1.7084157466888428, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.3532846715328467, |
| "grad_norm": 1.959101915359497, |
| "learning_rate": 9.889466272547941e-11, |
| "loss": 1.9957774877548218, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.3562043795620438, |
| "grad_norm": 1.0332179069519043, |
| "learning_rate": 9.886252843423699e-11, |
| "loss": 1.825124740600586, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.35912408759124087, |
| "grad_norm": 0.2979462146759033, |
| "learning_rate": 9.882993968536607e-11, |
| "loss": 1.7160868644714355, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.362043795620438, |
| "grad_norm": 0.712623655796051, |
| "learning_rate": 9.879689681651721e-11, |
| "loss": 2.088395595550537, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.36496350364963503, |
| "grad_norm": 0.4699522852897644, |
| "learning_rate": 9.876340017004604e-11, |
| "loss": 2.0031797885894775, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.3678832116788321, |
| "grad_norm": 0.4952646493911743, |
| "learning_rate": 9.872945009300975e-11, |
| "loss": 2.03470516204834, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.3708029197080292, |
| "grad_norm": 0.8986260294914246, |
| "learning_rate": 9.869504693716353e-11, |
| "loss": 2.13348650932312, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.37372262773722625, |
| "grad_norm": 0.7052081823348999, |
| "learning_rate": 9.866019105895686e-11, |
| "loss": 1.8968867063522339, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.37664233576642336, |
| "grad_norm": 0.47716858983039856, |
| "learning_rate": 9.862488281952992e-11, |
| "loss": 1.7656636238098145, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.3795620437956204, |
| "grad_norm": 0.21338705718517303, |
| "learning_rate": 9.858912258470973e-11, |
| "loss": 1.604569435119629, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.38248175182481753, |
| "grad_norm": 0.441703200340271, |
| "learning_rate": 9.855291072500643e-11, |
| "loss": 1.8019328117370605, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.3854014598540146, |
| "grad_norm": 0.4217037558555603, |
| "learning_rate": 9.851624761560942e-11, |
| "loss": 2.2849860191345215, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.3883211678832117, |
| "grad_norm": 0.358407586812973, |
| "learning_rate": 9.847913363638348e-11, |
| "loss": 1.927869200706482, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.39124087591240875, |
| "grad_norm": 0.44805291295051575, |
| "learning_rate": 9.844156917186485e-11, |
| "loss": 1.5878663063049316, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.39416058394160586, |
| "grad_norm": 0.5878186821937561, |
| "learning_rate": 9.840355461125717e-11, |
| "loss": 1.739422082901001, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.3970802919708029, |
| "grad_norm": 0.3898945748806, |
| "learning_rate": 9.836509034842758e-11, |
| "loss": 1.6820441484451294, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.3018088638782501, |
| "learning_rate": 9.832617678190252e-11, |
| "loss": 1.6972731351852417, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.4029197080291971, |
| "grad_norm": 0.3607556223869324, |
| "learning_rate": 9.828681431486365e-11, |
| "loss": 1.8021835088729858, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.4058394160583942, |
| "grad_norm": 0.7184948921203613, |
| "learning_rate": 9.824700335514367e-11, |
| "loss": 1.8424593210220337, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.40875912408759124, |
| "grad_norm": 0.9060298204421997, |
| "learning_rate": 9.820674431522208e-11, |
| "loss": 1.7280572652816772, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.4116788321167883, |
| "grad_norm": 0.5940150022506714, |
| "learning_rate": 9.816603761222096e-11, |
| "loss": 1.8406977653503418, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.4145985401459854, |
| "grad_norm": 0.5251798629760742, |
| "learning_rate": 9.812488366790056e-11, |
| "loss": 1.8819663524627686, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.41751824817518246, |
| "grad_norm": 0.6706494688987732, |
| "learning_rate": 9.808328290865499e-11, |
| "loss": 1.6763763427734375, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.42043795620437957, |
| "grad_norm": 0.4086778461933136, |
| "learning_rate": 9.80412357655078e-11, |
| "loss": 1.5377238988876343, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.4233576642335766, |
| "grad_norm": 0.3526064455509186, |
| "learning_rate": 9.799874267410747e-11, |
| "loss": 1.6129189729690552, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.42627737226277373, |
| "grad_norm": 0.32802364230155945, |
| "learning_rate": 9.7955804074723e-11, |
| "loss": 1.7900128364562988, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.4291970802919708, |
| "grad_norm": 0.5461502075195312, |
| "learning_rate": 9.791242041223921e-11, |
| "loss": 1.7179615497589111, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.4321167883211679, |
| "grad_norm": 0.3147372603416443, |
| "learning_rate": 9.786859213615222e-11, |
| "loss": 1.3112092018127441, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.43503649635036495, |
| "grad_norm": 0.5890180468559265, |
| "learning_rate": 9.782431970056477e-11, |
| "loss": 1.637431025505066, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.43795620437956206, |
| "grad_norm": 0.5663186311721802, |
| "learning_rate": 9.777960356418152e-11, |
| "loss": 1.6922191381454468, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.4408759124087591, |
| "grad_norm": 0.27407780289649963, |
| "learning_rate": 9.773444419030429e-11, |
| "loss": 1.6063798666000366, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.44379562043795623, |
| "grad_norm": 0.3785094618797302, |
| "learning_rate": 9.768884204682725e-11, |
| "loss": 1.7761635780334473, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.4467153284671533, |
| "grad_norm": 0.29776790738105774, |
| "learning_rate": 9.764279760623213e-11, |
| "loss": 1.8873343467712402, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.44963503649635034, |
| "grad_norm": 0.27831506729125977, |
| "learning_rate": 9.759631134558321e-11, |
| "loss": 1.5000873804092407, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.45255474452554745, |
| "grad_norm": 0.3883137106895447, |
| "learning_rate": 9.754938374652254e-11, |
| "loss": 1.4682884216308594, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.4554744525547445, |
| "grad_norm": 0.3289071321487427, |
| "learning_rate": 9.750201529526476e-11, |
| "loss": 1.616572380065918, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.4583941605839416, |
| "grad_norm": 0.39953532814979553, |
| "learning_rate": 9.745420648259221e-11, |
| "loss": 1.5080758333206177, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.46131386861313867, |
| "grad_norm": 0.4054333567619324, |
| "learning_rate": 9.74059578038498e-11, |
| "loss": 1.9431893825531006, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.4642335766423358, |
| "grad_norm": 0.2948903441429138, |
| "learning_rate": 9.735726975893986e-11, |
| "loss": 1.6747829914093018, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.46715328467153283, |
| "grad_norm": 0.2299513816833496, |
| "learning_rate": 9.730814285231695e-11, |
| "loss": 1.6036838293075562, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.47007299270072994, |
| "grad_norm": 0.5000912547111511, |
| "learning_rate": 9.725857759298269e-11, |
| "loss": 1.6960393190383911, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.472992700729927, |
| "grad_norm": 0.44810453057289124, |
| "learning_rate": 9.720857449448044e-11, |
| "loss": 1.5583330392837524, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.4759124087591241, |
| "grad_norm": 0.3251849412918091, |
| "learning_rate": 9.715813407489001e-11, |
| "loss": 1.4970282316207886, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.47883211678832116, |
| "grad_norm": 0.23649266362190247, |
| "learning_rate": 9.710725685682221e-11, |
| "loss": 1.4132741689682007, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.48175182481751827, |
| "grad_norm": 0.34182292222976685, |
| "learning_rate": 9.70559433674136e-11, |
| "loss": 1.5327880382537842, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.4846715328467153, |
| "grad_norm": 0.7462207078933716, |
| "learning_rate": 9.700419413832081e-11, |
| "loss": 1.5539097785949707, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.48759124087591244, |
| "grad_norm": 0.23400592803955078, |
| "learning_rate": 9.695200970571525e-11, |
| "loss": 1.5931789875030518, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.4905109489051095, |
| "grad_norm": 0.4315284192562103, |
| "learning_rate": 9.68993906102774e-11, |
| "loss": 1.7818516492843628, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.49343065693430654, |
| "grad_norm": 0.5085203647613525, |
| "learning_rate": 9.684633739719126e-11, |
| "loss": 1.6964106559753418, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.49635036496350365, |
| "grad_norm": 0.32666438817977905, |
| "learning_rate": 9.679285061613877e-11, |
| "loss": 1.8903117179870605, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.4992700729927007, |
| "grad_norm": 0.696006715297699, |
| "learning_rate": 9.673893082129394e-11, |
| "loss": 1.6851402521133423, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.5021897810218978, |
| "grad_norm": 0.3322364091873169, |
| "learning_rate": 9.668457857131728e-11, |
| "loss": 1.4909449815750122, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.5051094890510949, |
| "grad_norm": 0.32292819023132324, |
| "learning_rate": 9.662979442934995e-11, |
| "loss": 1.3091050386428833, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.5080291970802919, |
| "grad_norm": 0.7607306838035583, |
| "learning_rate": 9.657457896300791e-11, |
| "loss": 1.4538644552230835, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.5109489051094891, |
| "grad_norm": 0.8117560744285583, |
| "learning_rate": 9.651893274437606e-11, |
| "loss": 1.715744137763977, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.5138686131386861, |
| "grad_norm": 0.42647185921669006, |
| "learning_rate": 9.646285635000226e-11, |
| "loss": 1.8959312438964844, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.5167883211678832, |
| "grad_norm": 0.36822986602783203, |
| "learning_rate": 9.640635036089149e-11, |
| "loss": 1.9616965055465698, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.5197080291970803, |
| "grad_norm": 0.49794426560401917, |
| "learning_rate": 9.634941536249966e-11, |
| "loss": 1.6823753118515015, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.5226277372262774, |
| "grad_norm": 0.5548354983329773, |
| "learning_rate": 9.629205194472766e-11, |
| "loss": 1.640671730041504, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.5255474452554745, |
| "grad_norm": 0.5667994618415833, |
| "learning_rate": 9.623426070191521e-11, |
| "loss": 1.4344934225082397, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.5284671532846715, |
| "grad_norm": 0.6978429555892944, |
| "learning_rate": 9.617604223283472e-11, |
| "loss": 1.8851178884506226, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.5313868613138686, |
| "grad_norm": 3.7346668243408203, |
| "learning_rate": 9.611739714068503e-11, |
| "loss": 2.039613723754883, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.5343065693430656, |
| "grad_norm": 4.44684362411499, |
| "learning_rate": 9.605832603308523e-11, |
| "loss": 1.6009081602096558, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.5372262773722628, |
| "grad_norm": 0.8611459136009216, |
| "learning_rate": 9.599882952206834e-11, |
| "loss": 1.4058667421340942, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.5401459854014599, |
| "grad_norm": 0.3514321744441986, |
| "learning_rate": 9.593890822407497e-11, |
| "loss": 1.6223829984664917, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.5430656934306569, |
| "grad_norm": 0.7854035496711731, |
| "learning_rate": 9.58785627599469e-11, |
| "loss": 1.837151050567627, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.545985401459854, |
| "grad_norm": 0.7137376666069031, |
| "learning_rate": 9.58177937549207e-11, |
| "loss": 1.902309536933899, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.5489051094890511, |
| "grad_norm": 0.217190682888031, |
| "learning_rate": 9.575660183862122e-11, |
| "loss": 1.6437760591506958, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.5518248175182482, |
| "grad_norm": 0.5008941292762756, |
| "learning_rate": 9.569498764505507e-11, |
| "loss": 1.7193173170089722, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.5547445255474452, |
| "grad_norm": 0.3317372500896454, |
| "learning_rate": 9.563295181260408e-11, |
| "loss": 1.4700264930725098, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.5576642335766423, |
| "grad_norm": 0.24804271757602692, |
| "learning_rate": 9.55704949840186e-11, |
| "loss": 1.3685392141342163, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.5605839416058395, |
| "grad_norm": 0.5515373349189758, |
| "learning_rate": 9.550761780641101e-11, |
| "loss": 1.272115707397461, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.5635036496350365, |
| "grad_norm": 0.8238644003868103, |
| "learning_rate": 9.544432093124877e-11, |
| "loss": 1.5264108180999756, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.5664233576642336, |
| "grad_norm": 0.6252793073654175, |
| "learning_rate": 9.53806050143479e-11, |
| "loss": 1.3205806016921997, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.5693430656934306, |
| "grad_norm": 0.5369510054588318, |
| "learning_rate": 9.531647071586608e-11, |
| "loss": 1.6523046493530273, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.5722627737226277, |
| "grad_norm": 0.5264310836791992, |
| "learning_rate": 9.52519187002958e-11, |
| "loss": 1.37351393699646, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.5751824817518248, |
| "grad_norm": 0.8609051704406738, |
| "learning_rate": 9.518694963645751e-11, |
| "loss": 1.3692678213119507, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.5781021897810219, |
| "grad_norm": 0.28163954615592957, |
| "learning_rate": 9.512156419749268e-11, |
| "loss": 1.3704659938812256, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.581021897810219, |
| "grad_norm": 0.4875539541244507, |
| "learning_rate": 9.505576306085681e-11, |
| "loss": 1.4559335708618164, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.583941605839416, |
| "grad_norm": 0.3505980372428894, |
| "learning_rate": 9.498954690831245e-11, |
| "loss": 1.6419453620910645, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.5868613138686132, |
| "grad_norm": 0.5778207182884216, |
| "learning_rate": 9.492291642592205e-11, |
| "loss": 1.2554142475128174, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.5897810218978102, |
| "grad_norm": 0.24265649914741516, |
| "learning_rate": 9.485587230404102e-11, |
| "loss": 1.2532234191894531, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.5927007299270073, |
| "grad_norm": 0.6797131896018982, |
| "learning_rate": 9.478841523731036e-11, |
| "loss": 1.2978441715240479, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.5956204379562043, |
| "grad_norm": 0.41169965267181396, |
| "learning_rate": 9.472054592464963e-11, |
| "loss": 1.3374297618865967, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.5985401459854015, |
| "grad_norm": 0.5518661737442017, |
| "learning_rate": 9.465226506924965e-11, |
| "loss": 1.652148723602295, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.6014598540145986, |
| "grad_norm": 0.1583075225353241, |
| "learning_rate": 9.458357337856519e-11, |
| "loss": 1.443751335144043, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.6043795620437956, |
| "grad_norm": 0.34667131304740906, |
| "learning_rate": 9.451447156430769e-11, |
| "loss": 1.2133233547210693, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.6072992700729927, |
| "grad_norm": 0.503765344619751, |
| "learning_rate": 9.444496034243786e-11, |
| "loss": 1.3155429363250732, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.6102189781021898, |
| "grad_norm": 0.43029820919036865, |
| "learning_rate": 9.437504043315824e-11, |
| "loss": 1.536086082458496, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.6131386861313869, |
| "grad_norm": 1.2249265909194946, |
| "learning_rate": 9.430471256090578e-11, |
| "loss": 1.691704511642456, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.6160583941605839, |
| "grad_norm": 0.25307124853134155, |
| "learning_rate": 9.42339774543443e-11, |
| "loss": 1.1645077466964722, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.618978102189781, |
| "grad_norm": 0.36436426639556885, |
| "learning_rate": 9.4162835846357e-11, |
| "loss": 1.5609655380249023, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.621897810218978, |
| "grad_norm": 0.26339077949523926, |
| "learning_rate": 9.409128847403874e-11, |
| "loss": 1.4671318531036377, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.6248175182481752, |
| "grad_norm": 0.4291604459285736, |
| "learning_rate": 9.401933607868859e-11, |
| "loss": 1.2365825176239014, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.6277372262773723, |
| "grad_norm": 0.24072806537151337, |
| "learning_rate": 9.394697940580195e-11, |
| "loss": 1.2259232997894287, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.6306569343065693, |
| "grad_norm": 0.38607171177864075, |
| "learning_rate": 9.387421920506298e-11, |
| "loss": 1.1846305131912231, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.6335766423357664, |
| "grad_norm": 0.27039477229118347, |
| "learning_rate": 9.380105623033677e-11, |
| "loss": 1.6850125789642334, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.6364963503649635, |
| "grad_norm": 0.27692389488220215, |
| "learning_rate": 9.372749123966148e-11, |
| "loss": 1.5691750049591064, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.6394160583941606, |
| "grad_norm": 0.506200909614563, |
| "learning_rate": 9.365352499524059e-11, |
| "loss": 1.6711288690567017, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.6423357664233577, |
| "grad_norm": 0.2726089656352997, |
| "learning_rate": 9.357915826343496e-11, |
| "loss": 1.3896371126174927, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.6452554744525547, |
| "grad_norm": 0.5812091827392578, |
| "learning_rate": 9.350439181475483e-11, |
| "loss": 1.6031279563903809, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.6481751824817519, |
| "grad_norm": 0.27857470512390137, |
| "learning_rate": 9.342922642385193e-11, |
| "loss": 1.507469892501831, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.6510948905109489, |
| "grad_norm": 0.7666060924530029, |
| "learning_rate": 9.33536628695114e-11, |
| "loss": 1.5827025175094604, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.654014598540146, |
| "grad_norm": 0.716264545917511, |
| "learning_rate": 9.327770193464374e-11, |
| "loss": 1.3550753593444824, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.656934306569343, |
| "grad_norm": 0.3254503309726715, |
| "learning_rate": 9.320134440627665e-11, |
| "loss": 1.4587416648864746, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.6598540145985401, |
| "grad_norm": 0.4242144823074341, |
| "learning_rate": 9.312459107554698e-11, |
| "loss": 1.5037614107131958, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.6627737226277373, |
| "grad_norm": 0.2750391364097595, |
| "learning_rate": 9.304744273769243e-11, |
| "loss": 1.6231236457824707, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.6656934306569343, |
| "grad_norm": 0.4245796501636505, |
| "learning_rate": 9.296990019204335e-11, |
| "loss": 1.3407589197158813, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.6686131386861314, |
| "grad_norm": 0.32759228348731995, |
| "learning_rate": 9.289196424201451e-11, |
| "loss": 1.3652153015136719, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.6715328467153284, |
| "grad_norm": 0.5771019458770752, |
| "learning_rate": 9.281363569509662e-11, |
| "loss": 1.6398756504058838, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.6744525547445256, |
| "grad_norm": 0.7310810089111328, |
| "learning_rate": 9.273491536284819e-11, |
| "loss": 1.7764878273010254, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.6773722627737226, |
| "grad_norm": 1.0177778005599976, |
| "learning_rate": 9.265580406088692e-11, |
| "loss": 1.4794597625732422, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.6802919708029197, |
| "grad_norm": 0.23670396208763123, |
| "learning_rate": 9.257630260888133e-11, |
| "loss": 1.2997967004776, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.6832116788321168, |
| "grad_norm": 0.4635494649410248, |
| "learning_rate": 9.249641183054232e-11, |
| "loss": 1.4545753002166748, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.6861313868613139, |
| "grad_norm": 0.5670157670974731, |
| "learning_rate": 9.241613255361454e-11, |
| "loss": 1.6660820245742798, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.689051094890511, |
| "grad_norm": 0.27268993854522705, |
| "learning_rate": 9.233546560986786e-11, |
| "loss": 1.6526291370391846, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.691970802919708, |
| "grad_norm": 1.366753339767456, |
| "learning_rate": 9.225441183508875e-11, |
| "loss": 1.6750726699829102, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.6948905109489051, |
| "grad_norm": 0.32852044701576233, |
| "learning_rate": 9.217297206907161e-11, |
| "loss": 1.4638712406158447, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.6978102189781021, |
| "grad_norm": 0.373755544424057, |
| "learning_rate": 9.209114715561012e-11, |
| "loss": 1.6217398643493652, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.7007299270072993, |
| "grad_norm": 0.287253737449646, |
| "learning_rate": 9.200893794248843e-11, |
| "loss": 1.5670347213745117, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.7036496350364964, |
| "grad_norm": 0.5298172831535339, |
| "learning_rate": 9.192634528147234e-11, |
| "loss": 1.6435707807540894, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.7065693430656934, |
| "grad_norm": 0.23429054021835327, |
| "learning_rate": 9.184337002830065e-11, |
| "loss": 1.3780916929244995, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.7094890510948905, |
| "grad_norm": 0.4721639156341553, |
| "learning_rate": 9.176001304267607e-11, |
| "loss": 1.7100255489349365, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.7124087591240876, |
| "grad_norm": 0.32687824964523315, |
| "learning_rate": 9.167627518825651e-11, |
| "loss": 1.5640754699707031, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.7153284671532847, |
| "grad_norm": 0.4479893147945404, |
| "learning_rate": 9.1592157332646e-11, |
| "loss": 1.6100982427597046, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.7182481751824817, |
| "grad_norm": 0.3041948080062866, |
| "learning_rate": 9.150766034738575e-11, |
| "loss": 1.625577449798584, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.7211678832116788, |
| "grad_norm": 0.523112952709198, |
| "learning_rate": 9.14227851079451e-11, |
| "loss": 1.5553128719329834, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.724087591240876, |
| "grad_norm": 0.37522390484809875, |
| "learning_rate": 9.133753249371251e-11, |
| "loss": 1.6051119565963745, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.727007299270073, |
| "grad_norm": 0.5746657252311707, |
| "learning_rate": 9.125190338798634e-11, |
| "loss": 1.5265920162200928, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.7299270072992701, |
| "grad_norm": 0.48165905475616455, |
| "learning_rate": 9.116589867796582e-11, |
| "loss": 1.7097959518432617, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.7328467153284671, |
| "grad_norm": 1.95722234249115, |
| "learning_rate": 9.107951925474176e-11, |
| "loss": 1.5973304510116577, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.7357664233576642, |
| "grad_norm": 0.4828181266784668, |
| "learning_rate": 9.099276601328738e-11, |
| "loss": 1.4544634819030762, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.7386861313868613, |
| "grad_norm": 0.35941943526268005, |
| "learning_rate": 9.090563985244899e-11, |
| "loss": 1.5493344068527222, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.7416058394160584, |
| "grad_norm": 0.4957440197467804, |
| "learning_rate": 9.081814167493671e-11, |
| "loss": 1.6227343082427979, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.7445255474452555, |
| "grad_norm": 0.8167078495025635, |
| "learning_rate": 9.073027238731513e-11, |
| "loss": 1.6121728420257568, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.7474452554744525, |
| "grad_norm": 0.2773600220680237, |
| "learning_rate": 9.064203289999385e-11, |
| "loss": 1.6567003726959229, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.7503649635036497, |
| "grad_norm": 0.38320213556289673, |
| "learning_rate": 9.055342412721815e-11, |
| "loss": 1.559574842453003, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.7532846715328467, |
| "grad_norm": 0.2601078450679779, |
| "learning_rate": 9.04644469870594e-11, |
| "loss": 1.634887456893921, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.7562043795620438, |
| "grad_norm": 0.3223051428794861, |
| "learning_rate": 9.037510240140563e-11, |
| "loss": 1.5338331460952759, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.7591240875912408, |
| "grad_norm": 0.6172982454299927, |
| "learning_rate": 9.028539129595198e-11, |
| "loss": 1.6613255739212036, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.762043795620438, |
| "grad_norm": 0.2970709204673767, |
| "learning_rate": 9.019531460019105e-11, |
| "loss": 1.66960608959198, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.7649635036496351, |
| "grad_norm": 0.2917795479297638, |
| "learning_rate": 9.010487324740333e-11, |
| "loss": 1.5973072052001953, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.7678832116788321, |
| "grad_norm": 0.40561556816101074, |
| "learning_rate": 9.001406817464748e-11, |
| "loss": 1.8639005422592163, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.7708029197080292, |
| "grad_norm": 0.4241611659526825, |
| "learning_rate": 8.992290032275067e-11, |
| "loss": 1.574570894241333, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.7737226277372263, |
| "grad_norm": 0.3406296670436859, |
| "learning_rate": 8.983137063629879e-11, |
| "loss": 1.5100054740905762, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.7766423357664234, |
| "grad_norm": 0.38461536169052124, |
| "learning_rate": 8.97394800636267e-11, |
| "loss": 1.744242787361145, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.7795620437956204, |
| "grad_norm": 0.37936046719551086, |
| "learning_rate": 8.964722955680835e-11, |
| "loss": 1.5281338691711426, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.7824817518248175, |
| "grad_norm": 0.42358264327049255, |
| "learning_rate": 8.955462007164699e-11, |
| "loss": 1.5278962850570679, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.7854014598540145, |
| "grad_norm": 0.5687382817268372, |
| "learning_rate": 8.946165256766524e-11, |
| "loss": 1.286256194114685, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.7883211678832117, |
| "grad_norm": 0.34203967452049255, |
| "learning_rate": 8.936832800809506e-11, |
| "loss": 1.2884188890457153, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.7912408759124088, |
| "grad_norm": 0.33001580834388733, |
| "learning_rate": 8.927464735986792e-11, |
| "loss": 1.4524775743484497, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.7941605839416058, |
| "grad_norm": 0.24470959603786469, |
| "learning_rate": 8.918061159360468e-11, |
| "loss": 1.5741451978683472, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.7970802919708029, |
| "grad_norm": 0.22205418348312378, |
| "learning_rate": 8.908622168360558e-11, |
| "loss": 1.415770411491394, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.12828628718852997, |
| "learning_rate": 8.899147860784006e-11, |
| "loss": 1.2092006206512451, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.8029197080291971, |
| "grad_norm": 0.19067053496837616, |
| "learning_rate": 8.889638334793682e-11, |
| "loss": 1.2418055534362793, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.8058394160583942, |
| "grad_norm": 0.2096225470304489, |
| "learning_rate": 8.88009368891734e-11, |
| "loss": 1.150226354598999, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.8087591240875912, |
| "grad_norm": 0.23198847472667694, |
| "learning_rate": 8.870514022046617e-11, |
| "loss": 1.1433364152908325, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.8116788321167884, |
| "grad_norm": 0.23092319071292877, |
| "learning_rate": 8.860899433435994e-11, |
| "loss": 1.0709772109985352, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.8145985401459854, |
| "grad_norm": 1.7060472965240479, |
| "learning_rate": 8.851250022701784e-11, |
| "loss": 1.0478689670562744, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.8175182481751825, |
| "grad_norm": 0.20169571042060852, |
| "learning_rate": 8.841565889821084e-11, |
| "loss": 1.0691083669662476, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.8204379562043795, |
| "grad_norm": 0.2585836946964264, |
| "learning_rate": 8.831847135130746e-11, |
| "loss": 1.0405468940734863, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.8233576642335766, |
| "grad_norm": 0.15813350677490234, |
| "learning_rate": 8.822093859326339e-11, |
| "loss": 1.119150996208191, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.8262773722627738, |
| "grad_norm": 0.2605457007884979, |
| "learning_rate": 8.8123061634611e-11, |
| "loss": 1.451040506362915, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.8291970802919708, |
| "grad_norm": 0.29428720474243164, |
| "learning_rate": 8.802484148944897e-11, |
| "loss": 1.498022198677063, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.8321167883211679, |
| "grad_norm": 0.32284432649612427, |
| "learning_rate": 8.792627917543165e-11, |
| "loss": 1.5406850576400757, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.8350364963503649, |
| "grad_norm": 0.2935868799686432, |
| "learning_rate": 8.782737571375861e-11, |
| "loss": 1.4694265127182007, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.8379562043795621, |
| "grad_norm": 0.1954089254140854, |
| "learning_rate": 8.772813212916408e-11, |
| "loss": 1.497194528579712, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.8408759124087591, |
| "grad_norm": 0.29668334126472473, |
| "learning_rate": 8.762854944990623e-11, |
| "loss": 1.4351569414138794, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.8437956204379562, |
| "grad_norm": 0.3149370849132538, |
| "learning_rate": 8.752862870775658e-11, |
| "loss": 1.4598360061645508, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.8467153284671532, |
| "grad_norm": 0.3693007528781891, |
| "learning_rate": 8.742837093798934e-11, |
| "loss": 1.5218864679336548, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.8496350364963504, |
| "grad_norm": 0.27900761365890503, |
| "learning_rate": 8.732777717937063e-11, |
| "loss": 1.4627201557159424, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.8525547445255475, |
| "grad_norm": 0.303118497133255, |
| "learning_rate": 8.72268484741477e-11, |
| "loss": 1.4716007709503174, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.8554744525547445, |
| "grad_norm": 0.3150079846382141, |
| "learning_rate": 8.712558586803822e-11, |
| "loss": 1.4263559579849243, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.8583941605839416, |
| "grad_norm": 0.3285709619522095, |
| "learning_rate": 8.702399041021937e-11, |
| "loss": 1.4390225410461426, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.8613138686131386, |
| "grad_norm": 0.786037266254425, |
| "learning_rate": 8.692206315331697e-11, |
| "loss": 1.4343092441558838, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.8642335766423358, |
| "grad_norm": 0.2554452419281006, |
| "learning_rate": 8.681980515339464e-11, |
| "loss": 1.3966456651687622, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.8671532846715329, |
| "grad_norm": 0.3106688857078552, |
| "learning_rate": 8.671721746994278e-11, |
| "loss": 1.47050142288208, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.8700729927007299, |
| "grad_norm": 0.36648714542388916, |
| "learning_rate": 8.661430116586763e-11, |
| "loss": 1.4842816591262817, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.872992700729927, |
| "grad_norm": 0.2672443985939026, |
| "learning_rate": 8.651105730748023e-11, |
| "loss": 1.5009945631027222, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.8759124087591241, |
| "grad_norm": 0.3231627345085144, |
| "learning_rate": 8.640748696448546e-11, |
| "loss": 1.4662688970565796, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.8788321167883212, |
| "grad_norm": 0.447077214717865, |
| "learning_rate": 8.630359120997084e-11, |
| "loss": 1.4902161359786987, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.8817518248175182, |
| "grad_norm": 1.3244692087173462, |
| "learning_rate": 8.61993711203955e-11, |
| "loss": 1.4570201635360718, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.8846715328467153, |
| "grad_norm": 0.323764443397522, |
| "learning_rate": 8.609482777557891e-11, |
| "loss": 1.4848111867904663, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.8875912408759125, |
| "grad_norm": 0.21813516318798065, |
| "learning_rate": 8.59899622586899e-11, |
| "loss": 1.410193920135498, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.8905109489051095, |
| "grad_norm": 0.4606623947620392, |
| "learning_rate": 8.58847756562352e-11, |
| "loss": 1.4347537755966187, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.8934306569343066, |
| "grad_norm": 0.2534776031970978, |
| "learning_rate": 8.577926905804833e-11, |
| "loss": 1.4541630744934082, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.8963503649635036, |
| "grad_norm": 0.24322006106376648, |
| "learning_rate": 8.567344355727829e-11, |
| "loss": 1.432068943977356, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.8992700729927007, |
| "grad_norm": 0.21942275762557983, |
| "learning_rate": 8.556730025037819e-11, |
| "loss": 1.4721245765686035, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.9021897810218978, |
| "grad_norm": 0.32879260182380676, |
| "learning_rate": 8.546084023709393e-11, |
| "loss": 1.4534900188446045, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.9051094890510949, |
| "grad_norm": 0.33828699588775635, |
| "learning_rate": 8.535406462045279e-11, |
| "loss": 1.4511345624923706, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.908029197080292, |
| "grad_norm": 0.25151824951171875, |
| "learning_rate": 8.524697450675199e-11, |
| "loss": 1.4202616214752197, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.910948905109489, |
| "grad_norm": 0.4613194465637207, |
| "learning_rate": 8.513957100554721e-11, |
| "loss": 1.4088406562805176, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.9138686131386862, |
| "grad_norm": 0.2900555431842804, |
| "learning_rate": 8.503185522964122e-11, |
| "loss": 1.4401381015777588, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.9167883211678832, |
| "grad_norm": 0.2588656544685364, |
| "learning_rate": 8.492382829507215e-11, |
| "loss": 1.497382402420044, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.9197080291970803, |
| "grad_norm": 0.36505982279777527, |
| "learning_rate": 8.48154913211021e-11, |
| "loss": 1.4394062757492065, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.9226277372262773, |
| "grad_norm": 0.23443567752838135, |
| "learning_rate": 8.470684543020542e-11, |
| "loss": 1.4333170652389526, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.9255474452554745, |
| "grad_norm": 0.2820832133293152, |
| "learning_rate": 8.45978917480572e-11, |
| "loss": 1.3851203918457031, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.9284671532846716, |
| "grad_norm": 0.30184608697891235, |
| "learning_rate": 8.448863140352144e-11, |
| "loss": 1.4456335306167603, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.9313868613138686, |
| "grad_norm": 0.3142789304256439, |
| "learning_rate": 8.437906552863958e-11, |
| "loss": 1.450710654258728, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.9343065693430657, |
| "grad_norm": 0.4998232424259186, |
| "learning_rate": 8.426919525861854e-11, |
| "loss": 1.4164352416992188, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.9372262773722628, |
| "grad_norm": 0.2807002365589142, |
| "learning_rate": 8.41590217318191e-11, |
| "loss": 1.4481756687164307, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.9401459854014599, |
| "grad_norm": 0.3465396463871002, |
| "learning_rate": 8.404854608974414e-11, |
| "loss": 1.4438577890396118, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.9430656934306569, |
| "grad_norm": 0.2985764145851135, |
| "learning_rate": 8.39377694770266e-11, |
| "loss": 1.5020420551300049, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.945985401459854, |
| "grad_norm": 0.3235088884830475, |
| "learning_rate": 8.38266930414179e-11, |
| "loss": 1.4755083322525024, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.948905109489051, |
| "grad_norm": 0.23725037276744843, |
| "learning_rate": 8.37153179337758e-11, |
| "loss": 1.4602628946304321, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.9518248175182482, |
| "grad_norm": 0.2622622847557068, |
| "learning_rate": 8.360364530805266e-11, |
| "loss": 1.4472800493240356, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.9547445255474453, |
| "grad_norm": 0.662328839302063, |
| "learning_rate": 8.349167632128342e-11, |
| "loss": 1.4257826805114746, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.9576642335766423, |
| "grad_norm": 0.3589712083339691, |
| "learning_rate": 8.337941213357355e-11, |
| "loss": 1.384074330329895, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.9605839416058394, |
| "grad_norm": 0.2395324409008026, |
| "learning_rate": 8.326685390808711e-11, |
| "loss": 1.3845038414001465, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.9635036496350365, |
| "grad_norm": 0.29251864552497864, |
| "learning_rate": 8.315400281103466e-11, |
| "loss": 1.3974683284759521, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.9664233576642336, |
| "grad_norm": 0.3395976126194, |
| "learning_rate": 8.304086001166119e-11, |
| "loss": 1.4027167558670044, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.9693430656934306, |
| "grad_norm": 0.29036200046539307, |
| "learning_rate": 8.292742668223403e-11, |
| "loss": 1.4590882062911987, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.9722627737226277, |
| "grad_norm": 0.3950836658477783, |
| "learning_rate": 8.281370399803065e-11, |
| "loss": 1.4444341659545898, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.9751824817518249, |
| "grad_norm": 0.2706378400325775, |
| "learning_rate": 8.269969313732654e-11, |
| "loss": 1.4351338148117065, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.9781021897810219, |
| "grad_norm": 0.45173388719558716, |
| "learning_rate": 8.258539528138293e-11, |
| "loss": 1.4339206218719482, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.981021897810219, |
| "grad_norm": 0.25517186522483826, |
| "learning_rate": 8.247081161443463e-11, |
| "loss": 1.4406836032867432, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.983941605839416, |
| "grad_norm": 0.23826317489147186, |
| "learning_rate": 8.23559433236777e-11, |
| "loss": 1.3740425109863281, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.9868613138686131, |
| "grad_norm": 0.37786585092544556, |
| "learning_rate": 8.224079159925721e-11, |
| "loss": 1.4657270908355713, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.9897810218978103, |
| "grad_norm": 0.34262749552726746, |
| "learning_rate": 8.212535763425484e-11, |
| "loss": 1.50901460647583, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.9927007299270073, |
| "grad_norm": 0.505266547203064, |
| "learning_rate": 8.200964262467657e-11, |
| "loss": 1.7625021934509277, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.9956204379562044, |
| "grad_norm": 0.4917811453342438, |
| "learning_rate": 8.189364776944026e-11, |
| "loss": 2.1940176486968994, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.9985401459854014, |
| "grad_norm": 0.4079400300979614, |
| "learning_rate": 8.177737427036323e-11, |
| "loss": 2.142901659011841, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.0014598540145985, |
| "grad_norm": 0.32731911540031433, |
| "learning_rate": 8.166082333214983e-11, |
| "loss": 1.792697548866272, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.0043795620437956, |
| "grad_norm": 0.3955923020839691, |
| "learning_rate": 8.154399616237894e-11, |
| "loss": 1.509110450744629, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.0072992700729928, |
| "grad_norm": 0.23040631413459778, |
| "learning_rate": 8.142689397149148e-11, |
| "loss": 1.4758418798446655, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.0102189781021897, |
| "grad_norm": 0.23065227270126343, |
| "learning_rate": 8.130951797277783e-11, |
| "loss": 1.4547432661056519, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.013138686131387, |
| "grad_norm": 0.27488207817077637, |
| "learning_rate": 8.119186938236524e-11, |
| "loss": 1.5417169332504272, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.0160583941605839, |
| "grad_norm": 0.3516688644886017, |
| "learning_rate": 8.107394941920536e-11, |
| "loss": 1.4036191701889038, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.018978102189781, |
| "grad_norm": 0.641706645488739, |
| "learning_rate": 8.095575930506148e-11, |
| "loss": 1.498883605003357, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.0218978102189782, |
| "grad_norm": 0.2665751874446869, |
| "learning_rate": 8.083730026449588e-11, |
| "loss": 1.324239730834961, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.0248175182481751, |
| "grad_norm": 0.18407660722732544, |
| "learning_rate": 8.07185735248572e-11, |
| "loss": 1.3473355770111084, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.0277372262773723, |
| "grad_norm": 0.32454606890678406, |
| "learning_rate": 8.059958031626771e-11, |
| "loss": 1.3403379917144775, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.0306569343065692, |
| "grad_norm": 0.3869016468524933, |
| "learning_rate": 8.048032187161055e-11, |
| "loss": 1.4723657369613647, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.0335766423357664, |
| "grad_norm": 0.2613857090473175, |
| "learning_rate": 8.036079942651694e-11, |
| "loss": 1.4147893190383911, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.0364963503649636, |
| "grad_norm": 0.19323043525218964, |
| "learning_rate": 8.02410142193534e-11, |
| "loss": 1.2198904752731323, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.0394160583941605, |
| "grad_norm": 0.28235873579978943, |
| "learning_rate": 8.01209674912089e-11, |
| "loss": 1.4131911993026733, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.0423357664233577, |
| "grad_norm": 0.2318020910024643, |
| "learning_rate": 8.00006604858821e-11, |
| "loss": 1.516754388809204, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.0452554744525548, |
| "grad_norm": 0.4528334438800812, |
| "learning_rate": 7.988009444986827e-11, |
| "loss": 1.4079957008361816, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.0481751824817518, |
| "grad_norm": 0.2093593031167984, |
| "learning_rate": 7.975927063234655e-11, |
| "loss": 1.4088261127471924, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.051094890510949, |
| "grad_norm": 0.16769030690193176, |
| "learning_rate": 7.963819028516691e-11, |
| "loss": 1.084653377532959, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.054014598540146, |
| "grad_norm": 0.3169795870780945, |
| "learning_rate": 7.951685466283724e-11, |
| "loss": 1.2415783405303955, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.056934306569343, |
| "grad_norm": 0.3813137114048004, |
| "learning_rate": 7.93952650225103e-11, |
| "loss": 1.4431004524230957, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.0598540145985402, |
| "grad_norm": 0.3302343189716339, |
| "learning_rate": 7.927342262397074e-11, |
| "loss": 1.3890019655227661, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.0627737226277372, |
| "grad_norm": 0.2482709437608719, |
| "learning_rate": 7.915132872962204e-11, |
| "loss": 1.4420756101608276, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.0656934306569343, |
| "grad_norm": 0.3268437683582306, |
| "learning_rate": 7.902898460447333e-11, |
| "loss": 1.5538722276687622, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.0686131386861315, |
| "grad_norm": 0.45657190680503845, |
| "learning_rate": 7.890639151612647e-11, |
| "loss": 1.4880216121673584, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.0715328467153284, |
| "grad_norm": 0.24117252230644226, |
| "learning_rate": 7.878355073476282e-11, |
| "loss": 1.3895787000656128, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.0744525547445256, |
| "grad_norm": 0.25678950548171997, |
| "learning_rate": 7.866046353312994e-11, |
| "loss": 1.428212285041809, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.0773722627737226, |
| "grad_norm": 0.2189822942018509, |
| "learning_rate": 7.853713118652872e-11, |
| "loss": 1.4189248085021973, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.0802919708029197, |
| "grad_norm": 0.30409157276153564, |
| "learning_rate": 7.841355497279987e-11, |
| "loss": 1.4963862895965576, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.0832116788321169, |
| "grad_norm": 0.18158665299415588, |
| "learning_rate": 7.828973617231079e-11, |
| "loss": 1.384456992149353, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.0861313868613138, |
| "grad_norm": 0.28591710329055786, |
| "learning_rate": 7.816567606794239e-11, |
| "loss": 1.1959834098815918, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.089051094890511, |
| "grad_norm": 0.18725569546222687, |
| "learning_rate": 7.804137594507565e-11, |
| "loss": 1.2433326244354248, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.091970802919708, |
| "grad_norm": 0.18714189529418945, |
| "learning_rate": 7.791683709157841e-11, |
| "loss": 1.3405526876449585, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.094890510948905, |
| "grad_norm": 0.3646625876426697, |
| "learning_rate": 7.779206079779196e-11, |
| "loss": 1.3495800495147705, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.0978102189781023, |
| "grad_norm": 0.4056638777256012, |
| "learning_rate": 7.766704835651772e-11, |
| "loss": 1.2475500106811523, |
| "step": 752 |
| }, |
| { |
| "epoch": 1.1007299270072992, |
| "grad_norm": 0.5528532862663269, |
| "learning_rate": 7.75418010630038e-11, |
| "loss": 1.3547694683074951, |
| "step": 754 |
| }, |
| { |
| "epoch": 1.1036496350364964, |
| "grad_norm": 0.45539093017578125, |
| "learning_rate": 7.741632021493161e-11, |
| "loss": 1.3677958250045776, |
| "step": 756 |
| }, |
| { |
| "epoch": 1.1065693430656935, |
| "grad_norm": 0.26975327730178833, |
| "learning_rate": 7.729060711240244e-11, |
| "loss": 1.6038460731506348, |
| "step": 758 |
| }, |
| { |
| "epoch": 1.1094890510948905, |
| "grad_norm": 0.31061655282974243, |
| "learning_rate": 7.71646630579239e-11, |
| "loss": 1.3528101444244385, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.1124087591240877, |
| "grad_norm": 0.5260862708091736, |
| "learning_rate": 7.703848935639653e-11, |
| "loss": 1.6168701648712158, |
| "step": 762 |
| }, |
| { |
| "epoch": 1.1153284671532846, |
| "grad_norm": 0.29489070177078247, |
| "learning_rate": 7.691208731510022e-11, |
| "loss": 1.455543875694275, |
| "step": 764 |
| }, |
| { |
| "epoch": 1.1182481751824818, |
| "grad_norm": 0.42220962047576904, |
| "learning_rate": 7.678545824368068e-11, |
| "loss": 1.2831261157989502, |
| "step": 766 |
| }, |
| { |
| "epoch": 1.121167883211679, |
| "grad_norm": 0.8123577833175659, |
| "learning_rate": 7.665860345413583e-11, |
| "loss": 1.337751865386963, |
| "step": 768 |
| }, |
| { |
| "epoch": 1.1240875912408759, |
| "grad_norm": 0.29676365852355957, |
| "learning_rate": 7.653152426080231e-11, |
| "loss": 1.1918377876281738, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.127007299270073, |
| "grad_norm": 0.2986065447330475, |
| "learning_rate": 7.640422198034175e-11, |
| "loss": 1.2475813627243042, |
| "step": 772 |
| }, |
| { |
| "epoch": 1.12992700729927, |
| "grad_norm": 0.5116667747497559, |
| "learning_rate": 7.627669793172719e-11, |
| "loss": 1.365373969078064, |
| "step": 774 |
| }, |
| { |
| "epoch": 1.1328467153284671, |
| "grad_norm": 0.2317640781402588, |
| "learning_rate": 7.614895343622941e-11, |
| "loss": 1.4166637659072876, |
| "step": 776 |
| }, |
| { |
| "epoch": 1.1357664233576643, |
| "grad_norm": 0.39576828479766846, |
| "learning_rate": 7.60209898174032e-11, |
| "loss": 1.5067970752716064, |
| "step": 778 |
| }, |
| { |
| "epoch": 1.1386861313868613, |
| "grad_norm": 0.2449396550655365, |
| "learning_rate": 7.589280840107372e-11, |
| "loss": 1.4031476974487305, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.1416058394160584, |
| "grad_norm": 0.14816564321517944, |
| "learning_rate": 7.576441051532268e-11, |
| "loss": 1.3129007816314697, |
| "step": 782 |
| }, |
| { |
| "epoch": 1.1445255474452556, |
| "grad_norm": 0.3485988676548004, |
| "learning_rate": 7.563579749047463e-11, |
| "loss": 1.4514999389648438, |
| "step": 784 |
| }, |
| { |
| "epoch": 1.1474452554744525, |
| "grad_norm": 0.5801905393600464, |
| "learning_rate": 7.55069706590832e-11, |
| "loss": 1.5295640230178833, |
| "step": 786 |
| }, |
| { |
| "epoch": 1.1503649635036497, |
| "grad_norm": 0.29618364572525024, |
| "learning_rate": 7.537793135591721e-11, |
| "loss": 1.3779162168502808, |
| "step": 788 |
| }, |
| { |
| "epoch": 1.1532846715328466, |
| "grad_norm": 0.342839777469635, |
| "learning_rate": 7.524868091794691e-11, |
| "loss": 1.4380520582199097, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.1562043795620438, |
| "grad_norm": 0.34696051478385925, |
| "learning_rate": 7.511922068433012e-11, |
| "loss": 1.4846165180206299, |
| "step": 792 |
| }, |
| { |
| "epoch": 1.159124087591241, |
| "grad_norm": 0.7464672327041626, |
| "learning_rate": 7.498955199639834e-11, |
| "loss": 1.4885586500167847, |
| "step": 794 |
| }, |
| { |
| "epoch": 1.162043795620438, |
| "grad_norm": 0.3263891637325287, |
| "learning_rate": 7.485967619764282e-11, |
| "loss": 1.4655848741531372, |
| "step": 796 |
| }, |
| { |
| "epoch": 1.164963503649635, |
| "grad_norm": 0.4067479372024536, |
| "learning_rate": 7.472959463370074e-11, |
| "loss": 1.2892862558364868, |
| "step": 798 |
| }, |
| { |
| "epoch": 1.167883211678832, |
| "grad_norm": 0.3189847469329834, |
| "learning_rate": 7.459930865234115e-11, |
| "loss": 1.4570207595825195, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.1708029197080292, |
| "grad_norm": 0.5341388583183289, |
| "learning_rate": 7.446881960345107e-11, |
| "loss": 1.4906622171401978, |
| "step": 802 |
| }, |
| { |
| "epoch": 1.1737226277372264, |
| "grad_norm": 0.30527299642562866, |
| "learning_rate": 7.433812883902151e-11, |
| "loss": 1.4360853433609009, |
| "step": 804 |
| }, |
| { |
| "epoch": 1.1766423357664233, |
| "grad_norm": 0.26061245799064636, |
| "learning_rate": 7.420723771313342e-11, |
| "loss": 1.499984860420227, |
| "step": 806 |
| }, |
| { |
| "epoch": 1.1795620437956205, |
| "grad_norm": 0.3794720470905304, |
| "learning_rate": 7.407614758194375e-11, |
| "loss": 1.4522984027862549, |
| "step": 808 |
| }, |
| { |
| "epoch": 1.1824817518248176, |
| "grad_norm": 0.32486245036125183, |
| "learning_rate": 7.394485980367121e-11, |
| "loss": 1.3547807931900024, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.1854014598540146, |
| "grad_norm": 0.3289315104484558, |
| "learning_rate": 7.381337573858245e-11, |
| "loss": 1.3755017518997192, |
| "step": 812 |
| }, |
| { |
| "epoch": 1.1883211678832117, |
| "grad_norm": 0.6499447226524353, |
| "learning_rate": 7.36816967489778e-11, |
| "loss": 1.4222335815429688, |
| "step": 814 |
| }, |
| { |
| "epoch": 1.1912408759124087, |
| "grad_norm": 0.26047471165657043, |
| "learning_rate": 7.354982419917714e-11, |
| "loss": 1.3651729822158813, |
| "step": 816 |
| }, |
| { |
| "epoch": 1.1941605839416058, |
| "grad_norm": 0.3220467269420624, |
| "learning_rate": 7.34177594555059e-11, |
| "loss": 1.4682464599609375, |
| "step": 818 |
| }, |
| { |
| "epoch": 1.197080291970803, |
| "grad_norm": 0.26152145862579346, |
| "learning_rate": 7.328550388628072e-11, |
| "loss": 1.3593947887420654, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 0.3044966459274292, |
| "learning_rate": 7.31530588617955e-11, |
| "loss": 1.1967570781707764, |
| "step": 822 |
| }, |
| { |
| "epoch": 1.2029197080291971, |
| "grad_norm": 0.27290457487106323, |
| "learning_rate": 7.3020425754307e-11, |
| "loss": 1.276015281677246, |
| "step": 824 |
| }, |
| { |
| "epoch": 1.205839416058394, |
| "grad_norm": 0.2363789975643158, |
| "learning_rate": 7.288760593802067e-11, |
| "loss": 1.3282839059829712, |
| "step": 826 |
| }, |
| { |
| "epoch": 1.2087591240875912, |
| "grad_norm": 0.25797799229621887, |
| "learning_rate": 7.275460078907654e-11, |
| "loss": 1.3120553493499756, |
| "step": 828 |
| }, |
| { |
| "epoch": 1.2116788321167884, |
| "grad_norm": 0.2650147080421448, |
| "learning_rate": 7.262141168553478e-11, |
| "loss": 1.4232453107833862, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.2145985401459853, |
| "grad_norm": 0.38055944442749023, |
| "learning_rate": 7.248804000736153e-11, |
| "loss": 1.3195230960845947, |
| "step": 832 |
| }, |
| { |
| "epoch": 1.2175182481751825, |
| "grad_norm": 0.8058264255523682, |
| "learning_rate": 7.235448713641457e-11, |
| "loss": 1.3820394277572632, |
| "step": 834 |
| }, |
| { |
| "epoch": 1.2204379562043797, |
| "grad_norm": 0.27823376655578613, |
| "learning_rate": 7.222075445642904e-11, |
| "loss": 1.477295994758606, |
| "step": 836 |
| }, |
| { |
| "epoch": 1.2233576642335766, |
| "grad_norm": 0.30671426653862, |
| "learning_rate": 7.208684335300305e-11, |
| "loss": 1.6407607793807983, |
| "step": 838 |
| }, |
| { |
| "epoch": 1.2262773722627738, |
| "grad_norm": 0.2697727382183075, |
| "learning_rate": 7.195275521358333e-11, |
| "loss": 1.5532571077346802, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.2291970802919707, |
| "grad_norm": 0.34386980533599854, |
| "learning_rate": 7.181849142745091e-11, |
| "loss": 1.5054192543029785, |
| "step": 842 |
| }, |
| { |
| "epoch": 1.2321167883211679, |
| "grad_norm": 0.81573086977005, |
| "learning_rate": 7.168405338570667e-11, |
| "loss": 1.5115678310394287, |
| "step": 844 |
| }, |
| { |
| "epoch": 1.235036496350365, |
| "grad_norm": 0.5397254228591919, |
| "learning_rate": 7.154944248125693e-11, |
| "loss": 1.638497233390808, |
| "step": 846 |
| }, |
| { |
| "epoch": 1.237956204379562, |
| "grad_norm": 0.5890994668006897, |
| "learning_rate": 7.141466010879904e-11, |
| "loss": 1.4858747720718384, |
| "step": 848 |
| }, |
| { |
| "epoch": 1.2408759124087592, |
| "grad_norm": 0.44891178607940674, |
| "learning_rate": 7.127970766480695e-11, |
| "loss": 1.6084527969360352, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.243795620437956, |
| "grad_norm": 0.48259827494621277, |
| "learning_rate": 7.114458654751666e-11, |
| "loss": 1.5594782829284668, |
| "step": 852 |
| }, |
| { |
| "epoch": 1.2467153284671533, |
| "grad_norm": 0.7319297790527344, |
| "learning_rate": 7.100929815691185e-11, |
| "loss": 1.5059024095535278, |
| "step": 854 |
| }, |
| { |
| "epoch": 1.2496350364963504, |
| "grad_norm": 0.5670822262763977, |
| "learning_rate": 7.087384389470928e-11, |
| "loss": 1.0539096593856812, |
| "step": 856 |
| }, |
| { |
| "epoch": 1.2525547445255474, |
| "grad_norm": 0.7238962054252625, |
| "learning_rate": 7.073822516434425e-11, |
| "loss": 1.2365281581878662, |
| "step": 858 |
| }, |
| { |
| "epoch": 1.2554744525547445, |
| "grad_norm": 0.5037800073623657, |
| "learning_rate": 7.060244337095619e-11, |
| "loss": 1.411769986152649, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.2583941605839417, |
| "grad_norm": 0.4767591655254364, |
| "learning_rate": 7.046649992137399e-11, |
| "loss": 1.281144380569458, |
| "step": 862 |
| }, |
| { |
| "epoch": 1.2613138686131387, |
| "grad_norm": 0.3418749272823334, |
| "learning_rate": 7.03303962241014e-11, |
| "loss": 1.2626243829727173, |
| "step": 864 |
| }, |
| { |
| "epoch": 1.2642335766423358, |
| "grad_norm": 0.29436102509498596, |
| "learning_rate": 7.019413368930259e-11, |
| "loss": 1.3997855186462402, |
| "step": 866 |
| }, |
| { |
| "epoch": 1.2671532846715328, |
| "grad_norm": 0.5215297341346741, |
| "learning_rate": 7.00577137287873e-11, |
| "loss": 1.2505439519882202, |
| "step": 868 |
| }, |
| { |
| "epoch": 1.27007299270073, |
| "grad_norm": 0.8407970666885376, |
| "learning_rate": 6.992113775599654e-11, |
| "loss": 0.9684967994689941, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.2729927007299269, |
| "grad_norm": 1.0256235599517822, |
| "learning_rate": 6.978440718598756e-11, |
| "loss": 1.1375266313552856, |
| "step": 872 |
| }, |
| { |
| "epoch": 1.275912408759124, |
| "grad_norm": 0.5392014384269714, |
| "learning_rate": 6.964752343541952e-11, |
| "loss": 1.3095382452011108, |
| "step": 874 |
| }, |
| { |
| "epoch": 1.2788321167883212, |
| "grad_norm": 0.3686670958995819, |
| "learning_rate": 6.95104879225386e-11, |
| "loss": 1.0908489227294922, |
| "step": 876 |
| }, |
| { |
| "epoch": 1.2817518248175181, |
| "grad_norm": 0.5632182359695435, |
| "learning_rate": 6.937330206716343e-11, |
| "loss": 1.1164169311523438, |
| "step": 878 |
| }, |
| { |
| "epoch": 1.2846715328467153, |
| "grad_norm": 0.32079240679740906, |
| "learning_rate": 6.923596729067029e-11, |
| "loss": 1.232709527015686, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.2875912408759125, |
| "grad_norm": 0.3154524564743042, |
| "learning_rate": 6.909848501597848e-11, |
| "loss": 1.255340576171875, |
| "step": 882 |
| }, |
| { |
| "epoch": 1.2905109489051094, |
| "grad_norm": 0.28902700543403625, |
| "learning_rate": 6.896085666753544e-11, |
| "loss": 1.377984881401062, |
| "step": 884 |
| }, |
| { |
| "epoch": 1.2934306569343066, |
| "grad_norm": 0.4770423173904419, |
| "learning_rate": 6.882308367130217e-11, |
| "loss": 1.4664411544799805, |
| "step": 886 |
| }, |
| { |
| "epoch": 1.2963503649635038, |
| "grad_norm": 1.9368946552276611, |
| "learning_rate": 6.868516745473831e-11, |
| "loss": 1.1397716999053955, |
| "step": 888 |
| }, |
| { |
| "epoch": 1.2992700729927007, |
| "grad_norm": 0.6642079949378967, |
| "learning_rate": 6.854710944678737e-11, |
| "loss": 1.1195887327194214, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.3021897810218979, |
| "grad_norm": 0.4591664671897888, |
| "learning_rate": 6.840891107786203e-11, |
| "loss": 1.4305362701416016, |
| "step": 892 |
| }, |
| { |
| "epoch": 1.305109489051095, |
| "grad_norm": 0.4840669631958008, |
| "learning_rate": 6.827057377982915e-11, |
| "loss": 1.4628136157989502, |
| "step": 894 |
| }, |
| { |
| "epoch": 1.308029197080292, |
| "grad_norm": 0.3429550230503082, |
| "learning_rate": 6.81320989859951e-11, |
| "loss": 1.4985345602035522, |
| "step": 896 |
| }, |
| { |
| "epoch": 1.310948905109489, |
| "grad_norm": 0.3146049380302429, |
| "learning_rate": 6.799348813109082e-11, |
| "loss": 1.6390399932861328, |
| "step": 898 |
| }, |
| { |
| "epoch": 1.313868613138686, |
| "grad_norm": 0.23190069198608398, |
| "learning_rate": 6.785474265125695e-11, |
| "loss": 1.2628161907196045, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.3167883211678832, |
| "grad_norm": 0.4373994767665863, |
| "learning_rate": 6.771586398402901e-11, |
| "loss": 1.3325560092926025, |
| "step": 902 |
| }, |
| { |
| "epoch": 1.3197080291970802, |
| "grad_norm": 0.3247528672218323, |
| "learning_rate": 6.757685356832243e-11, |
| "loss": 1.2263948917388916, |
| "step": 904 |
| }, |
| { |
| "epoch": 1.3226277372262774, |
| "grad_norm": 0.2387562245130539, |
| "learning_rate": 6.743771284441771e-11, |
| "loss": 1.206580638885498, |
| "step": 906 |
| }, |
| { |
| "epoch": 1.3255474452554745, |
| "grad_norm": 0.5596776008605957, |
| "learning_rate": 6.729844325394546e-11, |
| "loss": 1.2958136796951294, |
| "step": 908 |
| }, |
| { |
| "epoch": 1.3284671532846715, |
| "grad_norm": 0.20952443778514862, |
| "learning_rate": 6.715904623987145e-11, |
| "loss": 1.6216472387313843, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.3313868613138686, |
| "grad_norm": 0.5061504244804382, |
| "learning_rate": 6.701952324648167e-11, |
| "loss": 1.4907279014587402, |
| "step": 912 |
| }, |
| { |
| "epoch": 1.3343065693430658, |
| "grad_norm": 0.3604317903518677, |
| "learning_rate": 6.687987571936747e-11, |
| "loss": 1.1069793701171875, |
| "step": 914 |
| }, |
| { |
| "epoch": 1.3372262773722627, |
| "grad_norm": 0.2837623953819275, |
| "learning_rate": 6.674010510541039e-11, |
| "loss": 1.3102599382400513, |
| "step": 916 |
| }, |
| { |
| "epoch": 1.34014598540146, |
| "grad_norm": 0.4640481472015381, |
| "learning_rate": 6.660021285276727e-11, |
| "loss": 1.6540470123291016, |
| "step": 918 |
| }, |
| { |
| "epoch": 1.343065693430657, |
| "grad_norm": 0.3107167184352875, |
| "learning_rate": 6.646020041085529e-11, |
| "loss": 1.4386550188064575, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.345985401459854, |
| "grad_norm": 0.3694753646850586, |
| "learning_rate": 6.632006923033689e-11, |
| "loss": 1.5420114994049072, |
| "step": 922 |
| }, |
| { |
| "epoch": 1.348905109489051, |
| "grad_norm": 0.26372024416923523, |
| "learning_rate": 6.617982076310476e-11, |
| "loss": 1.5645748376846313, |
| "step": 924 |
| }, |
| { |
| "epoch": 1.3518248175182481, |
| "grad_norm": 0.9855953454971313, |
| "learning_rate": 6.603945646226675e-11, |
| "loss": 1.351561188697815, |
| "step": 926 |
| }, |
| { |
| "epoch": 1.3547445255474453, |
| "grad_norm": 0.425919771194458, |
| "learning_rate": 6.58989777821309e-11, |
| "loss": 1.2176544666290283, |
| "step": 928 |
| }, |
| { |
| "epoch": 1.3576642335766422, |
| "grad_norm": 0.26680389046669006, |
| "learning_rate": 6.575838617819036e-11, |
| "loss": 1.3588489294052124, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.3605839416058394, |
| "grad_norm": 0.49709874391555786, |
| "learning_rate": 6.561768310710817e-11, |
| "loss": 1.4943910837173462, |
| "step": 932 |
| }, |
| { |
| "epoch": 1.3635036496350366, |
| "grad_norm": 0.5887729525566101, |
| "learning_rate": 6.547687002670238e-11, |
| "loss": 1.7052679061889648, |
| "step": 934 |
| }, |
| { |
| "epoch": 1.3664233576642335, |
| "grad_norm": 0.4863436222076416, |
| "learning_rate": 6.53359483959308e-11, |
| "loss": 1.5693230628967285, |
| "step": 936 |
| }, |
| { |
| "epoch": 1.3693430656934307, |
| "grad_norm": 0.57687908411026, |
| "learning_rate": 6.519491967487592e-11, |
| "loss": 1.7266438007354736, |
| "step": 938 |
| }, |
| { |
| "epoch": 1.3722627737226278, |
| "grad_norm": 0.27145904302597046, |
| "learning_rate": 6.505378532472983e-11, |
| "loss": 1.4036791324615479, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.3751824817518248, |
| "grad_norm": 0.4891437292098999, |
| "learning_rate": 6.491254680777894e-11, |
| "loss": 1.4227482080459595, |
| "step": 942 |
| }, |
| { |
| "epoch": 1.378102189781022, |
| "grad_norm": 0.2843855619430542, |
| "learning_rate": 6.477120558738903e-11, |
| "loss": 1.0680041313171387, |
| "step": 944 |
| }, |
| { |
| "epoch": 1.3810218978102191, |
| "grad_norm": 0.5112000703811646, |
| "learning_rate": 6.462976312798997e-11, |
| "loss": 1.4661967754364014, |
| "step": 946 |
| }, |
| { |
| "epoch": 1.383941605839416, |
| "grad_norm": 1.1939078569412231, |
| "learning_rate": 6.44882208950605e-11, |
| "loss": 1.5780178308486938, |
| "step": 948 |
| }, |
| { |
| "epoch": 1.3868613138686132, |
| "grad_norm": 0.5088428258895874, |
| "learning_rate": 6.434658035511315e-11, |
| "loss": 1.398094892501831, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.3897810218978102, |
| "grad_norm": 0.2530817985534668, |
| "learning_rate": 6.420484297567905e-11, |
| "loss": 1.348752737045288, |
| "step": 952 |
| }, |
| { |
| "epoch": 1.3927007299270073, |
| "grad_norm": 0.8184604048728943, |
| "learning_rate": 6.406301022529257e-11, |
| "loss": 1.2859934568405151, |
| "step": 954 |
| }, |
| { |
| "epoch": 1.3956204379562043, |
| "grad_norm": 0.2624606490135193, |
| "learning_rate": 6.392108357347634e-11, |
| "loss": 1.185379981994629, |
| "step": 956 |
| }, |
| { |
| "epoch": 1.3985401459854014, |
| "grad_norm": 0.3192046582698822, |
| "learning_rate": 6.377906449072578e-11, |
| "loss": 1.074135422706604, |
| "step": 958 |
| }, |
| { |
| "epoch": 1.4014598540145986, |
| "grad_norm": 1.6579471826553345, |
| "learning_rate": 6.363695444849407e-11, |
| "loss": 1.3976491689682007, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.4043795620437955, |
| "grad_norm": 1.3667290210723877, |
| "learning_rate": 6.349475491917677e-11, |
| "loss": 1.4886680841445923, |
| "step": 962 |
| }, |
| { |
| "epoch": 1.4072992700729927, |
| "grad_norm": 0.25920569896698, |
| "learning_rate": 6.335246737609664e-11, |
| "loss": 1.5233007669448853, |
| "step": 964 |
| }, |
| { |
| "epoch": 1.4102189781021899, |
| "grad_norm": 0.2395065426826477, |
| "learning_rate": 6.321009329348832e-11, |
| "loss": 1.3459280729293823, |
| "step": 966 |
| }, |
| { |
| "epoch": 1.4131386861313868, |
| "grad_norm": 0.6280210614204407, |
| "learning_rate": 6.306763414648311e-11, |
| "loss": 1.6271347999572754, |
| "step": 968 |
| }, |
| { |
| "epoch": 1.416058394160584, |
| "grad_norm": 0.2879810631275177, |
| "learning_rate": 6.292509141109364e-11, |
| "loss": 1.3719046115875244, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.4189781021897812, |
| "grad_norm": 0.1828605681657791, |
| "learning_rate": 6.278246656419859e-11, |
| "loss": 1.2339953184127808, |
| "step": 972 |
| }, |
| { |
| "epoch": 1.421897810218978, |
| "grad_norm": 0.4212108552455902, |
| "learning_rate": 6.263976108352739e-11, |
| "loss": 1.4191358089447021, |
| "step": 974 |
| }, |
| { |
| "epoch": 1.4248175182481753, |
| "grad_norm": 0.576755166053772, |
| "learning_rate": 6.249697644764493e-11, |
| "loss": 1.3736066818237305, |
| "step": 976 |
| }, |
| { |
| "epoch": 1.4277372262773722, |
| "grad_norm": 0.39886170625686646, |
| "learning_rate": 6.235411413593627e-11, |
| "loss": 1.3101552724838257, |
| "step": 978 |
| }, |
| { |
| "epoch": 1.4306569343065694, |
| "grad_norm": 3.171804666519165, |
| "learning_rate": 6.221117562859115e-11, |
| "loss": 1.160914421081543, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.4335766423357663, |
| "grad_norm": 0.28551721572875977, |
| "learning_rate": 6.206816240658887e-11, |
| "loss": 1.2121726274490356, |
| "step": 982 |
| }, |
| { |
| "epoch": 1.4364963503649635, |
| "grad_norm": 0.41442498564720154, |
| "learning_rate": 6.192507595168279e-11, |
| "loss": 1.4567291736602783, |
| "step": 984 |
| }, |
| { |
| "epoch": 1.4394160583941606, |
| "grad_norm": 0.2783399224281311, |
| "learning_rate": 6.178191774638506e-11, |
| "loss": 1.2854819297790527, |
| "step": 986 |
| }, |
| { |
| "epoch": 1.4423357664233576, |
| "grad_norm": 0.3941711187362671, |
| "learning_rate": 6.163868927395123e-11, |
| "loss": 1.473130226135254, |
| "step": 988 |
| }, |
| { |
| "epoch": 1.4452554744525548, |
| "grad_norm": 0.34142744541168213, |
| "learning_rate": 6.149539201836484e-11, |
| "loss": 1.5927597284317017, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.448175182481752, |
| "grad_norm": 0.2587209641933441, |
| "learning_rate": 6.135202746432217e-11, |
| "loss": 1.50771963596344, |
| "step": 992 |
| }, |
| { |
| "epoch": 1.4510948905109489, |
| "grad_norm": 0.23753675818443298, |
| "learning_rate": 6.120859709721674e-11, |
| "loss": 1.2842893600463867, |
| "step": 994 |
| }, |
| { |
| "epoch": 1.454014598540146, |
| "grad_norm": 0.3801853060722351, |
| "learning_rate": 6.106510240312393e-11, |
| "loss": 1.3453171253204346, |
| "step": 996 |
| }, |
| { |
| "epoch": 1.4569343065693432, |
| "grad_norm": 0.3056936264038086, |
| "learning_rate": 6.092154486878562e-11, |
| "loss": 1.2746431827545166, |
| "step": 998 |
| }, |
| { |
| "epoch": 1.4598540145985401, |
| "grad_norm": 0.7941753268241882, |
| "learning_rate": 6.077792598159479e-11, |
| "loss": 1.3984593152999878, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.4627737226277373, |
| "grad_norm": 0.7185998558998108, |
| "learning_rate": 6.06342472295801e-11, |
| "loss": 1.588934302330017, |
| "step": 1002 |
| }, |
| { |
| "epoch": 1.4656934306569342, |
| "grad_norm": 0.36543312668800354, |
| "learning_rate": 6.049051010139046e-11, |
| "loss": 1.4541923999786377, |
| "step": 1004 |
| }, |
| { |
| "epoch": 1.4686131386861314, |
| "grad_norm": 0.39861613512039185, |
| "learning_rate": 6.034671608627957e-11, |
| "loss": 1.330125331878662, |
| "step": 1006 |
| }, |
| { |
| "epoch": 1.4715328467153284, |
| "grad_norm": 0.39358770847320557, |
| "learning_rate": 6.020286667409061e-11, |
| "loss": 1.4012455940246582, |
| "step": 1008 |
| }, |
| { |
| "epoch": 1.4744525547445255, |
| "grad_norm": 0.6596587896347046, |
| "learning_rate": 6.005896335524069e-11, |
| "loss": 1.3372608423233032, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.4773722627737227, |
| "grad_norm": 0.19527080655097961, |
| "learning_rate": 5.99150076207054e-11, |
| "loss": 1.2573559284210205, |
| "step": 1012 |
| }, |
| { |
| "epoch": 1.4802919708029196, |
| "grad_norm": 0.439053475856781, |
| "learning_rate": 5.977100096200347e-11, |
| "loss": 1.3484768867492676, |
| "step": 1014 |
| }, |
| { |
| "epoch": 1.4832116788321168, |
| "grad_norm": 1.0919126272201538, |
| "learning_rate": 5.962694487118122e-11, |
| "loss": 1.2934248447418213, |
| "step": 1016 |
| }, |
| { |
| "epoch": 1.486131386861314, |
| "grad_norm": 0.4834900498390198, |
| "learning_rate": 5.948284084079716e-11, |
| "loss": 1.359639286994934, |
| "step": 1018 |
| }, |
| { |
| "epoch": 1.489051094890511, |
| "grad_norm": 0.32894521951675415, |
| "learning_rate": 5.933869036390651e-11, |
| "loss": 1.3357242345809937, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.491970802919708, |
| "grad_norm": 0.25190240144729614, |
| "learning_rate": 5.919449493404563e-11, |
| "loss": 1.5525610446929932, |
| "step": 1022 |
| }, |
| { |
| "epoch": 1.4948905109489052, |
| "grad_norm": 0.36443647742271423, |
| "learning_rate": 5.905025604521674e-11, |
| "loss": 1.7397009134292603, |
| "step": 1024 |
| }, |
| { |
| "epoch": 1.4978102189781022, |
| "grad_norm": 0.28807729482650757, |
| "learning_rate": 5.890597519187229e-11, |
| "loss": 1.5908589363098145, |
| "step": 1026 |
| }, |
| { |
| "epoch": 1.5007299270072991, |
| "grad_norm": 1.0596458911895752, |
| "learning_rate": 5.876165386889952e-11, |
| "loss": 1.2815005779266357, |
| "step": 1028 |
| }, |
| { |
| "epoch": 1.5036496350364965, |
| "grad_norm": 0.6740530133247375, |
| "learning_rate": 5.861729357160501e-11, |
| "loss": 1.121368408203125, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.5065693430656935, |
| "grad_norm": 0.25546136498451233, |
| "learning_rate": 5.84728957956991e-11, |
| "loss": 1.2286404371261597, |
| "step": 1032 |
| }, |
| { |
| "epoch": 1.5094890510948904, |
| "grad_norm": 0.6252645254135132, |
| "learning_rate": 5.832846203728045e-11, |
| "loss": 1.3033808469772339, |
| "step": 1034 |
| }, |
| { |
| "epoch": 1.5124087591240876, |
| "grad_norm": 0.5182809829711914, |
| "learning_rate": 5.81839937928206e-11, |
| "loss": 1.7270663976669312, |
| "step": 1036 |
| }, |
| { |
| "epoch": 1.5153284671532847, |
| "grad_norm": 0.458566814661026, |
| "learning_rate": 5.8039492559148314e-11, |
| "loss": 1.8393914699554443, |
| "step": 1038 |
| }, |
| { |
| "epoch": 1.5182481751824817, |
| "grad_norm": 0.522005021572113, |
| "learning_rate": 5.789495983343418e-11, |
| "loss": 1.5257136821746826, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.5211678832116788, |
| "grad_norm": 0.3969437777996063, |
| "learning_rate": 5.7750397113175114e-11, |
| "loss": 1.3572144508361816, |
| "step": 1042 |
| }, |
| { |
| "epoch": 1.524087591240876, |
| "grad_norm": 0.22145526111125946, |
| "learning_rate": 5.760580589617876e-11, |
| "loss": 1.3691705465316772, |
| "step": 1044 |
| }, |
| { |
| "epoch": 1.527007299270073, |
| "grad_norm": 0.8607689142227173, |
| "learning_rate": 5.746118768054806e-11, |
| "loss": 1.4887243509292603, |
| "step": 1046 |
| }, |
| { |
| "epoch": 1.5299270072992701, |
| "grad_norm": 0.5395622253417969, |
| "learning_rate": 5.73165439646656e-11, |
| "loss": 1.6568856239318848, |
| "step": 1048 |
| }, |
| { |
| "epoch": 1.5328467153284673, |
| "grad_norm": 0.587317168712616, |
| "learning_rate": 5.717187624717827e-11, |
| "loss": 1.622255563735962, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.5357664233576642, |
| "grad_norm": 0.40985527634620667, |
| "learning_rate": 5.7027186026981604e-11, |
| "loss": 1.221044659614563, |
| "step": 1052 |
| }, |
| { |
| "epoch": 1.5386861313868612, |
| "grad_norm": 0.7627142071723938, |
| "learning_rate": 5.6882474803204254e-11, |
| "loss": 1.3784704208374023, |
| "step": 1054 |
| }, |
| { |
| "epoch": 1.5416058394160586, |
| "grad_norm": 0.7809776067733765, |
| "learning_rate": 5.673774407519253e-11, |
| "loss": 1.5165852308273315, |
| "step": 1056 |
| }, |
| { |
| "epoch": 1.5445255474452555, |
| "grad_norm": 0.7891870737075806, |
| "learning_rate": 5.6592995342494836e-11, |
| "loss": 1.712707757949829, |
| "step": 1058 |
| }, |
| { |
| "epoch": 1.5474452554744524, |
| "grad_norm": 0.7284282445907593, |
| "learning_rate": 5.644823010484604e-11, |
| "loss": 1.7109613418579102, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.5503649635036496, |
| "grad_norm": 0.4619555175304413, |
| "learning_rate": 5.6303449862152144e-11, |
| "loss": 1.3397283554077148, |
| "step": 1062 |
| }, |
| { |
| "epoch": 1.5532846715328468, |
| "grad_norm": 0.7122825980186462, |
| "learning_rate": 5.61586561144745e-11, |
| "loss": 1.565988540649414, |
| "step": 1064 |
| }, |
| { |
| "epoch": 1.5562043795620437, |
| "grad_norm": 0.3608238101005554, |
| "learning_rate": 5.601385036201443e-11, |
| "loss": 1.2163538932800293, |
| "step": 1066 |
| }, |
| { |
| "epoch": 1.5591240875912409, |
| "grad_norm": 1.3949124813079834, |
| "learning_rate": 5.586903410509765e-11, |
| "loss": 1.230873942375183, |
| "step": 1068 |
| }, |
| { |
| "epoch": 1.562043795620438, |
| "grad_norm": 0.38245004415512085, |
| "learning_rate": 5.572420884415871e-11, |
| "loss": 1.021897315979004, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.564963503649635, |
| "grad_norm": 0.34170427918434143, |
| "learning_rate": 5.557937607972542e-11, |
| "loss": 1.0241258144378662, |
| "step": 1072 |
| }, |
| { |
| "epoch": 1.5678832116788322, |
| "grad_norm": 0.315836638212204, |
| "learning_rate": 5.543453731240338e-11, |
| "loss": 1.202805519104004, |
| "step": 1074 |
| }, |
| { |
| "epoch": 1.5708029197080293, |
| "grad_norm": 0.3503977656364441, |
| "learning_rate": 5.528969404286032e-11, |
| "loss": 1.213181734085083, |
| "step": 1076 |
| }, |
| { |
| "epoch": 1.5737226277372263, |
| "grad_norm": 0.46344345808029175, |
| "learning_rate": 5.51448477718107e-11, |
| "loss": 1.1099185943603516, |
| "step": 1078 |
| }, |
| { |
| "epoch": 1.5766423357664232, |
| "grad_norm": 0.37887194752693176, |
| "learning_rate": 5.5000000000000004e-11, |
| "loss": 1.25699782371521, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.5795620437956206, |
| "grad_norm": 0.26375895738601685, |
| "learning_rate": 5.485515222818931e-11, |
| "loss": 1.2985727787017822, |
| "step": 1082 |
| }, |
| { |
| "epoch": 1.5824817518248175, |
| "grad_norm": 0.587094783782959, |
| "learning_rate": 5.4710305957139695e-11, |
| "loss": 1.3389722108840942, |
| "step": 1084 |
| }, |
| { |
| "epoch": 1.5854014598540145, |
| "grad_norm": 0.26151588559150696, |
| "learning_rate": 5.4565462687596646e-11, |
| "loss": 1.3401594161987305, |
| "step": 1086 |
| }, |
| { |
| "epoch": 1.5883211678832116, |
| "grad_norm": 0.33108699321746826, |
| "learning_rate": 5.442062392027459e-11, |
| "loss": 1.1463489532470703, |
| "step": 1088 |
| }, |
| { |
| "epoch": 1.5912408759124088, |
| "grad_norm": 0.12337132543325424, |
| "learning_rate": 5.4275791155841305e-11, |
| "loss": 1.1203198432922363, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.5941605839416058, |
| "grad_norm": 0.4239310622215271, |
| "learning_rate": 5.4130965894902354e-11, |
| "loss": 1.1963176727294922, |
| "step": 1092 |
| }, |
| { |
| "epoch": 1.597080291970803, |
| "grad_norm": 0.3204934000968933, |
| "learning_rate": 5.398614963798558e-11, |
| "loss": 1.313279151916504, |
| "step": 1094 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 0.28497034311294556, |
| "learning_rate": 5.384134388552552e-11, |
| "loss": 1.5193721055984497, |
| "step": 1096 |
| }, |
| { |
| "epoch": 1.602919708029197, |
| "grad_norm": 0.1569192260503769, |
| "learning_rate": 5.3696550137847864e-11, |
| "loss": 1.2503010034561157, |
| "step": 1098 |
| }, |
| { |
| "epoch": 1.6058394160583942, |
| "grad_norm": 0.7396665811538696, |
| "learning_rate": 5.355176989515396e-11, |
| "loss": 1.1561777591705322, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.6087591240875914, |
| "grad_norm": 0.27860796451568604, |
| "learning_rate": 5.3407004657505185e-11, |
| "loss": 1.3068069219589233, |
| "step": 1102 |
| }, |
| { |
| "epoch": 1.6116788321167883, |
| "grad_norm": 0.653042733669281, |
| "learning_rate": 5.326225592480748e-11, |
| "loss": 1.439526081085205, |
| "step": 1104 |
| }, |
| { |
| "epoch": 1.6145985401459853, |
| "grad_norm": 0.335859090089798, |
| "learning_rate": 5.3117525196795767e-11, |
| "loss": 1.2161372900009155, |
| "step": 1106 |
| }, |
| { |
| "epoch": 1.6175182481751826, |
| "grad_norm": 0.3679628074169159, |
| "learning_rate": 5.297281397301841e-11, |
| "loss": 1.167518973350525, |
| "step": 1108 |
| }, |
| { |
| "epoch": 1.6204379562043796, |
| "grad_norm": 0.17621150612831116, |
| "learning_rate": 5.282812375282173e-11, |
| "loss": 1.581429362297058, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.6233576642335765, |
| "grad_norm": 0.3105796277523041, |
| "learning_rate": 5.268345603533441e-11, |
| "loss": 1.1999684572219849, |
| "step": 1112 |
| }, |
| { |
| "epoch": 1.6262773722627737, |
| "grad_norm": 0.5713252425193787, |
| "learning_rate": 5.2538812319451956e-11, |
| "loss": 1.129384994506836, |
| "step": 1114 |
| }, |
| { |
| "epoch": 1.6291970802919709, |
| "grad_norm": 0.245794415473938, |
| "learning_rate": 5.2394194103821245e-11, |
| "loss": 1.1704528331756592, |
| "step": 1116 |
| }, |
| { |
| "epoch": 1.6321167883211678, |
| "grad_norm": 0.8822201490402222, |
| "learning_rate": 5.22496028868249e-11, |
| "loss": 1.220594048500061, |
| "step": 1118 |
| }, |
| { |
| "epoch": 1.635036496350365, |
| "grad_norm": 0.394856721162796, |
| "learning_rate": 5.210504016656583e-11, |
| "loss": 1.506517767906189, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.6379562043795621, |
| "grad_norm": 0.3811044991016388, |
| "learning_rate": 5.1960507440851726e-11, |
| "loss": 1.4898362159729004, |
| "step": 1122 |
| }, |
| { |
| "epoch": 1.640875912408759, |
| "grad_norm": 0.7341606020927429, |
| "learning_rate": 5.1816006207179414e-11, |
| "loss": 1.4160077571868896, |
| "step": 1124 |
| }, |
| { |
| "epoch": 1.6437956204379562, |
| "grad_norm": 0.27225232124328613, |
| "learning_rate": 5.167153796271955e-11, |
| "loss": 1.1867581605911255, |
| "step": 1126 |
| }, |
| { |
| "epoch": 1.6467153284671534, |
| "grad_norm": 0.250491738319397, |
| "learning_rate": 5.152710420430091e-11, |
| "loss": 1.469104290008545, |
| "step": 1128 |
| }, |
| { |
| "epoch": 1.6496350364963503, |
| "grad_norm": 0.3392203748226166, |
| "learning_rate": 5.1382706428395e-11, |
| "loss": 1.2819688320159912, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.6525547445255473, |
| "grad_norm": 0.2684314548969269, |
| "learning_rate": 5.1238346131100486e-11, |
| "loss": 1.3832699060440063, |
| "step": 1132 |
| }, |
| { |
| "epoch": 1.6554744525547447, |
| "grad_norm": 0.27301549911499023, |
| "learning_rate": 5.1094024808127713e-11, |
| "loss": 1.3297004699707031, |
| "step": 1134 |
| }, |
| { |
| "epoch": 1.6583941605839416, |
| "grad_norm": 0.8703414797782898, |
| "learning_rate": 5.094974395478327e-11, |
| "loss": 1.3337628841400146, |
| "step": 1136 |
| }, |
| { |
| "epoch": 1.6613138686131386, |
| "grad_norm": 0.48248016834259033, |
| "learning_rate": 5.080550506595439e-11, |
| "loss": 1.4017032384872437, |
| "step": 1138 |
| }, |
| { |
| "epoch": 1.6642335766423357, |
| "grad_norm": 0.6301165223121643, |
| "learning_rate": 5.0661309636093525e-11, |
| "loss": 1.3380167484283447, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.667153284671533, |
| "grad_norm": 0.5514047741889954, |
| "learning_rate": 5.0517159159202843e-11, |
| "loss": 1.1581861972808838, |
| "step": 1142 |
| }, |
| { |
| "epoch": 1.6700729927007298, |
| "grad_norm": 0.3049074709415436, |
| "learning_rate": 5.037305512881878e-11, |
| "loss": 1.3807573318481445, |
| "step": 1144 |
| }, |
| { |
| "epoch": 1.672992700729927, |
| "grad_norm": 0.42181819677352905, |
| "learning_rate": 5.022899903799655e-11, |
| "loss": 1.4547597169876099, |
| "step": 1146 |
| }, |
| { |
| "epoch": 1.6759124087591242, |
| "grad_norm": 3.0331032276153564, |
| "learning_rate": 5.0084992379294625e-11, |
| "loss": 1.6627700328826904, |
| "step": 1148 |
| }, |
| { |
| "epoch": 1.6788321167883211, |
| "grad_norm": 0.5782711505889893, |
| "learning_rate": 4.994103664475933e-11, |
| "loss": 1.1384409666061401, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.6817518248175183, |
| "grad_norm": 0.2746776342391968, |
| "learning_rate": 4.979713332590939e-11, |
| "loss": 1.2250109910964966, |
| "step": 1152 |
| }, |
| { |
| "epoch": 1.6846715328467154, |
| "grad_norm": 0.3908049464225769, |
| "learning_rate": 4.9653283913720435e-11, |
| "loss": 1.4306840896606445, |
| "step": 1154 |
| }, |
| { |
| "epoch": 1.6875912408759124, |
| "grad_norm": 0.4787120521068573, |
| "learning_rate": 4.950948989860955e-11, |
| "loss": 1.584275722503662, |
| "step": 1156 |
| }, |
| { |
| "epoch": 1.6905109489051093, |
| "grad_norm": 0.3665141463279724, |
| "learning_rate": 4.9365752770419915e-11, |
| "loss": 1.496099591255188, |
| "step": 1158 |
| }, |
| { |
| "epoch": 1.6934306569343067, |
| "grad_norm": 0.36038002371788025, |
| "learning_rate": 4.922207401840521e-11, |
| "loss": 1.3737331628799438, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.6963503649635037, |
| "grad_norm": 0.38061758875846863, |
| "learning_rate": 4.9078455131214394e-11, |
| "loss": 1.3694974184036255, |
| "step": 1162 |
| }, |
| { |
| "epoch": 1.6992700729927006, |
| "grad_norm": 0.28912433981895447, |
| "learning_rate": 4.8934897596876084e-11, |
| "loss": 1.5625628232955933, |
| "step": 1164 |
| }, |
| { |
| "epoch": 1.7021897810218978, |
| "grad_norm": 0.3381723463535309, |
| "learning_rate": 4.879140290278327e-11, |
| "loss": 1.3396795988082886, |
| "step": 1166 |
| }, |
| { |
| "epoch": 1.705109489051095, |
| "grad_norm": 0.21722273528575897, |
| "learning_rate": 4.8647972535677835e-11, |
| "loss": 1.4543912410736084, |
| "step": 1168 |
| }, |
| { |
| "epoch": 1.7080291970802919, |
| "grad_norm": 0.7460881471633911, |
| "learning_rate": 4.8504607981635173e-11, |
| "loss": 1.4622814655303955, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.710948905109489, |
| "grad_norm": 1.0124627351760864, |
| "learning_rate": 4.83613107260488e-11, |
| "loss": 1.5359344482421875, |
| "step": 1172 |
| }, |
| { |
| "epoch": 1.7138686131386862, |
| "grad_norm": 0.3741784393787384, |
| "learning_rate": 4.821808225361497e-11, |
| "loss": 1.4243507385253906, |
| "step": 1174 |
| }, |
| { |
| "epoch": 1.7167883211678832, |
| "grad_norm": 0.3962346911430359, |
| "learning_rate": 4.8074924048317217e-11, |
| "loss": 1.4934279918670654, |
| "step": 1176 |
| }, |
| { |
| "epoch": 1.7197080291970803, |
| "grad_norm": 0.4626739025115967, |
| "learning_rate": 4.793183759341114e-11, |
| "loss": 1.478497862815857, |
| "step": 1178 |
| }, |
| { |
| "epoch": 1.7226277372262775, |
| "grad_norm": 0.3711540102958679, |
| "learning_rate": 4.7788824371408855e-11, |
| "loss": 1.4607126712799072, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.7255474452554744, |
| "grad_norm": 0.6520898938179016, |
| "learning_rate": 4.764588586406373e-11, |
| "loss": 1.3575026988983154, |
| "step": 1182 |
| }, |
| { |
| "epoch": 1.7284671532846714, |
| "grad_norm": 0.8203355669975281, |
| "learning_rate": 4.750302355235506e-11, |
| "loss": 1.6894546747207642, |
| "step": 1184 |
| }, |
| { |
| "epoch": 1.7313868613138688, |
| "grad_norm": 0.37086644768714905, |
| "learning_rate": 4.7360238916472634e-11, |
| "loss": 1.42867910861969, |
| "step": 1186 |
| }, |
| { |
| "epoch": 1.7343065693430657, |
| "grad_norm": 0.9349713325500488, |
| "learning_rate": 4.721753343580143e-11, |
| "loss": 1.4776962995529175, |
| "step": 1188 |
| }, |
| { |
| "epoch": 1.7372262773722627, |
| "grad_norm": 0.6913307905197144, |
| "learning_rate": 4.7074908588906385e-11, |
| "loss": 1.3140593767166138, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.7401459854014598, |
| "grad_norm": 0.38876020908355713, |
| "learning_rate": 4.6932365853516914e-11, |
| "loss": 1.4444148540496826, |
| "step": 1192 |
| }, |
| { |
| "epoch": 1.743065693430657, |
| "grad_norm": 0.26223206520080566, |
| "learning_rate": 4.678990670651169e-11, |
| "loss": 1.5006850957870483, |
| "step": 1194 |
| }, |
| { |
| "epoch": 1.745985401459854, |
| "grad_norm": 0.4009760916233063, |
| "learning_rate": 4.664753262390337e-11, |
| "loss": 1.4603517055511475, |
| "step": 1196 |
| }, |
| { |
| "epoch": 1.748905109489051, |
| "grad_norm": 0.2159179002046585, |
| "learning_rate": 4.6505245080823234e-11, |
| "loss": 1.4046112298965454, |
| "step": 1198 |
| }, |
| { |
| "epoch": 1.7518248175182483, |
| "grad_norm": 0.7279269695281982, |
| "learning_rate": 4.6363045551505944e-11, |
| "loss": 1.41069757938385, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.7547445255474452, |
| "grad_norm": 0.34113809466362, |
| "learning_rate": 4.622093550927423e-11, |
| "loss": 1.4951008558273315, |
| "step": 1202 |
| }, |
| { |
| "epoch": 1.7576642335766424, |
| "grad_norm": 0.2637474834918976, |
| "learning_rate": 4.6078916426523674e-11, |
| "loss": 1.3514400720596313, |
| "step": 1204 |
| }, |
| { |
| "epoch": 1.7605839416058395, |
| "grad_norm": 0.316500723361969, |
| "learning_rate": 4.593698977470744e-11, |
| "loss": 1.4736438989639282, |
| "step": 1206 |
| }, |
| { |
| "epoch": 1.7635036496350365, |
| "grad_norm": 0.3700624406337738, |
| "learning_rate": 4.579515702432098e-11, |
| "loss": 1.4961127042770386, |
| "step": 1208 |
| }, |
| { |
| "epoch": 1.7664233576642334, |
| "grad_norm": 1.0039310455322266, |
| "learning_rate": 4.565341964488686e-11, |
| "loss": 1.5141915082931519, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.7693430656934308, |
| "grad_norm": 0.4779518246650696, |
| "learning_rate": 4.551177910493951e-11, |
| "loss": 1.5229568481445312, |
| "step": 1212 |
| }, |
| { |
| "epoch": 1.7722627737226277, |
| "grad_norm": 0.3397350013256073, |
| "learning_rate": 4.537023687201004e-11, |
| "loss": 1.368076205253601, |
| "step": 1214 |
| }, |
| { |
| "epoch": 1.7751824817518247, |
| "grad_norm": 2.7952754497528076, |
| "learning_rate": 4.522879441261097e-11, |
| "loss": 1.538074016571045, |
| "step": 1216 |
| }, |
| { |
| "epoch": 1.7781021897810219, |
| "grad_norm": 1.0508959293365479, |
| "learning_rate": 4.5087453192221077e-11, |
| "loss": 1.4159822463989258, |
| "step": 1218 |
| }, |
| { |
| "epoch": 1.781021897810219, |
| "grad_norm": 0.3718510866165161, |
| "learning_rate": 4.4946214675270194e-11, |
| "loss": 1.3541284799575806, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.783941605839416, |
| "grad_norm": 0.3654969036579132, |
| "learning_rate": 4.4805080325124085e-11, |
| "loss": 1.2151225805282593, |
| "step": 1222 |
| }, |
| { |
| "epoch": 1.7868613138686131, |
| "grad_norm": 0.40370652079582214, |
| "learning_rate": 4.4664051604069215e-11, |
| "loss": 1.1107691526412964, |
| "step": 1224 |
| }, |
| { |
| "epoch": 1.7897810218978103, |
| "grad_norm": 0.2435276061296463, |
| "learning_rate": 4.452312997329763e-11, |
| "loss": 1.1705870628356934, |
| "step": 1226 |
| }, |
| { |
| "epoch": 1.7927007299270072, |
| "grad_norm": 0.22067563235759735, |
| "learning_rate": 4.438231689289185e-11, |
| "loss": 1.4070066213607788, |
| "step": 1228 |
| }, |
| { |
| "epoch": 1.7956204379562044, |
| "grad_norm": 0.22086317837238312, |
| "learning_rate": 4.4241613821809646e-11, |
| "loss": 1.4132890701293945, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.7985401459854016, |
| "grad_norm": 0.17591090500354767, |
| "learning_rate": 4.410102221786909e-11, |
| "loss": 1.191218614578247, |
| "step": 1232 |
| }, |
| { |
| "epoch": 1.8014598540145985, |
| "grad_norm": 0.14714421331882477, |
| "learning_rate": 4.3960543537733255e-11, |
| "loss": 1.2550400495529175, |
| "step": 1234 |
| }, |
| { |
| "epoch": 1.8043795620437955, |
| "grad_norm": 0.23958607017993927, |
| "learning_rate": 4.382017923689525e-11, |
| "loss": 1.1083970069885254, |
| "step": 1236 |
| }, |
| { |
| "epoch": 1.8072992700729928, |
| "grad_norm": 0.16653022170066833, |
| "learning_rate": 4.3679930769663116e-11, |
| "loss": 1.0630451440811157, |
| "step": 1238 |
| }, |
| { |
| "epoch": 1.8102189781021898, |
| "grad_norm": 0.48908543586730957, |
| "learning_rate": 4.3539799589144715e-11, |
| "loss": 1.1045050621032715, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.8131386861313867, |
| "grad_norm": 0.2572908103466034, |
| "learning_rate": 4.339978714723274e-11, |
| "loss": 1.0034579038619995, |
| "step": 1242 |
| }, |
| { |
| "epoch": 1.816058394160584, |
| "grad_norm": 0.19536933302879333, |
| "learning_rate": 4.325989489458963e-11, |
| "loss": 1.0157605409622192, |
| "step": 1244 |
| }, |
| { |
| "epoch": 1.818978102189781, |
| "grad_norm": 0.3267905116081238, |
| "learning_rate": 4.312012428063252e-11, |
| "loss": 1.004263997077942, |
| "step": 1246 |
| }, |
| { |
| "epoch": 1.821897810218978, |
| "grad_norm": 0.2991647720336914, |
| "learning_rate": 4.298047675351832e-11, |
| "loss": 1.0326032638549805, |
| "step": 1248 |
| }, |
| { |
| "epoch": 1.8248175182481752, |
| "grad_norm": 0.68552166223526, |
| "learning_rate": 4.2840953760128575e-11, |
| "loss": 1.171319603919983, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.8277372262773723, |
| "grad_norm": 0.30608347058296204, |
| "learning_rate": 4.270155674605455e-11, |
| "loss": 1.382737636566162, |
| "step": 1252 |
| }, |
| { |
| "epoch": 1.8306569343065693, |
| "grad_norm": 0.38271135091781616, |
| "learning_rate": 4.25622871555823e-11, |
| "loss": 1.37453293800354, |
| "step": 1254 |
| }, |
| { |
| "epoch": 1.8335766423357664, |
| "grad_norm": 0.20676696300506592, |
| "learning_rate": 4.242314643167759e-11, |
| "loss": 1.3673899173736572, |
| "step": 1256 |
| }, |
| { |
| "epoch": 1.8364963503649636, |
| "grad_norm": 0.40299898386001587, |
| "learning_rate": 4.2284136015971e-11, |
| "loss": 1.342198133468628, |
| "step": 1258 |
| }, |
| { |
| "epoch": 1.8394160583941606, |
| "grad_norm": 0.19543984532356262, |
| "learning_rate": 4.214525734874306e-11, |
| "loss": 1.3685353994369507, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.8423357664233575, |
| "grad_norm": 0.18539777398109436, |
| "learning_rate": 4.2006511868909207e-11, |
| "loss": 1.296134114265442, |
| "step": 1262 |
| }, |
| { |
| "epoch": 1.845255474452555, |
| "grad_norm": 0.21791622042655945, |
| "learning_rate": 4.186790101400491e-11, |
| "loss": 1.3704713582992554, |
| "step": 1264 |
| }, |
| { |
| "epoch": 1.8481751824817518, |
| "grad_norm": 0.5269507169723511, |
| "learning_rate": 4.172942622017086e-11, |
| "loss": 1.366510272026062, |
| "step": 1266 |
| }, |
| { |
| "epoch": 1.8510948905109488, |
| "grad_norm": 0.34738093614578247, |
| "learning_rate": 4.159108892213799e-11, |
| "loss": 1.3568106889724731, |
| "step": 1268 |
| }, |
| { |
| "epoch": 1.854014598540146, |
| "grad_norm": 0.6385413408279419, |
| "learning_rate": 4.145289055321263e-11, |
| "loss": 1.2829129695892334, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.856934306569343, |
| "grad_norm": 0.17486143112182617, |
| "learning_rate": 4.1314832545261706e-11, |
| "loss": 1.3451555967330933, |
| "step": 1272 |
| }, |
| { |
| "epoch": 1.85985401459854, |
| "grad_norm": 0.20117926597595215, |
| "learning_rate": 4.1176916328697836e-11, |
| "loss": 1.31380033493042, |
| "step": 1274 |
| }, |
| { |
| "epoch": 1.8627737226277372, |
| "grad_norm": 0.3269534111022949, |
| "learning_rate": 4.103914333246458e-11, |
| "loss": 1.295755386352539, |
| "step": 1276 |
| }, |
| { |
| "epoch": 1.8656934306569344, |
| "grad_norm": 0.2715974450111389, |
| "learning_rate": 4.090151498402155e-11, |
| "loss": 1.322004795074463, |
| "step": 1278 |
| }, |
| { |
| "epoch": 1.8686131386861313, |
| "grad_norm": 0.29098689556121826, |
| "learning_rate": 4.076403270932973e-11, |
| "loss": 1.3752413988113403, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.8715328467153285, |
| "grad_norm": 0.3070428967475891, |
| "learning_rate": 4.0626697932836585e-11, |
| "loss": 1.3564178943634033, |
| "step": 1282 |
| }, |
| { |
| "epoch": 1.8744525547445257, |
| "grad_norm": 0.24386434257030487, |
| "learning_rate": 4.048951207746142e-11, |
| "loss": 1.3851372003555298, |
| "step": 1284 |
| }, |
| { |
| "epoch": 1.8773722627737226, |
| "grad_norm": 0.2833121120929718, |
| "learning_rate": 4.0352476564580485e-11, |
| "loss": 1.3745468854904175, |
| "step": 1286 |
| }, |
| { |
| "epoch": 1.8802919708029195, |
| "grad_norm": 0.22633326053619385, |
| "learning_rate": 4.021559281401244e-11, |
| "loss": 1.3696306943893433, |
| "step": 1288 |
| }, |
| { |
| "epoch": 1.883211678832117, |
| "grad_norm": 0.2947603464126587, |
| "learning_rate": 4.0078862244003477e-11, |
| "loss": 1.4001069068908691, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.8861313868613139, |
| "grad_norm": 0.24118058383464813, |
| "learning_rate": 3.994228627121269e-11, |
| "loss": 1.3127013444900513, |
| "step": 1292 |
| }, |
| { |
| "epoch": 1.8890510948905108, |
| "grad_norm": 0.32797881960868835, |
| "learning_rate": 3.980586631069744e-11, |
| "loss": 1.3768930435180664, |
| "step": 1294 |
| }, |
| { |
| "epoch": 1.891970802919708, |
| "grad_norm": 0.38555362820625305, |
| "learning_rate": 3.9669603775898614e-11, |
| "loss": 1.3195134401321411, |
| "step": 1296 |
| }, |
| { |
| "epoch": 1.8948905109489051, |
| "grad_norm": 0.25967398285865784, |
| "learning_rate": 3.953350007862603e-11, |
| "loss": 1.3479055166244507, |
| "step": 1298 |
| }, |
| { |
| "epoch": 1.897810218978102, |
| "grad_norm": 0.21513788402080536, |
| "learning_rate": 3.9397556629043816e-11, |
| "loss": 1.358872890472412, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.9007299270072993, |
| "grad_norm": 0.1812690943479538, |
| "learning_rate": 3.926177483565576e-11, |
| "loss": 1.3878893852233887, |
| "step": 1302 |
| }, |
| { |
| "epoch": 1.9036496350364964, |
| "grad_norm": 0.2857212722301483, |
| "learning_rate": 3.912615610529074e-11, |
| "loss": 1.3680543899536133, |
| "step": 1304 |
| }, |
| { |
| "epoch": 1.9065693430656934, |
| "grad_norm": 0.17340198159217834, |
| "learning_rate": 3.899070184308815e-11, |
| "loss": 1.299302339553833, |
| "step": 1306 |
| }, |
| { |
| "epoch": 1.9094890510948905, |
| "grad_norm": 0.22302907705307007, |
| "learning_rate": 3.885541345248335e-11, |
| "loss": 1.333414912223816, |
| "step": 1308 |
| }, |
| { |
| "epoch": 1.9124087591240877, |
| "grad_norm": 0.2631547749042511, |
| "learning_rate": 3.8720292335193065e-11, |
| "loss": 1.3633759021759033, |
| "step": 1310 |
| }, |
| { |
| "epoch": 1.9153284671532846, |
| "grad_norm": 0.20353633165359497, |
| "learning_rate": 3.858533989120098e-11, |
| "loss": 1.3834773302078247, |
| "step": 1312 |
| }, |
| { |
| "epoch": 1.9182481751824818, |
| "grad_norm": 0.38067495822906494, |
| "learning_rate": 3.845055751874309e-11, |
| "loss": 1.369248390197754, |
| "step": 1314 |
| }, |
| { |
| "epoch": 1.921167883211679, |
| "grad_norm": 0.2600199282169342, |
| "learning_rate": 3.831594661429334e-11, |
| "loss": 1.3464183807373047, |
| "step": 1316 |
| }, |
| { |
| "epoch": 1.924087591240876, |
| "grad_norm": 0.2344445139169693, |
| "learning_rate": 3.8181508572549096e-11, |
| "loss": 1.3000050783157349, |
| "step": 1318 |
| }, |
| { |
| "epoch": 1.9270072992700729, |
| "grad_norm": 0.2518412470817566, |
| "learning_rate": 3.8047244786416667e-11, |
| "loss": 1.3449358940124512, |
| "step": 1320 |
| }, |
| { |
| "epoch": 1.92992700729927, |
| "grad_norm": 0.3552137017250061, |
| "learning_rate": 3.791315664699697e-11, |
| "loss": 1.3735779523849487, |
| "step": 1322 |
| }, |
| { |
| "epoch": 1.9328467153284672, |
| "grad_norm": 0.8765981793403625, |
| "learning_rate": 3.777924554357096e-11, |
| "loss": 1.3276495933532715, |
| "step": 1324 |
| }, |
| { |
| "epoch": 1.9357664233576641, |
| "grad_norm": 0.2266838103532791, |
| "learning_rate": 3.764551286358544e-11, |
| "loss": 1.355583906173706, |
| "step": 1326 |
| }, |
| { |
| "epoch": 1.9386861313868613, |
| "grad_norm": 0.21473616361618042, |
| "learning_rate": 3.751195999263849e-11, |
| "loss": 1.3490177392959595, |
| "step": 1328 |
| }, |
| { |
| "epoch": 1.9416058394160585, |
| "grad_norm": 0.3912813365459442, |
| "learning_rate": 3.737858831446524e-11, |
| "loss": 1.3904234170913696, |
| "step": 1330 |
| }, |
| { |
| "epoch": 1.9445255474452554, |
| "grad_norm": 0.2954106628894806, |
| "learning_rate": 3.724539921092347e-11, |
| "loss": 1.4133667945861816, |
| "step": 1332 |
| }, |
| { |
| "epoch": 1.9474452554744526, |
| "grad_norm": 0.2548486590385437, |
| "learning_rate": 3.711239406197932e-11, |
| "loss": 1.3827425241470337, |
| "step": 1334 |
| }, |
| { |
| "epoch": 1.9503649635036497, |
| "grad_norm": 0.4357367753982544, |
| "learning_rate": 3.6979574245693e-11, |
| "loss": 1.3763043880462646, |
| "step": 1336 |
| }, |
| { |
| "epoch": 1.9532846715328467, |
| "grad_norm": 0.26733970642089844, |
| "learning_rate": 3.68469411382045e-11, |
| "loss": 1.3509302139282227, |
| "step": 1338 |
| }, |
| { |
| "epoch": 1.9562043795620438, |
| "grad_norm": 0.20610250532627106, |
| "learning_rate": 3.671449611371928e-11, |
| "loss": 1.2913023233413696, |
| "step": 1340 |
| }, |
| { |
| "epoch": 1.959124087591241, |
| "grad_norm": 0.2972968518733978, |
| "learning_rate": 3.658224054449412e-11, |
| "loss": 1.3234026432037354, |
| "step": 1342 |
| }, |
| { |
| "epoch": 1.962043795620438, |
| "grad_norm": 0.2919084131717682, |
| "learning_rate": 3.645017580082287e-11, |
| "loss": 1.3362833261489868, |
| "step": 1344 |
| }, |
| { |
| "epoch": 1.964963503649635, |
| "grad_norm": 0.27695927023887634, |
| "learning_rate": 3.631830325102221e-11, |
| "loss": 1.3145451545715332, |
| "step": 1346 |
| }, |
| { |
| "epoch": 1.967883211678832, |
| "grad_norm": 0.42268478870391846, |
| "learning_rate": 3.618662426141754e-11, |
| "loss": 1.337138295173645, |
| "step": 1348 |
| }, |
| { |
| "epoch": 1.9708029197080292, |
| "grad_norm": 0.2148294746875763, |
| "learning_rate": 3.6055140196328793e-11, |
| "loss": 1.380530834197998, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.9737226277372262, |
| "grad_norm": 0.2545860707759857, |
| "learning_rate": 3.592385241805628e-11, |
| "loss": 1.3746337890625, |
| "step": 1352 |
| }, |
| { |
| "epoch": 1.9766423357664233, |
| "grad_norm": 1.5863921642303467, |
| "learning_rate": 3.579276228686658e-11, |
| "loss": 1.3830804824829102, |
| "step": 1354 |
| }, |
| { |
| "epoch": 1.9795620437956205, |
| "grad_norm": 0.35024622082710266, |
| "learning_rate": 3.56618711609785e-11, |
| "loss": 1.33449125289917, |
| "step": 1356 |
| }, |
| { |
| "epoch": 1.9824817518248175, |
| "grad_norm": 0.2447447031736374, |
| "learning_rate": 3.5531180396548933e-11, |
| "loss": 1.3325806856155396, |
| "step": 1358 |
| }, |
| { |
| "epoch": 1.9854014598540146, |
| "grad_norm": 0.2641764283180237, |
| "learning_rate": 3.5400691347658855e-11, |
| "loss": 1.3506903648376465, |
| "step": 1360 |
| }, |
| { |
| "epoch": 1.9883211678832118, |
| "grad_norm": 0.4542999267578125, |
| "learning_rate": 3.527040536629927e-11, |
| "loss": 1.3841180801391602, |
| "step": 1362 |
| }, |
| { |
| "epoch": 1.9912408759124087, |
| "grad_norm": 0.29342707991600037, |
| "learning_rate": 3.514032380235719e-11, |
| "loss": 1.4284846782684326, |
| "step": 1364 |
| }, |
| { |
| "epoch": 1.994160583941606, |
| "grad_norm": 0.5308235287666321, |
| "learning_rate": 3.5010448003601684e-11, |
| "loss": 1.8175984621047974, |
| "step": 1366 |
| }, |
| { |
| "epoch": 1.997080291970803, |
| "grad_norm": 0.4636997580528259, |
| "learning_rate": 3.4880779315669865e-11, |
| "loss": 1.7896119356155396, |
| "step": 1368 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.8472878932952881, |
| "learning_rate": 3.475131908205308e-11, |
| "loss": 1.7334907054901123, |
| "step": 1370 |
| }, |
| { |
| "epoch": 2.002919708029197, |
| "grad_norm": 0.27248427271842957, |
| "learning_rate": 3.4622068644082805e-11, |
| "loss": 1.4240479469299316, |
| "step": 1372 |
| }, |
| { |
| "epoch": 2.0058394160583943, |
| "grad_norm": 0.21887673437595367, |
| "learning_rate": 3.44930293409168e-11, |
| "loss": 1.4103740453720093, |
| "step": 1374 |
| }, |
| { |
| "epoch": 2.0087591240875913, |
| "grad_norm": 0.22277064621448517, |
| "learning_rate": 3.436420250952537e-11, |
| "loss": 1.4105690717697144, |
| "step": 1376 |
| }, |
| { |
| "epoch": 2.011678832116788, |
| "grad_norm": 0.24514085054397583, |
| "learning_rate": 3.423558948467733e-11, |
| "loss": 1.4243745803833008, |
| "step": 1378 |
| }, |
| { |
| "epoch": 2.0145985401459856, |
| "grad_norm": 0.2835189402103424, |
| "learning_rate": 3.4107191598926294e-11, |
| "loss": 1.3502079248428345, |
| "step": 1380 |
| }, |
| { |
| "epoch": 2.0175182481751825, |
| "grad_norm": 0.29972848296165466, |
| "learning_rate": 3.3979010182596804e-11, |
| "loss": 1.3972976207733154, |
| "step": 1382 |
| }, |
| { |
| "epoch": 2.0204379562043795, |
| "grad_norm": 0.23961658775806427, |
| "learning_rate": 3.3851046563770615e-11, |
| "loss": 1.3070876598358154, |
| "step": 1384 |
| }, |
| { |
| "epoch": 2.0233576642335764, |
| "grad_norm": 0.7350632548332214, |
| "learning_rate": 3.372330206827281e-11, |
| "loss": 1.2981512546539307, |
| "step": 1386 |
| }, |
| { |
| "epoch": 2.026277372262774, |
| "grad_norm": 0.3386096656322479, |
| "learning_rate": 3.3595778019658256e-11, |
| "loss": 1.2211928367614746, |
| "step": 1388 |
| }, |
| { |
| "epoch": 2.0291970802919708, |
| "grad_norm": 0.20256347954273224, |
| "learning_rate": 3.346847573919769e-11, |
| "loss": 1.4054863452911377, |
| "step": 1390 |
| }, |
| { |
| "epoch": 2.0321167883211677, |
| "grad_norm": 0.48250511288642883, |
| "learning_rate": 3.3341396545864166e-11, |
| "loss": 1.3012176752090454, |
| "step": 1392 |
| }, |
| { |
| "epoch": 2.035036496350365, |
| "grad_norm": 0.16802747547626495, |
| "learning_rate": 3.321454175631934e-11, |
| "loss": 1.249161720275879, |
| "step": 1394 |
| }, |
| { |
| "epoch": 2.037956204379562, |
| "grad_norm": 0.29464009404182434, |
| "learning_rate": 3.308791268489979e-11, |
| "loss": 1.303439974784851, |
| "step": 1396 |
| }, |
| { |
| "epoch": 2.040875912408759, |
| "grad_norm": 0.6583344340324402, |
| "learning_rate": 3.2961510643603477e-11, |
| "loss": 1.3693758249282837, |
| "step": 1398 |
| }, |
| { |
| "epoch": 2.0437956204379564, |
| "grad_norm": 0.4720025658607483, |
| "learning_rate": 3.283533694207611e-11, |
| "loss": 1.371114730834961, |
| "step": 1400 |
| }, |
| { |
| "epoch": 2.0467153284671533, |
| "grad_norm": 0.4682556688785553, |
| "learning_rate": 3.270939288759758e-11, |
| "loss": 1.3761752843856812, |
| "step": 1402 |
| }, |
| { |
| "epoch": 2.0496350364963503, |
| "grad_norm": 0.23124952614307404, |
| "learning_rate": 3.25836797850684e-11, |
| "loss": 1.1644366979599, |
| "step": 1404 |
| }, |
| { |
| "epoch": 2.0525547445255476, |
| "grad_norm": 0.21318967640399933, |
| "learning_rate": 3.2458198936996216e-11, |
| "loss": 1.1261866092681885, |
| "step": 1406 |
| }, |
| { |
| "epoch": 2.0554744525547446, |
| "grad_norm": 0.2597343921661377, |
| "learning_rate": 3.233295164348229e-11, |
| "loss": 1.2748457193374634, |
| "step": 1408 |
| }, |
| { |
| "epoch": 2.0583941605839415, |
| "grad_norm": 0.36715397238731384, |
| "learning_rate": 3.2207939202208046e-11, |
| "loss": 1.3361799716949463, |
| "step": 1410 |
| }, |
| { |
| "epoch": 2.0613138686131385, |
| "grad_norm": 0.24065475165843964, |
| "learning_rate": 3.208316290842159e-11, |
| "loss": 1.361956238746643, |
| "step": 1412 |
| }, |
| { |
| "epoch": 2.064233576642336, |
| "grad_norm": 0.33580994606018066, |
| "learning_rate": 3.1958624054924346e-11, |
| "loss": 1.3905457258224487, |
| "step": 1414 |
| }, |
| { |
| "epoch": 2.067153284671533, |
| "grad_norm": 0.4103996753692627, |
| "learning_rate": 3.183432393205763e-11, |
| "loss": 1.6151682138442993, |
| "step": 1416 |
| }, |
| { |
| "epoch": 2.0700729927007298, |
| "grad_norm": 0.2529504597187042, |
| "learning_rate": 3.171026382768923e-11, |
| "loss": 1.2452044486999512, |
| "step": 1418 |
| }, |
| { |
| "epoch": 2.072992700729927, |
| "grad_norm": 0.352078378200531, |
| "learning_rate": 3.158644502720015e-11, |
| "loss": 1.301306962966919, |
| "step": 1420 |
| }, |
| { |
| "epoch": 2.075912408759124, |
| "grad_norm": 0.23457178473472595, |
| "learning_rate": 3.146286881347128e-11, |
| "loss": 1.3487284183502197, |
| "step": 1422 |
| }, |
| { |
| "epoch": 2.078832116788321, |
| "grad_norm": 0.30150166153907776, |
| "learning_rate": 3.1339536466870046e-11, |
| "loss": 1.3885747194290161, |
| "step": 1424 |
| }, |
| { |
| "epoch": 2.0817518248175184, |
| "grad_norm": 0.377258837223053, |
| "learning_rate": 3.121644926523721e-11, |
| "loss": 1.405432939529419, |
| "step": 1426 |
| }, |
| { |
| "epoch": 2.0846715328467154, |
| "grad_norm": 0.13830381631851196, |
| "learning_rate": 3.1093608483873524e-11, |
| "loss": 1.209243893623352, |
| "step": 1428 |
| }, |
| { |
| "epoch": 2.0875912408759123, |
| "grad_norm": 0.18046823143959045, |
| "learning_rate": 3.097101539552668e-11, |
| "loss": 1.2217992544174194, |
| "step": 1430 |
| }, |
| { |
| "epoch": 2.0905109489051097, |
| "grad_norm": 0.22244705259799957, |
| "learning_rate": 3.0848671270377985e-11, |
| "loss": 1.2246205806732178, |
| "step": 1432 |
| }, |
| { |
| "epoch": 2.0934306569343066, |
| "grad_norm": 0.21235236525535583, |
| "learning_rate": 3.0726577376029264e-11, |
| "loss": 1.2530802488327026, |
| "step": 1434 |
| }, |
| { |
| "epoch": 2.0963503649635036, |
| "grad_norm": 0.2744048237800598, |
| "learning_rate": 3.0604734977489704e-11, |
| "loss": 1.2528806924819946, |
| "step": 1436 |
| }, |
| { |
| "epoch": 2.0992700729927005, |
| "grad_norm": 0.16244126856327057, |
| "learning_rate": 3.048314533716277e-11, |
| "loss": 1.2494616508483887, |
| "step": 1438 |
| }, |
| { |
| "epoch": 2.102189781021898, |
| "grad_norm": 0.2635323107242584, |
| "learning_rate": 3.03618097148331e-11, |
| "loss": 1.31374192237854, |
| "step": 1440 |
| }, |
| { |
| "epoch": 2.105109489051095, |
| "grad_norm": 0.343732625246048, |
| "learning_rate": 3.0240729367653456e-11, |
| "loss": 1.4676549434661865, |
| "step": 1442 |
| }, |
| { |
| "epoch": 2.108029197080292, |
| "grad_norm": 0.6998947262763977, |
| "learning_rate": 3.0119905550131735e-11, |
| "loss": 1.4312711954116821, |
| "step": 1444 |
| }, |
| { |
| "epoch": 2.110948905109489, |
| "grad_norm": 0.3323758840560913, |
| "learning_rate": 2.9999339514117915e-11, |
| "loss": 1.3852202892303467, |
| "step": 1446 |
| }, |
| { |
| "epoch": 2.113868613138686, |
| "grad_norm": 0.3845193386077881, |
| "learning_rate": 2.987903250879109e-11, |
| "loss": 1.4576423168182373, |
| "step": 1448 |
| }, |
| { |
| "epoch": 2.116788321167883, |
| "grad_norm": 0.3180224597454071, |
| "learning_rate": 2.975898578064662e-11, |
| "loss": 1.3024555444717407, |
| "step": 1450 |
| }, |
| { |
| "epoch": 2.1197080291970805, |
| "grad_norm": 0.4924963116645813, |
| "learning_rate": 2.9639200573483075e-11, |
| "loss": 1.2051639556884766, |
| "step": 1452 |
| }, |
| { |
| "epoch": 2.1226277372262774, |
| "grad_norm": 0.2880760729312897, |
| "learning_rate": 2.951967812838946e-11, |
| "loss": 1.1336140632629395, |
| "step": 1454 |
| }, |
| { |
| "epoch": 2.1255474452554743, |
| "grad_norm": 0.42956921458244324, |
| "learning_rate": 2.940041968373229e-11, |
| "loss": 1.2710269689559937, |
| "step": 1456 |
| }, |
| { |
| "epoch": 2.1284671532846717, |
| "grad_norm": 1.3568767309188843, |
| "learning_rate": 2.928142647514281e-11, |
| "loss": 1.1412277221679688, |
| "step": 1458 |
| }, |
| { |
| "epoch": 2.1313868613138687, |
| "grad_norm": 0.1728566586971283, |
| "learning_rate": 2.916269973550413e-11, |
| "loss": 1.3375706672668457, |
| "step": 1460 |
| }, |
| { |
| "epoch": 2.1343065693430656, |
| "grad_norm": 0.2527841627597809, |
| "learning_rate": 2.904424069493853e-11, |
| "loss": 1.3697013854980469, |
| "step": 1462 |
| }, |
| { |
| "epoch": 2.137226277372263, |
| "grad_norm": 0.18707573413848877, |
| "learning_rate": 2.892605058079464e-11, |
| "loss": 1.4107820987701416, |
| "step": 1464 |
| }, |
| { |
| "epoch": 2.14014598540146, |
| "grad_norm": 0.7027086019515991, |
| "learning_rate": 2.8808130617634766e-11, |
| "loss": 1.3240883350372314, |
| "step": 1466 |
| }, |
| { |
| "epoch": 2.143065693430657, |
| "grad_norm": 0.2566525340080261, |
| "learning_rate": 2.8690482027222204e-11, |
| "loss": 1.3276679515838623, |
| "step": 1468 |
| }, |
| { |
| "epoch": 2.145985401459854, |
| "grad_norm": 0.33008280396461487, |
| "learning_rate": 2.857310602850854e-11, |
| "loss": 1.4535688161849976, |
| "step": 1470 |
| }, |
| { |
| "epoch": 2.1489051094890512, |
| "grad_norm": 0.2940777540206909, |
| "learning_rate": 2.845600383762107e-11, |
| "loss": 1.3314485549926758, |
| "step": 1472 |
| }, |
| { |
| "epoch": 2.151824817518248, |
| "grad_norm": 0.263931542634964, |
| "learning_rate": 2.833917666785017e-11, |
| "loss": 1.3149549961090088, |
| "step": 1474 |
| }, |
| { |
| "epoch": 2.154744525547445, |
| "grad_norm": 0.23515236377716064, |
| "learning_rate": 2.8222625729636774e-11, |
| "loss": 1.3796401023864746, |
| "step": 1476 |
| }, |
| { |
| "epoch": 2.1576642335766425, |
| "grad_norm": 3.0907609462738037, |
| "learning_rate": 2.8106352230559756e-11, |
| "loss": 1.441535472869873, |
| "step": 1478 |
| }, |
| { |
| "epoch": 2.1605839416058394, |
| "grad_norm": 0.28067415952682495, |
| "learning_rate": 2.799035737532344e-11, |
| "loss": 1.3874502182006836, |
| "step": 1480 |
| }, |
| { |
| "epoch": 2.1635036496350364, |
| "grad_norm": 0.2478429526090622, |
| "learning_rate": 2.7874642365745163e-11, |
| "loss": 1.433276891708374, |
| "step": 1482 |
| }, |
| { |
| "epoch": 2.1664233576642338, |
| "grad_norm": 0.19423043727874756, |
| "learning_rate": 2.7759208400742797e-11, |
| "loss": 1.2716385126113892, |
| "step": 1484 |
| }, |
| { |
| "epoch": 2.1693430656934307, |
| "grad_norm": 0.4264819622039795, |
| "learning_rate": 2.764405667632231e-11, |
| "loss": 1.450852394104004, |
| "step": 1486 |
| }, |
| { |
| "epoch": 2.1722627737226277, |
| "grad_norm": 0.2181134819984436, |
| "learning_rate": 2.7529188385565386e-11, |
| "loss": 1.2895259857177734, |
| "step": 1488 |
| }, |
| { |
| "epoch": 2.1751824817518246, |
| "grad_norm": 0.2849787175655365, |
| "learning_rate": 2.741460471861708e-11, |
| "loss": 1.4493159055709839, |
| "step": 1490 |
| }, |
| { |
| "epoch": 2.178102189781022, |
| "grad_norm": 0.20413418114185333, |
| "learning_rate": 2.730030686267347e-11, |
| "loss": 1.3337498903274536, |
| "step": 1492 |
| }, |
| { |
| "epoch": 2.181021897810219, |
| "grad_norm": 0.2518576383590698, |
| "learning_rate": 2.7186296001969352e-11, |
| "loss": 1.3332432508468628, |
| "step": 1494 |
| }, |
| { |
| "epoch": 2.183941605839416, |
| "grad_norm": 0.4149719774723053, |
| "learning_rate": 2.7072573317765974e-11, |
| "loss": 1.3538711071014404, |
| "step": 1496 |
| }, |
| { |
| "epoch": 2.1868613138686133, |
| "grad_norm": 0.3612917363643646, |
| "learning_rate": 2.695913998833881e-11, |
| "loss": 1.3591725826263428, |
| "step": 1498 |
| }, |
| { |
| "epoch": 2.18978102189781, |
| "grad_norm": 0.26884135603904724, |
| "learning_rate": 2.6845997188965365e-11, |
| "loss": 1.304358720779419, |
| "step": 1500 |
| }, |
| { |
| "epoch": 2.192700729927007, |
| "grad_norm": 0.34493714570999146, |
| "learning_rate": 2.673314609191291e-11, |
| "loss": 1.3458648920059204, |
| "step": 1502 |
| }, |
| { |
| "epoch": 2.1956204379562045, |
| "grad_norm": 0.15932005643844604, |
| "learning_rate": 2.662058786642646e-11, |
| "loss": 1.371425986289978, |
| "step": 1504 |
| }, |
| { |
| "epoch": 2.1985401459854015, |
| "grad_norm": 0.2096133679151535, |
| "learning_rate": 2.6508323678716584e-11, |
| "loss": 1.2557461261749268, |
| "step": 1506 |
| }, |
| { |
| "epoch": 2.2014598540145984, |
| "grad_norm": 0.253741592168808, |
| "learning_rate": 2.6396354691947322e-11, |
| "loss": 1.1103127002716064, |
| "step": 1508 |
| }, |
| { |
| "epoch": 2.204379562043796, |
| "grad_norm": 0.23460233211517334, |
| "learning_rate": 2.628468206622421e-11, |
| "loss": 1.3412158489227295, |
| "step": 1510 |
| }, |
| { |
| "epoch": 2.2072992700729928, |
| "grad_norm": 0.19601471722126007, |
| "learning_rate": 2.617330695858212e-11, |
| "loss": 1.2751479148864746, |
| "step": 1512 |
| }, |
| { |
| "epoch": 2.2102189781021897, |
| "grad_norm": 0.26254287362098694, |
| "learning_rate": 2.6062230522973407e-11, |
| "loss": 1.2706090211868286, |
| "step": 1514 |
| }, |
| { |
| "epoch": 2.213138686131387, |
| "grad_norm": 0.2905943691730499, |
| "learning_rate": 2.5951453910255874e-11, |
| "loss": 1.3488496541976929, |
| "step": 1516 |
| }, |
| { |
| "epoch": 2.216058394160584, |
| "grad_norm": 0.24975769221782684, |
| "learning_rate": 2.5840978268180892e-11, |
| "loss": 1.267167329788208, |
| "step": 1518 |
| }, |
| { |
| "epoch": 2.218978102189781, |
| "grad_norm": 0.34441080689430237, |
| "learning_rate": 2.5730804741381475e-11, |
| "loss": 1.3788913488388062, |
| "step": 1520 |
| }, |
| { |
| "epoch": 2.221897810218978, |
| "grad_norm": 0.5073427557945251, |
| "learning_rate": 2.5620934471360446e-11, |
| "loss": 1.5342570543289185, |
| "step": 1522 |
| }, |
| { |
| "epoch": 2.2248175182481753, |
| "grad_norm": 0.3326243758201599, |
| "learning_rate": 2.5511368596478575e-11, |
| "loss": 1.589991807937622, |
| "step": 1524 |
| }, |
| { |
| "epoch": 2.2277372262773723, |
| "grad_norm": 0.7213902473449707, |
| "learning_rate": 2.5402108251942813e-11, |
| "loss": 1.393171787261963, |
| "step": 1526 |
| }, |
| { |
| "epoch": 2.230656934306569, |
| "grad_norm": 0.3098500072956085, |
| "learning_rate": 2.529315456979457e-11, |
| "loss": 1.482409119606018, |
| "step": 1528 |
| }, |
| { |
| "epoch": 2.2335766423357666, |
| "grad_norm": 1.4665156602859497, |
| "learning_rate": 2.5184508678897894e-11, |
| "loss": 1.541229248046875, |
| "step": 1530 |
| }, |
| { |
| "epoch": 2.2364963503649635, |
| "grad_norm": 0.3888106942176819, |
| "learning_rate": 2.5076171704927847e-11, |
| "loss": 1.5156506299972534, |
| "step": 1532 |
| }, |
| { |
| "epoch": 2.2394160583941605, |
| "grad_norm": 0.4767511487007141, |
| "learning_rate": 2.4968144770358785e-11, |
| "loss": 1.474044919013977, |
| "step": 1534 |
| }, |
| { |
| "epoch": 2.242335766423358, |
| "grad_norm": 0.671013593673706, |
| "learning_rate": 2.4860428994452785e-11, |
| "loss": 1.3635804653167725, |
| "step": 1536 |
| }, |
| { |
| "epoch": 2.245255474452555, |
| "grad_norm": 0.5932507514953613, |
| "learning_rate": 2.4753025493248032e-11, |
| "loss": 1.5467637777328491, |
| "step": 1538 |
| }, |
| { |
| "epoch": 2.2481751824817517, |
| "grad_norm": 0.4003777503967285, |
| "learning_rate": 2.464593537954722e-11, |
| "loss": 1.075831651687622, |
| "step": 1540 |
| }, |
| { |
| "epoch": 2.2510948905109487, |
| "grad_norm": 2.531550168991089, |
| "learning_rate": 2.453915976290607e-11, |
| "loss": 1.1271113157272339, |
| "step": 1542 |
| }, |
| { |
| "epoch": 2.254014598540146, |
| "grad_norm": 0.5367615222930908, |
| "learning_rate": 2.4432699749621813e-11, |
| "loss": 1.1544005870819092, |
| "step": 1544 |
| }, |
| { |
| "epoch": 2.256934306569343, |
| "grad_norm": 0.5404489636421204, |
| "learning_rate": 2.4326556442721715e-11, |
| "loss": 1.0723111629486084, |
| "step": 1546 |
| }, |
| { |
| "epoch": 2.25985401459854, |
| "grad_norm": 0.4654960632324219, |
| "learning_rate": 2.4220730941951673e-11, |
| "loss": 1.3666132688522339, |
| "step": 1548 |
| }, |
| { |
| "epoch": 2.2627737226277373, |
| "grad_norm": 0.4631255567073822, |
| "learning_rate": 2.411522434376481e-11, |
| "loss": 1.3214212656021118, |
| "step": 1550 |
| }, |
| { |
| "epoch": 2.2656934306569343, |
| "grad_norm": 3.1865177154541016, |
| "learning_rate": 2.4010037741310103e-11, |
| "loss": 1.2218841314315796, |
| "step": 1552 |
| }, |
| { |
| "epoch": 2.2686131386861312, |
| "grad_norm": 0.3000134527683258, |
| "learning_rate": 2.3905172224421092e-11, |
| "loss": 1.1274064779281616, |
| "step": 1554 |
| }, |
| { |
| "epoch": 2.2715328467153286, |
| "grad_norm": 0.2830018699169159, |
| "learning_rate": 2.3800628879604523e-11, |
| "loss": 0.938373327255249, |
| "step": 1556 |
| }, |
| { |
| "epoch": 2.2744525547445256, |
| "grad_norm": 0.2360849827528, |
| "learning_rate": 2.3696408790029166e-11, |
| "loss": 1.1650950908660889, |
| "step": 1558 |
| }, |
| { |
| "epoch": 2.2773722627737225, |
| "grad_norm": 0.34656640887260437, |
| "learning_rate": 2.3592513035514534e-11, |
| "loss": 1.2450108528137207, |
| "step": 1560 |
| }, |
| { |
| "epoch": 2.28029197080292, |
| "grad_norm": 0.31296175718307495, |
| "learning_rate": 2.348894269251978e-11, |
| "loss": 0.9854579567909241, |
| "step": 1562 |
| }, |
| { |
| "epoch": 2.283211678832117, |
| "grad_norm": 0.4444359540939331, |
| "learning_rate": 2.3385698834132398e-11, |
| "loss": 1.1185884475708008, |
| "step": 1564 |
| }, |
| { |
| "epoch": 2.286131386861314, |
| "grad_norm": 0.5716680288314819, |
| "learning_rate": 2.3282782530057236e-11, |
| "loss": 1.3133447170257568, |
| "step": 1566 |
| }, |
| { |
| "epoch": 2.289051094890511, |
| "grad_norm": 0.5097543597221375, |
| "learning_rate": 2.3180194846605366e-11, |
| "loss": 1.203965187072754, |
| "step": 1568 |
| }, |
| { |
| "epoch": 2.291970802919708, |
| "grad_norm": 0.3655783534049988, |
| "learning_rate": 2.307793684668303e-11, |
| "loss": 1.262579083442688, |
| "step": 1570 |
| }, |
| { |
| "epoch": 2.294890510948905, |
| "grad_norm": 0.4481443166732788, |
| "learning_rate": 2.297600958978064e-11, |
| "loss": 1.3191816806793213, |
| "step": 1572 |
| }, |
| { |
| "epoch": 2.297810218978102, |
| "grad_norm": 0.2837509512901306, |
| "learning_rate": 2.2874414131961783e-11, |
| "loss": 1.0644941329956055, |
| "step": 1574 |
| }, |
| { |
| "epoch": 2.3007299270072994, |
| "grad_norm": 0.44186243414878845, |
| "learning_rate": 2.277315152585231e-11, |
| "loss": 1.1767611503601074, |
| "step": 1576 |
| }, |
| { |
| "epoch": 2.3036496350364963, |
| "grad_norm": 0.476301908493042, |
| "learning_rate": 2.2672222820629375e-11, |
| "loss": 1.5055090188980103, |
| "step": 1578 |
| }, |
| { |
| "epoch": 2.3065693430656933, |
| "grad_norm": 0.5160381197929382, |
| "learning_rate": 2.2571629062010654e-11, |
| "loss": 1.3974980115890503, |
| "step": 1580 |
| }, |
| { |
| "epoch": 2.3094890510948907, |
| "grad_norm": 0.22894681990146637, |
| "learning_rate": 2.2471371292243415e-11, |
| "loss": 1.4789299964904785, |
| "step": 1582 |
| }, |
| { |
| "epoch": 2.3124087591240876, |
| "grad_norm": 0.24650397896766663, |
| "learning_rate": 2.2371450550093786e-11, |
| "loss": 1.438536524772644, |
| "step": 1584 |
| }, |
| { |
| "epoch": 2.3153284671532846, |
| "grad_norm": 0.26311492919921875, |
| "learning_rate": 2.227186787083593e-11, |
| "loss": 1.152450680732727, |
| "step": 1586 |
| }, |
| { |
| "epoch": 2.318248175182482, |
| "grad_norm": 0.3128260374069214, |
| "learning_rate": 2.2172624286241394e-11, |
| "loss": 1.2790557146072388, |
| "step": 1588 |
| }, |
| { |
| "epoch": 2.321167883211679, |
| "grad_norm": 0.5634860396385193, |
| "learning_rate": 2.2073720824568366e-11, |
| "loss": 1.2126829624176025, |
| "step": 1590 |
| }, |
| { |
| "epoch": 2.324087591240876, |
| "grad_norm": 0.3091767430305481, |
| "learning_rate": 2.1975158510551046e-11, |
| "loss": 1.0758181810379028, |
| "step": 1592 |
| }, |
| { |
| "epoch": 2.3270072992700728, |
| "grad_norm": 0.5447919964790344, |
| "learning_rate": 2.1876938365389005e-11, |
| "loss": 1.5189749002456665, |
| "step": 1594 |
| }, |
| { |
| "epoch": 2.32992700729927, |
| "grad_norm": 0.25837060809135437, |
| "learning_rate": 2.1779061406736623e-11, |
| "loss": 1.4296108484268188, |
| "step": 1596 |
| }, |
| { |
| "epoch": 2.332846715328467, |
| "grad_norm": 0.29859817028045654, |
| "learning_rate": 2.1681528648692546e-11, |
| "loss": 1.3460187911987305, |
| "step": 1598 |
| }, |
| { |
| "epoch": 2.335766423357664, |
| "grad_norm": 0.3790282607078552, |
| "learning_rate": 2.1584341101789163e-11, |
| "loss": 1.0567677021026611, |
| "step": 1600 |
| }, |
| { |
| "epoch": 2.3386861313868614, |
| "grad_norm": 0.37978944182395935, |
| "learning_rate": 2.1487499772982154e-11, |
| "loss": 1.4715806245803833, |
| "step": 1602 |
| }, |
| { |
| "epoch": 2.3416058394160584, |
| "grad_norm": 0.33095240592956543, |
| "learning_rate": 2.1391005665640057e-11, |
| "loss": 1.4167277812957764, |
| "step": 1604 |
| }, |
| { |
| "epoch": 2.3445255474452553, |
| "grad_norm": 0.7214593291282654, |
| "learning_rate": 2.129485977953386e-11, |
| "loss": 1.5120526552200317, |
| "step": 1606 |
| }, |
| { |
| "epoch": 2.3474452554744527, |
| "grad_norm": 0.575445830821991, |
| "learning_rate": 2.1199063110826615e-11, |
| "loss": 1.4879270792007446, |
| "step": 1608 |
| }, |
| { |
| "epoch": 2.3503649635036497, |
| "grad_norm": 0.30642205476760864, |
| "learning_rate": 2.1103616652063197e-11, |
| "loss": 1.3315616846084595, |
| "step": 1610 |
| }, |
| { |
| "epoch": 2.3532846715328466, |
| "grad_norm": 1.8541874885559082, |
| "learning_rate": 2.1008521392159927e-11, |
| "loss": 1.2089598178863525, |
| "step": 1612 |
| }, |
| { |
| "epoch": 2.356204379562044, |
| "grad_norm": 0.6219841241836548, |
| "learning_rate": 2.0913778316394434e-11, |
| "loss": 1.2917388677597046, |
| "step": 1614 |
| }, |
| { |
| "epoch": 2.359124087591241, |
| "grad_norm": 0.7050977945327759, |
| "learning_rate": 2.081938840639533e-11, |
| "loss": 1.3587377071380615, |
| "step": 1616 |
| }, |
| { |
| "epoch": 2.362043795620438, |
| "grad_norm": 0.42739561200141907, |
| "learning_rate": 2.072535264013209e-11, |
| "loss": 1.522105097770691, |
| "step": 1618 |
| }, |
| { |
| "epoch": 2.3649635036496353, |
| "grad_norm": 1.166688323020935, |
| "learning_rate": 2.063167199190495e-11, |
| "loss": 1.4914265871047974, |
| "step": 1620 |
| }, |
| { |
| "epoch": 2.367883211678832, |
| "grad_norm": 0.29809263348579407, |
| "learning_rate": 2.053834743233477e-11, |
| "loss": 1.6098909378051758, |
| "step": 1622 |
| }, |
| { |
| "epoch": 2.370802919708029, |
| "grad_norm": 0.33959904313087463, |
| "learning_rate": 2.0445379928353005e-11, |
| "loss": 1.443519949913025, |
| "step": 1624 |
| }, |
| { |
| "epoch": 2.373722627737226, |
| "grad_norm": 0.7890218496322632, |
| "learning_rate": 2.035277044319165e-11, |
| "loss": 1.422911524772644, |
| "step": 1626 |
| }, |
| { |
| "epoch": 2.3766423357664235, |
| "grad_norm": 0.3700925409793854, |
| "learning_rate": 2.026051993637332e-11, |
| "loss": 1.1868336200714111, |
| "step": 1628 |
| }, |
| { |
| "epoch": 2.3795620437956204, |
| "grad_norm": 0.25010111927986145, |
| "learning_rate": 2.0168629363701215e-11, |
| "loss": 1.1865566968917847, |
| "step": 1630 |
| }, |
| { |
| "epoch": 2.3824817518248174, |
| "grad_norm": 0.4608599841594696, |
| "learning_rate": 2.0077099677249334e-11, |
| "loss": 1.4894611835479736, |
| "step": 1632 |
| }, |
| { |
| "epoch": 2.3854014598540147, |
| "grad_norm": 0.3453768193721771, |
| "learning_rate": 1.9985931825352528e-11, |
| "loss": 1.3610191345214844, |
| "step": 1634 |
| }, |
| { |
| "epoch": 2.3883211678832117, |
| "grad_norm": 0.323233038187027, |
| "learning_rate": 1.989512675259668e-11, |
| "loss": 1.4355524778366089, |
| "step": 1636 |
| }, |
| { |
| "epoch": 2.3912408759124086, |
| "grad_norm": 0.3967125415802002, |
| "learning_rate": 1.980468539980897e-11, |
| "loss": 1.1551700830459595, |
| "step": 1638 |
| }, |
| { |
| "epoch": 2.394160583941606, |
| "grad_norm": 0.2280547320842743, |
| "learning_rate": 1.9714608704048038e-11, |
| "loss": 1.136304497718811, |
| "step": 1640 |
| }, |
| { |
| "epoch": 2.397080291970803, |
| "grad_norm": 0.2169790118932724, |
| "learning_rate": 1.9624897598594387e-11, |
| "loss": 1.1340363025665283, |
| "step": 1642 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 0.2436823695898056, |
| "learning_rate": 1.953555301294062e-11, |
| "loss": 1.1512538194656372, |
| "step": 1644 |
| }, |
| { |
| "epoch": 2.402919708029197, |
| "grad_norm": 0.2881166636943817, |
| "learning_rate": 1.9446575872781863e-11, |
| "loss": 1.4590939283370972, |
| "step": 1646 |
| }, |
| { |
| "epoch": 2.4058394160583942, |
| "grad_norm": 0.7346351742744446, |
| "learning_rate": 1.9357967100006153e-11, |
| "loss": 1.4373379945755005, |
| "step": 1648 |
| }, |
| { |
| "epoch": 2.408759124087591, |
| "grad_norm": 0.2763451039791107, |
| "learning_rate": 1.9269727612684882e-11, |
| "loss": 1.4031740427017212, |
| "step": 1650 |
| }, |
| { |
| "epoch": 2.411678832116788, |
| "grad_norm": 0.3692055642604828, |
| "learning_rate": 1.9181858325063297e-11, |
| "loss": 1.413076400756836, |
| "step": 1652 |
| }, |
| { |
| "epoch": 2.4145985401459855, |
| "grad_norm": 0.4550726115703583, |
| "learning_rate": 1.9094360147551022e-11, |
| "loss": 1.4948763847351074, |
| "step": 1654 |
| }, |
| { |
| "epoch": 2.4175182481751825, |
| "grad_norm": 0.3476276695728302, |
| "learning_rate": 1.900723398671263e-11, |
| "loss": 1.2074034214019775, |
| "step": 1656 |
| }, |
| { |
| "epoch": 2.4204379562043794, |
| "grad_norm": 0.2987731993198395, |
| "learning_rate": 1.892048074525824e-11, |
| "loss": 1.208572506904602, |
| "step": 1658 |
| }, |
| { |
| "epoch": 2.423357664233577, |
| "grad_norm": 0.30724695324897766, |
| "learning_rate": 1.8834101322034192e-11, |
| "loss": 1.3055994510650635, |
| "step": 1660 |
| }, |
| { |
| "epoch": 2.4262773722627737, |
| "grad_norm": 0.3010522723197937, |
| "learning_rate": 1.874809661201367e-11, |
| "loss": 1.384286880493164, |
| "step": 1662 |
| }, |
| { |
| "epoch": 2.4291970802919707, |
| "grad_norm": 0.48422113060951233, |
| "learning_rate": 1.8662467506287496e-11, |
| "loss": 1.2188966274261475, |
| "step": 1664 |
| }, |
| { |
| "epoch": 2.432116788321168, |
| "grad_norm": 0.22298695147037506, |
| "learning_rate": 1.8577214892054895e-11, |
| "loss": 1.1136993169784546, |
| "step": 1666 |
| }, |
| { |
| "epoch": 2.435036496350365, |
| "grad_norm": 0.3978652358055115, |
| "learning_rate": 1.8492339652614253e-11, |
| "loss": 1.3406078815460205, |
| "step": 1668 |
| }, |
| { |
| "epoch": 2.437956204379562, |
| "grad_norm": 0.35969844460487366, |
| "learning_rate": 1.8407842667354008e-11, |
| "loss": 1.2762571573257446, |
| "step": 1670 |
| }, |
| { |
| "epoch": 2.4408759124087593, |
| "grad_norm": 0.21341219544410706, |
| "learning_rate": 1.8323724811743495e-11, |
| "loss": 1.310207724571228, |
| "step": 1672 |
| }, |
| { |
| "epoch": 2.4437956204379563, |
| "grad_norm": 0.27594175934791565, |
| "learning_rate": 1.8239986957323938e-11, |
| "loss": 1.4516338109970093, |
| "step": 1674 |
| }, |
| { |
| "epoch": 2.4467153284671532, |
| "grad_norm": 0.5084519386291504, |
| "learning_rate": 1.8156629971699374e-11, |
| "loss": 1.572962999343872, |
| "step": 1676 |
| }, |
| { |
| "epoch": 2.44963503649635, |
| "grad_norm": 0.24259944260120392, |
| "learning_rate": 1.807365471852767e-11, |
| "loss": 1.2814817428588867, |
| "step": 1678 |
| }, |
| { |
| "epoch": 2.4525547445255476, |
| "grad_norm": 0.32336655259132385, |
| "learning_rate": 1.7991062057511587e-11, |
| "loss": 1.2371183633804321, |
| "step": 1680 |
| }, |
| { |
| "epoch": 2.4554744525547445, |
| "grad_norm": 0.3355330526828766, |
| "learning_rate": 1.7908852844389878e-11, |
| "loss": 1.329166054725647, |
| "step": 1682 |
| }, |
| { |
| "epoch": 2.4583941605839414, |
| "grad_norm": 0.23916582763195038, |
| "learning_rate": 1.7827027930928388e-11, |
| "loss": 1.2250287532806396, |
| "step": 1684 |
| }, |
| { |
| "epoch": 2.461313868613139, |
| "grad_norm": 0.509010910987854, |
| "learning_rate": 1.7745588164911263e-11, |
| "loss": 1.5886147022247314, |
| "step": 1686 |
| }, |
| { |
| "epoch": 2.4642335766423358, |
| "grad_norm": 0.24120256304740906, |
| "learning_rate": 1.766453439013215e-11, |
| "loss": 1.3899035453796387, |
| "step": 1688 |
| }, |
| { |
| "epoch": 2.4671532846715327, |
| "grad_norm": 0.34605830907821655, |
| "learning_rate": 1.758386744638546e-11, |
| "loss": 1.3214712142944336, |
| "step": 1690 |
| }, |
| { |
| "epoch": 2.47007299270073, |
| "grad_norm": 0.47554486989974976, |
| "learning_rate": 1.7503588169457688e-11, |
| "loss": 1.3142483234405518, |
| "step": 1692 |
| }, |
| { |
| "epoch": 2.472992700729927, |
| "grad_norm": 0.42094510793685913, |
| "learning_rate": 1.7423697391118673e-11, |
| "loss": 1.274762511253357, |
| "step": 1694 |
| }, |
| { |
| "epoch": 2.475912408759124, |
| "grad_norm": 0.9319076538085938, |
| "learning_rate": 1.7344195939113094e-11, |
| "loss": 1.3079981803894043, |
| "step": 1696 |
| }, |
| { |
| "epoch": 2.478832116788321, |
| "grad_norm": 0.25632455945014954, |
| "learning_rate": 1.7265084637151817e-11, |
| "loss": 1.2449252605438232, |
| "step": 1698 |
| }, |
| { |
| "epoch": 2.4817518248175183, |
| "grad_norm": 0.4942947030067444, |
| "learning_rate": 1.718636430490338e-11, |
| "loss": 1.2960014343261719, |
| "step": 1700 |
| }, |
| { |
| "epoch": 2.4846715328467153, |
| "grad_norm": 0.20054902136325836, |
| "learning_rate": 1.7108035757985506e-11, |
| "loss": 1.3219631910324097, |
| "step": 1702 |
| }, |
| { |
| "epoch": 2.487591240875912, |
| "grad_norm": 0.34794673323631287, |
| "learning_rate": 1.7030099807956648e-11, |
| "loss": 1.2419843673706055, |
| "step": 1704 |
| }, |
| { |
| "epoch": 2.4905109489051096, |
| "grad_norm": 1.3033686876296997, |
| "learning_rate": 1.695255726230758e-11, |
| "loss": 1.5983386039733887, |
| "step": 1706 |
| }, |
| { |
| "epoch": 2.4934306569343065, |
| "grad_norm": 0.44935575127601624, |
| "learning_rate": 1.6875408924453033e-11, |
| "loss": 1.4845508337020874, |
| "step": 1708 |
| }, |
| { |
| "epoch": 2.4963503649635035, |
| "grad_norm": 0.3333362936973572, |
| "learning_rate": 1.6798655593723362e-11, |
| "loss": 1.6482346057891846, |
| "step": 1710 |
| }, |
| { |
| "epoch": 2.499270072992701, |
| "grad_norm": 0.568465530872345, |
| "learning_rate": 1.6722298065356283e-11, |
| "loss": 1.282273292541504, |
| "step": 1712 |
| }, |
| { |
| "epoch": 2.502189781021898, |
| "grad_norm": 0.285226434469223, |
| "learning_rate": 1.6646337130488608e-11, |
| "loss": 1.2390754222869873, |
| "step": 1714 |
| }, |
| { |
| "epoch": 2.5051094890510948, |
| "grad_norm": 0.35810387134552, |
| "learning_rate": 1.657077357614808e-11, |
| "loss": 1.1541006565093994, |
| "step": 1716 |
| }, |
| { |
| "epoch": 2.508029197080292, |
| "grad_norm": 0.22877416014671326, |
| "learning_rate": 1.649560818524517e-11, |
| "loss": 1.2132242918014526, |
| "step": 1718 |
| }, |
| { |
| "epoch": 2.510948905109489, |
| "grad_norm": 0.650364875793457, |
| "learning_rate": 1.6420841736565042e-11, |
| "loss": 1.4655767679214478, |
| "step": 1720 |
| }, |
| { |
| "epoch": 2.513868613138686, |
| "grad_norm": 2.0470120906829834, |
| "learning_rate": 1.6346475004759414e-11, |
| "loss": 1.6555049419403076, |
| "step": 1722 |
| }, |
| { |
| "epoch": 2.5167883211678834, |
| "grad_norm": 0.3082306385040283, |
| "learning_rate": 1.627250876033853e-11, |
| "loss": 1.7491514682769775, |
| "step": 1724 |
| }, |
| { |
| "epoch": 2.5197080291970804, |
| "grad_norm": 0.8077488541603088, |
| "learning_rate": 1.619894376966325e-11, |
| "loss": 1.3254021406173706, |
| "step": 1726 |
| }, |
| { |
| "epoch": 2.5226277372262773, |
| "grad_norm": 0.48932626843452454, |
| "learning_rate": 1.612578079493702e-11, |
| "loss": 1.440732717514038, |
| "step": 1728 |
| }, |
| { |
| "epoch": 2.5255474452554747, |
| "grad_norm": 0.4108842611312866, |
| "learning_rate": 1.6053020594198053e-11, |
| "loss": 1.2445178031921387, |
| "step": 1730 |
| }, |
| { |
| "epoch": 2.5284671532846716, |
| "grad_norm": 0.746144711971283, |
| "learning_rate": 1.598066392131142e-11, |
| "loss": 1.5634169578552246, |
| "step": 1732 |
| }, |
| { |
| "epoch": 2.5313868613138686, |
| "grad_norm": 0.809232771396637, |
| "learning_rate": 1.5908711525961263e-11, |
| "loss": 1.751116156578064, |
| "step": 1734 |
| }, |
| { |
| "epoch": 2.5343065693430655, |
| "grad_norm": 0.31983619928359985, |
| "learning_rate": 1.5837164153643013e-11, |
| "loss": 1.1314088106155396, |
| "step": 1736 |
| }, |
| { |
| "epoch": 2.537226277372263, |
| "grad_norm": 0.35743609070777893, |
| "learning_rate": 1.5766022545655703e-11, |
| "loss": 1.249573826789856, |
| "step": 1738 |
| }, |
| { |
| "epoch": 2.54014598540146, |
| "grad_norm": 0.25417211651802063, |
| "learning_rate": 1.569528743909423e-11, |
| "loss": 1.3246897459030151, |
| "step": 1740 |
| }, |
| { |
| "epoch": 2.543065693430657, |
| "grad_norm": 0.521004319190979, |
| "learning_rate": 1.5624959566841764e-11, |
| "loss": 1.5900986194610596, |
| "step": 1742 |
| }, |
| { |
| "epoch": 2.5459854014598537, |
| "grad_norm": 0.5410935878753662, |
| "learning_rate": 1.5555039657562147e-11, |
| "loss": 1.7000155448913574, |
| "step": 1744 |
| }, |
| { |
| "epoch": 2.548905109489051, |
| "grad_norm": 0.1969938427209854, |
| "learning_rate": 1.548552843569231e-11, |
| "loss": 1.4420926570892334, |
| "step": 1746 |
| }, |
| { |
| "epoch": 2.551824817518248, |
| "grad_norm": 0.9155759215354919, |
| "learning_rate": 1.541642662143481e-11, |
| "loss": 1.553064227104187, |
| "step": 1748 |
| }, |
| { |
| "epoch": 2.554744525547445, |
| "grad_norm": 0.33890122175216675, |
| "learning_rate": 1.5347734930750357e-11, |
| "loss": 1.3361493349075317, |
| "step": 1750 |
| }, |
| { |
| "epoch": 2.5576642335766424, |
| "grad_norm": 0.2208409309387207, |
| "learning_rate": 1.5279454075350363e-11, |
| "loss": 1.1686358451843262, |
| "step": 1752 |
| }, |
| { |
| "epoch": 2.5605839416058394, |
| "grad_norm": 0.2458084523677826, |
| "learning_rate": 1.521158476268965e-11, |
| "loss": 0.9916486740112305, |
| "step": 1754 |
| }, |
| { |
| "epoch": 2.5635036496350363, |
| "grad_norm": 0.5456938743591309, |
| "learning_rate": 1.514412769595899e-11, |
| "loss": 1.1382449865341187, |
| "step": 1756 |
| }, |
| { |
| "epoch": 2.5664233576642337, |
| "grad_norm": 0.32823365926742554, |
| "learning_rate": 1.5077083574077948e-11, |
| "loss": 0.9516903162002563, |
| "step": 1758 |
| }, |
| { |
| "epoch": 2.5693430656934306, |
| "grad_norm": 0.5023689270019531, |
| "learning_rate": 1.5010453091687567e-11, |
| "loss": 1.284109115600586, |
| "step": 1760 |
| }, |
| { |
| "epoch": 2.5722627737226276, |
| "grad_norm": 0.30572813749313354, |
| "learning_rate": 1.494423693914319e-11, |
| "loss": 1.1433796882629395, |
| "step": 1762 |
| }, |
| { |
| "epoch": 2.575182481751825, |
| "grad_norm": 0.8459102511405945, |
| "learning_rate": 1.4878435802507326e-11, |
| "loss": 1.1362444162368774, |
| "step": 1764 |
| }, |
| { |
| "epoch": 2.578102189781022, |
| "grad_norm": 0.44648513197898865, |
| "learning_rate": 1.48130503635425e-11, |
| "loss": 1.214508295059204, |
| "step": 1766 |
| }, |
| { |
| "epoch": 2.581021897810219, |
| "grad_norm": 0.1434440016746521, |
| "learning_rate": 1.474808129970421e-11, |
| "loss": 1.1404409408569336, |
| "step": 1768 |
| }, |
| { |
| "epoch": 2.5839416058394162, |
| "grad_norm": 0.4274137020111084, |
| "learning_rate": 1.468352928413392e-11, |
| "loss": 1.478256106376648, |
| "step": 1770 |
| }, |
| { |
| "epoch": 2.586861313868613, |
| "grad_norm": 0.20402590930461884, |
| "learning_rate": 1.4619394985652097e-11, |
| "loss": 1.0844522714614868, |
| "step": 1772 |
| }, |
| { |
| "epoch": 2.58978102189781, |
| "grad_norm": 0.6682690978050232, |
| "learning_rate": 1.4555679068751232e-11, |
| "loss": 1.0875853300094604, |
| "step": 1774 |
| }, |
| { |
| "epoch": 2.5927007299270075, |
| "grad_norm": 0.1818685382604599, |
| "learning_rate": 1.4492382193589005e-11, |
| "loss": 1.1487854719161987, |
| "step": 1776 |
| }, |
| { |
| "epoch": 2.5956204379562045, |
| "grad_norm": 0.2316112518310547, |
| "learning_rate": 1.4429505015981392e-11, |
| "loss": 1.1742403507232666, |
| "step": 1778 |
| }, |
| { |
| "epoch": 2.5985401459854014, |
| "grad_norm": 0.5319095849990845, |
| "learning_rate": 1.4367048187395926e-11, |
| "loss": 1.4584426879882812, |
| "step": 1780 |
| }, |
| { |
| "epoch": 2.601459854014599, |
| "grad_norm": 0.20086680352687836, |
| "learning_rate": 1.430501235494493e-11, |
| "loss": 1.3289785385131836, |
| "step": 1782 |
| }, |
| { |
| "epoch": 2.6043795620437957, |
| "grad_norm": 0.12394100427627563, |
| "learning_rate": 1.4243398161378788e-11, |
| "loss": 1.0840532779693604, |
| "step": 1784 |
| }, |
| { |
| "epoch": 2.6072992700729927, |
| "grad_norm": 0.44066891074180603, |
| "learning_rate": 1.418220624507931e-11, |
| "loss": 1.1645475625991821, |
| "step": 1786 |
| }, |
| { |
| "epoch": 2.61021897810219, |
| "grad_norm": 0.5535151362419128, |
| "learning_rate": 1.412143724005311e-11, |
| "loss": 1.4112097024917603, |
| "step": 1788 |
| }, |
| { |
| "epoch": 2.613138686131387, |
| "grad_norm": 0.5862986445426941, |
| "learning_rate": 1.4061091775925042e-11, |
| "loss": 1.57581627368927, |
| "step": 1790 |
| }, |
| { |
| "epoch": 2.616058394160584, |
| "grad_norm": 0.7794012427330017, |
| "learning_rate": 1.4001170477931665e-11, |
| "loss": 0.9675093293190002, |
| "step": 1792 |
| }, |
| { |
| "epoch": 2.618978102189781, |
| "grad_norm": 0.39823436737060547, |
| "learning_rate": 1.3941673966914778e-11, |
| "loss": 1.2986432313919067, |
| "step": 1794 |
| }, |
| { |
| "epoch": 2.621897810218978, |
| "grad_norm": 0.34392768144607544, |
| "learning_rate": 1.3882602859314983e-11, |
| "loss": 1.3523763418197632, |
| "step": 1796 |
| }, |
| { |
| "epoch": 2.624817518248175, |
| "grad_norm": 0.7295415997505188, |
| "learning_rate": 1.3823957767165299e-11, |
| "loss": 1.1150239706039429, |
| "step": 1798 |
| }, |
| { |
| "epoch": 2.627737226277372, |
| "grad_norm": 0.8080652952194214, |
| "learning_rate": 1.3765739298084793e-11, |
| "loss": 1.1515074968338013, |
| "step": 1800 |
| }, |
| { |
| "epoch": 2.630656934306569, |
| "grad_norm": 0.3648310601711273, |
| "learning_rate": 1.3707948055272349e-11, |
| "loss": 1.0616378784179688, |
| "step": 1802 |
| }, |
| { |
| "epoch": 2.6335766423357665, |
| "grad_norm": 0.281863272190094, |
| "learning_rate": 1.3650584637500337e-11, |
| "loss": 1.4396766424179077, |
| "step": 1804 |
| }, |
| { |
| "epoch": 2.6364963503649634, |
| "grad_norm": 0.33628129959106445, |
| "learning_rate": 1.3593649639108521e-11, |
| "loss": 1.3579171895980835, |
| "step": 1806 |
| }, |
| { |
| "epoch": 2.6394160583941604, |
| "grad_norm": 0.43391627073287964, |
| "learning_rate": 1.3537143649997743e-11, |
| "loss": 1.4490206241607666, |
| "step": 1808 |
| }, |
| { |
| "epoch": 2.6423357664233578, |
| "grad_norm": 0.38222044706344604, |
| "learning_rate": 1.3481067255623958e-11, |
| "loss": 1.1668449640274048, |
| "step": 1810 |
| }, |
| { |
| "epoch": 2.6452554744525547, |
| "grad_norm": 0.3526589870452881, |
| "learning_rate": 1.3425421036992098e-11, |
| "loss": 1.387364387512207, |
| "step": 1812 |
| }, |
| { |
| "epoch": 2.6481751824817517, |
| "grad_norm": 0.2548363506793976, |
| "learning_rate": 1.337020557065006e-11, |
| "loss": 1.2788268327713013, |
| "step": 1814 |
| }, |
| { |
| "epoch": 2.651094890510949, |
| "grad_norm": 0.41914018988609314, |
| "learning_rate": 1.3315421428682727e-11, |
| "loss": 1.3902523517608643, |
| "step": 1816 |
| }, |
| { |
| "epoch": 2.654014598540146, |
| "grad_norm": 0.45936498045921326, |
| "learning_rate": 1.326106917870607e-11, |
| "loss": 1.1867153644561768, |
| "step": 1818 |
| }, |
| { |
| "epoch": 2.656934306569343, |
| "grad_norm": 0.3785250782966614, |
| "learning_rate": 1.320714938386125e-11, |
| "loss": 1.2837830781936646, |
| "step": 1820 |
| }, |
| { |
| "epoch": 2.6598540145985403, |
| "grad_norm": 0.5063408017158508, |
| "learning_rate": 1.3153662602808731e-11, |
| "loss": 1.2772409915924072, |
| "step": 1822 |
| }, |
| { |
| "epoch": 2.6627737226277373, |
| "grad_norm": 0.42612773180007935, |
| "learning_rate": 1.3100609389722604e-11, |
| "loss": 1.4097553491592407, |
| "step": 1824 |
| }, |
| { |
| "epoch": 2.665693430656934, |
| "grad_norm": 1.5975621938705444, |
| "learning_rate": 1.3047990294284753e-11, |
| "loss": 1.1698918342590332, |
| "step": 1826 |
| }, |
| { |
| "epoch": 2.6686131386861316, |
| "grad_norm": 0.5364381074905396, |
| "learning_rate": 1.29958058616792e-11, |
| "loss": 1.182719349861145, |
| "step": 1828 |
| }, |
| { |
| "epoch": 2.6715328467153285, |
| "grad_norm": 0.26085424423217773, |
| "learning_rate": 1.2944056632586418e-11, |
| "loss": 1.4151949882507324, |
| "step": 1830 |
| }, |
| { |
| "epoch": 2.6744525547445255, |
| "grad_norm": 0.4390961825847626, |
| "learning_rate": 1.2892743143177793e-11, |
| "loss": 1.6285353899002075, |
| "step": 1832 |
| }, |
| { |
| "epoch": 2.677372262773723, |
| "grad_norm": 1.3621900081634521, |
| "learning_rate": 1.284186592511e-11, |
| "loss": 1.251800775527954, |
| "step": 1834 |
| }, |
| { |
| "epoch": 2.68029197080292, |
| "grad_norm": 0.5485129356384277, |
| "learning_rate": 1.2791425505519557e-11, |
| "loss": 1.1241846084594727, |
| "step": 1836 |
| }, |
| { |
| "epoch": 2.6832116788321168, |
| "grad_norm": 0.24132966995239258, |
| "learning_rate": 1.2741422407017312e-11, |
| "loss": 1.27989661693573, |
| "step": 1838 |
| }, |
| { |
| "epoch": 2.686131386861314, |
| "grad_norm": 2.0611886978149414, |
| "learning_rate": 1.2691857147683055e-11, |
| "loss": 1.456813097000122, |
| "step": 1840 |
| }, |
| { |
| "epoch": 2.689051094890511, |
| "grad_norm": 0.25726816058158875, |
| "learning_rate": 1.2642730241060149e-11, |
| "loss": 1.4273651838302612, |
| "step": 1842 |
| }, |
| { |
| "epoch": 2.691970802919708, |
| "grad_norm": 0.6684473752975464, |
| "learning_rate": 1.2594042196150196e-11, |
| "loss": 1.4390050172805786, |
| "step": 1844 |
| }, |
| { |
| "epoch": 2.694890510948905, |
| "grad_norm": 0.31177812814712524, |
| "learning_rate": 1.254579351740779e-11, |
| "loss": 1.2763824462890625, |
| "step": 1846 |
| }, |
| { |
| "epoch": 2.697810218978102, |
| "grad_norm": 0.4132930636405945, |
| "learning_rate": 1.2497984704735244e-11, |
| "loss": 1.4589380025863647, |
| "step": 1848 |
| }, |
| { |
| "epoch": 2.7007299270072993, |
| "grad_norm": 0.5510165691375732, |
| "learning_rate": 1.2450616253477472e-11, |
| "loss": 1.4122458696365356, |
| "step": 1850 |
| }, |
| { |
| "epoch": 2.7036496350364962, |
| "grad_norm": 0.7220962643623352, |
| "learning_rate": 1.2403688654416788e-11, |
| "loss": 1.4138315916061401, |
| "step": 1852 |
| }, |
| { |
| "epoch": 2.706569343065693, |
| "grad_norm": 0.33070141077041626, |
| "learning_rate": 1.2357202393767884e-11, |
| "loss": 1.2186477184295654, |
| "step": 1854 |
| }, |
| { |
| "epoch": 2.7094890510948906, |
| "grad_norm": 0.5884044170379639, |
| "learning_rate": 1.2311157953172753e-11, |
| "loss": 1.546258568763733, |
| "step": 1856 |
| }, |
| { |
| "epoch": 2.7124087591240875, |
| "grad_norm": 0.2696082592010498, |
| "learning_rate": 1.2265555809695725e-11, |
| "loss": 1.3984894752502441, |
| "step": 1858 |
| }, |
| { |
| "epoch": 2.7153284671532845, |
| "grad_norm": 0.38939806818962097, |
| "learning_rate": 1.2220396435818494e-11, |
| "loss": 1.4466756582260132, |
| "step": 1860 |
| }, |
| { |
| "epoch": 2.718248175182482, |
| "grad_norm": 0.5408584475517273, |
| "learning_rate": 1.217568029943524e-11, |
| "loss": 1.4394067525863647, |
| "step": 1862 |
| }, |
| { |
| "epoch": 2.721167883211679, |
| "grad_norm": 0.3784240782260895, |
| "learning_rate": 1.2131407863847788e-11, |
| "loss": 1.4045777320861816, |
| "step": 1864 |
| }, |
| { |
| "epoch": 2.7240875912408757, |
| "grad_norm": 0.4126115143299103, |
| "learning_rate": 1.2087579587760794e-11, |
| "loss": 1.4569664001464844, |
| "step": 1866 |
| }, |
| { |
| "epoch": 2.727007299270073, |
| "grad_norm": 0.40902724862098694, |
| "learning_rate": 1.2044195925277e-11, |
| "loss": 1.3677961826324463, |
| "step": 1868 |
| }, |
| { |
| "epoch": 2.72992700729927, |
| "grad_norm": 1.7693758010864258, |
| "learning_rate": 1.2001257325892525e-11, |
| "loss": 1.5177785158157349, |
| "step": 1870 |
| }, |
| { |
| "epoch": 2.732846715328467, |
| "grad_norm": 0.592065691947937, |
| "learning_rate": 1.1958764234492219e-11, |
| "loss": 1.395766258239746, |
| "step": 1872 |
| }, |
| { |
| "epoch": 2.7357664233576644, |
| "grad_norm": 0.5635214447975159, |
| "learning_rate": 1.1916717091345023e-11, |
| "loss": 1.3134100437164307, |
| "step": 1874 |
| }, |
| { |
| "epoch": 2.7386861313868613, |
| "grad_norm": 0.41125619411468506, |
| "learning_rate": 1.1875116332099453e-11, |
| "loss": 1.3772425651550293, |
| "step": 1876 |
| }, |
| { |
| "epoch": 2.7416058394160583, |
| "grad_norm": 0.35679513216018677, |
| "learning_rate": 1.1833962387779047e-11, |
| "loss": 1.4052398204803467, |
| "step": 1878 |
| }, |
| { |
| "epoch": 2.7445255474452557, |
| "grad_norm": 0.26886072754859924, |
| "learning_rate": 1.1793255684777923e-11, |
| "loss": 1.4129647016525269, |
| "step": 1880 |
| }, |
| { |
| "epoch": 2.7474452554744526, |
| "grad_norm": 0.2874450087547302, |
| "learning_rate": 1.1752996644856346e-11, |
| "loss": 1.4309979677200317, |
| "step": 1882 |
| }, |
| { |
| "epoch": 2.7503649635036496, |
| "grad_norm": 0.5160027146339417, |
| "learning_rate": 1.1713185685136364e-11, |
| "loss": 1.3455803394317627, |
| "step": 1884 |
| }, |
| { |
| "epoch": 2.753284671532847, |
| "grad_norm": 0.2807236313819885, |
| "learning_rate": 1.167382321809749e-11, |
| "loss": 1.4191386699676514, |
| "step": 1886 |
| }, |
| { |
| "epoch": 2.756204379562044, |
| "grad_norm": 0.3917008638381958, |
| "learning_rate": 1.1634909651572425e-11, |
| "loss": 1.3121984004974365, |
| "step": 1888 |
| }, |
| { |
| "epoch": 2.759124087591241, |
| "grad_norm": 0.3935042917728424, |
| "learning_rate": 1.1596445388742837e-11, |
| "loss": 1.4232765436172485, |
| "step": 1890 |
| }, |
| { |
| "epoch": 2.7620437956204382, |
| "grad_norm": 0.2913266718387604, |
| "learning_rate": 1.1558430828135168e-11, |
| "loss": 1.4180057048797607, |
| "step": 1892 |
| }, |
| { |
| "epoch": 2.764963503649635, |
| "grad_norm": 0.24990983307361603, |
| "learning_rate": 1.1520866363616524e-11, |
| "loss": 1.3848999738693237, |
| "step": 1894 |
| }, |
| { |
| "epoch": 2.767883211678832, |
| "grad_norm": 2.3751542568206787, |
| "learning_rate": 1.1483752384390583e-11, |
| "loss": 1.5882635116577148, |
| "step": 1896 |
| }, |
| { |
| "epoch": 2.770802919708029, |
| "grad_norm": 0.4262540340423584, |
| "learning_rate": 1.1447089274993575e-11, |
| "loss": 1.3331663608551025, |
| "step": 1898 |
| }, |
| { |
| "epoch": 2.7737226277372264, |
| "grad_norm": 0.44628793001174927, |
| "learning_rate": 1.1410877415290269e-11, |
| "loss": 1.2856853008270264, |
| "step": 1900 |
| }, |
| { |
| "epoch": 2.7766423357664234, |
| "grad_norm": 0.39279812574386597, |
| "learning_rate": 1.1375117180470078e-11, |
| "loss": 1.4837859869003296, |
| "step": 1902 |
| }, |
| { |
| "epoch": 2.7795620437956203, |
| "grad_norm": 0.3202086389064789, |
| "learning_rate": 1.133980894104314e-11, |
| "loss": 1.2985246181488037, |
| "step": 1904 |
| }, |
| { |
| "epoch": 2.7824817518248173, |
| "grad_norm": 0.5055567622184753, |
| "learning_rate": 1.1304953062836486e-11, |
| "loss": 1.2733244895935059, |
| "step": 1906 |
| }, |
| { |
| "epoch": 2.7854014598540147, |
| "grad_norm": 0.42744991183280945, |
| "learning_rate": 1.1270549906990256e-11, |
| "loss": 1.0509121417999268, |
| "step": 1908 |
| }, |
| { |
| "epoch": 2.7883211678832116, |
| "grad_norm": 0.3167889416217804, |
| "learning_rate": 1.1236599829953968e-11, |
| "loss": 1.095268964767456, |
| "step": 1910 |
| }, |
| { |
| "epoch": 2.7912408759124085, |
| "grad_norm": 0.2934759557247162, |
| "learning_rate": 1.1203103183482787e-11, |
| "loss": 1.2570604085922241, |
| "step": 1912 |
| }, |
| { |
| "epoch": 2.794160583941606, |
| "grad_norm": 0.3890041410923004, |
| "learning_rate": 1.1170060314633928e-11, |
| "loss": 1.4037665128707886, |
| "step": 1914 |
| }, |
| { |
| "epoch": 2.797080291970803, |
| "grad_norm": 0.14640799164772034, |
| "learning_rate": 1.1137471565763024e-11, |
| "loss": 1.2871328592300415, |
| "step": 1916 |
| }, |
| { |
| "epoch": 2.8, |
| "grad_norm": 0.1849203109741211, |
| "learning_rate": 1.1105337274520589e-11, |
| "loss": 1.1548789739608765, |
| "step": 1918 |
| }, |
| { |
| "epoch": 2.802919708029197, |
| "grad_norm": 0.18686549365520477, |
| "learning_rate": 1.1073657773848535e-11, |
| "loss": 1.1780543327331543, |
| "step": 1920 |
| }, |
| { |
| "epoch": 2.805839416058394, |
| "grad_norm": 0.3322215676307678, |
| "learning_rate": 1.10424333919767e-11, |
| "loss": 1.088761329650879, |
| "step": 1922 |
| }, |
| { |
| "epoch": 2.808759124087591, |
| "grad_norm": 0.18809480965137482, |
| "learning_rate": 1.1011664452419465e-11, |
| "loss": 1.0823053121566772, |
| "step": 1924 |
| }, |
| { |
| "epoch": 2.8116788321167885, |
| "grad_norm": 0.17418228089809418, |
| "learning_rate": 1.0981351273972383e-11, |
| "loss": 1.0169756412506104, |
| "step": 1926 |
| }, |
| { |
| "epoch": 2.8145985401459854, |
| "grad_norm": 0.5126947164535522, |
| "learning_rate": 1.09514941707089e-11, |
| "loss": 0.994587242603302, |
| "step": 1928 |
| }, |
| { |
| "epoch": 2.8175182481751824, |
| "grad_norm": 0.9765654802322388, |
| "learning_rate": 1.0922093451977073e-11, |
| "loss": 1.0193537473678589, |
| "step": 1930 |
| }, |
| { |
| "epoch": 2.8204379562043798, |
| "grad_norm": 0.20624187588691711, |
| "learning_rate": 1.0893149422396403e-11, |
| "loss": 0.989415168762207, |
| "step": 1932 |
| }, |
| { |
| "epoch": 2.8233576642335767, |
| "grad_norm": 0.24683533608913422, |
| "learning_rate": 1.0864662381854632e-11, |
| "loss": 1.0691274404525757, |
| "step": 1934 |
| }, |
| { |
| "epoch": 2.8262773722627736, |
| "grad_norm": 0.24659498035907745, |
| "learning_rate": 1.0836632625504674e-11, |
| "loss": 1.2702159881591797, |
| "step": 1936 |
| }, |
| { |
| "epoch": 2.829197080291971, |
| "grad_norm": 0.5301987528800964, |
| "learning_rate": 1.0809060443761531e-11, |
| "loss": 1.3106719255447388, |
| "step": 1938 |
| }, |
| { |
| "epoch": 2.832116788321168, |
| "grad_norm": 0.22561290860176086, |
| "learning_rate": 1.0781946122299307e-11, |
| "loss": 1.3655246496200562, |
| "step": 1940 |
| }, |
| { |
| "epoch": 2.835036496350365, |
| "grad_norm": 0.2964118719100952, |
| "learning_rate": 1.0755289942048237e-11, |
| "loss": 1.298119306564331, |
| "step": 1942 |
| }, |
| { |
| "epoch": 2.8379562043795623, |
| "grad_norm": 0.2465992569923401, |
| "learning_rate": 1.0729092179191765e-11, |
| "loss": 1.3384490013122559, |
| "step": 1944 |
| }, |
| { |
| "epoch": 2.8408759124087593, |
| "grad_norm": 0.2166963517665863, |
| "learning_rate": 1.0703353105163708e-11, |
| "loss": 1.2705466747283936, |
| "step": 1946 |
| }, |
| { |
| "epoch": 2.843795620437956, |
| "grad_norm": 0.2356664091348648, |
| "learning_rate": 1.0678072986645414e-11, |
| "loss": 1.2964879274368286, |
| "step": 1948 |
| }, |
| { |
| "epoch": 2.846715328467153, |
| "grad_norm": 0.1898549348115921, |
| "learning_rate": 1.0653252085563021e-11, |
| "loss": 1.3481731414794922, |
| "step": 1950 |
| }, |
| { |
| "epoch": 2.8496350364963505, |
| "grad_norm": 0.22473156452178955, |
| "learning_rate": 1.0628890659084748e-11, |
| "loss": 1.2900036573410034, |
| "step": 1952 |
| }, |
| { |
| "epoch": 2.8525547445255475, |
| "grad_norm": 0.37998268008232117, |
| "learning_rate": 1.0604988959618193e-11, |
| "loss": 1.3140424489974976, |
| "step": 1954 |
| }, |
| { |
| "epoch": 2.8554744525547444, |
| "grad_norm": 0.4351639449596405, |
| "learning_rate": 1.0581547234807778e-11, |
| "loss": 1.2694008350372314, |
| "step": 1956 |
| }, |
| { |
| "epoch": 2.8583941605839414, |
| "grad_norm": 0.2931780219078064, |
| "learning_rate": 1.055856572753211e-11, |
| "loss": 1.2881304025650024, |
| "step": 1958 |
| }, |
| { |
| "epoch": 2.8613138686131387, |
| "grad_norm": 0.3236960172653198, |
| "learning_rate": 1.0536044675901533e-11, |
| "loss": 1.2829375267028809, |
| "step": 1960 |
| }, |
| { |
| "epoch": 2.8642335766423357, |
| "grad_norm": 0.2794724702835083, |
| "learning_rate": 1.0513984313255612e-11, |
| "loss": 1.2485144138336182, |
| "step": 1962 |
| }, |
| { |
| "epoch": 2.8671532846715326, |
| "grad_norm": 0.3454572856426239, |
| "learning_rate": 1.0492384868160763e-11, |
| "loss": 1.3162847757339478, |
| "step": 1964 |
| }, |
| { |
| "epoch": 2.87007299270073, |
| "grad_norm": 0.40962308645248413, |
| "learning_rate": 1.0471246564407825e-11, |
| "loss": 1.327282428741455, |
| "step": 1966 |
| }, |
| { |
| "epoch": 2.872992700729927, |
| "grad_norm": 0.6799861788749695, |
| "learning_rate": 1.0450569621009781e-11, |
| "loss": 1.3605397939682007, |
| "step": 1968 |
| }, |
| { |
| "epoch": 2.875912408759124, |
| "grad_norm": 0.5539891123771667, |
| "learning_rate": 1.0430354252199495e-11, |
| "loss": 1.3370953798294067, |
| "step": 1970 |
| }, |
| { |
| "epoch": 2.8788321167883213, |
| "grad_norm": 0.25739309191703796, |
| "learning_rate": 1.0410600667427462e-11, |
| "loss": 1.354380488395691, |
| "step": 1972 |
| }, |
| { |
| "epoch": 2.8817518248175182, |
| "grad_norm": 0.3870704770088196, |
| "learning_rate": 1.0391309071359665e-11, |
| "loss": 1.3251416683197021, |
| "step": 1974 |
| }, |
| { |
| "epoch": 2.884671532846715, |
| "grad_norm": 0.20314446091651917, |
| "learning_rate": 1.0372479663875433e-11, |
| "loss": 1.3485803604125977, |
| "step": 1976 |
| }, |
| { |
| "epoch": 2.8875912408759126, |
| "grad_norm": 0.31354570388793945, |
| "learning_rate": 1.0354112640065392e-11, |
| "loss": 1.2970892190933228, |
| "step": 1978 |
| }, |
| { |
| "epoch": 2.8905109489051095, |
| "grad_norm": 0.4762999713420868, |
| "learning_rate": 1.0336208190229425e-11, |
| "loss": 1.302712321281433, |
| "step": 1980 |
| }, |
| { |
| "epoch": 2.8934306569343065, |
| "grad_norm": 0.24994267523288727, |
| "learning_rate": 1.0318766499874702e-11, |
| "loss": 1.3298907279968262, |
| "step": 1982 |
| }, |
| { |
| "epoch": 2.896350364963504, |
| "grad_norm": 0.256294846534729, |
| "learning_rate": 1.0301787749713778e-11, |
| "loss": 1.3118003606796265, |
| "step": 1984 |
| }, |
| { |
| "epoch": 2.899270072992701, |
| "grad_norm": 0.7621220946311951, |
| "learning_rate": 1.0285272115662697e-11, |
| "loss": 1.3401591777801514, |
| "step": 1986 |
| }, |
| { |
| "epoch": 2.9021897810218977, |
| "grad_norm": 0.30471134185791016, |
| "learning_rate": 1.0269219768839177e-11, |
| "loss": 1.3362921476364136, |
| "step": 1988 |
| }, |
| { |
| "epoch": 2.905109489051095, |
| "grad_norm": 0.41349539160728455, |
| "learning_rate": 1.0253630875560841e-11, |
| "loss": 1.330914855003357, |
| "step": 1990 |
| }, |
| { |
| "epoch": 2.908029197080292, |
| "grad_norm": 0.2509981691837311, |
| "learning_rate": 1.0238505597343493e-11, |
| "loss": 1.288212537765503, |
| "step": 1992 |
| }, |
| { |
| "epoch": 2.910948905109489, |
| "grad_norm": 0.2082396149635315, |
| "learning_rate": 1.0223844090899445e-11, |
| "loss": 1.2984920740127563, |
| "step": 1994 |
| }, |
| { |
| "epoch": 2.9138686131386864, |
| "grad_norm": 0.32519200444221497, |
| "learning_rate": 1.0209646508135873e-11, |
| "loss": 1.3151469230651855, |
| "step": 1996 |
| }, |
| { |
| "epoch": 2.9167883211678833, |
| "grad_norm": 0.26818934082984924, |
| "learning_rate": 1.0195912996153294e-11, |
| "loss": 1.3731813430786133, |
| "step": 1998 |
| }, |
| { |
| "epoch": 2.9197080291970803, |
| "grad_norm": 0.43063884973526, |
| "learning_rate": 1.0182643697243976e-11, |
| "loss": 1.3158377408981323, |
| "step": 2000 |
| }, |
| { |
| "epoch": 2.9226277372262772, |
| "grad_norm": 0.280540406703949, |
| "learning_rate": 1.0169838748890516e-11, |
| "loss": 1.3163559436798096, |
| "step": 2002 |
| }, |
| { |
| "epoch": 2.9255474452554746, |
| "grad_norm": 0.22299645841121674, |
| "learning_rate": 1.0157498283764395e-11, |
| "loss": 1.2669562101364136, |
| "step": 2004 |
| }, |
| { |
| "epoch": 2.9284671532846716, |
| "grad_norm": 0.4416807293891907, |
| "learning_rate": 1.0145622429724598e-11, |
| "loss": 1.320312261581421, |
| "step": 2006 |
| }, |
| { |
| "epoch": 2.9313868613138685, |
| "grad_norm": 0.311170756816864, |
| "learning_rate": 1.0134211309816299e-11, |
| "loss": 1.3365989923477173, |
| "step": 2008 |
| }, |
| { |
| "epoch": 2.9343065693430654, |
| "grad_norm": 0.26800814270973206, |
| "learning_rate": 1.0123265042269589e-11, |
| "loss": 1.2997403144836426, |
| "step": 2010 |
| }, |
| { |
| "epoch": 2.937226277372263, |
| "grad_norm": 0.334674596786499, |
| "learning_rate": 1.0112783740498235e-11, |
| "loss": 1.3328243494033813, |
| "step": 2012 |
| }, |
| { |
| "epoch": 2.9401459854014598, |
| "grad_norm": 0.3755459189414978, |
| "learning_rate": 1.0102767513098516e-11, |
| "loss": 1.3310189247131348, |
| "step": 2014 |
| }, |
| { |
| "epoch": 2.9430656934306567, |
| "grad_norm": 0.2287389189004898, |
| "learning_rate": 1.0093216463848107e-11, |
| "loss": 1.383652687072754, |
| "step": 2016 |
| }, |
| { |
| "epoch": 2.945985401459854, |
| "grad_norm": 0.2728535830974579, |
| "learning_rate": 1.0084130691704985e-11, |
| "loss": 1.357974648475647, |
| "step": 2018 |
| }, |
| { |
| "epoch": 2.948905109489051, |
| "grad_norm": 0.31655973196029663, |
| "learning_rate": 1.0075510290806418e-11, |
| "loss": 1.3427679538726807, |
| "step": 2020 |
| }, |
| { |
| "epoch": 2.951824817518248, |
| "grad_norm": 0.24858510494232178, |
| "learning_rate": 1.0067355350467981e-11, |
| "loss": 1.3368231058120728, |
| "step": 2022 |
| }, |
| { |
| "epoch": 2.9547445255474454, |
| "grad_norm": 0.262162446975708, |
| "learning_rate": 1.0059665955182627e-11, |
| "loss": 1.3229783773422241, |
| "step": 2024 |
| }, |
| { |
| "epoch": 2.9576642335766423, |
| "grad_norm": 0.2759620249271393, |
| "learning_rate": 1.0052442184619831e-11, |
| "loss": 1.2838077545166016, |
| "step": 2026 |
| }, |
| { |
| "epoch": 2.9605839416058393, |
| "grad_norm": 0.2278604358434677, |
| "learning_rate": 1.0045684113624746e-11, |
| "loss": 1.286117434501648, |
| "step": 2028 |
| }, |
| { |
| "epoch": 2.9635036496350367, |
| "grad_norm": 0.2492004632949829, |
| "learning_rate": 1.0039391812217433e-11, |
| "loss": 1.2981648445129395, |
| "step": 2030 |
| }, |
| { |
| "epoch": 2.9664233576642336, |
| "grad_norm": 0.37556126713752747, |
| "learning_rate": 1.0033565345592127e-11, |
| "loss": 1.2995569705963135, |
| "step": 2032 |
| }, |
| { |
| "epoch": 2.9693430656934305, |
| "grad_norm": 0.2894899249076843, |
| "learning_rate": 1.0028204774116592e-11, |
| "loss": 1.3662912845611572, |
| "step": 2034 |
| }, |
| { |
| "epoch": 2.972262773722628, |
| "grad_norm": 0.310973197221756, |
| "learning_rate": 1.0023310153331455e-11, |
| "loss": 1.3318140506744385, |
| "step": 2036 |
| }, |
| { |
| "epoch": 2.975182481751825, |
| "grad_norm": 0.2315581887960434, |
| "learning_rate": 1.0018881533949651e-11, |
| "loss": 1.334710955619812, |
| "step": 2038 |
| }, |
| { |
| "epoch": 2.978102189781022, |
| "grad_norm": 0.3643578290939331, |
| "learning_rate": 1.0014918961855914e-11, |
| "loss": 1.330309510231018, |
| "step": 2040 |
| }, |
| { |
| "epoch": 2.981021897810219, |
| "grad_norm": 0.30834120512008667, |
| "learning_rate": 1.0011422478106256e-11, |
| "loss": 1.3401741981506348, |
| "step": 2042 |
| }, |
| { |
| "epoch": 2.983941605839416, |
| "grad_norm": 0.6918445229530334, |
| "learning_rate": 1.000839211892759e-11, |
| "loss": 1.2814037799835205, |
| "step": 2044 |
| }, |
| { |
| "epoch": 2.986861313868613, |
| "grad_norm": 0.26494693756103516, |
| "learning_rate": 1.0005827915717327e-11, |
| "loss": 1.3775584697723389, |
| "step": 2046 |
| }, |
| { |
| "epoch": 2.9897810218978105, |
| "grad_norm": 0.2850579619407654, |
| "learning_rate": 1.0003729895043056e-11, |
| "loss": 1.4125664234161377, |
| "step": 2048 |
| }, |
| { |
| "epoch": 2.9927007299270074, |
| "grad_norm": 0.544228732585907, |
| "learning_rate": 1.0002098078642278e-11, |
| "loss": 1.465717077255249, |
| "step": 2050 |
| }, |
| { |
| "epoch": 2.9956204379562044, |
| "grad_norm": 0.5321989059448242, |
| "learning_rate": 1.000093248342216e-11, |
| "loss": 1.5938138961791992, |
| "step": 2052 |
| }, |
| { |
| "epoch": 2.9985401459854013, |
| "grad_norm": 0.5018138885498047, |
| "learning_rate": 1.0000233121459382e-11, |
| "loss": 1.6167956590652466, |
| "step": 2054 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 2055, |
| "total_flos": 3.6341125364349665e+18, |
| "train_loss": 1.4411861958004843, |
| "train_runtime": 21029.937, |
| "train_samples_per_second": 1.563, |
| "train_steps_per_second": 0.098 |
| } |
| ], |
| "logging_steps": 2, |
| "max_steps": 2055, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 9999999, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.6341125364349665e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|