| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 2619, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.002290950744558992, |
| "grad_norm": 0.9735844731330872, |
| "learning_rate": 7.633587786259542e-08, |
| "loss": 1.4360580444335938, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.004581901489117984, |
| "grad_norm": 1.5835797786712646, |
| "learning_rate": 2.2900763358778629e-07, |
| "loss": 1.971843957901001, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.006872852233676976, |
| "grad_norm": 0.8149327635765076, |
| "learning_rate": 3.8167938931297716e-07, |
| "loss": 1.8271703720092773, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.009163802978235968, |
| "grad_norm": 2.2085700035095215, |
| "learning_rate": 5.34351145038168e-07, |
| "loss": 2.4666566848754883, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.011454753722794959, |
| "grad_norm": 0.4170507788658142, |
| "learning_rate": 6.870229007633589e-07, |
| "loss": 1.2739520072937012, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.013745704467353952, |
| "grad_norm": 0.33925074338912964, |
| "learning_rate": 8.396946564885497e-07, |
| "loss": 1.6276776790618896, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.016036655211912942, |
| "grad_norm": 0.8046349883079529, |
| "learning_rate": 9.923664122137404e-07, |
| "loss": 1.938399076461792, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.018327605956471937, |
| "grad_norm": 0.39774009585380554, |
| "learning_rate": 1.1450381679389313e-06, |
| "loss": 1.5784566402435303, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.020618556701030927, |
| "grad_norm": 2.0479557514190674, |
| "learning_rate": 1.297709923664122e-06, |
| "loss": 2.207265615463257, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.022909507445589918, |
| "grad_norm": 1.4070589542388916, |
| "learning_rate": 1.450381679389313e-06, |
| "loss": 2.08921480178833, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.025200458190148912, |
| "grad_norm": 1.3663133382797241, |
| "learning_rate": 1.603053435114504e-06, |
| "loss": 1.5717980861663818, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.027491408934707903, |
| "grad_norm": 0.5743522644042969, |
| "learning_rate": 1.7557251908396948e-06, |
| "loss": 1.9206372499465942, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.029782359679266894, |
| "grad_norm": 0.5176920294761658, |
| "learning_rate": 1.908396946564886e-06, |
| "loss": 1.903019905090332, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.032073310423825885, |
| "grad_norm": 0.5487824082374573, |
| "learning_rate": 2.0610687022900764e-06, |
| "loss": 1.724461555480957, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.03436426116838488, |
| "grad_norm": 0.6357744932174683, |
| "learning_rate": 2.2137404580152674e-06, |
| "loss": 2.329512119293213, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.03665521191294387, |
| "grad_norm": 0.5210757255554199, |
| "learning_rate": 2.3664122137404585e-06, |
| "loss": 1.7047933340072632, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.038946162657502864, |
| "grad_norm": 0.45220088958740234, |
| "learning_rate": 2.5190839694656487e-06, |
| "loss": 1.8027178049087524, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.041237113402061855, |
| "grad_norm": 0.6867555975914001, |
| "learning_rate": 2.67175572519084e-06, |
| "loss": 1.8687299489974976, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.043528064146620846, |
| "grad_norm": 1.022306203842163, |
| "learning_rate": 2.824427480916031e-06, |
| "loss": 1.516510248184204, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.045819014891179836, |
| "grad_norm": 0.4305284023284912, |
| "learning_rate": 2.9770992366412218e-06, |
| "loss": 1.3579959869384766, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.048109965635738834, |
| "grad_norm": 0.530888020992279, |
| "learning_rate": 3.129770992366413e-06, |
| "loss": 1.86295747756958, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.050400916380297825, |
| "grad_norm": 0.545220673084259, |
| "learning_rate": 3.2824427480916034e-06, |
| "loss": 1.8225860595703125, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.052691867124856816, |
| "grad_norm": 4.15400505065918, |
| "learning_rate": 3.4351145038167944e-06, |
| "loss": 2.479480743408203, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.054982817869415807, |
| "grad_norm": 2.230881452560425, |
| "learning_rate": 3.587786259541985e-06, |
| "loss": 1.9663472175598145, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.0572737686139748, |
| "grad_norm": 0.5587590932846069, |
| "learning_rate": 3.740458015267176e-06, |
| "loss": 1.7919893264770508, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.05956471935853379, |
| "grad_norm": 0.2856103777885437, |
| "learning_rate": 3.893129770992366e-06, |
| "loss": 1.5717711448669434, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.061855670103092786, |
| "grad_norm": 0.680905282497406, |
| "learning_rate": 4.045801526717557e-06, |
| "loss": 1.937774658203125, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.06414662084765177, |
| "grad_norm": 1.862351894378662, |
| "learning_rate": 4.198473282442748e-06, |
| "loss": 1.5575186014175415, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.06643757159221077, |
| "grad_norm": 0.7627670764923096, |
| "learning_rate": 4.351145038167939e-06, |
| "loss": 2.3777570724487305, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.06872852233676977, |
| "grad_norm": 3.405487537384033, |
| "learning_rate": 4.5038167938931296e-06, |
| "loss": 2.064112663269043, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.07101947308132875, |
| "grad_norm": 0.22335933148860931, |
| "learning_rate": 4.656488549618321e-06, |
| "loss": 1.5149509906768799, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.07331042382588775, |
| "grad_norm": 1.8504610061645508, |
| "learning_rate": 4.8091603053435125e-06, |
| "loss": 2.0704846382141113, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.07560137457044673, |
| "grad_norm": 0.48437029123306274, |
| "learning_rate": 4.961832061068703e-06, |
| "loss": 1.5357882976531982, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.07789232531500573, |
| "grad_norm": 0.3509262800216675, |
| "learning_rate": 5.114503816793893e-06, |
| "loss": 1.7561030387878418, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.08018327605956473, |
| "grad_norm": 1.3714377880096436, |
| "learning_rate": 5.267175572519084e-06, |
| "loss": 1.6338742971420288, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.08247422680412371, |
| "grad_norm": 1.4160592555999756, |
| "learning_rate": 5.419847328244276e-06, |
| "loss": 1.5981454849243164, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.08476517754868271, |
| "grad_norm": 0.396519273519516, |
| "learning_rate": 5.572519083969467e-06, |
| "loss": 1.8558708429336548, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.08705612829324169, |
| "grad_norm": 0.9855345487594604, |
| "learning_rate": 5.725190839694656e-06, |
| "loss": 1.506034016609192, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.08934707903780069, |
| "grad_norm": 1.6816610097885132, |
| "learning_rate": 5.877862595419848e-06, |
| "loss": 1.978309988975525, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.09163802978235967, |
| "grad_norm": 0.49164819717407227, |
| "learning_rate": 6.030534351145039e-06, |
| "loss": 1.7104287147521973, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.09392898052691867, |
| "grad_norm": 0.5583735704421997, |
| "learning_rate": 6.18320610687023e-06, |
| "loss": 1.4178941249847412, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.09621993127147767, |
| "grad_norm": 0.5168147087097168, |
| "learning_rate": 6.335877862595419e-06, |
| "loss": 1.7817795276641846, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.09851088201603665, |
| "grad_norm": 1.2413756847381592, |
| "learning_rate": 6.488549618320611e-06, |
| "loss": 1.3074209690093994, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.10080183276059565, |
| "grad_norm": 0.6331862807273865, |
| "learning_rate": 6.641221374045802e-06, |
| "loss": 1.5984269380569458, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.10309278350515463, |
| "grad_norm": 0.5664435029029846, |
| "learning_rate": 6.793893129770993e-06, |
| "loss": 1.7241010665893555, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.10538373424971363, |
| "grad_norm": 0.32952845096588135, |
| "learning_rate": 6.946564885496184e-06, |
| "loss": 1.5395644903182983, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.10767468499427263, |
| "grad_norm": 0.41334185004234314, |
| "learning_rate": 7.0992366412213746e-06, |
| "loss": 1.7171587944030762, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.10996563573883161, |
| "grad_norm": 0.3259168863296509, |
| "learning_rate": 7.251908396946566e-06, |
| "loss": 1.3641917705535889, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.11225658648339061, |
| "grad_norm": 0.8971285223960876, |
| "learning_rate": 7.404580152671757e-06, |
| "loss": 1.6456998586654663, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.1145475372279496, |
| "grad_norm": 2.403928279876709, |
| "learning_rate": 7.557251908396948e-06, |
| "loss": 1.6644506454467773, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.11683848797250859, |
| "grad_norm": 0.5824722647666931, |
| "learning_rate": 7.709923664122137e-06, |
| "loss": 1.7684416770935059, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.11912943871706758, |
| "grad_norm": 0.2737939655780792, |
| "learning_rate": 7.862595419847328e-06, |
| "loss": 1.457775354385376, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.12142038946162657, |
| "grad_norm": 0.20071668922901154, |
| "learning_rate": 8.015267175572519e-06, |
| "loss": 1.3262975215911865, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.12371134020618557, |
| "grad_norm": 0.43577471375465393, |
| "learning_rate": 8.16793893129771e-06, |
| "loss": 1.5433118343353271, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.12600229095074456, |
| "grad_norm": 0.43102404475212097, |
| "learning_rate": 8.320610687022901e-06, |
| "loss": 1.4821274280548096, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.12829324169530354, |
| "grad_norm": 0.450750470161438, |
| "learning_rate": 8.473282442748092e-06, |
| "loss": 1.7386858463287354, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.13058419243986255, |
| "grad_norm": 0.49831417202949524, |
| "learning_rate": 8.625954198473283e-06, |
| "loss": 1.5783125162124634, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.13287514318442153, |
| "grad_norm": 0.5326170921325684, |
| "learning_rate": 8.778625954198474e-06, |
| "loss": 1.6655735969543457, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.13516609392898052, |
| "grad_norm": 0.4478776752948761, |
| "learning_rate": 8.931297709923665e-06, |
| "loss": 1.5613455772399902, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.13745704467353953, |
| "grad_norm": 1.2814429998397827, |
| "learning_rate": 9.083969465648855e-06, |
| "loss": 1.3779879808425903, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.13974799541809851, |
| "grad_norm": 0.3124754726886749, |
| "learning_rate": 9.236641221374046e-06, |
| "loss": 1.2922519445419312, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.1420389461626575, |
| "grad_norm": 0.5107002258300781, |
| "learning_rate": 9.389312977099237e-06, |
| "loss": 1.204099416732788, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.14432989690721648, |
| "grad_norm": 0.42890945076942444, |
| "learning_rate": 9.54198473282443e-06, |
| "loss": 1.5295710563659668, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.1466208476517755, |
| "grad_norm": 0.3888114094734192, |
| "learning_rate": 9.694656488549619e-06, |
| "loss": 1.446217656135559, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.14891179839633448, |
| "grad_norm": 1.3135894536972046, |
| "learning_rate": 9.84732824427481e-06, |
| "loss": 1.696859359741211, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.15120274914089346, |
| "grad_norm": 0.5034719705581665, |
| "learning_rate": 1e-05, |
| "loss": 1.4995859861373901, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.15349369988545247, |
| "grad_norm": 0.22584287822246552, |
| "learning_rate": 9.999985650351204e-06, |
| "loss": 1.1352298259735107, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.15578465063001146, |
| "grad_norm": 0.3132134974002838, |
| "learning_rate": 9.999942601496331e-06, |
| "loss": 1.208528995513916, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.15807560137457044, |
| "grad_norm": 0.41238704323768616, |
| "learning_rate": 9.999870853709929e-06, |
| "loss": 1.4549105167388916, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.16036655211912945, |
| "grad_norm": 0.18063485622406006, |
| "learning_rate": 9.999770407449582e-06, |
| "loss": 1.1948139667510986, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.16265750286368844, |
| "grad_norm": 0.7935987710952759, |
| "learning_rate": 9.999641263355893e-06, |
| "loss": 1.3297820091247559, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.16494845360824742, |
| "grad_norm": 0.3728874921798706, |
| "learning_rate": 9.999483422252499e-06, |
| "loss": 1.4431211948394775, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.1672394043528064, |
| "grad_norm": 0.32941386103630066, |
| "learning_rate": 9.999296885146047e-06, |
| "loss": 1.2147037982940674, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.16953035509736541, |
| "grad_norm": 0.4308471381664276, |
| "learning_rate": 9.999081653226205e-06, |
| "loss": 1.3048083782196045, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.1718213058419244, |
| "grad_norm": 0.7290281057357788, |
| "learning_rate": 9.998837727865636e-06, |
| "loss": 1.1929588317871094, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.17411225658648338, |
| "grad_norm": 1.055001139640808, |
| "learning_rate": 9.998565110620006e-06, |
| "loss": 1.0773563385009766, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.1764032073310424, |
| "grad_norm": 0.3784625828266144, |
| "learning_rate": 9.998263803227965e-06, |
| "loss": 1.4228711128234863, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.17869415807560138, |
| "grad_norm": 0.9958500862121582, |
| "learning_rate": 9.997933807611133e-06, |
| "loss": 1.798612356185913, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.18098510882016036, |
| "grad_norm": 0.370061457157135, |
| "learning_rate": 9.997575125874104e-06, |
| "loss": 1.4642460346221924, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.18327605956471935, |
| "grad_norm": 0.7844455242156982, |
| "learning_rate": 9.997187760304411e-06, |
| "loss": 1.0867466926574707, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.18556701030927836, |
| "grad_norm": 0.5508542060852051, |
| "learning_rate": 9.996771713372525e-06, |
| "loss": 1.295719861984253, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.18785796105383734, |
| "grad_norm": 0.393774151802063, |
| "learning_rate": 9.996326987731836e-06, |
| "loss": 1.0488743782043457, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.19014891179839633, |
| "grad_norm": 0.3985389173030853, |
| "learning_rate": 9.995853586218636e-06, |
| "loss": 1.5313889980316162, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.19243986254295534, |
| "grad_norm": 1.3936595916748047, |
| "learning_rate": 9.995351511852102e-06, |
| "loss": 0.6738256812095642, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.19473081328751432, |
| "grad_norm": 0.3983599543571472, |
| "learning_rate": 9.994820767834273e-06, |
| "loss": 1.4490211009979248, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.1970217640320733, |
| "grad_norm": 0.8028084635734558, |
| "learning_rate": 9.994261357550034e-06, |
| "loss": 1.5482444763183594, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.19931271477663232, |
| "grad_norm": 0.5651741623878479, |
| "learning_rate": 9.993673284567092e-06, |
| "loss": 1.6904449462890625, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.2016036655211913, |
| "grad_norm": 0.6857726573944092, |
| "learning_rate": 9.993056552635954e-06, |
| "loss": 1.4769976139068604, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.20389461626575028, |
| "grad_norm": 1.0751330852508545, |
| "learning_rate": 9.992411165689902e-06, |
| "loss": 1.4480886459350586, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.20618556701030927, |
| "grad_norm": 0.9354397654533386, |
| "learning_rate": 9.99173712784497e-06, |
| "loss": 1.581247329711914, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.20847651775486828, |
| "grad_norm": 0.49207422137260437, |
| "learning_rate": 9.99103444339992e-06, |
| "loss": 1.327151894569397, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.21076746849942726, |
| "grad_norm": 0.7072939872741699, |
| "learning_rate": 9.990303116836204e-06, |
| "loss": 1.3334730863571167, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.21305841924398625, |
| "grad_norm": 0.9365259408950806, |
| "learning_rate": 9.989543152817945e-06, |
| "loss": 1.1465511322021484, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.21534936998854526, |
| "grad_norm": 0.4507530629634857, |
| "learning_rate": 9.98875455619191e-06, |
| "loss": 1.3069469928741455, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.21764032073310424, |
| "grad_norm": 0.2077871412038803, |
| "learning_rate": 9.987937331987466e-06, |
| "loss": 1.2803224325180054, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.21993127147766323, |
| "grad_norm": 0.2749679386615753, |
| "learning_rate": 9.987091485416564e-06, |
| "loss": 1.1666932106018066, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.2222222222222222, |
| "grad_norm": 0.5907368659973145, |
| "learning_rate": 9.986217021873688e-06, |
| "loss": 1.28110933303833, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.22451317296678122, |
| "grad_norm": 0.34716930985450745, |
| "learning_rate": 9.985313946935841e-06, |
| "loss": 1.1845691204071045, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.2268041237113402, |
| "grad_norm": 0.5304384231567383, |
| "learning_rate": 9.98438226636249e-06, |
| "loss": 1.4581682682037354, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.2290950744558992, |
| "grad_norm": 0.5311458110809326, |
| "learning_rate": 9.983421986095543e-06, |
| "loss": 1.5132863521575928, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.2313860252004582, |
| "grad_norm": 0.9410415291786194, |
| "learning_rate": 9.982433112259304e-06, |
| "loss": 1.147586464881897, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.23367697594501718, |
| "grad_norm": 0.5174320936203003, |
| "learning_rate": 9.981415651160434e-06, |
| "loss": 1.3612395524978638, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.23596792668957617, |
| "grad_norm": 0.3282371461391449, |
| "learning_rate": 9.980369609287918e-06, |
| "loss": 1.2773010730743408, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.23825887743413515, |
| "grad_norm": 1.3350720405578613, |
| "learning_rate": 9.979294993313013e-06, |
| "loss": 1.3632477521896362, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.24054982817869416, |
| "grad_norm": 0.31105804443359375, |
| "learning_rate": 9.978191810089213e-06, |
| "loss": 1.2480723857879639, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.24284077892325315, |
| "grad_norm": 0.3278603255748749, |
| "learning_rate": 9.977060066652208e-06, |
| "loss": 1.1874191761016846, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.24513172966781213, |
| "grad_norm": 0.21448446810245514, |
| "learning_rate": 9.975899770219823e-06, |
| "loss": 1.24106764793396, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.24742268041237114, |
| "grad_norm": 0.8085087537765503, |
| "learning_rate": 9.974710928191994e-06, |
| "loss": 1.4966447353363037, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.24971363115693013, |
| "grad_norm": 0.4925968647003174, |
| "learning_rate": 9.973493548150705e-06, |
| "loss": 1.246765375137329, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.2520045819014891, |
| "grad_norm": 0.5599343180656433, |
| "learning_rate": 9.972247637859942e-06, |
| "loss": 1.5764102935791016, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.2542955326460481, |
| "grad_norm": 1.497615933418274, |
| "learning_rate": 9.970973205265654e-06, |
| "loss": 1.1964993476867676, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.2565864833906071, |
| "grad_norm": 0.284583181142807, |
| "learning_rate": 9.969670258495689e-06, |
| "loss": 1.4045443534851074, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.2588774341351661, |
| "grad_norm": 0.37673652172088623, |
| "learning_rate": 9.968338805859746e-06, |
| "loss": 1.1107515096664429, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.2611683848797251, |
| "grad_norm": 0.1895330548286438, |
| "learning_rate": 9.966978855849328e-06, |
| "loss": 1.2244822978973389, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.2634593356242841, |
| "grad_norm": 0.6602473258972168, |
| "learning_rate": 9.965590417137683e-06, |
| "loss": 1.4389466047286987, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.26575028636884307, |
| "grad_norm": 0.7053688764572144, |
| "learning_rate": 9.964173498579744e-06, |
| "loss": 1.3052401542663574, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.26804123711340205, |
| "grad_norm": 0.46937069296836853, |
| "learning_rate": 9.962728109212087e-06, |
| "loss": 1.6006953716278076, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.27033218785796104, |
| "grad_norm": 0.6499706506729126, |
| "learning_rate": 9.961254258252853e-06, |
| "loss": 1.7075961828231812, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.27262313860252, |
| "grad_norm": 0.28833746910095215, |
| "learning_rate": 9.95975195510171e-06, |
| "loss": 1.2591079473495483, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.27491408934707906, |
| "grad_norm": 0.33017802238464355, |
| "learning_rate": 9.958221209339776e-06, |
| "loss": 1.3728235960006714, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.27720504009163804, |
| "grad_norm": 0.42698171734809875, |
| "learning_rate": 9.956662030729571e-06, |
| "loss": 1.4098225831985474, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.27949599083619703, |
| "grad_norm": 1.0393098592758179, |
| "learning_rate": 9.955074429214945e-06, |
| "loss": 1.2753400802612305, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.281786941580756, |
| "grad_norm": 0.3506874144077301, |
| "learning_rate": 9.95345841492102e-06, |
| "loss": 1.3643712997436523, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.284077892325315, |
| "grad_norm": 0.36692970991134644, |
| "learning_rate": 9.951813998154122e-06, |
| "loss": 1.1439402103424072, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.286368843069874, |
| "grad_norm": 1.3749401569366455, |
| "learning_rate": 9.950141189401722e-06, |
| "loss": 1.4724159240722656, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.28865979381443296, |
| "grad_norm": 0.8339785933494568, |
| "learning_rate": 9.948439999332362e-06, |
| "loss": 1.4834284782409668, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.290950744558992, |
| "grad_norm": 0.7085672616958618, |
| "learning_rate": 9.946710438795586e-06, |
| "loss": 1.4935444593429565, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.293241695303551, |
| "grad_norm": 0.39228612184524536, |
| "learning_rate": 9.944952518821877e-06, |
| "loss": 0.9836521148681641, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.29553264604810997, |
| "grad_norm": 0.8522858023643494, |
| "learning_rate": 9.943166250622585e-06, |
| "loss": 1.2665026187896729, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.29782359679266895, |
| "grad_norm": 1.1095147132873535, |
| "learning_rate": 9.941351645589853e-06, |
| "loss": 0.9271557331085205, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.30011454753722794, |
| "grad_norm": 0.5586176514625549, |
| "learning_rate": 9.939508715296543e-06, |
| "loss": 1.329547643661499, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.3024054982817869, |
| "grad_norm": 0.662735641002655, |
| "learning_rate": 9.93763747149617e-06, |
| "loss": 1.247317910194397, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.30469644902634596, |
| "grad_norm": 0.8601267337799072, |
| "learning_rate": 9.935737926122816e-06, |
| "loss": 1.2623186111450195, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.30698739977090495, |
| "grad_norm": 0.20243214070796967, |
| "learning_rate": 9.933810091291065e-06, |
| "loss": 1.289118766784668, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.30927835051546393, |
| "grad_norm": 0.28063586354255676, |
| "learning_rate": 9.93185397929592e-06, |
| "loss": 1.1982347965240479, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.3115693012600229, |
| "grad_norm": 0.42941388487815857, |
| "learning_rate": 9.929869602612718e-06, |
| "loss": 1.137695074081421, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.3138602520045819, |
| "grad_norm": 0.45877066254615784, |
| "learning_rate": 9.927856973897068e-06, |
| "loss": 0.5235239267349243, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.3161512027491409, |
| "grad_norm": 0.6997596025466919, |
| "learning_rate": 9.925816105984751e-06, |
| "loss": 1.3950223922729492, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.31844215349369986, |
| "grad_norm": 1.7510356903076172, |
| "learning_rate": 9.923747011891653e-06, |
| "loss": 0.6258569955825806, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.3207331042382589, |
| "grad_norm": 0.5782098770141602, |
| "learning_rate": 9.92164970481367e-06, |
| "loss": 1.348238229751587, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.3230240549828179, |
| "grad_norm": 0.8980709910392761, |
| "learning_rate": 9.919524198126637e-06, |
| "loss": 1.1449611186981201, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.32531500572737687, |
| "grad_norm": 0.8491981029510498, |
| "learning_rate": 9.91737050538623e-06, |
| "loss": 1.620896816253662, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.32760595647193586, |
| "grad_norm": 0.6017739772796631, |
| "learning_rate": 9.915188640327887e-06, |
| "loss": 0.7882466316223145, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.32989690721649484, |
| "grad_norm": 0.41330039501190186, |
| "learning_rate": 9.912978616866716e-06, |
| "loss": 1.4667167663574219, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.3321878579610538, |
| "grad_norm": 1.4287890195846558, |
| "learning_rate": 9.910740449097412e-06, |
| "loss": 1.4368730783462524, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.3344788087056128, |
| "grad_norm": 0.7372951507568359, |
| "learning_rate": 9.908474151294161e-06, |
| "loss": 1.0971758365631104, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.33676975945017185, |
| "grad_norm": 1.4568077325820923, |
| "learning_rate": 9.906179737910554e-06, |
| "loss": 1.174417495727539, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.33906071019473083, |
| "grad_norm": 0.5502243638038635, |
| "learning_rate": 9.903857223579496e-06, |
| "loss": 0.9996079206466675, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.3413516609392898, |
| "grad_norm": 0.2131074070930481, |
| "learning_rate": 9.901506623113098e-06, |
| "loss": 0.9655183553695679, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.3436426116838488, |
| "grad_norm": 0.45683225989341736, |
| "learning_rate": 9.899127951502601e-06, |
| "loss": 1.239179253578186, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.3459335624284078, |
| "grad_norm": 0.7904418110847473, |
| "learning_rate": 9.896721223918276e-06, |
| "loss": 1.5110833644866943, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.34822451317296677, |
| "grad_norm": 0.19148200750350952, |
| "learning_rate": 9.89428645570932e-06, |
| "loss": 1.169365406036377, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.35051546391752575, |
| "grad_norm": 0.9735309481620789, |
| "learning_rate": 9.891823662403763e-06, |
| "loss": 1.0482746362686157, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.3528064146620848, |
| "grad_norm": 0.5635564923286438, |
| "learning_rate": 9.88933285970837e-06, |
| "loss": 1.3521157503128052, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.3550973654066438, |
| "grad_norm": 0.41833338141441345, |
| "learning_rate": 9.886814063508536e-06, |
| "loss": 1.1932569742202759, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.35738831615120276, |
| "grad_norm": 0.5040867328643799, |
| "learning_rate": 9.884267289868194e-06, |
| "loss": 1.2639671564102173, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.35967926689576174, |
| "grad_norm": 0.3760809004306793, |
| "learning_rate": 9.8816925550297e-06, |
| "loss": 1.36051607131958, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.3619702176403207, |
| "grad_norm": 1.2754006385803223, |
| "learning_rate": 9.879089875413736e-06, |
| "loss": 1.253896713256836, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.3642611683848797, |
| "grad_norm": 0.43398040533065796, |
| "learning_rate": 9.876459267619215e-06, |
| "loss": 1.2675503492355347, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.3665521191294387, |
| "grad_norm": 1.7002805471420288, |
| "learning_rate": 9.873800748423152e-06, |
| "loss": 1.3285064697265625, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.36884306987399773, |
| "grad_norm": 1.125661849975586, |
| "learning_rate": 9.871114334780583e-06, |
| "loss": 1.3152642250061035, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.3711340206185567, |
| "grad_norm": 0.5501522421836853, |
| "learning_rate": 9.868400043824431e-06, |
| "loss": 1.3212919235229492, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.3734249713631157, |
| "grad_norm": 3.444338083267212, |
| "learning_rate": 9.86565789286542e-06, |
| "loss": 1.4326891899108887, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.3757159221076747, |
| "grad_norm": 0.23539075255393982, |
| "learning_rate": 9.862887899391953e-06, |
| "loss": 1.2380142211914062, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.37800687285223367, |
| "grad_norm": 0.7268555164337158, |
| "learning_rate": 9.860090081069998e-06, |
| "loss": 1.1204144954681396, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.38029782359679265, |
| "grad_norm": 0.8944157958030701, |
| "learning_rate": 9.857264455742983e-06, |
| "loss": 1.2395751476287842, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.38258877434135163, |
| "grad_norm": 0.3225472867488861, |
| "learning_rate": 9.854411041431678e-06, |
| "loss": 1.1439430713653564, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.3848797250859107, |
| "grad_norm": 0.7560716867446899, |
| "learning_rate": 9.851529856334079e-06, |
| "loss": 1.2860313653945923, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.38717067583046966, |
| "grad_norm": 0.2908443212509155, |
| "learning_rate": 9.848620918825294e-06, |
| "loss": 1.2428202629089355, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.38946162657502864, |
| "grad_norm": 0.6176069974899292, |
| "learning_rate": 9.845684247457425e-06, |
| "loss": 1.371431827545166, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.3917525773195876, |
| "grad_norm": 0.308256059885025, |
| "learning_rate": 9.842719860959455e-06, |
| "loss": 1.2362736463546753, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.3940435280641466, |
| "grad_norm": 0.2752687931060791, |
| "learning_rate": 9.839727778237116e-06, |
| "loss": 1.3125758171081543, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.3963344788087056, |
| "grad_norm": 1.0439542531967163, |
| "learning_rate": 9.836708018372782e-06, |
| "loss": 1.3099775314331055, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.39862542955326463, |
| "grad_norm": 0.514342188835144, |
| "learning_rate": 9.833660600625338e-06, |
| "loss": 1.212874412536621, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.4009163802978236, |
| "grad_norm": 0.5850813388824463, |
| "learning_rate": 9.83058554443006e-06, |
| "loss": 1.2086424827575684, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.4032073310423826, |
| "grad_norm": 1.0894215106964111, |
| "learning_rate": 9.827482869398496e-06, |
| "loss": 1.3154696226119995, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.4054982817869416, |
| "grad_norm": 3.362022876739502, |
| "learning_rate": 9.82435259531833e-06, |
| "loss": 1.058045506477356, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.40778923253150057, |
| "grad_norm": 0.2783519923686981, |
| "learning_rate": 9.82119474215327e-06, |
| "loss": 0.8597267866134644, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.41008018327605955, |
| "grad_norm": 0.3932453691959381, |
| "learning_rate": 9.818009330042906e-06, |
| "loss": 1.4340499639511108, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.41237113402061853, |
| "grad_norm": 0.5507175922393799, |
| "learning_rate": 9.814796379302592e-06, |
| "loss": 1.2930049896240234, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.4146620847651776, |
| "grad_norm": 0.40081146359443665, |
| "learning_rate": 9.811555910423312e-06, |
| "loss": 1.1826398372650146, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.41695303550973656, |
| "grad_norm": 0.4745732545852661, |
| "learning_rate": 9.808287944071552e-06, |
| "loss": 1.3191637992858887, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.41924398625429554, |
| "grad_norm": 0.5023146867752075, |
| "learning_rate": 9.804992501089164e-06, |
| "loss": 1.4524482488632202, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.4215349369988545, |
| "grad_norm": 0.6656132340431213, |
| "learning_rate": 9.801669602493236e-06, |
| "loss": 1.2732700109481812, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.4238258877434135, |
| "grad_norm": 2.312171220779419, |
| "learning_rate": 9.798319269475959e-06, |
| "loss": 1.1589066982269287, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.4261168384879725, |
| "grad_norm": 2.0123043060302734, |
| "learning_rate": 9.794941523404491e-06, |
| "loss": 1.3179020881652832, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.4284077892325315, |
| "grad_norm": 0.36115938425064087, |
| "learning_rate": 9.791536385820815e-06, |
| "loss": 1.330890417098999, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.4306987399770905, |
| "grad_norm": 0.10276070982217789, |
| "learning_rate": 9.788103878441614e-06, |
| "loss": 0.9845031499862671, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.4329896907216495, |
| "grad_norm": 0.5846933126449585, |
| "learning_rate": 9.784644023158118e-06, |
| "loss": 1.2902302742004395, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.4352806414662085, |
| "grad_norm": 0.45211294293403625, |
| "learning_rate": 9.781156842035978e-06, |
| "loss": 1.2718219757080078, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.43757159221076747, |
| "grad_norm": 0.4230896234512329, |
| "learning_rate": 9.777642357315115e-06, |
| "loss": 1.4458763599395752, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.43986254295532645, |
| "grad_norm": 0.5726656913757324, |
| "learning_rate": 9.774100591409583e-06, |
| "loss": 0.866162896156311, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.44215349369988544, |
| "grad_norm": 0.2680141031742096, |
| "learning_rate": 9.770531566907424e-06, |
| "loss": 1.1903131008148193, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.4444444444444444, |
| "grad_norm": 0.21644671261310577, |
| "learning_rate": 9.766935306570528e-06, |
| "loss": 1.0767426490783691, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.44673539518900346, |
| "grad_norm": 0.40251392126083374, |
| "learning_rate": 9.763311833334482e-06, |
| "loss": 1.206404209136963, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.44902634593356244, |
| "grad_norm": 0.26591402292251587, |
| "learning_rate": 9.759661170308426e-06, |
| "loss": 1.0683099031448364, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.4513172966781214, |
| "grad_norm": 0.6021142601966858, |
| "learning_rate": 9.75598334077491e-06, |
| "loss": 1.4385409355163574, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.4536082474226804, |
| "grad_norm": 0.4870530068874359, |
| "learning_rate": 9.752278368189738e-06, |
| "loss": 1.2794133424758911, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.4558991981672394, |
| "grad_norm": 0.5457776784896851, |
| "learning_rate": 9.748546276181824e-06, |
| "loss": 1.3290512561798096, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.4581901489117984, |
| "grad_norm": 0.6035308837890625, |
| "learning_rate": 9.74478708855304e-06, |
| "loss": 1.1363365650177002, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.46048109965635736, |
| "grad_norm": 1.4187884330749512, |
| "learning_rate": 9.74100082927806e-06, |
| "loss": 1.1181583404541016, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.4627720504009164, |
| "grad_norm": 0.6478531360626221, |
| "learning_rate": 9.737187522504215e-06, |
| "loss": 1.292447805404663, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.4650630011454754, |
| "grad_norm": 0.45188528299331665, |
| "learning_rate": 9.733347192551333e-06, |
| "loss": 1.341691493988037, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.46735395189003437, |
| "grad_norm": 1.0683960914611816, |
| "learning_rate": 9.729479863911585e-06, |
| "loss": 0.9544723033905029, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.46964490263459335, |
| "grad_norm": 0.40036776661872864, |
| "learning_rate": 9.725585561249331e-06, |
| "loss": 1.299605369567871, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.47193585337915234, |
| "grad_norm": 1.0352967977523804, |
| "learning_rate": 9.72166430940096e-06, |
| "loss": 1.5143306255340576, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.4742268041237113, |
| "grad_norm": 0.47124430537223816, |
| "learning_rate": 9.71771613337473e-06, |
| "loss": 1.2042604684829712, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.4765177548682703, |
| "grad_norm": 0.40640538930892944, |
| "learning_rate": 9.713741058350618e-06, |
| "loss": 1.3638975620269775, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.47880870561282934, |
| "grad_norm": 0.22503964602947235, |
| "learning_rate": 9.709739109680146e-06, |
| "loss": 1.0454559326171875, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.48109965635738833, |
| "grad_norm": 0.7164255976676941, |
| "learning_rate": 9.70571031288623e-06, |
| "loss": 1.4321982860565186, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.4833906071019473, |
| "grad_norm": 0.22116360068321228, |
| "learning_rate": 9.701654693663012e-06, |
| "loss": 1.2148916721343994, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.4856815578465063, |
| "grad_norm": 0.40741488337516785, |
| "learning_rate": 9.697572277875696e-06, |
| "loss": 1.2174824476242065, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.4879725085910653, |
| "grad_norm": 0.33800235390663147, |
| "learning_rate": 9.693463091560387e-06, |
| "loss": 1.0184588432312012, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.49026345933562426, |
| "grad_norm": 1.2050279378890991, |
| "learning_rate": 9.689327160923918e-06, |
| "loss": 1.278519868850708, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.4925544100801833, |
| "grad_norm": 0.3495669364929199, |
| "learning_rate": 9.685164512343694e-06, |
| "loss": 1.2338956594467163, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.4948453608247423, |
| "grad_norm": 0.23146063089370728, |
| "learning_rate": 9.680975172367508e-06, |
| "loss": 1.40183687210083, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.49713631156930127, |
| "grad_norm": 0.21940575540065765, |
| "learning_rate": 9.67675916771339e-06, |
| "loss": 1.1633131504058838, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.49942726231386025, |
| "grad_norm": 0.47240111231803894, |
| "learning_rate": 9.67251652526942e-06, |
| "loss": 1.1047279834747314, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.5017182130584192, |
| "grad_norm": 0.36501702666282654, |
| "learning_rate": 9.668247272093568e-06, |
| "loss": 1.256941795349121, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.5040091638029782, |
| "grad_norm": 0.4658450782299042, |
| "learning_rate": 9.663951435413512e-06, |
| "loss": 1.454789400100708, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.5063001145475372, |
| "grad_norm": 0.5242023468017578, |
| "learning_rate": 9.659629042626478e-06, |
| "loss": 1.2262189388275146, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.5085910652920962, |
| "grad_norm": 1.0691118240356445, |
| "learning_rate": 9.655280121299049e-06, |
| "loss": 1.2670984268188477, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.5108820160366552, |
| "grad_norm": 0.7168107032775879, |
| "learning_rate": 9.650904699167002e-06, |
| "loss": 1.2349098920822144, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.5131729667812142, |
| "grad_norm": 0.3086417019367218, |
| "learning_rate": 9.646502804135125e-06, |
| "loss": 1.1503790616989136, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.5154639175257731, |
| "grad_norm": 0.15410441160202026, |
| "learning_rate": 9.642074464277035e-06, |
| "loss": 1.050001859664917, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.5177548682703322, |
| "grad_norm": 0.923253059387207, |
| "learning_rate": 9.637619707835011e-06, |
| "loss": 1.10781729221344, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.5200458190148912, |
| "grad_norm": 0.38856205344200134, |
| "learning_rate": 9.633138563219805e-06, |
| "loss": 1.2779165506362915, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.5223367697594502, |
| "grad_norm": 0.22952410578727722, |
| "learning_rate": 9.628631059010459e-06, |
| "loss": 1.1592912673950195, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.5246277205040092, |
| "grad_norm": 0.42092597484588623, |
| "learning_rate": 9.624097223954132e-06, |
| "loss": 1.2500171661376953, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.5269186712485682, |
| "grad_norm": 1.195084571838379, |
| "learning_rate": 9.619537086965909e-06, |
| "loss": 0.9345007538795471, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.5292096219931272, |
| "grad_norm": 0.25558075308799744, |
| "learning_rate": 9.614950677128618e-06, |
| "loss": 1.0049808025360107, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.5315005727376861, |
| "grad_norm": 0.31603124737739563, |
| "learning_rate": 9.610338023692644e-06, |
| "loss": 1.2373789548873901, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.5337915234822451, |
| "grad_norm": 0.5582301616668701, |
| "learning_rate": 9.60569915607575e-06, |
| "loss": 0.9904681444168091, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.5360824742268041, |
| "grad_norm": 0.20000745356082916, |
| "learning_rate": 9.601034103862875e-06, |
| "loss": 1.1860427856445312, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.5383734249713631, |
| "grad_norm": 0.40028443932533264, |
| "learning_rate": 9.596342896805958e-06, |
| "loss": 1.07786226272583, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.5406643757159221, |
| "grad_norm": 3.4503417015075684, |
| "learning_rate": 9.591625564823743e-06, |
| "loss": 0.6197147965431213, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.5429553264604811, |
| "grad_norm": 0.3767026960849762, |
| "learning_rate": 9.58688213800159e-06, |
| "loss": 1.141902208328247, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.54524627720504, |
| "grad_norm": 0.3436010181903839, |
| "learning_rate": 9.58211264659128e-06, |
| "loss": 1.2863194942474365, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.5475372279495991, |
| "grad_norm": 0.582786500453949, |
| "learning_rate": 9.577317121010822e-06, |
| "loss": 1.0963010787963867, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.5498281786941581, |
| "grad_norm": 0.31849485635757446, |
| "learning_rate": 9.572495591844268e-06, |
| "loss": 1.269704818725586, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.5521191294387171, |
| "grad_norm": 0.20424437522888184, |
| "learning_rate": 9.567648089841504e-06, |
| "loss": 1.0950038433074951, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.5544100801832761, |
| "grad_norm": 0.4071334898471832, |
| "learning_rate": 9.562774645918067e-06, |
| "loss": 1.1633511781692505, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.5567010309278351, |
| "grad_norm": 0.14564399421215057, |
| "learning_rate": 9.557875291154937e-06, |
| "loss": 1.0785776376724243, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.5589919816723941, |
| "grad_norm": 0.5690705180168152, |
| "learning_rate": 9.552950056798345e-06, |
| "loss": 1.397330403327942, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.561282932416953, |
| "grad_norm": 0.715670108795166, |
| "learning_rate": 9.547998974259573e-06, |
| "loss": 1.26545250415802, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.563573883161512, |
| "grad_norm": 0.293247789144516, |
| "learning_rate": 9.543022075114751e-06, |
| "loss": 1.1600966453552246, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.565864833906071, |
| "grad_norm": 0.6295793652534485, |
| "learning_rate": 9.538019391104659e-06, |
| "loss": 1.1647446155548096, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.56815578465063, |
| "grad_norm": 0.49164626002311707, |
| "learning_rate": 9.532990954134527e-06, |
| "loss": 1.2747015953063965, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.570446735395189, |
| "grad_norm": 0.9389271140098572, |
| "learning_rate": 9.527936796273818e-06, |
| "loss": 1.3396897315979004, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.572737686139748, |
| "grad_norm": 0.6148760318756104, |
| "learning_rate": 9.522856949756042e-06, |
| "loss": 1.3895974159240723, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.5750286368843069, |
| "grad_norm": 0.257207989692688, |
| "learning_rate": 9.517751446978537e-06, |
| "loss": 1.1982309818267822, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.5773195876288659, |
| "grad_norm": 0.2164141982793808, |
| "learning_rate": 9.51262032050227e-06, |
| "loss": 1.1153879165649414, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.579610538373425, |
| "grad_norm": 0.4683654010295868, |
| "learning_rate": 9.507463603051624e-06, |
| "loss": 1.2678872346878052, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.581901489117984, |
| "grad_norm": 1.9290564060211182, |
| "learning_rate": 9.502281327514192e-06, |
| "loss": 1.3685619831085205, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.584192439862543, |
| "grad_norm": 0.3087227940559387, |
| "learning_rate": 9.497073526940564e-06, |
| "loss": 1.1183393001556396, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.586483390607102, |
| "grad_norm": 0.39474597573280334, |
| "learning_rate": 9.491840234544127e-06, |
| "loss": 1.1198551654815674, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.588774341351661, |
| "grad_norm": 0.4169851243495941, |
| "learning_rate": 9.486581483700836e-06, |
| "loss": 1.087975263595581, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.5910652920962199, |
| "grad_norm": 0.6149902939796448, |
| "learning_rate": 9.481297307949016e-06, |
| "loss": 1.3570208549499512, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.5933562428407789, |
| "grad_norm": 0.35397911071777344, |
| "learning_rate": 9.47598774098914e-06, |
| "loss": 1.1269992589950562, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.5956471935853379, |
| "grad_norm": 0.9242658615112305, |
| "learning_rate": 9.470652816683619e-06, |
| "loss": 1.1659682989120483, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.5979381443298969, |
| "grad_norm": 0.6208882331848145, |
| "learning_rate": 9.46529256905658e-06, |
| "loss": 1.3326858282089233, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.6002290950744559, |
| "grad_norm": 0.48353537917137146, |
| "learning_rate": 9.459907032293654e-06, |
| "loss": 1.2706080675125122, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.6025200458190149, |
| "grad_norm": 0.35826799273490906, |
| "learning_rate": 9.454496240741761e-06, |
| "loss": 0.9778612852096558, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.6048109965635738, |
| "grad_norm": 0.2744974195957184, |
| "learning_rate": 9.44906022890888e-06, |
| "loss": 1.1908738613128662, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.6071019473081328, |
| "grad_norm": 0.45797595381736755, |
| "learning_rate": 9.443599031463838e-06, |
| "loss": 1.4189608097076416, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.6093928980526919, |
| "grad_norm": 1.2901155948638916, |
| "learning_rate": 9.438112683236086e-06, |
| "loss": 0.7630795836448669, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.6116838487972509, |
| "grad_norm": 0.2464115470647812, |
| "learning_rate": 9.432601219215479e-06, |
| "loss": 1.1513936519622803, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.6139747995418099, |
| "grad_norm": 0.2185329645872116, |
| "learning_rate": 9.427064674552046e-06, |
| "loss": 0.974953293800354, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.6162657502863689, |
| "grad_norm": 2.7072620391845703, |
| "learning_rate": 9.421503084555778e-06, |
| "loss": 1.123814582824707, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.6185567010309279, |
| "grad_norm": 0.25285181403160095, |
| "learning_rate": 9.41591648469639e-06, |
| "loss": 1.1648335456848145, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.6208476517754868, |
| "grad_norm": 0.6194114685058594, |
| "learning_rate": 9.410304910603105e-06, |
| "loss": 0.9610656499862671, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.6231386025200458, |
| "grad_norm": 0.1943773478269577, |
| "learning_rate": 9.404668398064415e-06, |
| "loss": 1.216538429260254, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.6254295532646048, |
| "grad_norm": 0.5252053737640381, |
| "learning_rate": 9.399006983027869e-06, |
| "loss": 1.2029259204864502, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.6277205040091638, |
| "grad_norm": 1.5968742370605469, |
| "learning_rate": 9.393320701599826e-06, |
| "loss": 1.3843748569488525, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.6300114547537228, |
| "grad_norm": 1.534686803817749, |
| "learning_rate": 9.387609590045243e-06, |
| "loss": 1.522881269454956, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.6323024054982818, |
| "grad_norm": 0.215438574552536, |
| "learning_rate": 9.381873684787424e-06, |
| "loss": 1.085440993309021, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.6345933562428407, |
| "grad_norm": 0.7377695441246033, |
| "learning_rate": 9.376113022407806e-06, |
| "loss": 1.3917303085327148, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.6368843069873997, |
| "grad_norm": 0.29986539483070374, |
| "learning_rate": 9.370327639645715e-06, |
| "loss": 1.3055980205535889, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.6391752577319587, |
| "grad_norm": 0.5736729502677917, |
| "learning_rate": 9.364517573398128e-06, |
| "loss": 1.1343388557434082, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.6414662084765178, |
| "grad_norm": 0.3546328842639923, |
| "learning_rate": 9.358682860719456e-06, |
| "loss": 1.2395200729370117, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.6437571592210768, |
| "grad_norm": 0.8976812958717346, |
| "learning_rate": 9.352823538821286e-06, |
| "loss": 1.3456403017044067, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.6460481099656358, |
| "grad_norm": 0.5660920143127441, |
| "learning_rate": 9.346939645072158e-06, |
| "loss": 1.2232890129089355, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.6483390607101948, |
| "grad_norm": 0.5268477201461792, |
| "learning_rate": 9.341031216997318e-06, |
| "loss": 1.3191683292388916, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.6506300114547537, |
| "grad_norm": 0.4400789141654968, |
| "learning_rate": 9.335098292278487e-06, |
| "loss": 1.060530185699463, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.6529209621993127, |
| "grad_norm": 0.33317258954048157, |
| "learning_rate": 9.329140908753612e-06, |
| "loss": 1.2318658828735352, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.6552119129438717, |
| "grad_norm": 1.2683199644088745, |
| "learning_rate": 9.323159104416637e-06, |
| "loss": 1.393534779548645, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.6575028636884307, |
| "grad_norm": 0.47602105140686035, |
| "learning_rate": 9.31715291741724e-06, |
| "loss": 1.2734475135803223, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.6597938144329897, |
| "grad_norm": 0.5834298729896545, |
| "learning_rate": 9.311122386060612e-06, |
| "loss": 1.109555959701538, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.6620847651775487, |
| "grad_norm": 0.4927360713481903, |
| "learning_rate": 9.305067548807202e-06, |
| "loss": 1.1678287982940674, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.6643757159221076, |
| "grad_norm": 0.6562817692756653, |
| "learning_rate": 9.29898844427247e-06, |
| "loss": 1.290650486946106, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 0.25812843441963196, |
| "learning_rate": 9.292885111226647e-06, |
| "loss": 1.173723816871643, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.6689576174112256, |
| "grad_norm": 0.35074305534362793, |
| "learning_rate": 9.286757588594479e-06, |
| "loss": 1.3392133712768555, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.6712485681557846, |
| "grad_norm": 1.0929124355316162, |
| "learning_rate": 9.28060591545499e-06, |
| "loss": 0.5634982585906982, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.6735395189003437, |
| "grad_norm": 1.2900134325027466, |
| "learning_rate": 9.274430131041224e-06, |
| "loss": 1.2917420864105225, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.6758304696449027, |
| "grad_norm": 0.9884699583053589, |
| "learning_rate": 9.268230274739993e-06, |
| "loss": 1.6777942180633545, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.6781214203894617, |
| "grad_norm": 0.3877362906932831, |
| "learning_rate": 9.262006386091643e-06, |
| "loss": 1.284725546836853, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.6804123711340206, |
| "grad_norm": 0.44240447878837585, |
| "learning_rate": 9.255758504789773e-06, |
| "loss": 1.4049007892608643, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.6827033218785796, |
| "grad_norm": 0.6455042362213135, |
| "learning_rate": 9.249486670681011e-06, |
| "loss": 1.3378018140792847, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.6849942726231386, |
| "grad_norm": 2.295257091522217, |
| "learning_rate": 9.243190923764743e-06, |
| "loss": 0.9357068538665771, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.6872852233676976, |
| "grad_norm": 0.6263779997825623, |
| "learning_rate": 9.236871304192857e-06, |
| "loss": 1.3586604595184326, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.6895761741122566, |
| "grad_norm": 0.2799982726573944, |
| "learning_rate": 9.2305278522695e-06, |
| "loss": 1.185795545578003, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.6918671248568156, |
| "grad_norm": 0.5048342347145081, |
| "learning_rate": 9.224160608450806e-06, |
| "loss": 1.218984603881836, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.6941580756013745, |
| "grad_norm": 0.337823748588562, |
| "learning_rate": 9.217769613344647e-06, |
| "loss": 1.0943036079406738, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.6964490263459335, |
| "grad_norm": 0.2669612169265747, |
| "learning_rate": 9.211354907710373e-06, |
| "loss": 1.2221721410751343, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.6987399770904925, |
| "grad_norm": 0.5142564177513123, |
| "learning_rate": 9.204916532458552e-06, |
| "loss": 1.2275896072387695, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.7010309278350515, |
| "grad_norm": 0.5470200181007385, |
| "learning_rate": 9.198454528650702e-06, |
| "loss": 1.284466028213501, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.7033218785796106, |
| "grad_norm": 0.2950684130191803, |
| "learning_rate": 9.191968937499041e-06, |
| "loss": 1.1009457111358643, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.7056128293241696, |
| "grad_norm": 0.29451650381088257, |
| "learning_rate": 9.185459800366212e-06, |
| "loss": 1.1608242988586426, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.7079037800687286, |
| "grad_norm": 0.7399727702140808, |
| "learning_rate": 9.178927158765037e-06, |
| "loss": 1.1171135902404785, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.7101947308132875, |
| "grad_norm": 0.646661639213562, |
| "learning_rate": 9.172371054358224e-06, |
| "loss": 1.211768627166748, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.7124856815578465, |
| "grad_norm": 0.43126344680786133, |
| "learning_rate": 9.16579152895813e-06, |
| "loss": 1.0997836589813232, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.7147766323024055, |
| "grad_norm": 0.4805259108543396, |
| "learning_rate": 9.15918862452648e-06, |
| "loss": 1.0585315227508545, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.7170675830469645, |
| "grad_norm": 2.9120118618011475, |
| "learning_rate": 9.152562383174102e-06, |
| "loss": 0.6636776328086853, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.7193585337915235, |
| "grad_norm": 0.7202600836753845, |
| "learning_rate": 9.145912847160652e-06, |
| "loss": 1.3869521617889404, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.7216494845360825, |
| "grad_norm": 0.4563579559326172, |
| "learning_rate": 9.139240058894358e-06, |
| "loss": 1.2350819110870361, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.7239404352806414, |
| "grad_norm": 0.4965321719646454, |
| "learning_rate": 9.132544060931738e-06, |
| "loss": 1.4039654731750488, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.7262313860252004, |
| "grad_norm": 0.4682888984680176, |
| "learning_rate": 9.125824895977334e-06, |
| "loss": 0.7936815023422241, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.7285223367697594, |
| "grad_norm": 1.1855103969573975, |
| "learning_rate": 9.11908260688344e-06, |
| "loss": 1.312853455543518, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.7308132875143184, |
| "grad_norm": 0.3794934153556824, |
| "learning_rate": 9.112317236649822e-06, |
| "loss": 1.1484042406082153, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.7331042382588774, |
| "grad_norm": 0.5372730493545532, |
| "learning_rate": 9.105528828423455e-06, |
| "loss": 1.3890142440795898, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.7353951890034365, |
| "grad_norm": 0.30828553438186646, |
| "learning_rate": 9.098717425498237e-06, |
| "loss": 1.122969150543213, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.7376861397479955, |
| "grad_norm": 0.8401303291320801, |
| "learning_rate": 9.09188307131472e-06, |
| "loss": 1.3887906074523926, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.7399770904925544, |
| "grad_norm": 0.23984971642494202, |
| "learning_rate": 9.085025809459826e-06, |
| "loss": 1.1644659042358398, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.7422680412371134, |
| "grad_norm": 0.3041433095932007, |
| "learning_rate": 9.078145683666582e-06, |
| "loss": 1.028432011604309, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.7445589919816724, |
| "grad_norm": 0.6665249466896057, |
| "learning_rate": 9.071242737813824e-06, |
| "loss": 1.0644886493682861, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.7468499427262314, |
| "grad_norm": 0.20975077152252197, |
| "learning_rate": 9.06431701592593e-06, |
| "loss": 1.1107673645019531, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.7491408934707904, |
| "grad_norm": 0.45067790150642395, |
| "learning_rate": 9.057368562172535e-06, |
| "loss": 1.0495760440826416, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.7514318442153494, |
| "grad_norm": 0.651390016078949, |
| "learning_rate": 9.050397420868246e-06, |
| "loss": 1.1846449375152588, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.7537227949599083, |
| "grad_norm": 0.49499931931495667, |
| "learning_rate": 9.043403636472368e-06, |
| "loss": 0.9847328662872314, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.7560137457044673, |
| "grad_norm": 0.8829602003097534, |
| "learning_rate": 9.036387253588611e-06, |
| "loss": 1.4708762168884277, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.7583046964490263, |
| "grad_norm": 0.5728069543838501, |
| "learning_rate": 9.02934831696481e-06, |
| "loss": 0.6352672576904297, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.7605956471935853, |
| "grad_norm": 0.23112104833126068, |
| "learning_rate": 9.022286871492641e-06, |
| "loss": 1.1570534706115723, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.7628865979381443, |
| "grad_norm": 0.40691277384757996, |
| "learning_rate": 9.015202962207329e-06, |
| "loss": 1.2407546043395996, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.7651775486827033, |
| "grad_norm": 0.6645967960357666, |
| "learning_rate": 9.008096634287372e-06, |
| "loss": 1.211501121520996, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.7674684994272624, |
| "grad_norm": 0.7196452617645264, |
| "learning_rate": 9.000967933054236e-06, |
| "loss": 1.2097508907318115, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.7697594501718213, |
| "grad_norm": 0.2818904221057892, |
| "learning_rate": 8.993816903972083e-06, |
| "loss": 1.3354947566986084, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.7720504009163803, |
| "grad_norm": 0.42488253116607666, |
| "learning_rate": 8.986643592647473e-06, |
| "loss": 0.9214844703674316, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.7743413516609393, |
| "grad_norm": 1.0121887922286987, |
| "learning_rate": 8.979448044829068e-06, |
| "loss": 1.2559878826141357, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.7766323024054983, |
| "grad_norm": 0.48196929693222046, |
| "learning_rate": 8.972230306407354e-06, |
| "loss": 0.9865573048591614, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.7789232531500573, |
| "grad_norm": 0.5423112511634827, |
| "learning_rate": 8.964990423414334e-06, |
| "loss": 1.2802906036376953, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.7812142038946163, |
| "grad_norm": 0.7373243570327759, |
| "learning_rate": 8.957728442023243e-06, |
| "loss": 1.2485854625701904, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.7835051546391752, |
| "grad_norm": 0.7472283244132996, |
| "learning_rate": 8.95044440854825e-06, |
| "loss": 1.4083813428878784, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.7857961053837342, |
| "grad_norm": 0.425252228975296, |
| "learning_rate": 8.943138369444165e-06, |
| "loss": 1.2072433233261108, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.7880870561282932, |
| "grad_norm": 0.28261420130729675, |
| "learning_rate": 8.935810371306143e-06, |
| "loss": 1.0523269176483154, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.7903780068728522, |
| "grad_norm": 0.6173751950263977, |
| "learning_rate": 8.928460460869383e-06, |
| "loss": 1.3250004053115845, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.7926689576174112, |
| "grad_norm": 0.7808064818382263, |
| "learning_rate": 8.921088685008833e-06, |
| "loss": 1.450615644454956, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.7949599083619702, |
| "grad_norm": 0.28962916135787964, |
| "learning_rate": 8.913695090738891e-06, |
| "loss": 1.1594984531402588, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.7972508591065293, |
| "grad_norm": 0.24257132411003113, |
| "learning_rate": 8.906279725213105e-06, |
| "loss": 1.0624207258224487, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.7995418098510882, |
| "grad_norm": 0.5376023650169373, |
| "learning_rate": 8.898842635723868e-06, |
| "loss": 1.2058117389678955, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.8018327605956472, |
| "grad_norm": 0.32557961344718933, |
| "learning_rate": 8.891383869702127e-06, |
| "loss": 1.2131527662277222, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.8041237113402062, |
| "grad_norm": 0.4365437626838684, |
| "learning_rate": 8.883903474717067e-06, |
| "loss": 1.1353981494903564, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.8064146620847652, |
| "grad_norm": 1.537105679512024, |
| "learning_rate": 8.876401498475818e-06, |
| "loss": 1.2067625522613525, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.8087056128293242, |
| "grad_norm": 0.3798339068889618, |
| "learning_rate": 8.868877988823148e-06, |
| "loss": 1.2047245502471924, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.8109965635738832, |
| "grad_norm": 0.4750516414642334, |
| "learning_rate": 8.861332993741155e-06, |
| "loss": 1.3064179420471191, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.8132875143184422, |
| "grad_norm": 2.4261434078216553, |
| "learning_rate": 8.85376656134896e-06, |
| "loss": 1.1926203966140747, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.8155784650630011, |
| "grad_norm": 0.767508864402771, |
| "learning_rate": 8.846178739902409e-06, |
| "loss": 1.511869192123413, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.8178694158075601, |
| "grad_norm": 0.36493149399757385, |
| "learning_rate": 8.838569577793756e-06, |
| "loss": 1.021366834640503, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.8201603665521191, |
| "grad_norm": 0.4858986437320709, |
| "learning_rate": 8.83093912355136e-06, |
| "loss": 1.1372207403182983, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.8224513172966781, |
| "grad_norm": 0.353909432888031, |
| "learning_rate": 8.82328742583937e-06, |
| "loss": 1.1546082496643066, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.8247422680412371, |
| "grad_norm": 0.27604976296424866, |
| "learning_rate": 8.815614533457419e-06, |
| "loss": 1.0815863609313965, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.827033218785796, |
| "grad_norm": 0.4933943450450897, |
| "learning_rate": 8.807920495340313e-06, |
| "loss": 1.2626594305038452, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.8293241695303551, |
| "grad_norm": 0.4562585651874542, |
| "learning_rate": 8.800205360557714e-06, |
| "loss": 1.305412769317627, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.8316151202749141, |
| "grad_norm": 0.844664454460144, |
| "learning_rate": 8.792469178313835e-06, |
| "loss": 1.1361956596374512, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.8339060710194731, |
| "grad_norm": 0.5677139759063721, |
| "learning_rate": 8.784711997947121e-06, |
| "loss": 1.2585844993591309, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.8361970217640321, |
| "grad_norm": 0.3627360165119171, |
| "learning_rate": 8.776933868929929e-06, |
| "loss": 1.2242045402526855, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.8384879725085911, |
| "grad_norm": 0.16927999258041382, |
| "learning_rate": 8.769134840868228e-06, |
| "loss": 1.1687601804733276, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.8407789232531501, |
| "grad_norm": 0.33561190962791443, |
| "learning_rate": 8.761314963501265e-06, |
| "loss": 1.2507251501083374, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.843069873997709, |
| "grad_norm": 0.22226959466934204, |
| "learning_rate": 8.753474286701263e-06, |
| "loss": 1.0193073749542236, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.845360824742268, |
| "grad_norm": 1.0019639730453491, |
| "learning_rate": 8.74561286047309e-06, |
| "loss": 1.3349125385284424, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.847651775486827, |
| "grad_norm": 0.4346841275691986, |
| "learning_rate": 8.737730734953949e-06, |
| "loss": 1.3727765083312988, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.849942726231386, |
| "grad_norm": 0.1936601996421814, |
| "learning_rate": 8.729827960413054e-06, |
| "loss": 1.3784360885620117, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.852233676975945, |
| "grad_norm": 0.36480435729026794, |
| "learning_rate": 8.721904587251315e-06, |
| "loss": 1.2935837507247925, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.854524627720504, |
| "grad_norm": 0.38390636444091797, |
| "learning_rate": 8.713960666001e-06, |
| "loss": 0.992585301399231, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.856815578465063, |
| "grad_norm": 0.21102914214134216, |
| "learning_rate": 8.705996247325443e-06, |
| "loss": 1.1434128284454346, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.8591065292096219, |
| "grad_norm": 0.43868809938430786, |
| "learning_rate": 8.698011382018687e-06, |
| "loss": 1.4248082637786865, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.861397479954181, |
| "grad_norm": 0.5018251538276672, |
| "learning_rate": 8.690006121005187e-06, |
| "loss": 1.4735299348831177, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.86368843069874, |
| "grad_norm": 0.7337985038757324, |
| "learning_rate": 8.681980515339464e-06, |
| "loss": 1.163881778717041, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.865979381443299, |
| "grad_norm": 0.47560250759124756, |
| "learning_rate": 8.6739346162058e-06, |
| "loss": 1.0997637510299683, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.868270332187858, |
| "grad_norm": 0.3398919403553009, |
| "learning_rate": 8.66586847491789e-06, |
| "loss": 1.0989038944244385, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.870561282932417, |
| "grad_norm": 0.38324427604675293, |
| "learning_rate": 8.657782142918537e-06, |
| "loss": 1.2464152574539185, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.872852233676976, |
| "grad_norm": 0.39253002405166626, |
| "learning_rate": 8.649675671779304e-06, |
| "loss": 1.1929283142089844, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.8751431844215349, |
| "grad_norm": 0.7560232877731323, |
| "learning_rate": 8.641549113200198e-06, |
| "loss": 1.1025712490081787, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.8774341351660939, |
| "grad_norm": 0.29719632863998413, |
| "learning_rate": 8.633402519009337e-06, |
| "loss": 1.297807216644287, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.8797250859106529, |
| "grad_norm": 0.5266314744949341, |
| "learning_rate": 8.625235941162615e-06, |
| "loss": 1.127463459968567, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.8820160366552119, |
| "grad_norm": 3.540093421936035, |
| "learning_rate": 8.617049431743376e-06, |
| "loss": 1.0660855770111084, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.8843069873997709, |
| "grad_norm": 0.38510146737098694, |
| "learning_rate": 8.60884304296208e-06, |
| "loss": 0.8022444248199463, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.8865979381443299, |
| "grad_norm": 0.44131770730018616, |
| "learning_rate": 8.600616827155968e-06, |
| "loss": 1.2196941375732422, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "grad_norm": 0.4722777307033539, |
| "learning_rate": 8.592370836788738e-06, |
| "loss": 1.3086035251617432, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.8911798396334479, |
| "grad_norm": 0.491374135017395, |
| "learning_rate": 8.584105124450192e-06, |
| "loss": 1.425337791442871, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.8934707903780069, |
| "grad_norm": 1.0334980487823486, |
| "learning_rate": 8.575819742855918e-06, |
| "loss": 1.6096653938293457, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.8957617411225659, |
| "grad_norm": 0.19674751162528992, |
| "learning_rate": 8.567514744846947e-06, |
| "loss": 1.1552717685699463, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.8980526918671249, |
| "grad_norm": 0.5686236619949341, |
| "learning_rate": 8.559190183389411e-06, |
| "loss": 0.6807040572166443, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.9003436426116839, |
| "grad_norm": 0.3094983994960785, |
| "learning_rate": 8.550846111574216e-06, |
| "loss": 0.990125298500061, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.9026345933562429, |
| "grad_norm": 1.2661654949188232, |
| "learning_rate": 8.542482582616694e-06, |
| "loss": 1.1654900312423706, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.9049255441008018, |
| "grad_norm": 0.20958510041236877, |
| "learning_rate": 8.53409964985627e-06, |
| "loss": 1.1051843166351318, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.9072164948453608, |
| "grad_norm": 2.105391502380371, |
| "learning_rate": 8.525697366756117e-06, |
| "loss": 1.1716747283935547, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.9095074455899198, |
| "grad_norm": 0.840846836566925, |
| "learning_rate": 8.51727578690282e-06, |
| "loss": 1.2360508441925049, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.9117983963344788, |
| "grad_norm": 0.2139447033405304, |
| "learning_rate": 8.508834964006026e-06, |
| "loss": 1.1012890338897705, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.9140893470790378, |
| "grad_norm": 0.6380893588066101, |
| "learning_rate": 8.500374951898111e-06, |
| "loss": 1.2604258060455322, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.9163802978235968, |
| "grad_norm": 7.733399391174316, |
| "learning_rate": 8.491895804533834e-06, |
| "loss": 1.3236706256866455, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.9186712485681557, |
| "grad_norm": 0.38475513458251953, |
| "learning_rate": 8.483397575989984e-06, |
| "loss": 1.2977417707443237, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.9209621993127147, |
| "grad_norm": 0.7843623757362366, |
| "learning_rate": 8.474880320465054e-06, |
| "loss": 1.3058006763458252, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.9232531500572738, |
| "grad_norm": 0.9258148670196533, |
| "learning_rate": 8.466344092278874e-06, |
| "loss": 1.4281094074249268, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.9255441008018328, |
| "grad_norm": 0.456764280796051, |
| "learning_rate": 8.457788945872278e-06, |
| "loss": 1.2990665435791016, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.9278350515463918, |
| "grad_norm": 0.9124121069908142, |
| "learning_rate": 8.449214935806754e-06, |
| "loss": 1.274019718170166, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.9301260022909508, |
| "grad_norm": 0.8229918479919434, |
| "learning_rate": 8.440622116764095e-06, |
| "loss": 1.3842912912368774, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.9324169530355098, |
| "grad_norm": 0.3444792628288269, |
| "learning_rate": 8.43201054354605e-06, |
| "loss": 1.054539442062378, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.9347079037800687, |
| "grad_norm": 0.8576228618621826, |
| "learning_rate": 8.423380271073975e-06, |
| "loss": 1.0527242422103882, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.9369988545246277, |
| "grad_norm": 1.4201858043670654, |
| "learning_rate": 8.41473135438848e-06, |
| "loss": 1.2259652614593506, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.9392898052691867, |
| "grad_norm": 0.32819488644599915, |
| "learning_rate": 8.406063848649089e-06, |
| "loss": 1.1133246421813965, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.9415807560137457, |
| "grad_norm": 0.22478768229484558, |
| "learning_rate": 8.397377809133872e-06, |
| "loss": 1.0828020572662354, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.9438717067583047, |
| "grad_norm": 0.4433528780937195, |
| "learning_rate": 8.388673291239098e-06, |
| "loss": 1.2108690738677979, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.9461626575028637, |
| "grad_norm": 0.4043940603733063, |
| "learning_rate": 8.379950350478899e-06, |
| "loss": 1.1333519220352173, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.9484536082474226, |
| "grad_norm": 0.17816071212291718, |
| "learning_rate": 8.371209042484884e-06, |
| "loss": 1.1817986965179443, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.9507445589919816, |
| "grad_norm": 1.2147220373153687, |
| "learning_rate": 8.362449423005811e-06, |
| "loss": 0.941910445690155, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.9530355097365406, |
| "grad_norm": 0.828575611114502, |
| "learning_rate": 8.353671547907218e-06, |
| "loss": 1.2004121541976929, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.9553264604810997, |
| "grad_norm": 0.24938572943210602, |
| "learning_rate": 8.344875473171072e-06, |
| "loss": 1.0861434936523438, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.9576174112256587, |
| "grad_norm": 0.2983783483505249, |
| "learning_rate": 8.33606125489541e-06, |
| "loss": 1.1088454723358154, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.9599083619702177, |
| "grad_norm": 0.26970595121383667, |
| "learning_rate": 8.327228949293983e-06, |
| "loss": 1.179089903831482, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.9621993127147767, |
| "grad_norm": 0.30218416452407837, |
| "learning_rate": 8.318378612695893e-06, |
| "loss": 1.1748452186584473, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.9644902634593356, |
| "grad_norm": 0.5073446035385132, |
| "learning_rate": 8.30951030154524e-06, |
| "loss": 1.1994376182556152, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.9667812142038946, |
| "grad_norm": 0.4085674285888672, |
| "learning_rate": 8.300624072400757e-06, |
| "loss": 1.11344313621521, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.9690721649484536, |
| "grad_norm": 0.511972963809967, |
| "learning_rate": 8.29171998193545e-06, |
| "loss": 1.2800376415252686, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.9713631156930126, |
| "grad_norm": 0.9452718496322632, |
| "learning_rate": 8.28279808693624e-06, |
| "loss": 1.2644078731536865, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.9736540664375716, |
| "grad_norm": 1.4905763864517212, |
| "learning_rate": 8.273858444303601e-06, |
| "loss": 1.2725701332092285, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.9759450171821306, |
| "grad_norm": 0.47297149896621704, |
| "learning_rate": 8.26490111105119e-06, |
| "loss": 1.2751691341400146, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.9782359679266895, |
| "grad_norm": 0.3333577811717987, |
| "learning_rate": 8.25592614430549e-06, |
| "loss": 1.4911508560180664, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.9805269186712485, |
| "grad_norm": 1.9368016719818115, |
| "learning_rate": 8.246933601305441e-06, |
| "loss": 1.2820303440093994, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.9828178694158075, |
| "grad_norm": 0.6824301481246948, |
| "learning_rate": 8.237923539402083e-06, |
| "loss": 1.1723785400390625, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.9851088201603666, |
| "grad_norm": 0.1673649251461029, |
| "learning_rate": 8.228896016058182e-06, |
| "loss": 1.0617311000823975, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.9873997709049256, |
| "grad_norm": 1.2521167993545532, |
| "learning_rate": 8.219851088847866e-06, |
| "loss": 0.845752477645874, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.9896907216494846, |
| "grad_norm": 1.0000566244125366, |
| "learning_rate": 8.210788815456259e-06, |
| "loss": 0.5226180553436279, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.9919816723940436, |
| "grad_norm": 0.2924060523509979, |
| "learning_rate": 8.201709253679113e-06, |
| "loss": 1.100063443183899, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.9942726231386025, |
| "grad_norm": 0.6394639015197754, |
| "learning_rate": 8.192612461422436e-06, |
| "loss": 1.1543686389923096, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.9965635738831615, |
| "grad_norm": 0.6142589449882507, |
| "learning_rate": 8.18349849670213e-06, |
| "loss": 1.1854156255722046, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.9988545246277205, |
| "grad_norm": 0.3970600366592407, |
| "learning_rate": 8.174367417643614e-06, |
| "loss": 1.2869349718093872, |
| "step": 872 |
| }, |
| { |
| "epoch": 1.0011454753722795, |
| "grad_norm": 0.13177210092544556, |
| "learning_rate": 8.165219282481454e-06, |
| "loss": 1.085939645767212, |
| "step": 874 |
| }, |
| { |
| "epoch": 1.0034364261168385, |
| "grad_norm": 0.5160970091819763, |
| "learning_rate": 8.156054149558997e-06, |
| "loss": 1.2138652801513672, |
| "step": 876 |
| }, |
| { |
| "epoch": 1.0057273768613975, |
| "grad_norm": 0.2649502456188202, |
| "learning_rate": 8.146872077327992e-06, |
| "loss": 1.2543463706970215, |
| "step": 878 |
| }, |
| { |
| "epoch": 1.0080183276059564, |
| "grad_norm": 0.5178881883621216, |
| "learning_rate": 8.137673124348224e-06, |
| "loss": 1.2511537075042725, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.0103092783505154, |
| "grad_norm": 0.9306449294090271, |
| "learning_rate": 8.128457349287134e-06, |
| "loss": 1.297982931137085, |
| "step": 882 |
| }, |
| { |
| "epoch": 1.0126002290950744, |
| "grad_norm": 1.041662573814392, |
| "learning_rate": 8.119224810919446e-06, |
| "loss": 1.1431050300598145, |
| "step": 884 |
| }, |
| { |
| "epoch": 1.0148911798396334, |
| "grad_norm": 0.6204004287719727, |
| "learning_rate": 8.1099755681268e-06, |
| "loss": 1.2724859714508057, |
| "step": 886 |
| }, |
| { |
| "epoch": 1.0171821305841924, |
| "grad_norm": 0.36028027534484863, |
| "learning_rate": 8.10070967989737e-06, |
| "loss": 1.2920498847961426, |
| "step": 888 |
| }, |
| { |
| "epoch": 1.0194730813287514, |
| "grad_norm": 0.2737988531589508, |
| "learning_rate": 8.091427205325481e-06, |
| "loss": 1.1133158206939697, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.0217640320733103, |
| "grad_norm": 0.3814859688282013, |
| "learning_rate": 8.082128203611245e-06, |
| "loss": 1.1604249477386475, |
| "step": 892 |
| }, |
| { |
| "epoch": 1.0240549828178693, |
| "grad_norm": 0.7075831890106201, |
| "learning_rate": 8.07281273406018e-06, |
| "loss": 1.2947951555252075, |
| "step": 894 |
| }, |
| { |
| "epoch": 1.0263459335624283, |
| "grad_norm": 0.43392547965049744, |
| "learning_rate": 8.063480856082822e-06, |
| "loss": 1.3201167583465576, |
| "step": 896 |
| }, |
| { |
| "epoch": 1.0286368843069873, |
| "grad_norm": 0.6384571194648743, |
| "learning_rate": 8.054132629194363e-06, |
| "loss": 1.2386162281036377, |
| "step": 898 |
| }, |
| { |
| "epoch": 1.0309278350515463, |
| "grad_norm": 1.1632153987884521, |
| "learning_rate": 8.044768113014253e-06, |
| "loss": 1.2521460056304932, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.0332187857961055, |
| "grad_norm": 1.460950255393982, |
| "learning_rate": 8.03538736726584e-06, |
| "loss": 0.9340270161628723, |
| "step": 902 |
| }, |
| { |
| "epoch": 1.0355097365406645, |
| "grad_norm": 0.32956740260124207, |
| "learning_rate": 8.025990451775963e-06, |
| "loss": 1.191983938217163, |
| "step": 904 |
| }, |
| { |
| "epoch": 1.0378006872852235, |
| "grad_norm": 0.39645904302597046, |
| "learning_rate": 8.016577426474602e-06, |
| "loss": 1.1712646484375, |
| "step": 906 |
| }, |
| { |
| "epoch": 1.0400916380297824, |
| "grad_norm": 0.7310532331466675, |
| "learning_rate": 8.007148351394465e-06, |
| "loss": 1.208326816558838, |
| "step": 908 |
| }, |
| { |
| "epoch": 1.0423825887743414, |
| "grad_norm": 0.3210288882255554, |
| "learning_rate": 7.99770328667063e-06, |
| "loss": 1.0705335140228271, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.0446735395189004, |
| "grad_norm": 0.8159183263778687, |
| "learning_rate": 7.988242292540144e-06, |
| "loss": 1.2352807521820068, |
| "step": 912 |
| }, |
| { |
| "epoch": 1.0469644902634594, |
| "grad_norm": 1.189197063446045, |
| "learning_rate": 7.978765429341651e-06, |
| "loss": 1.1748485565185547, |
| "step": 914 |
| }, |
| { |
| "epoch": 1.0492554410080184, |
| "grad_norm": 0.30064672231674194, |
| "learning_rate": 7.969272757514997e-06, |
| "loss": 1.1746854782104492, |
| "step": 916 |
| }, |
| { |
| "epoch": 1.0515463917525774, |
| "grad_norm": 0.5656450986862183, |
| "learning_rate": 7.959764337600852e-06, |
| "loss": 1.2387423515319824, |
| "step": 918 |
| }, |
| { |
| "epoch": 1.0538373424971363, |
| "grad_norm": 0.32169046998023987, |
| "learning_rate": 7.950240230240323e-06, |
| "loss": 0.6924710273742676, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.0561282932416953, |
| "grad_norm": 0.7423054575920105, |
| "learning_rate": 7.94070049617456e-06, |
| "loss": 1.2082648277282715, |
| "step": 922 |
| }, |
| { |
| "epoch": 1.0584192439862543, |
| "grad_norm": 0.7247334718704224, |
| "learning_rate": 7.93114519624438e-06, |
| "loss": 1.2238739728927612, |
| "step": 924 |
| }, |
| { |
| "epoch": 1.0607101947308133, |
| "grad_norm": 0.7445449233055115, |
| "learning_rate": 7.921574391389874e-06, |
| "loss": 1.161942720413208, |
| "step": 926 |
| }, |
| { |
| "epoch": 1.0630011454753723, |
| "grad_norm": 0.3218993544578552, |
| "learning_rate": 7.911988142650008e-06, |
| "loss": 1.0611815452575684, |
| "step": 928 |
| }, |
| { |
| "epoch": 1.0652920962199313, |
| "grad_norm": 2.6120293140411377, |
| "learning_rate": 7.902386511162257e-06, |
| "loss": 1.066215991973877, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.0675830469644902, |
| "grad_norm": 0.4084852933883667, |
| "learning_rate": 7.892769558162188e-06, |
| "loss": 1.1941471099853516, |
| "step": 932 |
| }, |
| { |
| "epoch": 1.0698739977090492, |
| "grad_norm": 0.3998633325099945, |
| "learning_rate": 7.883137344983094e-06, |
| "loss": 0.7665129899978638, |
| "step": 934 |
| }, |
| { |
| "epoch": 1.0721649484536082, |
| "grad_norm": 0.40679097175598145, |
| "learning_rate": 7.873489933055586e-06, |
| "loss": 1.1121021509170532, |
| "step": 936 |
| }, |
| { |
| "epoch": 1.0744558991981672, |
| "grad_norm": 0.6797943711280823, |
| "learning_rate": 7.863827383907202e-06, |
| "loss": 1.3281772136688232, |
| "step": 938 |
| }, |
| { |
| "epoch": 1.0767468499427262, |
| "grad_norm": 0.510895848274231, |
| "learning_rate": 7.85414975916203e-06, |
| "loss": 1.2209117412567139, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.0790378006872852, |
| "grad_norm": 0.21708233654499054, |
| "learning_rate": 7.8444571205403e-06, |
| "loss": 1.1070363521575928, |
| "step": 942 |
| }, |
| { |
| "epoch": 1.0813287514318441, |
| "grad_norm": 1.5223441123962402, |
| "learning_rate": 7.834749529857991e-06, |
| "loss": 1.1975922584533691, |
| "step": 944 |
| }, |
| { |
| "epoch": 1.0836197021764031, |
| "grad_norm": 0.400249183177948, |
| "learning_rate": 7.825027049026448e-06, |
| "loss": 1.2500934600830078, |
| "step": 946 |
| }, |
| { |
| "epoch": 1.0859106529209621, |
| "grad_norm": 0.48843708634376526, |
| "learning_rate": 7.81528974005197e-06, |
| "loss": 1.253622055053711, |
| "step": 948 |
| }, |
| { |
| "epoch": 1.088201603665521, |
| "grad_norm": 0.5512809157371521, |
| "learning_rate": 7.805537665035435e-06, |
| "loss": 1.1393404006958008, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.09049255441008, |
| "grad_norm": 0.35912221670150757, |
| "learning_rate": 7.795770886171885e-06, |
| "loss": 1.2351994514465332, |
| "step": 952 |
| }, |
| { |
| "epoch": 1.0927835051546393, |
| "grad_norm": 0.2847960293292999, |
| "learning_rate": 7.785989465750144e-06, |
| "loss": 1.1068283319473267, |
| "step": 954 |
| }, |
| { |
| "epoch": 1.0950744558991983, |
| "grad_norm": 0.37099456787109375, |
| "learning_rate": 7.776193466152408e-06, |
| "loss": 1.2183043956756592, |
| "step": 956 |
| }, |
| { |
| "epoch": 1.0973654066437573, |
| "grad_norm": 0.2889074981212616, |
| "learning_rate": 7.766382949853856e-06, |
| "loss": 1.1053792238235474, |
| "step": 958 |
| }, |
| { |
| "epoch": 1.0996563573883162, |
| "grad_norm": 0.23075656592845917, |
| "learning_rate": 7.756557979422254e-06, |
| "loss": 1.1999101638793945, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.1019473081328752, |
| "grad_norm": 0.3924773037433624, |
| "learning_rate": 7.746718617517541e-06, |
| "loss": 1.232895016670227, |
| "step": 962 |
| }, |
| { |
| "epoch": 1.1042382588774342, |
| "grad_norm": 0.5827606320381165, |
| "learning_rate": 7.73686492689145e-06, |
| "loss": 1.258862018585205, |
| "step": 964 |
| }, |
| { |
| "epoch": 1.1065292096219932, |
| "grad_norm": 0.5682635307312012, |
| "learning_rate": 7.726996970387087e-06, |
| "loss": 1.1788777112960815, |
| "step": 966 |
| }, |
| { |
| "epoch": 1.1088201603665522, |
| "grad_norm": 0.5773354172706604, |
| "learning_rate": 7.717114810938548e-06, |
| "loss": 1.2178382873535156, |
| "step": 968 |
| }, |
| { |
| "epoch": 1.1111111111111112, |
| "grad_norm": 1.0229160785675049, |
| "learning_rate": 7.707218511570506e-06, |
| "loss": 0.9783726930618286, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.1134020618556701, |
| "grad_norm": 0.48322662711143494, |
| "learning_rate": 7.697308135397819e-06, |
| "loss": 1.293108344078064, |
| "step": 972 |
| }, |
| { |
| "epoch": 1.1156930126002291, |
| "grad_norm": 0.406630277633667, |
| "learning_rate": 7.687383745625113e-06, |
| "loss": 0.7449958324432373, |
| "step": 974 |
| }, |
| { |
| "epoch": 1.1179839633447881, |
| "grad_norm": 0.7270604372024536, |
| "learning_rate": 7.67744540554639e-06, |
| "loss": 1.3136848211288452, |
| "step": 976 |
| }, |
| { |
| "epoch": 1.120274914089347, |
| "grad_norm": 0.6745506525039673, |
| "learning_rate": 7.667493178544626e-06, |
| "loss": 1.030921459197998, |
| "step": 978 |
| }, |
| { |
| "epoch": 1.122565864833906, |
| "grad_norm": 0.5561051368713379, |
| "learning_rate": 7.65752712809136e-06, |
| "loss": 1.0987008810043335, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.124856815578465, |
| "grad_norm": 0.7598734498023987, |
| "learning_rate": 7.64754731774629e-06, |
| "loss": 1.3208575248718262, |
| "step": 982 |
| }, |
| { |
| "epoch": 1.127147766323024, |
| "grad_norm": 0.5683021545410156, |
| "learning_rate": 7.637553811156871e-06, |
| "loss": 1.1968867778778076, |
| "step": 984 |
| }, |
| { |
| "epoch": 1.129438717067583, |
| "grad_norm": 0.41832515597343445, |
| "learning_rate": 7.627546672057908e-06, |
| "loss": 1.1648495197296143, |
| "step": 986 |
| }, |
| { |
| "epoch": 1.131729667812142, |
| "grad_norm": 0.6963409185409546, |
| "learning_rate": 7.617525964271149e-06, |
| "loss": 1.1976368427276611, |
| "step": 988 |
| }, |
| { |
| "epoch": 1.134020618556701, |
| "grad_norm": 0.3125353753566742, |
| "learning_rate": 7.607491751704876e-06, |
| "loss": 1.1840250492095947, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.13631156930126, |
| "grad_norm": 0.18655972182750702, |
| "learning_rate": 7.5974440983535015e-06, |
| "loss": 0.9917019009590149, |
| "step": 992 |
| }, |
| { |
| "epoch": 1.138602520045819, |
| "grad_norm": 0.4015827178955078, |
| "learning_rate": 7.587383068297157e-06, |
| "loss": 1.1198314428329468, |
| "step": 994 |
| }, |
| { |
| "epoch": 1.140893470790378, |
| "grad_norm": 0.3271810710430145, |
| "learning_rate": 7.577308725701285e-06, |
| "loss": 0.9866247177124023, |
| "step": 996 |
| }, |
| { |
| "epoch": 1.143184421534937, |
| "grad_norm": 0.3470871150493622, |
| "learning_rate": 7.567221134816235e-06, |
| "loss": 1.1671900749206543, |
| "step": 998 |
| }, |
| { |
| "epoch": 1.145475372279496, |
| "grad_norm": 0.5754879713058472, |
| "learning_rate": 7.557120359976843e-06, |
| "loss": 1.1137046813964844, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.147766323024055, |
| "grad_norm": 0.8881956338882446, |
| "learning_rate": 7.547006465602026e-06, |
| "loss": 1.0529272556304932, |
| "step": 1002 |
| }, |
| { |
| "epoch": 1.1500572737686139, |
| "grad_norm": 0.3590288758277893, |
| "learning_rate": 7.5368795161943835e-06, |
| "loss": 1.3641059398651123, |
| "step": 1004 |
| }, |
| { |
| "epoch": 1.1523482245131729, |
| "grad_norm": 0.21775324642658234, |
| "learning_rate": 7.526739576339761e-06, |
| "loss": 1.063247561454773, |
| "step": 1006 |
| }, |
| { |
| "epoch": 1.1546391752577319, |
| "grad_norm": 0.27955248951911926, |
| "learning_rate": 7.516586710706862e-06, |
| "loss": 1.0667047500610352, |
| "step": 1008 |
| }, |
| { |
| "epoch": 1.1569301260022908, |
| "grad_norm": 0.35729917883872986, |
| "learning_rate": 7.506420984046823e-06, |
| "loss": 1.1514363288879395, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.1592210767468498, |
| "grad_norm": 0.4915781021118164, |
| "learning_rate": 7.496242461192801e-06, |
| "loss": 1.1422107219696045, |
| "step": 1012 |
| }, |
| { |
| "epoch": 1.161512027491409, |
| "grad_norm": 0.42877474427223206, |
| "learning_rate": 7.486051207059567e-06, |
| "loss": 0.9425073862075806, |
| "step": 1014 |
| }, |
| { |
| "epoch": 1.163802978235968, |
| "grad_norm": 0.7864556312561035, |
| "learning_rate": 7.475847286643081e-06, |
| "loss": 1.1090211868286133, |
| "step": 1016 |
| }, |
| { |
| "epoch": 1.166093928980527, |
| "grad_norm": 0.28250840306282043, |
| "learning_rate": 7.46563076502009e-06, |
| "loss": 1.1747761964797974, |
| "step": 1018 |
| }, |
| { |
| "epoch": 1.168384879725086, |
| "grad_norm": 0.3922346532344818, |
| "learning_rate": 7.4554017073477e-06, |
| "loss": 1.0735399723052979, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.170675830469645, |
| "grad_norm": 0.30916616320610046, |
| "learning_rate": 7.445160178862977e-06, |
| "loss": 0.9813652038574219, |
| "step": 1022 |
| }, |
| { |
| "epoch": 1.172966781214204, |
| "grad_norm": 0.5802461504936218, |
| "learning_rate": 7.434906244882508e-06, |
| "loss": 0.9689698219299316, |
| "step": 1024 |
| }, |
| { |
| "epoch": 1.175257731958763, |
| "grad_norm": 0.5440467596054077, |
| "learning_rate": 7.42463997080201e-06, |
| "loss": 0.9294706583023071, |
| "step": 1026 |
| }, |
| { |
| "epoch": 1.177548682703322, |
| "grad_norm": 0.2637293040752411, |
| "learning_rate": 7.414361422095894e-06, |
| "loss": 1.1335558891296387, |
| "step": 1028 |
| }, |
| { |
| "epoch": 1.179839633447881, |
| "grad_norm": 0.40597668290138245, |
| "learning_rate": 7.404070664316855e-06, |
| "loss": 1.4880869388580322, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.1821305841924399, |
| "grad_norm": 1.225714087486267, |
| "learning_rate": 7.393767763095452e-06, |
| "loss": 0.6794233322143555, |
| "step": 1032 |
| }, |
| { |
| "epoch": 1.1844215349369989, |
| "grad_norm": 0.4729476571083069, |
| "learning_rate": 7.383452784139694e-06, |
| "loss": 1.1326751708984375, |
| "step": 1034 |
| }, |
| { |
| "epoch": 1.1867124856815578, |
| "grad_norm": 0.7771596908569336, |
| "learning_rate": 7.37312579323461e-06, |
| "loss": 0.7189903259277344, |
| "step": 1036 |
| }, |
| { |
| "epoch": 1.1890034364261168, |
| "grad_norm": 0.7064245939254761, |
| "learning_rate": 7.362786856241845e-06, |
| "loss": 1.1460908651351929, |
| "step": 1038 |
| }, |
| { |
| "epoch": 1.1912943871706758, |
| "grad_norm": 0.6097919344902039, |
| "learning_rate": 7.3524360390992275e-06, |
| "loss": 1.0353729724884033, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.1935853379152348, |
| "grad_norm": 0.4319731295108795, |
| "learning_rate": 7.342073407820351e-06, |
| "loss": 0.6625040173530579, |
| "step": 1042 |
| }, |
| { |
| "epoch": 1.1958762886597938, |
| "grad_norm": 0.9910304546356201, |
| "learning_rate": 7.331699028494161e-06, |
| "loss": 1.1734858751296997, |
| "step": 1044 |
| }, |
| { |
| "epoch": 1.1981672394043528, |
| "grad_norm": 1.1285514831542969, |
| "learning_rate": 7.321312967284518e-06, |
| "loss": 1.3283900022506714, |
| "step": 1046 |
| }, |
| { |
| "epoch": 1.2004581901489118, |
| "grad_norm": 1.028917908668518, |
| "learning_rate": 7.310915290429799e-06, |
| "loss": 1.2122178077697754, |
| "step": 1048 |
| }, |
| { |
| "epoch": 1.2027491408934707, |
| "grad_norm": 0.46919187903404236, |
| "learning_rate": 7.300506064242448e-06, |
| "loss": 1.1828129291534424, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.2050400916380297, |
| "grad_norm": 0.3256652355194092, |
| "learning_rate": 7.290085355108573e-06, |
| "loss": 1.177816390991211, |
| "step": 1052 |
| }, |
| { |
| "epoch": 1.2073310423825887, |
| "grad_norm": 0.268441766500473, |
| "learning_rate": 7.279653229487517e-06, |
| "loss": 1.0987030267715454, |
| "step": 1054 |
| }, |
| { |
| "epoch": 1.2096219931271477, |
| "grad_norm": 0.3454778492450714, |
| "learning_rate": 7.269209753911426e-06, |
| "loss": 1.1410846710205078, |
| "step": 1056 |
| }, |
| { |
| "epoch": 1.2119129438717067, |
| "grad_norm": 1.5586289167404175, |
| "learning_rate": 7.258754994984839e-06, |
| "loss": 0.9169238805770874, |
| "step": 1058 |
| }, |
| { |
| "epoch": 1.2142038946162657, |
| "grad_norm": 0.24782694876194, |
| "learning_rate": 7.248289019384255e-06, |
| "loss": 1.0707839727401733, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.2164948453608249, |
| "grad_norm": 0.3990442156791687, |
| "learning_rate": 7.237811893857703e-06, |
| "loss": 1.201479434967041, |
| "step": 1062 |
| }, |
| { |
| "epoch": 1.2187857961053838, |
| "grad_norm": 0.2834305465221405, |
| "learning_rate": 7.227323685224329e-06, |
| "loss": 1.0706955194473267, |
| "step": 1064 |
| }, |
| { |
| "epoch": 1.2210767468499428, |
| "grad_norm": 0.6435824036598206, |
| "learning_rate": 7.216824460373959e-06, |
| "loss": 1.0990760326385498, |
| "step": 1066 |
| }, |
| { |
| "epoch": 1.2233676975945018, |
| "grad_norm": 3.215423822402954, |
| "learning_rate": 7.206314286266676e-06, |
| "loss": 0.9824307560920715, |
| "step": 1068 |
| }, |
| { |
| "epoch": 1.2256586483390608, |
| "grad_norm": 0.4849179983139038, |
| "learning_rate": 7.195793229932397e-06, |
| "loss": 1.1502206325531006, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.2279495990836198, |
| "grad_norm": 2.8261635303497314, |
| "learning_rate": 7.185261358470436e-06, |
| "loss": 1.0585294961929321, |
| "step": 1072 |
| }, |
| { |
| "epoch": 1.2302405498281788, |
| "grad_norm": 0.9895238280296326, |
| "learning_rate": 7.174718739049087e-06, |
| "loss": 1.0261123180389404, |
| "step": 1074 |
| }, |
| { |
| "epoch": 1.2325315005727377, |
| "grad_norm": 0.7041395902633667, |
| "learning_rate": 7.164165438905186e-06, |
| "loss": 0.9829214811325073, |
| "step": 1076 |
| }, |
| { |
| "epoch": 1.2348224513172967, |
| "grad_norm": 0.5983326435089111, |
| "learning_rate": 7.153601525343692e-06, |
| "loss": 1.1627838611602783, |
| "step": 1078 |
| }, |
| { |
| "epoch": 1.2371134020618557, |
| "grad_norm": 0.6902378797531128, |
| "learning_rate": 7.143027065737247e-06, |
| "loss": 1.0614171028137207, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.2394043528064147, |
| "grad_norm": 0.22289074957370758, |
| "learning_rate": 7.132442127525754e-06, |
| "loss": 0.9967485070228577, |
| "step": 1082 |
| }, |
| { |
| "epoch": 1.2416953035509737, |
| "grad_norm": 0.3911464512348175, |
| "learning_rate": 7.121846778215946e-06, |
| "loss": 1.1069986820220947, |
| "step": 1084 |
| }, |
| { |
| "epoch": 1.2439862542955327, |
| "grad_norm": 0.5967843532562256, |
| "learning_rate": 7.111241085380951e-06, |
| "loss": 1.0382379293441772, |
| "step": 1086 |
| }, |
| { |
| "epoch": 1.2462772050400917, |
| "grad_norm": 0.46687158942222595, |
| "learning_rate": 7.100625116659867e-06, |
| "loss": 1.1105608940124512, |
| "step": 1088 |
| }, |
| { |
| "epoch": 1.2485681557846506, |
| "grad_norm": 0.17509859800338745, |
| "learning_rate": 7.089998939757323e-06, |
| "loss": 1.0866317749023438, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.2508591065292096, |
| "grad_norm": 0.709657609462738, |
| "learning_rate": 7.07936262244306e-06, |
| "loss": 1.2985808849334717, |
| "step": 1092 |
| }, |
| { |
| "epoch": 1.2531500572737686, |
| "grad_norm": 0.298130601644516, |
| "learning_rate": 7.068716232551484e-06, |
| "loss": 1.1646193265914917, |
| "step": 1094 |
| }, |
| { |
| "epoch": 1.2554410080183276, |
| "grad_norm": 0.5657819509506226, |
| "learning_rate": 7.05805983798124e-06, |
| "loss": 1.226839303970337, |
| "step": 1096 |
| }, |
| { |
| "epoch": 1.2577319587628866, |
| "grad_norm": 0.2218884825706482, |
| "learning_rate": 7.047393506694784e-06, |
| "loss": 1.1182987689971924, |
| "step": 1098 |
| }, |
| { |
| "epoch": 1.2600229095074456, |
| "grad_norm": 0.5544705986976624, |
| "learning_rate": 7.036717306717941e-06, |
| "loss": 1.0833134651184082, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.2623138602520045, |
| "grad_norm": 0.8727872371673584, |
| "learning_rate": 7.026031306139476e-06, |
| "loss": 1.1887166500091553, |
| "step": 1102 |
| }, |
| { |
| "epoch": 1.2646048109965635, |
| "grad_norm": 0.31878840923309326, |
| "learning_rate": 7.015335573110655e-06, |
| "loss": 1.0471493005752563, |
| "step": 1104 |
| }, |
| { |
| "epoch": 1.2668957617411225, |
| "grad_norm": 1.7133678197860718, |
| "learning_rate": 7.004630175844821e-06, |
| "loss": 1.2911756038665771, |
| "step": 1106 |
| }, |
| { |
| "epoch": 1.2691867124856815, |
| "grad_norm": 0.4181298017501831, |
| "learning_rate": 6.9939151826169435e-06, |
| "loss": 1.1934268474578857, |
| "step": 1108 |
| }, |
| { |
| "epoch": 1.2714776632302405, |
| "grad_norm": 0.4590524137020111, |
| "learning_rate": 6.9831906617632015e-06, |
| "loss": 1.0381492376327515, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.2737686139747995, |
| "grad_norm": 0.35924550890922546, |
| "learning_rate": 6.972456681680526e-06, |
| "loss": 1.1387617588043213, |
| "step": 1112 |
| }, |
| { |
| "epoch": 1.2760595647193584, |
| "grad_norm": 0.284288614988327, |
| "learning_rate": 6.961713310826186e-06, |
| "loss": 1.2039177417755127, |
| "step": 1114 |
| }, |
| { |
| "epoch": 1.2783505154639174, |
| "grad_norm": 0.5767810344696045, |
| "learning_rate": 6.950960617717331e-06, |
| "loss": 1.193761944770813, |
| "step": 1116 |
| }, |
| { |
| "epoch": 1.2806414662084764, |
| "grad_norm": 0.5597623586654663, |
| "learning_rate": 6.940198670930575e-06, |
| "loss": 1.1562107801437378, |
| "step": 1118 |
| }, |
| { |
| "epoch": 1.2829324169530354, |
| "grad_norm": 1.776769995689392, |
| "learning_rate": 6.929427539101542e-06, |
| "loss": 1.1486637592315674, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.2852233676975944, |
| "grad_norm": 2.092482089996338, |
| "learning_rate": 6.918647290924431e-06, |
| "loss": 0.7264701128005981, |
| "step": 1122 |
| }, |
| { |
| "epoch": 1.2875143184421534, |
| "grad_norm": 0.5090541243553162, |
| "learning_rate": 6.907857995151593e-06, |
| "loss": 1.331784725189209, |
| "step": 1124 |
| }, |
| { |
| "epoch": 1.2898052691867126, |
| "grad_norm": 1.2451262474060059, |
| "learning_rate": 6.897059720593072e-06, |
| "loss": 1.2851134538650513, |
| "step": 1126 |
| }, |
| { |
| "epoch": 1.2920962199312716, |
| "grad_norm": 1.4176346063613892, |
| "learning_rate": 6.886252536116178e-06, |
| "loss": 0.7333021759986877, |
| "step": 1128 |
| }, |
| { |
| "epoch": 1.2943871706758305, |
| "grad_norm": 0.6894351840019226, |
| "learning_rate": 6.875436510645046e-06, |
| "loss": 1.1808993816375732, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.2966781214203895, |
| "grad_norm": 0.5326879620552063, |
| "learning_rate": 6.864611713160195e-06, |
| "loss": 0.4958106279373169, |
| "step": 1132 |
| }, |
| { |
| "epoch": 1.2989690721649485, |
| "grad_norm": 0.5978541374206543, |
| "learning_rate": 6.853778212698085e-06, |
| "loss": 1.1578189134597778, |
| "step": 1134 |
| }, |
| { |
| "epoch": 1.3012600229095075, |
| "grad_norm": 0.27673229575157166, |
| "learning_rate": 6.842936078350687e-06, |
| "loss": 1.1419076919555664, |
| "step": 1136 |
| }, |
| { |
| "epoch": 1.3035509736540665, |
| "grad_norm": 0.6036549806594849, |
| "learning_rate": 6.832085379265032e-06, |
| "loss": 1.0068280696868896, |
| "step": 1138 |
| }, |
| { |
| "epoch": 1.3058419243986255, |
| "grad_norm": 1.3319073915481567, |
| "learning_rate": 6.821226184642769e-06, |
| "loss": 1.1533417701721191, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.3081328751431844, |
| "grad_norm": 1.2838786840438843, |
| "learning_rate": 6.810358563739741e-06, |
| "loss": 0.9974702000617981, |
| "step": 1142 |
| }, |
| { |
| "epoch": 1.3104238258877434, |
| "grad_norm": 0.25137636065483093, |
| "learning_rate": 6.799482585865517e-06, |
| "loss": 1.1108713150024414, |
| "step": 1144 |
| }, |
| { |
| "epoch": 1.3127147766323024, |
| "grad_norm": 0.9687121510505676, |
| "learning_rate": 6.788598320382969e-06, |
| "loss": 0.7017291784286499, |
| "step": 1146 |
| }, |
| { |
| "epoch": 1.3150057273768614, |
| "grad_norm": 0.6507570743560791, |
| "learning_rate": 6.777705836707827e-06, |
| "loss": 0.6254103183746338, |
| "step": 1148 |
| }, |
| { |
| "epoch": 1.3172966781214204, |
| "grad_norm": 0.24653151631355286, |
| "learning_rate": 6.766805204308232e-06, |
| "loss": 1.0639063119888306, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.3195876288659794, |
| "grad_norm": 0.8183524012565613, |
| "learning_rate": 6.755896492704292e-06, |
| "loss": 0.7704499363899231, |
| "step": 1152 |
| }, |
| { |
| "epoch": 1.3218785796105383, |
| "grad_norm": 1.3019349575042725, |
| "learning_rate": 6.7449797714676446e-06, |
| "loss": 1.0398871898651123, |
| "step": 1154 |
| }, |
| { |
| "epoch": 1.3241695303550973, |
| "grad_norm": 0.5958473682403564, |
| "learning_rate": 6.734055110221004e-06, |
| "loss": 0.9920108914375305, |
| "step": 1156 |
| }, |
| { |
| "epoch": 1.3264604810996563, |
| "grad_norm": 0.34555068612098694, |
| "learning_rate": 6.72312257863773e-06, |
| "loss": 0.7964114546775818, |
| "step": 1158 |
| }, |
| { |
| "epoch": 1.3287514318442153, |
| "grad_norm": 1.3161815404891968, |
| "learning_rate": 6.712182246441372e-06, |
| "loss": 1.170238971710205, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.3310423825887743, |
| "grad_norm": 0.4162517189979553, |
| "learning_rate": 6.701234183405228e-06, |
| "loss": 1.1864508390426636, |
| "step": 1162 |
| }, |
| { |
| "epoch": 1.3333333333333333, |
| "grad_norm": 0.682020902633667, |
| "learning_rate": 6.690278459351907e-06, |
| "loss": 1.0800316333770752, |
| "step": 1164 |
| }, |
| { |
| "epoch": 1.3356242840778922, |
| "grad_norm": 0.8428366780281067, |
| "learning_rate": 6.679315144152867e-06, |
| "loss": 1.0788335800170898, |
| "step": 1166 |
| }, |
| { |
| "epoch": 1.3379152348224514, |
| "grad_norm": 0.5951038599014282, |
| "learning_rate": 6.6683443077279885e-06, |
| "loss": 0.9050284028053284, |
| "step": 1168 |
| }, |
| { |
| "epoch": 1.3402061855670104, |
| "grad_norm": 14.55526351928711, |
| "learning_rate": 6.6573660200451155e-06, |
| "loss": 0.8427594900131226, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.3424971363115694, |
| "grad_norm": 0.4018735885620117, |
| "learning_rate": 6.646380351119612e-06, |
| "loss": 0.9908223152160645, |
| "step": 1172 |
| }, |
| { |
| "epoch": 1.3447880870561284, |
| "grad_norm": 0.44170042872428894, |
| "learning_rate": 6.6353873710139185e-06, |
| "loss": 1.1631572246551514, |
| "step": 1174 |
| }, |
| { |
| "epoch": 1.3470790378006874, |
| "grad_norm": 0.4431830644607544, |
| "learning_rate": 6.624387149837105e-06, |
| "loss": 1.1010160446166992, |
| "step": 1176 |
| }, |
| { |
| "epoch": 1.3493699885452464, |
| "grad_norm": 0.20745538175106049, |
| "learning_rate": 6.613379757744419e-06, |
| "loss": 1.083961844444275, |
| "step": 1178 |
| }, |
| { |
| "epoch": 1.3516609392898054, |
| "grad_norm": 0.23701387643814087, |
| "learning_rate": 6.602365264936843e-06, |
| "loss": 0.91713547706604, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.3539518900343643, |
| "grad_norm": 0.24008503556251526, |
| "learning_rate": 6.5913437416606485e-06, |
| "loss": 1.1394959688186646, |
| "step": 1182 |
| }, |
| { |
| "epoch": 1.3562428407789233, |
| "grad_norm": 0.5425772070884705, |
| "learning_rate": 6.5803152582069365e-06, |
| "loss": 1.0138827562332153, |
| "step": 1184 |
| }, |
| { |
| "epoch": 1.3585337915234823, |
| "grad_norm": 0.5500049591064453, |
| "learning_rate": 6.569279884911205e-06, |
| "loss": 1.1640450954437256, |
| "step": 1186 |
| }, |
| { |
| "epoch": 1.3608247422680413, |
| "grad_norm": 0.23986561596393585, |
| "learning_rate": 6.558237692152889e-06, |
| "loss": 1.0996382236480713, |
| "step": 1188 |
| }, |
| { |
| "epoch": 1.3631156930126003, |
| "grad_norm": 0.33864185214042664, |
| "learning_rate": 6.547188750354917e-06, |
| "loss": 1.1096324920654297, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.3654066437571593, |
| "grad_norm": 1.339373230934143, |
| "learning_rate": 6.536133129983261e-06, |
| "loss": 1.0554709434509277, |
| "step": 1192 |
| }, |
| { |
| "epoch": 1.3676975945017182, |
| "grad_norm": 0.5389803051948547, |
| "learning_rate": 6.5250709015464834e-06, |
| "loss": 1.1650443077087402, |
| "step": 1194 |
| }, |
| { |
| "epoch": 1.3699885452462772, |
| "grad_norm": 0.42033982276916504, |
| "learning_rate": 6.5140021355952935e-06, |
| "loss": 1.177642583847046, |
| "step": 1196 |
| }, |
| { |
| "epoch": 1.3722794959908362, |
| "grad_norm": 0.9180698990821838, |
| "learning_rate": 6.502926902722092e-06, |
| "loss": 1.0504584312438965, |
| "step": 1198 |
| }, |
| { |
| "epoch": 1.3745704467353952, |
| "grad_norm": 0.48793044686317444, |
| "learning_rate": 6.4918452735605245e-06, |
| "loss": 1.1541026830673218, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.3768613974799542, |
| "grad_norm": 0.14638172090053558, |
| "learning_rate": 6.4807573187850295e-06, |
| "loss": 0.9544370174407959, |
| "step": 1202 |
| }, |
| { |
| "epoch": 1.3791523482245132, |
| "grad_norm": 1.1175258159637451, |
| "learning_rate": 6.469663109110389e-06, |
| "loss": 0.94257652759552, |
| "step": 1204 |
| }, |
| { |
| "epoch": 1.3814432989690721, |
| "grad_norm": 0.5161193609237671, |
| "learning_rate": 6.458562715291273e-06, |
| "loss": 1.1248518228530884, |
| "step": 1206 |
| }, |
| { |
| "epoch": 1.3837342497136311, |
| "grad_norm": 0.3805331289768219, |
| "learning_rate": 6.4474562081217975e-06, |
| "loss": 0.9945235252380371, |
| "step": 1208 |
| }, |
| { |
| "epoch": 1.38602520045819, |
| "grad_norm": 0.4004988670349121, |
| "learning_rate": 6.436343658435059e-06, |
| "loss": 1.2267534732818604, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.388316151202749, |
| "grad_norm": 0.5942703485488892, |
| "learning_rate": 6.4252251371026984e-06, |
| "loss": 1.1504127979278564, |
| "step": 1212 |
| }, |
| { |
| "epoch": 1.390607101947308, |
| "grad_norm": 0.6351091265678406, |
| "learning_rate": 6.414100715034437e-06, |
| "loss": 1.2171190977096558, |
| "step": 1214 |
| }, |
| { |
| "epoch": 1.392898052691867, |
| "grad_norm": 0.3700306713581085, |
| "learning_rate": 6.4029704631776334e-06, |
| "loss": 1.153677225112915, |
| "step": 1216 |
| }, |
| { |
| "epoch": 1.395189003436426, |
| "grad_norm": 0.5730115175247192, |
| "learning_rate": 6.3918344525168176e-06, |
| "loss": 1.2128088474273682, |
| "step": 1218 |
| }, |
| { |
| "epoch": 1.397479954180985, |
| "grad_norm": 0.8086961507797241, |
| "learning_rate": 6.380692754073257e-06, |
| "loss": 1.1901538372039795, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.399770904925544, |
| "grad_norm": 0.2177879959344864, |
| "learning_rate": 6.3695454389044885e-06, |
| "loss": 1.050641655921936, |
| "step": 1222 |
| }, |
| { |
| "epoch": 1.402061855670103, |
| "grad_norm": 0.48566994071006775, |
| "learning_rate": 6.35839257810387e-06, |
| "loss": 1.1169625520706177, |
| "step": 1224 |
| }, |
| { |
| "epoch": 1.404352806414662, |
| "grad_norm": 0.5073995590209961, |
| "learning_rate": 6.347234242800131e-06, |
| "loss": 1.0769405364990234, |
| "step": 1226 |
| }, |
| { |
| "epoch": 1.406643757159221, |
| "grad_norm": 1.046706199645996, |
| "learning_rate": 6.336070504156907e-06, |
| "loss": 0.498112291097641, |
| "step": 1228 |
| }, |
| { |
| "epoch": 1.40893470790378, |
| "grad_norm": 0.571991503238678, |
| "learning_rate": 6.324901433372307e-06, |
| "loss": 1.0861037969589233, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.411225658648339, |
| "grad_norm": 0.3702922463417053, |
| "learning_rate": 6.313727101678433e-06, |
| "loss": 1.1421905755996704, |
| "step": 1232 |
| }, |
| { |
| "epoch": 1.413516609392898, |
| "grad_norm": 0.8220073580741882, |
| "learning_rate": 6.302547580340949e-06, |
| "loss": 1.2322626113891602, |
| "step": 1234 |
| }, |
| { |
| "epoch": 1.4158075601374571, |
| "grad_norm": 0.5405969619750977, |
| "learning_rate": 6.291362940658612e-06, |
| "loss": 0.9657451510429382, |
| "step": 1236 |
| }, |
| { |
| "epoch": 1.418098510882016, |
| "grad_norm": 1.5954561233520508, |
| "learning_rate": 6.2801732539628205e-06, |
| "loss": 1.1020958423614502, |
| "step": 1238 |
| }, |
| { |
| "epoch": 1.420389461626575, |
| "grad_norm": 0.42759695649147034, |
| "learning_rate": 6.2689785916171656e-06, |
| "loss": 1.098386526107788, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.422680412371134, |
| "grad_norm": 0.19068364799022675, |
| "learning_rate": 6.257779025016967e-06, |
| "loss": 1.1080601215362549, |
| "step": 1242 |
| }, |
| { |
| "epoch": 1.424971363115693, |
| "grad_norm": 0.6803228855133057, |
| "learning_rate": 6.246574625588824e-06, |
| "loss": 1.1234514713287354, |
| "step": 1244 |
| }, |
| { |
| "epoch": 1.427262313860252, |
| "grad_norm": 0.3850613236427307, |
| "learning_rate": 6.235365464790158e-06, |
| "loss": 1.191713571548462, |
| "step": 1246 |
| }, |
| { |
| "epoch": 1.429553264604811, |
| "grad_norm": 0.5191882252693176, |
| "learning_rate": 6.224151614108755e-06, |
| "loss": 1.113290548324585, |
| "step": 1248 |
| }, |
| { |
| "epoch": 1.43184421534937, |
| "grad_norm": 0.32683122158050537, |
| "learning_rate": 6.212933145062313e-06, |
| "loss": 0.977238655090332, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.434135166093929, |
| "grad_norm": 0.3372647762298584, |
| "learning_rate": 6.201710129197984e-06, |
| "loss": 1.0865776538848877, |
| "step": 1252 |
| }, |
| { |
| "epoch": 1.436426116838488, |
| "grad_norm": 1.7988835573196411, |
| "learning_rate": 6.190482638091917e-06, |
| "loss": 1.2454376220703125, |
| "step": 1254 |
| }, |
| { |
| "epoch": 1.438717067583047, |
| "grad_norm": 0.5903953313827515, |
| "learning_rate": 6.179250743348801e-06, |
| "loss": 0.7004958391189575, |
| "step": 1256 |
| }, |
| { |
| "epoch": 1.441008018327606, |
| "grad_norm": 1.3094062805175781, |
| "learning_rate": 6.1680145166014135e-06, |
| "loss": 1.1775736808776855, |
| "step": 1258 |
| }, |
| { |
| "epoch": 1.443298969072165, |
| "grad_norm": 0.15861783921718597, |
| "learning_rate": 6.156774029510158e-06, |
| "loss": 1.0173115730285645, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.445589919816724, |
| "grad_norm": 0.31646618247032166, |
| "learning_rate": 6.145529353762608e-06, |
| "loss": 1.0695950984954834, |
| "step": 1262 |
| }, |
| { |
| "epoch": 1.447880870561283, |
| "grad_norm": 0.2128850221633911, |
| "learning_rate": 6.1342805610730515e-06, |
| "loss": 1.0297033786773682, |
| "step": 1264 |
| }, |
| { |
| "epoch": 1.4501718213058419, |
| "grad_norm": 0.43015554547309875, |
| "learning_rate": 6.123027723182034e-06, |
| "loss": 1.0786964893341064, |
| "step": 1266 |
| }, |
| { |
| "epoch": 1.4524627720504009, |
| "grad_norm": 0.5410537719726562, |
| "learning_rate": 6.111770911855895e-06, |
| "loss": 1.2960965633392334, |
| "step": 1268 |
| }, |
| { |
| "epoch": 1.4547537227949598, |
| "grad_norm": 0.40598443150520325, |
| "learning_rate": 6.100510198886324e-06, |
| "loss": 1.150607943534851, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.4570446735395188, |
| "grad_norm": 0.25774961709976196, |
| "learning_rate": 6.089245656089882e-06, |
| "loss": 1.0670363903045654, |
| "step": 1272 |
| }, |
| { |
| "epoch": 1.4593356242840778, |
| "grad_norm": 1.2430238723754883, |
| "learning_rate": 6.077977355307564e-06, |
| "loss": 0.929009199142456, |
| "step": 1274 |
| }, |
| { |
| "epoch": 1.461626575028637, |
| "grad_norm": 0.819111704826355, |
| "learning_rate": 6.066705368404332e-06, |
| "loss": 1.0384223461151123, |
| "step": 1276 |
| }, |
| { |
| "epoch": 1.463917525773196, |
| "grad_norm": 0.6329976320266724, |
| "learning_rate": 6.0554297672686515e-06, |
| "loss": 1.146793246269226, |
| "step": 1278 |
| }, |
| { |
| "epoch": 1.466208476517755, |
| "grad_norm": 0.5748082399368286, |
| "learning_rate": 6.044150623812041e-06, |
| "loss": 1.21446692943573, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.468499427262314, |
| "grad_norm": 0.37093842029571533, |
| "learning_rate": 6.032868009968611e-06, |
| "loss": 0.6608114242553711, |
| "step": 1282 |
| }, |
| { |
| "epoch": 1.470790378006873, |
| "grad_norm": 0.4401775598526001, |
| "learning_rate": 6.021581997694604e-06, |
| "loss": 1.1975040435791016, |
| "step": 1284 |
| }, |
| { |
| "epoch": 1.473081328751432, |
| "grad_norm": 0.198956698179245, |
| "learning_rate": 6.010292658967937e-06, |
| "loss": 0.8298950791358948, |
| "step": 1286 |
| }, |
| { |
| "epoch": 1.475372279495991, |
| "grad_norm": 1.1646336317062378, |
| "learning_rate": 5.999000065787741e-06, |
| "loss": 1.121684193611145, |
| "step": 1288 |
| }, |
| { |
| "epoch": 1.47766323024055, |
| "grad_norm": 0.19620084762573242, |
| "learning_rate": 5.987704290173904e-06, |
| "loss": 1.113356113433838, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.479954180985109, |
| "grad_norm": 0.8701173663139343, |
| "learning_rate": 5.976405404166609e-06, |
| "loss": 1.0445246696472168, |
| "step": 1292 |
| }, |
| { |
| "epoch": 1.4822451317296679, |
| "grad_norm": 0.8870877623558044, |
| "learning_rate": 5.965103479825874e-06, |
| "loss": 1.3073999881744385, |
| "step": 1294 |
| }, |
| { |
| "epoch": 1.4845360824742269, |
| "grad_norm": 0.6979129910469055, |
| "learning_rate": 5.953798589231102e-06, |
| "loss": 1.0925014019012451, |
| "step": 1296 |
| }, |
| { |
| "epoch": 1.4868270332187858, |
| "grad_norm": 0.3692026734352112, |
| "learning_rate": 5.942490804480605e-06, |
| "loss": 0.999457597732544, |
| "step": 1298 |
| }, |
| { |
| "epoch": 1.4891179839633448, |
| "grad_norm": 0.6890159845352173, |
| "learning_rate": 5.931180197691155e-06, |
| "loss": 1.0261509418487549, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.4914089347079038, |
| "grad_norm": 0.40297093987464905, |
| "learning_rate": 5.919866840997528e-06, |
| "loss": 0.8686540126800537, |
| "step": 1302 |
| }, |
| { |
| "epoch": 1.4936998854524628, |
| "grad_norm": 1.5102224349975586, |
| "learning_rate": 5.908550806552027e-06, |
| "loss": 1.269791841506958, |
| "step": 1304 |
| }, |
| { |
| "epoch": 1.4959908361970218, |
| "grad_norm": 0.3148708939552307, |
| "learning_rate": 5.897232166524044e-06, |
| "loss": 1.1037763357162476, |
| "step": 1306 |
| }, |
| { |
| "epoch": 1.4982817869415808, |
| "grad_norm": 1.3338543176651, |
| "learning_rate": 5.885910993099581e-06, |
| "loss": 0.8658826351165771, |
| "step": 1308 |
| }, |
| { |
| "epoch": 1.5005727376861397, |
| "grad_norm": 0.594698429107666, |
| "learning_rate": 5.874587358480798e-06, |
| "loss": 1.0999151468276978, |
| "step": 1310 |
| }, |
| { |
| "epoch": 1.5028636884306987, |
| "grad_norm": 0.8571167588233948, |
| "learning_rate": 5.863261334885553e-06, |
| "loss": 1.3375155925750732, |
| "step": 1312 |
| }, |
| { |
| "epoch": 1.5051546391752577, |
| "grad_norm": 0.3977302610874176, |
| "learning_rate": 5.851932994546941e-06, |
| "loss": 1.1367615461349487, |
| "step": 1314 |
| }, |
| { |
| "epoch": 1.5074455899198167, |
| "grad_norm": 1.0233980417251587, |
| "learning_rate": 5.840602409712831e-06, |
| "loss": 1.1128208637237549, |
| "step": 1316 |
| }, |
| { |
| "epoch": 1.5097365406643757, |
| "grad_norm": 0.42678776383399963, |
| "learning_rate": 5.829269652645404e-06, |
| "loss": 0.9502562880516052, |
| "step": 1318 |
| }, |
| { |
| "epoch": 1.5120274914089347, |
| "grad_norm": 0.6844682097434998, |
| "learning_rate": 5.817934795620702e-06, |
| "loss": 0.842738151550293, |
| "step": 1320 |
| }, |
| { |
| "epoch": 1.5143184421534936, |
| "grad_norm": 0.17597302794456482, |
| "learning_rate": 5.8065979109281515e-06, |
| "loss": 1.1172516345977783, |
| "step": 1322 |
| }, |
| { |
| "epoch": 1.5166093928980526, |
| "grad_norm": 1.0520700216293335, |
| "learning_rate": 5.795259070870114e-06, |
| "loss": 1.019298791885376, |
| "step": 1324 |
| }, |
| { |
| "epoch": 1.5189003436426116, |
| "grad_norm": 0.301179975271225, |
| "learning_rate": 5.783918347761424e-06, |
| "loss": 1.0366371870040894, |
| "step": 1326 |
| }, |
| { |
| "epoch": 1.5211912943871706, |
| "grad_norm": 1.4107294082641602, |
| "learning_rate": 5.77257581392892e-06, |
| "loss": 1.0230910778045654, |
| "step": 1328 |
| }, |
| { |
| "epoch": 1.5234822451317296, |
| "grad_norm": 3.041111946105957, |
| "learning_rate": 5.761231541710994e-06, |
| "loss": 1.078089714050293, |
| "step": 1330 |
| }, |
| { |
| "epoch": 1.5257731958762886, |
| "grad_norm": 0.39890924096107483, |
| "learning_rate": 5.7498856034571235e-06, |
| "loss": 0.9173415303230286, |
| "step": 1332 |
| }, |
| { |
| "epoch": 1.5280641466208476, |
| "grad_norm": 1.2413023710250854, |
| "learning_rate": 5.7385380715274075e-06, |
| "loss": 0.7650175094604492, |
| "step": 1334 |
| }, |
| { |
| "epoch": 1.5303550973654065, |
| "grad_norm": 0.40077197551727295, |
| "learning_rate": 5.727189018292115e-06, |
| "loss": 1.0416836738586426, |
| "step": 1336 |
| }, |
| { |
| "epoch": 1.5326460481099655, |
| "grad_norm": 1.2296018600463867, |
| "learning_rate": 5.715838516131212e-06, |
| "loss": 0.7319103479385376, |
| "step": 1338 |
| }, |
| { |
| "epoch": 1.5349369988545245, |
| "grad_norm": 0.5661179423332214, |
| "learning_rate": 5.704486637433907e-06, |
| "loss": 1.1215949058532715, |
| "step": 1340 |
| }, |
| { |
| "epoch": 1.5372279495990835, |
| "grad_norm": 1.0080679655075073, |
| "learning_rate": 5.6931334545981876e-06, |
| "loss": 1.0732643604278564, |
| "step": 1342 |
| }, |
| { |
| "epoch": 1.5395189003436425, |
| "grad_norm": 0.46233344078063965, |
| "learning_rate": 5.68177904003036e-06, |
| "loss": 0.5498278737068176, |
| "step": 1344 |
| }, |
| { |
| "epoch": 1.5418098510882015, |
| "grad_norm": 1.1307381391525269, |
| "learning_rate": 5.670423466144585e-06, |
| "loss": 0.8027646541595459, |
| "step": 1346 |
| }, |
| { |
| "epoch": 1.5441008018327604, |
| "grad_norm": 0.2457914799451828, |
| "learning_rate": 5.659066805362416e-06, |
| "loss": 1.099556803703308, |
| "step": 1348 |
| }, |
| { |
| "epoch": 1.5463917525773194, |
| "grad_norm": 0.1576959639787674, |
| "learning_rate": 5.647709130112339e-06, |
| "loss": 1.0494180917739868, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.5486827033218786, |
| "grad_norm": 1.4281489849090576, |
| "learning_rate": 5.6363505128293105e-06, |
| "loss": 1.0572658777236938, |
| "step": 1352 |
| }, |
| { |
| "epoch": 1.5509736540664376, |
| "grad_norm": 0.3911938965320587, |
| "learning_rate": 5.624991025954296e-06, |
| "loss": 1.112137794494629, |
| "step": 1354 |
| }, |
| { |
| "epoch": 1.5532646048109966, |
| "grad_norm": 0.7160745859146118, |
| "learning_rate": 5.613630741933801e-06, |
| "loss": 1.0537481307983398, |
| "step": 1356 |
| }, |
| { |
| "epoch": 1.5555555555555556, |
| "grad_norm": 1.9618110656738281, |
| "learning_rate": 5.602269733219422e-06, |
| "loss": 0.8826972246170044, |
| "step": 1358 |
| }, |
| { |
| "epoch": 1.5578465063001146, |
| "grad_norm": 0.6684516668319702, |
| "learning_rate": 5.590908072267376e-06, |
| "loss": 1.1781320571899414, |
| "step": 1360 |
| }, |
| { |
| "epoch": 1.5601374570446735, |
| "grad_norm": 0.9931756854057312, |
| "learning_rate": 5.579545831538036e-06, |
| "loss": 1.003735065460205, |
| "step": 1362 |
| }, |
| { |
| "epoch": 1.5624284077892325, |
| "grad_norm": 0.35055479407310486, |
| "learning_rate": 5.568183083495476e-06, |
| "loss": 1.188180923461914, |
| "step": 1364 |
| }, |
| { |
| "epoch": 1.5647193585337915, |
| "grad_norm": 0.2840157747268677, |
| "learning_rate": 5.556819900607004e-06, |
| "loss": 1.0069973468780518, |
| "step": 1366 |
| }, |
| { |
| "epoch": 1.5670103092783505, |
| "grad_norm": 0.2898299992084503, |
| "learning_rate": 5.545456355342702e-06, |
| "loss": 1.0996860265731812, |
| "step": 1368 |
| }, |
| { |
| "epoch": 1.5693012600229095, |
| "grad_norm": 0.44947919249534607, |
| "learning_rate": 5.53409252017496e-06, |
| "loss": 1.120392918586731, |
| "step": 1370 |
| }, |
| { |
| "epoch": 1.5715922107674685, |
| "grad_norm": 1.1559861898422241, |
| "learning_rate": 5.522728467578024e-06, |
| "loss": 1.3419742584228516, |
| "step": 1372 |
| }, |
| { |
| "epoch": 1.5738831615120275, |
| "grad_norm": 0.7148109674453735, |
| "learning_rate": 5.51136427002752e-06, |
| "loss": 1.1241408586502075, |
| "step": 1374 |
| }, |
| { |
| "epoch": 1.5761741122565864, |
| "grad_norm": 1.7949758768081665, |
| "learning_rate": 5.500000000000001e-06, |
| "loss": 1.0823068618774414, |
| "step": 1376 |
| }, |
| { |
| "epoch": 1.5784650630011456, |
| "grad_norm": 0.5439109206199646, |
| "learning_rate": 5.488635729972482e-06, |
| "loss": 1.0603818893432617, |
| "step": 1378 |
| }, |
| { |
| "epoch": 1.5807560137457046, |
| "grad_norm": 0.6118595004081726, |
| "learning_rate": 5.477271532421978e-06, |
| "loss": 1.1041834354400635, |
| "step": 1380 |
| }, |
| { |
| "epoch": 1.5830469644902636, |
| "grad_norm": 0.6366990804672241, |
| "learning_rate": 5.465907479825041e-06, |
| "loss": 1.2534537315368652, |
| "step": 1382 |
| }, |
| { |
| "epoch": 1.5853379152348226, |
| "grad_norm": 0.3186343014240265, |
| "learning_rate": 5.454543644657302e-06, |
| "loss": 1.00857412815094, |
| "step": 1384 |
| }, |
| { |
| "epoch": 1.5876288659793816, |
| "grad_norm": 0.23584212362766266, |
| "learning_rate": 5.4431800993929985e-06, |
| "loss": 0.9454631805419922, |
| "step": 1386 |
| }, |
| { |
| "epoch": 1.5899198167239406, |
| "grad_norm": 0.7114871740341187, |
| "learning_rate": 5.431816916504526e-06, |
| "loss": 1.1222310066223145, |
| "step": 1388 |
| }, |
| { |
| "epoch": 1.5922107674684995, |
| "grad_norm": 0.3654209077358246, |
| "learning_rate": 5.420454168461966e-06, |
| "loss": 1.1215158700942993, |
| "step": 1390 |
| }, |
| { |
| "epoch": 1.5945017182130585, |
| "grad_norm": 0.7768158912658691, |
| "learning_rate": 5.4090919277326255e-06, |
| "loss": 0.8392388820648193, |
| "step": 1392 |
| }, |
| { |
| "epoch": 1.5967926689576175, |
| "grad_norm": 0.2861047387123108, |
| "learning_rate": 5.39773026678058e-06, |
| "loss": 0.7805435061454773, |
| "step": 1394 |
| }, |
| { |
| "epoch": 1.5990836197021765, |
| "grad_norm": 0.6634432673454285, |
| "learning_rate": 5.386369258066201e-06, |
| "loss": 1.295003890991211, |
| "step": 1396 |
| }, |
| { |
| "epoch": 1.6013745704467355, |
| "grad_norm": 0.3201494812965393, |
| "learning_rate": 5.3750089740457075e-06, |
| "loss": 1.061468243598938, |
| "step": 1398 |
| }, |
| { |
| "epoch": 1.6036655211912945, |
| "grad_norm": 0.5713483095169067, |
| "learning_rate": 5.363649487170691e-06, |
| "loss": 0.9963769912719727, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.6059564719358534, |
| "grad_norm": 0.9836020469665527, |
| "learning_rate": 5.352290869887663e-06, |
| "loss": 1.194199800491333, |
| "step": 1402 |
| }, |
| { |
| "epoch": 1.6082474226804124, |
| "grad_norm": 0.1987893432378769, |
| "learning_rate": 5.340933194637586e-06, |
| "loss": 0.6283943057060242, |
| "step": 1404 |
| }, |
| { |
| "epoch": 1.6105383734249714, |
| "grad_norm": 0.580379843711853, |
| "learning_rate": 5.329576533855415e-06, |
| "loss": 1.2401604652404785, |
| "step": 1406 |
| }, |
| { |
| "epoch": 1.6128293241695304, |
| "grad_norm": 0.48753857612609863, |
| "learning_rate": 5.3182209599696415e-06, |
| "loss": 0.9093868732452393, |
| "step": 1408 |
| }, |
| { |
| "epoch": 1.6151202749140894, |
| "grad_norm": 1.1758943796157837, |
| "learning_rate": 5.306866545401813e-06, |
| "loss": 1.1071420907974243, |
| "step": 1410 |
| }, |
| { |
| "epoch": 1.6174112256586484, |
| "grad_norm": 0.4928566515445709, |
| "learning_rate": 5.295513362566096e-06, |
| "loss": 0.7807958722114563, |
| "step": 1412 |
| }, |
| { |
| "epoch": 1.6197021764032073, |
| "grad_norm": 0.4523963928222656, |
| "learning_rate": 5.28416148386879e-06, |
| "loss": 1.0439610481262207, |
| "step": 1414 |
| }, |
| { |
| "epoch": 1.6219931271477663, |
| "grad_norm": 0.7955020070075989, |
| "learning_rate": 5.272810981707886e-06, |
| "loss": 1.0640166997909546, |
| "step": 1416 |
| }, |
| { |
| "epoch": 1.6242840778923253, |
| "grad_norm": 0.7474223375320435, |
| "learning_rate": 5.261461928472593e-06, |
| "loss": 0.889735758304596, |
| "step": 1418 |
| }, |
| { |
| "epoch": 1.6265750286368843, |
| "grad_norm": 0.3920307457447052, |
| "learning_rate": 5.250114396542877e-06, |
| "loss": 1.061119794845581, |
| "step": 1420 |
| }, |
| { |
| "epoch": 1.6288659793814433, |
| "grad_norm": 1.0079574584960938, |
| "learning_rate": 5.238768458289007e-06, |
| "loss": 1.3186607360839844, |
| "step": 1422 |
| }, |
| { |
| "epoch": 1.6311569301260023, |
| "grad_norm": 0.4474199712276459, |
| "learning_rate": 5.227424186071081e-06, |
| "loss": 1.0147289037704468, |
| "step": 1424 |
| }, |
| { |
| "epoch": 1.6334478808705613, |
| "grad_norm": 0.480244904756546, |
| "learning_rate": 5.216081652238579e-06, |
| "loss": 1.1268162727355957, |
| "step": 1426 |
| }, |
| { |
| "epoch": 1.6357388316151202, |
| "grad_norm": 0.9012577533721924, |
| "learning_rate": 5.204740929129887e-06, |
| "loss": 1.120902180671692, |
| "step": 1428 |
| }, |
| { |
| "epoch": 1.6380297823596792, |
| "grad_norm": 0.21147756278514862, |
| "learning_rate": 5.193402089071852e-06, |
| "loss": 1.1125855445861816, |
| "step": 1430 |
| }, |
| { |
| "epoch": 1.6403207331042382, |
| "grad_norm": 0.3258548378944397, |
| "learning_rate": 5.1820652043793e-06, |
| "loss": 1.011163592338562, |
| "step": 1432 |
| }, |
| { |
| "epoch": 1.6426116838487972, |
| "grad_norm": 0.39005911350250244, |
| "learning_rate": 5.1707303473545955e-06, |
| "loss": 1.2018420696258545, |
| "step": 1434 |
| }, |
| { |
| "epoch": 1.6449026345933562, |
| "grad_norm": 0.5742803812026978, |
| "learning_rate": 5.1593975902871705e-06, |
| "loss": 1.1393346786499023, |
| "step": 1436 |
| }, |
| { |
| "epoch": 1.6471935853379152, |
| "grad_norm": 0.41290178894996643, |
| "learning_rate": 5.1480670054530605e-06, |
| "loss": 1.1971912384033203, |
| "step": 1438 |
| }, |
| { |
| "epoch": 1.6494845360824741, |
| "grad_norm": 0.23765195906162262, |
| "learning_rate": 5.136738665114449e-06, |
| "loss": 1.047823190689087, |
| "step": 1440 |
| }, |
| { |
| "epoch": 1.6517754868270331, |
| "grad_norm": 0.7021539211273193, |
| "learning_rate": 5.125412641519204e-06, |
| "loss": 1.1116011142730713, |
| "step": 1442 |
| }, |
| { |
| "epoch": 1.654066437571592, |
| "grad_norm": 1.0828512907028198, |
| "learning_rate": 5.114089006900422e-06, |
| "loss": 1.3231321573257446, |
| "step": 1444 |
| }, |
| { |
| "epoch": 1.656357388316151, |
| "grad_norm": 0.4457782208919525, |
| "learning_rate": 5.102767833475958e-06, |
| "loss": 1.1356236934661865, |
| "step": 1446 |
| }, |
| { |
| "epoch": 1.65864833906071, |
| "grad_norm": 0.862468421459198, |
| "learning_rate": 5.091449193447974e-06, |
| "loss": 1.0704690217971802, |
| "step": 1448 |
| }, |
| { |
| "epoch": 1.660939289805269, |
| "grad_norm": 0.3140993118286133, |
| "learning_rate": 5.080133159002474e-06, |
| "loss": 0.9936932325363159, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.663230240549828, |
| "grad_norm": 0.4344022274017334, |
| "learning_rate": 5.068819802308845e-06, |
| "loss": 1.1224863529205322, |
| "step": 1452 |
| }, |
| { |
| "epoch": 1.665521191294387, |
| "grad_norm": 0.5366426706314087, |
| "learning_rate": 5.057509195519398e-06, |
| "loss": 1.1559295654296875, |
| "step": 1454 |
| }, |
| { |
| "epoch": 1.667812142038946, |
| "grad_norm": 0.6061311960220337, |
| "learning_rate": 5.046201410768899e-06, |
| "loss": 1.1576176881790161, |
| "step": 1456 |
| }, |
| { |
| "epoch": 1.670103092783505, |
| "grad_norm": 0.9727919697761536, |
| "learning_rate": 5.034896520174126e-06, |
| "loss": 0.7988566160202026, |
| "step": 1458 |
| }, |
| { |
| "epoch": 1.6723940435280642, |
| "grad_norm": 0.1961892545223236, |
| "learning_rate": 5.023594595833393e-06, |
| "loss": 0.4671269953250885, |
| "step": 1460 |
| }, |
| { |
| "epoch": 1.6746849942726232, |
| "grad_norm": 0.8168536424636841, |
| "learning_rate": 5.0122957098260975e-06, |
| "loss": 1.3973238468170166, |
| "step": 1462 |
| }, |
| { |
| "epoch": 1.6769759450171822, |
| "grad_norm": 0.5047257542610168, |
| "learning_rate": 5.0009999342122606e-06, |
| "loss": 1.2853825092315674, |
| "step": 1464 |
| }, |
| { |
| "epoch": 1.6792668957617412, |
| "grad_norm": 0.6044178605079651, |
| "learning_rate": 4.989707341032064e-06, |
| "loss": 1.174715280532837, |
| "step": 1466 |
| }, |
| { |
| "epoch": 1.6815578465063001, |
| "grad_norm": 0.46799567341804504, |
| "learning_rate": 4.978418002305399e-06, |
| "loss": 1.2637076377868652, |
| "step": 1468 |
| }, |
| { |
| "epoch": 1.6838487972508591, |
| "grad_norm": 0.27708613872528076, |
| "learning_rate": 4.967131990031391e-06, |
| "loss": 1.092246174812317, |
| "step": 1470 |
| }, |
| { |
| "epoch": 1.686139747995418, |
| "grad_norm": 0.5905908346176147, |
| "learning_rate": 4.955849376187961e-06, |
| "loss": 0.8725787997245789, |
| "step": 1472 |
| }, |
| { |
| "epoch": 1.688430698739977, |
| "grad_norm": 0.49280673265457153, |
| "learning_rate": 4.944570232731351e-06, |
| "loss": 1.1290966272354126, |
| "step": 1474 |
| }, |
| { |
| "epoch": 1.690721649484536, |
| "grad_norm": 0.6904608011245728, |
| "learning_rate": 4.9332946315956695e-06, |
| "loss": 0.8955701589584351, |
| "step": 1476 |
| }, |
| { |
| "epoch": 1.693012600229095, |
| "grad_norm": 0.6627287268638611, |
| "learning_rate": 4.922022644692438e-06, |
| "loss": 1.1323654651641846, |
| "step": 1478 |
| }, |
| { |
| "epoch": 1.695303550973654, |
| "grad_norm": 0.7109958529472351, |
| "learning_rate": 4.910754343910121e-06, |
| "loss": 1.025420904159546, |
| "step": 1480 |
| }, |
| { |
| "epoch": 1.697594501718213, |
| "grad_norm": 0.43384861946105957, |
| "learning_rate": 4.89948980111368e-06, |
| "loss": 1.111706256866455, |
| "step": 1482 |
| }, |
| { |
| "epoch": 1.699885452462772, |
| "grad_norm": 0.5754409432411194, |
| "learning_rate": 4.888229088144106e-06, |
| "loss": 1.1376874446868896, |
| "step": 1484 |
| }, |
| { |
| "epoch": 1.702176403207331, |
| "grad_norm": 0.31369054317474365, |
| "learning_rate": 4.8769722768179686e-06, |
| "loss": 1.0659782886505127, |
| "step": 1486 |
| }, |
| { |
| "epoch": 1.7044673539518902, |
| "grad_norm": 1.1461002826690674, |
| "learning_rate": 4.86571943892695e-06, |
| "loss": 1.1083794832229614, |
| "step": 1488 |
| }, |
| { |
| "epoch": 1.7067583046964492, |
| "grad_norm": 0.5363646149635315, |
| "learning_rate": 4.854470646237392e-06, |
| "loss": 1.103102684020996, |
| "step": 1490 |
| }, |
| { |
| "epoch": 1.7090492554410082, |
| "grad_norm": 0.22202357649803162, |
| "learning_rate": 4.843225970489843e-06, |
| "loss": 0.7583179473876953, |
| "step": 1492 |
| }, |
| { |
| "epoch": 1.7113402061855671, |
| "grad_norm": 0.19881503283977509, |
| "learning_rate": 4.831985483398587e-06, |
| "loss": 1.2146061658859253, |
| "step": 1494 |
| }, |
| { |
| "epoch": 1.7136311569301261, |
| "grad_norm": 0.6612064838409424, |
| "learning_rate": 4.820749256651202e-06, |
| "loss": 0.949057936668396, |
| "step": 1496 |
| }, |
| { |
| "epoch": 1.7159221076746851, |
| "grad_norm": 0.25396284461021423, |
| "learning_rate": 4.809517361908086e-06, |
| "loss": 1.0635230541229248, |
| "step": 1498 |
| }, |
| { |
| "epoch": 1.718213058419244, |
| "grad_norm": 0.2346191257238388, |
| "learning_rate": 4.798289870802018e-06, |
| "loss": 0.6289651393890381, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.720504009163803, |
| "grad_norm": 0.8989472389221191, |
| "learning_rate": 4.787066854937689e-06, |
| "loss": 1.1217808723449707, |
| "step": 1502 |
| }, |
| { |
| "epoch": 1.722794959908362, |
| "grad_norm": 2.643550395965576, |
| "learning_rate": 4.775848385891245e-06, |
| "loss": 1.2906912565231323, |
| "step": 1504 |
| }, |
| { |
| "epoch": 1.725085910652921, |
| "grad_norm": 0.731056272983551, |
| "learning_rate": 4.7646345352098445e-06, |
| "loss": 0.6282181739807129, |
| "step": 1506 |
| }, |
| { |
| "epoch": 1.72737686139748, |
| "grad_norm": 0.3717769682407379, |
| "learning_rate": 4.753425374411177e-06, |
| "loss": 1.0892517566680908, |
| "step": 1508 |
| }, |
| { |
| "epoch": 1.729667812142039, |
| "grad_norm": 0.2676532566547394, |
| "learning_rate": 4.742220974983036e-06, |
| "loss": 1.0730390548706055, |
| "step": 1510 |
| }, |
| { |
| "epoch": 1.731958762886598, |
| "grad_norm": 0.16937556862831116, |
| "learning_rate": 4.731021408382837e-06, |
| "loss": 1.120283603668213, |
| "step": 1512 |
| }, |
| { |
| "epoch": 1.734249713631157, |
| "grad_norm": 0.30678778886795044, |
| "learning_rate": 4.7198267460371826e-06, |
| "loss": 1.369347095489502, |
| "step": 1514 |
| }, |
| { |
| "epoch": 1.736540664375716, |
| "grad_norm": 0.4600529670715332, |
| "learning_rate": 4.70863705934139e-06, |
| "loss": 1.0413298606872559, |
| "step": 1516 |
| }, |
| { |
| "epoch": 1.738831615120275, |
| "grad_norm": 0.4162696301937103, |
| "learning_rate": 4.697452419659052e-06, |
| "loss": 1.2059823274612427, |
| "step": 1518 |
| }, |
| { |
| "epoch": 1.741122565864834, |
| "grad_norm": 0.36466091871261597, |
| "learning_rate": 4.686272898321568e-06, |
| "loss": 1.0452512502670288, |
| "step": 1520 |
| }, |
| { |
| "epoch": 1.743413516609393, |
| "grad_norm": 0.4611301124095917, |
| "learning_rate": 4.675098566627695e-06, |
| "loss": 0.9318735599517822, |
| "step": 1522 |
| }, |
| { |
| "epoch": 1.745704467353952, |
| "grad_norm": 0.6732238531112671, |
| "learning_rate": 4.6639294958430946e-06, |
| "loss": 0.9963729977607727, |
| "step": 1524 |
| }, |
| { |
| "epoch": 1.747995418098511, |
| "grad_norm": 0.310858815908432, |
| "learning_rate": 4.652765757199872e-06, |
| "loss": 1.0314257144927979, |
| "step": 1526 |
| }, |
| { |
| "epoch": 1.7502863688430699, |
| "grad_norm": 0.2551518678665161, |
| "learning_rate": 4.641607421896132e-06, |
| "loss": 0.9765444993972778, |
| "step": 1528 |
| }, |
| { |
| "epoch": 1.7525773195876289, |
| "grad_norm": 0.890846848487854, |
| "learning_rate": 4.630454561095513e-06, |
| "loss": 0.8637576699256897, |
| "step": 1530 |
| }, |
| { |
| "epoch": 1.7548682703321878, |
| "grad_norm": 0.3232835829257965, |
| "learning_rate": 4.619307245926743e-06, |
| "loss": 1.139219880104065, |
| "step": 1532 |
| }, |
| { |
| "epoch": 1.7571592210767468, |
| "grad_norm": 0.35957878828048706, |
| "learning_rate": 4.608165547483183e-06, |
| "loss": 1.202222466468811, |
| "step": 1534 |
| }, |
| { |
| "epoch": 1.7594501718213058, |
| "grad_norm": 2.1918275356292725, |
| "learning_rate": 4.597029536822368e-06, |
| "loss": 0.6219435930252075, |
| "step": 1536 |
| }, |
| { |
| "epoch": 1.7617411225658648, |
| "grad_norm": 0.4178512692451477, |
| "learning_rate": 4.585899284965563e-06, |
| "loss": 1.0911495685577393, |
| "step": 1538 |
| }, |
| { |
| "epoch": 1.7640320733104238, |
| "grad_norm": 0.5344310402870178, |
| "learning_rate": 4.574774862897302e-06, |
| "loss": 1.1106891632080078, |
| "step": 1540 |
| }, |
| { |
| "epoch": 1.7663230240549828, |
| "grad_norm": 0.41900160908699036, |
| "learning_rate": 4.563656341564941e-06, |
| "loss": 1.0348694324493408, |
| "step": 1542 |
| }, |
| { |
| "epoch": 1.7686139747995417, |
| "grad_norm": 2.1765964031219482, |
| "learning_rate": 4.552543791878205e-06, |
| "loss": 1.2949724197387695, |
| "step": 1544 |
| }, |
| { |
| "epoch": 1.7709049255441007, |
| "grad_norm": 0.28183940052986145, |
| "learning_rate": 4.541437284708727e-06, |
| "loss": 0.9392028450965881, |
| "step": 1546 |
| }, |
| { |
| "epoch": 1.7731958762886597, |
| "grad_norm": 0.3175741732120514, |
| "learning_rate": 4.530336890889613e-06, |
| "loss": 1.0502426624298096, |
| "step": 1548 |
| }, |
| { |
| "epoch": 1.7754868270332187, |
| "grad_norm": 1.4262926578521729, |
| "learning_rate": 4.519242681214971e-06, |
| "loss": 0.7205455303192139, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.7777777777777777, |
| "grad_norm": 1.2114626169204712, |
| "learning_rate": 4.508154726439478e-06, |
| "loss": 1.0237972736358643, |
| "step": 1552 |
| }, |
| { |
| "epoch": 1.7800687285223367, |
| "grad_norm": 0.8481336832046509, |
| "learning_rate": 4.49707309727791e-06, |
| "loss": 1.1520320177078247, |
| "step": 1554 |
| }, |
| { |
| "epoch": 1.7823596792668956, |
| "grad_norm": 0.3496226966381073, |
| "learning_rate": 4.485997864404709e-06, |
| "loss": 1.1129924058914185, |
| "step": 1556 |
| }, |
| { |
| "epoch": 1.7846506300114546, |
| "grad_norm": 0.2375352829694748, |
| "learning_rate": 4.474929098453519e-06, |
| "loss": 1.211329460144043, |
| "step": 1558 |
| }, |
| { |
| "epoch": 1.7869415807560136, |
| "grad_norm": 0.2744106650352478, |
| "learning_rate": 4.463866870016741e-06, |
| "loss": 1.0723626613616943, |
| "step": 1560 |
| }, |
| { |
| "epoch": 1.7892325315005726, |
| "grad_norm": 1.0329090356826782, |
| "learning_rate": 4.452811249645085e-06, |
| "loss": 1.088670015335083, |
| "step": 1562 |
| }, |
| { |
| "epoch": 1.7915234822451316, |
| "grad_norm": 0.6205560564994812, |
| "learning_rate": 4.441762307847113e-06, |
| "loss": 1.2151033878326416, |
| "step": 1564 |
| }, |
| { |
| "epoch": 1.7938144329896906, |
| "grad_norm": 0.6296006441116333, |
| "learning_rate": 4.430720115088797e-06, |
| "loss": 1.1723359823226929, |
| "step": 1566 |
| }, |
| { |
| "epoch": 1.7961053837342495, |
| "grad_norm": 1.2101175785064697, |
| "learning_rate": 4.419684741793066e-06, |
| "loss": 0.9560754299163818, |
| "step": 1568 |
| }, |
| { |
| "epoch": 1.7983963344788088, |
| "grad_norm": 0.33595010638237, |
| "learning_rate": 4.408656258339355e-06, |
| "loss": 1.0018739700317383, |
| "step": 1570 |
| }, |
| { |
| "epoch": 1.8006872852233677, |
| "grad_norm": 0.34481048583984375, |
| "learning_rate": 4.397634735063158e-06, |
| "loss": 1.0918338298797607, |
| "step": 1572 |
| }, |
| { |
| "epoch": 1.8029782359679267, |
| "grad_norm": 0.506317138671875, |
| "learning_rate": 4.386620242255583e-06, |
| "loss": 1.082423448562622, |
| "step": 1574 |
| }, |
| { |
| "epoch": 1.8052691867124857, |
| "grad_norm": 1.5229862928390503, |
| "learning_rate": 4.375612850162897e-06, |
| "loss": 1.20133638381958, |
| "step": 1576 |
| }, |
| { |
| "epoch": 1.8075601374570447, |
| "grad_norm": 0.4049842357635498, |
| "learning_rate": 4.364612628986082e-06, |
| "loss": 0.9879454970359802, |
| "step": 1578 |
| }, |
| { |
| "epoch": 1.8098510882016037, |
| "grad_norm": 0.7496724128723145, |
| "learning_rate": 4.35361964888039e-06, |
| "loss": 0.9724379777908325, |
| "step": 1580 |
| }, |
| { |
| "epoch": 1.8121420389461627, |
| "grad_norm": 0.7204214930534363, |
| "learning_rate": 4.342633979954887e-06, |
| "loss": 1.1547467708587646, |
| "step": 1582 |
| }, |
| { |
| "epoch": 1.8144329896907216, |
| "grad_norm": 1.05171537399292, |
| "learning_rate": 4.331655692272011e-06, |
| "loss": 1.0399408340454102, |
| "step": 1584 |
| }, |
| { |
| "epoch": 1.8167239404352806, |
| "grad_norm": 0.36879026889801025, |
| "learning_rate": 4.320684855847135e-06, |
| "loss": 1.2286815643310547, |
| "step": 1586 |
| }, |
| { |
| "epoch": 1.8190148911798396, |
| "grad_norm": 0.27584725618362427, |
| "learning_rate": 4.309721540648094e-06, |
| "loss": 0.946727991104126, |
| "step": 1588 |
| }, |
| { |
| "epoch": 1.8213058419243986, |
| "grad_norm": 0.2803502678871155, |
| "learning_rate": 4.298765816594773e-06, |
| "loss": 1.0936319828033447, |
| "step": 1590 |
| }, |
| { |
| "epoch": 1.8235967926689576, |
| "grad_norm": 0.46186119318008423, |
| "learning_rate": 4.2878177535586294e-06, |
| "loss": 1.0281469821929932, |
| "step": 1592 |
| }, |
| { |
| "epoch": 1.8258877434135166, |
| "grad_norm": 0.8824283480644226, |
| "learning_rate": 4.276877421362273e-06, |
| "loss": 1.1012415885925293, |
| "step": 1594 |
| }, |
| { |
| "epoch": 1.8281786941580758, |
| "grad_norm": 0.563686192035675, |
| "learning_rate": 4.265944889778998e-06, |
| "loss": 1.2015641927719116, |
| "step": 1596 |
| }, |
| { |
| "epoch": 1.8304696449026348, |
| "grad_norm": 0.45275604724884033, |
| "learning_rate": 4.255020228532357e-06, |
| "loss": 1.168736457824707, |
| "step": 1598 |
| }, |
| { |
| "epoch": 1.8327605956471937, |
| "grad_norm": 0.5189002156257629, |
| "learning_rate": 4.244103507295709e-06, |
| "loss": 0.8564615249633789, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.8350515463917527, |
| "grad_norm": 0.4436849355697632, |
| "learning_rate": 4.233194795691767e-06, |
| "loss": 1.1107850074768066, |
| "step": 1602 |
| }, |
| { |
| "epoch": 1.8373424971363117, |
| "grad_norm": 0.5029271841049194, |
| "learning_rate": 4.222294163292173e-06, |
| "loss": 1.1380796432495117, |
| "step": 1604 |
| }, |
| { |
| "epoch": 1.8396334478808707, |
| "grad_norm": 0.38482290506362915, |
| "learning_rate": 4.211401679617031e-06, |
| "loss": 1.0681747198104858, |
| "step": 1606 |
| }, |
| { |
| "epoch": 1.8419243986254297, |
| "grad_norm": 0.3398834466934204, |
| "learning_rate": 4.200517414134487e-06, |
| "loss": 1.1440093517303467, |
| "step": 1608 |
| }, |
| { |
| "epoch": 1.8442153493699887, |
| "grad_norm": 0.3168809115886688, |
| "learning_rate": 4.1896414362602615e-06, |
| "loss": 0.9699110984802246, |
| "step": 1610 |
| }, |
| { |
| "epoch": 1.8465063001145476, |
| "grad_norm": 0.608744204044342, |
| "learning_rate": 4.178773815357231e-06, |
| "loss": 1.3050799369812012, |
| "step": 1612 |
| }, |
| { |
| "epoch": 1.8487972508591066, |
| "grad_norm": 1.0546008348464966, |
| "learning_rate": 4.167914620734971e-06, |
| "loss": 1.372516393661499, |
| "step": 1614 |
| }, |
| { |
| "epoch": 1.8510882016036656, |
| "grad_norm": 0.6382185220718384, |
| "learning_rate": 4.157063921649314e-06, |
| "loss": 1.0993999242782593, |
| "step": 1616 |
| }, |
| { |
| "epoch": 1.8533791523482246, |
| "grad_norm": 0.24312907457351685, |
| "learning_rate": 4.146221787301916e-06, |
| "loss": 1.1152019500732422, |
| "step": 1618 |
| }, |
| { |
| "epoch": 1.8556701030927836, |
| "grad_norm": 0.5500279664993286, |
| "learning_rate": 4.135388286839806e-06, |
| "loss": 1.0518808364868164, |
| "step": 1620 |
| }, |
| { |
| "epoch": 1.8579610538373426, |
| "grad_norm": 0.6796494126319885, |
| "learning_rate": 4.1245634893549545e-06, |
| "loss": 1.1548529863357544, |
| "step": 1622 |
| }, |
| { |
| "epoch": 1.8602520045819015, |
| "grad_norm": 0.5995921492576599, |
| "learning_rate": 4.113747463883823e-06, |
| "loss": 1.352916955947876, |
| "step": 1624 |
| }, |
| { |
| "epoch": 1.8625429553264605, |
| "grad_norm": 0.24284528195858002, |
| "learning_rate": 4.102940279406928e-06, |
| "loss": 1.0819270610809326, |
| "step": 1626 |
| }, |
| { |
| "epoch": 1.8648339060710195, |
| "grad_norm": 0.6232653260231018, |
| "learning_rate": 4.092142004848408e-06, |
| "loss": 1.1328476667404175, |
| "step": 1628 |
| }, |
| { |
| "epoch": 1.8671248568155785, |
| "grad_norm": 0.27039143443107605, |
| "learning_rate": 4.08135270907557e-06, |
| "loss": 1.0456500053405762, |
| "step": 1630 |
| }, |
| { |
| "epoch": 1.8694158075601375, |
| "grad_norm": 0.5682404637336731, |
| "learning_rate": 4.070572460898462e-06, |
| "loss": 1.0884917974472046, |
| "step": 1632 |
| }, |
| { |
| "epoch": 1.8717067583046965, |
| "grad_norm": 0.28413209319114685, |
| "learning_rate": 4.059801329069426e-06, |
| "loss": 1.0192196369171143, |
| "step": 1634 |
| }, |
| { |
| "epoch": 1.8739977090492554, |
| "grad_norm": 0.20320376753807068, |
| "learning_rate": 4.04903938228267e-06, |
| "loss": 1.1282374858856201, |
| "step": 1636 |
| }, |
| { |
| "epoch": 1.8762886597938144, |
| "grad_norm": 0.5271807909011841, |
| "learning_rate": 4.038286689173817e-06, |
| "loss": 1.1059315204620361, |
| "step": 1638 |
| }, |
| { |
| "epoch": 1.8785796105383734, |
| "grad_norm": 0.18544262647628784, |
| "learning_rate": 4.027543318319474e-06, |
| "loss": 1.027941346168518, |
| "step": 1640 |
| }, |
| { |
| "epoch": 1.8808705612829324, |
| "grad_norm": 0.6330454349517822, |
| "learning_rate": 4.0168093382368e-06, |
| "loss": 1.1752293109893799, |
| "step": 1642 |
| }, |
| { |
| "epoch": 1.8831615120274914, |
| "grad_norm": 0.4557496905326843, |
| "learning_rate": 4.006084817383056e-06, |
| "loss": 0.3644358515739441, |
| "step": 1644 |
| }, |
| { |
| "epoch": 1.8854524627720504, |
| "grad_norm": 1.3597580194473267, |
| "learning_rate": 3.995369824155181e-06, |
| "loss": 1.1257150173187256, |
| "step": 1646 |
| }, |
| { |
| "epoch": 1.8877434135166093, |
| "grad_norm": 0.4406583607196808, |
| "learning_rate": 3.984664426889346e-06, |
| "loss": 1.1774303913116455, |
| "step": 1648 |
| }, |
| { |
| "epoch": 1.8900343642611683, |
| "grad_norm": 0.43711021542549133, |
| "learning_rate": 3.973968693860527e-06, |
| "loss": 1.2031350135803223, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.8923253150057273, |
| "grad_norm": 0.4777853190898895, |
| "learning_rate": 3.96328269328206e-06, |
| "loss": 1.2918949127197266, |
| "step": 1652 |
| }, |
| { |
| "epoch": 1.8946162657502863, |
| "grad_norm": 0.6269339323043823, |
| "learning_rate": 3.952606493305216e-06, |
| "loss": 1.4281535148620605, |
| "step": 1654 |
| }, |
| { |
| "epoch": 1.8969072164948453, |
| "grad_norm": 0.3071335256099701, |
| "learning_rate": 3.941940162018761e-06, |
| "loss": 1.010391116142273, |
| "step": 1656 |
| }, |
| { |
| "epoch": 1.8991981672394043, |
| "grad_norm": 0.18624217808246613, |
| "learning_rate": 3.931283767448518e-06, |
| "loss": 0.5205022096633911, |
| "step": 1658 |
| }, |
| { |
| "epoch": 1.9014891179839633, |
| "grad_norm": 1.4488636255264282, |
| "learning_rate": 3.920637377556942e-06, |
| "loss": 0.844072699546814, |
| "step": 1660 |
| }, |
| { |
| "epoch": 1.9037800687285222, |
| "grad_norm": 0.5701979994773865, |
| "learning_rate": 3.910001060242677e-06, |
| "loss": 0.9334324598312378, |
| "step": 1662 |
| }, |
| { |
| "epoch": 1.9060710194730812, |
| "grad_norm": 0.7097718119621277, |
| "learning_rate": 3.899374883340137e-06, |
| "loss": 1.1183058023452759, |
| "step": 1664 |
| }, |
| { |
| "epoch": 1.9083619702176402, |
| "grad_norm": 0.3661045730113983, |
| "learning_rate": 3.88875891461905e-06, |
| "loss": 0.9330714344978333, |
| "step": 1666 |
| }, |
| { |
| "epoch": 1.9106529209621992, |
| "grad_norm": 0.29769042134284973, |
| "learning_rate": 3.878153221784054e-06, |
| "loss": 1.1027047634124756, |
| "step": 1668 |
| }, |
| { |
| "epoch": 1.9129438717067582, |
| "grad_norm": 0.8713205456733704, |
| "learning_rate": 3.867557872474248e-06, |
| "loss": 1.0835906267166138, |
| "step": 1670 |
| }, |
| { |
| "epoch": 1.9152348224513172, |
| "grad_norm": 0.4693801701068878, |
| "learning_rate": 3.856972934262755e-06, |
| "loss": 1.1927740573883057, |
| "step": 1672 |
| }, |
| { |
| "epoch": 1.9175257731958761, |
| "grad_norm": 0.4635125696659088, |
| "learning_rate": 3.84639847465631e-06, |
| "loss": 1.2767350673675537, |
| "step": 1674 |
| }, |
| { |
| "epoch": 1.9198167239404351, |
| "grad_norm": 0.48501482605934143, |
| "learning_rate": 3.835834561094815e-06, |
| "loss": 1.130952000617981, |
| "step": 1676 |
| }, |
| { |
| "epoch": 1.9221076746849943, |
| "grad_norm": 0.9517636895179749, |
| "learning_rate": 3.825281260950916e-06, |
| "loss": 1.3384122848510742, |
| "step": 1678 |
| }, |
| { |
| "epoch": 1.9243986254295533, |
| "grad_norm": 1.5027124881744385, |
| "learning_rate": 3.814738641529566e-06, |
| "loss": 1.2897770404815674, |
| "step": 1680 |
| }, |
| { |
| "epoch": 1.9266895761741123, |
| "grad_norm": 0.17051805555820465, |
| "learning_rate": 3.8042067700676044e-06, |
| "loss": 0.9062872529029846, |
| "step": 1682 |
| }, |
| { |
| "epoch": 1.9289805269186713, |
| "grad_norm": 0.3399532437324524, |
| "learning_rate": 3.793685713733326e-06, |
| "loss": 1.1948344707489014, |
| "step": 1684 |
| }, |
| { |
| "epoch": 1.9312714776632303, |
| "grad_norm": 1.0268810987472534, |
| "learning_rate": 3.7831755396260416e-06, |
| "loss": 1.1116868257522583, |
| "step": 1686 |
| }, |
| { |
| "epoch": 1.9335624284077892, |
| "grad_norm": 0.25544682145118713, |
| "learning_rate": 3.772676314775674e-06, |
| "loss": 0.9785124063491821, |
| "step": 1688 |
| }, |
| { |
| "epoch": 1.9358533791523482, |
| "grad_norm": 0.2780124545097351, |
| "learning_rate": 3.762188106142298e-06, |
| "loss": 0.8251289129257202, |
| "step": 1690 |
| }, |
| { |
| "epoch": 1.9381443298969072, |
| "grad_norm": 0.6381999254226685, |
| "learning_rate": 3.751710980615748e-06, |
| "loss": 1.154158353805542, |
| "step": 1692 |
| }, |
| { |
| "epoch": 1.9404352806414662, |
| "grad_norm": 0.36244654655456543, |
| "learning_rate": 3.741245005015162e-06, |
| "loss": 0.9876756072044373, |
| "step": 1694 |
| }, |
| { |
| "epoch": 1.9427262313860252, |
| "grad_norm": 0.5184593796730042, |
| "learning_rate": 3.7307902460885746e-06, |
| "loss": 1.1275906562805176, |
| "step": 1696 |
| }, |
| { |
| "epoch": 1.9450171821305842, |
| "grad_norm": 0.51296067237854, |
| "learning_rate": 3.720346770512486e-06, |
| "loss": 1.025963544845581, |
| "step": 1698 |
| }, |
| { |
| "epoch": 1.9473081328751431, |
| "grad_norm": 0.5093493461608887, |
| "learning_rate": 3.709914644891427e-06, |
| "loss": 1.1869707107543945, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.9495990836197021, |
| "grad_norm": 2.1370980739593506, |
| "learning_rate": 3.6994939357575533e-06, |
| "loss": 0.6620631217956543, |
| "step": 1702 |
| }, |
| { |
| "epoch": 1.9518900343642611, |
| "grad_norm": 0.5478183031082153, |
| "learning_rate": 3.689084709570202e-06, |
| "loss": 1.123410701751709, |
| "step": 1704 |
| }, |
| { |
| "epoch": 1.9541809851088203, |
| "grad_norm": 0.4446258842945099, |
| "learning_rate": 3.6786870327154832e-06, |
| "loss": 1.1439985036849976, |
| "step": 1706 |
| }, |
| { |
| "epoch": 1.9564719358533793, |
| "grad_norm": 0.5509153008460999, |
| "learning_rate": 3.6683009715058416e-06, |
| "loss": 1.0780513286590576, |
| "step": 1708 |
| }, |
| { |
| "epoch": 1.9587628865979383, |
| "grad_norm": 0.48575830459594727, |
| "learning_rate": 3.657926592179649e-06, |
| "loss": 1.0788171291351318, |
| "step": 1710 |
| }, |
| { |
| "epoch": 1.9610538373424973, |
| "grad_norm": 0.21410536766052246, |
| "learning_rate": 3.647563960900774e-06, |
| "loss": 1.091663122177124, |
| "step": 1712 |
| }, |
| { |
| "epoch": 1.9633447880870563, |
| "grad_norm": 0.25092241168022156, |
| "learning_rate": 3.637213143758156e-06, |
| "loss": 1.0151268243789673, |
| "step": 1714 |
| }, |
| { |
| "epoch": 1.9656357388316152, |
| "grad_norm": 0.7077304124832153, |
| "learning_rate": 3.626874206765392e-06, |
| "loss": 1.1096687316894531, |
| "step": 1716 |
| }, |
| { |
| "epoch": 1.9679266895761742, |
| "grad_norm": 0.4164002537727356, |
| "learning_rate": 3.616547215860309e-06, |
| "loss": 1.0841827392578125, |
| "step": 1718 |
| }, |
| { |
| "epoch": 1.9702176403207332, |
| "grad_norm": 0.8496370911598206, |
| "learning_rate": 3.606232236904549e-06, |
| "loss": 1.2367123365402222, |
| "step": 1720 |
| }, |
| { |
| "epoch": 1.9725085910652922, |
| "grad_norm": 0.4818964898586273, |
| "learning_rate": 3.595929335683146e-06, |
| "loss": 0.9790089130401611, |
| "step": 1722 |
| }, |
| { |
| "epoch": 1.9747995418098512, |
| "grad_norm": 0.33810457587242126, |
| "learning_rate": 3.585638577904107e-06, |
| "loss": 1.145442247390747, |
| "step": 1724 |
| }, |
| { |
| "epoch": 1.9770904925544102, |
| "grad_norm": 1.0560132265090942, |
| "learning_rate": 3.5753600291979917e-06, |
| "loss": 1.3760294914245605, |
| "step": 1726 |
| }, |
| { |
| "epoch": 1.9793814432989691, |
| "grad_norm": 0.710391104221344, |
| "learning_rate": 3.5650937551174914e-06, |
| "loss": 1.148451328277588, |
| "step": 1728 |
| }, |
| { |
| "epoch": 1.9816723940435281, |
| "grad_norm": 1.249986171722412, |
| "learning_rate": 3.554839821137026e-06, |
| "loss": 0.9829602837562561, |
| "step": 1730 |
| }, |
| { |
| "epoch": 1.9839633447880871, |
| "grad_norm": 0.2360655516386032, |
| "learning_rate": 3.5445982926523006e-06, |
| "loss": 1.1001036167144775, |
| "step": 1732 |
| }, |
| { |
| "epoch": 1.986254295532646, |
| "grad_norm": 0.8706629276275635, |
| "learning_rate": 3.534369234979914e-06, |
| "loss": 1.0795059204101562, |
| "step": 1734 |
| }, |
| { |
| "epoch": 1.988545246277205, |
| "grad_norm": 0.3764958381652832, |
| "learning_rate": 3.5241527133569197e-06, |
| "loss": 0.0997859463095665, |
| "step": 1736 |
| }, |
| { |
| "epoch": 1.990836197021764, |
| "grad_norm": 0.8101068139076233, |
| "learning_rate": 3.5139487929404335e-06, |
| "loss": 0.8904887437820435, |
| "step": 1738 |
| }, |
| { |
| "epoch": 1.993127147766323, |
| "grad_norm": 0.5189135074615479, |
| "learning_rate": 3.5037575388071997e-06, |
| "loss": 1.174041509628296, |
| "step": 1740 |
| }, |
| { |
| "epoch": 1.995418098510882, |
| "grad_norm": 1.1633189916610718, |
| "learning_rate": 3.4935790159531786e-06, |
| "loss": 1.0208793878555298, |
| "step": 1742 |
| }, |
| { |
| "epoch": 1.997709049255441, |
| "grad_norm": 0.41635146737098694, |
| "learning_rate": 3.4834132892931405e-06, |
| "loss": 1.1487672328948975, |
| "step": 1744 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.46879705786705017, |
| "learning_rate": 3.47326042366024e-06, |
| "loss": 1.1573315858840942, |
| "step": 1746 |
| }, |
| { |
| "epoch": 2.002290950744559, |
| "grad_norm": 0.42209404706954956, |
| "learning_rate": 3.46312048380562e-06, |
| "loss": 1.0264599323272705, |
| "step": 1748 |
| }, |
| { |
| "epoch": 2.004581901489118, |
| "grad_norm": 0.5175220370292664, |
| "learning_rate": 3.452993534397975e-06, |
| "loss": 1.196134090423584, |
| "step": 1750 |
| }, |
| { |
| "epoch": 2.006872852233677, |
| "grad_norm": 0.4058225750923157, |
| "learning_rate": 3.4428796400231595e-06, |
| "loss": 1.1240901947021484, |
| "step": 1752 |
| }, |
| { |
| "epoch": 2.009163802978236, |
| "grad_norm": 1.5380558967590332, |
| "learning_rate": 3.4327788651837658e-06, |
| "loss": 1.1184686422348022, |
| "step": 1754 |
| }, |
| { |
| "epoch": 2.011454753722795, |
| "grad_norm": 0.24166224896907806, |
| "learning_rate": 3.422691274298714e-06, |
| "loss": 1.0136035680770874, |
| "step": 1756 |
| }, |
| { |
| "epoch": 2.013745704467354, |
| "grad_norm": 0.3540262281894684, |
| "learning_rate": 3.4126169317028445e-06, |
| "loss": 1.099753975868225, |
| "step": 1758 |
| }, |
| { |
| "epoch": 2.016036655211913, |
| "grad_norm": 4.086158752441406, |
| "learning_rate": 3.4025559016465003e-06, |
| "loss": 1.2752199172973633, |
| "step": 1760 |
| }, |
| { |
| "epoch": 2.018327605956472, |
| "grad_norm": 0.2709142863750458, |
| "learning_rate": 3.3925082482951253e-06, |
| "loss": 1.0859545469284058, |
| "step": 1762 |
| }, |
| { |
| "epoch": 2.020618556701031, |
| "grad_norm": 1.6579638719558716, |
| "learning_rate": 3.382474035728852e-06, |
| "loss": 0.9678882360458374, |
| "step": 1764 |
| }, |
| { |
| "epoch": 2.02290950744559, |
| "grad_norm": 0.9905798435211182, |
| "learning_rate": 3.372453327942092e-06, |
| "loss": 1.2576557397842407, |
| "step": 1766 |
| }, |
| { |
| "epoch": 2.025200458190149, |
| "grad_norm": 0.586355984210968, |
| "learning_rate": 3.3624461888431315e-06, |
| "loss": 1.0932533740997314, |
| "step": 1768 |
| }, |
| { |
| "epoch": 2.027491408934708, |
| "grad_norm": 0.7022833824157715, |
| "learning_rate": 3.352452682253713e-06, |
| "loss": 1.1744213104248047, |
| "step": 1770 |
| }, |
| { |
| "epoch": 2.029782359679267, |
| "grad_norm": 0.5173978805541992, |
| "learning_rate": 3.3424728719086424e-06, |
| "loss": 1.1649706363677979, |
| "step": 1772 |
| }, |
| { |
| "epoch": 2.0320733104238258, |
| "grad_norm": 0.4220377206802368, |
| "learning_rate": 3.332506821455376e-06, |
| "loss": 0.9526954293251038, |
| "step": 1774 |
| }, |
| { |
| "epoch": 2.0343642611683848, |
| "grad_norm": 0.8884670734405518, |
| "learning_rate": 3.3225545944536132e-06, |
| "loss": 0.8466014862060547, |
| "step": 1776 |
| }, |
| { |
| "epoch": 2.0366552119129437, |
| "grad_norm": 0.7669530510902405, |
| "learning_rate": 3.3126162543748906e-06, |
| "loss": 1.048229694366455, |
| "step": 1778 |
| }, |
| { |
| "epoch": 2.0389461626575027, |
| "grad_norm": 0.45862677693367004, |
| "learning_rate": 3.302691864602183e-06, |
| "loss": 1.0864906311035156, |
| "step": 1780 |
| }, |
| { |
| "epoch": 2.0412371134020617, |
| "grad_norm": 0.6318526268005371, |
| "learning_rate": 3.292781488429494e-06, |
| "loss": 1.1123660802841187, |
| "step": 1782 |
| }, |
| { |
| "epoch": 2.0435280641466207, |
| "grad_norm": 0.6299246549606323, |
| "learning_rate": 3.2828851890614534e-06, |
| "loss": 1.052357792854309, |
| "step": 1784 |
| }, |
| { |
| "epoch": 2.0458190148911797, |
| "grad_norm": 0.23600554466247559, |
| "learning_rate": 3.2730030296129157e-06, |
| "loss": 1.099745750427246, |
| "step": 1786 |
| }, |
| { |
| "epoch": 2.0481099656357387, |
| "grad_norm": 0.29890838265419006, |
| "learning_rate": 3.2631350731085526e-06, |
| "loss": 1.165632963180542, |
| "step": 1788 |
| }, |
| { |
| "epoch": 2.0504009163802976, |
| "grad_norm": 0.38656070828437805, |
| "learning_rate": 3.25328138248246e-06, |
| "loss": 1.0975539684295654, |
| "step": 1790 |
| }, |
| { |
| "epoch": 2.0526918671248566, |
| "grad_norm": 0.5550158023834229, |
| "learning_rate": 3.2434420205777473e-06, |
| "loss": 0.6457653045654297, |
| "step": 1792 |
| }, |
| { |
| "epoch": 2.0549828178694156, |
| "grad_norm": 1.2036755084991455, |
| "learning_rate": 3.2336170501461433e-06, |
| "loss": 0.9297122955322266, |
| "step": 1794 |
| }, |
| { |
| "epoch": 2.0572737686139746, |
| "grad_norm": 0.7063114047050476, |
| "learning_rate": 3.2238065338475944e-06, |
| "loss": 0.9581747055053711, |
| "step": 1796 |
| }, |
| { |
| "epoch": 2.0595647193585336, |
| "grad_norm": 0.3019329309463501, |
| "learning_rate": 3.2140105342498577e-06, |
| "loss": 1.0715091228485107, |
| "step": 1798 |
| }, |
| { |
| "epoch": 2.0618556701030926, |
| "grad_norm": 0.43459922075271606, |
| "learning_rate": 3.2042291138281155e-06, |
| "loss": 1.1576061248779297, |
| "step": 1800 |
| }, |
| { |
| "epoch": 2.0641466208476515, |
| "grad_norm": 1.0746312141418457, |
| "learning_rate": 3.194462334964566e-06, |
| "loss": 0.6688909530639648, |
| "step": 1802 |
| }, |
| { |
| "epoch": 2.066437571592211, |
| "grad_norm": 1.6781188249588013, |
| "learning_rate": 3.184710259948032e-06, |
| "loss": 1.0624680519104004, |
| "step": 1804 |
| }, |
| { |
| "epoch": 2.06872852233677, |
| "grad_norm": 0.8279687762260437, |
| "learning_rate": 3.1749729509735555e-06, |
| "loss": 0.5724896192550659, |
| "step": 1806 |
| }, |
| { |
| "epoch": 2.071019473081329, |
| "grad_norm": 0.8611645698547363, |
| "learning_rate": 3.165250470142009e-06, |
| "loss": 1.0552845001220703, |
| "step": 1808 |
| }, |
| { |
| "epoch": 2.073310423825888, |
| "grad_norm": 0.731144905090332, |
| "learning_rate": 3.155542879459702e-06, |
| "loss": 1.301546573638916, |
| "step": 1810 |
| }, |
| { |
| "epoch": 2.075601374570447, |
| "grad_norm": 0.564445436000824, |
| "learning_rate": 3.145850240837971e-06, |
| "loss": 1.087944746017456, |
| "step": 1812 |
| }, |
| { |
| "epoch": 2.077892325315006, |
| "grad_norm": 0.6972827911376953, |
| "learning_rate": 3.136172616092801e-06, |
| "loss": 1.1200871467590332, |
| "step": 1814 |
| }, |
| { |
| "epoch": 2.080183276059565, |
| "grad_norm": 0.8714810013771057, |
| "learning_rate": 3.1265100669444184e-06, |
| "loss": 0.9296277761459351, |
| "step": 1816 |
| }, |
| { |
| "epoch": 2.082474226804124, |
| "grad_norm": 0.9884515404701233, |
| "learning_rate": 3.116862655016907e-06, |
| "loss": 1.1618545055389404, |
| "step": 1818 |
| }, |
| { |
| "epoch": 2.084765177548683, |
| "grad_norm": 0.506373405456543, |
| "learning_rate": 3.107230441837812e-06, |
| "loss": 1.1905784606933594, |
| "step": 1820 |
| }, |
| { |
| "epoch": 2.087056128293242, |
| "grad_norm": 0.30389317870140076, |
| "learning_rate": 3.097613488837744e-06, |
| "loss": 1.0908293724060059, |
| "step": 1822 |
| }, |
| { |
| "epoch": 2.089347079037801, |
| "grad_norm": 1.8103982210159302, |
| "learning_rate": 3.0880118573499928e-06, |
| "loss": 1.1718205213546753, |
| "step": 1824 |
| }, |
| { |
| "epoch": 2.09163802978236, |
| "grad_norm": 0.436989963054657, |
| "learning_rate": 3.0784256086101265e-06, |
| "loss": 1.092139482498169, |
| "step": 1826 |
| }, |
| { |
| "epoch": 2.093928980526919, |
| "grad_norm": 0.34637048840522766, |
| "learning_rate": 3.0688548037556203e-06, |
| "loss": 1.0642328262329102, |
| "step": 1828 |
| }, |
| { |
| "epoch": 2.0962199312714778, |
| "grad_norm": 0.8757824897766113, |
| "learning_rate": 3.0592995038254413e-06, |
| "loss": 1.127882480621338, |
| "step": 1830 |
| }, |
| { |
| "epoch": 2.0985108820160367, |
| "grad_norm": 0.30737531185150146, |
| "learning_rate": 3.04975976975968e-06, |
| "loss": 1.0287266969680786, |
| "step": 1832 |
| }, |
| { |
| "epoch": 2.1008018327605957, |
| "grad_norm": 0.5797215700149536, |
| "learning_rate": 3.0402356623991487e-06, |
| "loss": 1.2051303386688232, |
| "step": 1834 |
| }, |
| { |
| "epoch": 2.1030927835051547, |
| "grad_norm": 0.6220235228538513, |
| "learning_rate": 3.0307272424850043e-06, |
| "loss": 1.1138908863067627, |
| "step": 1836 |
| }, |
| { |
| "epoch": 2.1053837342497137, |
| "grad_norm": 0.24019214510917664, |
| "learning_rate": 3.021234570658351e-06, |
| "loss": 1.0616822242736816, |
| "step": 1838 |
| }, |
| { |
| "epoch": 2.1076746849942727, |
| "grad_norm": 0.47777384519577026, |
| "learning_rate": 3.011757707459857e-06, |
| "loss": 1.16475248336792, |
| "step": 1840 |
| }, |
| { |
| "epoch": 2.1099656357388317, |
| "grad_norm": 0.12783193588256836, |
| "learning_rate": 3.0022967133293716e-06, |
| "loss": 0.9693043231964111, |
| "step": 1842 |
| }, |
| { |
| "epoch": 2.1122565864833907, |
| "grad_norm": 0.743993878364563, |
| "learning_rate": 2.992851648605536e-06, |
| "loss": 1.1348570585250854, |
| "step": 1844 |
| }, |
| { |
| "epoch": 2.1145475372279496, |
| "grad_norm": 0.3964126706123352, |
| "learning_rate": 2.9834225735254e-06, |
| "loss": 0.6107242107391357, |
| "step": 1846 |
| }, |
| { |
| "epoch": 2.1168384879725086, |
| "grad_norm": 0.7012664079666138, |
| "learning_rate": 2.9740095482240382e-06, |
| "loss": 1.1791443824768066, |
| "step": 1848 |
| }, |
| { |
| "epoch": 2.1191294387170676, |
| "grad_norm": 0.18129964172840118, |
| "learning_rate": 2.964612632734163e-06, |
| "loss": 1.1110697984695435, |
| "step": 1850 |
| }, |
| { |
| "epoch": 2.1214203894616266, |
| "grad_norm": 0.2507210075855255, |
| "learning_rate": 2.9552318869857464e-06, |
| "loss": 0.8801964521408081, |
| "step": 1852 |
| }, |
| { |
| "epoch": 2.1237113402061856, |
| "grad_norm": 0.30734899640083313, |
| "learning_rate": 2.9458673708056385e-06, |
| "loss": 1.13587486743927, |
| "step": 1854 |
| }, |
| { |
| "epoch": 2.1260022909507446, |
| "grad_norm": 0.4164876341819763, |
| "learning_rate": 2.936519143917179e-06, |
| "loss": 1.0800116062164307, |
| "step": 1856 |
| }, |
| { |
| "epoch": 2.1282932416953035, |
| "grad_norm": 0.5680100321769714, |
| "learning_rate": 2.9271872659398224e-06, |
| "loss": 1.193451166152954, |
| "step": 1858 |
| }, |
| { |
| "epoch": 2.1305841924398625, |
| "grad_norm": 0.5644039511680603, |
| "learning_rate": 2.9178717963887557e-06, |
| "loss": 1.0699570178985596, |
| "step": 1860 |
| }, |
| { |
| "epoch": 2.1328751431844215, |
| "grad_norm": 0.7427708506584167, |
| "learning_rate": 2.90857279467452e-06, |
| "loss": 1.1289746761322021, |
| "step": 1862 |
| }, |
| { |
| "epoch": 2.1351660939289805, |
| "grad_norm": 0.47718605399131775, |
| "learning_rate": 2.899290320102631e-06, |
| "loss": 0.9117916226387024, |
| "step": 1864 |
| }, |
| { |
| "epoch": 2.1374570446735395, |
| "grad_norm": 1.0634793043136597, |
| "learning_rate": 2.890024431873201e-06, |
| "loss": 0.8808594942092896, |
| "step": 1866 |
| }, |
| { |
| "epoch": 2.1397479954180985, |
| "grad_norm": 0.2982898950576782, |
| "learning_rate": 2.880775189080555e-06, |
| "loss": 1.001103401184082, |
| "step": 1868 |
| }, |
| { |
| "epoch": 2.1420389461626574, |
| "grad_norm": 0.19586509466171265, |
| "learning_rate": 2.8715426507128687e-06, |
| "loss": 1.054373025894165, |
| "step": 1870 |
| }, |
| { |
| "epoch": 2.1443298969072164, |
| "grad_norm": 0.4863986074924469, |
| "learning_rate": 2.862326875651777e-06, |
| "loss": 1.1137691736221313, |
| "step": 1872 |
| }, |
| { |
| "epoch": 2.1466208476517754, |
| "grad_norm": 0.41139012575149536, |
| "learning_rate": 2.853127922672008e-06, |
| "loss": 1.0064350366592407, |
| "step": 1874 |
| }, |
| { |
| "epoch": 2.1489117983963344, |
| "grad_norm": 1.2407772541046143, |
| "learning_rate": 2.8439458504410044e-06, |
| "loss": 1.0638196468353271, |
| "step": 1876 |
| }, |
| { |
| "epoch": 2.1512027491408934, |
| "grad_norm": 0.6728169322013855, |
| "learning_rate": 2.834780717518547e-06, |
| "loss": 1.0377411842346191, |
| "step": 1878 |
| }, |
| { |
| "epoch": 2.1534936998854524, |
| "grad_norm": 0.2670747935771942, |
| "learning_rate": 2.825632582356387e-06, |
| "loss": 0.9914096593856812, |
| "step": 1880 |
| }, |
| { |
| "epoch": 2.1557846506300113, |
| "grad_norm": 0.24730202555656433, |
| "learning_rate": 2.8165015032978703e-06, |
| "loss": 1.0537339448928833, |
| "step": 1882 |
| }, |
| { |
| "epoch": 2.1580756013745703, |
| "grad_norm": 0.33252057433128357, |
| "learning_rate": 2.8073875385775663e-06, |
| "loss": 1.1693732738494873, |
| "step": 1884 |
| }, |
| { |
| "epoch": 2.1603665521191293, |
| "grad_norm": 0.23521366715431213, |
| "learning_rate": 2.7982907463208896e-06, |
| "loss": 0.8833960890769958, |
| "step": 1886 |
| }, |
| { |
| "epoch": 2.1626575028636883, |
| "grad_norm": 0.5896117091178894, |
| "learning_rate": 2.7892111845437416e-06, |
| "loss": 1.0457485914230347, |
| "step": 1888 |
| }, |
| { |
| "epoch": 2.1649484536082473, |
| "grad_norm": 0.4665268659591675, |
| "learning_rate": 2.780148911152135e-06, |
| "loss": 1.0155045986175537, |
| "step": 1890 |
| }, |
| { |
| "epoch": 2.1672394043528063, |
| "grad_norm": 0.43222132325172424, |
| "learning_rate": 2.7711039839418187e-06, |
| "loss": 0.9927984476089478, |
| "step": 1892 |
| }, |
| { |
| "epoch": 2.1695303550973652, |
| "grad_norm": 0.2742535471916199, |
| "learning_rate": 2.762076460597919e-06, |
| "loss": 1.0519921779632568, |
| "step": 1894 |
| }, |
| { |
| "epoch": 2.1718213058419242, |
| "grad_norm": 0.6122022867202759, |
| "learning_rate": 2.7530663986945605e-06, |
| "loss": 0.804986834526062, |
| "step": 1896 |
| }, |
| { |
| "epoch": 2.174112256586483, |
| "grad_norm": 0.3205280601978302, |
| "learning_rate": 2.7440738556945122e-06, |
| "loss": 0.9710903763771057, |
| "step": 1898 |
| }, |
| { |
| "epoch": 2.176403207331042, |
| "grad_norm": 0.5308868288993835, |
| "learning_rate": 2.735098888948811e-06, |
| "loss": 0.823322594165802, |
| "step": 1900 |
| }, |
| { |
| "epoch": 2.178694158075601, |
| "grad_norm": 1.2458512783050537, |
| "learning_rate": 2.726141555696399e-06, |
| "loss": 1.3204128742218018, |
| "step": 1902 |
| }, |
| { |
| "epoch": 2.18098510882016, |
| "grad_norm": 1.1205463409423828, |
| "learning_rate": 2.7172019130637605e-06, |
| "loss": 1.1062953472137451, |
| "step": 1904 |
| }, |
| { |
| "epoch": 2.183276059564719, |
| "grad_norm": 0.49585333466529846, |
| "learning_rate": 2.708280018064551e-06, |
| "loss": 0.5895834565162659, |
| "step": 1906 |
| }, |
| { |
| "epoch": 2.1855670103092786, |
| "grad_norm": 0.5067374110221863, |
| "learning_rate": 2.699375927599245e-06, |
| "loss": 1.037330985069275, |
| "step": 1908 |
| }, |
| { |
| "epoch": 2.1878579610538376, |
| "grad_norm": 0.9823523759841919, |
| "learning_rate": 2.6904896984547614e-06, |
| "loss": 0.5440244078636169, |
| "step": 1910 |
| }, |
| { |
| "epoch": 2.1901489117983965, |
| "grad_norm": 0.9039705991744995, |
| "learning_rate": 2.6816213873041086e-06, |
| "loss": 1.1244115829467773, |
| "step": 1912 |
| }, |
| { |
| "epoch": 2.1924398625429555, |
| "grad_norm": 1.6192059516906738, |
| "learning_rate": 2.6727710507060166e-06, |
| "loss": 0.4239127039909363, |
| "step": 1914 |
| }, |
| { |
| "epoch": 2.1947308132875145, |
| "grad_norm": 0.4529842436313629, |
| "learning_rate": 2.6639387451045894e-06, |
| "loss": 1.093721628189087, |
| "step": 1916 |
| }, |
| { |
| "epoch": 2.1970217640320735, |
| "grad_norm": 0.6420417428016663, |
| "learning_rate": 2.6551245268289293e-06, |
| "loss": 1.1479039192199707, |
| "step": 1918 |
| }, |
| { |
| "epoch": 2.1993127147766325, |
| "grad_norm": 0.8427514433860779, |
| "learning_rate": 2.6463284520927834e-06, |
| "loss": 1.0050065517425537, |
| "step": 1920 |
| }, |
| { |
| "epoch": 2.2016036655211915, |
| "grad_norm": 0.6339349150657654, |
| "learning_rate": 2.63755057699419e-06, |
| "loss": 0.9615905284881592, |
| "step": 1922 |
| }, |
| { |
| "epoch": 2.2038946162657505, |
| "grad_norm": 0.8383830189704895, |
| "learning_rate": 2.6287909575151166e-06, |
| "loss": 1.1090657711029053, |
| "step": 1924 |
| }, |
| { |
| "epoch": 2.2061855670103094, |
| "grad_norm": 1.1984935998916626, |
| "learning_rate": 2.620049649521103e-06, |
| "loss": 0.953253984451294, |
| "step": 1926 |
| }, |
| { |
| "epoch": 2.2084765177548684, |
| "grad_norm": 0.34425899386405945, |
| "learning_rate": 2.6113267087609018e-06, |
| "loss": 1.0870815515518188, |
| "step": 1928 |
| }, |
| { |
| "epoch": 2.2107674684994274, |
| "grad_norm": 1.677963376045227, |
| "learning_rate": 2.6026221908661307e-06, |
| "loss": 1.0642032623291016, |
| "step": 1930 |
| }, |
| { |
| "epoch": 2.2130584192439864, |
| "grad_norm": 0.594247043132782, |
| "learning_rate": 2.5939361513509124e-06, |
| "loss": 0.6523274183273315, |
| "step": 1932 |
| }, |
| { |
| "epoch": 2.2153493699885454, |
| "grad_norm": 0.458957314491272, |
| "learning_rate": 2.5852686456115194e-06, |
| "loss": 1.0752052068710327, |
| "step": 1934 |
| }, |
| { |
| "epoch": 2.2176403207331044, |
| "grad_norm": 0.31083378195762634, |
| "learning_rate": 2.5766197289260277e-06, |
| "loss": 1.0612359046936035, |
| "step": 1936 |
| }, |
| { |
| "epoch": 2.2199312714776633, |
| "grad_norm": 0.27045944333076477, |
| "learning_rate": 2.5679894564539513e-06, |
| "loss": 1.068912386894226, |
| "step": 1938 |
| }, |
| { |
| "epoch": 2.2222222222222223, |
| "grad_norm": 0.44911208748817444, |
| "learning_rate": 2.5593778832359062e-06, |
| "loss": 0.990754246711731, |
| "step": 1940 |
| }, |
| { |
| "epoch": 2.2245131729667813, |
| "grad_norm": 0.21117167174816132, |
| "learning_rate": 2.550785064193246e-06, |
| "loss": 0.8659780621528625, |
| "step": 1942 |
| }, |
| { |
| "epoch": 2.2268041237113403, |
| "grad_norm": 0.46905815601348877, |
| "learning_rate": 2.5422110541277225e-06, |
| "loss": 1.1481306552886963, |
| "step": 1944 |
| }, |
| { |
| "epoch": 2.2290950744558993, |
| "grad_norm": 2.0418505668640137, |
| "learning_rate": 2.5336559077211285e-06, |
| "loss": 0.8427814841270447, |
| "step": 1946 |
| }, |
| { |
| "epoch": 2.2313860252004583, |
| "grad_norm": 0.7220437526702881, |
| "learning_rate": 2.5251196795349476e-06, |
| "loss": 0.7763566374778748, |
| "step": 1948 |
| }, |
| { |
| "epoch": 2.2336769759450172, |
| "grad_norm": 0.3684818148612976, |
| "learning_rate": 2.5166024240100175e-06, |
| "loss": 1.0701335668563843, |
| "step": 1950 |
| }, |
| { |
| "epoch": 2.2359679266895762, |
| "grad_norm": 0.17287565767765045, |
| "learning_rate": 2.508104195466169e-06, |
| "loss": 1.0804567337036133, |
| "step": 1952 |
| }, |
| { |
| "epoch": 2.238258877434135, |
| "grad_norm": 0.6882048845291138, |
| "learning_rate": 2.4996250481018917e-06, |
| "loss": 0.618576169013977, |
| "step": 1954 |
| }, |
| { |
| "epoch": 2.240549828178694, |
| "grad_norm": 0.9598178863525391, |
| "learning_rate": 2.491165035993977e-06, |
| "loss": 1.0526471138000488, |
| "step": 1956 |
| }, |
| { |
| "epoch": 2.242840778923253, |
| "grad_norm": 0.22992214560508728, |
| "learning_rate": 2.4827242130971822e-06, |
| "loss": 0.9994850158691406, |
| "step": 1958 |
| }, |
| { |
| "epoch": 2.245131729667812, |
| "grad_norm": 0.21707294881343842, |
| "learning_rate": 2.4743026332438835e-06, |
| "loss": 1.0506229400634766, |
| "step": 1960 |
| }, |
| { |
| "epoch": 2.247422680412371, |
| "grad_norm": 0.894084095954895, |
| "learning_rate": 2.4659003501437302e-06, |
| "loss": 1.073114037513733, |
| "step": 1962 |
| }, |
| { |
| "epoch": 2.24971363115693, |
| "grad_norm": 0.47775256633758545, |
| "learning_rate": 2.4575174173833076e-06, |
| "loss": 1.0495290756225586, |
| "step": 1964 |
| }, |
| { |
| "epoch": 2.252004581901489, |
| "grad_norm": 0.6680848598480225, |
| "learning_rate": 2.449153888425786e-06, |
| "loss": 1.1938798427581787, |
| "step": 1966 |
| }, |
| { |
| "epoch": 2.254295532646048, |
| "grad_norm": 0.8610414862632751, |
| "learning_rate": 2.4408098166105905e-06, |
| "loss": 1.1015326976776123, |
| "step": 1968 |
| }, |
| { |
| "epoch": 2.256586483390607, |
| "grad_norm": 0.2719790041446686, |
| "learning_rate": 2.4324852551530546e-06, |
| "loss": 1.1828646659851074, |
| "step": 1970 |
| }, |
| { |
| "epoch": 2.258877434135166, |
| "grad_norm": 0.3849657475948334, |
| "learning_rate": 2.424180257144082e-06, |
| "loss": 0.9668605327606201, |
| "step": 1972 |
| }, |
| { |
| "epoch": 2.261168384879725, |
| "grad_norm": 0.24315226078033447, |
| "learning_rate": 2.4158948755498097e-06, |
| "loss": 1.0409603118896484, |
| "step": 1974 |
| }, |
| { |
| "epoch": 2.263459335624284, |
| "grad_norm": 0.3323868215084076, |
| "learning_rate": 2.407629163211264e-06, |
| "loss": 1.0804579257965088, |
| "step": 1976 |
| }, |
| { |
| "epoch": 2.265750286368843, |
| "grad_norm": 0.7172708511352539, |
| "learning_rate": 2.399383172844033e-06, |
| "loss": 1.0808374881744385, |
| "step": 1978 |
| }, |
| { |
| "epoch": 2.268041237113402, |
| "grad_norm": 0.48791539669036865, |
| "learning_rate": 2.3911569570379226e-06, |
| "loss": 1.0492467880249023, |
| "step": 1980 |
| }, |
| { |
| "epoch": 2.270332187857961, |
| "grad_norm": 0.6731001138687134, |
| "learning_rate": 2.3829505682566274e-06, |
| "loss": 0.8396996259689331, |
| "step": 1982 |
| }, |
| { |
| "epoch": 2.27262313860252, |
| "grad_norm": 0.2888670563697815, |
| "learning_rate": 2.374764058837388e-06, |
| "loss": 1.0629734992980957, |
| "step": 1984 |
| }, |
| { |
| "epoch": 2.274914089347079, |
| "grad_norm": 0.5932695269584656, |
| "learning_rate": 2.366597480990663e-06, |
| "loss": 1.1070618629455566, |
| "step": 1986 |
| }, |
| { |
| "epoch": 2.277205040091638, |
| "grad_norm": 0.4856787621974945, |
| "learning_rate": 2.3584508867998023e-06, |
| "loss": 1.13844633102417, |
| "step": 1988 |
| }, |
| { |
| "epoch": 2.279495990836197, |
| "grad_norm": 0.487051784992218, |
| "learning_rate": 2.350324328220697e-06, |
| "loss": 1.1154626607894897, |
| "step": 1990 |
| }, |
| { |
| "epoch": 2.281786941580756, |
| "grad_norm": 0.29695621132850647, |
| "learning_rate": 2.3422178570814656e-06, |
| "loss": 1.1702851057052612, |
| "step": 1992 |
| }, |
| { |
| "epoch": 2.284077892325315, |
| "grad_norm": 0.23192717134952545, |
| "learning_rate": 2.3341315250821102e-06, |
| "loss": 0.9374110698699951, |
| "step": 1994 |
| }, |
| { |
| "epoch": 2.286368843069874, |
| "grad_norm": 1.2225816249847412, |
| "learning_rate": 2.326065383794202e-06, |
| "loss": 0.7910254001617432, |
| "step": 1996 |
| }, |
| { |
| "epoch": 2.288659793814433, |
| "grad_norm": 0.7794654369354248, |
| "learning_rate": 2.3180194846605367e-06, |
| "loss": 1.1501424312591553, |
| "step": 1998 |
| }, |
| { |
| "epoch": 2.290950744558992, |
| "grad_norm": 0.6967248916625977, |
| "learning_rate": 2.3099938789948147e-06, |
| "loss": 0.9952400326728821, |
| "step": 2000 |
| }, |
| { |
| "epoch": 2.293241695303551, |
| "grad_norm": 1.1127731800079346, |
| "learning_rate": 2.301988617981314e-06, |
| "loss": 0.6200200915336609, |
| "step": 2002 |
| }, |
| { |
| "epoch": 2.29553264604811, |
| "grad_norm": 1.9862406253814697, |
| "learning_rate": 2.294003752674557e-06, |
| "loss": 0.8297408223152161, |
| "step": 2004 |
| }, |
| { |
| "epoch": 2.297823596792669, |
| "grad_norm": 1.128572702407837, |
| "learning_rate": 2.2860393339990005e-06, |
| "loss": 0.48388516902923584, |
| "step": 2006 |
| }, |
| { |
| "epoch": 2.3001145475372278, |
| "grad_norm": 0.25328806042671204, |
| "learning_rate": 2.2780954127486883e-06, |
| "loss": 1.1547516584396362, |
| "step": 2008 |
| }, |
| { |
| "epoch": 2.3024054982817868, |
| "grad_norm": 14.397195816040039, |
| "learning_rate": 2.270172039586948e-06, |
| "loss": 0.8220672607421875, |
| "step": 2010 |
| }, |
| { |
| "epoch": 2.3046964490263457, |
| "grad_norm": 0.5289891958236694, |
| "learning_rate": 2.2622692650460516e-06, |
| "loss": 1.0384976863861084, |
| "step": 2012 |
| }, |
| { |
| "epoch": 2.3069873997709047, |
| "grad_norm": 0.2456163614988327, |
| "learning_rate": 2.25438713952691e-06, |
| "loss": 0.9674023389816284, |
| "step": 2014 |
| }, |
| { |
| "epoch": 2.3092783505154637, |
| "grad_norm": 0.6010926961898804, |
| "learning_rate": 2.2465257132987388e-06, |
| "loss": 0.9032200574874878, |
| "step": 2016 |
| }, |
| { |
| "epoch": 2.3115693012600227, |
| "grad_norm": 0.42077237367630005, |
| "learning_rate": 2.2386850364987357e-06, |
| "loss": 1.0571644306182861, |
| "step": 2018 |
| }, |
| { |
| "epoch": 2.3138602520045817, |
| "grad_norm": 0.4678718149662018, |
| "learning_rate": 2.230865159131774e-06, |
| "loss": 0.12346747517585754, |
| "step": 2020 |
| }, |
| { |
| "epoch": 2.3161512027491407, |
| "grad_norm": 0.7129454016685486, |
| "learning_rate": 2.2230661310700717e-06, |
| "loss": 1.0253472328186035, |
| "step": 2022 |
| }, |
| { |
| "epoch": 2.3184421534936996, |
| "grad_norm": 1.1142655611038208, |
| "learning_rate": 2.215288002052882e-06, |
| "loss": 0.5160566568374634, |
| "step": 2024 |
| }, |
| { |
| "epoch": 2.320733104238259, |
| "grad_norm": 0.5436863899230957, |
| "learning_rate": 2.207530821686166e-06, |
| "loss": 1.054995059967041, |
| "step": 2026 |
| }, |
| { |
| "epoch": 2.323024054982818, |
| "grad_norm": 9.801424980163574, |
| "learning_rate": 2.199794639442287e-06, |
| "loss": 0.83745938539505, |
| "step": 2028 |
| }, |
| { |
| "epoch": 2.325315005727377, |
| "grad_norm": 0.7549030184745789, |
| "learning_rate": 2.1920795046596887e-06, |
| "loss": 1.2911202907562256, |
| "step": 2030 |
| }, |
| { |
| "epoch": 2.327605956471936, |
| "grad_norm": 0.5055285096168518, |
| "learning_rate": 2.1843854665425816e-06, |
| "loss": 0.5712074041366577, |
| "step": 2032 |
| }, |
| { |
| "epoch": 2.329896907216495, |
| "grad_norm": 0.44524145126342773, |
| "learning_rate": 2.176712574160632e-06, |
| "loss": 0.9774595499038696, |
| "step": 2034 |
| }, |
| { |
| "epoch": 2.332187857961054, |
| "grad_norm": 0.8350621461868286, |
| "learning_rate": 2.1690608764486417e-06, |
| "loss": 1.0842552185058594, |
| "step": 2036 |
| }, |
| { |
| "epoch": 2.334478808705613, |
| "grad_norm": 0.4929974377155304, |
| "learning_rate": 2.1614304222062447e-06, |
| "loss": 0.7975589036941528, |
| "step": 2038 |
| }, |
| { |
| "epoch": 2.336769759450172, |
| "grad_norm": 0.21416163444519043, |
| "learning_rate": 2.1538212600975927e-06, |
| "loss": 1.016804575920105, |
| "step": 2040 |
| }, |
| { |
| "epoch": 2.339060710194731, |
| "grad_norm": 2.911141872406006, |
| "learning_rate": 2.146233438651042e-06, |
| "loss": 0.9362708330154419, |
| "step": 2042 |
| }, |
| { |
| "epoch": 2.34135166093929, |
| "grad_norm": 0.2715796232223511, |
| "learning_rate": 2.138667006258849e-06, |
| "loss": 0.7574849128723145, |
| "step": 2044 |
| }, |
| { |
| "epoch": 2.343642611683849, |
| "grad_norm": 1.3292779922485352, |
| "learning_rate": 2.131122011176854e-06, |
| "loss": 1.0263173580169678, |
| "step": 2046 |
| }, |
| { |
| "epoch": 2.345933562428408, |
| "grad_norm": 2.113558530807495, |
| "learning_rate": 2.1235985015241832e-06, |
| "loss": 0.8715229034423828, |
| "step": 2048 |
| }, |
| { |
| "epoch": 2.348224513172967, |
| "grad_norm": 0.36145395040512085, |
| "learning_rate": 2.1160965252829344e-06, |
| "loss": 1.086829423904419, |
| "step": 2050 |
| }, |
| { |
| "epoch": 2.350515463917526, |
| "grad_norm": 2.3738577365875244, |
| "learning_rate": 2.108616130297876e-06, |
| "loss": 0.6945520639419556, |
| "step": 2052 |
| }, |
| { |
| "epoch": 2.352806414662085, |
| "grad_norm": 0.572725772857666, |
| "learning_rate": 2.101157364276134e-06, |
| "loss": 1.1936531066894531, |
| "step": 2054 |
| }, |
| { |
| "epoch": 2.355097365406644, |
| "grad_norm": 0.31756481528282166, |
| "learning_rate": 2.0937202747868974e-06, |
| "loss": 1.0508008003234863, |
| "step": 2056 |
| }, |
| { |
| "epoch": 2.357388316151203, |
| "grad_norm": 0.651594340801239, |
| "learning_rate": 2.08630490926111e-06, |
| "loss": 0.9217154383659363, |
| "step": 2058 |
| }, |
| { |
| "epoch": 2.359679266895762, |
| "grad_norm": 0.4735032618045807, |
| "learning_rate": 2.0789113149911678e-06, |
| "loss": 1.1117918491363525, |
| "step": 2060 |
| }, |
| { |
| "epoch": 2.361970217640321, |
| "grad_norm": 0.9481447339057922, |
| "learning_rate": 2.071539539130619e-06, |
| "loss": 0.8254319429397583, |
| "step": 2062 |
| }, |
| { |
| "epoch": 2.3642611683848798, |
| "grad_norm": 0.1962810903787613, |
| "learning_rate": 2.0641896286938586e-06, |
| "loss": 1.1184422969818115, |
| "step": 2064 |
| }, |
| { |
| "epoch": 2.3665521191294387, |
| "grad_norm": 1.010116696357727, |
| "learning_rate": 2.0568616305558362e-06, |
| "loss": 0.9059109687805176, |
| "step": 2066 |
| }, |
| { |
| "epoch": 2.3688430698739977, |
| "grad_norm": 0.5738312602043152, |
| "learning_rate": 2.0495555914517517e-06, |
| "loss": 1.0886991024017334, |
| "step": 2068 |
| }, |
| { |
| "epoch": 2.3711340206185567, |
| "grad_norm": 0.34965646266937256, |
| "learning_rate": 2.042271557976758e-06, |
| "loss": 1.0940256118774414, |
| "step": 2070 |
| }, |
| { |
| "epoch": 2.3734249713631157, |
| "grad_norm": 6.681120872497559, |
| "learning_rate": 2.0350095765856674e-06, |
| "loss": 0.7195948362350464, |
| "step": 2072 |
| }, |
| { |
| "epoch": 2.3757159221076747, |
| "grad_norm": 0.38432735204696655, |
| "learning_rate": 2.0277696935926465e-06, |
| "loss": 1.0875439643859863, |
| "step": 2074 |
| }, |
| { |
| "epoch": 2.3780068728522337, |
| "grad_norm": 0.3983290493488312, |
| "learning_rate": 2.020551955170932e-06, |
| "loss": 0.9240410327911377, |
| "step": 2076 |
| }, |
| { |
| "epoch": 2.3802978235967927, |
| "grad_norm": 0.3969801664352417, |
| "learning_rate": 2.0133564073525283e-06, |
| "loss": 0.7710465788841248, |
| "step": 2078 |
| }, |
| { |
| "epoch": 2.3825887743413516, |
| "grad_norm": 0.5255761742591858, |
| "learning_rate": 2.0061830960279182e-06, |
| "loss": 1.0027873516082764, |
| "step": 2080 |
| }, |
| { |
| "epoch": 2.3848797250859106, |
| "grad_norm": 0.7259711623191833, |
| "learning_rate": 1.9990320669457664e-06, |
| "loss": 1.1011751890182495, |
| "step": 2082 |
| }, |
| { |
| "epoch": 2.3871706758304696, |
| "grad_norm": 0.20675049722194672, |
| "learning_rate": 1.9919033657126297e-06, |
| "loss": 1.081270694732666, |
| "step": 2084 |
| }, |
| { |
| "epoch": 2.3894616265750286, |
| "grad_norm": 0.7646600008010864, |
| "learning_rate": 1.9847970377926723e-06, |
| "loss": 1.105683445930481, |
| "step": 2086 |
| }, |
| { |
| "epoch": 2.3917525773195876, |
| "grad_norm": 0.39679569005966187, |
| "learning_rate": 1.9777131285073608e-06, |
| "loss": 1.0713748931884766, |
| "step": 2088 |
| }, |
| { |
| "epoch": 2.3940435280641466, |
| "grad_norm": 0.24506868422031403, |
| "learning_rate": 1.9706516830351915e-06, |
| "loss": 1.2177720069885254, |
| "step": 2090 |
| }, |
| { |
| "epoch": 2.3963344788087055, |
| "grad_norm": 0.7486739158630371, |
| "learning_rate": 1.963612746411389e-06, |
| "loss": 1.154736042022705, |
| "step": 2092 |
| }, |
| { |
| "epoch": 2.3986254295532645, |
| "grad_norm": 0.31168675422668457, |
| "learning_rate": 1.9565963635276326e-06, |
| "loss": 1.0588105916976929, |
| "step": 2094 |
| }, |
| { |
| "epoch": 2.4009163802978235, |
| "grad_norm": 0.920814573764801, |
| "learning_rate": 1.949602579131754e-06, |
| "loss": 1.024669885635376, |
| "step": 2096 |
| }, |
| { |
| "epoch": 2.4032073310423825, |
| "grad_norm": 0.468282014131546, |
| "learning_rate": 1.9426314378274663e-06, |
| "loss": 1.0923664569854736, |
| "step": 2098 |
| }, |
| { |
| "epoch": 2.4054982817869415, |
| "grad_norm": 0.7058407068252563, |
| "learning_rate": 1.9356829840740705e-06, |
| "loss": 0.5762814283370972, |
| "step": 2100 |
| }, |
| { |
| "epoch": 2.4077892325315005, |
| "grad_norm": 0.5558688044548035, |
| "learning_rate": 1.928757262186177e-06, |
| "loss": 0.5579245090484619, |
| "step": 2102 |
| }, |
| { |
| "epoch": 2.4100801832760594, |
| "grad_norm": 0.5086036920547485, |
| "learning_rate": 1.9218543163334198e-06, |
| "loss": 1.1277086734771729, |
| "step": 2104 |
| }, |
| { |
| "epoch": 2.4123711340206184, |
| "grad_norm": 0.41747531294822693, |
| "learning_rate": 1.914974190540174e-06, |
| "loss": 1.0628433227539062, |
| "step": 2106 |
| }, |
| { |
| "epoch": 2.4146620847651774, |
| "grad_norm": 0.41488274931907654, |
| "learning_rate": 1.9081169286852827e-06, |
| "loss": 0.9923676252365112, |
| "step": 2108 |
| }, |
| { |
| "epoch": 2.4169530355097364, |
| "grad_norm": 0.709517776966095, |
| "learning_rate": 1.9012825745017633e-06, |
| "loss": 0.5995164513587952, |
| "step": 2110 |
| }, |
| { |
| "epoch": 2.4192439862542954, |
| "grad_norm": 2.2270102500915527, |
| "learning_rate": 1.8944711715765446e-06, |
| "loss": 1.2495015859603882, |
| "step": 2112 |
| }, |
| { |
| "epoch": 2.4215349369988544, |
| "grad_norm": 0.5543949604034424, |
| "learning_rate": 1.8876827633501784e-06, |
| "loss": 1.1288328170776367, |
| "step": 2114 |
| }, |
| { |
| "epoch": 2.4238258877434133, |
| "grad_norm": 0.46857401728630066, |
| "learning_rate": 1.8809173931165614e-06, |
| "loss": 0.9634852409362793, |
| "step": 2116 |
| }, |
| { |
| "epoch": 2.4261168384879723, |
| "grad_norm": 0.5771682262420654, |
| "learning_rate": 1.8741751040226663e-06, |
| "loss": 1.1221890449523926, |
| "step": 2118 |
| }, |
| { |
| "epoch": 2.4284077892325313, |
| "grad_norm": 1.220275640487671, |
| "learning_rate": 1.8674559390682629e-06, |
| "loss": 1.10588538646698, |
| "step": 2120 |
| }, |
| { |
| "epoch": 2.4306987399770907, |
| "grad_norm": 0.17475640773773193, |
| "learning_rate": 1.8607599411056444e-06, |
| "loss": 0.8858648538589478, |
| "step": 2122 |
| }, |
| { |
| "epoch": 2.4329896907216497, |
| "grad_norm": 0.6616489887237549, |
| "learning_rate": 1.8540871528393497e-06, |
| "loss": 0.996634304523468, |
| "step": 2124 |
| }, |
| { |
| "epoch": 2.4352806414662087, |
| "grad_norm": 1.1846482753753662, |
| "learning_rate": 1.8474376168259003e-06, |
| "loss": 1.125913143157959, |
| "step": 2126 |
| }, |
| { |
| "epoch": 2.4375715922107677, |
| "grad_norm": 0.5403264760971069, |
| "learning_rate": 1.84081137547352e-06, |
| "loss": 1.128383755683899, |
| "step": 2128 |
| }, |
| { |
| "epoch": 2.4398625429553267, |
| "grad_norm": 0.36700180172920227, |
| "learning_rate": 1.83420847104187e-06, |
| "loss": 0.5875706672668457, |
| "step": 2130 |
| }, |
| { |
| "epoch": 2.4421534936998857, |
| "grad_norm": 0.22898639738559723, |
| "learning_rate": 1.8276289456417784e-06, |
| "loss": 1.0603092908859253, |
| "step": 2132 |
| }, |
| { |
| "epoch": 2.4444444444444446, |
| "grad_norm": 0.23132291436195374, |
| "learning_rate": 1.821072841234966e-06, |
| "loss": 1.020564079284668, |
| "step": 2134 |
| }, |
| { |
| "epoch": 2.4467353951890036, |
| "grad_norm": 0.31519776582717896, |
| "learning_rate": 1.8145401996337877e-06, |
| "loss": 1.0668442249298096, |
| "step": 2136 |
| }, |
| { |
| "epoch": 2.4490263459335626, |
| "grad_norm": 0.2912147641181946, |
| "learning_rate": 1.80803106250096e-06, |
| "loss": 0.9594171047210693, |
| "step": 2138 |
| }, |
| { |
| "epoch": 2.4513172966781216, |
| "grad_norm": 1.1906369924545288, |
| "learning_rate": 1.8015454713492985e-06, |
| "loss": 1.1961896419525146, |
| "step": 2140 |
| }, |
| { |
| "epoch": 2.4536082474226806, |
| "grad_norm": 0.44794419407844543, |
| "learning_rate": 1.7950834675414498e-06, |
| "loss": 1.0605556964874268, |
| "step": 2142 |
| }, |
| { |
| "epoch": 2.4558991981672396, |
| "grad_norm": 0.4688771665096283, |
| "learning_rate": 1.788645092289627e-06, |
| "loss": 1.1090596914291382, |
| "step": 2144 |
| }, |
| { |
| "epoch": 2.4581901489117985, |
| "grad_norm": 0.5896684527397156, |
| "learning_rate": 1.7822303866553536e-06, |
| "loss": 1.0037330389022827, |
| "step": 2146 |
| }, |
| { |
| "epoch": 2.4604810996563575, |
| "grad_norm": 1.4058597087860107, |
| "learning_rate": 1.7758393915491957e-06, |
| "loss": 0.5776699185371399, |
| "step": 2148 |
| }, |
| { |
| "epoch": 2.4627720504009165, |
| "grad_norm": 0.20505838096141815, |
| "learning_rate": 1.7694721477305026e-06, |
| "loss": 1.0694668292999268, |
| "step": 2150 |
| }, |
| { |
| "epoch": 2.4650630011454755, |
| "grad_norm": 0.49451765418052673, |
| "learning_rate": 1.7631286958071444e-06, |
| "loss": 1.1405375003814697, |
| "step": 2152 |
| }, |
| { |
| "epoch": 2.4673539518900345, |
| "grad_norm": 0.407114177942276, |
| "learning_rate": 1.7568090762352591e-06, |
| "loss": 0.5926585793495178, |
| "step": 2154 |
| }, |
| { |
| "epoch": 2.4696449026345935, |
| "grad_norm": 0.3963899612426758, |
| "learning_rate": 1.7505133293189898e-06, |
| "loss": 1.0959696769714355, |
| "step": 2156 |
| }, |
| { |
| "epoch": 2.4719358533791524, |
| "grad_norm": 1.4024752378463745, |
| "learning_rate": 1.7442414952102279e-06, |
| "loss": 0.6533874273300171, |
| "step": 2158 |
| }, |
| { |
| "epoch": 2.4742268041237114, |
| "grad_norm": 0.31477341055870056, |
| "learning_rate": 1.7379936139083604e-06, |
| "loss": 1.1235566139221191, |
| "step": 2160 |
| }, |
| { |
| "epoch": 2.4765177548682704, |
| "grad_norm": 0.45664510130882263, |
| "learning_rate": 1.731769725260008e-06, |
| "loss": 0.9252137541770935, |
| "step": 2162 |
| }, |
| { |
| "epoch": 2.4788087056128294, |
| "grad_norm": 0.1995607316493988, |
| "learning_rate": 1.7255698689587774e-06, |
| "loss": 0.999367356300354, |
| "step": 2164 |
| }, |
| { |
| "epoch": 2.4810996563573884, |
| "grad_norm": 0.6067396402359009, |
| "learning_rate": 1.719394084545011e-06, |
| "loss": 1.191161870956421, |
| "step": 2166 |
| }, |
| { |
| "epoch": 2.4833906071019474, |
| "grad_norm": 0.2916881740093231, |
| "learning_rate": 1.7132424114055212e-06, |
| "loss": 1.0785753726959229, |
| "step": 2168 |
| }, |
| { |
| "epoch": 2.4856815578465064, |
| "grad_norm": 0.27279433608055115, |
| "learning_rate": 1.7071148887733552e-06, |
| "loss": 1.031059980392456, |
| "step": 2170 |
| }, |
| { |
| "epoch": 2.4879725085910653, |
| "grad_norm": 0.592989444732666, |
| "learning_rate": 1.7010115557275297e-06, |
| "loss": 0.9627765417098999, |
| "step": 2172 |
| }, |
| { |
| "epoch": 2.4902634593356243, |
| "grad_norm": 8.736328125, |
| "learning_rate": 1.6949324511927993e-06, |
| "loss": 0.6805863380432129, |
| "step": 2174 |
| }, |
| { |
| "epoch": 2.4925544100801833, |
| "grad_norm": 0.3850440979003906, |
| "learning_rate": 1.6888776139393892e-06, |
| "loss": 1.0184849500656128, |
| "step": 2176 |
| }, |
| { |
| "epoch": 2.4948453608247423, |
| "grad_norm": 0.26290857791900635, |
| "learning_rate": 1.6828470825827626e-06, |
| "loss": 1.1698343753814697, |
| "step": 2178 |
| }, |
| { |
| "epoch": 2.4971363115693013, |
| "grad_norm": 1.0720175504684448, |
| "learning_rate": 1.6768408955833653e-06, |
| "loss": 1.0607209205627441, |
| "step": 2180 |
| }, |
| { |
| "epoch": 2.4994272623138603, |
| "grad_norm": 0.7246781587600708, |
| "learning_rate": 1.6708590912463878e-06, |
| "loss": 0.7118844389915466, |
| "step": 2182 |
| }, |
| { |
| "epoch": 2.5017182130584192, |
| "grad_norm": 0.519919216632843, |
| "learning_rate": 1.6649017077215146e-06, |
| "loss": 1.1157710552215576, |
| "step": 2184 |
| }, |
| { |
| "epoch": 2.504009163802978, |
| "grad_norm": 0.5193498134613037, |
| "learning_rate": 1.6589687830026835e-06, |
| "loss": 1.1989827156066895, |
| "step": 2186 |
| }, |
| { |
| "epoch": 2.506300114547537, |
| "grad_norm": 0.5242924690246582, |
| "learning_rate": 1.653060354927844e-06, |
| "loss": 1.0901808738708496, |
| "step": 2188 |
| }, |
| { |
| "epoch": 2.508591065292096, |
| "grad_norm": 1.705298900604248, |
| "learning_rate": 1.647176461178714e-06, |
| "loss": 0.5451116561889648, |
| "step": 2190 |
| }, |
| { |
| "epoch": 2.510882016036655, |
| "grad_norm": 0.7543349862098694, |
| "learning_rate": 1.6413171392805444e-06, |
| "loss": 0.8635871410369873, |
| "step": 2192 |
| }, |
| { |
| "epoch": 2.513172966781214, |
| "grad_norm": 0.4033338725566864, |
| "learning_rate": 1.6354824266018726e-06, |
| "loss": 1.0020837783813477, |
| "step": 2194 |
| }, |
| { |
| "epoch": 2.515463917525773, |
| "grad_norm": 0.5562148094177246, |
| "learning_rate": 1.6296723603542874e-06, |
| "loss": 0.9753223657608032, |
| "step": 2196 |
| }, |
| { |
| "epoch": 2.517754868270332, |
| "grad_norm": 0.8762247562408447, |
| "learning_rate": 1.6238869775921947e-06, |
| "loss": 0.9873074293136597, |
| "step": 2198 |
| }, |
| { |
| "epoch": 2.520045819014891, |
| "grad_norm": 0.38216540217399597, |
| "learning_rate": 1.6181263152125761e-06, |
| "loss": 1.1414031982421875, |
| "step": 2200 |
| }, |
| { |
| "epoch": 2.52233676975945, |
| "grad_norm": 0.3067186176776886, |
| "learning_rate": 1.6123904099547593e-06, |
| "loss": 0.742071270942688, |
| "step": 2202 |
| }, |
| { |
| "epoch": 2.524627720504009, |
| "grad_norm": 0.38689979910850525, |
| "learning_rate": 1.606679298400175e-06, |
| "loss": 1.0735660791397095, |
| "step": 2204 |
| }, |
| { |
| "epoch": 2.526918671248568, |
| "grad_norm": 0.8441147804260254, |
| "learning_rate": 1.6009930169721332e-06, |
| "loss": 0.389965295791626, |
| "step": 2206 |
| }, |
| { |
| "epoch": 2.529209621993127, |
| "grad_norm": 0.39243248105049133, |
| "learning_rate": 1.595331601935586e-06, |
| "loss": 0.9524307250976562, |
| "step": 2208 |
| }, |
| { |
| "epoch": 2.531500572737686, |
| "grad_norm": 0.27771660685539246, |
| "learning_rate": 1.5896950893968968e-06, |
| "loss": 1.1004953384399414, |
| "step": 2210 |
| }, |
| { |
| "epoch": 2.533791523482245, |
| "grad_norm": 0.509863555431366, |
| "learning_rate": 1.584083515303611e-06, |
| "loss": 0.6490231156349182, |
| "step": 2212 |
| }, |
| { |
| "epoch": 2.536082474226804, |
| "grad_norm": 0.1981062889099121, |
| "learning_rate": 1.5784969154442228e-06, |
| "loss": 1.1196446418762207, |
| "step": 2214 |
| }, |
| { |
| "epoch": 2.538373424971363, |
| "grad_norm": 0.3844130337238312, |
| "learning_rate": 1.5729353254479544e-06, |
| "loss": 0.9391459226608276, |
| "step": 2216 |
| }, |
| { |
| "epoch": 2.540664375715922, |
| "grad_norm": 1.6460623741149902, |
| "learning_rate": 1.5673987807845224e-06, |
| "loss": 0.4223039746284485, |
| "step": 2218 |
| }, |
| { |
| "epoch": 2.542955326460481, |
| "grad_norm": 0.9630303382873535, |
| "learning_rate": 1.561887316763914e-06, |
| "loss": 0.7680240273475647, |
| "step": 2220 |
| }, |
| { |
| "epoch": 2.54524627720504, |
| "grad_norm": 0.8750276565551758, |
| "learning_rate": 1.5564009685361631e-06, |
| "loss": 1.1402933597564697, |
| "step": 2222 |
| }, |
| { |
| "epoch": 2.547537227949599, |
| "grad_norm": 0.29777172207832336, |
| "learning_rate": 1.5509397710911206e-06, |
| "loss": 1.0347285270690918, |
| "step": 2224 |
| }, |
| { |
| "epoch": 2.549828178694158, |
| "grad_norm": 0.5505490899085999, |
| "learning_rate": 1.545503759258239e-06, |
| "loss": 0.9146395921707153, |
| "step": 2226 |
| }, |
| { |
| "epoch": 2.552119129438717, |
| "grad_norm": 0.372688889503479, |
| "learning_rate": 1.5400929677063453e-06, |
| "loss": 0.9713397026062012, |
| "step": 2228 |
| }, |
| { |
| "epoch": 2.554410080183276, |
| "grad_norm": 0.9383296966552734, |
| "learning_rate": 1.534707430943422e-06, |
| "loss": 1.0299038887023926, |
| "step": 2230 |
| }, |
| { |
| "epoch": 2.556701030927835, |
| "grad_norm": 0.24810487031936646, |
| "learning_rate": 1.5293471833163836e-06, |
| "loss": 0.8337885141372681, |
| "step": 2232 |
| }, |
| { |
| "epoch": 2.558991981672394, |
| "grad_norm": 0.6177897453308105, |
| "learning_rate": 1.5240122590108615e-06, |
| "loss": 1.1169462203979492, |
| "step": 2234 |
| }, |
| { |
| "epoch": 2.561282932416953, |
| "grad_norm": 0.3793282210826874, |
| "learning_rate": 1.5187026920509857e-06, |
| "loss": 0.8278145790100098, |
| "step": 2236 |
| }, |
| { |
| "epoch": 2.563573883161512, |
| "grad_norm": 0.764354944229126, |
| "learning_rate": 1.5134185162991653e-06, |
| "loss": 1.032823920249939, |
| "step": 2238 |
| }, |
| { |
| "epoch": 2.565864833906071, |
| "grad_norm": 0.2363336831331253, |
| "learning_rate": 1.5081597654558753e-06, |
| "loss": 1.0410435199737549, |
| "step": 2240 |
| }, |
| { |
| "epoch": 2.5681557846506298, |
| "grad_norm": 0.6717326045036316, |
| "learning_rate": 1.5029264730594366e-06, |
| "loss": 1.0079201459884644, |
| "step": 2242 |
| }, |
| { |
| "epoch": 2.5704467353951888, |
| "grad_norm": 0.8275331258773804, |
| "learning_rate": 1.49771867248581e-06, |
| "loss": 1.1241486072540283, |
| "step": 2244 |
| }, |
| { |
| "epoch": 2.5727376861397477, |
| "grad_norm": 0.5479764938354492, |
| "learning_rate": 1.4925363969483764e-06, |
| "loss": 1.2064895629882812, |
| "step": 2246 |
| }, |
| { |
| "epoch": 2.5750286368843067, |
| "grad_norm": 0.29610878229141235, |
| "learning_rate": 1.48737967949773e-06, |
| "loss": 1.080405831336975, |
| "step": 2248 |
| }, |
| { |
| "epoch": 2.5773195876288657, |
| "grad_norm": 0.35893121361732483, |
| "learning_rate": 1.4822485530214636e-06, |
| "loss": 0.9980157017707825, |
| "step": 2250 |
| }, |
| { |
| "epoch": 2.579610538373425, |
| "grad_norm": 0.545345664024353, |
| "learning_rate": 1.4771430502439585e-06, |
| "loss": 1.0427041053771973, |
| "step": 2252 |
| }, |
| { |
| "epoch": 2.581901489117984, |
| "grad_norm": 0.6085090637207031, |
| "learning_rate": 1.4720632037261837e-06, |
| "loss": 1.0622684955596924, |
| "step": 2254 |
| }, |
| { |
| "epoch": 2.584192439862543, |
| "grad_norm": 0.4852236211299896, |
| "learning_rate": 1.4670090458654752e-06, |
| "loss": 1.002990484237671, |
| "step": 2256 |
| }, |
| { |
| "epoch": 2.586483390607102, |
| "grad_norm": 0.37894105911254883, |
| "learning_rate": 1.4619806088953423e-06, |
| "loss": 1.0157802104949951, |
| "step": 2258 |
| }, |
| { |
| "epoch": 2.588774341351661, |
| "grad_norm": 0.3049786686897278, |
| "learning_rate": 1.4569779248852514e-06, |
| "loss": 1.032530426979065, |
| "step": 2260 |
| }, |
| { |
| "epoch": 2.59106529209622, |
| "grad_norm": 0.3798280954360962, |
| "learning_rate": 1.4520010257404282e-06, |
| "loss": 1.0160043239593506, |
| "step": 2262 |
| }, |
| { |
| "epoch": 2.593356242840779, |
| "grad_norm": 0.41504234075546265, |
| "learning_rate": 1.4470499432016564e-06, |
| "loss": 1.0070414543151855, |
| "step": 2264 |
| }, |
| { |
| "epoch": 2.595647193585338, |
| "grad_norm": 0.5342398285865784, |
| "learning_rate": 1.4421247088450638e-06, |
| "loss": 0.3559042811393738, |
| "step": 2266 |
| }, |
| { |
| "epoch": 2.597938144329897, |
| "grad_norm": 1.0337114334106445, |
| "learning_rate": 1.4372253540819342e-06, |
| "loss": 1.1760070323944092, |
| "step": 2268 |
| }, |
| { |
| "epoch": 2.600229095074456, |
| "grad_norm": 0.6372706890106201, |
| "learning_rate": 1.4323519101584956e-06, |
| "loss": 1.1206852197647095, |
| "step": 2270 |
| }, |
| { |
| "epoch": 2.602520045819015, |
| "grad_norm": 0.23677659034729004, |
| "learning_rate": 1.4275044081557336e-06, |
| "loss": 0.9349355697631836, |
| "step": 2272 |
| }, |
| { |
| "epoch": 2.604810996563574, |
| "grad_norm": 0.23186595737934113, |
| "learning_rate": 1.422682878989179e-06, |
| "loss": 1.0508509874343872, |
| "step": 2274 |
| }, |
| { |
| "epoch": 2.607101947308133, |
| "grad_norm": 0.5015450119972229, |
| "learning_rate": 1.4178873534087224e-06, |
| "loss": 1.1740843057632446, |
| "step": 2276 |
| }, |
| { |
| "epoch": 2.609392898052692, |
| "grad_norm": 0.5724326372146606, |
| "learning_rate": 1.413117861998411e-06, |
| "loss": 0.5848703384399414, |
| "step": 2278 |
| }, |
| { |
| "epoch": 2.611683848797251, |
| "grad_norm": 0.26921916007995605, |
| "learning_rate": 1.4083744351762568e-06, |
| "loss": 1.0210750102996826, |
| "step": 2280 |
| }, |
| { |
| "epoch": 2.61397479954181, |
| "grad_norm": 0.24203850328922272, |
| "learning_rate": 1.4036571031940433e-06, |
| "loss": 0.932150661945343, |
| "step": 2282 |
| }, |
| { |
| "epoch": 2.616265750286369, |
| "grad_norm": 1.6473098993301392, |
| "learning_rate": 1.398965896137126e-06, |
| "loss": 0.7068231105804443, |
| "step": 2284 |
| }, |
| { |
| "epoch": 2.618556701030928, |
| "grad_norm": 0.41283610463142395, |
| "learning_rate": 1.394300843924251e-06, |
| "loss": 1.108357310295105, |
| "step": 2286 |
| }, |
| { |
| "epoch": 2.620847651775487, |
| "grad_norm": 0.3008173108100891, |
| "learning_rate": 1.3896619763073558e-06, |
| "loss": 0.8680709600448608, |
| "step": 2288 |
| }, |
| { |
| "epoch": 2.623138602520046, |
| "grad_norm": 0.3753491938114166, |
| "learning_rate": 1.3850493228713829e-06, |
| "loss": 1.1014978885650635, |
| "step": 2290 |
| }, |
| { |
| "epoch": 2.625429553264605, |
| "grad_norm": 0.4968052804470062, |
| "learning_rate": 1.3804629130340922e-06, |
| "loss": 0.7989722490310669, |
| "step": 2292 |
| }, |
| { |
| "epoch": 2.627720504009164, |
| "grad_norm": 1.7983485460281372, |
| "learning_rate": 1.375902776045869e-06, |
| "loss": 1.049964427947998, |
| "step": 2294 |
| }, |
| { |
| "epoch": 2.6300114547537228, |
| "grad_norm": 1.96951425075531, |
| "learning_rate": 1.3713689409895423e-06, |
| "loss": 0.9360839128494263, |
| "step": 2296 |
| }, |
| { |
| "epoch": 2.6323024054982818, |
| "grad_norm": 0.43087831139564514, |
| "learning_rate": 1.3668614367801968e-06, |
| "loss": 1.0364996194839478, |
| "step": 2298 |
| }, |
| { |
| "epoch": 2.6345933562428407, |
| "grad_norm": 1.26382577419281, |
| "learning_rate": 1.3623802921649903e-06, |
| "loss": 1.0188556909561157, |
| "step": 2300 |
| }, |
| { |
| "epoch": 2.6368843069873997, |
| "grad_norm": 0.20739609003067017, |
| "learning_rate": 1.3579255357229665e-06, |
| "loss": 1.0033255815505981, |
| "step": 2302 |
| }, |
| { |
| "epoch": 2.6391752577319587, |
| "grad_norm": 0.3471895754337311, |
| "learning_rate": 1.3534971958648763e-06, |
| "loss": 1.0234272480010986, |
| "step": 2304 |
| }, |
| { |
| "epoch": 2.6414662084765177, |
| "grad_norm": 0.7787106037139893, |
| "learning_rate": 1.3490953008329976e-06, |
| "loss": 1.0777220726013184, |
| "step": 2306 |
| }, |
| { |
| "epoch": 2.6437571592210767, |
| "grad_norm": 1.1166505813598633, |
| "learning_rate": 1.34471987870095e-06, |
| "loss": 1.075850009918213, |
| "step": 2308 |
| }, |
| { |
| "epoch": 2.6460481099656357, |
| "grad_norm": 0.6703611016273499, |
| "learning_rate": 1.3403709573735226e-06, |
| "loss": 1.053313136100769, |
| "step": 2310 |
| }, |
| { |
| "epoch": 2.6483390607101946, |
| "grad_norm": 0.7650402188301086, |
| "learning_rate": 1.336048564586489e-06, |
| "loss": 1.1778388023376465, |
| "step": 2312 |
| }, |
| { |
| "epoch": 2.6506300114547536, |
| "grad_norm": 1.5347474813461304, |
| "learning_rate": 1.3317527279064346e-06, |
| "loss": 0.9594372510910034, |
| "step": 2314 |
| }, |
| { |
| "epoch": 2.6529209621993126, |
| "grad_norm": 0.3883764147758484, |
| "learning_rate": 1.3274834747305816e-06, |
| "loss": 1.1035804748535156, |
| "step": 2316 |
| }, |
| { |
| "epoch": 2.6552119129438716, |
| "grad_norm": 0.30928364396095276, |
| "learning_rate": 1.3232408322866112e-06, |
| "loss": 1.2312179803848267, |
| "step": 2318 |
| }, |
| { |
| "epoch": 2.6575028636884306, |
| "grad_norm": 0.4253048598766327, |
| "learning_rate": 1.319024827632493e-06, |
| "loss": 1.101532220840454, |
| "step": 2320 |
| }, |
| { |
| "epoch": 2.6597938144329896, |
| "grad_norm": 0.26640430092811584, |
| "learning_rate": 1.3148354876563078e-06, |
| "loss": 0.8757455945014954, |
| "step": 2322 |
| }, |
| { |
| "epoch": 2.6620847651775486, |
| "grad_norm": 0.42339736223220825, |
| "learning_rate": 1.3106728390760822e-06, |
| "loss": 0.9791202545166016, |
| "step": 2324 |
| }, |
| { |
| "epoch": 2.6643757159221075, |
| "grad_norm": 0.7778316736221313, |
| "learning_rate": 1.3065369084396143e-06, |
| "loss": 1.1166675090789795, |
| "step": 2326 |
| }, |
| { |
| "epoch": 2.6666666666666665, |
| "grad_norm": 0.41248399019241333, |
| "learning_rate": 1.302427722124305e-06, |
| "loss": 1.0665652751922607, |
| "step": 2328 |
| }, |
| { |
| "epoch": 2.6689576174112255, |
| "grad_norm": 2.0227549076080322, |
| "learning_rate": 1.2983453063369886e-06, |
| "loss": 1.1030595302581787, |
| "step": 2330 |
| }, |
| { |
| "epoch": 2.6712485681557845, |
| "grad_norm": 0.12428312003612518, |
| "learning_rate": 1.2942896871137703e-06, |
| "loss": 0.1532021164894104, |
| "step": 2332 |
| }, |
| { |
| "epoch": 2.673539518900344, |
| "grad_norm": 1.4765028953552246, |
| "learning_rate": 1.290260890319854e-06, |
| "loss": 1.0765687227249146, |
| "step": 2334 |
| }, |
| { |
| "epoch": 2.675830469644903, |
| "grad_norm": 1.4312635660171509, |
| "learning_rate": 1.2862589416493828e-06, |
| "loss": 1.2551358938217163, |
| "step": 2336 |
| }, |
| { |
| "epoch": 2.678121420389462, |
| "grad_norm": 0.49947378039360046, |
| "learning_rate": 1.282283866625271e-06, |
| "loss": 1.1093734502792358, |
| "step": 2338 |
| }, |
| { |
| "epoch": 2.680412371134021, |
| "grad_norm": 0.8598469495773315, |
| "learning_rate": 1.278335690599042e-06, |
| "loss": 1.1796274185180664, |
| "step": 2340 |
| }, |
| { |
| "epoch": 2.68270332187858, |
| "grad_norm": 3.3485891819000244, |
| "learning_rate": 1.27441443875067e-06, |
| "loss": 1.1467323303222656, |
| "step": 2342 |
| }, |
| { |
| "epoch": 2.684994272623139, |
| "grad_norm": 1.1836572885513306, |
| "learning_rate": 1.2705201360884154e-06, |
| "loss": 0.5701313018798828, |
| "step": 2344 |
| }, |
| { |
| "epoch": 2.687285223367698, |
| "grad_norm": 0.9335172176361084, |
| "learning_rate": 1.2666528074486675e-06, |
| "loss": 1.0899162292480469, |
| "step": 2346 |
| }, |
| { |
| "epoch": 2.689576174112257, |
| "grad_norm": 0.257024884223938, |
| "learning_rate": 1.2628124774957862e-06, |
| "loss": 1.023752212524414, |
| "step": 2348 |
| }, |
| { |
| "epoch": 2.691867124856816, |
| "grad_norm": 0.5289345979690552, |
| "learning_rate": 1.2589991707219407e-06, |
| "loss": 0.8916153907775879, |
| "step": 2350 |
| }, |
| { |
| "epoch": 2.6941580756013748, |
| "grad_norm": 0.32408076524734497, |
| "learning_rate": 1.255212911446962e-06, |
| "loss": 1.040621042251587, |
| "step": 2352 |
| }, |
| { |
| "epoch": 2.6964490263459338, |
| "grad_norm": 0.39971452951431274, |
| "learning_rate": 1.2514537238181764e-06, |
| "loss": 0.8947229385375977, |
| "step": 2354 |
| }, |
| { |
| "epoch": 2.6987399770904927, |
| "grad_norm": 0.23209726810455322, |
| "learning_rate": 1.2477216318102638e-06, |
| "loss": 1.1119811534881592, |
| "step": 2356 |
| }, |
| { |
| "epoch": 2.7010309278350517, |
| "grad_norm": 1.079315185546875, |
| "learning_rate": 1.2440166592250908e-06, |
| "loss": 1.105717420578003, |
| "step": 2358 |
| }, |
| { |
| "epoch": 2.7033218785796107, |
| "grad_norm": 0.41968509554862976, |
| "learning_rate": 1.2403388296915748e-06, |
| "loss": 0.9688929915428162, |
| "step": 2360 |
| }, |
| { |
| "epoch": 2.7056128293241697, |
| "grad_norm": 0.34683892130851746, |
| "learning_rate": 1.2366881666655197e-06, |
| "loss": 1.0027223825454712, |
| "step": 2362 |
| }, |
| { |
| "epoch": 2.7079037800687287, |
| "grad_norm": 1.22856867313385, |
| "learning_rate": 1.2330646934294733e-06, |
| "loss": 0.8159598112106323, |
| "step": 2364 |
| }, |
| { |
| "epoch": 2.7101947308132877, |
| "grad_norm": 0.4460030198097229, |
| "learning_rate": 1.2294684330925773e-06, |
| "loss": 1.0731245279312134, |
| "step": 2366 |
| }, |
| { |
| "epoch": 2.7124856815578466, |
| "grad_norm": 0.23822948336601257, |
| "learning_rate": 1.225899408590418e-06, |
| "loss": 1.0538825988769531, |
| "step": 2368 |
| }, |
| { |
| "epoch": 2.7147766323024056, |
| "grad_norm": 0.5003798007965088, |
| "learning_rate": 1.222357642684886e-06, |
| "loss": 0.9127865433692932, |
| "step": 2370 |
| }, |
| { |
| "epoch": 2.7170675830469646, |
| "grad_norm": 0.18163853883743286, |
| "learning_rate": 1.2188431579640229e-06, |
| "loss": 0.5616147518157959, |
| "step": 2372 |
| }, |
| { |
| "epoch": 2.7193585337915236, |
| "grad_norm": 1.1338001489639282, |
| "learning_rate": 1.2153559768418826e-06, |
| "loss": 1.188947081565857, |
| "step": 2374 |
| }, |
| { |
| "epoch": 2.7216494845360826, |
| "grad_norm": 0.4888297915458679, |
| "learning_rate": 1.2118961215583876e-06, |
| "loss": 0.907243013381958, |
| "step": 2376 |
| }, |
| { |
| "epoch": 2.7239404352806416, |
| "grad_norm": 0.5065628886222839, |
| "learning_rate": 1.2084636141791853e-06, |
| "loss": 1.178124189376831, |
| "step": 2378 |
| }, |
| { |
| "epoch": 2.7262313860252005, |
| "grad_norm": 0.6853643655776978, |
| "learning_rate": 1.2050584765955106e-06, |
| "loss": 0.5477138757705688, |
| "step": 2380 |
| }, |
| { |
| "epoch": 2.7285223367697595, |
| "grad_norm": 1.9078017473220825, |
| "learning_rate": 1.2016807305240418e-06, |
| "loss": 1.0968685150146484, |
| "step": 2382 |
| }, |
| { |
| "epoch": 2.7308132875143185, |
| "grad_norm": 0.4655286967754364, |
| "learning_rate": 1.198330397506765e-06, |
| "loss": 1.0445671081542969, |
| "step": 2384 |
| }, |
| { |
| "epoch": 2.7331042382588775, |
| "grad_norm": 0.6571381688117981, |
| "learning_rate": 1.1950074989108369e-06, |
| "loss": 1.2365548610687256, |
| "step": 2386 |
| }, |
| { |
| "epoch": 2.7353951890034365, |
| "grad_norm": 0.1842108517885208, |
| "learning_rate": 1.1917120559284484e-06, |
| "loss": 1.0173747539520264, |
| "step": 2388 |
| }, |
| { |
| "epoch": 2.7376861397479955, |
| "grad_norm": 0.7447629570960999, |
| "learning_rate": 1.188444089576689e-06, |
| "loss": 1.1226527690887451, |
| "step": 2390 |
| }, |
| { |
| "epoch": 2.7399770904925544, |
| "grad_norm": 1.3908346891403198, |
| "learning_rate": 1.1852036206974093e-06, |
| "loss": 1.0542950630187988, |
| "step": 2392 |
| }, |
| { |
| "epoch": 2.7422680412371134, |
| "grad_norm": 0.5131960511207581, |
| "learning_rate": 1.1819906699570951e-06, |
| "loss": 0.9882605075836182, |
| "step": 2394 |
| }, |
| { |
| "epoch": 2.7445589919816724, |
| "grad_norm": 1.6375041007995605, |
| "learning_rate": 1.178805257846731e-06, |
| "loss": 0.8442957401275635, |
| "step": 2396 |
| }, |
| { |
| "epoch": 2.7468499427262314, |
| "grad_norm": 0.6786810159683228, |
| "learning_rate": 1.1756474046816705e-06, |
| "loss": 1.0617409944534302, |
| "step": 2398 |
| }, |
| { |
| "epoch": 2.7491408934707904, |
| "grad_norm": 0.3984721899032593, |
| "learning_rate": 1.172517130601506e-06, |
| "loss": 0.9120774269104004, |
| "step": 2400 |
| }, |
| { |
| "epoch": 2.7514318442153494, |
| "grad_norm": 0.8128576874732971, |
| "learning_rate": 1.169414455569941e-06, |
| "loss": 1.0682992935180664, |
| "step": 2402 |
| }, |
| { |
| "epoch": 2.7537227949599083, |
| "grad_norm": 0.6351785063743591, |
| "learning_rate": 1.1663393993746639e-06, |
| "loss": 0.6914603114128113, |
| "step": 2404 |
| }, |
| { |
| "epoch": 2.7560137457044673, |
| "grad_norm": 0.9976605176925659, |
| "learning_rate": 1.1632919816272185e-06, |
| "loss": 1.171576976776123, |
| "step": 2406 |
| }, |
| { |
| "epoch": 2.7583046964490263, |
| "grad_norm": 0.303443044424057, |
| "learning_rate": 1.1602722217628847e-06, |
| "loss": 0.5294689536094666, |
| "step": 2408 |
| }, |
| { |
| "epoch": 2.7605956471935853, |
| "grad_norm": 0.26282933354377747, |
| "learning_rate": 1.157280139040546e-06, |
| "loss": 1.1015565395355225, |
| "step": 2410 |
| }, |
| { |
| "epoch": 2.7628865979381443, |
| "grad_norm": 0.6320168375968933, |
| "learning_rate": 1.1543157525425753e-06, |
| "loss": 1.0779060125350952, |
| "step": 2412 |
| }, |
| { |
| "epoch": 2.7651775486827033, |
| "grad_norm": 0.8445232510566711, |
| "learning_rate": 1.1513790811747077e-06, |
| "loss": 1.0145206451416016, |
| "step": 2414 |
| }, |
| { |
| "epoch": 2.7674684994272623, |
| "grad_norm": 1.2781503200531006, |
| "learning_rate": 1.1484701436659228e-06, |
| "loss": 0.9530794620513916, |
| "step": 2416 |
| }, |
| { |
| "epoch": 2.7697594501718212, |
| "grad_norm": 0.4669745862483978, |
| "learning_rate": 1.1455889585683234e-06, |
| "loss": 1.1770665645599365, |
| "step": 2418 |
| }, |
| { |
| "epoch": 2.77205040091638, |
| "grad_norm": 0.1694551408290863, |
| "learning_rate": 1.1427355442570174e-06, |
| "loss": 0.8900678157806396, |
| "step": 2420 |
| }, |
| { |
| "epoch": 2.774341351660939, |
| "grad_norm": 0.6487224698066711, |
| "learning_rate": 1.1399099189300022e-06, |
| "loss": 1.1467430591583252, |
| "step": 2422 |
| }, |
| { |
| "epoch": 2.776632302405498, |
| "grad_norm": 0.4884292483329773, |
| "learning_rate": 1.1371121006080476e-06, |
| "loss": 0.6432940363883972, |
| "step": 2424 |
| }, |
| { |
| "epoch": 2.778923253150057, |
| "grad_norm": 0.5796436071395874, |
| "learning_rate": 1.1343421071345807e-06, |
| "loss": 0.8554515838623047, |
| "step": 2426 |
| }, |
| { |
| "epoch": 2.781214203894616, |
| "grad_norm": 0.6898307800292969, |
| "learning_rate": 1.1315999561755704e-06, |
| "loss": 1.0714622735977173, |
| "step": 2428 |
| }, |
| { |
| "epoch": 2.783505154639175, |
| "grad_norm": 0.8374386429786682, |
| "learning_rate": 1.1288856652194187e-06, |
| "loss": 1.2249865531921387, |
| "step": 2430 |
| }, |
| { |
| "epoch": 2.785796105383734, |
| "grad_norm": 0.5728365778923035, |
| "learning_rate": 1.1261992515768475e-06, |
| "loss": 1.0746301412582397, |
| "step": 2432 |
| }, |
| { |
| "epoch": 2.788087056128293, |
| "grad_norm": 0.5857115387916565, |
| "learning_rate": 1.1235407323807863e-06, |
| "loss": 0.9600643515586853, |
| "step": 2434 |
| }, |
| { |
| "epoch": 2.790378006872852, |
| "grad_norm": 0.43586304783821106, |
| "learning_rate": 1.1209101245862641e-06, |
| "loss": 1.0450245141983032, |
| "step": 2436 |
| }, |
| { |
| "epoch": 2.792668957617411, |
| "grad_norm": 0.9094945192337036, |
| "learning_rate": 1.1183074449703026e-06, |
| "loss": 1.141032099723816, |
| "step": 2438 |
| }, |
| { |
| "epoch": 2.79495990836197, |
| "grad_norm": 0.5066821575164795, |
| "learning_rate": 1.1157327101318079e-06, |
| "loss": 1.046962022781372, |
| "step": 2440 |
| }, |
| { |
| "epoch": 2.797250859106529, |
| "grad_norm": 0.302134245634079, |
| "learning_rate": 1.113185936491464e-06, |
| "loss": 0.7253270149230957, |
| "step": 2442 |
| }, |
| { |
| "epoch": 2.799541809851088, |
| "grad_norm": 0.46006661653518677, |
| "learning_rate": 1.1106671402916311e-06, |
| "loss": 1.059772253036499, |
| "step": 2444 |
| }, |
| { |
| "epoch": 2.801832760595647, |
| "grad_norm": 0.4403242766857147, |
| "learning_rate": 1.1081763375962376e-06, |
| "loss": 1.0585718154907227, |
| "step": 2446 |
| }, |
| { |
| "epoch": 2.804123711340206, |
| "grad_norm": 0.6102672219276428, |
| "learning_rate": 1.1057135442906802e-06, |
| "loss": 1.0364320278167725, |
| "step": 2448 |
| }, |
| { |
| "epoch": 2.806414662084765, |
| "grad_norm": 0.2114000916481018, |
| "learning_rate": 1.1032787760817245e-06, |
| "loss": 1.0323476791381836, |
| "step": 2450 |
| }, |
| { |
| "epoch": 2.808705612829324, |
| "grad_norm": 0.44233590364456177, |
| "learning_rate": 1.1008720484973997e-06, |
| "loss": 1.0494945049285889, |
| "step": 2452 |
| }, |
| { |
| "epoch": 2.810996563573883, |
| "grad_norm": 0.5234547853469849, |
| "learning_rate": 1.0984933768869045e-06, |
| "loss": 0.8871585130691528, |
| "step": 2454 |
| }, |
| { |
| "epoch": 2.813287514318442, |
| "grad_norm": 2.157639741897583, |
| "learning_rate": 1.0961427764205054e-06, |
| "loss": 0.9773741960525513, |
| "step": 2456 |
| }, |
| { |
| "epoch": 2.815578465063001, |
| "grad_norm": 0.7899772524833679, |
| "learning_rate": 1.0938202620894444e-06, |
| "loss": 1.0361276865005493, |
| "step": 2458 |
| }, |
| { |
| "epoch": 2.81786941580756, |
| "grad_norm": 0.3699988126754761, |
| "learning_rate": 1.0915258487058393e-06, |
| "loss": 0.9854030609130859, |
| "step": 2460 |
| }, |
| { |
| "epoch": 2.820160366552119, |
| "grad_norm": 0.5396907329559326, |
| "learning_rate": 1.0892595509025897e-06, |
| "loss": 1.0175352096557617, |
| "step": 2462 |
| }, |
| { |
| "epoch": 2.822451317296678, |
| "grad_norm": 0.5068468451499939, |
| "learning_rate": 1.0870213831332856e-06, |
| "loss": 1.0485467910766602, |
| "step": 2464 |
| }, |
| { |
| "epoch": 2.824742268041237, |
| "grad_norm": 0.43589094281196594, |
| "learning_rate": 1.0848113596721147e-06, |
| "loss": 0.9287646412849426, |
| "step": 2466 |
| }, |
| { |
| "epoch": 2.827033218785796, |
| "grad_norm": 0.4347958266735077, |
| "learning_rate": 1.0826294946137705e-06, |
| "loss": 1.1217800378799438, |
| "step": 2468 |
| }, |
| { |
| "epoch": 2.8293241695303553, |
| "grad_norm": 0.5516931414604187, |
| "learning_rate": 1.0804758018733635e-06, |
| "loss": 1.1510246992111206, |
| "step": 2470 |
| }, |
| { |
| "epoch": 2.8316151202749142, |
| "grad_norm": 1.0004255771636963, |
| "learning_rate": 1.0783502951863302e-06, |
| "loss": 0.7378605604171753, |
| "step": 2472 |
| }, |
| { |
| "epoch": 2.8339060710194732, |
| "grad_norm": 0.43937617540359497, |
| "learning_rate": 1.076252988108349e-06, |
| "loss": 1.0908252000808716, |
| "step": 2474 |
| }, |
| { |
| "epoch": 2.836197021764032, |
| "grad_norm": 0.5650505423545837, |
| "learning_rate": 1.0741838940152498e-06, |
| "loss": 1.0471761226654053, |
| "step": 2476 |
| }, |
| { |
| "epoch": 2.838487972508591, |
| "grad_norm": 0.5975066423416138, |
| "learning_rate": 1.0721430261029334e-06, |
| "loss": 1.0682255029678345, |
| "step": 2478 |
| }, |
| { |
| "epoch": 2.84077892325315, |
| "grad_norm": 0.3658738136291504, |
| "learning_rate": 1.0701303973872825e-06, |
| "loss": 1.1062527894973755, |
| "step": 2480 |
| }, |
| { |
| "epoch": 2.843069873997709, |
| "grad_norm": 0.29034459590911865, |
| "learning_rate": 1.0681460207040816e-06, |
| "loss": 0.9529769420623779, |
| "step": 2482 |
| }, |
| { |
| "epoch": 2.845360824742268, |
| "grad_norm": 1.0845448970794678, |
| "learning_rate": 1.066189908708935e-06, |
| "loss": 1.1356077194213867, |
| "step": 2484 |
| }, |
| { |
| "epoch": 2.847651775486827, |
| "grad_norm": 0.6470354199409485, |
| "learning_rate": 1.0642620738771844e-06, |
| "loss": 1.1769475936889648, |
| "step": 2486 |
| }, |
| { |
| "epoch": 2.849942726231386, |
| "grad_norm": 0.30026912689208984, |
| "learning_rate": 1.0623625285038317e-06, |
| "loss": 1.2107105255126953, |
| "step": 2488 |
| }, |
| { |
| "epoch": 2.852233676975945, |
| "grad_norm": 0.4359968900680542, |
| "learning_rate": 1.0604912847034579e-06, |
| "loss": 1.1430937051773071, |
| "step": 2490 |
| }, |
| { |
| "epoch": 2.854524627720504, |
| "grad_norm": 0.9392463564872742, |
| "learning_rate": 1.0586483544101484e-06, |
| "loss": 0.9589693546295166, |
| "step": 2492 |
| }, |
| { |
| "epoch": 2.856815578465063, |
| "grad_norm": 0.5891343951225281, |
| "learning_rate": 1.0568337493774153e-06, |
| "loss": 1.0557307004928589, |
| "step": 2494 |
| }, |
| { |
| "epoch": 2.859106529209622, |
| "grad_norm": 0.40429064631462097, |
| "learning_rate": 1.055047481178123e-06, |
| "loss": 1.2271441221237183, |
| "step": 2496 |
| }, |
| { |
| "epoch": 2.861397479954181, |
| "grad_norm": 0.5770910382270813, |
| "learning_rate": 1.0532895612044148e-06, |
| "loss": 1.2372446060180664, |
| "step": 2498 |
| }, |
| { |
| "epoch": 2.86368843069874, |
| "grad_norm": 0.7154263854026794, |
| "learning_rate": 1.0515600006676388e-06, |
| "loss": 1.0392417907714844, |
| "step": 2500 |
| }, |
| { |
| "epoch": 2.865979381443299, |
| "grad_norm": 0.37973713874816895, |
| "learning_rate": 1.0498588105982773e-06, |
| "loss": 1.0606379508972168, |
| "step": 2502 |
| }, |
| { |
| "epoch": 2.868270332187858, |
| "grad_norm": 0.41982316970825195, |
| "learning_rate": 1.0481860018458777e-06, |
| "loss": 0.9914207458496094, |
| "step": 2504 |
| }, |
| { |
| "epoch": 2.870561282932417, |
| "grad_norm": 0.6768814325332642, |
| "learning_rate": 1.0465415850789818e-06, |
| "loss": 1.011329174041748, |
| "step": 2506 |
| }, |
| { |
| "epoch": 2.872852233676976, |
| "grad_norm": 0.5299584269523621, |
| "learning_rate": 1.044925570785057e-06, |
| "loss": 1.0899381637573242, |
| "step": 2508 |
| }, |
| { |
| "epoch": 2.875143184421535, |
| "grad_norm": 0.6537067294120789, |
| "learning_rate": 1.0433379692704303e-06, |
| "loss": 0.8742640018463135, |
| "step": 2510 |
| }, |
| { |
| "epoch": 2.877434135166094, |
| "grad_norm": 0.7677899599075317, |
| "learning_rate": 1.0417787906602254e-06, |
| "loss": 1.1713974475860596, |
| "step": 2512 |
| }, |
| { |
| "epoch": 2.879725085910653, |
| "grad_norm": 1.137424349784851, |
| "learning_rate": 1.0402480448982916e-06, |
| "loss": 1.0399460792541504, |
| "step": 2514 |
| }, |
| { |
| "epoch": 2.882016036655212, |
| "grad_norm": 5.416769027709961, |
| "learning_rate": 1.0387457417471484e-06, |
| "loss": 0.7440576553344727, |
| "step": 2516 |
| }, |
| { |
| "epoch": 2.884306987399771, |
| "grad_norm": 0.3989621698856354, |
| "learning_rate": 1.0372718907879148e-06, |
| "loss": 0.6085208058357239, |
| "step": 2518 |
| }, |
| { |
| "epoch": 2.88659793814433, |
| "grad_norm": 0.39145538210868835, |
| "learning_rate": 1.0358265014202562e-06, |
| "loss": 1.0786077976226807, |
| "step": 2520 |
| }, |
| { |
| "epoch": 2.888888888888889, |
| "grad_norm": 0.6507622003555298, |
| "learning_rate": 1.0344095828623187e-06, |
| "loss": 1.1602535247802734, |
| "step": 2522 |
| }, |
| { |
| "epoch": 2.891179839633448, |
| "grad_norm": 0.5639922022819519, |
| "learning_rate": 1.0330211441506725e-06, |
| "loss": 1.2351245880126953, |
| "step": 2524 |
| }, |
| { |
| "epoch": 2.893470790378007, |
| "grad_norm": 2.3547637462615967, |
| "learning_rate": 1.0316611941402554e-06, |
| "loss": 1.2815968990325928, |
| "step": 2526 |
| }, |
| { |
| "epoch": 2.895761741122566, |
| "grad_norm": 0.5312323570251465, |
| "learning_rate": 1.0303297415043125e-06, |
| "loss": 1.054031491279602, |
| "step": 2528 |
| }, |
| { |
| "epoch": 2.8980526918671248, |
| "grad_norm": 0.10760916769504547, |
| "learning_rate": 1.0290267947343467e-06, |
| "loss": 0.5422964692115784, |
| "step": 2530 |
| }, |
| { |
| "epoch": 2.9003436426116838, |
| "grad_norm": 0.5096286535263062, |
| "learning_rate": 1.027752362140058e-06, |
| "loss": 0.909081220626831, |
| "step": 2532 |
| }, |
| { |
| "epoch": 2.9026345933562427, |
| "grad_norm": 0.9880173802375793, |
| "learning_rate": 1.0265064518492971e-06, |
| "loss": 0.6273051500320435, |
| "step": 2534 |
| }, |
| { |
| "epoch": 2.9049255441008017, |
| "grad_norm": 0.2785288393497467, |
| "learning_rate": 1.0252890718080065e-06, |
| "loss": 0.9630579948425293, |
| "step": 2536 |
| }, |
| { |
| "epoch": 2.9072164948453607, |
| "grad_norm": 0.9245594143867493, |
| "learning_rate": 1.024100229780177e-06, |
| "loss": 0.8256567716598511, |
| "step": 2538 |
| }, |
| { |
| "epoch": 2.9095074455899197, |
| "grad_norm": 0.6933486461639404, |
| "learning_rate": 1.0229399333477933e-06, |
| "loss": 1.098947286605835, |
| "step": 2540 |
| }, |
| { |
| "epoch": 2.9117983963344787, |
| "grad_norm": 0.5825735330581665, |
| "learning_rate": 1.0218081899107866e-06, |
| "loss": 1.055496335029602, |
| "step": 2542 |
| }, |
| { |
| "epoch": 2.9140893470790377, |
| "grad_norm": 0.6609728336334229, |
| "learning_rate": 1.0207050066869884e-06, |
| "loss": 1.1041927337646484, |
| "step": 2544 |
| }, |
| { |
| "epoch": 2.9163802978235966, |
| "grad_norm": 0.6976462602615356, |
| "learning_rate": 1.0196303907120835e-06, |
| "loss": 1.2125544548034668, |
| "step": 2546 |
| }, |
| { |
| "epoch": 2.9186712485681556, |
| "grad_norm": 0.5264245867729187, |
| "learning_rate": 1.0185843488395675e-06, |
| "loss": 1.159142017364502, |
| "step": 2548 |
| }, |
| { |
| "epoch": 2.9209621993127146, |
| "grad_norm": 0.5094210505485535, |
| "learning_rate": 1.017566887740698e-06, |
| "loss": 1.1290173530578613, |
| "step": 2550 |
| }, |
| { |
| "epoch": 2.923253150057274, |
| "grad_norm": 1.0814720392227173, |
| "learning_rate": 1.0165780139044579e-06, |
| "loss": 1.0745420455932617, |
| "step": 2552 |
| }, |
| { |
| "epoch": 2.925544100801833, |
| "grad_norm": 0.922420084476471, |
| "learning_rate": 1.0156177336375104e-06, |
| "loss": 1.032870888710022, |
| "step": 2554 |
| }, |
| { |
| "epoch": 2.927835051546392, |
| "grad_norm": 2.324207067489624, |
| "learning_rate": 1.0146860530641597e-06, |
| "loss": 1.0902037620544434, |
| "step": 2556 |
| }, |
| { |
| "epoch": 2.930126002290951, |
| "grad_norm": 0.6379292607307434, |
| "learning_rate": 1.013782978126312e-06, |
| "loss": 1.1656874418258667, |
| "step": 2558 |
| }, |
| { |
| "epoch": 2.93241695303551, |
| "grad_norm": 0.3427787721157074, |
| "learning_rate": 1.0129085145834374e-06, |
| "loss": 0.8560712337493896, |
| "step": 2560 |
| }, |
| { |
| "epoch": 2.934707903780069, |
| "grad_norm": 1.8992148637771606, |
| "learning_rate": 1.012062668012534e-06, |
| "loss": 0.7727330327033997, |
| "step": 2562 |
| }, |
| { |
| "epoch": 2.936998854524628, |
| "grad_norm": 0.9722429513931274, |
| "learning_rate": 1.0112454438080912e-06, |
| "loss": 0.9743561744689941, |
| "step": 2564 |
| }, |
| { |
| "epoch": 2.939289805269187, |
| "grad_norm": 0.20309951901435852, |
| "learning_rate": 1.010456847182055e-06, |
| "loss": 1.0358002185821533, |
| "step": 2566 |
| }, |
| { |
| "epoch": 2.941580756013746, |
| "grad_norm": 0.3222387731075287, |
| "learning_rate": 1.0096968831637976e-06, |
| "loss": 1.0403361320495605, |
| "step": 2568 |
| }, |
| { |
| "epoch": 2.943871706758305, |
| "grad_norm": 0.5108765363693237, |
| "learning_rate": 1.008965556600081e-06, |
| "loss": 1.0720305442810059, |
| "step": 2570 |
| }, |
| { |
| "epoch": 2.946162657502864, |
| "grad_norm": 0.6678656935691833, |
| "learning_rate": 1.0082628721550296e-06, |
| "loss": 1.0465478897094727, |
| "step": 2572 |
| }, |
| { |
| "epoch": 2.948453608247423, |
| "grad_norm": 0.264837384223938, |
| "learning_rate": 1.0075888343100988e-06, |
| "loss": 1.0957000255584717, |
| "step": 2574 |
| }, |
| { |
| "epoch": 2.950744558991982, |
| "grad_norm": 0.42729780077934265, |
| "learning_rate": 1.0069434473640478e-06, |
| "loss": 0.6375606060028076, |
| "step": 2576 |
| }, |
| { |
| "epoch": 2.953035509736541, |
| "grad_norm": 0.5195022225379944, |
| "learning_rate": 1.0063267154329093e-06, |
| "loss": 1.1245808601379395, |
| "step": 2578 |
| }, |
| { |
| "epoch": 2.9553264604811, |
| "grad_norm": 0.3333146572113037, |
| "learning_rate": 1.0057386424499674e-06, |
| "loss": 1.0144271850585938, |
| "step": 2580 |
| }, |
| { |
| "epoch": 2.957617411225659, |
| "grad_norm": 0.1994045525789261, |
| "learning_rate": 1.005179232165728e-06, |
| "loss": 1.0108163356781006, |
| "step": 2582 |
| }, |
| { |
| "epoch": 2.959908361970218, |
| "grad_norm": 0.7289289832115173, |
| "learning_rate": 1.0046484881478987e-06, |
| "loss": 1.0791220664978027, |
| "step": 2584 |
| }, |
| { |
| "epoch": 2.9621993127147768, |
| "grad_norm": 0.6395173072814941, |
| "learning_rate": 1.0041464137813643e-06, |
| "loss": 1.0681736469268799, |
| "step": 2586 |
| }, |
| { |
| "epoch": 2.9644902634593358, |
| "grad_norm": 0.46986323595046997, |
| "learning_rate": 1.0036730122681644e-06, |
| "loss": 1.0714763402938843, |
| "step": 2588 |
| }, |
| { |
| "epoch": 2.9667812142038947, |
| "grad_norm": 0.4828946590423584, |
| "learning_rate": 1.003228286627476e-06, |
| "loss": 1.036878228187561, |
| "step": 2590 |
| }, |
| { |
| "epoch": 2.9690721649484537, |
| "grad_norm": 0.661065399646759, |
| "learning_rate": 1.00281223969559e-06, |
| "loss": 1.086197853088379, |
| "step": 2592 |
| }, |
| { |
| "epoch": 2.9713631156930127, |
| "grad_norm": 1.195217490196228, |
| "learning_rate": 1.0024248741258968e-06, |
| "loss": 0.8736682534217834, |
| "step": 2594 |
| }, |
| { |
| "epoch": 2.9736540664375717, |
| "grad_norm": 0.887296199798584, |
| "learning_rate": 1.0020661923888674e-06, |
| "loss": 1.1362636089324951, |
| "step": 2596 |
| }, |
| { |
| "epoch": 2.9759450171821307, |
| "grad_norm": 0.5010428428649902, |
| "learning_rate": 1.001736196772038e-06, |
| "loss": 1.0959832668304443, |
| "step": 2598 |
| }, |
| { |
| "epoch": 2.9782359679266897, |
| "grad_norm": 0.4214611351490021, |
| "learning_rate": 1.0014348893799954e-06, |
| "loss": 1.2732155323028564, |
| "step": 2600 |
| }, |
| { |
| "epoch": 2.9805269186712486, |
| "grad_norm": 0.6986483335494995, |
| "learning_rate": 1.0011622721343655e-06, |
| "loss": 1.0966172218322754, |
| "step": 2602 |
| }, |
| { |
| "epoch": 2.9828178694158076, |
| "grad_norm": 0.5451075434684753, |
| "learning_rate": 1.0009183467737969e-06, |
| "loss": 0.8794732093811035, |
| "step": 2604 |
| }, |
| { |
| "epoch": 2.9851088201603666, |
| "grad_norm": 0.4121303856372833, |
| "learning_rate": 1.0007031148539529e-06, |
| "loss": 1.0295562744140625, |
| "step": 2606 |
| }, |
| { |
| "epoch": 2.9873997709049256, |
| "grad_norm": 0.9556783437728882, |
| "learning_rate": 1.0005165777475022e-06, |
| "loss": 0.5639452338218689, |
| "step": 2608 |
| }, |
| { |
| "epoch": 2.9896907216494846, |
| "grad_norm": 1.1290655136108398, |
| "learning_rate": 1.000358736644108e-06, |
| "loss": 0.4423694610595703, |
| "step": 2610 |
| }, |
| { |
| "epoch": 2.9919816723940436, |
| "grad_norm": 0.21035031974315643, |
| "learning_rate": 1.00022959255042e-06, |
| "loss": 1.0184959173202515, |
| "step": 2612 |
| }, |
| { |
| "epoch": 2.9942726231386025, |
| "grad_norm": 1.0605756044387817, |
| "learning_rate": 1.0001291462900717e-06, |
| "loss": 1.0759494304656982, |
| "step": 2614 |
| }, |
| { |
| "epoch": 2.9965635738831615, |
| "grad_norm": 0.7549898624420166, |
| "learning_rate": 1.0000573985036708e-06, |
| "loss": 0.9966956973075867, |
| "step": 2616 |
| }, |
| { |
| "epoch": 2.9988545246277205, |
| "grad_norm": 0.7441288232803345, |
| "learning_rate": 1.0000143496487968e-06, |
| "loss": 1.153484582901001, |
| "step": 2618 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 2619, |
| "total_flos": 4.688730399671583e+18, |
| "train_loss": 1.122755285885872, |
| "train_runtime": 13889.3173, |
| "train_samples_per_second": 3.017, |
| "train_steps_per_second": 0.189 |
| } |
| ], |
| "logging_steps": 2, |
| "max_steps": 2619, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 9999999, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.688730399671583e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
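The state above closes with the end-of-training summary (train_loss 1.1228 over 2619 steps) and the trainer's bookkeeping fields. For anyone consuming this file, here is a minimal sketch of reading it and plotting the logged loss curve; the filename `trainer_state.json` is the conventional Hugging Face Trainer output name, and the use of matplotlib is an assumption, not part of the original run.

```python
# Minimal sketch: load the trainer state above (assumed to be saved at
# the conventional path trainer_state.json) and plot training loss vs.
# step. Uses only the standard library plus matplotlib.
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Entries are appended every `logging_steps` (2 in this run). The final
# entry is the end-of-training summary, which carries "train_loss"
# rather than "loss", so filter on the presence of the "loss" key.
logged = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in logged]
losses = [e["loss"] for e in logged]

plt.plot(steps, losses, linewidth=0.8)
plt.xlabel("step")
plt.ylabel("training loss")
plt.title(f"{state['num_train_epochs']} epochs, {state['max_steps']} steps")
plt.tight_layout()
plt.show()
```

The same filtering approach works for the other per-step fields: plotting `grad_norm` instead of `loss`, for instance, would surface the isolated spikes visible in the log (e.g. 14.40 at step 2010 and 9.80 at step 2028) against an otherwise sub-1.0 baseline.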