{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.40009755077866427, "eval_steps": 500, "global_step": 5742, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 6.967912761732224e-05, "grad_norm": 2.733745568803013, "learning_rate": 6.999999924522766e-07, "loss": 1.7192, "step": 1 }, { "epoch": 0.00013935825523464447, "grad_norm": 2.709369185413448, "learning_rate": 6.99999969809107e-07, "loss": 1.6019, "step": 2 }, { "epoch": 0.0002090373828519667, "grad_norm": 2.9135830924208594, "learning_rate": 6.999999320704921e-07, "loss": 1.8484, "step": 3 }, { "epoch": 0.00027871651046928894, "grad_norm": 2.777419413695461, "learning_rate": 6.999998792364337e-07, "loss": 1.756, "step": 4 }, { "epoch": 0.00034839563808661115, "grad_norm": 2.8075005508831694, "learning_rate": 6.999998113069345e-07, "loss": 1.7913, "step": 5 }, { "epoch": 0.0004180747657039334, "grad_norm": 2.668293350418484, "learning_rate": 6.999997282819976e-07, "loss": 1.6321, "step": 6 }, { "epoch": 0.0004877538933212556, "grad_norm": 2.4745384488107187, "learning_rate": 6.999996301616271e-07, "loss": 1.5594, "step": 7 }, { "epoch": 0.0005574330209385779, "grad_norm": 2.3430588976825826, "learning_rate": 6.999995169458277e-07, "loss": 1.6613, "step": 8 }, { "epoch": 0.0006271121485559001, "grad_norm": 2.2357126724663092, "learning_rate": 6.999993886346046e-07, "loss": 1.676, "step": 9 }, { "epoch": 0.0006967912761732223, "grad_norm": 2.342047279317775, "learning_rate": 6.999992452279643e-07, "loss": 1.6192, "step": 10 }, { "epoch": 0.0007664704037905445, "grad_norm": 2.2540685801049944, "learning_rate": 6.999990867259133e-07, "loss": 1.559, "step": 11 }, { "epoch": 0.0008361495314078668, "grad_norm": 1.971114382158942, "learning_rate": 6.999989131284595e-07, "loss": 1.6581, "step": 12 }, { "epoch": 0.000905828659025189, "grad_norm": 1.5959396288528505, "learning_rate": 6.999987244356111e-07, "loss": 1.721, "step": 13 }, { "epoch": 0.0009755077866425112, "grad_norm": 1.4800088775047004, "learning_rate": 6.999985206473771e-07, "loss": 1.7666, "step": 14 }, { "epoch": 0.0010451869142598335, "grad_norm": 1.3687402013136123, "learning_rate": 6.999983017637673e-07, "loss": 1.726, "step": 15 }, { "epoch": 0.0011148660418771558, "grad_norm": 1.391725060939141, "learning_rate": 6.999980677847923e-07, "loss": 1.7181, "step": 16 }, { "epoch": 0.0011845451694944779, "grad_norm": 1.1938617951720005, "learning_rate": 6.999978187104631e-07, "loss": 1.5963, "step": 17 }, { "epoch": 0.0012542242971118002, "grad_norm": 1.2751779681441469, "learning_rate": 6.999975545407918e-07, "loss": 1.6692, "step": 18 }, { "epoch": 0.0013239034247291223, "grad_norm": 1.2381900634264318, "learning_rate": 6.99997275275791e-07, "loss": 1.7631, "step": 19 }, { "epoch": 0.0013935825523464446, "grad_norm": 1.153546411353916, "learning_rate": 6.999969809154741e-07, "loss": 1.7649, "step": 20 }, { "epoch": 0.001463261679963767, "grad_norm": 1.1646630042222352, "learning_rate": 6.999966714598552e-07, "loss": 1.7358, "step": 21 }, { "epoch": 0.001532940807581089, "grad_norm": 1.1023651577627083, "learning_rate": 6.99996346908949e-07, "loss": 1.67, "step": 22 }, { "epoch": 0.0016026199351984113, "grad_norm": 1.1875711007742904, "learning_rate": 6.999960072627713e-07, "loss": 1.622, "step": 23 }, { "epoch": 0.0016722990628157337, "grad_norm": 1.0365697887615943, "learning_rate": 6.999956525213383e-07, "loss": 1.7234, "step": 24 }, { "epoch": 0.0017419781904330558, "grad_norm": 0.9973919994515652, "learning_rate": 6.99995282684667e-07, "loss": 1.7004, "step": 25 }, { "epoch": 0.001811657318050378, "grad_norm": 1.3266756191624058, "learning_rate": 6.999948977527749e-07, "loss": 1.7692, "step": 26 }, { "epoch": 0.0018813364456677002, "grad_norm": 1.4466381604576397, "learning_rate": 6.999944977256808e-07, "loss": 1.4938, "step": 27 }, { "epoch": 0.0019510155732850225, "grad_norm": 1.5268786523739337, "learning_rate": 6.999940826034036e-07, "loss": 1.7042, "step": 28 }, { "epoch": 0.002020694700902345, "grad_norm": 1.4725142311197557, "learning_rate": 6.999936523859633e-07, "loss": 1.4254, "step": 29 }, { "epoch": 0.002090373828519667, "grad_norm": 1.4663543510668822, "learning_rate": 6.999932070733806e-07, "loss": 1.5621, "step": 30 }, { "epoch": 0.002160052956136989, "grad_norm": 1.5525614196605468, "learning_rate": 6.999927466656765e-07, "loss": 1.7332, "step": 31 }, { "epoch": 0.0022297320837543115, "grad_norm": 1.3577154717206668, "learning_rate": 6.999922711628735e-07, "loss": 1.5684, "step": 32 }, { "epoch": 0.0022994112113716336, "grad_norm": 1.2464378205015698, "learning_rate": 6.999917805649943e-07, "loss": 1.7046, "step": 33 }, { "epoch": 0.0023690903389889557, "grad_norm": 1.2571057892933462, "learning_rate": 6.999912748720621e-07, "loss": 1.6985, "step": 34 }, { "epoch": 0.0024387694666062783, "grad_norm": 1.046761844185471, "learning_rate": 6.999907540841015e-07, "loss": 1.5903, "step": 35 }, { "epoch": 0.0025084485942236004, "grad_norm": 1.0190382659116664, "learning_rate": 6.999902182011371e-07, "loss": 1.6446, "step": 36 }, { "epoch": 0.0025781277218409225, "grad_norm": 0.9420347898121777, "learning_rate": 6.999896672231951e-07, "loss": 1.6862, "step": 37 }, { "epoch": 0.0026478068494582446, "grad_norm": 0.9594587432273478, "learning_rate": 6.999891011503013e-07, "loss": 1.6502, "step": 38 }, { "epoch": 0.002717485977075567, "grad_norm": 0.8779244551322024, "learning_rate": 6.999885199824832e-07, "loss": 1.5531, "step": 39 }, { "epoch": 0.002787165104692889, "grad_norm": 0.8480799062449409, "learning_rate": 6.999879237197686e-07, "loss": 1.4879, "step": 40 }, { "epoch": 0.0028568442323102113, "grad_norm": 0.9497551105971518, "learning_rate": 6.99987312362186e-07, "loss": 1.6479, "step": 41 }, { "epoch": 0.002926523359927534, "grad_norm": 0.8618279518712127, "learning_rate": 6.999866859097648e-07, "loss": 1.662, "step": 42 }, { "epoch": 0.002996202487544856, "grad_norm": 0.9018906795561105, "learning_rate": 6.999860443625348e-07, "loss": 1.5774, "step": 43 }, { "epoch": 0.003065881615162178, "grad_norm": 0.8612830023344303, "learning_rate": 6.999853877205271e-07, "loss": 1.8141, "step": 44 }, { "epoch": 0.0031355607427795006, "grad_norm": 0.9007192891107818, "learning_rate": 6.999847159837729e-07, "loss": 1.6209, "step": 45 }, { "epoch": 0.0032052398703968227, "grad_norm": 0.8585620295584652, "learning_rate": 6.999840291523043e-07, "loss": 1.6896, "step": 46 }, { "epoch": 0.0032749189980141448, "grad_norm": 0.8582642386199358, "learning_rate": 6.999833272261546e-07, "loss": 1.5751, "step": 47 }, { "epoch": 0.0033445981256314673, "grad_norm": 0.9273580515998116, "learning_rate": 6.999826102053571e-07, "loss": 1.5397, "step": 48 }, { "epoch": 0.0034142772532487894, "grad_norm": 0.8614901819193356, "learning_rate": 6.999818780899462e-07, "loss": 1.5651, "step": 49 }, { "epoch": 0.0034839563808661115, "grad_norm": 0.8055350569759604, "learning_rate": 6.999811308799572e-07, "loss": 1.5396, "step": 50 }, { "epoch": 0.0035536355084834336, "grad_norm": 0.7667301976321873, "learning_rate": 6.999803685754257e-07, "loss": 1.5635, "step": 51 }, { "epoch": 0.003623314636100756, "grad_norm": 0.7558223853302638, "learning_rate": 6.999795911763883e-07, "loss": 1.4957, "step": 52 }, { "epoch": 0.0036929937637180782, "grad_norm": 0.8569578181282904, "learning_rate": 6.999787986828822e-07, "loss": 1.5791, "step": 53 }, { "epoch": 0.0037626728913354003, "grad_norm": 0.8084428013047424, "learning_rate": 6.999779910949456e-07, "loss": 1.6145, "step": 54 }, { "epoch": 0.003832352018952723, "grad_norm": 0.7559081128636418, "learning_rate": 6.999771684126168e-07, "loss": 1.5544, "step": 55 }, { "epoch": 0.003902031146570045, "grad_norm": 0.7689467380996695, "learning_rate": 6.999763306359357e-07, "loss": 1.5876, "step": 56 }, { "epoch": 0.003971710274187367, "grad_norm": 0.7544375614009099, "learning_rate": 6.99975477764942e-07, "loss": 1.5481, "step": 57 }, { "epoch": 0.00404138940180469, "grad_norm": 0.7831679946606233, "learning_rate": 6.999746097996769e-07, "loss": 1.5945, "step": 58 }, { "epoch": 0.004111068529422011, "grad_norm": 0.750603996125084, "learning_rate": 6.999737267401817e-07, "loss": 1.5258, "step": 59 }, { "epoch": 0.004180747657039334, "grad_norm": 0.7953623833277211, "learning_rate": 6.999728285864989e-07, "loss": 1.5848, "step": 60 }, { "epoch": 0.004250426784656656, "grad_norm": 0.7574855755742249, "learning_rate": 6.999719153386717e-07, "loss": 1.621, "step": 61 }, { "epoch": 0.004320105912273978, "grad_norm": 0.776253899297405, "learning_rate": 6.999709869967437e-07, "loss": 1.6088, "step": 62 }, { "epoch": 0.0043897850398913005, "grad_norm": 0.7610571494782742, "learning_rate": 6.999700435607591e-07, "loss": 1.5922, "step": 63 }, { "epoch": 0.004459464167508623, "grad_norm": 0.8152423242847946, "learning_rate": 6.999690850307637e-07, "loss": 1.6002, "step": 64 }, { "epoch": 0.004529143295125945, "grad_norm": 0.7968474238058425, "learning_rate": 6.99968111406803e-07, "loss": 1.5156, "step": 65 }, { "epoch": 0.004598822422743267, "grad_norm": 0.7974893416155076, "learning_rate": 6.999671226889238e-07, "loss": 1.655, "step": 66 }, { "epoch": 0.00466850155036059, "grad_norm": 0.7912729979364881, "learning_rate": 6.999661188771734e-07, "loss": 1.628, "step": 67 }, { "epoch": 0.0047381806779779115, "grad_norm": 0.7550934267500832, "learning_rate": 6.999650999716002e-07, "loss": 1.7023, "step": 68 }, { "epoch": 0.004807859805595234, "grad_norm": 0.7295311010941107, "learning_rate": 6.999640659722525e-07, "loss": 1.589, "step": 69 }, { "epoch": 0.0048775389332125565, "grad_norm": 0.7758262875672367, "learning_rate": 6.999630168791802e-07, "loss": 1.6251, "step": 70 }, { "epoch": 0.004947218060829878, "grad_norm": 0.7449174445401683, "learning_rate": 6.999619526924338e-07, "loss": 1.5023, "step": 71 }, { "epoch": 0.005016897188447201, "grad_norm": 0.7692068872313949, "learning_rate": 6.999608734120638e-07, "loss": 1.6567, "step": 72 }, { "epoch": 0.005086576316064523, "grad_norm": 0.8275918260872214, "learning_rate": 6.999597790381221e-07, "loss": 1.708, "step": 73 }, { "epoch": 0.005156255443681845, "grad_norm": 0.7872033802821833, "learning_rate": 6.999586695706612e-07, "loss": 1.7163, "step": 74 }, { "epoch": 0.0052259345712991675, "grad_norm": 0.8159200519159217, "learning_rate": 6.999575450097343e-07, "loss": 1.5663, "step": 75 }, { "epoch": 0.005295613698916489, "grad_norm": 0.7956432710560783, "learning_rate": 6.999564053553953e-07, "loss": 1.697, "step": 76 }, { "epoch": 0.005365292826533812, "grad_norm": 0.7130908379784697, "learning_rate": 6.999552506076987e-07, "loss": 1.6055, "step": 77 }, { "epoch": 0.005434971954151134, "grad_norm": 0.7274551108988991, "learning_rate": 6.999540807667e-07, "loss": 1.5876, "step": 78 }, { "epoch": 0.005504651081768456, "grad_norm": 0.7646927508527166, "learning_rate": 6.99952895832455e-07, "loss": 1.6219, "step": 79 }, { "epoch": 0.005574330209385778, "grad_norm": 0.8844817783804978, "learning_rate": 6.999516958050207e-07, "loss": 1.5852, "step": 80 }, { "epoch": 0.005644009337003101, "grad_norm": 0.8046092626958843, "learning_rate": 6.999504806844547e-07, "loss": 1.6941, "step": 81 }, { "epoch": 0.005713688464620423, "grad_norm": 0.7565003415350436, "learning_rate": 6.999492504708149e-07, "loss": 1.5299, "step": 82 }, { "epoch": 0.005783367592237745, "grad_norm": 0.7491176510626603, "learning_rate": 6.999480051641605e-07, "loss": 1.4925, "step": 83 }, { "epoch": 0.005853046719855068, "grad_norm": 0.7518065408047131, "learning_rate": 6.999467447645511e-07, "loss": 1.6606, "step": 84 }, { "epoch": 0.005922725847472389, "grad_norm": 0.7208224360353647, "learning_rate": 6.999454692720472e-07, "loss": 1.5146, "step": 85 }, { "epoch": 0.005992404975089712, "grad_norm": 0.7845222532902206, "learning_rate": 6.999441786867097e-07, "loss": 1.6499, "step": 86 }, { "epoch": 0.006062084102707034, "grad_norm": 0.7179978138866828, "learning_rate": 6.999428730086007e-07, "loss": 1.6328, "step": 87 }, { "epoch": 0.006131763230324356, "grad_norm": 0.7211806613439429, "learning_rate": 6.999415522377827e-07, "loss": 1.5726, "step": 88 }, { "epoch": 0.006201442357941679, "grad_norm": 0.7546953013822226, "learning_rate": 6.999402163743188e-07, "loss": 1.5601, "step": 89 }, { "epoch": 0.006271121485559001, "grad_norm": 0.7566779977768506, "learning_rate": 6.999388654182733e-07, "loss": 1.684, "step": 90 }, { "epoch": 0.006340800613176323, "grad_norm": 0.6969488028265275, "learning_rate": 6.999374993697108e-07, "loss": 1.5598, "step": 91 }, { "epoch": 0.006410479740793645, "grad_norm": 0.705981536567612, "learning_rate": 6.999361182286967e-07, "loss": 1.5545, "step": 92 }, { "epoch": 0.006480158868410968, "grad_norm": 0.8466488562144117, "learning_rate": 6.999347219952973e-07, "loss": 1.6598, "step": 93 }, { "epoch": 0.0065498379960282896, "grad_norm": 0.7800529271957665, "learning_rate": 6.999333106695795e-07, "loss": 1.6096, "step": 94 }, { "epoch": 0.006619517123645612, "grad_norm": 0.7752456360469325, "learning_rate": 6.999318842516109e-07, "loss": 1.6597, "step": 95 }, { "epoch": 0.006689196251262935, "grad_norm": 0.6961730716580421, "learning_rate": 6.999304427414599e-07, "loss": 1.4991, "step": 96 }, { "epoch": 0.006758875378880256, "grad_norm": 0.7111901307005418, "learning_rate": 6.999289861391954e-07, "loss": 1.5348, "step": 97 }, { "epoch": 0.006828554506497579, "grad_norm": 0.7729231425029259, "learning_rate": 6.999275144448875e-07, "loss": 1.7086, "step": 98 }, { "epoch": 0.0068982336341149005, "grad_norm": 0.7207807898538051, "learning_rate": 6.999260276586065e-07, "loss": 1.4942, "step": 99 }, { "epoch": 0.006967912761732223, "grad_norm": 0.8209832140800927, "learning_rate": 6.999245257804236e-07, "loss": 1.6462, "step": 100 }, { "epoch": 0.0070375918893495456, "grad_norm": 0.7372508955817842, "learning_rate": 6.999230088104111e-07, "loss": 1.6425, "step": 101 }, { "epoch": 0.007107271016966867, "grad_norm": 0.7103080276487075, "learning_rate": 6.999214767486415e-07, "loss": 1.5064, "step": 102 }, { "epoch": 0.00717695014458419, "grad_norm": 0.7453563805733722, "learning_rate": 6.999199295951881e-07, "loss": 1.5735, "step": 103 }, { "epoch": 0.007246629272201512, "grad_norm": 0.7316234271209868, "learning_rate": 6.999183673501252e-07, "loss": 1.487, "step": 104 }, { "epoch": 0.007316308399818834, "grad_norm": 0.8306495639187648, "learning_rate": 6.999167900135276e-07, "loss": 1.6901, "step": 105 }, { "epoch": 0.0073859875274361565, "grad_norm": 0.6877525548972586, "learning_rate": 6.99915197585471e-07, "loss": 1.5244, "step": 106 }, { "epoch": 0.007455666655053479, "grad_norm": 0.7545823598451985, "learning_rate": 6.999135900660315e-07, "loss": 1.6525, "step": 107 }, { "epoch": 0.007525345782670801, "grad_norm": 0.7557325144444581, "learning_rate": 6.999119674552864e-07, "loss": 1.5528, "step": 108 }, { "epoch": 0.007595024910288123, "grad_norm": 0.7363162596156012, "learning_rate": 6.999103297533131e-07, "loss": 1.6752, "step": 109 }, { "epoch": 0.007664704037905446, "grad_norm": 0.7779785261187538, "learning_rate": 6.999086769601905e-07, "loss": 1.6054, "step": 110 }, { "epoch": 0.007734383165522767, "grad_norm": 0.7375695814517242, "learning_rate": 6.999070090759976e-07, "loss": 1.6362, "step": 111 }, { "epoch": 0.00780406229314009, "grad_norm": 0.7295801652020935, "learning_rate": 6.999053261008142e-07, "loss": 1.6278, "step": 112 }, { "epoch": 0.007873741420757412, "grad_norm": 0.7564600054233208, "learning_rate": 6.999036280347211e-07, "loss": 1.5602, "step": 113 }, { "epoch": 0.007943420548374734, "grad_norm": 0.7325116000746935, "learning_rate": 6.999019148777999e-07, "loss": 1.5854, "step": 114 }, { "epoch": 0.008013099675992057, "grad_norm": 0.750333027438248, "learning_rate": 6.999001866301322e-07, "loss": 1.6378, "step": 115 }, { "epoch": 0.00808277880360938, "grad_norm": 0.7077391988654039, "learning_rate": 6.998984432918011e-07, "loss": 1.4496, "step": 116 }, { "epoch": 0.008152457931226702, "grad_norm": 0.7061194584771281, "learning_rate": 6.998966848628901e-07, "loss": 1.7211, "step": 117 }, { "epoch": 0.008222137058844023, "grad_norm": 0.753741884650152, "learning_rate": 6.998949113434836e-07, "loss": 1.5725, "step": 118 }, { "epoch": 0.008291816186461345, "grad_norm": 0.8776126975121025, "learning_rate": 6.998931227336665e-07, "loss": 1.4603, "step": 119 }, { "epoch": 0.008361495314078668, "grad_norm": 0.7457924178802692, "learning_rate": 6.998913190335243e-07, "loss": 1.5817, "step": 120 }, { "epoch": 0.00843117444169599, "grad_norm": 0.6662869164399065, "learning_rate": 6.998895002431439e-07, "loss": 1.4134, "step": 121 }, { "epoch": 0.008500853569313313, "grad_norm": 0.730929392364289, "learning_rate": 6.998876663626121e-07, "loss": 1.6551, "step": 122 }, { "epoch": 0.008570532696930635, "grad_norm": 0.7485428242937736, "learning_rate": 6.998858173920168e-07, "loss": 1.5917, "step": 123 }, { "epoch": 0.008640211824547956, "grad_norm": 0.7041960816293819, "learning_rate": 6.998839533314466e-07, "loss": 1.5663, "step": 124 }, { "epoch": 0.008709890952165279, "grad_norm": 0.7065757407197867, "learning_rate": 6.998820741809911e-07, "loss": 1.5738, "step": 125 }, { "epoch": 0.008779570079782601, "grad_norm": 0.7167544228047786, "learning_rate": 6.998801799407402e-07, "loss": 1.4507, "step": 126 }, { "epoch": 0.008849249207399924, "grad_norm": 0.7373858233625152, "learning_rate": 6.998782706107846e-07, "loss": 1.5638, "step": 127 }, { "epoch": 0.008918928335017246, "grad_norm": 0.762790306505906, "learning_rate": 6.998763461912157e-07, "loss": 1.5871, "step": 128 }, { "epoch": 0.008988607462634567, "grad_norm": 0.7997672459056562, "learning_rate": 6.998744066821259e-07, "loss": 1.7292, "step": 129 }, { "epoch": 0.00905828659025189, "grad_norm": 0.6949799061752019, "learning_rate": 6.998724520836082e-07, "loss": 1.557, "step": 130 }, { "epoch": 0.009127965717869212, "grad_norm": 0.7429111002352855, "learning_rate": 6.998704823957561e-07, "loss": 1.6075, "step": 131 }, { "epoch": 0.009197644845486535, "grad_norm": 0.7247660368087409, "learning_rate": 6.99868497618664e-07, "loss": 1.4907, "step": 132 }, { "epoch": 0.009267323973103857, "grad_norm": 0.753497515434336, "learning_rate": 6.998664977524273e-07, "loss": 1.5622, "step": 133 }, { "epoch": 0.00933700310072118, "grad_norm": 0.7525401766307482, "learning_rate": 6.998644827971414e-07, "loss": 1.6525, "step": 134 }, { "epoch": 0.0094066822283385, "grad_norm": 0.7719829875157536, "learning_rate": 6.998624527529032e-07, "loss": 1.7565, "step": 135 }, { "epoch": 0.009476361355955823, "grad_norm": 0.7780799099616043, "learning_rate": 6.998604076198099e-07, "loss": 1.5999, "step": 136 }, { "epoch": 0.009546040483573145, "grad_norm": 0.6788851566259799, "learning_rate": 6.998583473979593e-07, "loss": 1.5832, "step": 137 }, { "epoch": 0.009615719611190468, "grad_norm": 0.6759649759261548, "learning_rate": 6.998562720874505e-07, "loss": 1.5318, "step": 138 }, { "epoch": 0.00968539873880779, "grad_norm": 0.722411480856436, "learning_rate": 6.998541816883826e-07, "loss": 1.4956, "step": 139 }, { "epoch": 0.009755077866425113, "grad_norm": 0.7963536281427879, "learning_rate": 6.99852076200856e-07, "loss": 1.6532, "step": 140 }, { "epoch": 0.009824756994042434, "grad_norm": 0.711429048225027, "learning_rate": 6.998499556249715e-07, "loss": 1.5737, "step": 141 }, { "epoch": 0.009894436121659756, "grad_norm": 0.6897849482508333, "learning_rate": 6.998478199608309e-07, "loss": 1.5071, "step": 142 }, { "epoch": 0.009964115249277079, "grad_norm": 0.699945633650181, "learning_rate": 6.998456692085361e-07, "loss": 1.5943, "step": 143 }, { "epoch": 0.010033794376894401, "grad_norm": 0.6689453841253, "learning_rate": 6.998435033681907e-07, "loss": 1.4587, "step": 144 }, { "epoch": 0.010103473504511724, "grad_norm": 0.7133832784569326, "learning_rate": 6.998413224398982e-07, "loss": 1.5861, "step": 145 }, { "epoch": 0.010173152632129047, "grad_norm": 0.7036410895242622, "learning_rate": 6.998391264237631e-07, "loss": 1.6346, "step": 146 }, { "epoch": 0.010242831759746367, "grad_norm": 0.6886627438173859, "learning_rate": 6.998369153198908e-07, "loss": 1.5589, "step": 147 }, { "epoch": 0.01031251088736369, "grad_norm": 0.7367558477965669, "learning_rate": 6.998346891283872e-07, "loss": 1.6064, "step": 148 }, { "epoch": 0.010382190014981012, "grad_norm": 0.8105693813585202, "learning_rate": 6.998324478493588e-07, "loss": 1.6741, "step": 149 }, { "epoch": 0.010451869142598335, "grad_norm": 0.7110523737644423, "learning_rate": 6.998301914829133e-07, "loss": 1.5858, "step": 150 }, { "epoch": 0.010521548270215658, "grad_norm": 0.7292303967140454, "learning_rate": 6.998279200291587e-07, "loss": 1.644, "step": 151 }, { "epoch": 0.010591227397832978, "grad_norm": 0.7220512349531498, "learning_rate": 6.998256334882037e-07, "loss": 1.5486, "step": 152 }, { "epoch": 0.0106609065254503, "grad_norm": 0.7698867140395331, "learning_rate": 6.998233318601581e-07, "loss": 1.5194, "step": 153 }, { "epoch": 0.010730585653067623, "grad_norm": 0.9732965207520899, "learning_rate": 6.998210151451322e-07, "loss": 1.5542, "step": 154 }, { "epoch": 0.010800264780684946, "grad_norm": 0.7118245740667292, "learning_rate": 6.998186833432369e-07, "loss": 1.5825, "step": 155 }, { "epoch": 0.010869943908302268, "grad_norm": 0.6767247887601149, "learning_rate": 6.998163364545839e-07, "loss": 1.5421, "step": 156 }, { "epoch": 0.010939623035919591, "grad_norm": 0.7463837156055579, "learning_rate": 6.998139744792858e-07, "loss": 1.3482, "step": 157 }, { "epoch": 0.011009302163536912, "grad_norm": 0.7023208534797809, "learning_rate": 6.998115974174558e-07, "loss": 1.5365, "step": 158 }, { "epoch": 0.011078981291154234, "grad_norm": 0.7779897717499841, "learning_rate": 6.998092052692076e-07, "loss": 1.5526, "step": 159 }, { "epoch": 0.011148660418771557, "grad_norm": 0.7022624765789008, "learning_rate": 6.998067980346561e-07, "loss": 1.5957, "step": 160 }, { "epoch": 0.01121833954638888, "grad_norm": 0.7690238675537974, "learning_rate": 6.998043757139166e-07, "loss": 1.6412, "step": 161 }, { "epoch": 0.011288018674006202, "grad_norm": 0.6977898185845065, "learning_rate": 6.99801938307105e-07, "loss": 1.5129, "step": 162 }, { "epoch": 0.011357697801623524, "grad_norm": 0.7360024484783224, "learning_rate": 6.997994858143384e-07, "loss": 1.6246, "step": 163 }, { "epoch": 0.011427376929240845, "grad_norm": 0.743352598561228, "learning_rate": 6.997970182357341e-07, "loss": 1.6589, "step": 164 }, { "epoch": 0.011497056056858168, "grad_norm": 0.7438090842992685, "learning_rate": 6.997945355714104e-07, "loss": 1.7993, "step": 165 }, { "epoch": 0.01156673518447549, "grad_norm": 0.7806976756154644, "learning_rate": 6.997920378214862e-07, "loss": 1.6904, "step": 166 }, { "epoch": 0.011636414312092813, "grad_norm": 0.7617691034349805, "learning_rate": 6.997895249860815e-07, "loss": 1.5514, "step": 167 }, { "epoch": 0.011706093439710135, "grad_norm": 0.6897868904950596, "learning_rate": 6.997869970653164e-07, "loss": 1.7041, "step": 168 }, { "epoch": 0.011775772567327458, "grad_norm": 0.7171263670275106, "learning_rate": 6.997844540593121e-07, "loss": 1.5569, "step": 169 }, { "epoch": 0.011845451694944779, "grad_norm": 0.7349273462264111, "learning_rate": 6.997818959681906e-07, "loss": 1.6214, "step": 170 }, { "epoch": 0.011915130822562101, "grad_norm": 0.7595114670832553, "learning_rate": 6.997793227920744e-07, "loss": 1.7286, "step": 171 }, { "epoch": 0.011984809950179424, "grad_norm": 0.6743455616725484, "learning_rate": 6.997767345310868e-07, "loss": 1.5967, "step": 172 }, { "epoch": 0.012054489077796746, "grad_norm": 0.8442688339279371, "learning_rate": 6.997741311853519e-07, "loss": 1.5291, "step": 173 }, { "epoch": 0.012124168205414069, "grad_norm": 0.7135683838950063, "learning_rate": 6.997715127549944e-07, "loss": 1.547, "step": 174 }, { "epoch": 0.01219384733303139, "grad_norm": 0.7680553601030344, "learning_rate": 6.997688792401398e-07, "loss": 1.5167, "step": 175 }, { "epoch": 0.012263526460648712, "grad_norm": 0.7351716147785469, "learning_rate": 6.997662306409142e-07, "loss": 1.5149, "step": 176 }, { "epoch": 0.012333205588266035, "grad_norm": 0.708247558633638, "learning_rate": 6.997635669574448e-07, "loss": 1.5914, "step": 177 }, { "epoch": 0.012402884715883357, "grad_norm": 0.7447006944945493, "learning_rate": 6.997608881898589e-07, "loss": 1.6293, "step": 178 }, { "epoch": 0.01247256384350068, "grad_norm": 0.7344799477527223, "learning_rate": 6.997581943382852e-07, "loss": 1.6402, "step": 179 }, { "epoch": 0.012542242971118002, "grad_norm": 0.7278240397379574, "learning_rate": 6.997554854028525e-07, "loss": 1.5459, "step": 180 }, { "epoch": 0.012611922098735323, "grad_norm": 0.7020532255212869, "learning_rate": 6.997527613836908e-07, "loss": 1.5233, "step": 181 }, { "epoch": 0.012681601226352646, "grad_norm": 0.7588235184758088, "learning_rate": 6.997500222809307e-07, "loss": 1.5568, "step": 182 }, { "epoch": 0.012751280353969968, "grad_norm": 0.7530180397421503, "learning_rate": 6.997472680947033e-07, "loss": 1.5685, "step": 183 }, { "epoch": 0.01282095948158729, "grad_norm": 0.7180796924001129, "learning_rate": 6.997444988251405e-07, "loss": 1.6382, "step": 184 }, { "epoch": 0.012890638609204613, "grad_norm": 0.7664151954290458, "learning_rate": 6.997417144723754e-07, "loss": 1.5858, "step": 185 }, { "epoch": 0.012960317736821936, "grad_norm": 0.7437188630824249, "learning_rate": 6.99738915036541e-07, "loss": 1.6035, "step": 186 }, { "epoch": 0.013029996864439257, "grad_norm": 0.7201391130892155, "learning_rate": 6.997361005177718e-07, "loss": 1.5728, "step": 187 }, { "epoch": 0.013099675992056579, "grad_norm": 0.7259717563800355, "learning_rate": 6.997332709162024e-07, "loss": 1.6235, "step": 188 }, { "epoch": 0.013169355119673902, "grad_norm": 0.8047726194305217, "learning_rate": 6.997304262319686e-07, "loss": 1.7083, "step": 189 }, { "epoch": 0.013239034247291224, "grad_norm": 0.7286276274616162, "learning_rate": 6.997275664652066e-07, "loss": 1.65, "step": 190 }, { "epoch": 0.013308713374908547, "grad_norm": 0.7179888240618036, "learning_rate": 6.997246916160535e-07, "loss": 1.4977, "step": 191 }, { "epoch": 0.01337839250252587, "grad_norm": 0.6454186584546411, "learning_rate": 6.99721801684647e-07, "loss": 1.4368, "step": 192 }, { "epoch": 0.01344807163014319, "grad_norm": 0.8428174000154314, "learning_rate": 6.997188966711259e-07, "loss": 1.6111, "step": 193 }, { "epoch": 0.013517750757760513, "grad_norm": 0.7142931494445387, "learning_rate": 6.997159765756289e-07, "loss": 1.4782, "step": 194 }, { "epoch": 0.013587429885377835, "grad_norm": 0.7013011170224142, "learning_rate": 6.997130413982963e-07, "loss": 1.6277, "step": 195 }, { "epoch": 0.013657109012995158, "grad_norm": 0.7323477085758628, "learning_rate": 6.997100911392687e-07, "loss": 1.6999, "step": 196 }, { "epoch": 0.01372678814061248, "grad_norm": 0.6782232596228719, "learning_rate": 6.997071257986873e-07, "loss": 1.5167, "step": 197 }, { "epoch": 0.013796467268229801, "grad_norm": 0.7661525942281635, "learning_rate": 6.997041453766945e-07, "loss": 1.5548, "step": 198 }, { "epoch": 0.013866146395847124, "grad_norm": 0.7078911668543628, "learning_rate": 6.997011498734329e-07, "loss": 1.6429, "step": 199 }, { "epoch": 0.013935825523464446, "grad_norm": 0.7303745750427693, "learning_rate": 6.996981392890463e-07, "loss": 1.5804, "step": 200 }, { "epoch": 0.014005504651081769, "grad_norm": 0.6831734889505485, "learning_rate": 6.996951136236786e-07, "loss": 1.4779, "step": 201 }, { "epoch": 0.014075183778699091, "grad_norm": 0.7347205886402175, "learning_rate": 6.99692072877475e-07, "loss": 1.5836, "step": 202 }, { "epoch": 0.014144862906316414, "grad_norm": 0.7096195389620719, "learning_rate": 6.996890170505813e-07, "loss": 1.6197, "step": 203 }, { "epoch": 0.014214542033933734, "grad_norm": 0.7562441052582111, "learning_rate": 6.996859461431439e-07, "loss": 1.7908, "step": 204 }, { "epoch": 0.014284221161551057, "grad_norm": 0.7334066931518938, "learning_rate": 6.996828601553098e-07, "loss": 1.4389, "step": 205 }, { "epoch": 0.01435390028916838, "grad_norm": 0.6840997752191323, "learning_rate": 6.99679759087227e-07, "loss": 1.5409, "step": 206 }, { "epoch": 0.014423579416785702, "grad_norm": 0.7665481621203577, "learning_rate": 6.996766429390443e-07, "loss": 1.5991, "step": 207 }, { "epoch": 0.014493258544403025, "grad_norm": 0.7817725118434202, "learning_rate": 6.996735117109106e-07, "loss": 1.5223, "step": 208 }, { "epoch": 0.014562937672020347, "grad_norm": 0.7283839189183391, "learning_rate": 6.996703654029764e-07, "loss": 1.6684, "step": 209 }, { "epoch": 0.014632616799637668, "grad_norm": 0.7032582333066247, "learning_rate": 6.996672040153923e-07, "loss": 1.5299, "step": 210 }, { "epoch": 0.01470229592725499, "grad_norm": 0.8077693130167044, "learning_rate": 6.996640275483096e-07, "loss": 1.5585, "step": 211 }, { "epoch": 0.014771975054872313, "grad_norm": 0.7057816760896902, "learning_rate": 6.996608360018808e-07, "loss": 1.6727, "step": 212 }, { "epoch": 0.014841654182489636, "grad_norm": 0.7312558829705393, "learning_rate": 6.996576293762589e-07, "loss": 1.5828, "step": 213 }, { "epoch": 0.014911333310106958, "grad_norm": 0.6892094634096153, "learning_rate": 6.996544076715972e-07, "loss": 1.5521, "step": 214 }, { "epoch": 0.014981012437724279, "grad_norm": 0.7622336534357208, "learning_rate": 6.996511708880504e-07, "loss": 1.5927, "step": 215 }, { "epoch": 0.015050691565341601, "grad_norm": 0.8615964785895364, "learning_rate": 6.996479190257735e-07, "loss": 1.495, "step": 216 }, { "epoch": 0.015120370692958924, "grad_norm": 0.7461775817348781, "learning_rate": 6.996446520849225e-07, "loss": 1.6432, "step": 217 }, { "epoch": 0.015190049820576246, "grad_norm": 0.702229537181909, "learning_rate": 6.996413700656536e-07, "loss": 1.5812, "step": 218 }, { "epoch": 0.015259728948193569, "grad_norm": 0.7563790834731114, "learning_rate": 6.996380729681244e-07, "loss": 1.4811, "step": 219 }, { "epoch": 0.015329408075810892, "grad_norm": 0.7242920709990179, "learning_rate": 6.99634760792493e-07, "loss": 1.4832, "step": 220 }, { "epoch": 0.015399087203428212, "grad_norm": 0.7170579173498212, "learning_rate": 6.996314335389177e-07, "loss": 1.5585, "step": 221 }, { "epoch": 0.015468766331045535, "grad_norm": 0.7477520743417225, "learning_rate": 6.996280912075582e-07, "loss": 1.6152, "step": 222 }, { "epoch": 0.015538445458662857, "grad_norm": 0.7365668010013201, "learning_rate": 6.996247337985746e-07, "loss": 1.6056, "step": 223 }, { "epoch": 0.01560812458628018, "grad_norm": 0.7407770775087316, "learning_rate": 6.99621361312128e-07, "loss": 1.6089, "step": 224 }, { "epoch": 0.0156778037138975, "grad_norm": 0.7501162465150802, "learning_rate": 6.996179737483797e-07, "loss": 1.5875, "step": 225 }, { "epoch": 0.015747482841514823, "grad_norm": 0.703304864155775, "learning_rate": 6.996145711074923e-07, "loss": 1.4987, "step": 226 }, { "epoch": 0.015817161969132146, "grad_norm": 0.7613996009018047, "learning_rate": 6.996111533896286e-07, "loss": 1.517, "step": 227 }, { "epoch": 0.01588684109674947, "grad_norm": 0.7295854207507647, "learning_rate": 6.996077205949528e-07, "loss": 1.5285, "step": 228 }, { "epoch": 0.01595652022436679, "grad_norm": 0.7074237961088794, "learning_rate": 6.99604272723629e-07, "loss": 1.46, "step": 229 }, { "epoch": 0.016026199351984113, "grad_norm": 0.7560396455970501, "learning_rate": 6.996008097758227e-07, "loss": 1.505, "step": 230 }, { "epoch": 0.016095878479601436, "grad_norm": 0.7333373513299797, "learning_rate": 6.995973317516995e-07, "loss": 1.5871, "step": 231 }, { "epoch": 0.01616555760721876, "grad_norm": 0.640181179700889, "learning_rate": 6.995938386514265e-07, "loss": 1.5224, "step": 232 }, { "epoch": 0.01623523673483608, "grad_norm": 0.7101909631345903, "learning_rate": 6.995903304751709e-07, "loss": 1.6038, "step": 233 }, { "epoch": 0.016304915862453404, "grad_norm": 0.7454843789810528, "learning_rate": 6.995868072231007e-07, "loss": 1.627, "step": 234 }, { "epoch": 0.016374594990070723, "grad_norm": 0.7024591060628032, "learning_rate": 6.99583268895385e-07, "loss": 1.5807, "step": 235 }, { "epoch": 0.016444274117688045, "grad_norm": 0.8070089400837613, "learning_rate": 6.995797154921931e-07, "loss": 1.5066, "step": 236 }, { "epoch": 0.016513953245305368, "grad_norm": 0.7292465008262952, "learning_rate": 6.995761470136955e-07, "loss": 1.5627, "step": 237 }, { "epoch": 0.01658363237292269, "grad_norm": 0.7678168219350536, "learning_rate": 6.995725634600631e-07, "loss": 1.6508, "step": 238 }, { "epoch": 0.016653311500540013, "grad_norm": 0.7171811611510969, "learning_rate": 6.995689648314677e-07, "loss": 1.6866, "step": 239 }, { "epoch": 0.016722990628157335, "grad_norm": 0.7466351681320509, "learning_rate": 6.995653511280816e-07, "loss": 1.6049, "step": 240 }, { "epoch": 0.016792669755774658, "grad_norm": 0.7266320402780408, "learning_rate": 6.995617223500782e-07, "loss": 1.5321, "step": 241 }, { "epoch": 0.01686234888339198, "grad_norm": 0.7063647038715658, "learning_rate": 6.995580784976312e-07, "loss": 1.5047, "step": 242 }, { "epoch": 0.016932028011009303, "grad_norm": 0.7085394609516711, "learning_rate": 6.995544195709153e-07, "loss": 1.7102, "step": 243 }, { "epoch": 0.017001707138626625, "grad_norm": 0.7046716168804864, "learning_rate": 6.995507455701059e-07, "loss": 1.4328, "step": 244 }, { "epoch": 0.017071386266243948, "grad_norm": 0.72750562924709, "learning_rate": 6.995470564953791e-07, "loss": 1.5067, "step": 245 }, { "epoch": 0.01714106539386127, "grad_norm": 0.6583564224352184, "learning_rate": 6.995433523469114e-07, "loss": 1.5941, "step": 246 }, { "epoch": 0.01721074452147859, "grad_norm": 0.6829271380484534, "learning_rate": 6.995396331248806e-07, "loss": 1.5829, "step": 247 }, { "epoch": 0.017280423649095912, "grad_norm": 0.7501970674237022, "learning_rate": 6.99535898829465e-07, "loss": 1.5741, "step": 248 }, { "epoch": 0.017350102776713235, "grad_norm": 0.718589643719869, "learning_rate": 6.995321494608432e-07, "loss": 1.6546, "step": 249 }, { "epoch": 0.017419781904330557, "grad_norm": 0.7018977730704544, "learning_rate": 6.995283850191951e-07, "loss": 1.6441, "step": 250 }, { "epoch": 0.01748946103194788, "grad_norm": 0.7116814108818308, "learning_rate": 6.99524605504701e-07, "loss": 1.6247, "step": 251 }, { "epoch": 0.017559140159565202, "grad_norm": 0.7417559961150321, "learning_rate": 6.995208109175422e-07, "loss": 1.6553, "step": 252 }, { "epoch": 0.017628819287182525, "grad_norm": 0.7195473670622379, "learning_rate": 6.995170012579004e-07, "loss": 1.4587, "step": 253 }, { "epoch": 0.017698498414799847, "grad_norm": 0.6889531952135118, "learning_rate": 6.995131765259583e-07, "loss": 1.6015, "step": 254 }, { "epoch": 0.01776817754241717, "grad_norm": 0.6800146240575404, "learning_rate": 6.995093367218991e-07, "loss": 1.542, "step": 255 }, { "epoch": 0.017837856670034492, "grad_norm": 0.706603053149328, "learning_rate": 6.995054818459067e-07, "loss": 1.516, "step": 256 }, { "epoch": 0.017907535797651815, "grad_norm": 0.7258769121019596, "learning_rate": 6.99501611898166e-07, "loss": 1.5032, "step": 257 }, { "epoch": 0.017977214925269134, "grad_norm": 0.7475511019702241, "learning_rate": 6.994977268788624e-07, "loss": 1.5867, "step": 258 }, { "epoch": 0.018046894052886456, "grad_norm": 0.74299733714245, "learning_rate": 6.99493826788182e-07, "loss": 1.6865, "step": 259 }, { "epoch": 0.01811657318050378, "grad_norm": 0.7066253885889029, "learning_rate": 6.99489911626312e-07, "loss": 1.6019, "step": 260 }, { "epoch": 0.0181862523081211, "grad_norm": 0.8182502015789678, "learning_rate": 6.994859813934395e-07, "loss": 1.4875, "step": 261 }, { "epoch": 0.018255931435738424, "grad_norm": 0.7488901193748735, "learning_rate": 6.994820360897534e-07, "loss": 1.5313, "step": 262 }, { "epoch": 0.018325610563355747, "grad_norm": 0.7298797429645452, "learning_rate": 6.994780757154423e-07, "loss": 1.5985, "step": 263 }, { "epoch": 0.01839528969097307, "grad_norm": 0.7494430286408182, "learning_rate": 6.994741002706963e-07, "loss": 1.6039, "step": 264 }, { "epoch": 0.01846496881859039, "grad_norm": 0.749305407018932, "learning_rate": 6.994701097557058e-07, "loss": 1.6647, "step": 265 }, { "epoch": 0.018534647946207714, "grad_norm": 0.7393237097632813, "learning_rate": 6.99466104170662e-07, "loss": 1.6921, "step": 266 }, { "epoch": 0.018604327073825037, "grad_norm": 0.7052812834008583, "learning_rate": 6.99462083515757e-07, "loss": 1.4998, "step": 267 }, { "epoch": 0.01867400620144236, "grad_norm": 0.7656850091837507, "learning_rate": 6.994580477911834e-07, "loss": 1.551, "step": 268 }, { "epoch": 0.018743685329059682, "grad_norm": 0.7262479768296017, "learning_rate": 6.994539969971345e-07, "loss": 1.6432, "step": 269 }, { "epoch": 0.018813364456677, "grad_norm": 0.6957600485001357, "learning_rate": 6.994499311338046e-07, "loss": 1.6025, "step": 270 }, { "epoch": 0.018883043584294323, "grad_norm": 0.6905054997272053, "learning_rate": 6.994458502013883e-07, "loss": 1.4301, "step": 271 }, { "epoch": 0.018952722711911646, "grad_norm": 0.7179445614004244, "learning_rate": 6.994417542000813e-07, "loss": 1.5864, "step": 272 }, { "epoch": 0.01902240183952897, "grad_norm": 0.7072984855963963, "learning_rate": 6.994376431300801e-07, "loss": 1.6053, "step": 273 }, { "epoch": 0.01909208096714629, "grad_norm": 0.7570107373874823, "learning_rate": 6.994335169915813e-07, "loss": 1.6383, "step": 274 }, { "epoch": 0.019161760094763614, "grad_norm": 0.7274334966890454, "learning_rate": 6.99429375784783e-07, "loss": 1.625, "step": 275 }, { "epoch": 0.019231439222380936, "grad_norm": 0.7563658771609096, "learning_rate": 6.994252195098834e-07, "loss": 1.4685, "step": 276 }, { "epoch": 0.01930111834999826, "grad_norm": 0.6811578449534527, "learning_rate": 6.994210481670817e-07, "loss": 1.5123, "step": 277 }, { "epoch": 0.01937079747761558, "grad_norm": 0.6786727534901147, "learning_rate": 6.994168617565782e-07, "loss": 1.4686, "step": 278 }, { "epoch": 0.019440476605232904, "grad_norm": 0.7655364222476935, "learning_rate": 6.994126602785729e-07, "loss": 1.4576, "step": 279 }, { "epoch": 0.019510155732850226, "grad_norm": 0.6821324813628526, "learning_rate": 6.994084437332676e-07, "loss": 1.5197, "step": 280 }, { "epoch": 0.019579834860467545, "grad_norm": 0.7262970704517422, "learning_rate": 6.99404212120864e-07, "loss": 1.4957, "step": 281 }, { "epoch": 0.019649513988084868, "grad_norm": 0.743676021736133, "learning_rate": 6.993999654415654e-07, "loss": 1.6085, "step": 282 }, { "epoch": 0.01971919311570219, "grad_norm": 0.6378550410731294, "learning_rate": 6.993957036955749e-07, "loss": 1.602, "step": 283 }, { "epoch": 0.019788872243319513, "grad_norm": 0.7246033604464897, "learning_rate": 6.993914268830967e-07, "loss": 1.5291, "step": 284 }, { "epoch": 0.019858551370936835, "grad_norm": 0.6880276199996247, "learning_rate": 6.99387135004336e-07, "loss": 1.4173, "step": 285 }, { "epoch": 0.019928230498554158, "grad_norm": 0.6947157355311014, "learning_rate": 6.993828280594983e-07, "loss": 1.6025, "step": 286 }, { "epoch": 0.01999790962617148, "grad_norm": 0.7435927158927187, "learning_rate": 6.993785060487901e-07, "loss": 1.4261, "step": 287 }, { "epoch": 0.020067588753788803, "grad_norm": 0.7142473424703729, "learning_rate": 6.993741689724185e-07, "loss": 1.5124, "step": 288 }, { "epoch": 0.020137267881406126, "grad_norm": 0.7008072048840218, "learning_rate": 6.993698168305914e-07, "loss": 1.5411, "step": 289 }, { "epoch": 0.020206947009023448, "grad_norm": 0.7435854653268873, "learning_rate": 6.993654496235172e-07, "loss": 1.6077, "step": 290 }, { "epoch": 0.02027662613664077, "grad_norm": 0.725379113270835, "learning_rate": 6.993610673514052e-07, "loss": 1.5128, "step": 291 }, { "epoch": 0.020346305264258093, "grad_norm": 0.7117406405072563, "learning_rate": 6.993566700144656e-07, "loss": 1.6271, "step": 292 }, { "epoch": 0.020415984391875412, "grad_norm": 0.702802060197601, "learning_rate": 6.99352257612909e-07, "loss": 1.6355, "step": 293 }, { "epoch": 0.020485663519492735, "grad_norm": 0.7416404216373786, "learning_rate": 6.993478301469469e-07, "loss": 1.5558, "step": 294 }, { "epoch": 0.020555342647110057, "grad_norm": 6.474567925910475, "learning_rate": 6.993433876167913e-07, "loss": 1.6586, "step": 295 }, { "epoch": 0.02062502177472738, "grad_norm": 0.7080837056054183, "learning_rate": 6.993389300226553e-07, "loss": 1.6812, "step": 296 }, { "epoch": 0.020694700902344702, "grad_norm": 0.6653198180715081, "learning_rate": 6.993344573647524e-07, "loss": 1.4974, "step": 297 }, { "epoch": 0.020764380029962025, "grad_norm": 0.7204476403446304, "learning_rate": 6.99329969643297e-07, "loss": 1.5675, "step": 298 }, { "epoch": 0.020834059157579347, "grad_norm": 0.7445246032941663, "learning_rate": 6.993254668585042e-07, "loss": 1.5476, "step": 299 }, { "epoch": 0.02090373828519667, "grad_norm": 0.7460425723778988, "learning_rate": 6.993209490105897e-07, "loss": 1.4898, "step": 300 }, { "epoch": 0.020973417412813992, "grad_norm": 0.7157641517249568, "learning_rate": 6.9931641609977e-07, "loss": 1.5786, "step": 301 }, { "epoch": 0.021043096540431315, "grad_norm": 0.7016023826923905, "learning_rate": 6.993118681262625e-07, "loss": 1.5981, "step": 302 }, { "epoch": 0.021112775668048638, "grad_norm": 0.7382367733171915, "learning_rate": 6.993073050902849e-07, "loss": 1.5072, "step": 303 }, { "epoch": 0.021182454795665957, "grad_norm": 0.7606065005113284, "learning_rate": 6.99302726992056e-07, "loss": 1.4621, "step": 304 }, { "epoch": 0.02125213392328328, "grad_norm": 0.6813739822883382, "learning_rate": 6.992981338317952e-07, "loss": 1.4303, "step": 305 }, { "epoch": 0.0213218130509006, "grad_norm": 9.560327004767403, "learning_rate": 6.992935256097225e-07, "loss": 1.6614, "step": 306 }, { "epoch": 0.021391492178517924, "grad_norm": 0.7301189610598565, "learning_rate": 6.992889023260589e-07, "loss": 1.7052, "step": 307 }, { "epoch": 0.021461171306135247, "grad_norm": 0.7407277030456509, "learning_rate": 6.992842639810258e-07, "loss": 1.6177, "step": 308 }, { "epoch": 0.02153085043375257, "grad_norm": 0.6916162651462537, "learning_rate": 6.992796105748455e-07, "loss": 1.3961, "step": 309 }, { "epoch": 0.021600529561369892, "grad_norm": 0.7318959820786545, "learning_rate": 6.992749421077412e-07, "loss": 1.6585, "step": 310 }, { "epoch": 0.021670208688987214, "grad_norm": 0.7366437888923923, "learning_rate": 6.992702585799365e-07, "loss": 1.6567, "step": 311 }, { "epoch": 0.021739887816604537, "grad_norm": 0.7091001881005623, "learning_rate": 6.992655599916557e-07, "loss": 1.4418, "step": 312 }, { "epoch": 0.02180956694422186, "grad_norm": 0.7275485003563671, "learning_rate": 6.992608463431242e-07, "loss": 1.5372, "step": 313 }, { "epoch": 0.021879246071839182, "grad_norm": 0.6704285285994563, "learning_rate": 6.992561176345678e-07, "loss": 1.4605, "step": 314 }, { "epoch": 0.021948925199456504, "grad_norm": 0.7125544818193529, "learning_rate": 6.99251373866213e-07, "loss": 1.5262, "step": 315 }, { "epoch": 0.022018604327073824, "grad_norm": 0.7509609338377493, "learning_rate": 6.992466150382873e-07, "loss": 1.5833, "step": 316 }, { "epoch": 0.022088283454691146, "grad_norm": 0.747340093745921, "learning_rate": 6.992418411510185e-07, "loss": 1.6504, "step": 317 }, { "epoch": 0.02215796258230847, "grad_norm": 0.7046299436255133, "learning_rate": 6.992370522046357e-07, "loss": 1.4034, "step": 318 }, { "epoch": 0.02222764170992579, "grad_norm": 0.7257162097711396, "learning_rate": 6.992322481993683e-07, "loss": 1.5848, "step": 319 }, { "epoch": 0.022297320837543114, "grad_norm": 0.6691152644528866, "learning_rate": 6.992274291354463e-07, "loss": 1.4598, "step": 320 }, { "epoch": 0.022366999965160436, "grad_norm": 0.7457614871946824, "learning_rate": 6.992225950131009e-07, "loss": 1.5387, "step": 321 }, { "epoch": 0.02243667909277776, "grad_norm": 0.689307701631239, "learning_rate": 6.992177458325635e-07, "loss": 1.5808, "step": 322 }, { "epoch": 0.02250635822039508, "grad_norm": 0.7335991464373063, "learning_rate": 6.992128815940668e-07, "loss": 1.6553, "step": 323 }, { "epoch": 0.022576037348012404, "grad_norm": 0.6899224219200941, "learning_rate": 6.992080022978437e-07, "loss": 1.5265, "step": 324 }, { "epoch": 0.022645716475629726, "grad_norm": 0.6806245477363745, "learning_rate": 6.992031079441282e-07, "loss": 1.4794, "step": 325 }, { "epoch": 0.02271539560324705, "grad_norm": 0.8141536466407525, "learning_rate": 6.991981985331546e-07, "loss": 1.5921, "step": 326 }, { "epoch": 0.022785074730864368, "grad_norm": 0.6788755156136146, "learning_rate": 6.991932740651582e-07, "loss": 1.5735, "step": 327 }, { "epoch": 0.02285475385848169, "grad_norm": 0.7272500675836373, "learning_rate": 6.991883345403751e-07, "loss": 1.55, "step": 328 }, { "epoch": 0.022924432986099013, "grad_norm": 0.7009298523819532, "learning_rate": 6.991833799590422e-07, "loss": 1.4261, "step": 329 }, { "epoch": 0.022994112113716336, "grad_norm": 0.6948863685694087, "learning_rate": 6.991784103213965e-07, "loss": 1.5659, "step": 330 }, { "epoch": 0.023063791241333658, "grad_norm": 0.7063327387262183, "learning_rate": 6.991734256276766e-07, "loss": 1.5343, "step": 331 }, { "epoch": 0.02313347036895098, "grad_norm": 0.7099461836572424, "learning_rate": 6.99168425878121e-07, "loss": 1.5125, "step": 332 }, { "epoch": 0.023203149496568303, "grad_norm": 0.7796603493076472, "learning_rate": 6.991634110729694e-07, "loss": 1.6092, "step": 333 }, { "epoch": 0.023272828624185626, "grad_norm": 0.7296219083016224, "learning_rate": 6.991583812124623e-07, "loss": 1.6215, "step": 334 }, { "epoch": 0.023342507751802948, "grad_norm": 0.72574929831423, "learning_rate": 6.991533362968406e-07, "loss": 1.6325, "step": 335 }, { "epoch": 0.02341218687942027, "grad_norm": 0.7019432899693439, "learning_rate": 6.99148276326346e-07, "loss": 1.5221, "step": 336 }, { "epoch": 0.023481866007037593, "grad_norm": 0.7492264986012627, "learning_rate": 6.991432013012211e-07, "loss": 1.6158, "step": 337 }, { "epoch": 0.023551545134654916, "grad_norm": 0.692588523766954, "learning_rate": 6.991381112217092e-07, "loss": 1.5078, "step": 338 }, { "epoch": 0.023621224262272235, "grad_norm": 0.6981979538678789, "learning_rate": 6.991330060880539e-07, "loss": 1.6101, "step": 339 }, { "epoch": 0.023690903389889557, "grad_norm": 0.7194235046945815, "learning_rate": 6.991278859005003e-07, "loss": 1.5601, "step": 340 }, { "epoch": 0.02376058251750688, "grad_norm": 0.7420959635718415, "learning_rate": 6.991227506592932e-07, "loss": 1.6685, "step": 341 }, { "epoch": 0.023830261645124202, "grad_norm": 0.7089125015154109, "learning_rate": 6.991176003646792e-07, "loss": 1.6257, "step": 342 }, { "epoch": 0.023899940772741525, "grad_norm": 0.7210440382167659, "learning_rate": 6.991124350169049e-07, "loss": 1.5857, "step": 343 }, { "epoch": 0.023969619900358848, "grad_norm": 0.7827481182453416, "learning_rate": 6.991072546162178e-07, "loss": 1.6987, "step": 344 }, { "epoch": 0.02403929902797617, "grad_norm": 0.7395681382788681, "learning_rate": 6.991020591628663e-07, "loss": 1.6111, "step": 345 }, { "epoch": 0.024108978155593493, "grad_norm": 0.800887301847492, "learning_rate": 6.990968486570993e-07, "loss": 1.5495, "step": 346 }, { "epoch": 0.024178657283210815, "grad_norm": 0.7185838722342582, "learning_rate": 6.990916230991663e-07, "loss": 1.6471, "step": 347 }, { "epoch": 0.024248336410828138, "grad_norm": 0.6988928940290781, "learning_rate": 6.990863824893181e-07, "loss": 1.5798, "step": 348 }, { "epoch": 0.02431801553844546, "grad_norm": 0.7670273052980163, "learning_rate": 6.990811268278056e-07, "loss": 1.6012, "step": 349 }, { "epoch": 0.02438769466606278, "grad_norm": 0.6956525844641243, "learning_rate": 6.990758561148806e-07, "loss": 1.6228, "step": 350 }, { "epoch": 0.024457373793680102, "grad_norm": 0.7380720675968462, "learning_rate": 6.990705703507959e-07, "loss": 1.6031, "step": 351 }, { "epoch": 0.024527052921297424, "grad_norm": 0.7351909737787244, "learning_rate": 6.990652695358046e-07, "loss": 1.5208, "step": 352 }, { "epoch": 0.024596732048914747, "grad_norm": 0.740389363327584, "learning_rate": 6.990599536701608e-07, "loss": 1.5537, "step": 353 }, { "epoch": 0.02466641117653207, "grad_norm": 0.8039919377703592, "learning_rate": 6.990546227541194e-07, "loss": 1.7098, "step": 354 }, { "epoch": 0.024736090304149392, "grad_norm": 0.7304226595911018, "learning_rate": 6.990492767879357e-07, "loss": 1.605, "step": 355 }, { "epoch": 0.024805769431766714, "grad_norm": 0.741024497237045, "learning_rate": 6.990439157718658e-07, "loss": 1.5043, "step": 356 }, { "epoch": 0.024875448559384037, "grad_norm": 0.7024368053127528, "learning_rate": 6.990385397061669e-07, "loss": 1.5534, "step": 357 }, { "epoch": 0.02494512768700136, "grad_norm": 0.7323425435804326, "learning_rate": 6.990331485910965e-07, "loss": 1.6298, "step": 358 }, { "epoch": 0.025014806814618682, "grad_norm": 0.7290774888294044, "learning_rate": 6.990277424269127e-07, "loss": 1.4802, "step": 359 }, { "epoch": 0.025084485942236005, "grad_norm": 0.6658582153329362, "learning_rate": 6.990223212138749e-07, "loss": 1.5377, "step": 360 }, { "epoch": 0.025154165069853327, "grad_norm": 0.739800561775487, "learning_rate": 6.990168849522429e-07, "loss": 1.5655, "step": 361 }, { "epoch": 0.025223844197470646, "grad_norm": 0.6911945679125883, "learning_rate": 6.99011433642277e-07, "loss": 1.6444, "step": 362 }, { "epoch": 0.02529352332508797, "grad_norm": 0.7460442162123894, "learning_rate": 6.990059672842386e-07, "loss": 1.4954, "step": 363 }, { "epoch": 0.02536320245270529, "grad_norm": 0.8406791490688356, "learning_rate": 6.990004858783895e-07, "loss": 1.683, "step": 364 }, { "epoch": 0.025432881580322614, "grad_norm": 0.6825887722241889, "learning_rate": 6.989949894249926e-07, "loss": 1.4904, "step": 365 }, { "epoch": 0.025502560707939936, "grad_norm": 0.734984485601963, "learning_rate": 6.989894779243111e-07, "loss": 1.57, "step": 366 }, { "epoch": 0.02557223983555726, "grad_norm": 0.802220464323417, "learning_rate": 6.989839513766093e-07, "loss": 1.5195, "step": 367 }, { "epoch": 0.02564191896317458, "grad_norm": 0.7037489151381637, "learning_rate": 6.989784097821519e-07, "loss": 1.4717, "step": 368 }, { "epoch": 0.025711598090791904, "grad_norm": 0.7343259525355395, "learning_rate": 6.989728531412046e-07, "loss": 1.5571, "step": 369 }, { "epoch": 0.025781277218409226, "grad_norm": 0.6747397697253835, "learning_rate": 6.989672814540335e-07, "loss": 1.5264, "step": 370 }, { "epoch": 0.02585095634602655, "grad_norm": 0.7568976619670307, "learning_rate": 6.989616947209057e-07, "loss": 1.6249, "step": 371 }, { "epoch": 0.02592063547364387, "grad_norm": 0.6759327486428545, "learning_rate": 6.98956092942089e-07, "loss": 1.5019, "step": 372 }, { "epoch": 0.02599031460126119, "grad_norm": 0.709291267177882, "learning_rate": 6.989504761178519e-07, "loss": 1.5009, "step": 373 }, { "epoch": 0.026059993728878513, "grad_norm": 0.7099006099654483, "learning_rate": 6.989448442484632e-07, "loss": 1.5475, "step": 374 }, { "epoch": 0.026129672856495836, "grad_norm": 0.690112841906305, "learning_rate": 6.989391973341932e-07, "loss": 1.4402, "step": 375 }, { "epoch": 0.026199351984113158, "grad_norm": 0.7427387751223772, "learning_rate": 6.989335353753122e-07, "loss": 1.5491, "step": 376 }, { "epoch": 0.02626903111173048, "grad_norm": 0.6770409349734714, "learning_rate": 6.98927858372092e-07, "loss": 1.4618, "step": 377 }, { "epoch": 0.026338710239347803, "grad_norm": 0.7358590438958797, "learning_rate": 6.989221663248041e-07, "loss": 1.5153, "step": 378 }, { "epoch": 0.026408389366965126, "grad_norm": 0.6659329097898178, "learning_rate": 6.989164592337216e-07, "loss": 1.5076, "step": 379 }, { "epoch": 0.02647806849458245, "grad_norm": 0.6891744971335575, "learning_rate": 6.989107370991179e-07, "loss": 1.6146, "step": 380 }, { "epoch": 0.02654774762219977, "grad_norm": 0.7550006904638933, "learning_rate": 6.989049999212671e-07, "loss": 1.5802, "step": 381 }, { "epoch": 0.026617426749817093, "grad_norm": 0.7002665501804611, "learning_rate": 6.988992477004446e-07, "loss": 1.5355, "step": 382 }, { "epoch": 0.026687105877434416, "grad_norm": 0.7009921240154263, "learning_rate": 6.988934804369254e-07, "loss": 1.5517, "step": 383 }, { "epoch": 0.02675678500505174, "grad_norm": 0.7407286371939378, "learning_rate": 6.988876981309864e-07, "loss": 1.616, "step": 384 }, { "epoch": 0.026826464132669058, "grad_norm": 0.7137375904069622, "learning_rate": 6.988819007829045e-07, "loss": 1.569, "step": 385 }, { "epoch": 0.02689614326028638, "grad_norm": 0.6968322471953381, "learning_rate": 6.988760883929575e-07, "loss": 1.4872, "step": 386 }, { "epoch": 0.026965822387903703, "grad_norm": 0.7335852919010041, "learning_rate": 6.988702609614239e-07, "loss": 1.5747, "step": 387 }, { "epoch": 0.027035501515521025, "grad_norm": 0.7563187176836982, "learning_rate": 6.98864418488583e-07, "loss": 1.6437, "step": 388 }, { "epoch": 0.027105180643138348, "grad_norm": 0.7615857240277533, "learning_rate": 6.988585609747149e-07, "loss": 1.6221, "step": 389 }, { "epoch": 0.02717485977075567, "grad_norm": 0.7603071022901849, "learning_rate": 6.988526884201002e-07, "loss": 1.6949, "step": 390 }, { "epoch": 0.027244538898372993, "grad_norm": 0.6968767287992647, "learning_rate": 6.988468008250203e-07, "loss": 1.6981, "step": 391 }, { "epoch": 0.027314218025990315, "grad_norm": 0.7112654333278738, "learning_rate": 6.988408981897575e-07, "loss": 1.5906, "step": 392 }, { "epoch": 0.027383897153607638, "grad_norm": 0.7298025420013617, "learning_rate": 6.988349805145946e-07, "loss": 1.466, "step": 393 }, { "epoch": 0.02745357628122496, "grad_norm": 0.710177304695451, "learning_rate": 6.98829047799815e-07, "loss": 1.6849, "step": 394 }, { "epoch": 0.027523255408842283, "grad_norm": 0.757594638471493, "learning_rate": 6.988231000457031e-07, "loss": 1.5379, "step": 395 }, { "epoch": 0.027592934536459602, "grad_norm": 0.666881395058542, "learning_rate": 6.98817137252544e-07, "loss": 1.3365, "step": 396 }, { "epoch": 0.027662613664076924, "grad_norm": 0.7591578570938375, "learning_rate": 6.988111594206236e-07, "loss": 1.621, "step": 397 }, { "epoch": 0.027732292791694247, "grad_norm": 0.7175961360945237, "learning_rate": 6.988051665502281e-07, "loss": 1.6332, "step": 398 }, { "epoch": 0.02780197191931157, "grad_norm": 0.6804206351265298, "learning_rate": 6.987991586416447e-07, "loss": 1.5439, "step": 399 }, { "epoch": 0.027871651046928892, "grad_norm": 0.7399865885593209, "learning_rate": 6.987931356951616e-07, "loss": 1.6571, "step": 400 }, { "epoch": 0.027941330174546215, "grad_norm": 0.6659744696681877, "learning_rate": 6.987870977110671e-07, "loss": 1.4773, "step": 401 }, { "epoch": 0.028011009302163537, "grad_norm": 0.7512620715969653, "learning_rate": 6.987810446896507e-07, "loss": 1.473, "step": 402 }, { "epoch": 0.02808068842978086, "grad_norm": 0.6700099927086145, "learning_rate": 6.987749766312023e-07, "loss": 1.4796, "step": 403 }, { "epoch": 0.028150367557398182, "grad_norm": 0.7135736477251078, "learning_rate": 6.98768893536013e-07, "loss": 1.5236, "step": 404 }, { "epoch": 0.028220046685015505, "grad_norm": 0.6892839109346525, "learning_rate": 6.987627954043743e-07, "loss": 1.6111, "step": 405 }, { "epoch": 0.028289725812632827, "grad_norm": 0.6767335993021136, "learning_rate": 6.987566822365781e-07, "loss": 1.5699, "step": 406 }, { "epoch": 0.028359404940250146, "grad_norm": 0.7433392074067198, "learning_rate": 6.987505540329176e-07, "loss": 1.4527, "step": 407 }, { "epoch": 0.02842908406786747, "grad_norm": 0.6556767958241513, "learning_rate": 6.987444107936865e-07, "loss": 1.6098, "step": 408 }, { "epoch": 0.02849876319548479, "grad_norm": 0.7215818443504534, "learning_rate": 6.987382525191791e-07, "loss": 1.4621, "step": 409 }, { "epoch": 0.028568442323102114, "grad_norm": 0.7477993104746903, "learning_rate": 6.987320792096907e-07, "loss": 1.6124, "step": 410 }, { "epoch": 0.028638121450719436, "grad_norm": 0.7201733172736332, "learning_rate": 6.987258908655168e-07, "loss": 1.4661, "step": 411 }, { "epoch": 0.02870780057833676, "grad_norm": 0.72337434724977, "learning_rate": 6.987196874869541e-07, "loss": 1.8324, "step": 412 }, { "epoch": 0.02877747970595408, "grad_norm": 0.7097137381375397, "learning_rate": 6.987134690743e-07, "loss": 1.5459, "step": 413 }, { "epoch": 0.028847158833571404, "grad_norm": 0.7173005147308623, "learning_rate": 6.987072356278523e-07, "loss": 1.4824, "step": 414 }, { "epoch": 0.028916837961188727, "grad_norm": 0.7879220907659518, "learning_rate": 6.987009871479101e-07, "loss": 1.5851, "step": 415 }, { "epoch": 0.02898651708880605, "grad_norm": 0.6911412058391415, "learning_rate": 6.986947236347724e-07, "loss": 1.4977, "step": 416 }, { "epoch": 0.02905619621642337, "grad_norm": 0.7012733331107831, "learning_rate": 6.986884450887396e-07, "loss": 1.565, "step": 417 }, { "epoch": 0.029125875344040694, "grad_norm": 0.7840924933610383, "learning_rate": 6.986821515101124e-07, "loss": 1.55, "step": 418 }, { "epoch": 0.029195554471658013, "grad_norm": 0.6964388522029603, "learning_rate": 6.986758428991927e-07, "loss": 1.6359, "step": 419 }, { "epoch": 0.029265233599275336, "grad_norm": 0.7336109790356905, "learning_rate": 6.986695192562826e-07, "loss": 1.5707, "step": 420 }, { "epoch": 0.02933491272689266, "grad_norm": 0.7719512610826261, "learning_rate": 6.986631805816851e-07, "loss": 1.6261, "step": 421 }, { "epoch": 0.02940459185450998, "grad_norm": 0.7301564052254476, "learning_rate": 6.986568268757041e-07, "loss": 1.5733, "step": 422 }, { "epoch": 0.029474270982127303, "grad_norm": 0.7533907159721789, "learning_rate": 6.98650458138644e-07, "loss": 1.4902, "step": 423 }, { "epoch": 0.029543950109744626, "grad_norm": 0.7073959569566014, "learning_rate": 6.986440743708101e-07, "loss": 1.5164, "step": 424 }, { "epoch": 0.02961362923736195, "grad_norm": 0.7145941743912411, "learning_rate": 6.986376755725082e-07, "loss": 1.6115, "step": 425 }, { "epoch": 0.02968330836497927, "grad_norm": 0.6923721591358505, "learning_rate": 6.98631261744045e-07, "loss": 1.5826, "step": 426 }, { "epoch": 0.029752987492596594, "grad_norm": 0.6839320727296647, "learning_rate": 6.986248328857279e-07, "loss": 1.4239, "step": 427 }, { "epoch": 0.029822666620213916, "grad_norm": 0.7210066398143022, "learning_rate": 6.986183889978649e-07, "loss": 1.6437, "step": 428 }, { "epoch": 0.02989234574783124, "grad_norm": 0.7008115162017041, "learning_rate": 6.98611930080765e-07, "loss": 1.5053, "step": 429 }, { "epoch": 0.029962024875448558, "grad_norm": 0.6942891542494432, "learning_rate": 6.986054561347374e-07, "loss": 1.5146, "step": 430 }, { "epoch": 0.03003170400306588, "grad_norm": 0.6635542554591299, "learning_rate": 6.985989671600925e-07, "loss": 1.4483, "step": 431 }, { "epoch": 0.030101383130683203, "grad_norm": 0.7115892340457141, "learning_rate": 6.985924631571414e-07, "loss": 1.5982, "step": 432 }, { "epoch": 0.030171062258300525, "grad_norm": 0.6854278325150915, "learning_rate": 6.985859441261956e-07, "loss": 1.437, "step": 433 }, { "epoch": 0.030240741385917848, "grad_norm": 0.6759620583516114, "learning_rate": 6.985794100675676e-07, "loss": 1.5435, "step": 434 }, { "epoch": 0.03031042051353517, "grad_norm": 0.6933685069524756, "learning_rate": 6.985728609815706e-07, "loss": 1.6306, "step": 435 }, { "epoch": 0.030380099641152493, "grad_norm": 0.7146245192057525, "learning_rate": 6.985662968685184e-07, "loss": 1.6303, "step": 436 }, { "epoch": 0.030449778768769815, "grad_norm": 0.7232446855847888, "learning_rate": 6.985597177287253e-07, "loss": 1.5931, "step": 437 }, { "epoch": 0.030519457896387138, "grad_norm": 0.7184645069885551, "learning_rate": 6.985531235625069e-07, "loss": 1.5508, "step": 438 }, { "epoch": 0.03058913702400446, "grad_norm": 0.7214958357639737, "learning_rate": 6.98546514370179e-07, "loss": 1.4922, "step": 439 }, { "epoch": 0.030658816151621783, "grad_norm": 0.6782468340100151, "learning_rate": 6.985398901520586e-07, "loss": 1.5012, "step": 440 }, { "epoch": 0.030728495279239106, "grad_norm": 0.7311905689952574, "learning_rate": 6.985332509084629e-07, "loss": 1.5831, "step": 441 }, { "epoch": 0.030798174406856425, "grad_norm": 0.698328027036214, "learning_rate": 6.985265966397102e-07, "loss": 1.4995, "step": 442 }, { "epoch": 0.030867853534473747, "grad_norm": 0.7368612541969222, "learning_rate": 6.985199273461193e-07, "loss": 1.4867, "step": 443 }, { "epoch": 0.03093753266209107, "grad_norm": 0.7327398721151145, "learning_rate": 6.985132430280098e-07, "loss": 1.5259, "step": 444 }, { "epoch": 0.031007211789708392, "grad_norm": 0.7331724114655763, "learning_rate": 6.985065436857021e-07, "loss": 1.5916, "step": 445 }, { "epoch": 0.031076890917325715, "grad_norm": 0.7182699009047467, "learning_rate": 6.984998293195171e-07, "loss": 1.654, "step": 446 }, { "epoch": 0.031146570044943037, "grad_norm": 0.6931250303313554, "learning_rate": 6.984930999297769e-07, "loss": 1.4452, "step": 447 }, { "epoch": 0.03121624917256036, "grad_norm": 0.7721643708509517, "learning_rate": 6.984863555168036e-07, "loss": 1.5864, "step": 448 }, { "epoch": 0.03128592830017768, "grad_norm": 0.7545867751828237, "learning_rate": 6.984795960809205e-07, "loss": 1.5629, "step": 449 }, { "epoch": 0.031355607427795, "grad_norm": 0.7269873867725736, "learning_rate": 6.984728216224517e-07, "loss": 1.6225, "step": 450 }, { "epoch": 0.031425286555412324, "grad_norm": 0.722602283050896, "learning_rate": 6.984660321417218e-07, "loss": 1.4438, "step": 451 }, { "epoch": 0.031494965683029646, "grad_norm": 0.7384727981711023, "learning_rate": 6.98459227639056e-07, "loss": 1.5485, "step": 452 }, { "epoch": 0.03156464481064697, "grad_norm": 0.7018161474292629, "learning_rate": 6.984524081147805e-07, "loss": 1.5858, "step": 453 }, { "epoch": 0.03163432393826429, "grad_norm": 0.7169485562565262, "learning_rate": 6.984455735692222e-07, "loss": 1.6247, "step": 454 }, { "epoch": 0.031704003065881614, "grad_norm": 0.7308662225701769, "learning_rate": 6.984387240027084e-07, "loss": 1.5516, "step": 455 }, { "epoch": 0.03177368219349894, "grad_norm": 0.7209362918239302, "learning_rate": 6.984318594155676e-07, "loss": 1.4468, "step": 456 }, { "epoch": 0.03184336132111626, "grad_norm": 0.7890065268272298, "learning_rate": 6.984249798081286e-07, "loss": 1.5742, "step": 457 }, { "epoch": 0.03191304044873358, "grad_norm": 0.7443926307266672, "learning_rate": 6.98418085180721e-07, "loss": 1.5981, "step": 458 }, { "epoch": 0.031982719576350904, "grad_norm": 0.7686644197473939, "learning_rate": 6.984111755336755e-07, "loss": 1.5131, "step": 459 }, { "epoch": 0.03205239870396823, "grad_norm": 0.7515630377431824, "learning_rate": 6.984042508673228e-07, "loss": 1.5627, "step": 460 }, { "epoch": 0.03212207783158555, "grad_norm": 0.8071194750966978, "learning_rate": 6.983973111819951e-07, "loss": 1.5605, "step": 461 }, { "epoch": 0.03219175695920287, "grad_norm": 0.7068935640984225, "learning_rate": 6.98390356478025e-07, "loss": 1.5796, "step": 462 }, { "epoch": 0.032261436086820194, "grad_norm": 0.7079434375614534, "learning_rate": 6.983833867557455e-07, "loss": 1.544, "step": 463 }, { "epoch": 0.03233111521443752, "grad_norm": 0.6875118632785433, "learning_rate": 6.983764020154909e-07, "loss": 1.5442, "step": 464 }, { "epoch": 0.03240079434205484, "grad_norm": 0.7374325063772078, "learning_rate": 6.983694022575955e-07, "loss": 1.4734, "step": 465 }, { "epoch": 0.03247047346967216, "grad_norm": 0.68601080519657, "learning_rate": 6.983623874823952e-07, "loss": 1.4625, "step": 466 }, { "epoch": 0.032540152597289485, "grad_norm": 0.7015382118886788, "learning_rate": 6.983553576902259e-07, "loss": 1.4303, "step": 467 }, { "epoch": 0.03260983172490681, "grad_norm": 0.7230971366494634, "learning_rate": 6.983483128814246e-07, "loss": 1.5003, "step": 468 }, { "epoch": 0.03267951085252413, "grad_norm": 0.7052403587790288, "learning_rate": 6.983412530563287e-07, "loss": 1.5526, "step": 469 }, { "epoch": 0.032749189980141445, "grad_norm": 0.7742076645158189, "learning_rate": 6.98334178215277e-07, "loss": 1.516, "step": 470 }, { "epoch": 0.03281886910775877, "grad_norm": 0.7818787556440743, "learning_rate": 6.983270883586081e-07, "loss": 1.6288, "step": 471 }, { "epoch": 0.03288854823537609, "grad_norm": 0.686648879518541, "learning_rate": 6.983199834866616e-07, "loss": 1.5191, "step": 472 }, { "epoch": 0.03295822736299341, "grad_norm": 0.75638490928715, "learning_rate": 6.983128635997785e-07, "loss": 1.6619, "step": 473 }, { "epoch": 0.033027906490610735, "grad_norm": 0.6944982743137997, "learning_rate": 6.983057286982998e-07, "loss": 1.504, "step": 474 }, { "epoch": 0.03309758561822806, "grad_norm": 0.7297694980053806, "learning_rate": 6.982985787825673e-07, "loss": 1.7033, "step": 475 }, { "epoch": 0.03316726474584538, "grad_norm": 0.7814280477752864, "learning_rate": 6.982914138529237e-07, "loss": 1.6998, "step": 476 }, { "epoch": 0.0332369438734627, "grad_norm": 0.7592691279743546, "learning_rate": 6.982842339097124e-07, "loss": 1.6168, "step": 477 }, { "epoch": 0.033306623001080025, "grad_norm": 0.6950822644790459, "learning_rate": 6.982770389532773e-07, "loss": 1.6014, "step": 478 }, { "epoch": 0.03337630212869735, "grad_norm": 0.8258391482075685, "learning_rate": 6.982698289839635e-07, "loss": 1.6586, "step": 479 }, { "epoch": 0.03344598125631467, "grad_norm": 0.6971077588869111, "learning_rate": 6.982626040021164e-07, "loss": 1.5408, "step": 480 }, { "epoch": 0.03351566038393199, "grad_norm": 0.7228631898409523, "learning_rate": 6.982553640080821e-07, "loss": 1.5811, "step": 481 }, { "epoch": 0.033585339511549316, "grad_norm": 0.7282650250649347, "learning_rate": 6.982481090022077e-07, "loss": 1.6308, "step": 482 }, { "epoch": 0.03365501863916664, "grad_norm": 0.7363507876647463, "learning_rate": 6.982408389848408e-07, "loss": 1.6058, "step": 483 }, { "epoch": 0.03372469776678396, "grad_norm": 0.7304548735709389, "learning_rate": 6.982335539563299e-07, "loss": 1.5704, "step": 484 }, { "epoch": 0.03379437689440128, "grad_norm": 0.7182208438930545, "learning_rate": 6.98226253917024e-07, "loss": 1.559, "step": 485 }, { "epoch": 0.033864056022018606, "grad_norm": 0.7095183394575739, "learning_rate": 6.982189388672729e-07, "loss": 1.5234, "step": 486 }, { "epoch": 0.03393373514963593, "grad_norm": 0.7100337849855409, "learning_rate": 6.982116088074274e-07, "loss": 1.4691, "step": 487 }, { "epoch": 0.03400341427725325, "grad_norm": 0.7187486302555179, "learning_rate": 6.982042637378384e-07, "loss": 1.4962, "step": 488 }, { "epoch": 0.03407309340487057, "grad_norm": 0.738433028371621, "learning_rate": 6.981969036588582e-07, "loss": 1.5137, "step": 489 }, { "epoch": 0.034142772532487896, "grad_norm": 0.7494164590053654, "learning_rate": 6.981895285708394e-07, "loss": 1.6279, "step": 490 }, { "epoch": 0.03421245166010522, "grad_norm": 0.6714113417696089, "learning_rate": 6.981821384741353e-07, "loss": 1.5603, "step": 491 }, { "epoch": 0.03428213078772254, "grad_norm": 0.7347104034043285, "learning_rate": 6.981747333691003e-07, "loss": 1.6593, "step": 492 }, { "epoch": 0.034351809915339857, "grad_norm": 0.7315383543287005, "learning_rate": 6.981673132560891e-07, "loss": 1.6402, "step": 493 }, { "epoch": 0.03442148904295718, "grad_norm": 0.7971154217795332, "learning_rate": 6.981598781354574e-07, "loss": 1.8423, "step": 494 }, { "epoch": 0.0344911681705745, "grad_norm": 0.7651575324005175, "learning_rate": 6.981524280075613e-07, "loss": 1.4978, "step": 495 }, { "epoch": 0.034560847298191824, "grad_norm": 0.7325811848983502, "learning_rate": 6.981449628727581e-07, "loss": 1.532, "step": 496 }, { "epoch": 0.03463052642580915, "grad_norm": 0.7606852959640895, "learning_rate": 6.981374827314053e-07, "loss": 1.6605, "step": 497 }, { "epoch": 0.03470020555342647, "grad_norm": 0.7965507881830842, "learning_rate": 6.981299875838615e-07, "loss": 1.7101, "step": 498 }, { "epoch": 0.03476988468104379, "grad_norm": 0.7685993735694805, "learning_rate": 6.981224774304859e-07, "loss": 1.5262, "step": 499 }, { "epoch": 0.034839563808661114, "grad_norm": 5.064186032538183, "learning_rate": 6.981149522716382e-07, "loss": 1.4328, "step": 500 }, { "epoch": 0.03490924293627844, "grad_norm": 0.6818074661763434, "learning_rate": 6.981074121076793e-07, "loss": 1.5321, "step": 501 }, { "epoch": 0.03497892206389576, "grad_norm": 0.7140914098818415, "learning_rate": 6.980998569389705e-07, "loss": 1.6084, "step": 502 }, { "epoch": 0.03504860119151308, "grad_norm": 0.7070692585114468, "learning_rate": 6.980922867658736e-07, "loss": 1.5168, "step": 503 }, { "epoch": 0.035118280319130404, "grad_norm": 0.7710204892635041, "learning_rate": 6.980847015887516e-07, "loss": 1.5205, "step": 504 }, { "epoch": 0.03518795944674773, "grad_norm": 0.6981371493463051, "learning_rate": 6.98077101407968e-07, "loss": 1.525, "step": 505 }, { "epoch": 0.03525763857436505, "grad_norm": 0.7402087236647792, "learning_rate": 6.980694862238869e-07, "loss": 1.5848, "step": 506 }, { "epoch": 0.03532731770198237, "grad_norm": 0.7287932998192505, "learning_rate": 6.980618560368733e-07, "loss": 1.6185, "step": 507 }, { "epoch": 0.035396996829599695, "grad_norm": 0.7641151729360003, "learning_rate": 6.980542108472929e-07, "loss": 1.5882, "step": 508 }, { "epoch": 0.03546667595721702, "grad_norm": 0.7181253669244724, "learning_rate": 6.98046550655512e-07, "loss": 1.6705, "step": 509 }, { "epoch": 0.03553635508483434, "grad_norm": 0.710146864055551, "learning_rate": 6.980388754618978e-07, "loss": 1.6129, "step": 510 }, { "epoch": 0.03560603421245166, "grad_norm": 0.7452987549392524, "learning_rate": 6.98031185266818e-07, "loss": 1.5337, "step": 511 }, { "epoch": 0.035675713340068985, "grad_norm": 0.7765641316168032, "learning_rate": 6.980234800706411e-07, "loss": 1.6418, "step": 512 }, { "epoch": 0.03574539246768631, "grad_norm": 0.7175298447436895, "learning_rate": 6.980157598737365e-07, "loss": 1.5969, "step": 513 }, { "epoch": 0.03581507159530363, "grad_norm": 0.6839168177575335, "learning_rate": 6.98008024676474e-07, "loss": 1.5608, "step": 514 }, { "epoch": 0.03588475072292095, "grad_norm": 0.7179064031162559, "learning_rate": 6.980002744792244e-07, "loss": 1.5678, "step": 515 }, { "epoch": 0.03595442985053827, "grad_norm": 0.7319035209158925, "learning_rate": 6.97992509282359e-07, "loss": 1.5515, "step": 516 }, { "epoch": 0.03602410897815559, "grad_norm": 0.7417659390519656, "learning_rate": 6.9798472908625e-07, "loss": 1.5878, "step": 517 }, { "epoch": 0.03609378810577291, "grad_norm": 0.7355893605239711, "learning_rate": 6.979769338912703e-07, "loss": 1.5392, "step": 518 }, { "epoch": 0.036163467233390235, "grad_norm": 0.7046601739665954, "learning_rate": 6.979691236977935e-07, "loss": 1.559, "step": 519 }, { "epoch": 0.03623314636100756, "grad_norm": 0.7097878162497628, "learning_rate": 6.979612985061936e-07, "loss": 1.6167, "step": 520 }, { "epoch": 0.03630282548862488, "grad_norm": 0.7173533144523362, "learning_rate": 6.979534583168458e-07, "loss": 1.5876, "step": 521 }, { "epoch": 0.0363725046162422, "grad_norm": 0.6869507707629723, "learning_rate": 6.979456031301258e-07, "loss": 1.5362, "step": 522 }, { "epoch": 0.036442183743859526, "grad_norm": 0.7110753403068261, "learning_rate": 6.9793773294641e-07, "loss": 1.524, "step": 523 }, { "epoch": 0.03651186287147685, "grad_norm": 0.6991914768564813, "learning_rate": 6.979298477660757e-07, "loss": 1.5218, "step": 524 }, { "epoch": 0.03658154199909417, "grad_norm": 0.7184863855815552, "learning_rate": 6.979219475895006e-07, "loss": 1.4594, "step": 525 }, { "epoch": 0.03665122112671149, "grad_norm": 0.6675660110790665, "learning_rate": 6.979140324170635e-07, "loss": 1.5326, "step": 526 }, { "epoch": 0.036720900254328816, "grad_norm": 0.7247217590142752, "learning_rate": 6.979061022491434e-07, "loss": 1.5758, "step": 527 }, { "epoch": 0.03679057938194614, "grad_norm": 0.7340681651786749, "learning_rate": 6.978981570861205e-07, "loss": 1.58, "step": 528 }, { "epoch": 0.03686025850956346, "grad_norm": 0.6962545953214849, "learning_rate": 6.978901969283756e-07, "loss": 1.6094, "step": 529 }, { "epoch": 0.03692993763718078, "grad_norm": 0.7616295983929137, "learning_rate": 6.978822217762901e-07, "loss": 1.5415, "step": 530 }, { "epoch": 0.036999616764798106, "grad_norm": 0.6858684867940431, "learning_rate": 6.978742316302462e-07, "loss": 1.6065, "step": 531 }, { "epoch": 0.03706929589241543, "grad_norm": 0.7566143279490591, "learning_rate": 6.978662264906268e-07, "loss": 1.6859, "step": 532 }, { "epoch": 0.03713897502003275, "grad_norm": 0.7450351080732804, "learning_rate": 6.978582063578154e-07, "loss": 1.6706, "step": 533 }, { "epoch": 0.03720865414765007, "grad_norm": 0.8200694743403997, "learning_rate": 6.978501712321967e-07, "loss": 1.5625, "step": 534 }, { "epoch": 0.037278333275267396, "grad_norm": 0.7468795076646643, "learning_rate": 6.978421211141554e-07, "loss": 1.6703, "step": 535 }, { "epoch": 0.03734801240288472, "grad_norm": 0.7180155939758754, "learning_rate": 6.978340560040774e-07, "loss": 1.5986, "step": 536 }, { "epoch": 0.03741769153050204, "grad_norm": 0.7147435818894757, "learning_rate": 6.978259759023493e-07, "loss": 1.5683, "step": 537 }, { "epoch": 0.037487370658119364, "grad_norm": 0.779891568987929, "learning_rate": 6.978178808093581e-07, "loss": 1.6255, "step": 538 }, { "epoch": 0.03755704978573668, "grad_norm": 0.7618768803406499, "learning_rate": 6.978097707254919e-07, "loss": 1.7284, "step": 539 }, { "epoch": 0.037626728913354, "grad_norm": 0.7298137167232285, "learning_rate": 6.978016456511393e-07, "loss": 1.6203, "step": 540 }, { "epoch": 0.037696408040971324, "grad_norm": 0.7562638291509813, "learning_rate": 6.977935055866896e-07, "loss": 1.6245, "step": 541 }, { "epoch": 0.03776608716858865, "grad_norm": 0.6874011048759046, "learning_rate": 6.977853505325329e-07, "loss": 1.4707, "step": 542 }, { "epoch": 0.03783576629620597, "grad_norm": 0.7091168800804247, "learning_rate": 6.977771804890601e-07, "loss": 1.535, "step": 543 }, { "epoch": 0.03790544542382329, "grad_norm": 0.7817069685386216, "learning_rate": 6.97768995456663e-07, "loss": 1.5988, "step": 544 }, { "epoch": 0.037975124551440614, "grad_norm": 0.7041314844983713, "learning_rate": 6.977607954357331e-07, "loss": 1.6012, "step": 545 }, { "epoch": 0.03804480367905794, "grad_norm": 0.7442509072892823, "learning_rate": 6.977525804266641e-07, "loss": 1.5457, "step": 546 }, { "epoch": 0.03811448280667526, "grad_norm": 0.6605248898044932, "learning_rate": 6.977443504298493e-07, "loss": 1.5191, "step": 547 }, { "epoch": 0.03818416193429258, "grad_norm": 0.7001973481764062, "learning_rate": 6.977361054456831e-07, "loss": 1.609, "step": 548 }, { "epoch": 0.038253841061909905, "grad_norm": 0.7060005032563952, "learning_rate": 6.977278454745608e-07, "loss": 1.5307, "step": 549 }, { "epoch": 0.03832352018952723, "grad_norm": 0.6902502138912355, "learning_rate": 6.97719570516878e-07, "loss": 1.5973, "step": 550 }, { "epoch": 0.03839319931714455, "grad_norm": 0.7691013493985639, "learning_rate": 6.977112805730315e-07, "loss": 1.6391, "step": 551 }, { "epoch": 0.03846287844476187, "grad_norm": 0.7352744642083536, "learning_rate": 6.977029756434184e-07, "loss": 1.6102, "step": 552 }, { "epoch": 0.038532557572379195, "grad_norm": 0.7297849741047285, "learning_rate": 6.976946557284367e-07, "loss": 1.5189, "step": 553 }, { "epoch": 0.03860223669999652, "grad_norm": 0.8050387561532055, "learning_rate": 6.976863208284852e-07, "loss": 1.6532, "step": 554 }, { "epoch": 0.03867191582761384, "grad_norm": 0.7513047702526604, "learning_rate": 6.976779709439633e-07, "loss": 1.655, "step": 555 }, { "epoch": 0.03874159495523116, "grad_norm": 0.7109447925987789, "learning_rate": 6.97669606075271e-07, "loss": 1.6917, "step": 556 }, { "epoch": 0.038811274082848485, "grad_norm": 0.7066517914984015, "learning_rate": 6.976612262228094e-07, "loss": 1.548, "step": 557 }, { "epoch": 0.03888095321046581, "grad_norm": 0.7926936156146592, "learning_rate": 6.976528313869799e-07, "loss": 1.6356, "step": 558 }, { "epoch": 0.03895063233808313, "grad_norm": 0.7617651263509975, "learning_rate": 6.976444215681848e-07, "loss": 1.4892, "step": 559 }, { "epoch": 0.03902031146570045, "grad_norm": 0.7338500441938097, "learning_rate": 6.976359967668273e-07, "loss": 1.5784, "step": 560 }, { "epoch": 0.039089990593317775, "grad_norm": 0.7305585031017265, "learning_rate": 6.97627556983311e-07, "loss": 1.745, "step": 561 }, { "epoch": 0.03915966972093509, "grad_norm": 0.712314154316217, "learning_rate": 6.976191022180402e-07, "loss": 1.5946, "step": 562 }, { "epoch": 0.03922934884855241, "grad_norm": 0.6785678714420585, "learning_rate": 6.976106324714204e-07, "loss": 1.5513, "step": 563 }, { "epoch": 0.039299027976169736, "grad_norm": 0.7179837146316714, "learning_rate": 6.976021477438572e-07, "loss": 1.5586, "step": 564 }, { "epoch": 0.03936870710378706, "grad_norm": 0.6880995082747411, "learning_rate": 6.975936480357574e-07, "loss": 1.4726, "step": 565 }, { "epoch": 0.03943838623140438, "grad_norm": 0.7490245409219959, "learning_rate": 6.975851333475283e-07, "loss": 1.5735, "step": 566 }, { "epoch": 0.0395080653590217, "grad_norm": 0.7718344714659485, "learning_rate": 6.975766036795778e-07, "loss": 1.6744, "step": 567 }, { "epoch": 0.039577744486639026, "grad_norm": 0.6999493242955485, "learning_rate": 6.975680590323147e-07, "loss": 1.5618, "step": 568 }, { "epoch": 0.03964742361425635, "grad_norm": 0.7277339304673994, "learning_rate": 6.975594994061485e-07, "loss": 1.6268, "step": 569 }, { "epoch": 0.03971710274187367, "grad_norm": 0.7224063955146929, "learning_rate": 6.975509248014895e-07, "loss": 1.6151, "step": 570 }, { "epoch": 0.03978678186949099, "grad_norm": 0.7754705593906143, "learning_rate": 6.975423352187485e-07, "loss": 1.5736, "step": 571 }, { "epoch": 0.039856460997108316, "grad_norm": 0.658621938146351, "learning_rate": 6.975337306583371e-07, "loss": 1.4863, "step": 572 }, { "epoch": 0.03992614012472564, "grad_norm": 0.730264824789615, "learning_rate": 6.975251111206678e-07, "loss": 1.5592, "step": 573 }, { "epoch": 0.03999581925234296, "grad_norm": 0.6657629477477796, "learning_rate": 6.975164766061535e-07, "loss": 1.3176, "step": 574 }, { "epoch": 0.04006549837996028, "grad_norm": 0.7795968362382858, "learning_rate": 6.975078271152082e-07, "loss": 1.654, "step": 575 }, { "epoch": 0.040135177507577606, "grad_norm": 0.7411465230498967, "learning_rate": 6.974991626482462e-07, "loss": 1.6269, "step": 576 }, { "epoch": 0.04020485663519493, "grad_norm": 0.7291408994652216, "learning_rate": 6.974904832056828e-07, "loss": 1.4967, "step": 577 }, { "epoch": 0.04027453576281225, "grad_norm": 0.6991182395286692, "learning_rate": 6.974817887879338e-07, "loss": 1.4528, "step": 578 }, { "epoch": 0.040344214890429574, "grad_norm": 0.7457004375077715, "learning_rate": 6.974730793954159e-07, "loss": 1.586, "step": 579 }, { "epoch": 0.040413894018046896, "grad_norm": 0.7067310028445386, "learning_rate": 6.974643550285467e-07, "loss": 1.638, "step": 580 }, { "epoch": 0.04048357314566422, "grad_norm": 0.6841122043735114, "learning_rate": 6.974556156877441e-07, "loss": 1.5638, "step": 581 }, { "epoch": 0.04055325227328154, "grad_norm": 0.7077425525133038, "learning_rate": 6.974468613734269e-07, "loss": 1.4724, "step": 582 }, { "epoch": 0.040622931400898864, "grad_norm": 0.7009427894499272, "learning_rate": 6.974380920860147e-07, "loss": 1.6103, "step": 583 }, { "epoch": 0.040692610528516186, "grad_norm": 0.7608036022452415, "learning_rate": 6.974293078259277e-07, "loss": 1.6457, "step": 584 }, { "epoch": 0.0407622896561335, "grad_norm": 0.704166945656348, "learning_rate": 6.974205085935869e-07, "loss": 1.5455, "step": 585 }, { "epoch": 0.040831968783750824, "grad_norm": 0.6946949545881428, "learning_rate": 6.974116943894139e-07, "loss": 1.666, "step": 586 }, { "epoch": 0.04090164791136815, "grad_norm": 0.716100071019257, "learning_rate": 6.974028652138311e-07, "loss": 1.5197, "step": 587 }, { "epoch": 0.04097132703898547, "grad_norm": 0.6838107620585757, "learning_rate": 6.973940210672617e-07, "loss": 1.4886, "step": 588 }, { "epoch": 0.04104100616660279, "grad_norm": 0.6697472310234589, "learning_rate": 6.973851619501295e-07, "loss": 1.5473, "step": 589 }, { "epoch": 0.041110685294220115, "grad_norm": 0.774401037898998, "learning_rate": 6.973762878628589e-07, "loss": 1.5531, "step": 590 }, { "epoch": 0.04118036442183744, "grad_norm": 0.6962775521829606, "learning_rate": 6.973673988058754e-07, "loss": 1.621, "step": 591 }, { "epoch": 0.04125004354945476, "grad_norm": 0.7465537441237275, "learning_rate": 6.973584947796049e-07, "loss": 1.3792, "step": 592 }, { "epoch": 0.04131972267707208, "grad_norm": 0.7728818872843954, "learning_rate": 6.973495757844739e-07, "loss": 1.4883, "step": 593 }, { "epoch": 0.041389401804689405, "grad_norm": 0.733610207501176, "learning_rate": 6.973406418209102e-07, "loss": 1.647, "step": 594 }, { "epoch": 0.04145908093230673, "grad_norm": 0.8035948982021359, "learning_rate": 6.973316928893416e-07, "loss": 1.5647, "step": 595 }, { "epoch": 0.04152876005992405, "grad_norm": 0.6768018321794376, "learning_rate": 6.97322728990197e-07, "loss": 1.4769, "step": 596 }, { "epoch": 0.04159843918754137, "grad_norm": 0.7482483315404442, "learning_rate": 6.973137501239061e-07, "loss": 1.6266, "step": 597 }, { "epoch": 0.041668118315158695, "grad_norm": 0.711191857418, "learning_rate": 6.973047562908992e-07, "loss": 1.4338, "step": 598 }, { "epoch": 0.04173779744277602, "grad_norm": 0.7248513304507356, "learning_rate": 6.972957474916072e-07, "loss": 1.5761, "step": 599 }, { "epoch": 0.04180747657039334, "grad_norm": 0.7389307964740751, "learning_rate": 6.972867237264619e-07, "loss": 1.5608, "step": 600 }, { "epoch": 0.04187715569801066, "grad_norm": 0.7421731119566499, "learning_rate": 6.972776849958957e-07, "loss": 1.6273, "step": 601 }, { "epoch": 0.041946834825627985, "grad_norm": 0.7836075175702004, "learning_rate": 6.972686313003416e-07, "loss": 1.594, "step": 602 }, { "epoch": 0.04201651395324531, "grad_norm": 0.7090879903195844, "learning_rate": 6.972595626402337e-07, "loss": 1.4985, "step": 603 }, { "epoch": 0.04208619308086263, "grad_norm": 0.6658000549796161, "learning_rate": 6.972504790160064e-07, "loss": 1.518, "step": 604 }, { "epoch": 0.04215587220847995, "grad_norm": 0.7349695726566142, "learning_rate": 6.972413804280953e-07, "loss": 1.5596, "step": 605 }, { "epoch": 0.042225551336097275, "grad_norm": 0.7483074717752025, "learning_rate": 6.972322668769361e-07, "loss": 1.5794, "step": 606 }, { "epoch": 0.0422952304637146, "grad_norm": 0.7038158186966939, "learning_rate": 6.972231383629657e-07, "loss": 1.5117, "step": 607 }, { "epoch": 0.04236490959133191, "grad_norm": 0.6751541816081398, "learning_rate": 6.972139948866215e-07, "loss": 1.5732, "step": 608 }, { "epoch": 0.042434588718949236, "grad_norm": 0.747301063418828, "learning_rate": 6.972048364483418e-07, "loss": 1.5431, "step": 609 }, { "epoch": 0.04250426784656656, "grad_norm": 0.7420324379102797, "learning_rate": 6.971956630485652e-07, "loss": 1.6458, "step": 610 }, { "epoch": 0.04257394697418388, "grad_norm": 0.6698738775177508, "learning_rate": 6.971864746877316e-07, "loss": 1.5838, "step": 611 }, { "epoch": 0.0426436261018012, "grad_norm": 0.7080826823481463, "learning_rate": 6.971772713662812e-07, "loss": 1.5728, "step": 612 }, { "epoch": 0.042713305229418526, "grad_norm": 0.7130688284022854, "learning_rate": 6.971680530846551e-07, "loss": 1.6254, "step": 613 }, { "epoch": 0.04278298435703585, "grad_norm": 0.6664129156822504, "learning_rate": 6.971588198432952e-07, "loss": 1.4956, "step": 614 }, { "epoch": 0.04285266348465317, "grad_norm": 0.7132385041089505, "learning_rate": 6.971495716426435e-07, "loss": 1.5515, "step": 615 }, { "epoch": 0.04292234261227049, "grad_norm": 0.73710904278707, "learning_rate": 6.971403084831436e-07, "loss": 1.6063, "step": 616 }, { "epoch": 0.042992021739887816, "grad_norm": 0.8132005846012044, "learning_rate": 6.971310303652395e-07, "loss": 1.6231, "step": 617 }, { "epoch": 0.04306170086750514, "grad_norm": 0.6973241807952938, "learning_rate": 6.971217372893753e-07, "loss": 1.6154, "step": 618 }, { "epoch": 0.04313137999512246, "grad_norm": 2.8190614475050513, "learning_rate": 6.971124292559969e-07, "loss": 1.6138, "step": 619 }, { "epoch": 0.043201059122739784, "grad_norm": 0.7236203087033882, "learning_rate": 6.971031062655502e-07, "loss": 1.46, "step": 620 }, { "epoch": 0.043270738250357106, "grad_norm": 0.7721528246703844, "learning_rate": 6.970937683184816e-07, "loss": 1.651, "step": 621 }, { "epoch": 0.04334041737797443, "grad_norm": 0.7207721016116038, "learning_rate": 6.970844154152392e-07, "loss": 1.6102, "step": 622 }, { "epoch": 0.04341009650559175, "grad_norm": 0.7470132174851035, "learning_rate": 6.970750475562709e-07, "loss": 1.5735, "step": 623 }, { "epoch": 0.043479775633209074, "grad_norm": 0.682052163775226, "learning_rate": 6.970656647420255e-07, "loss": 1.5302, "step": 624 }, { "epoch": 0.043549454760826396, "grad_norm": 0.7618764660275008, "learning_rate": 6.970562669729528e-07, "loss": 1.6138, "step": 625 }, { "epoch": 0.04361913388844372, "grad_norm": 0.6772064059244419, "learning_rate": 6.970468542495033e-07, "loss": 1.4087, "step": 626 }, { "epoch": 0.04368881301606104, "grad_norm": 0.7376677084023966, "learning_rate": 6.970374265721277e-07, "loss": 1.5959, "step": 627 }, { "epoch": 0.043758492143678364, "grad_norm": 0.8303204432724274, "learning_rate": 6.970279839412782e-07, "loss": 1.6502, "step": 628 }, { "epoch": 0.043828171271295686, "grad_norm": 0.7786738881941985, "learning_rate": 6.970185263574071e-07, "loss": 1.6448, "step": 629 }, { "epoch": 0.04389785039891301, "grad_norm": 0.7128585750577182, "learning_rate": 6.970090538209676e-07, "loss": 1.44, "step": 630 }, { "epoch": 0.043967529526530325, "grad_norm": 0.6627090163476095, "learning_rate": 6.969995663324138e-07, "loss": 1.3244, "step": 631 }, { "epoch": 0.04403720865414765, "grad_norm": 0.7745287325391855, "learning_rate": 6.969900638922e-07, "loss": 1.7033, "step": 632 }, { "epoch": 0.04410688778176497, "grad_norm": 0.733220507077979, "learning_rate": 6.969805465007822e-07, "loss": 1.6772, "step": 633 }, { "epoch": 0.04417656690938229, "grad_norm": 0.6960784033598787, "learning_rate": 6.969710141586159e-07, "loss": 1.5492, "step": 634 }, { "epoch": 0.044246246036999615, "grad_norm": 0.6881665503401039, "learning_rate": 6.96961466866158e-07, "loss": 1.5384, "step": 635 }, { "epoch": 0.04431592516461694, "grad_norm": 0.7093423611533153, "learning_rate": 6.969519046238665e-07, "loss": 1.5778, "step": 636 }, { "epoch": 0.04438560429223426, "grad_norm": 0.6950788938005011, "learning_rate": 6.969423274321992e-07, "loss": 1.4672, "step": 637 }, { "epoch": 0.04445528341985158, "grad_norm": 0.6971933321048744, "learning_rate": 6.969327352916151e-07, "loss": 1.5282, "step": 638 }, { "epoch": 0.044524962547468905, "grad_norm": 0.6700704096045089, "learning_rate": 6.96923128202574e-07, "loss": 1.4895, "step": 639 }, { "epoch": 0.04459464167508623, "grad_norm": 0.6942860808996856, "learning_rate": 6.969135061655361e-07, "loss": 1.4508, "step": 640 }, { "epoch": 0.04466432080270355, "grad_norm": 0.7804476942861223, "learning_rate": 6.969038691809628e-07, "loss": 1.509, "step": 641 }, { "epoch": 0.04473399993032087, "grad_norm": 0.6853905689405291, "learning_rate": 6.968942172493156e-07, "loss": 1.4684, "step": 642 }, { "epoch": 0.044803679057938195, "grad_norm": 0.6764348433542908, "learning_rate": 6.968845503710572e-07, "loss": 1.5356, "step": 643 }, { "epoch": 0.04487335818555552, "grad_norm": 0.6773570660436679, "learning_rate": 6.96874868546651e-07, "loss": 1.5618, "step": 644 }, { "epoch": 0.04494303731317284, "grad_norm": 0.7622466312475294, "learning_rate": 6.968651717765608e-07, "loss": 1.5596, "step": 645 }, { "epoch": 0.04501271644079016, "grad_norm": 0.7254213570087428, "learning_rate": 6.968554600612512e-07, "loss": 1.5213, "step": 646 }, { "epoch": 0.045082395568407485, "grad_norm": 0.6885774067075373, "learning_rate": 6.968457334011879e-07, "loss": 1.5261, "step": 647 }, { "epoch": 0.04515207469602481, "grad_norm": 0.7101552502470694, "learning_rate": 6.968359917968368e-07, "loss": 1.6694, "step": 648 }, { "epoch": 0.04522175382364213, "grad_norm": 0.7369871559845563, "learning_rate": 6.968262352486649e-07, "loss": 1.5304, "step": 649 }, { "epoch": 0.04529143295125945, "grad_norm": 0.7875133371881571, "learning_rate": 6.968164637571393e-07, "loss": 1.4934, "step": 650 }, { "epoch": 0.045361112078876775, "grad_norm": 0.7019279683983147, "learning_rate": 6.968066773227289e-07, "loss": 1.5468, "step": 651 }, { "epoch": 0.0454307912064941, "grad_norm": 0.7341904116170085, "learning_rate": 6.967968759459023e-07, "loss": 1.5338, "step": 652 }, { "epoch": 0.04550047033411142, "grad_norm": 0.6810202405701805, "learning_rate": 6.967870596271292e-07, "loss": 1.6357, "step": 653 }, { "epoch": 0.045570149461728736, "grad_norm": 0.699448200025997, "learning_rate": 6.967772283668803e-07, "loss": 1.5453, "step": 654 }, { "epoch": 0.04563982858934606, "grad_norm": 0.7158382501901476, "learning_rate": 6.967673821656265e-07, "loss": 1.5505, "step": 655 }, { "epoch": 0.04570950771696338, "grad_norm": 0.6535608334622804, "learning_rate": 6.967575210238395e-07, "loss": 1.5453, "step": 656 }, { "epoch": 0.045779186844580703, "grad_norm": 0.7080634763231599, "learning_rate": 6.967476449419924e-07, "loss": 1.5479, "step": 657 }, { "epoch": 0.045848865972198026, "grad_norm": 0.7659320955527936, "learning_rate": 6.967377539205579e-07, "loss": 1.6749, "step": 658 }, { "epoch": 0.04591854509981535, "grad_norm": 0.6686160543798587, "learning_rate": 6.967278479600104e-07, "loss": 1.4683, "step": 659 }, { "epoch": 0.04598822422743267, "grad_norm": 0.7537119319439501, "learning_rate": 6.967179270608243e-07, "loss": 1.45, "step": 660 }, { "epoch": 0.046057903355049994, "grad_norm": 0.7349118447650984, "learning_rate": 6.967079912234754e-07, "loss": 1.595, "step": 661 }, { "epoch": 0.046127582482667316, "grad_norm": 0.7293110394952392, "learning_rate": 6.966980404484395e-07, "loss": 1.4752, "step": 662 }, { "epoch": 0.04619726161028464, "grad_norm": 0.7387902011995563, "learning_rate": 6.966880747361936e-07, "loss": 1.4697, "step": 663 }, { "epoch": 0.04626694073790196, "grad_norm": 0.6765249539707564, "learning_rate": 6.966780940872153e-07, "loss": 1.4381, "step": 664 }, { "epoch": 0.046336619865519284, "grad_norm": 0.689217791498506, "learning_rate": 6.966680985019828e-07, "loss": 1.4685, "step": 665 }, { "epoch": 0.046406298993136606, "grad_norm": 0.6841331183811533, "learning_rate": 6.966580879809752e-07, "loss": 1.4852, "step": 666 }, { "epoch": 0.04647597812075393, "grad_norm": 0.743925290927153, "learning_rate": 6.966480625246722e-07, "loss": 1.5746, "step": 667 }, { "epoch": 0.04654565724837125, "grad_norm": 0.6890299457883376, "learning_rate": 6.966380221335544e-07, "loss": 1.6132, "step": 668 }, { "epoch": 0.046615336375988574, "grad_norm": 0.7300586622886946, "learning_rate": 6.966279668081026e-07, "loss": 1.5986, "step": 669 }, { "epoch": 0.046685015503605896, "grad_norm": 0.7367747555470463, "learning_rate": 6.966178965487989e-07, "loss": 1.6231, "step": 670 }, { "epoch": 0.04675469463122322, "grad_norm": 0.6836238330355955, "learning_rate": 6.96607811356126e-07, "loss": 1.576, "step": 671 }, { "epoch": 0.04682437375884054, "grad_norm": 0.7033258443880477, "learning_rate": 6.96597711230567e-07, "loss": 1.6208, "step": 672 }, { "epoch": 0.046894052886457864, "grad_norm": 0.8339698614381776, "learning_rate": 6.96587596172606e-07, "loss": 1.5455, "step": 673 }, { "epoch": 0.04696373201407519, "grad_norm": 0.7085764029952359, "learning_rate": 6.965774661827277e-07, "loss": 1.4264, "step": 674 }, { "epoch": 0.04703341114169251, "grad_norm": 0.7646037417894928, "learning_rate": 6.965673212614174e-07, "loss": 1.4358, "step": 675 }, { "epoch": 0.04710309026930983, "grad_norm": 0.7532185100268795, "learning_rate": 6.965571614091616e-07, "loss": 1.6341, "step": 676 }, { "epoch": 0.04717276939692715, "grad_norm": 0.6685706703379944, "learning_rate": 6.96546986626447e-07, "loss": 1.546, "step": 677 }, { "epoch": 0.04724244852454447, "grad_norm": 0.680236955536486, "learning_rate": 6.965367969137611e-07, "loss": 1.4986, "step": 678 }, { "epoch": 0.04731212765216179, "grad_norm": 0.6986156128855948, "learning_rate": 6.965265922715925e-07, "loss": 1.5194, "step": 679 }, { "epoch": 0.047381806779779115, "grad_norm": 0.7329077948281687, "learning_rate": 6.9651637270043e-07, "loss": 1.5835, "step": 680 }, { "epoch": 0.04745148590739644, "grad_norm": 0.67737004024086, "learning_rate": 6.965061382007632e-07, "loss": 1.5043, "step": 681 }, { "epoch": 0.04752116503501376, "grad_norm": 0.7124756190212052, "learning_rate": 6.964958887730829e-07, "loss": 1.5287, "step": 682 }, { "epoch": 0.04759084416263108, "grad_norm": 0.7089711249978385, "learning_rate": 6.964856244178801e-07, "loss": 1.5851, "step": 683 }, { "epoch": 0.047660523290248405, "grad_norm": 0.6927438151246575, "learning_rate": 6.964753451356467e-07, "loss": 1.5052, "step": 684 }, { "epoch": 0.04773020241786573, "grad_norm": 0.7749472284337433, "learning_rate": 6.964650509268753e-07, "loss": 1.6473, "step": 685 }, { "epoch": 0.04779988154548305, "grad_norm": 0.6963609256556942, "learning_rate": 6.964547417920593e-07, "loss": 1.6013, "step": 686 }, { "epoch": 0.04786956067310037, "grad_norm": 0.687383111340697, "learning_rate": 6.964444177316926e-07, "loss": 1.5295, "step": 687 }, { "epoch": 0.047939239800717695, "grad_norm": 0.7180160100897451, "learning_rate": 6.964340787462701e-07, "loss": 1.4711, "step": 688 }, { "epoch": 0.04800891892833502, "grad_norm": 0.7590607641255273, "learning_rate": 6.964237248362871e-07, "loss": 1.5769, "step": 689 }, { "epoch": 0.04807859805595234, "grad_norm": 0.8004088368747236, "learning_rate": 6.9641335600224e-07, "loss": 1.5545, "step": 690 }, { "epoch": 0.04814827718356966, "grad_norm": 0.7273413263118548, "learning_rate": 6.964029722446253e-07, "loss": 1.6128, "step": 691 }, { "epoch": 0.048217956311186985, "grad_norm": 0.7066776812890259, "learning_rate": 6.963925735639411e-07, "loss": 1.5317, "step": 692 }, { "epoch": 0.04828763543880431, "grad_norm": 0.7015857397206559, "learning_rate": 6.963821599606854e-07, "loss": 1.5972, "step": 693 }, { "epoch": 0.04835731456642163, "grad_norm": 0.7232812425018523, "learning_rate": 6.963717314353574e-07, "loss": 1.5294, "step": 694 }, { "epoch": 0.04842699369403895, "grad_norm": 0.7243849048748213, "learning_rate": 6.963612879884567e-07, "loss": 1.556, "step": 695 }, { "epoch": 0.048496672821656275, "grad_norm": 0.7359746952905802, "learning_rate": 6.96350829620484e-07, "loss": 1.546, "step": 696 }, { "epoch": 0.0485663519492736, "grad_norm": 0.7122154316120477, "learning_rate": 6.963403563319402e-07, "loss": 1.733, "step": 697 }, { "epoch": 0.04863603107689092, "grad_norm": 0.7055779006893984, "learning_rate": 6.963298681233274e-07, "loss": 1.6171, "step": 698 }, { "epoch": 0.04870571020450824, "grad_norm": 0.716296197133196, "learning_rate": 6.963193649951483e-07, "loss": 1.752, "step": 699 }, { "epoch": 0.04877538933212556, "grad_norm": 0.6713758234296247, "learning_rate": 6.96308846947906e-07, "loss": 1.4199, "step": 700 }, { "epoch": 0.04884506845974288, "grad_norm": 0.7743784620060852, "learning_rate": 6.962983139821047e-07, "loss": 1.4771, "step": 701 }, { "epoch": 0.048914747587360204, "grad_norm": 0.6641119216158364, "learning_rate": 6.96287766098249e-07, "loss": 1.5956, "step": 702 }, { "epoch": 0.048984426714977526, "grad_norm": 0.6558222609838982, "learning_rate": 6.962772032968446e-07, "loss": 1.4149, "step": 703 }, { "epoch": 0.04905410584259485, "grad_norm": 0.7450866187835455, "learning_rate": 6.962666255783975e-07, "loss": 1.5214, "step": 704 }, { "epoch": 0.04912378497021217, "grad_norm": 0.7446602054101235, "learning_rate": 6.962560329434148e-07, "loss": 1.5909, "step": 705 }, { "epoch": 0.049193464097829494, "grad_norm": 0.6912223067403324, "learning_rate": 6.962454253924038e-07, "loss": 1.5803, "step": 706 }, { "epoch": 0.049263143225446816, "grad_norm": 0.7497854047755567, "learning_rate": 6.962348029258732e-07, "loss": 1.6816, "step": 707 }, { "epoch": 0.04933282235306414, "grad_norm": 0.6944629046945672, "learning_rate": 6.96224165544332e-07, "loss": 1.4664, "step": 708 }, { "epoch": 0.04940250148068146, "grad_norm": 0.6968358758121764, "learning_rate": 6.962135132482896e-07, "loss": 1.5491, "step": 709 }, { "epoch": 0.049472180608298784, "grad_norm": 0.6771949433189489, "learning_rate": 6.962028460382568e-07, "loss": 1.4434, "step": 710 }, { "epoch": 0.049541859735916106, "grad_norm": 0.7158393842253569, "learning_rate": 6.961921639147448e-07, "loss": 1.6615, "step": 711 }, { "epoch": 0.04961153886353343, "grad_norm": 0.6926218608736993, "learning_rate": 6.961814668782655e-07, "loss": 1.551, "step": 712 }, { "epoch": 0.04968121799115075, "grad_norm": 0.7354027473157468, "learning_rate": 6.961707549293313e-07, "loss": 1.5184, "step": 713 }, { "epoch": 0.049750897118768074, "grad_norm": 0.6540498769668062, "learning_rate": 6.961600280684558e-07, "loss": 1.4642, "step": 714 }, { "epoch": 0.0498205762463854, "grad_norm": 0.7328051651446971, "learning_rate": 6.961492862961528e-07, "loss": 1.5514, "step": 715 }, { "epoch": 0.04989025537400272, "grad_norm": 0.7123550388724823, "learning_rate": 6.961385296129375e-07, "loss": 1.6047, "step": 716 }, { "epoch": 0.04995993450162004, "grad_norm": 0.7147651353416852, "learning_rate": 6.961277580193249e-07, "loss": 1.6656, "step": 717 }, { "epoch": 0.050029613629237364, "grad_norm": 0.6906496045277001, "learning_rate": 6.961169715158317e-07, "loss": 1.5934, "step": 718 }, { "epoch": 0.05009929275685469, "grad_norm": 0.7541044654754127, "learning_rate": 6.961061701029741e-07, "loss": 1.4913, "step": 719 }, { "epoch": 0.05016897188447201, "grad_norm": 0.7540942860771993, "learning_rate": 6.960953537812703e-07, "loss": 1.5373, "step": 720 }, { "epoch": 0.05023865101208933, "grad_norm": 0.6781331883086441, "learning_rate": 6.960845225512386e-07, "loss": 1.5436, "step": 721 }, { "epoch": 0.050308330139706654, "grad_norm": 0.8470452786448093, "learning_rate": 6.960736764133978e-07, "loss": 1.6208, "step": 722 }, { "epoch": 0.05037800926732397, "grad_norm": 0.6760410770524181, "learning_rate": 6.960628153682679e-07, "loss": 1.4621, "step": 723 }, { "epoch": 0.05044768839494129, "grad_norm": 0.7490727821128771, "learning_rate": 6.960519394163692e-07, "loss": 1.4257, "step": 724 }, { "epoch": 0.050517367522558615, "grad_norm": 0.6857827107854004, "learning_rate": 6.96041048558223e-07, "loss": 1.6527, "step": 725 }, { "epoch": 0.05058704665017594, "grad_norm": 0.7032073154704966, "learning_rate": 6.960301427943513e-07, "loss": 1.6024, "step": 726 }, { "epoch": 0.05065672577779326, "grad_norm": 0.6849058234749557, "learning_rate": 6.960192221252765e-07, "loss": 1.4722, "step": 727 }, { "epoch": 0.05072640490541058, "grad_norm": 0.7322091820332296, "learning_rate": 6.960082865515221e-07, "loss": 1.6492, "step": 728 }, { "epoch": 0.050796084033027905, "grad_norm": 0.7224516190286429, "learning_rate": 6.959973360736122e-07, "loss": 1.6839, "step": 729 }, { "epoch": 0.05086576316064523, "grad_norm": 0.686954801078055, "learning_rate": 6.959863706920713e-07, "loss": 1.5376, "step": 730 }, { "epoch": 0.05093544228826255, "grad_norm": 0.6862780679407999, "learning_rate": 6.959753904074253e-07, "loss": 1.5023, "step": 731 }, { "epoch": 0.05100512141587987, "grad_norm": 0.721010317984146, "learning_rate": 6.959643952202001e-07, "loss": 1.5141, "step": 732 }, { "epoch": 0.051074800543497195, "grad_norm": 0.6967847500688505, "learning_rate": 6.959533851309226e-07, "loss": 1.4556, "step": 733 }, { "epoch": 0.05114447967111452, "grad_norm": 0.7489626901360203, "learning_rate": 6.959423601401205e-07, "loss": 1.533, "step": 734 }, { "epoch": 0.05121415879873184, "grad_norm": 0.7850799405099177, "learning_rate": 6.959313202483222e-07, "loss": 1.5717, "step": 735 }, { "epoch": 0.05128383792634916, "grad_norm": 0.6962064304868032, "learning_rate": 6.959202654560567e-07, "loss": 1.4927, "step": 736 }, { "epoch": 0.051353517053966485, "grad_norm": 0.6814157744096807, "learning_rate": 6.959091957638539e-07, "loss": 1.5426, "step": 737 }, { "epoch": 0.05142319618158381, "grad_norm": 0.7046197816079532, "learning_rate": 6.958981111722439e-07, "loss": 1.5478, "step": 738 }, { "epoch": 0.05149287530920113, "grad_norm": 0.730215604464463, "learning_rate": 6.958870116817583e-07, "loss": 1.5133, "step": 739 }, { "epoch": 0.05156255443681845, "grad_norm": 0.7009966531083134, "learning_rate": 6.958758972929289e-07, "loss": 1.5814, "step": 740 }, { "epoch": 0.051632233564435775, "grad_norm": 0.7164763075684121, "learning_rate": 6.958647680062882e-07, "loss": 1.54, "step": 741 }, { "epoch": 0.0517019126920531, "grad_norm": 0.6961036208661276, "learning_rate": 6.958536238223697e-07, "loss": 1.5408, "step": 742 }, { "epoch": 0.05177159181967042, "grad_norm": 0.7736476120671665, "learning_rate": 6.958424647417074e-07, "loss": 1.633, "step": 743 }, { "epoch": 0.05184127094728774, "grad_norm": 0.7260767598944713, "learning_rate": 6.958312907648358e-07, "loss": 1.4184, "step": 744 }, { "epoch": 0.051910950074905066, "grad_norm": 0.7292566831371331, "learning_rate": 6.958201018922908e-07, "loss": 1.4852, "step": 745 }, { "epoch": 0.05198062920252238, "grad_norm": 0.6826555733917109, "learning_rate": 6.958088981246085e-07, "loss": 1.4976, "step": 746 }, { "epoch": 0.052050308330139704, "grad_norm": 0.8057138550101121, "learning_rate": 6.957976794623257e-07, "loss": 1.6662, "step": 747 }, { "epoch": 0.052119987457757026, "grad_norm": 0.7383595107784787, "learning_rate": 6.9578644590598e-07, "loss": 1.6895, "step": 748 }, { "epoch": 0.05218966658537435, "grad_norm": 0.7623169165234596, "learning_rate": 6.957751974561098e-07, "loss": 1.5831, "step": 749 }, { "epoch": 0.05225934571299167, "grad_norm": 0.7010872976902116, "learning_rate": 6.95763934113254e-07, "loss": 1.6176, "step": 750 }, { "epoch": 0.052329024840608994, "grad_norm": 0.7652887108312698, "learning_rate": 6.957526558779526e-07, "loss": 1.5928, "step": 751 }, { "epoch": 0.052398703968226316, "grad_norm": 0.8296288937742872, "learning_rate": 6.95741362750746e-07, "loss": 1.5899, "step": 752 }, { "epoch": 0.05246838309584364, "grad_norm": 0.7128647360486586, "learning_rate": 6.957300547321753e-07, "loss": 1.55, "step": 753 }, { "epoch": 0.05253806222346096, "grad_norm": 0.7111814164131637, "learning_rate": 6.957187318227823e-07, "loss": 1.7715, "step": 754 }, { "epoch": 0.052607741351078284, "grad_norm": 0.7581371563133963, "learning_rate": 6.9570739402311e-07, "loss": 1.621, "step": 755 }, { "epoch": 0.05267742047869561, "grad_norm": 0.7320911081083655, "learning_rate": 6.956960413337015e-07, "loss": 1.6028, "step": 756 }, { "epoch": 0.05274709960631293, "grad_norm": 0.6811468195966736, "learning_rate": 6.956846737551008e-07, "loss": 1.4258, "step": 757 }, { "epoch": 0.05281677873393025, "grad_norm": 0.7558283632455667, "learning_rate": 6.956732912878528e-07, "loss": 1.6908, "step": 758 }, { "epoch": 0.052886457861547574, "grad_norm": 0.7401585909526207, "learning_rate": 6.956618939325027e-07, "loss": 1.5859, "step": 759 }, { "epoch": 0.0529561369891649, "grad_norm": 0.6869688342909643, "learning_rate": 6.95650481689597e-07, "loss": 1.5995, "step": 760 }, { "epoch": 0.05302581611678222, "grad_norm": 0.732390242062926, "learning_rate": 6.956390545596824e-07, "loss": 1.5717, "step": 761 }, { "epoch": 0.05309549524439954, "grad_norm": 0.7217401642738522, "learning_rate": 6.956276125433066e-07, "loss": 1.6592, "step": 762 }, { "epoch": 0.053165174372016864, "grad_norm": 0.7026097108747728, "learning_rate": 6.956161556410179e-07, "loss": 1.4727, "step": 763 }, { "epoch": 0.05323485349963419, "grad_norm": 0.7104570430746965, "learning_rate": 6.956046838533654e-07, "loss": 1.548, "step": 764 }, { "epoch": 0.05330453262725151, "grad_norm": 0.6794024019476335, "learning_rate": 6.955931971808987e-07, "loss": 1.3615, "step": 765 }, { "epoch": 0.05337421175486883, "grad_norm": 0.7494295020279464, "learning_rate": 6.955816956241684e-07, "loss": 1.5749, "step": 766 }, { "epoch": 0.053443890882486154, "grad_norm": 0.6688709082207913, "learning_rate": 6.955701791837256e-07, "loss": 1.5078, "step": 767 }, { "epoch": 0.05351357001010348, "grad_norm": 0.710494094903225, "learning_rate": 6.955586478601222e-07, "loss": 1.4936, "step": 768 }, { "epoch": 0.05358324913772079, "grad_norm": 0.711894193384377, "learning_rate": 6.955471016539109e-07, "loss": 1.563, "step": 769 }, { "epoch": 0.053652928265338115, "grad_norm": 0.7323768349826295, "learning_rate": 6.95535540565645e-07, "loss": 1.5585, "step": 770 }, { "epoch": 0.05372260739295544, "grad_norm": 0.7329434747907027, "learning_rate": 6.955239645958784e-07, "loss": 1.617, "step": 771 }, { "epoch": 0.05379228652057276, "grad_norm": 0.748227642697152, "learning_rate": 6.95512373745166e-07, "loss": 1.5454, "step": 772 }, { "epoch": 0.05386196564819008, "grad_norm": 0.7328425554339898, "learning_rate": 6.95500768014063e-07, "loss": 1.5224, "step": 773 }, { "epoch": 0.053931644775807405, "grad_norm": 0.7466917176783684, "learning_rate": 6.954891474031259e-07, "loss": 1.5347, "step": 774 }, { "epoch": 0.05400132390342473, "grad_norm": 0.7585007520137389, "learning_rate": 6.954775119129114e-07, "loss": 1.7438, "step": 775 }, { "epoch": 0.05407100303104205, "grad_norm": 0.7439885599579423, "learning_rate": 6.95465861543977e-07, "loss": 1.6233, "step": 776 }, { "epoch": 0.05414068215865937, "grad_norm": 0.6937689954958782, "learning_rate": 6.954541962968813e-07, "loss": 1.5569, "step": 777 }, { "epoch": 0.054210361286276695, "grad_norm": 0.6808608820879647, "learning_rate": 6.954425161721832e-07, "loss": 1.5482, "step": 778 }, { "epoch": 0.05428004041389402, "grad_norm": 0.7549169966464018, "learning_rate": 6.954308211704422e-07, "loss": 1.5517, "step": 779 }, { "epoch": 0.05434971954151134, "grad_norm": 0.6734510329316952, "learning_rate": 6.954191112922192e-07, "loss": 1.5492, "step": 780 }, { "epoch": 0.05441939866912866, "grad_norm": 0.6877325247570651, "learning_rate": 6.954073865380749e-07, "loss": 1.4862, "step": 781 }, { "epoch": 0.054489077796745986, "grad_norm": 0.6788147898970631, "learning_rate": 6.953956469085715e-07, "loss": 1.5726, "step": 782 }, { "epoch": 0.05455875692436331, "grad_norm": 0.7305927316424901, "learning_rate": 6.953838924042714e-07, "loss": 1.4975, "step": 783 }, { "epoch": 0.05462843605198063, "grad_norm": 0.7132269277569465, "learning_rate": 6.95372123025738e-07, "loss": 1.4406, "step": 784 }, { "epoch": 0.05469811517959795, "grad_norm": 0.7020412658784635, "learning_rate": 6.953603387735353e-07, "loss": 1.587, "step": 785 }, { "epoch": 0.054767794307215276, "grad_norm": 0.7528096373864551, "learning_rate": 6.953485396482281e-07, "loss": 1.601, "step": 786 }, { "epoch": 0.0548374734348326, "grad_norm": 0.6512215101888548, "learning_rate": 6.953367256503816e-07, "loss": 1.5033, "step": 787 }, { "epoch": 0.05490715256244992, "grad_norm": 0.6788130061609902, "learning_rate": 6.953248967805621e-07, "loss": 1.4305, "step": 788 }, { "epoch": 0.05497683169006724, "grad_norm": 0.7446892987199926, "learning_rate": 6.953130530393365e-07, "loss": 1.7707, "step": 789 }, { "epoch": 0.055046510817684566, "grad_norm": 0.8206689971928751, "learning_rate": 6.953011944272724e-07, "loss": 1.697, "step": 790 }, { "epoch": 0.05511618994530188, "grad_norm": 0.7121618215215683, "learning_rate": 6.952893209449378e-07, "loss": 1.5926, "step": 791 }, { "epoch": 0.055185869072919204, "grad_norm": 0.6684685055821509, "learning_rate": 6.952774325929022e-07, "loss": 1.5346, "step": 792 }, { "epoch": 0.055255548200536526, "grad_norm": 0.7876441568581036, "learning_rate": 6.95265529371735e-07, "loss": 1.6429, "step": 793 }, { "epoch": 0.05532522732815385, "grad_norm": 0.6966735167203888, "learning_rate": 6.952536112820066e-07, "loss": 1.5847, "step": 794 }, { "epoch": 0.05539490645577117, "grad_norm": 0.6584961268883045, "learning_rate": 6.952416783242882e-07, "loss": 1.4795, "step": 795 }, { "epoch": 0.055464585583388494, "grad_norm": 0.8031283060641858, "learning_rate": 6.952297304991516e-07, "loss": 1.7066, "step": 796 }, { "epoch": 0.05553426471100582, "grad_norm": 0.7839285381689913, "learning_rate": 6.952177678071696e-07, "loss": 1.7255, "step": 797 }, { "epoch": 0.05560394383862314, "grad_norm": 0.7693006460738586, "learning_rate": 6.952057902489152e-07, "loss": 1.6596, "step": 798 }, { "epoch": 0.05567362296624046, "grad_norm": 0.722826042731386, "learning_rate": 6.951937978249624e-07, "loss": 1.5677, "step": 799 }, { "epoch": 0.055743302093857784, "grad_norm": 0.7117854955696307, "learning_rate": 6.951817905358861e-07, "loss": 1.5712, "step": 800 }, { "epoch": 0.05581298122147511, "grad_norm": 0.7873681567228429, "learning_rate": 6.951697683822617e-07, "loss": 1.5217, "step": 801 }, { "epoch": 0.05588266034909243, "grad_norm": 0.6990444943511518, "learning_rate": 6.951577313646651e-07, "loss": 1.4714, "step": 802 }, { "epoch": 0.05595233947670975, "grad_norm": 0.7087438008306041, "learning_rate": 6.951456794836733e-07, "loss": 1.5843, "step": 803 }, { "epoch": 0.056022018604327074, "grad_norm": 0.7466712931823629, "learning_rate": 6.951336127398638e-07, "loss": 1.5316, "step": 804 }, { "epoch": 0.0560916977319444, "grad_norm": 0.7653458202685157, "learning_rate": 6.951215311338148e-07, "loss": 1.6453, "step": 805 }, { "epoch": 0.05616137685956172, "grad_norm": 0.7236559613144168, "learning_rate": 6.951094346661055e-07, "loss": 1.6199, "step": 806 }, { "epoch": 0.05623105598717904, "grad_norm": 0.6944459703744715, "learning_rate": 6.950973233373155e-07, "loss": 1.5263, "step": 807 }, { "epoch": 0.056300735114796364, "grad_norm": 0.7816944407503731, "learning_rate": 6.95085197148025e-07, "loss": 1.6126, "step": 808 }, { "epoch": 0.05637041424241369, "grad_norm": 0.7124399093435471, "learning_rate": 6.950730560988153e-07, "loss": 1.5162, "step": 809 }, { "epoch": 0.05644009337003101, "grad_norm": 0.6988292198617666, "learning_rate": 6.950609001902682e-07, "loss": 1.4888, "step": 810 }, { "epoch": 0.05650977249764833, "grad_norm": 0.7329883842362056, "learning_rate": 6.950487294229662e-07, "loss": 1.6594, "step": 811 }, { "epoch": 0.056579451625265655, "grad_norm": 0.6939338590714141, "learning_rate": 6.950365437974927e-07, "loss": 1.3898, "step": 812 }, { "epoch": 0.05664913075288298, "grad_norm": 0.7188599302209264, "learning_rate": 6.950243433144314e-07, "loss": 1.4821, "step": 813 }, { "epoch": 0.05671880988050029, "grad_norm": 0.6823465353383991, "learning_rate": 6.950121279743672e-07, "loss": 1.5173, "step": 814 }, { "epoch": 0.056788489008117615, "grad_norm": 0.8750761366460855, "learning_rate": 6.949998977778852e-07, "loss": 1.7818, "step": 815 }, { "epoch": 0.05685816813573494, "grad_norm": 0.7137120708518332, "learning_rate": 6.949876527255718e-07, "loss": 1.6433, "step": 816 }, { "epoch": 0.05692784726335226, "grad_norm": 0.6619871776167325, "learning_rate": 6.949753928180137e-07, "loss": 1.5437, "step": 817 }, { "epoch": 0.05699752639096958, "grad_norm": 0.698883982747292, "learning_rate": 6.949631180557984e-07, "loss": 1.5743, "step": 818 }, { "epoch": 0.057067205518586905, "grad_norm": 0.7606502997566646, "learning_rate": 6.949508284395141e-07, "loss": 1.6545, "step": 819 }, { "epoch": 0.05713688464620423, "grad_norm": 0.714118076308743, "learning_rate": 6.949385239697498e-07, "loss": 1.4312, "step": 820 }, { "epoch": 0.05720656377382155, "grad_norm": 0.7478782433732383, "learning_rate": 6.949262046470951e-07, "loss": 1.5869, "step": 821 }, { "epoch": 0.05727624290143887, "grad_norm": 0.7041434815718658, "learning_rate": 6.949138704721405e-07, "loss": 1.5026, "step": 822 }, { "epoch": 0.057345922029056196, "grad_norm": 0.7290786553345412, "learning_rate": 6.94901521445477e-07, "loss": 1.8279, "step": 823 }, { "epoch": 0.05741560115667352, "grad_norm": 0.6990004793808002, "learning_rate": 6.948891575676963e-07, "loss": 1.4113, "step": 824 }, { "epoch": 0.05748528028429084, "grad_norm": 0.7811171957869557, "learning_rate": 6.94876778839391e-07, "loss": 1.558, "step": 825 }, { "epoch": 0.05755495941190816, "grad_norm": 0.739030141597572, "learning_rate": 6.948643852611543e-07, "loss": 1.5298, "step": 826 }, { "epoch": 0.057624638539525486, "grad_norm": 0.7219462250643431, "learning_rate": 6.948519768335801e-07, "loss": 1.5291, "step": 827 }, { "epoch": 0.05769431766714281, "grad_norm": 0.7120923451377806, "learning_rate": 6.948395535572631e-07, "loss": 1.4506, "step": 828 }, { "epoch": 0.05776399679476013, "grad_norm": 0.6975168518442334, "learning_rate": 6.948271154327985e-07, "loss": 1.4987, "step": 829 }, { "epoch": 0.05783367592237745, "grad_norm": 0.7138373985437595, "learning_rate": 6.948146624607826e-07, "loss": 1.6882, "step": 830 }, { "epoch": 0.057903355049994776, "grad_norm": 0.6708575706578195, "learning_rate": 6.948021946418118e-07, "loss": 1.4863, "step": 831 }, { "epoch": 0.0579730341776121, "grad_norm": 0.6808372768351861, "learning_rate": 6.947897119764841e-07, "loss": 1.5298, "step": 832 }, { "epoch": 0.05804271330522942, "grad_norm": 0.6852255133243262, "learning_rate": 6.947772144653973e-07, "loss": 1.5228, "step": 833 }, { "epoch": 0.05811239243284674, "grad_norm": 0.7146018666757779, "learning_rate": 6.947647021091504e-07, "loss": 1.5371, "step": 834 }, { "epoch": 0.058182071560464066, "grad_norm": 0.7270596691308416, "learning_rate": 6.947521749083431e-07, "loss": 1.6466, "step": 835 }, { "epoch": 0.05825175068808139, "grad_norm": 0.7603407007056545, "learning_rate": 6.947396328635757e-07, "loss": 1.6199, "step": 836 }, { "epoch": 0.058321429815698704, "grad_norm": 0.6738987574655745, "learning_rate": 6.947270759754491e-07, "loss": 1.5234, "step": 837 }, { "epoch": 0.05839110894331603, "grad_norm": 0.6897399123926644, "learning_rate": 6.947145042445652e-07, "loss": 1.4501, "step": 838 }, { "epoch": 0.05846078807093335, "grad_norm": 0.7264844681997801, "learning_rate": 6.947019176715265e-07, "loss": 1.5785, "step": 839 }, { "epoch": 0.05853046719855067, "grad_norm": 0.71216329594823, "learning_rate": 6.94689316256936e-07, "loss": 1.5699, "step": 840 }, { "epoch": 0.058600146326167994, "grad_norm": 0.7269442447044326, "learning_rate": 6.946767000013978e-07, "loss": 1.5217, "step": 841 }, { "epoch": 0.05866982545378532, "grad_norm": 0.7213513641430525, "learning_rate": 6.946640689055163e-07, "loss": 1.4736, "step": 842 }, { "epoch": 0.05873950458140264, "grad_norm": 0.6911765142958883, "learning_rate": 6.946514229698968e-07, "loss": 1.5468, "step": 843 }, { "epoch": 0.05880918370901996, "grad_norm": 0.7312402973375173, "learning_rate": 6.946387621951456e-07, "loss": 1.5845, "step": 844 }, { "epoch": 0.058878862836637284, "grad_norm": 0.8766300017445524, "learning_rate": 6.946260865818691e-07, "loss": 1.6933, "step": 845 }, { "epoch": 0.05894854196425461, "grad_norm": 0.7437506934142784, "learning_rate": 6.946133961306748e-07, "loss": 1.5705, "step": 846 }, { "epoch": 0.05901822109187193, "grad_norm": 0.7250232099606774, "learning_rate": 6.946006908421711e-07, "loss": 1.6298, "step": 847 }, { "epoch": 0.05908790021948925, "grad_norm": 0.7468248406429453, "learning_rate": 6.945879707169668e-07, "loss": 1.5957, "step": 848 }, { "epoch": 0.059157579347106574, "grad_norm": 0.7307135773660111, "learning_rate": 6.945752357556712e-07, "loss": 1.6783, "step": 849 }, { "epoch": 0.0592272584747239, "grad_norm": 0.7462954093323562, "learning_rate": 6.945624859588947e-07, "loss": 1.6313, "step": 850 }, { "epoch": 0.05929693760234122, "grad_norm": 0.6836820982793157, "learning_rate": 6.945497213272485e-07, "loss": 1.5552, "step": 851 }, { "epoch": 0.05936661672995854, "grad_norm": 0.7349495347711605, "learning_rate": 6.94536941861344e-07, "loss": 1.5534, "step": 852 }, { "epoch": 0.059436295857575865, "grad_norm": 0.7393307515575156, "learning_rate": 6.945241475617939e-07, "loss": 1.5919, "step": 853 }, { "epoch": 0.05950597498519319, "grad_norm": 0.7581161323825397, "learning_rate": 6.945113384292112e-07, "loss": 1.5157, "step": 854 }, { "epoch": 0.05957565411281051, "grad_norm": 0.7811029541452695, "learning_rate": 6.944985144642097e-07, "loss": 1.5304, "step": 855 }, { "epoch": 0.05964533324042783, "grad_norm": 0.6479169819808708, "learning_rate": 6.94485675667404e-07, "loss": 1.5384, "step": 856 }, { "epoch": 0.059715012368045155, "grad_norm": 0.6754137449735433, "learning_rate": 6.944728220394094e-07, "loss": 1.5514, "step": 857 }, { "epoch": 0.05978469149566248, "grad_norm": 0.7010515414317577, "learning_rate": 6.944599535808418e-07, "loss": 1.6216, "step": 858 }, { "epoch": 0.0598543706232798, "grad_norm": 0.7420195687781036, "learning_rate": 6.944470702923181e-07, "loss": 1.4931, "step": 859 }, { "epoch": 0.059924049750897115, "grad_norm": 0.6985723055107013, "learning_rate": 6.944341721744553e-07, "loss": 1.5086, "step": 860 }, { "epoch": 0.05999372887851444, "grad_norm": 0.7496262938898509, "learning_rate": 6.944212592278718e-07, "loss": 1.5999, "step": 861 }, { "epoch": 0.06006340800613176, "grad_norm": 0.6981277967509739, "learning_rate": 6.944083314531863e-07, "loss": 1.6053, "step": 862 }, { "epoch": 0.06013308713374908, "grad_norm": 0.6804099181252625, "learning_rate": 6.943953888510182e-07, "loss": 1.5734, "step": 863 }, { "epoch": 0.060202766261366406, "grad_norm": 0.6731032371257547, "learning_rate": 6.943824314219881e-07, "loss": 1.5698, "step": 864 }, { "epoch": 0.06027244538898373, "grad_norm": 0.7425209366343755, "learning_rate": 6.943694591667166e-07, "loss": 1.5785, "step": 865 }, { "epoch": 0.06034212451660105, "grad_norm": 0.7528653762418056, "learning_rate": 6.943564720858257e-07, "loss": 1.6388, "step": 866 }, { "epoch": 0.06041180364421837, "grad_norm": 0.8122728565845222, "learning_rate": 6.943434701799373e-07, "loss": 1.625, "step": 867 }, { "epoch": 0.060481482771835696, "grad_norm": 0.7019858720037662, "learning_rate": 6.943304534496749e-07, "loss": 1.564, "step": 868 }, { "epoch": 0.06055116189945302, "grad_norm": 0.7177275391866323, "learning_rate": 6.943174218956621e-07, "loss": 1.6123, "step": 869 }, { "epoch": 0.06062084102707034, "grad_norm": 0.7030350752558858, "learning_rate": 6.943043755185235e-07, "loss": 1.5799, "step": 870 }, { "epoch": 0.06069052015468766, "grad_norm": 0.7320624667675142, "learning_rate": 6.942913143188841e-07, "loss": 1.626, "step": 871 }, { "epoch": 0.060760199282304986, "grad_norm": 0.7741178657105833, "learning_rate": 6.9427823829737e-07, "loss": 1.6211, "step": 872 }, { "epoch": 0.06082987840992231, "grad_norm": 0.7034164705948763, "learning_rate": 6.942651474546077e-07, "loss": 1.5162, "step": 873 }, { "epoch": 0.06089955753753963, "grad_norm": 0.6909381638542067, "learning_rate": 6.942520417912248e-07, "loss": 1.4292, "step": 874 }, { "epoch": 0.06096923666515695, "grad_norm": 0.7115351412753147, "learning_rate": 6.94238921307849e-07, "loss": 1.6299, "step": 875 }, { "epoch": 0.061038915792774276, "grad_norm": 0.6704973088007256, "learning_rate": 6.942257860051093e-07, "loss": 1.6049, "step": 876 }, { "epoch": 0.0611085949203916, "grad_norm": 0.7460113523362771, "learning_rate": 6.942126358836352e-07, "loss": 1.6023, "step": 877 }, { "epoch": 0.06117827404800892, "grad_norm": 0.740997449770298, "learning_rate": 6.941994709440567e-07, "loss": 1.461, "step": 878 }, { "epoch": 0.061247953175626244, "grad_norm": 0.7344001816527201, "learning_rate": 6.941862911870047e-07, "loss": 1.6357, "step": 879 }, { "epoch": 0.061317632303243566, "grad_norm": 0.842515805713784, "learning_rate": 6.941730966131111e-07, "loss": 1.6079, "step": 880 }, { "epoch": 0.06138731143086089, "grad_norm": 0.7541684280459339, "learning_rate": 6.941598872230078e-07, "loss": 1.4737, "step": 881 }, { "epoch": 0.06145699055847821, "grad_norm": 0.7780595951607582, "learning_rate": 6.941466630173281e-07, "loss": 1.5857, "step": 882 }, { "epoch": 0.06152666968609553, "grad_norm": 0.7922696893069073, "learning_rate": 6.941334239967056e-07, "loss": 1.6614, "step": 883 }, { "epoch": 0.06159634881371285, "grad_norm": 0.7049532795712117, "learning_rate": 6.941201701617749e-07, "loss": 1.5411, "step": 884 }, { "epoch": 0.06166602794133017, "grad_norm": 0.7030659598960232, "learning_rate": 6.941069015131709e-07, "loss": 1.6289, "step": 885 }, { "epoch": 0.061735707068947494, "grad_norm": 0.7093629644917849, "learning_rate": 6.940936180515296e-07, "loss": 1.4986, "step": 886 }, { "epoch": 0.06180538619656482, "grad_norm": 0.7310013628107739, "learning_rate": 6.940803197774875e-07, "loss": 1.7331, "step": 887 }, { "epoch": 0.06187506532418214, "grad_norm": 0.7085823814611059, "learning_rate": 6.94067006691682e-07, "loss": 1.4859, "step": 888 }, { "epoch": 0.06194474445179946, "grad_norm": 0.6963722191996505, "learning_rate": 6.940536787947512e-07, "loss": 1.6608, "step": 889 }, { "epoch": 0.062014423579416784, "grad_norm": 0.7409922933496068, "learning_rate": 6.940403360873335e-07, "loss": 1.5919, "step": 890 }, { "epoch": 0.06208410270703411, "grad_norm": 0.6655268228790038, "learning_rate": 6.940269785700685e-07, "loss": 1.507, "step": 891 }, { "epoch": 0.06215378183465143, "grad_norm": 0.6821234570236667, "learning_rate": 6.940136062435963e-07, "loss": 1.6311, "step": 892 }, { "epoch": 0.06222346096226875, "grad_norm": 0.6902129271155977, "learning_rate": 6.940002191085575e-07, "loss": 1.533, "step": 893 }, { "epoch": 0.062293140089886075, "grad_norm": 0.7606556884733682, "learning_rate": 6.93986817165594e-07, "loss": 1.6512, "step": 894 }, { "epoch": 0.0623628192175034, "grad_norm": 0.7289684165848465, "learning_rate": 6.939734004153479e-07, "loss": 1.5418, "step": 895 }, { "epoch": 0.06243249834512072, "grad_norm": 0.6811870960828804, "learning_rate": 6.939599688584621e-07, "loss": 1.5511, "step": 896 }, { "epoch": 0.06250217747273804, "grad_norm": 0.7242835339866917, "learning_rate": 6.939465224955802e-07, "loss": 1.4901, "step": 897 }, { "epoch": 0.06257185660035536, "grad_norm": 0.7231645951362123, "learning_rate": 6.939330613273468e-07, "loss": 1.5142, "step": 898 }, { "epoch": 0.06264153572797268, "grad_norm": 0.7434077826308495, "learning_rate": 6.939195853544069e-07, "loss": 1.5778, "step": 899 }, { "epoch": 0.06271121485559, "grad_norm": 0.7528424466845036, "learning_rate": 6.939060945774062e-07, "loss": 1.5573, "step": 900 }, { "epoch": 0.06278089398320733, "grad_norm": 0.6960916907060996, "learning_rate": 6.938925889969913e-07, "loss": 1.5107, "step": 901 }, { "epoch": 0.06285057311082465, "grad_norm": 0.741434891143874, "learning_rate": 6.938790686138093e-07, "loss": 1.4976, "step": 902 }, { "epoch": 0.06292025223844197, "grad_norm": 0.7758276506408398, "learning_rate": 6.938655334285084e-07, "loss": 1.7011, "step": 903 }, { "epoch": 0.06298993136605929, "grad_norm": 0.7923740403380025, "learning_rate": 6.938519834417369e-07, "loss": 1.6621, "step": 904 }, { "epoch": 0.06305961049367662, "grad_norm": 0.6856901148311233, "learning_rate": 6.938384186541444e-07, "loss": 1.6103, "step": 905 }, { "epoch": 0.06312928962129394, "grad_norm": 0.702310601086839, "learning_rate": 6.938248390663807e-07, "loss": 1.5056, "step": 906 }, { "epoch": 0.06319896874891126, "grad_norm": 0.7810829024837994, "learning_rate": 6.938112446790969e-07, "loss": 1.5446, "step": 907 }, { "epoch": 0.06326864787652858, "grad_norm": 0.7307945024026805, "learning_rate": 6.937976354929442e-07, "loss": 1.6072, "step": 908 }, { "epoch": 0.0633383270041459, "grad_norm": 0.7100956099681816, "learning_rate": 6.937840115085747e-07, "loss": 1.5953, "step": 909 }, { "epoch": 0.06340800613176323, "grad_norm": 0.7112718468289873, "learning_rate": 6.937703727266416e-07, "loss": 1.5293, "step": 910 }, { "epoch": 0.06347768525938055, "grad_norm": 0.6940826349677348, "learning_rate": 6.937567191477984e-07, "loss": 1.52, "step": 911 }, { "epoch": 0.06354736438699787, "grad_norm": 0.7615667933385247, "learning_rate": 6.937430507726993e-07, "loss": 1.5318, "step": 912 }, { "epoch": 0.0636170435146152, "grad_norm": 0.7576711542574699, "learning_rate": 6.937293676019993e-07, "loss": 1.6737, "step": 913 }, { "epoch": 0.06368672264223252, "grad_norm": 0.7429248277684565, "learning_rate": 6.937156696363543e-07, "loss": 1.6404, "step": 914 }, { "epoch": 0.06375640176984984, "grad_norm": 0.696155637296429, "learning_rate": 6.937019568764206e-07, "loss": 1.5436, "step": 915 }, { "epoch": 0.06382608089746716, "grad_norm": 0.6842590034765481, "learning_rate": 6.936882293228554e-07, "loss": 1.5392, "step": 916 }, { "epoch": 0.06389576002508449, "grad_norm": 0.696787602226188, "learning_rate": 6.936744869763163e-07, "loss": 1.5912, "step": 917 }, { "epoch": 0.06396543915270181, "grad_norm": 0.7528902720982941, "learning_rate": 6.936607298374624e-07, "loss": 1.6004, "step": 918 }, { "epoch": 0.06403511828031913, "grad_norm": 0.6598951020939933, "learning_rate": 6.936469579069525e-07, "loss": 1.5006, "step": 919 }, { "epoch": 0.06410479740793645, "grad_norm": 0.6731494738244983, "learning_rate": 6.936331711854467e-07, "loss": 1.4561, "step": 920 }, { "epoch": 0.06417447653555378, "grad_norm": 0.6743817096436955, "learning_rate": 6.936193696736058e-07, "loss": 1.5704, "step": 921 }, { "epoch": 0.0642441556631711, "grad_norm": 0.7051720087157666, "learning_rate": 6.936055533720911e-07, "loss": 1.4952, "step": 922 }, { "epoch": 0.06431383479078842, "grad_norm": 0.7103238647333436, "learning_rate": 6.935917222815648e-07, "loss": 1.5239, "step": 923 }, { "epoch": 0.06438351391840574, "grad_norm": 0.7235088782382487, "learning_rate": 6.935778764026895e-07, "loss": 1.6262, "step": 924 }, { "epoch": 0.06445319304602307, "grad_norm": 0.6819119127698352, "learning_rate": 6.935640157361289e-07, "loss": 1.5787, "step": 925 }, { "epoch": 0.06452287217364039, "grad_norm": 0.7374359593151948, "learning_rate": 6.935501402825473e-07, "loss": 1.4686, "step": 926 }, { "epoch": 0.06459255130125771, "grad_norm": 0.7116900330828307, "learning_rate": 6.935362500426095e-07, "loss": 1.5711, "step": 927 }, { "epoch": 0.06466223042887503, "grad_norm": 0.6973009589376816, "learning_rate": 6.935223450169812e-07, "loss": 1.6242, "step": 928 }, { "epoch": 0.06473190955649236, "grad_norm": 0.721322375011435, "learning_rate": 6.935084252063286e-07, "loss": 1.5486, "step": 929 }, { "epoch": 0.06480158868410968, "grad_norm": 0.6756510474295269, "learning_rate": 6.934944906113191e-07, "loss": 1.4674, "step": 930 }, { "epoch": 0.064871267811727, "grad_norm": 0.7197319416399377, "learning_rate": 6.934805412326201e-07, "loss": 1.5464, "step": 931 }, { "epoch": 0.06494094693934432, "grad_norm": 0.7561165970179851, "learning_rate": 6.934665770709004e-07, "loss": 1.4934, "step": 932 }, { "epoch": 0.06501062606696165, "grad_norm": 0.7184946003658444, "learning_rate": 6.93452598126829e-07, "loss": 1.39, "step": 933 }, { "epoch": 0.06508030519457897, "grad_norm": 0.7180786135022353, "learning_rate": 6.934386044010759e-07, "loss": 1.6113, "step": 934 }, { "epoch": 0.06514998432219629, "grad_norm": 0.7796809708168528, "learning_rate": 6.934245958943115e-07, "loss": 1.6264, "step": 935 }, { "epoch": 0.06521966344981361, "grad_norm": 0.725538535253128, "learning_rate": 6.934105726072076e-07, "loss": 1.5312, "step": 936 }, { "epoch": 0.06528934257743094, "grad_norm": 0.7304382528288118, "learning_rate": 6.933965345404356e-07, "loss": 1.5362, "step": 937 }, { "epoch": 0.06535902170504826, "grad_norm": 0.700294818967333, "learning_rate": 6.933824816946687e-07, "loss": 1.4986, "step": 938 }, { "epoch": 0.06542870083266558, "grad_norm": 0.6997315562329343, "learning_rate": 6.933684140705801e-07, "loss": 1.4747, "step": 939 }, { "epoch": 0.06549837996028289, "grad_norm": 0.7567625481144566, "learning_rate": 6.933543316688441e-07, "loss": 1.5696, "step": 940 }, { "epoch": 0.06556805908790021, "grad_norm": 41.44909914705615, "learning_rate": 6.933402344901354e-07, "loss": 1.6127, "step": 941 }, { "epoch": 0.06563773821551754, "grad_norm": 0.6679798181768342, "learning_rate": 6.933261225351298e-07, "loss": 1.585, "step": 942 }, { "epoch": 0.06570741734313486, "grad_norm": 0.7000648730835969, "learning_rate": 6.933119958045033e-07, "loss": 1.6216, "step": 943 }, { "epoch": 0.06577709647075218, "grad_norm": 0.7596264387177684, "learning_rate": 6.93297854298933e-07, "loss": 1.6523, "step": 944 }, { "epoch": 0.0658467755983695, "grad_norm": 0.7193619041313796, "learning_rate": 6.932836980190967e-07, "loss": 1.5485, "step": 945 }, { "epoch": 0.06591645472598683, "grad_norm": 0.6980772094913987, "learning_rate": 6.932695269656726e-07, "loss": 1.5319, "step": 946 }, { "epoch": 0.06598613385360415, "grad_norm": 0.7442746561075959, "learning_rate": 6.9325534113934e-07, "loss": 1.5161, "step": 947 }, { "epoch": 0.06605581298122147, "grad_norm": 0.6777835337823753, "learning_rate": 6.932411405407785e-07, "loss": 1.4636, "step": 948 }, { "epoch": 0.0661254921088388, "grad_norm": 0.6727156937868579, "learning_rate": 6.932269251706688e-07, "loss": 1.5201, "step": 949 }, { "epoch": 0.06619517123645612, "grad_norm": 0.7245330036989707, "learning_rate": 6.932126950296921e-07, "loss": 1.5595, "step": 950 }, { "epoch": 0.06626485036407344, "grad_norm": 0.7125701635864038, "learning_rate": 6.931984501185303e-07, "loss": 1.6323, "step": 951 }, { "epoch": 0.06633452949169076, "grad_norm": 0.6963101718771975, "learning_rate": 6.93184190437866e-07, "loss": 1.513, "step": 952 }, { "epoch": 0.06640420861930808, "grad_norm": 0.7046379018841911, "learning_rate": 6.931699159883825e-07, "loss": 1.5284, "step": 953 }, { "epoch": 0.0664738877469254, "grad_norm": 0.678439137321281, "learning_rate": 6.931556267707642e-07, "loss": 1.553, "step": 954 }, { "epoch": 0.06654356687454273, "grad_norm": 0.7410253132305197, "learning_rate": 6.931413227856954e-07, "loss": 1.5533, "step": 955 }, { "epoch": 0.06661324600216005, "grad_norm": 0.6908842566143348, "learning_rate": 6.93127004033862e-07, "loss": 1.6341, "step": 956 }, { "epoch": 0.06668292512977737, "grad_norm": 0.6651535227538429, "learning_rate": 6.931126705159499e-07, "loss": 1.5806, "step": 957 }, { "epoch": 0.0667526042573947, "grad_norm": 0.6920497807137774, "learning_rate": 6.930983222326462e-07, "loss": 1.567, "step": 958 }, { "epoch": 0.06682228338501202, "grad_norm": 0.7078371963892504, "learning_rate": 6.930839591846383e-07, "loss": 1.6668, "step": 959 }, { "epoch": 0.06689196251262934, "grad_norm": 0.7606381115394008, "learning_rate": 6.930695813726146e-07, "loss": 1.4752, "step": 960 }, { "epoch": 0.06696164164024666, "grad_norm": 0.8536717759212689, "learning_rate": 6.93055188797264e-07, "loss": 1.5652, "step": 961 }, { "epoch": 0.06703132076786399, "grad_norm": 0.6653573860861989, "learning_rate": 6.930407814592765e-07, "loss": 1.5914, "step": 962 }, { "epoch": 0.06710099989548131, "grad_norm": 0.784194439397854, "learning_rate": 6.930263593593424e-07, "loss": 1.5406, "step": 963 }, { "epoch": 0.06717067902309863, "grad_norm": 0.7610703370050599, "learning_rate": 6.930119224981526e-07, "loss": 1.6075, "step": 964 }, { "epoch": 0.06724035815071595, "grad_norm": 0.6764485214707169, "learning_rate": 6.929974708763992e-07, "loss": 1.504, "step": 965 }, { "epoch": 0.06731003727833328, "grad_norm": 0.707512885228743, "learning_rate": 6.929830044947746e-07, "loss": 1.4592, "step": 966 }, { "epoch": 0.0673797164059506, "grad_norm": 0.7081374537556824, "learning_rate": 6.929685233539723e-07, "loss": 1.6566, "step": 967 }, { "epoch": 0.06744939553356792, "grad_norm": 0.7122482694793018, "learning_rate": 6.929540274546861e-07, "loss": 1.6348, "step": 968 }, { "epoch": 0.06751907466118524, "grad_norm": 0.6901522900821034, "learning_rate": 6.929395167976105e-07, "loss": 1.5057, "step": 969 }, { "epoch": 0.06758875378880257, "grad_norm": 0.7160908180373641, "learning_rate": 6.929249913834413e-07, "loss": 1.5376, "step": 970 }, { "epoch": 0.06765843291641989, "grad_norm": 0.7388489184952017, "learning_rate": 6.929104512128743e-07, "loss": 1.4687, "step": 971 }, { "epoch": 0.06772811204403721, "grad_norm": 0.7003909631678097, "learning_rate": 6.928958962866063e-07, "loss": 1.6329, "step": 972 }, { "epoch": 0.06779779117165453, "grad_norm": 0.6912891435110838, "learning_rate": 6.928813266053349e-07, "loss": 1.4846, "step": 973 }, { "epoch": 0.06786747029927186, "grad_norm": 0.6603982078975053, "learning_rate": 6.928667421697582e-07, "loss": 1.3881, "step": 974 }, { "epoch": 0.06793714942688918, "grad_norm": 0.7235369345078301, "learning_rate": 6.928521429805752e-07, "loss": 1.6745, "step": 975 }, { "epoch": 0.0680068285545065, "grad_norm": 0.7482907027017797, "learning_rate": 6.928375290384856e-07, "loss": 1.5114, "step": 976 }, { "epoch": 0.06807650768212382, "grad_norm": 0.786232207346211, "learning_rate": 6.928229003441894e-07, "loss": 1.5673, "step": 977 }, { "epoch": 0.06814618680974115, "grad_norm": 0.6986408216965335, "learning_rate": 6.928082568983882e-07, "loss": 1.6475, "step": 978 }, { "epoch": 0.06821586593735847, "grad_norm": 0.7219767354753639, "learning_rate": 6.927935987017831e-07, "loss": 1.4191, "step": 979 }, { "epoch": 0.06828554506497579, "grad_norm": 0.7733751013809093, "learning_rate": 6.927789257550769e-07, "loss": 1.4901, "step": 980 }, { "epoch": 0.06835522419259311, "grad_norm": 0.7088273005920183, "learning_rate": 6.927642380589728e-07, "loss": 1.5104, "step": 981 }, { "epoch": 0.06842490332021044, "grad_norm": 0.7231089020272345, "learning_rate": 6.927495356141747e-07, "loss": 1.565, "step": 982 }, { "epoch": 0.06849458244782776, "grad_norm": 0.7465251630697766, "learning_rate": 6.927348184213869e-07, "loss": 1.6297, "step": 983 }, { "epoch": 0.06856426157544508, "grad_norm": 0.7028021616887362, "learning_rate": 6.927200864813149e-07, "loss": 1.5676, "step": 984 }, { "epoch": 0.0686339407030624, "grad_norm": 0.7241553816634947, "learning_rate": 6.927053397946644e-07, "loss": 1.5426, "step": 985 }, { "epoch": 0.06870361983067971, "grad_norm": 0.6631488358648268, "learning_rate": 6.926905783621427e-07, "loss": 1.426, "step": 986 }, { "epoch": 0.06877329895829704, "grad_norm": 0.7238485756150155, "learning_rate": 6.926758021844565e-07, "loss": 1.6008, "step": 987 }, { "epoch": 0.06884297808591436, "grad_norm": 0.6720576579169943, "learning_rate": 6.926610112623144e-07, "loss": 1.4347, "step": 988 }, { "epoch": 0.06891265721353168, "grad_norm": 0.73334548757339, "learning_rate": 6.926462055964249e-07, "loss": 1.5721, "step": 989 }, { "epoch": 0.068982336341149, "grad_norm": 0.7137428718304452, "learning_rate": 6.926313851874977e-07, "loss": 1.6358, "step": 990 }, { "epoch": 0.06905201546876633, "grad_norm": 0.7729856075932263, "learning_rate": 6.92616550036243e-07, "loss": 1.5841, "step": 991 }, { "epoch": 0.06912169459638365, "grad_norm": 0.720906050858157, "learning_rate": 6.926017001433716e-07, "loss": 1.5314, "step": 992 }, { "epoch": 0.06919137372400097, "grad_norm": 0.7376452297137691, "learning_rate": 6.925868355095953e-07, "loss": 1.5631, "step": 993 }, { "epoch": 0.0692610528516183, "grad_norm": 0.7351626865087098, "learning_rate": 6.925719561356263e-07, "loss": 1.4988, "step": 994 }, { "epoch": 0.06933073197923562, "grad_norm": 0.6997542125412789, "learning_rate": 6.925570620221779e-07, "loss": 1.5988, "step": 995 }, { "epoch": 0.06940041110685294, "grad_norm": 0.706269039215262, "learning_rate": 6.925421531699636e-07, "loss": 1.5851, "step": 996 }, { "epoch": 0.06947009023447026, "grad_norm": 0.6832486756386302, "learning_rate": 6.925272295796979e-07, "loss": 1.4754, "step": 997 }, { "epoch": 0.06953976936208758, "grad_norm": 0.7294064047869246, "learning_rate": 6.92512291252096e-07, "loss": 1.5658, "step": 998 }, { "epoch": 0.0696094484897049, "grad_norm": 0.7209483681033394, "learning_rate": 6.924973381878738e-07, "loss": 1.4977, "step": 999 }, { "epoch": 0.06967912761732223, "grad_norm": 0.6482722642545093, "learning_rate": 6.92482370387748e-07, "loss": 1.5456, "step": 1000 }, { "epoch": 0.06974880674493955, "grad_norm": 0.6911923296637908, "learning_rate": 6.924673878524356e-07, "loss": 1.5774, "step": 1001 }, { "epoch": 0.06981848587255687, "grad_norm": 0.6806494876420273, "learning_rate": 6.924523905826549e-07, "loss": 1.6226, "step": 1002 }, { "epoch": 0.0698881650001742, "grad_norm": 0.7206441347495618, "learning_rate": 6.924373785791244e-07, "loss": 1.5996, "step": 1003 }, { "epoch": 0.06995784412779152, "grad_norm": 0.6568982429262141, "learning_rate": 6.924223518425635e-07, "loss": 1.5396, "step": 1004 }, { "epoch": 0.07002752325540884, "grad_norm": 0.718836906784285, "learning_rate": 6.924073103736925e-07, "loss": 1.4372, "step": 1005 }, { "epoch": 0.07009720238302616, "grad_norm": 0.7071783051387326, "learning_rate": 6.92392254173232e-07, "loss": 1.5001, "step": 1006 }, { "epoch": 0.07016688151064349, "grad_norm": 0.722945628224867, "learning_rate": 6.923771832419036e-07, "loss": 1.691, "step": 1007 }, { "epoch": 0.07023656063826081, "grad_norm": 0.783086373789463, "learning_rate": 6.923620975804296e-07, "loss": 1.568, "step": 1008 }, { "epoch": 0.07030623976587813, "grad_norm": 0.6976889026374747, "learning_rate": 6.923469971895328e-07, "loss": 1.6341, "step": 1009 }, { "epoch": 0.07037591889349545, "grad_norm": 0.7523424135440696, "learning_rate": 6.923318820699369e-07, "loss": 1.5886, "step": 1010 }, { "epoch": 0.07044559802111278, "grad_norm": 0.7173214160819936, "learning_rate": 6.923167522223664e-07, "loss": 1.6562, "step": 1011 }, { "epoch": 0.0705152771487301, "grad_norm": 0.7123339152044518, "learning_rate": 6.923016076475462e-07, "loss": 1.5676, "step": 1012 }, { "epoch": 0.07058495627634742, "grad_norm": 0.7333638244874635, "learning_rate": 6.92286448346202e-07, "loss": 1.6151, "step": 1013 }, { "epoch": 0.07065463540396474, "grad_norm": 0.7004839329730347, "learning_rate": 6.922712743190605e-07, "loss": 1.5125, "step": 1014 }, { "epoch": 0.07072431453158207, "grad_norm": 0.744033569156811, "learning_rate": 6.922560855668486e-07, "loss": 1.579, "step": 1015 }, { "epoch": 0.07079399365919939, "grad_norm": 0.6842826563152172, "learning_rate": 6.922408820902942e-07, "loss": 1.4839, "step": 1016 }, { "epoch": 0.07086367278681671, "grad_norm": 0.6780606922456736, "learning_rate": 6.922256638901262e-07, "loss": 1.478, "step": 1017 }, { "epoch": 0.07093335191443403, "grad_norm": 0.704981494096453, "learning_rate": 6.922104309670736e-07, "loss": 1.6119, "step": 1018 }, { "epoch": 0.07100303104205136, "grad_norm": 0.7637589997063822, "learning_rate": 6.921951833218664e-07, "loss": 1.6386, "step": 1019 }, { "epoch": 0.07107271016966868, "grad_norm": 0.6944249613797556, "learning_rate": 6.921799209552354e-07, "loss": 1.428, "step": 1020 }, { "epoch": 0.071142389297286, "grad_norm": 0.6580765696200459, "learning_rate": 6.921646438679119e-07, "loss": 1.6064, "step": 1021 }, { "epoch": 0.07121206842490332, "grad_norm": 0.6919362421338939, "learning_rate": 6.921493520606281e-07, "loss": 1.5615, "step": 1022 }, { "epoch": 0.07128174755252065, "grad_norm": 0.6524789983946453, "learning_rate": 6.921340455341168e-07, "loss": 1.4955, "step": 1023 }, { "epoch": 0.07135142668013797, "grad_norm": 0.7358174972537268, "learning_rate": 6.921187242891115e-07, "loss": 1.5882, "step": 1024 }, { "epoch": 0.07142110580775529, "grad_norm": 0.7351799396329001, "learning_rate": 6.921033883263464e-07, "loss": 1.5962, "step": 1025 }, { "epoch": 0.07149078493537261, "grad_norm": 0.7773506925419971, "learning_rate": 6.920880376465565e-07, "loss": 1.5903, "step": 1026 }, { "epoch": 0.07156046406298994, "grad_norm": 0.720052376496024, "learning_rate": 6.920726722504773e-07, "loss": 1.5835, "step": 1027 }, { "epoch": 0.07163014319060726, "grad_norm": 0.7122596855425408, "learning_rate": 6.920572921388453e-07, "loss": 1.4811, "step": 1028 }, { "epoch": 0.07169982231822458, "grad_norm": 0.7530822747200974, "learning_rate": 6.920418973123976e-07, "loss": 1.6935, "step": 1029 }, { "epoch": 0.0717695014458419, "grad_norm": 0.6738093485439884, "learning_rate": 6.920264877718716e-07, "loss": 1.5866, "step": 1030 }, { "epoch": 0.07183918057345923, "grad_norm": 0.7484468258192855, "learning_rate": 6.920110635180063e-07, "loss": 1.6671, "step": 1031 }, { "epoch": 0.07190885970107654, "grad_norm": 0.7419311209570713, "learning_rate": 6.919956245515402e-07, "loss": 1.6695, "step": 1032 }, { "epoch": 0.07197853882869386, "grad_norm": 0.6826743849739504, "learning_rate": 6.919801708732137e-07, "loss": 1.5021, "step": 1033 }, { "epoch": 0.07204821795631118, "grad_norm": 0.7000484130543231, "learning_rate": 6.919647024837673e-07, "loss": 1.5443, "step": 1034 }, { "epoch": 0.0721178970839285, "grad_norm": 0.6954818658655855, "learning_rate": 6.91949219383942e-07, "loss": 1.4374, "step": 1035 }, { "epoch": 0.07218757621154583, "grad_norm": 0.715842381283248, "learning_rate": 6.9193372157448e-07, "loss": 1.5712, "step": 1036 }, { "epoch": 0.07225725533916315, "grad_norm": 0.6767161402869131, "learning_rate": 6.919182090561241e-07, "loss": 1.3916, "step": 1037 }, { "epoch": 0.07232693446678047, "grad_norm": 0.6910566641237087, "learning_rate": 6.919026818296173e-07, "loss": 1.5389, "step": 1038 }, { "epoch": 0.0723966135943978, "grad_norm": 0.7341954371107908, "learning_rate": 6.91887139895704e-07, "loss": 1.5189, "step": 1039 }, { "epoch": 0.07246629272201512, "grad_norm": 0.7214373950468113, "learning_rate": 6.91871583255129e-07, "loss": 1.5847, "step": 1040 }, { "epoch": 0.07253597184963244, "grad_norm": 0.7014361915993079, "learning_rate": 6.918560119086376e-07, "loss": 1.4826, "step": 1041 }, { "epoch": 0.07260565097724976, "grad_norm": 0.7704601014028651, "learning_rate": 6.918404258569763e-07, "loss": 1.468, "step": 1042 }, { "epoch": 0.07267533010486708, "grad_norm": 0.808693366876018, "learning_rate": 6.918248251008917e-07, "loss": 1.6654, "step": 1043 }, { "epoch": 0.0727450092324844, "grad_norm": 0.7110048932554214, "learning_rate": 6.918092096411318e-07, "loss": 1.5662, "step": 1044 }, { "epoch": 0.07281468836010173, "grad_norm": 0.6982849324228914, "learning_rate": 6.917935794784445e-07, "loss": 1.5331, "step": 1045 }, { "epoch": 0.07288436748771905, "grad_norm": 0.7381550381399294, "learning_rate": 6.917779346135791e-07, "loss": 1.5555, "step": 1046 }, { "epoch": 0.07295404661533637, "grad_norm": 0.704548114293804, "learning_rate": 6.917622750472852e-07, "loss": 1.6146, "step": 1047 }, { "epoch": 0.0730237257429537, "grad_norm": 0.7042543974542429, "learning_rate": 6.917466007803135e-07, "loss": 1.3595, "step": 1048 }, { "epoch": 0.07309340487057102, "grad_norm": 0.7267693860346068, "learning_rate": 6.917309118134148e-07, "loss": 1.6352, "step": 1049 }, { "epoch": 0.07316308399818834, "grad_norm": 0.7426775788118576, "learning_rate": 6.917152081473412e-07, "loss": 1.6405, "step": 1050 }, { "epoch": 0.07323276312580566, "grad_norm": 0.7070458152085065, "learning_rate": 6.91699489782845e-07, "loss": 1.4703, "step": 1051 }, { "epoch": 0.07330244225342299, "grad_norm": 0.7417121840086736, "learning_rate": 6.916837567206797e-07, "loss": 1.5257, "step": 1052 }, { "epoch": 0.07337212138104031, "grad_norm": 0.7486626599689458, "learning_rate": 6.916680089615992e-07, "loss": 1.469, "step": 1053 }, { "epoch": 0.07344180050865763, "grad_norm": 0.6854908185511666, "learning_rate": 6.916522465063581e-07, "loss": 1.487, "step": 1054 }, { "epoch": 0.07351147963627495, "grad_norm": 0.7121319583710837, "learning_rate": 6.916364693557117e-07, "loss": 1.5364, "step": 1055 }, { "epoch": 0.07358115876389228, "grad_norm": 0.7287291289514267, "learning_rate": 6.916206775104164e-07, "loss": 1.5476, "step": 1056 }, { "epoch": 0.0736508378915096, "grad_norm": 0.744317342873817, "learning_rate": 6.916048709712286e-07, "loss": 1.5123, "step": 1057 }, { "epoch": 0.07372051701912692, "grad_norm": 0.7352301107365474, "learning_rate": 6.915890497389059e-07, "loss": 1.5221, "step": 1058 }, { "epoch": 0.07379019614674424, "grad_norm": 0.667449801712433, "learning_rate": 6.915732138142066e-07, "loss": 1.5428, "step": 1059 }, { "epoch": 0.07385987527436157, "grad_norm": 0.8240861662656697, "learning_rate": 6.915573631978896e-07, "loss": 1.5278, "step": 1060 }, { "epoch": 0.07392955440197889, "grad_norm": 0.8115926103137241, "learning_rate": 6.915414978907143e-07, "loss": 1.5511, "step": 1061 }, { "epoch": 0.07399923352959621, "grad_norm": 0.7115918276627143, "learning_rate": 6.915256178934411e-07, "loss": 1.5539, "step": 1062 }, { "epoch": 0.07406891265721353, "grad_norm": 0.7231534378390903, "learning_rate": 6.915097232068309e-07, "loss": 1.4308, "step": 1063 }, { "epoch": 0.07413859178483086, "grad_norm": 0.6849196644707507, "learning_rate": 6.914938138316456e-07, "loss": 1.5336, "step": 1064 }, { "epoch": 0.07420827091244818, "grad_norm": 0.712647033269892, "learning_rate": 6.914778897686477e-07, "loss": 1.4848, "step": 1065 }, { "epoch": 0.0742779500400655, "grad_norm": 0.6841995015789953, "learning_rate": 6.914619510185999e-07, "loss": 1.5199, "step": 1066 }, { "epoch": 0.07434762916768282, "grad_norm": 0.7032621096597862, "learning_rate": 6.914459975822664e-07, "loss": 1.5288, "step": 1067 }, { "epoch": 0.07441730829530015, "grad_norm": 0.8064800483185524, "learning_rate": 6.914300294604115e-07, "loss": 1.5849, "step": 1068 }, { "epoch": 0.07448698742291747, "grad_norm": 0.6616858768991769, "learning_rate": 6.914140466538005e-07, "loss": 1.5662, "step": 1069 }, { "epoch": 0.07455666655053479, "grad_norm": 0.6981029713873405, "learning_rate": 6.913980491631993e-07, "loss": 1.5105, "step": 1070 }, { "epoch": 0.07462634567815211, "grad_norm": 0.7960162371795243, "learning_rate": 6.913820369893746e-07, "loss": 1.511, "step": 1071 }, { "epoch": 0.07469602480576944, "grad_norm": 0.696388376902543, "learning_rate": 6.913660101330937e-07, "loss": 1.4846, "step": 1072 }, { "epoch": 0.07476570393338676, "grad_norm": 0.7920768720605448, "learning_rate": 6.913499685951247e-07, "loss": 1.5168, "step": 1073 }, { "epoch": 0.07483538306100408, "grad_norm": 0.694588502839473, "learning_rate": 6.913339123762361e-07, "loss": 1.5247, "step": 1074 }, { "epoch": 0.0749050621886214, "grad_norm": 0.7466383220743256, "learning_rate": 6.913178414771977e-07, "loss": 1.6722, "step": 1075 }, { "epoch": 0.07497474131623873, "grad_norm": 0.7403064737880765, "learning_rate": 6.913017558987794e-07, "loss": 1.5448, "step": 1076 }, { "epoch": 0.07504442044385605, "grad_norm": 0.7088682271914373, "learning_rate": 6.912856556417521e-07, "loss": 1.518, "step": 1077 }, { "epoch": 0.07511409957147336, "grad_norm": 0.7251852788383186, "learning_rate": 6.912695407068875e-07, "loss": 1.4666, "step": 1078 }, { "epoch": 0.07518377869909068, "grad_norm": 0.7118843231495031, "learning_rate": 6.912534110949577e-07, "loss": 1.5608, "step": 1079 }, { "epoch": 0.075253457826708, "grad_norm": 0.7373827534866939, "learning_rate": 6.912372668067356e-07, "loss": 1.56, "step": 1080 }, { "epoch": 0.07532313695432533, "grad_norm": 0.6652634128463273, "learning_rate": 6.912211078429952e-07, "loss": 1.4923, "step": 1081 }, { "epoch": 0.07539281608194265, "grad_norm": 0.7210995227862197, "learning_rate": 6.912049342045104e-07, "loss": 1.6125, "step": 1082 }, { "epoch": 0.07546249520955997, "grad_norm": 0.7873573632536712, "learning_rate": 6.911887458920568e-07, "loss": 1.6104, "step": 1083 }, { "epoch": 0.0755321743371773, "grad_norm": 0.670639410874111, "learning_rate": 6.911725429064096e-07, "loss": 1.5491, "step": 1084 }, { "epoch": 0.07560185346479462, "grad_norm": 0.7412530911154177, "learning_rate": 6.911563252483458e-07, "loss": 1.5383, "step": 1085 }, { "epoch": 0.07567153259241194, "grad_norm": 0.8068313122574107, "learning_rate": 6.911400929186425e-07, "loss": 1.5513, "step": 1086 }, { "epoch": 0.07574121172002926, "grad_norm": 0.721225693696014, "learning_rate": 6.911238459180772e-07, "loss": 1.5331, "step": 1087 }, { "epoch": 0.07581089084764658, "grad_norm": 0.7167014213265765, "learning_rate": 6.911075842474287e-07, "loss": 1.6393, "step": 1088 }, { "epoch": 0.0758805699752639, "grad_norm": 0.7171069278949371, "learning_rate": 6.910913079074765e-07, "loss": 1.549, "step": 1089 }, { "epoch": 0.07595024910288123, "grad_norm": 0.6934862794176397, "learning_rate": 6.910750168990005e-07, "loss": 1.4912, "step": 1090 }, { "epoch": 0.07601992823049855, "grad_norm": 0.7197521911287685, "learning_rate": 6.910587112227811e-07, "loss": 1.5344, "step": 1091 }, { "epoch": 0.07608960735811587, "grad_norm": 0.7464831626664131, "learning_rate": 6.910423908796001e-07, "loss": 1.4993, "step": 1092 }, { "epoch": 0.0761592864857332, "grad_norm": 0.7137983125771801, "learning_rate": 6.910260558702393e-07, "loss": 1.6077, "step": 1093 }, { "epoch": 0.07622896561335052, "grad_norm": 0.7084434020338641, "learning_rate": 6.910097061954817e-07, "loss": 1.5826, "step": 1094 }, { "epoch": 0.07629864474096784, "grad_norm": 0.6707576740728572, "learning_rate": 6.909933418561109e-07, "loss": 1.5207, "step": 1095 }, { "epoch": 0.07636832386858516, "grad_norm": 0.701643205980676, "learning_rate": 6.909769628529107e-07, "loss": 1.5677, "step": 1096 }, { "epoch": 0.07643800299620249, "grad_norm": 0.6960908138813413, "learning_rate": 6.909605691866665e-07, "loss": 1.5405, "step": 1097 }, { "epoch": 0.07650768212381981, "grad_norm": 0.7409390432569176, "learning_rate": 6.909441608581636e-07, "loss": 1.6348, "step": 1098 }, { "epoch": 0.07657736125143713, "grad_norm": 0.7330616230821628, "learning_rate": 6.909277378681885e-07, "loss": 1.6215, "step": 1099 }, { "epoch": 0.07664704037905445, "grad_norm": 0.706598439534836, "learning_rate": 6.909113002175281e-07, "loss": 1.5915, "step": 1100 }, { "epoch": 0.07671671950667178, "grad_norm": 0.7230263429802867, "learning_rate": 6.908948479069701e-07, "loss": 1.5836, "step": 1101 }, { "epoch": 0.0767863986342891, "grad_norm": 0.6871898211556535, "learning_rate": 6.908783809373031e-07, "loss": 1.5958, "step": 1102 }, { "epoch": 0.07685607776190642, "grad_norm": 0.7139068952007671, "learning_rate": 6.908618993093161e-07, "loss": 1.6262, "step": 1103 }, { "epoch": 0.07692575688952374, "grad_norm": 0.6569664827836058, "learning_rate": 6.90845403023799e-07, "loss": 1.5115, "step": 1104 }, { "epoch": 0.07699543601714107, "grad_norm": 0.7253554862037589, "learning_rate": 6.908288920815422e-07, "loss": 1.529, "step": 1105 }, { "epoch": 0.07706511514475839, "grad_norm": 0.6970591877647012, "learning_rate": 6.90812366483337e-07, "loss": 1.6525, "step": 1106 }, { "epoch": 0.07713479427237571, "grad_norm": 0.6659396879644911, "learning_rate": 6.907958262299755e-07, "loss": 1.5162, "step": 1107 }, { "epoch": 0.07720447339999303, "grad_norm": 0.6871713539255694, "learning_rate": 6.907792713222501e-07, "loss": 1.6047, "step": 1108 }, { "epoch": 0.07727415252761036, "grad_norm": 0.7116167931927397, "learning_rate": 6.907627017609543e-07, "loss": 1.485, "step": 1109 }, { "epoch": 0.07734383165522768, "grad_norm": 0.7053043605198956, "learning_rate": 6.907461175468822e-07, "loss": 1.4887, "step": 1110 }, { "epoch": 0.077413510782845, "grad_norm": 0.6875986075874049, "learning_rate": 6.907295186808285e-07, "loss": 1.5854, "step": 1111 }, { "epoch": 0.07748318991046232, "grad_norm": 0.7028170272289873, "learning_rate": 6.907129051635885e-07, "loss": 1.6719, "step": 1112 }, { "epoch": 0.07755286903807965, "grad_norm": 0.7086162175094449, "learning_rate": 6.906962769959585e-07, "loss": 1.5725, "step": 1113 }, { "epoch": 0.07762254816569697, "grad_norm": 0.7438672634911797, "learning_rate": 6.906796341787353e-07, "loss": 1.5896, "step": 1114 }, { "epoch": 0.07769222729331429, "grad_norm": 0.6975872227510016, "learning_rate": 6.906629767127165e-07, "loss": 1.5404, "step": 1115 }, { "epoch": 0.07776190642093161, "grad_norm": 0.7077339896057622, "learning_rate": 6.906463045987004e-07, "loss": 1.6565, "step": 1116 }, { "epoch": 0.07783158554854894, "grad_norm": 0.7147410083240348, "learning_rate": 6.906296178374858e-07, "loss": 1.4631, "step": 1117 }, { "epoch": 0.07790126467616626, "grad_norm": 0.7178738712503007, "learning_rate": 6.906129164298726e-07, "loss": 1.6519, "step": 1118 }, { "epoch": 0.07797094380378358, "grad_norm": 0.7567807790555247, "learning_rate": 6.905962003766609e-07, "loss": 1.606, "step": 1119 }, { "epoch": 0.0780406229314009, "grad_norm": 0.7677258840305423, "learning_rate": 6.90579469678652e-07, "loss": 1.514, "step": 1120 }, { "epoch": 0.07811030205901823, "grad_norm": 0.7107671421573318, "learning_rate": 6.905627243366474e-07, "loss": 1.7219, "step": 1121 }, { "epoch": 0.07817998118663555, "grad_norm": 0.7435836249390709, "learning_rate": 6.905459643514499e-07, "loss": 1.5213, "step": 1122 }, { "epoch": 0.07824966031425287, "grad_norm": 0.7182999889998696, "learning_rate": 6.905291897238625e-07, "loss": 1.5407, "step": 1123 }, { "epoch": 0.07831933944187018, "grad_norm": 0.7416359447410312, "learning_rate": 6.905124004546891e-07, "loss": 1.5425, "step": 1124 }, { "epoch": 0.0783890185694875, "grad_norm": 0.757556656682122, "learning_rate": 6.904955965447342e-07, "loss": 1.5452, "step": 1125 }, { "epoch": 0.07845869769710483, "grad_norm": 0.6606122586088298, "learning_rate": 6.904787779948031e-07, "loss": 1.4316, "step": 1126 }, { "epoch": 0.07852837682472215, "grad_norm": 0.7167556172667429, "learning_rate": 6.90461944805702e-07, "loss": 1.5075, "step": 1127 }, { "epoch": 0.07859805595233947, "grad_norm": 0.641614834382918, "learning_rate": 6.904450969782374e-07, "loss": 1.4476, "step": 1128 }, { "epoch": 0.0786677350799568, "grad_norm": 0.7465073966135141, "learning_rate": 6.904282345132164e-07, "loss": 1.6159, "step": 1129 }, { "epoch": 0.07873741420757412, "grad_norm": 0.7399215393549937, "learning_rate": 6.904113574114476e-07, "loss": 1.64, "step": 1130 }, { "epoch": 0.07880709333519144, "grad_norm": 0.6855077939067492, "learning_rate": 6.903944656737396e-07, "loss": 1.4624, "step": 1131 }, { "epoch": 0.07887677246280876, "grad_norm": 0.7251989381893763, "learning_rate": 6.903775593009017e-07, "loss": 1.5619, "step": 1132 }, { "epoch": 0.07894645159042608, "grad_norm": 0.6915517066303697, "learning_rate": 6.903606382937443e-07, "loss": 1.4697, "step": 1133 }, { "epoch": 0.0790161307180434, "grad_norm": 0.7016250746878615, "learning_rate": 6.903437026530782e-07, "loss": 1.4792, "step": 1134 }, { "epoch": 0.07908580984566073, "grad_norm": 0.7219157858272937, "learning_rate": 6.90326752379715e-07, "loss": 1.6381, "step": 1135 }, { "epoch": 0.07915548897327805, "grad_norm": 0.7397081395480988, "learning_rate": 6.90309787474467e-07, "loss": 1.5577, "step": 1136 }, { "epoch": 0.07922516810089537, "grad_norm": 0.7266051003891588, "learning_rate": 6.902928079381473e-07, "loss": 1.6519, "step": 1137 }, { "epoch": 0.0792948472285127, "grad_norm": 0.7290609479762402, "learning_rate": 6.902758137715693e-07, "loss": 1.5796, "step": 1138 }, { "epoch": 0.07936452635613002, "grad_norm": 0.6567695028044657, "learning_rate": 6.902588049755478e-07, "loss": 1.501, "step": 1139 }, { "epoch": 0.07943420548374734, "grad_norm": 0.7077836191905298, "learning_rate": 6.902417815508975e-07, "loss": 1.5554, "step": 1140 }, { "epoch": 0.07950388461136466, "grad_norm": 0.640089811517191, "learning_rate": 6.902247434984345e-07, "loss": 1.4511, "step": 1141 }, { "epoch": 0.07957356373898199, "grad_norm": 0.7397155755165485, "learning_rate": 6.902076908189751e-07, "loss": 1.6186, "step": 1142 }, { "epoch": 0.07964324286659931, "grad_norm": 0.7370105780872295, "learning_rate": 6.901906235133365e-07, "loss": 1.5568, "step": 1143 }, { "epoch": 0.07971292199421663, "grad_norm": 0.678857242792769, "learning_rate": 6.90173541582337e-07, "loss": 1.5696, "step": 1144 }, { "epoch": 0.07978260112183395, "grad_norm": 0.7033520068281577, "learning_rate": 6.901564450267946e-07, "loss": 1.6165, "step": 1145 }, { "epoch": 0.07985228024945128, "grad_norm": 0.7137263676325256, "learning_rate": 6.901393338475288e-07, "loss": 1.6099, "step": 1146 }, { "epoch": 0.0799219593770686, "grad_norm": 0.7144404342321975, "learning_rate": 6.901222080453598e-07, "loss": 1.5078, "step": 1147 }, { "epoch": 0.07999163850468592, "grad_norm": 0.718330695095129, "learning_rate": 6.901050676211082e-07, "loss": 1.5648, "step": 1148 }, { "epoch": 0.08006131763230324, "grad_norm": 0.7194005716639188, "learning_rate": 6.900879125755955e-07, "loss": 1.511, "step": 1149 }, { "epoch": 0.08013099675992057, "grad_norm": 0.7678458971817134, "learning_rate": 6.900707429096435e-07, "loss": 1.5421, "step": 1150 }, { "epoch": 0.08020067588753789, "grad_norm": 0.6985402111375227, "learning_rate": 6.900535586240753e-07, "loss": 1.539, "step": 1151 }, { "epoch": 0.08027035501515521, "grad_norm": 0.6740543021552393, "learning_rate": 6.900363597197144e-07, "loss": 1.544, "step": 1152 }, { "epoch": 0.08034003414277253, "grad_norm": 0.7044355580971693, "learning_rate": 6.900191461973848e-07, "loss": 1.5279, "step": 1153 }, { "epoch": 0.08040971327038986, "grad_norm": 0.7132437628563113, "learning_rate": 6.900019180579115e-07, "loss": 1.5389, "step": 1154 }, { "epoch": 0.08047939239800718, "grad_norm": 0.6845257281834255, "learning_rate": 6.899846753021202e-07, "loss": 1.576, "step": 1155 }, { "epoch": 0.0805490715256245, "grad_norm": 0.7459578464387648, "learning_rate": 6.899674179308371e-07, "loss": 1.4598, "step": 1156 }, { "epoch": 0.08061875065324182, "grad_norm": 0.7157920946244044, "learning_rate": 6.899501459448892e-07, "loss": 1.534, "step": 1157 }, { "epoch": 0.08068842978085915, "grad_norm": 0.8475637704029476, "learning_rate": 6.899328593451042e-07, "loss": 1.5881, "step": 1158 }, { "epoch": 0.08075810890847647, "grad_norm": 0.7428948647025614, "learning_rate": 6.899155581323108e-07, "loss": 1.5358, "step": 1159 }, { "epoch": 0.08082778803609379, "grad_norm": 0.6865769571057071, "learning_rate": 6.898982423073378e-07, "loss": 1.5509, "step": 1160 }, { "epoch": 0.08089746716371111, "grad_norm": 0.7284383625362557, "learning_rate": 6.898809118710149e-07, "loss": 1.6219, "step": 1161 }, { "epoch": 0.08096714629132844, "grad_norm": 0.7222780249612296, "learning_rate": 6.89863566824173e-07, "loss": 1.6027, "step": 1162 }, { "epoch": 0.08103682541894576, "grad_norm": 0.7075963278835353, "learning_rate": 6.89846207167643e-07, "loss": 1.5669, "step": 1163 }, { "epoch": 0.08110650454656308, "grad_norm": 0.7514979660782378, "learning_rate": 6.89828832902257e-07, "loss": 1.6114, "step": 1164 }, { "epoch": 0.0811761836741804, "grad_norm": 0.6928840372383243, "learning_rate": 6.898114440288475e-07, "loss": 1.5779, "step": 1165 }, { "epoch": 0.08124586280179773, "grad_norm": 0.7625255343114417, "learning_rate": 6.897940405482478e-07, "loss": 1.5837, "step": 1166 }, { "epoch": 0.08131554192941505, "grad_norm": 0.6853653653010325, "learning_rate": 6.897766224612919e-07, "loss": 1.5777, "step": 1167 }, { "epoch": 0.08138522105703237, "grad_norm": 0.7381018328852454, "learning_rate": 6.897591897688147e-07, "loss": 1.5864, "step": 1168 }, { "epoch": 0.0814549001846497, "grad_norm": 0.6959768347113379, "learning_rate": 6.897417424716513e-07, "loss": 1.6115, "step": 1169 }, { "epoch": 0.081524579312267, "grad_norm": 0.6895660442037262, "learning_rate": 6.897242805706381e-07, "loss": 1.5186, "step": 1170 }, { "epoch": 0.08159425843988433, "grad_norm": 0.7519857537232603, "learning_rate": 6.897068040666117e-07, "loss": 1.5396, "step": 1171 }, { "epoch": 0.08166393756750165, "grad_norm": 0.718532609238401, "learning_rate": 6.896893129604098e-07, "loss": 1.5395, "step": 1172 }, { "epoch": 0.08173361669511897, "grad_norm": 0.7399091934568263, "learning_rate": 6.896718072528704e-07, "loss": 1.621, "step": 1173 }, { "epoch": 0.0818032958227363, "grad_norm": 0.6925568720224577, "learning_rate": 6.896542869448327e-07, "loss": 1.5752, "step": 1174 }, { "epoch": 0.08187297495035362, "grad_norm": 0.7067599488817029, "learning_rate": 6.896367520371359e-07, "loss": 1.5278, "step": 1175 }, { "epoch": 0.08194265407797094, "grad_norm": 0.7103145143633663, "learning_rate": 6.896192025306206e-07, "loss": 1.6391, "step": 1176 }, { "epoch": 0.08201233320558826, "grad_norm": 0.7734153160510779, "learning_rate": 6.896016384261277e-07, "loss": 1.5156, "step": 1177 }, { "epoch": 0.08208201233320558, "grad_norm": 0.6859669641724797, "learning_rate": 6.895840597244991e-07, "loss": 1.5193, "step": 1178 }, { "epoch": 0.0821516914608229, "grad_norm": 0.673186710234392, "learning_rate": 6.89566466426577e-07, "loss": 1.3677, "step": 1179 }, { "epoch": 0.08222137058844023, "grad_norm": 0.7640001453836751, "learning_rate": 6.895488585332045e-07, "loss": 1.6201, "step": 1180 }, { "epoch": 0.08229104971605755, "grad_norm": 0.7061881267193482, "learning_rate": 6.895312360452255e-07, "loss": 1.5405, "step": 1181 }, { "epoch": 0.08236072884367487, "grad_norm": 0.7576495015869381, "learning_rate": 6.895135989634845e-07, "loss": 1.6068, "step": 1182 }, { "epoch": 0.0824304079712922, "grad_norm": 0.8049680580056225, "learning_rate": 6.894959472888267e-07, "loss": 1.6249, "step": 1183 }, { "epoch": 0.08250008709890952, "grad_norm": 0.7441843872012226, "learning_rate": 6.894782810220979e-07, "loss": 1.6032, "step": 1184 }, { "epoch": 0.08256976622652684, "grad_norm": 0.6633384020829896, "learning_rate": 6.89460600164145e-07, "loss": 1.5106, "step": 1185 }, { "epoch": 0.08263944535414416, "grad_norm": 0.8483096471103987, "learning_rate": 6.89442904715815e-07, "loss": 1.5806, "step": 1186 }, { "epoch": 0.08270912448176149, "grad_norm": 0.6666047476495981, "learning_rate": 6.894251946779559e-07, "loss": 1.442, "step": 1187 }, { "epoch": 0.08277880360937881, "grad_norm": 0.7289293352898126, "learning_rate": 6.894074700514165e-07, "loss": 1.5653, "step": 1188 }, { "epoch": 0.08284848273699613, "grad_norm": 0.7406788086107634, "learning_rate": 6.893897308370463e-07, "loss": 1.3979, "step": 1189 }, { "epoch": 0.08291816186461345, "grad_norm": 0.6537157668650204, "learning_rate": 6.893719770356951e-07, "loss": 1.431, "step": 1190 }, { "epoch": 0.08298784099223078, "grad_norm": 0.7073784965936744, "learning_rate": 6.893542086482141e-07, "loss": 1.3773, "step": 1191 }, { "epoch": 0.0830575201198481, "grad_norm": 26.82777241523961, "learning_rate": 6.893364256754546e-07, "loss": 1.5639, "step": 1192 }, { "epoch": 0.08312719924746542, "grad_norm": 0.6623471303915018, "learning_rate": 6.893186281182687e-07, "loss": 1.5554, "step": 1193 }, { "epoch": 0.08319687837508274, "grad_norm": 0.6809260214860811, "learning_rate": 6.893008159775095e-07, "loss": 1.4711, "step": 1194 }, { "epoch": 0.08326655750270007, "grad_norm": 0.7157109529885805, "learning_rate": 6.892829892540304e-07, "loss": 1.4567, "step": 1195 }, { "epoch": 0.08333623663031739, "grad_norm": 0.7103122232716318, "learning_rate": 6.892651479486856e-07, "loss": 1.5585, "step": 1196 }, { "epoch": 0.08340591575793471, "grad_norm": 0.7422514010124394, "learning_rate": 6.892472920623305e-07, "loss": 1.5075, "step": 1197 }, { "epoch": 0.08347559488555203, "grad_norm": 0.6853896896273873, "learning_rate": 6.892294215958206e-07, "loss": 1.4262, "step": 1198 }, { "epoch": 0.08354527401316936, "grad_norm": 0.706936413282032, "learning_rate": 6.892115365500121e-07, "loss": 1.5037, "step": 1199 }, { "epoch": 0.08361495314078668, "grad_norm": 0.6749722628125229, "learning_rate": 6.891936369257622e-07, "loss": 1.668, "step": 1200 }, { "epoch": 0.083684632268404, "grad_norm": 0.7119207196263206, "learning_rate": 6.891757227239289e-07, "loss": 1.6217, "step": 1201 }, { "epoch": 0.08375431139602132, "grad_norm": 0.7160388119086022, "learning_rate": 6.891577939453703e-07, "loss": 1.4864, "step": 1202 }, { "epoch": 0.08382399052363865, "grad_norm": 0.719587381416626, "learning_rate": 6.891398505909459e-07, "loss": 1.6268, "step": 1203 }, { "epoch": 0.08389366965125597, "grad_norm": 0.7042976963968398, "learning_rate": 6.891218926615155e-07, "loss": 1.5495, "step": 1204 }, { "epoch": 0.08396334877887329, "grad_norm": 0.704887862807588, "learning_rate": 6.891039201579396e-07, "loss": 1.5302, "step": 1205 }, { "epoch": 0.08403302790649061, "grad_norm": 0.6661954067466997, "learning_rate": 6.890859330810796e-07, "loss": 1.6247, "step": 1206 }, { "epoch": 0.08410270703410794, "grad_norm": 0.6652903746555803, "learning_rate": 6.890679314317973e-07, "loss": 1.3775, "step": 1207 }, { "epoch": 0.08417238616172526, "grad_norm": 0.6829149653109714, "learning_rate": 6.890499152109555e-07, "loss": 1.4859, "step": 1208 }, { "epoch": 0.08424206528934258, "grad_norm": 0.7343751200022568, "learning_rate": 6.890318844194177e-07, "loss": 1.482, "step": 1209 }, { "epoch": 0.0843117444169599, "grad_norm": 0.68418704479864, "learning_rate": 6.890138390580477e-07, "loss": 1.4719, "step": 1210 }, { "epoch": 0.08438142354457723, "grad_norm": 0.7197352748202582, "learning_rate": 6.889957791277104e-07, "loss": 1.6337, "step": 1211 }, { "epoch": 0.08445110267219455, "grad_norm": 0.6649129082292412, "learning_rate": 6.889777046292714e-07, "loss": 1.6359, "step": 1212 }, { "epoch": 0.08452078179981187, "grad_norm": 0.7587559411692023, "learning_rate": 6.889596155635966e-07, "loss": 1.5549, "step": 1213 }, { "epoch": 0.0845904609274292, "grad_norm": 0.6806377337801913, "learning_rate": 6.889415119315531e-07, "loss": 1.496, "step": 1214 }, { "epoch": 0.08466014005504652, "grad_norm": 0.776941258193978, "learning_rate": 6.889233937340084e-07, "loss": 1.5431, "step": 1215 }, { "epoch": 0.08472981918266383, "grad_norm": 0.6941217236555354, "learning_rate": 6.889052609718306e-07, "loss": 1.5759, "step": 1216 }, { "epoch": 0.08479949831028115, "grad_norm": 0.7495506413349972, "learning_rate": 6.888871136458888e-07, "loss": 1.476, "step": 1217 }, { "epoch": 0.08486917743789847, "grad_norm": 0.7426227429066157, "learning_rate": 6.888689517570526e-07, "loss": 1.4853, "step": 1218 }, { "epoch": 0.0849388565655158, "grad_norm": 0.7340607548502726, "learning_rate": 6.888507753061925e-07, "loss": 1.5197, "step": 1219 }, { "epoch": 0.08500853569313312, "grad_norm": 0.7516432731396169, "learning_rate": 6.888325842941795e-07, "loss": 1.5661, "step": 1220 }, { "epoch": 0.08507821482075044, "grad_norm": 0.690211007495237, "learning_rate": 6.888143787218852e-07, "loss": 1.4689, "step": 1221 }, { "epoch": 0.08514789394836776, "grad_norm": 0.7315103283612352, "learning_rate": 6.887961585901822e-07, "loss": 1.5662, "step": 1222 }, { "epoch": 0.08521757307598508, "grad_norm": 0.7556938120563503, "learning_rate": 6.887779238999436e-07, "loss": 1.4909, "step": 1223 }, { "epoch": 0.0852872522036024, "grad_norm": 0.7979406139485317, "learning_rate": 6.887596746520433e-07, "loss": 1.6, "step": 1224 }, { "epoch": 0.08535693133121973, "grad_norm": 0.7772957970517779, "learning_rate": 6.887414108473558e-07, "loss": 1.6837, "step": 1225 }, { "epoch": 0.08542661045883705, "grad_norm": 0.7963373152044353, "learning_rate": 6.887231324867562e-07, "loss": 1.6797, "step": 1226 }, { "epoch": 0.08549628958645437, "grad_norm": 0.6920973186236006, "learning_rate": 6.887048395711207e-07, "loss": 1.4087, "step": 1227 }, { "epoch": 0.0855659687140717, "grad_norm": 0.6683888822025812, "learning_rate": 6.886865321013255e-07, "loss": 1.4964, "step": 1228 }, { "epoch": 0.08563564784168902, "grad_norm": 0.7450329584753508, "learning_rate": 6.886682100782485e-07, "loss": 1.5268, "step": 1229 }, { "epoch": 0.08570532696930634, "grad_norm": 0.6858244481120948, "learning_rate": 6.886498735027673e-07, "loss": 1.5805, "step": 1230 }, { "epoch": 0.08577500609692366, "grad_norm": 0.7184719502519555, "learning_rate": 6.88631522375761e-07, "loss": 1.528, "step": 1231 }, { "epoch": 0.08584468522454099, "grad_norm": 0.7760134696891086, "learning_rate": 6.886131566981086e-07, "loss": 1.5172, "step": 1232 }, { "epoch": 0.08591436435215831, "grad_norm": 0.7105109119867613, "learning_rate": 6.885947764706906e-07, "loss": 1.532, "step": 1233 }, { "epoch": 0.08598404347977563, "grad_norm": 0.7015865098791763, "learning_rate": 6.885763816943875e-07, "loss": 1.4887, "step": 1234 }, { "epoch": 0.08605372260739295, "grad_norm": 0.6983077687068207, "learning_rate": 6.885579723700809e-07, "loss": 1.5157, "step": 1235 }, { "epoch": 0.08612340173501028, "grad_norm": 0.7334632983600148, "learning_rate": 6.885395484986532e-07, "loss": 1.5426, "step": 1236 }, { "epoch": 0.0861930808626276, "grad_norm": 0.693820895798714, "learning_rate": 6.885211100809872e-07, "loss": 1.4726, "step": 1237 }, { "epoch": 0.08626275999024492, "grad_norm": 0.6706017501517098, "learning_rate": 6.885026571179664e-07, "loss": 1.553, "step": 1238 }, { "epoch": 0.08633243911786224, "grad_norm": 0.6712469046406953, "learning_rate": 6.884841896104753e-07, "loss": 1.5283, "step": 1239 }, { "epoch": 0.08640211824547957, "grad_norm": 0.7396947117151503, "learning_rate": 6.884657075593987e-07, "loss": 1.4582, "step": 1240 }, { "epoch": 0.08647179737309689, "grad_norm": 0.7470920745785362, "learning_rate": 6.884472109656224e-07, "loss": 1.5739, "step": 1241 }, { "epoch": 0.08654147650071421, "grad_norm": 5.5737115931864585, "learning_rate": 6.884286998300328e-07, "loss": 1.5824, "step": 1242 }, { "epoch": 0.08661115562833153, "grad_norm": 0.6947509528605078, "learning_rate": 6.88410174153517e-07, "loss": 1.5973, "step": 1243 }, { "epoch": 0.08668083475594886, "grad_norm": 0.7216219247735955, "learning_rate": 6.883916339369627e-07, "loss": 1.5319, "step": 1244 }, { "epoch": 0.08675051388356618, "grad_norm": 0.7350172934004601, "learning_rate": 6.883730791812585e-07, "loss": 1.5269, "step": 1245 }, { "epoch": 0.0868201930111835, "grad_norm": 0.6955094801968497, "learning_rate": 6.883545098872935e-07, "loss": 1.6165, "step": 1246 }, { "epoch": 0.08688987213880082, "grad_norm": 0.7538227602808574, "learning_rate": 6.883359260559576e-07, "loss": 1.4654, "step": 1247 }, { "epoch": 0.08695955126641815, "grad_norm": 0.789486381100046, "learning_rate": 6.883173276881414e-07, "loss": 1.5613, "step": 1248 }, { "epoch": 0.08702923039403547, "grad_norm": 0.6975860457622158, "learning_rate": 6.882987147847363e-07, "loss": 1.5203, "step": 1249 }, { "epoch": 0.08709890952165279, "grad_norm": 0.7033421302463916, "learning_rate": 6.88280087346634e-07, "loss": 1.5222, "step": 1250 }, { "epoch": 0.08716858864927012, "grad_norm": 0.7153412252377012, "learning_rate": 6.882614453747272e-07, "loss": 1.4739, "step": 1251 }, { "epoch": 0.08723826777688744, "grad_norm": 0.694186348996605, "learning_rate": 6.882427888699094e-07, "loss": 1.5958, "step": 1252 }, { "epoch": 0.08730794690450476, "grad_norm": 0.6852337145570081, "learning_rate": 6.882241178330748e-07, "loss": 1.545, "step": 1253 }, { "epoch": 0.08737762603212208, "grad_norm": 0.7840072464679235, "learning_rate": 6.882054322651178e-07, "loss": 1.5576, "step": 1254 }, { "epoch": 0.0874473051597394, "grad_norm": 0.7009252831810354, "learning_rate": 6.88186732166934e-07, "loss": 1.5604, "step": 1255 }, { "epoch": 0.08751698428735673, "grad_norm": 0.7162091483897209, "learning_rate": 6.881680175394195e-07, "loss": 1.4449, "step": 1256 }, { "epoch": 0.08758666341497405, "grad_norm": 0.6789452720548425, "learning_rate": 6.881492883834714e-07, "loss": 1.5185, "step": 1257 }, { "epoch": 0.08765634254259137, "grad_norm": 0.75702264090507, "learning_rate": 6.881305446999869e-07, "loss": 1.5161, "step": 1258 }, { "epoch": 0.0877260216702087, "grad_norm": 0.6837204332937512, "learning_rate": 6.881117864898646e-07, "loss": 1.5034, "step": 1259 }, { "epoch": 0.08779570079782602, "grad_norm": 0.70633577212723, "learning_rate": 6.88093013754003e-07, "loss": 1.4734, "step": 1260 }, { "epoch": 0.08786537992544333, "grad_norm": 0.7236570341818808, "learning_rate": 6.880742264933021e-07, "loss": 1.5184, "step": 1261 }, { "epoch": 0.08793505905306065, "grad_norm": 0.7415268607505546, "learning_rate": 6.880554247086619e-07, "loss": 1.6711, "step": 1262 }, { "epoch": 0.08800473818067797, "grad_norm": 0.6770620482470922, "learning_rate": 6.880366084009836e-07, "loss": 1.5219, "step": 1263 }, { "epoch": 0.0880744173082953, "grad_norm": 0.7006824285806458, "learning_rate": 6.880177775711691e-07, "loss": 1.4605, "step": 1264 }, { "epoch": 0.08814409643591262, "grad_norm": 0.6941678012435419, "learning_rate": 6.879989322201204e-07, "loss": 1.5493, "step": 1265 }, { "epoch": 0.08821377556352994, "grad_norm": 0.6932515587732107, "learning_rate": 6.879800723487409e-07, "loss": 1.596, "step": 1266 }, { "epoch": 0.08828345469114726, "grad_norm": 0.727639027077372, "learning_rate": 6.879611979579343e-07, "loss": 1.6517, "step": 1267 }, { "epoch": 0.08835313381876458, "grad_norm": 0.7187124776898827, "learning_rate": 6.879423090486051e-07, "loss": 1.4588, "step": 1268 }, { "epoch": 0.0884228129463819, "grad_norm": 0.714150308857557, "learning_rate": 6.879234056216587e-07, "loss": 1.5832, "step": 1269 }, { "epoch": 0.08849249207399923, "grad_norm": 0.7638975002556709, "learning_rate": 6.879044876780006e-07, "loss": 1.6047, "step": 1270 }, { "epoch": 0.08856217120161655, "grad_norm": 0.7434466148742814, "learning_rate": 6.878855552185377e-07, "loss": 1.6107, "step": 1271 }, { "epoch": 0.08863185032923387, "grad_norm": 0.7399244720941883, "learning_rate": 6.878666082441772e-07, "loss": 1.4261, "step": 1272 }, { "epoch": 0.0887015294568512, "grad_norm": 0.6842006315513699, "learning_rate": 6.87847646755827e-07, "loss": 1.4954, "step": 1273 }, { "epoch": 0.08877120858446852, "grad_norm": 0.7245096057643684, "learning_rate": 6.878286707543958e-07, "loss": 1.5579, "step": 1274 }, { "epoch": 0.08884088771208584, "grad_norm": 0.7879892971492194, "learning_rate": 6.878096802407931e-07, "loss": 1.5088, "step": 1275 }, { "epoch": 0.08891056683970316, "grad_norm": 0.6868473383580004, "learning_rate": 6.877906752159289e-07, "loss": 1.3988, "step": 1276 }, { "epoch": 0.08898024596732049, "grad_norm": 0.7437043498439738, "learning_rate": 6.877716556807138e-07, "loss": 1.6203, "step": 1277 }, { "epoch": 0.08904992509493781, "grad_norm": 0.7058403378765666, "learning_rate": 6.877526216360595e-07, "loss": 1.5629, "step": 1278 }, { "epoch": 0.08911960422255513, "grad_norm": 0.7342911778185349, "learning_rate": 6.87733573082878e-07, "loss": 1.617, "step": 1279 }, { "epoch": 0.08918928335017245, "grad_norm": 0.7737218614982889, "learning_rate": 6.877145100220821e-07, "loss": 1.703, "step": 1280 }, { "epoch": 0.08925896247778978, "grad_norm": 0.7115474712008767, "learning_rate": 6.876954324545854e-07, "loss": 1.5477, "step": 1281 }, { "epoch": 0.0893286416054071, "grad_norm": 0.8068670560532931, "learning_rate": 6.876763403813022e-07, "loss": 1.6857, "step": 1282 }, { "epoch": 0.08939832073302442, "grad_norm": 0.7093087007583438, "learning_rate": 6.876572338031475e-07, "loss": 1.4789, "step": 1283 }, { "epoch": 0.08946799986064174, "grad_norm": 0.7420277841762087, "learning_rate": 6.876381127210368e-07, "loss": 1.4069, "step": 1284 }, { "epoch": 0.08953767898825907, "grad_norm": 1.9990965003575498, "learning_rate": 6.876189771358862e-07, "loss": 1.4755, "step": 1285 }, { "epoch": 0.08960735811587639, "grad_norm": 0.6977842602151991, "learning_rate": 6.875998270486131e-07, "loss": 1.6219, "step": 1286 }, { "epoch": 0.08967703724349371, "grad_norm": 0.6590490730475049, "learning_rate": 6.875806624601351e-07, "loss": 1.5421, "step": 1287 }, { "epoch": 0.08974671637111103, "grad_norm": 0.6887642541353808, "learning_rate": 6.875614833713706e-07, "loss": 1.4732, "step": 1288 }, { "epoch": 0.08981639549872836, "grad_norm": 0.7376671506965188, "learning_rate": 6.875422897832385e-07, "loss": 1.3695, "step": 1289 }, { "epoch": 0.08988607462634568, "grad_norm": 0.7259016277612667, "learning_rate": 6.875230816966589e-07, "loss": 1.5346, "step": 1290 }, { "epoch": 0.089955753753963, "grad_norm": 0.724086003625605, "learning_rate": 6.875038591125522e-07, "loss": 1.6004, "step": 1291 }, { "epoch": 0.09002543288158033, "grad_norm": 0.7178196848691761, "learning_rate": 6.874846220318393e-07, "loss": 1.6231, "step": 1292 }, { "epoch": 0.09009511200919765, "grad_norm": 0.7373207554001411, "learning_rate": 6.874653704554426e-07, "loss": 1.4717, "step": 1293 }, { "epoch": 0.09016479113681497, "grad_norm": 0.7134230514137152, "learning_rate": 6.874461043842843e-07, "loss": 1.6591, "step": 1294 }, { "epoch": 0.09023447026443229, "grad_norm": 0.7376105614130277, "learning_rate": 6.874268238192877e-07, "loss": 1.6336, "step": 1295 }, { "epoch": 0.09030414939204962, "grad_norm": 0.7450088115019615, "learning_rate": 6.874075287613769e-07, "loss": 1.6974, "step": 1296 }, { "epoch": 0.09037382851966694, "grad_norm": 0.6893786584174302, "learning_rate": 6.873882192114765e-07, "loss": 1.5151, "step": 1297 }, { "epoch": 0.09044350764728426, "grad_norm": 0.6715073587311645, "learning_rate": 6.873688951705119e-07, "loss": 1.4657, "step": 1298 }, { "epoch": 0.09051318677490158, "grad_norm": 0.7566996663979243, "learning_rate": 6.873495566394089e-07, "loss": 1.5942, "step": 1299 }, { "epoch": 0.0905828659025189, "grad_norm": 0.7357953944743499, "learning_rate": 6.873302036190946e-07, "loss": 1.6023, "step": 1300 }, { "epoch": 0.09065254503013623, "grad_norm": 0.7581630163955304, "learning_rate": 6.873108361104963e-07, "loss": 1.7109, "step": 1301 }, { "epoch": 0.09072222415775355, "grad_norm": 0.7341133536278991, "learning_rate": 6.872914541145419e-07, "loss": 1.6366, "step": 1302 }, { "epoch": 0.09079190328537087, "grad_norm": 0.6626232009933386, "learning_rate": 6.872720576321606e-07, "loss": 1.5272, "step": 1303 }, { "epoch": 0.0908615824129882, "grad_norm": 0.7910328700731327, "learning_rate": 6.872526466642818e-07, "loss": 1.7656, "step": 1304 }, { "epoch": 0.09093126154060552, "grad_norm": 0.7662975077969607, "learning_rate": 6.872332212118355e-07, "loss": 1.635, "step": 1305 }, { "epoch": 0.09100094066822284, "grad_norm": 0.6887721459909045, "learning_rate": 6.872137812757528e-07, "loss": 1.523, "step": 1306 }, { "epoch": 0.09107061979584015, "grad_norm": 0.712920618786257, "learning_rate": 6.871943268569653e-07, "loss": 1.6317, "step": 1307 }, { "epoch": 0.09114029892345747, "grad_norm": 0.7577558050195798, "learning_rate": 6.871748579564053e-07, "loss": 1.5641, "step": 1308 }, { "epoch": 0.0912099780510748, "grad_norm": 0.6982117743762234, "learning_rate": 6.871553745750056e-07, "loss": 1.4785, "step": 1309 }, { "epoch": 0.09127965717869212, "grad_norm": 0.6991004630390641, "learning_rate": 6.871358767137003e-07, "loss": 1.5554, "step": 1310 }, { "epoch": 0.09134933630630944, "grad_norm": 0.7314176603239776, "learning_rate": 6.871163643734233e-07, "loss": 1.6083, "step": 1311 }, { "epoch": 0.09141901543392676, "grad_norm": 0.6877124905657467, "learning_rate": 6.870968375551098e-07, "loss": 1.5961, "step": 1312 }, { "epoch": 0.09148869456154408, "grad_norm": 0.6395093045596821, "learning_rate": 6.870772962596959e-07, "loss": 1.5792, "step": 1313 }, { "epoch": 0.09155837368916141, "grad_norm": 0.7082289797343458, "learning_rate": 6.870577404881177e-07, "loss": 1.5305, "step": 1314 }, { "epoch": 0.09162805281677873, "grad_norm": 0.7492565372235473, "learning_rate": 6.870381702413124e-07, "loss": 1.6021, "step": 1315 }, { "epoch": 0.09169773194439605, "grad_norm": 0.7328857828061993, "learning_rate": 6.87018585520218e-07, "loss": 1.5604, "step": 1316 }, { "epoch": 0.09176741107201337, "grad_norm": 0.9248051912076805, "learning_rate": 6.86998986325773e-07, "loss": 1.5056, "step": 1317 }, { "epoch": 0.0918370901996307, "grad_norm": 0.7133062641617752, "learning_rate": 6.869793726589165e-07, "loss": 1.5892, "step": 1318 }, { "epoch": 0.09190676932724802, "grad_norm": 0.7847077630418262, "learning_rate": 6.869597445205885e-07, "loss": 1.5176, "step": 1319 }, { "epoch": 0.09197644845486534, "grad_norm": 0.6839226291668064, "learning_rate": 6.869401019117297e-07, "loss": 1.4888, "step": 1320 }, { "epoch": 0.09204612758248266, "grad_norm": 0.721028875233305, "learning_rate": 6.869204448332812e-07, "loss": 1.5445, "step": 1321 }, { "epoch": 0.09211580671009999, "grad_norm": 0.7977990236065278, "learning_rate": 6.869007732861853e-07, "loss": 1.6349, "step": 1322 }, { "epoch": 0.09218548583771731, "grad_norm": 0.726214409816211, "learning_rate": 6.868810872713846e-07, "loss": 1.5163, "step": 1323 }, { "epoch": 0.09225516496533463, "grad_norm": 0.7781168458751725, "learning_rate": 6.868613867898223e-07, "loss": 1.585, "step": 1324 }, { "epoch": 0.09232484409295195, "grad_norm": 0.718082280962953, "learning_rate": 6.868416718424427e-07, "loss": 1.5275, "step": 1325 }, { "epoch": 0.09239452322056928, "grad_norm": 0.752359718631759, "learning_rate": 6.868219424301905e-07, "loss": 1.7251, "step": 1326 }, { "epoch": 0.0924642023481866, "grad_norm": 0.7757472708282024, "learning_rate": 6.868021985540112e-07, "loss": 1.5877, "step": 1327 }, { "epoch": 0.09253388147580392, "grad_norm": 0.7188829520974873, "learning_rate": 6.867824402148509e-07, "loss": 1.6991, "step": 1328 }, { "epoch": 0.09260356060342124, "grad_norm": 0.6927468915057636, "learning_rate": 6.867626674136566e-07, "loss": 1.4599, "step": 1329 }, { "epoch": 0.09267323973103857, "grad_norm": 0.7793706193864617, "learning_rate": 6.867428801513757e-07, "loss": 1.8769, "step": 1330 }, { "epoch": 0.09274291885865589, "grad_norm": 0.7050947771615956, "learning_rate": 6.867230784289566e-07, "loss": 1.5947, "step": 1331 }, { "epoch": 0.09281259798627321, "grad_norm": 0.7363007364951878, "learning_rate": 6.867032622473481e-07, "loss": 1.4746, "step": 1332 }, { "epoch": 0.09288227711389054, "grad_norm": 0.7210920566358469, "learning_rate": 6.866834316074999e-07, "loss": 1.4997, "step": 1333 }, { "epoch": 0.09295195624150786, "grad_norm": 0.694334164410711, "learning_rate": 6.866635865103623e-07, "loss": 1.5192, "step": 1334 }, { "epoch": 0.09302163536912518, "grad_norm": 0.6826381175796876, "learning_rate": 6.866437269568864e-07, "loss": 1.5416, "step": 1335 }, { "epoch": 0.0930913144967425, "grad_norm": 0.7094518106250833, "learning_rate": 6.866238529480238e-07, "loss": 1.6305, "step": 1336 }, { "epoch": 0.09316099362435983, "grad_norm": 0.7451153132227982, "learning_rate": 6.86603964484727e-07, "loss": 1.6427, "step": 1337 }, { "epoch": 0.09323067275197715, "grad_norm": 0.6944135717548009, "learning_rate": 6.865840615679489e-07, "loss": 1.5994, "step": 1338 }, { "epoch": 0.09330035187959447, "grad_norm": 0.7324834912449787, "learning_rate": 6.865641441986436e-07, "loss": 1.5387, "step": 1339 }, { "epoch": 0.09337003100721179, "grad_norm": 0.6871232434674317, "learning_rate": 6.865442123777652e-07, "loss": 1.4626, "step": 1340 }, { "epoch": 0.09343971013482912, "grad_norm": 0.75076436213832, "learning_rate": 6.865242661062692e-07, "loss": 1.4723, "step": 1341 }, { "epoch": 0.09350938926244644, "grad_norm": 0.6986478206384793, "learning_rate": 6.865043053851113e-07, "loss": 1.546, "step": 1342 }, { "epoch": 0.09357906839006376, "grad_norm": 0.6773347909600681, "learning_rate": 6.864843302152483e-07, "loss": 1.5233, "step": 1343 }, { "epoch": 0.09364874751768108, "grad_norm": 0.7256916010451714, "learning_rate": 6.864643405976371e-07, "loss": 1.5341, "step": 1344 }, { "epoch": 0.0937184266452984, "grad_norm": 0.7156842660159166, "learning_rate": 6.864443365332357e-07, "loss": 1.4524, "step": 1345 }, { "epoch": 0.09378810577291573, "grad_norm": 0.717844637914586, "learning_rate": 6.86424318023003e-07, "loss": 1.5503, "step": 1346 }, { "epoch": 0.09385778490053305, "grad_norm": 0.7134994406384746, "learning_rate": 6.864042850678981e-07, "loss": 1.5557, "step": 1347 }, { "epoch": 0.09392746402815037, "grad_norm": 0.7060719200241936, "learning_rate": 6.863842376688812e-07, "loss": 1.6098, "step": 1348 }, { "epoch": 0.0939971431557677, "grad_norm": 0.7133840642791894, "learning_rate": 6.863641758269128e-07, "loss": 1.6069, "step": 1349 }, { "epoch": 0.09406682228338502, "grad_norm": 0.7419267085106452, "learning_rate": 6.863440995429543e-07, "loss": 1.5277, "step": 1350 }, { "epoch": 0.09413650141100234, "grad_norm": 0.7320435951110261, "learning_rate": 6.863240088179681e-07, "loss": 1.5051, "step": 1351 }, { "epoch": 0.09420618053861966, "grad_norm": 0.7189557968560032, "learning_rate": 6.863039036529167e-07, "loss": 1.5563, "step": 1352 }, { "epoch": 0.09427585966623697, "grad_norm": 0.7258479005816894, "learning_rate": 6.862837840487637e-07, "loss": 1.7108, "step": 1353 }, { "epoch": 0.0943455387938543, "grad_norm": 0.7093271738297767, "learning_rate": 6.862636500064733e-07, "loss": 1.561, "step": 1354 }, { "epoch": 0.09441521792147162, "grad_norm": 0.7289165862673944, "learning_rate": 6.862435015270102e-07, "loss": 1.5443, "step": 1355 }, { "epoch": 0.09448489704908894, "grad_norm": 0.728731968183907, "learning_rate": 6.862233386113402e-07, "loss": 1.4719, "step": 1356 }, { "epoch": 0.09455457617670626, "grad_norm": 0.7598985533740218, "learning_rate": 6.862031612604292e-07, "loss": 1.5874, "step": 1357 }, { "epoch": 0.09462425530432358, "grad_norm": 0.7489312592826469, "learning_rate": 6.861829694752445e-07, "loss": 1.5196, "step": 1358 }, { "epoch": 0.09469393443194091, "grad_norm": 0.7115801847246932, "learning_rate": 6.861627632567536e-07, "loss": 1.4183, "step": 1359 }, { "epoch": 0.09476361355955823, "grad_norm": 0.7286465327425591, "learning_rate": 6.861425426059247e-07, "loss": 1.5136, "step": 1360 }, { "epoch": 0.09483329268717555, "grad_norm": 0.7282399514777784, "learning_rate": 6.861223075237272e-07, "loss": 1.4448, "step": 1361 }, { "epoch": 0.09490297181479287, "grad_norm": 0.6552566952542627, "learning_rate": 6.861020580111302e-07, "loss": 1.5461, "step": 1362 }, { "epoch": 0.0949726509424102, "grad_norm": 0.6940211142791631, "learning_rate": 6.860817940691046e-07, "loss": 1.4516, "step": 1363 }, { "epoch": 0.09504233007002752, "grad_norm": 0.7633128378880454, "learning_rate": 6.860615156986212e-07, "loss": 1.6427, "step": 1364 }, { "epoch": 0.09511200919764484, "grad_norm": 0.6982614518760314, "learning_rate": 6.86041222900652e-07, "loss": 1.4, "step": 1365 }, { "epoch": 0.09518168832526216, "grad_norm": 0.7177285924240916, "learning_rate": 6.860209156761693e-07, "loss": 1.4115, "step": 1366 }, { "epoch": 0.09525136745287949, "grad_norm": 0.6952426491388587, "learning_rate": 6.860005940261464e-07, "loss": 1.4289, "step": 1367 }, { "epoch": 0.09532104658049681, "grad_norm": 0.6700157416887091, "learning_rate": 6.859802579515571e-07, "loss": 1.4947, "step": 1368 }, { "epoch": 0.09539072570811413, "grad_norm": 0.7096025037508805, "learning_rate": 6.859599074533759e-07, "loss": 1.5841, "step": 1369 }, { "epoch": 0.09546040483573145, "grad_norm": 0.6999665684061958, "learning_rate": 6.85939542532578e-07, "loss": 1.5309, "step": 1370 }, { "epoch": 0.09553008396334878, "grad_norm": 0.7132517833298145, "learning_rate": 6.859191631901395e-07, "loss": 1.6011, "step": 1371 }, { "epoch": 0.0955997630909661, "grad_norm": 0.6828272290576067, "learning_rate": 6.858987694270371e-07, "loss": 1.5264, "step": 1372 }, { "epoch": 0.09566944221858342, "grad_norm": 0.7158171608458893, "learning_rate": 6.858783612442477e-07, "loss": 1.5748, "step": 1373 }, { "epoch": 0.09573912134620075, "grad_norm": 0.7506877164228436, "learning_rate": 6.858579386427496e-07, "loss": 1.5836, "step": 1374 }, { "epoch": 0.09580880047381807, "grad_norm": 0.7367438933731758, "learning_rate": 6.858375016235214e-07, "loss": 1.6368, "step": 1375 }, { "epoch": 0.09587847960143539, "grad_norm": 0.7285757842868972, "learning_rate": 6.858170501875426e-07, "loss": 1.5309, "step": 1376 }, { "epoch": 0.09594815872905271, "grad_norm": 0.73602276499145, "learning_rate": 6.85796584335793e-07, "loss": 1.5709, "step": 1377 }, { "epoch": 0.09601783785667004, "grad_norm": 0.7170190034347591, "learning_rate": 6.857761040692537e-07, "loss": 1.5174, "step": 1378 }, { "epoch": 0.09608751698428736, "grad_norm": 0.7080538784656087, "learning_rate": 6.857556093889062e-07, "loss": 1.5577, "step": 1379 }, { "epoch": 0.09615719611190468, "grad_norm": 0.6842467145632883, "learning_rate": 6.857351002957321e-07, "loss": 1.5241, "step": 1380 }, { "epoch": 0.096226875239522, "grad_norm": 0.7093581497996692, "learning_rate": 6.857145767907148e-07, "loss": 1.597, "step": 1381 }, { "epoch": 0.09629655436713933, "grad_norm": 0.7003330075726656, "learning_rate": 6.856940388748376e-07, "loss": 1.5867, "step": 1382 }, { "epoch": 0.09636623349475665, "grad_norm": 0.7464105256695647, "learning_rate": 6.856734865490847e-07, "loss": 1.6015, "step": 1383 }, { "epoch": 0.09643591262237397, "grad_norm": 0.7417160276838659, "learning_rate": 6.85652919814441e-07, "loss": 1.564, "step": 1384 }, { "epoch": 0.09650559174999129, "grad_norm": 0.7036341159599784, "learning_rate": 6.856323386718923e-07, "loss": 1.4837, "step": 1385 }, { "epoch": 0.09657527087760862, "grad_norm": 0.7182081102121308, "learning_rate": 6.856117431224246e-07, "loss": 1.4602, "step": 1386 }, { "epoch": 0.09664495000522594, "grad_norm": 0.7267870082254924, "learning_rate": 6.855911331670251e-07, "loss": 1.5879, "step": 1387 }, { "epoch": 0.09671462913284326, "grad_norm": 0.7263497628374512, "learning_rate": 6.855705088066814e-07, "loss": 1.5322, "step": 1388 }, { "epoch": 0.09678430826046058, "grad_norm": 0.7424830626660222, "learning_rate": 6.855498700423819e-07, "loss": 1.663, "step": 1389 }, { "epoch": 0.0968539873880779, "grad_norm": 0.6792334902361514, "learning_rate": 6.855292168751155e-07, "loss": 1.6057, "step": 1390 }, { "epoch": 0.09692366651569523, "grad_norm": 0.6902998981753529, "learning_rate": 6.855085493058721e-07, "loss": 1.5234, "step": 1391 }, { "epoch": 0.09699334564331255, "grad_norm": 0.6695799650473492, "learning_rate": 6.854878673356421e-07, "loss": 1.6354, "step": 1392 }, { "epoch": 0.09706302477092987, "grad_norm": 0.707158420145716, "learning_rate": 6.854671709654168e-07, "loss": 1.6303, "step": 1393 }, { "epoch": 0.0971327038985472, "grad_norm": 0.6919860585571154, "learning_rate": 6.854464601961875e-07, "loss": 1.5524, "step": 1394 }, { "epoch": 0.09720238302616452, "grad_norm": 0.7174186095393233, "learning_rate": 6.854257350289472e-07, "loss": 1.5092, "step": 1395 }, { "epoch": 0.09727206215378184, "grad_norm": 0.7194978937889569, "learning_rate": 6.854049954646889e-07, "loss": 1.6163, "step": 1396 }, { "epoch": 0.09734174128139916, "grad_norm": 0.7669208281476789, "learning_rate": 6.853842415044065e-07, "loss": 1.4796, "step": 1397 }, { "epoch": 0.09741142040901649, "grad_norm": 0.7872523249133949, "learning_rate": 6.853634731490944e-07, "loss": 1.5655, "step": 1398 }, { "epoch": 0.0974810995366338, "grad_norm": 0.72278764585173, "learning_rate": 6.853426903997482e-07, "loss": 1.5231, "step": 1399 }, { "epoch": 0.09755077866425112, "grad_norm": 0.6911638249630152, "learning_rate": 6.853218932573636e-07, "loss": 1.4571, "step": 1400 }, { "epoch": 0.09762045779186844, "grad_norm": 0.7409581114985209, "learning_rate": 6.853010817229374e-07, "loss": 1.5235, "step": 1401 }, { "epoch": 0.09769013691948576, "grad_norm": 0.7512553391928395, "learning_rate": 6.852802557974668e-07, "loss": 1.6149, "step": 1402 }, { "epoch": 0.09775981604710308, "grad_norm": 0.7542228208631525, "learning_rate": 6.8525941548195e-07, "loss": 1.6557, "step": 1403 }, { "epoch": 0.09782949517472041, "grad_norm": 0.730933943838304, "learning_rate": 6.852385607773855e-07, "loss": 1.5203, "step": 1404 }, { "epoch": 0.09789917430233773, "grad_norm": 0.7226965064395986, "learning_rate": 6.852176916847728e-07, "loss": 1.5156, "step": 1405 }, { "epoch": 0.09796885342995505, "grad_norm": 0.7400386566230431, "learning_rate": 6.851968082051119e-07, "loss": 1.4812, "step": 1406 }, { "epoch": 0.09803853255757237, "grad_norm": 0.7217069234943865, "learning_rate": 6.851759103394038e-07, "loss": 1.6011, "step": 1407 }, { "epoch": 0.0981082116851897, "grad_norm": 0.7208248423892274, "learning_rate": 6.851549980886498e-07, "loss": 1.5482, "step": 1408 }, { "epoch": 0.09817789081280702, "grad_norm": 0.73869785008963, "learning_rate": 6.851340714538519e-07, "loss": 1.5056, "step": 1409 }, { "epoch": 0.09824756994042434, "grad_norm": 0.7205356992642813, "learning_rate": 6.851131304360134e-07, "loss": 1.6143, "step": 1410 }, { "epoch": 0.09831724906804166, "grad_norm": 0.7504594927506455, "learning_rate": 6.850921750361374e-07, "loss": 1.5684, "step": 1411 }, { "epoch": 0.09838692819565899, "grad_norm": 0.7912872734883598, "learning_rate": 6.850712052552282e-07, "loss": 1.5114, "step": 1412 }, { "epoch": 0.09845660732327631, "grad_norm": 0.6954212545087036, "learning_rate": 6.85050221094291e-07, "loss": 1.5148, "step": 1413 }, { "epoch": 0.09852628645089363, "grad_norm": 0.7121224389351605, "learning_rate": 6.850292225543312e-07, "loss": 1.5543, "step": 1414 }, { "epoch": 0.09859596557851096, "grad_norm": 0.7308811982222069, "learning_rate": 6.850082096363551e-07, "loss": 1.5675, "step": 1415 }, { "epoch": 0.09866564470612828, "grad_norm": 0.6669232499820077, "learning_rate": 6.849871823413696e-07, "loss": 1.5222, "step": 1416 }, { "epoch": 0.0987353238337456, "grad_norm": 0.7023704892031762, "learning_rate": 6.849661406703825e-07, "loss": 1.4323, "step": 1417 }, { "epoch": 0.09880500296136292, "grad_norm": 0.7450305359211221, "learning_rate": 6.849450846244022e-07, "loss": 1.6129, "step": 1418 }, { "epoch": 0.09887468208898025, "grad_norm": 0.7559519975027, "learning_rate": 6.849240142044376e-07, "loss": 1.543, "step": 1419 }, { "epoch": 0.09894436121659757, "grad_norm": 0.7557039933040223, "learning_rate": 6.849029294114985e-07, "loss": 1.7281, "step": 1420 }, { "epoch": 0.09901404034421489, "grad_norm": 0.7535013429286772, "learning_rate": 6.848818302465954e-07, "loss": 1.5028, "step": 1421 }, { "epoch": 0.09908371947183221, "grad_norm": 0.695752285288689, "learning_rate": 6.848607167107393e-07, "loss": 1.5552, "step": 1422 }, { "epoch": 0.09915339859944954, "grad_norm": 0.7130712608592883, "learning_rate": 6.848395888049421e-07, "loss": 1.5192, "step": 1423 }, { "epoch": 0.09922307772706686, "grad_norm": 0.7462396757316636, "learning_rate": 6.84818446530216e-07, "loss": 1.6576, "step": 1424 }, { "epoch": 0.09929275685468418, "grad_norm": 0.6936681758464512, "learning_rate": 6.847972898875747e-07, "loss": 1.4941, "step": 1425 }, { "epoch": 0.0993624359823015, "grad_norm": 0.7291394697109261, "learning_rate": 6.847761188780318e-07, "loss": 1.5412, "step": 1426 }, { "epoch": 0.09943211510991883, "grad_norm": 0.8215857318532434, "learning_rate": 6.847549335026017e-07, "loss": 1.6619, "step": 1427 }, { "epoch": 0.09950179423753615, "grad_norm": 0.7589146596740709, "learning_rate": 6.847337337623e-07, "loss": 1.5622, "step": 1428 }, { "epoch": 0.09957147336515347, "grad_norm": 0.7000258820654407, "learning_rate": 6.847125196581422e-07, "loss": 1.6265, "step": 1429 }, { "epoch": 0.0996411524927708, "grad_norm": 0.6653107210684309, "learning_rate": 6.846912911911453e-07, "loss": 1.4856, "step": 1430 }, { "epoch": 0.09971083162038812, "grad_norm": 0.6702010457721812, "learning_rate": 6.846700483623265e-07, "loss": 1.4824, "step": 1431 }, { "epoch": 0.09978051074800544, "grad_norm": 0.7175711439850605, "learning_rate": 6.846487911727036e-07, "loss": 1.5737, "step": 1432 }, { "epoch": 0.09985018987562276, "grad_norm": 0.729863487630728, "learning_rate": 6.846275196232956e-07, "loss": 1.4471, "step": 1433 }, { "epoch": 0.09991986900324008, "grad_norm": 0.6618160691322408, "learning_rate": 6.846062337151217e-07, "loss": 1.4689, "step": 1434 }, { "epoch": 0.0999895481308574, "grad_norm": 0.761821100047957, "learning_rate": 6.845849334492021e-07, "loss": 1.6445, "step": 1435 }, { "epoch": 0.10005922725847473, "grad_norm": 0.7375583392439995, "learning_rate": 6.845636188265573e-07, "loss": 1.5686, "step": 1436 }, { "epoch": 0.10012890638609205, "grad_norm": 0.7082128443723841, "learning_rate": 6.845422898482089e-07, "loss": 1.4708, "step": 1437 }, { "epoch": 0.10019858551370937, "grad_norm": 0.7831935456199045, "learning_rate": 6.845209465151791e-07, "loss": 1.5965, "step": 1438 }, { "epoch": 0.1002682646413267, "grad_norm": 0.6625792127601626, "learning_rate": 6.844995888284906e-07, "loss": 1.4301, "step": 1439 }, { "epoch": 0.10033794376894402, "grad_norm": 0.7224274069117864, "learning_rate": 6.84478216789167e-07, "loss": 1.52, "step": 1440 }, { "epoch": 0.10040762289656134, "grad_norm": 0.7306401875419833, "learning_rate": 6.844568303982324e-07, "loss": 1.6456, "step": 1441 }, { "epoch": 0.10047730202417866, "grad_norm": 0.6991325586636551, "learning_rate": 6.844354296567117e-07, "loss": 1.5894, "step": 1442 }, { "epoch": 0.10054698115179599, "grad_norm": 0.8232785004103219, "learning_rate": 6.844140145656305e-07, "loss": 1.7543, "step": 1443 }, { "epoch": 0.10061666027941331, "grad_norm": 0.7215617175038418, "learning_rate": 6.84392585126015e-07, "loss": 1.5773, "step": 1444 }, { "epoch": 0.10068633940703062, "grad_norm": 0.7374985957692638, "learning_rate": 6.843711413388923e-07, "loss": 1.5536, "step": 1445 }, { "epoch": 0.10075601853464794, "grad_norm": 0.7104503086346701, "learning_rate": 6.843496832052897e-07, "loss": 1.5673, "step": 1446 }, { "epoch": 0.10082569766226526, "grad_norm": 0.7036943520124541, "learning_rate": 6.843282107262359e-07, "loss": 1.4641, "step": 1447 }, { "epoch": 0.10089537678988258, "grad_norm": 0.7682117601321635, "learning_rate": 6.843067239027598e-07, "loss": 1.4993, "step": 1448 }, { "epoch": 0.10096505591749991, "grad_norm": 0.7987108957657223, "learning_rate": 6.842852227358907e-07, "loss": 1.6263, "step": 1449 }, { "epoch": 0.10103473504511723, "grad_norm": 0.7182857850830695, "learning_rate": 6.842637072266596e-07, "loss": 1.53, "step": 1450 }, { "epoch": 0.10110441417273455, "grad_norm": 0.7246888294850192, "learning_rate": 6.842421773760972e-07, "loss": 1.7401, "step": 1451 }, { "epoch": 0.10117409330035187, "grad_norm": 0.7094639682875344, "learning_rate": 6.842206331852352e-07, "loss": 1.5597, "step": 1452 }, { "epoch": 0.1012437724279692, "grad_norm": 0.744605882539227, "learning_rate": 6.841990746551064e-07, "loss": 1.6996, "step": 1453 }, { "epoch": 0.10131345155558652, "grad_norm": 0.7732575296825245, "learning_rate": 6.841775017867435e-07, "loss": 1.6414, "step": 1454 }, { "epoch": 0.10138313068320384, "grad_norm": 0.7219152072214178, "learning_rate": 6.841559145811805e-07, "loss": 1.6682, "step": 1455 }, { "epoch": 0.10145280981082117, "grad_norm": 0.7089114420009685, "learning_rate": 6.84134313039452e-07, "loss": 1.6003, "step": 1456 }, { "epoch": 0.10152248893843849, "grad_norm": 0.6822564437101662, "learning_rate": 6.841126971625932e-07, "loss": 1.5532, "step": 1457 }, { "epoch": 0.10159216806605581, "grad_norm": 0.6908697206870166, "learning_rate": 6.840910669516399e-07, "loss": 1.551, "step": 1458 }, { "epoch": 0.10166184719367313, "grad_norm": 0.749847037637897, "learning_rate": 6.840694224076284e-07, "loss": 1.511, "step": 1459 }, { "epoch": 0.10173152632129046, "grad_norm": 0.6973808625903857, "learning_rate": 6.840477635315965e-07, "loss": 1.6447, "step": 1460 }, { "epoch": 0.10180120544890778, "grad_norm": 0.6637049891036413, "learning_rate": 6.840260903245816e-07, "loss": 1.4964, "step": 1461 }, { "epoch": 0.1018708845765251, "grad_norm": 0.7051328218519948, "learning_rate": 6.840044027876226e-07, "loss": 1.4516, "step": 1462 }, { "epoch": 0.10194056370414242, "grad_norm": 0.6998029876564847, "learning_rate": 6.839827009217589e-07, "loss": 1.7098, "step": 1463 }, { "epoch": 0.10201024283175975, "grad_norm": 0.8312269910488456, "learning_rate": 6.839609847280303e-07, "loss": 1.4785, "step": 1464 }, { "epoch": 0.10207992195937707, "grad_norm": 0.692043398562744, "learning_rate": 6.839392542074777e-07, "loss": 1.3329, "step": 1465 }, { "epoch": 0.10214960108699439, "grad_norm": 0.6905454250734092, "learning_rate": 6.83917509361142e-07, "loss": 1.6113, "step": 1466 }, { "epoch": 0.10221928021461171, "grad_norm": 0.6692689047744544, "learning_rate": 6.838957501900658e-07, "loss": 1.5585, "step": 1467 }, { "epoch": 0.10228895934222904, "grad_norm": 0.6866297965522439, "learning_rate": 6.838739766952916e-07, "loss": 1.491, "step": 1468 }, { "epoch": 0.10235863846984636, "grad_norm": 0.713111200601478, "learning_rate": 6.838521888778629e-07, "loss": 1.4458, "step": 1469 }, { "epoch": 0.10242831759746368, "grad_norm": 0.7596916282973877, "learning_rate": 6.838303867388237e-07, "loss": 1.5562, "step": 1470 }, { "epoch": 0.102497996725081, "grad_norm": 0.7729511940587167, "learning_rate": 6.83808570279219e-07, "loss": 1.4941, "step": 1471 }, { "epoch": 0.10256767585269833, "grad_norm": 0.6728813668527486, "learning_rate": 6.83786739500094e-07, "loss": 1.3944, "step": 1472 }, { "epoch": 0.10263735498031565, "grad_norm": 0.7228001449563164, "learning_rate": 6.837648944024951e-07, "loss": 1.6582, "step": 1473 }, { "epoch": 0.10270703410793297, "grad_norm": 0.7070507373346999, "learning_rate": 6.837430349874693e-07, "loss": 1.5626, "step": 1474 }, { "epoch": 0.1027767132355503, "grad_norm": 0.6647571817731183, "learning_rate": 6.837211612560636e-07, "loss": 1.4195, "step": 1475 }, { "epoch": 0.10284639236316762, "grad_norm": 0.7221508915094103, "learning_rate": 6.836992732093267e-07, "loss": 1.6211, "step": 1476 }, { "epoch": 0.10291607149078494, "grad_norm": 0.7109611055734725, "learning_rate": 6.836773708483076e-07, "loss": 1.5389, "step": 1477 }, { "epoch": 0.10298575061840226, "grad_norm": 0.6907251786674505, "learning_rate": 6.836554541740556e-07, "loss": 1.5164, "step": 1478 }, { "epoch": 0.10305542974601958, "grad_norm": 0.699620393769317, "learning_rate": 6.836335231876212e-07, "loss": 1.7227, "step": 1479 }, { "epoch": 0.1031251088736369, "grad_norm": 0.6940924493567484, "learning_rate": 6.836115778900552e-07, "loss": 1.5148, "step": 1480 }, { "epoch": 0.10319478800125423, "grad_norm": 0.7238715870249767, "learning_rate": 6.835896182824093e-07, "loss": 1.5669, "step": 1481 }, { "epoch": 0.10326446712887155, "grad_norm": 0.7088044163423685, "learning_rate": 6.83567644365736e-07, "loss": 1.4448, "step": 1482 }, { "epoch": 0.10333414625648887, "grad_norm": 0.7447083571913127, "learning_rate": 6.835456561410882e-07, "loss": 1.6735, "step": 1483 }, { "epoch": 0.1034038253841062, "grad_norm": 0.6910524206838882, "learning_rate": 6.835236536095197e-07, "loss": 1.5637, "step": 1484 }, { "epoch": 0.10347350451172352, "grad_norm": 0.6593429324828214, "learning_rate": 6.835016367720847e-07, "loss": 1.4477, "step": 1485 }, { "epoch": 0.10354318363934084, "grad_norm": 0.7235635260040205, "learning_rate": 6.834796056298386e-07, "loss": 1.5663, "step": 1486 }, { "epoch": 0.10361286276695816, "grad_norm": 0.6955384974930857, "learning_rate": 6.83457560183837e-07, "loss": 1.5034, "step": 1487 }, { "epoch": 0.10368254189457549, "grad_norm": 0.6869816822781529, "learning_rate": 6.834355004351363e-07, "loss": 1.5124, "step": 1488 }, { "epoch": 0.10375222102219281, "grad_norm": 0.7239903773603513, "learning_rate": 6.834134263847939e-07, "loss": 1.5423, "step": 1489 }, { "epoch": 0.10382190014981013, "grad_norm": 0.7255177486951737, "learning_rate": 6.833913380338675e-07, "loss": 1.468, "step": 1490 }, { "epoch": 0.10389157927742744, "grad_norm": 0.6935427630827753, "learning_rate": 6.833692353834154e-07, "loss": 1.5646, "step": 1491 }, { "epoch": 0.10396125840504476, "grad_norm": 0.78289308346883, "learning_rate": 6.833471184344971e-07, "loss": 1.61, "step": 1492 }, { "epoch": 0.10403093753266208, "grad_norm": 0.7540578067091052, "learning_rate": 6.833249871881725e-07, "loss": 1.6918, "step": 1493 }, { "epoch": 0.10410061666027941, "grad_norm": 0.6723185719673338, "learning_rate": 6.833028416455019e-07, "loss": 1.5178, "step": 1494 }, { "epoch": 0.10417029578789673, "grad_norm": 0.7225132128946964, "learning_rate": 6.832806818075467e-07, "loss": 1.5849, "step": 1495 }, { "epoch": 0.10423997491551405, "grad_norm": 0.7256132910891259, "learning_rate": 6.83258507675369e-07, "loss": 1.6121, "step": 1496 }, { "epoch": 0.10430965404313138, "grad_norm": 0.7833180897554166, "learning_rate": 6.832363192500312e-07, "loss": 1.5221, "step": 1497 }, { "epoch": 0.1043793331707487, "grad_norm": 0.7612167370203573, "learning_rate": 6.832141165325967e-07, "loss": 1.5844, "step": 1498 }, { "epoch": 0.10444901229836602, "grad_norm": 0.7410871801404689, "learning_rate": 6.831918995241296e-07, "loss": 1.6095, "step": 1499 }, { "epoch": 0.10451869142598334, "grad_norm": 0.7017298260754218, "learning_rate": 6.831696682256944e-07, "loss": 1.511, "step": 1500 }, { "epoch": 0.10458837055360067, "grad_norm": 0.7339141096335122, "learning_rate": 6.831474226383567e-07, "loss": 1.6158, "step": 1501 }, { "epoch": 0.10465804968121799, "grad_norm": 0.7315827206339433, "learning_rate": 6.831251627631824e-07, "loss": 1.5025, "step": 1502 }, { "epoch": 0.10472772880883531, "grad_norm": 0.8214608688213216, "learning_rate": 6.831028886012382e-07, "loss": 1.4651, "step": 1503 }, { "epoch": 0.10479740793645263, "grad_norm": 0.7050091177175483, "learning_rate": 6.830806001535916e-07, "loss": 1.5367, "step": 1504 }, { "epoch": 0.10486708706406996, "grad_norm": 0.7055266701449938, "learning_rate": 6.830582974213108e-07, "loss": 1.5644, "step": 1505 }, { "epoch": 0.10493676619168728, "grad_norm": 0.7094965831953183, "learning_rate": 6.830359804054643e-07, "loss": 1.6474, "step": 1506 }, { "epoch": 0.1050064453193046, "grad_norm": 0.6640006713070633, "learning_rate": 6.83013649107122e-07, "loss": 1.3401, "step": 1507 }, { "epoch": 0.10507612444692192, "grad_norm": 0.7105850301509256, "learning_rate": 6.829913035273536e-07, "loss": 1.455, "step": 1508 }, { "epoch": 0.10514580357453925, "grad_norm": 0.6867742081585059, "learning_rate": 6.829689436672302e-07, "loss": 1.2886, "step": 1509 }, { "epoch": 0.10521548270215657, "grad_norm": 0.6958585661416363, "learning_rate": 6.829465695278233e-07, "loss": 1.5809, "step": 1510 }, { "epoch": 0.10528516182977389, "grad_norm": 0.7797334011110041, "learning_rate": 6.829241811102052e-07, "loss": 1.474, "step": 1511 }, { "epoch": 0.10535484095739121, "grad_norm": 0.6753774832924908, "learning_rate": 6.829017784154487e-07, "loss": 1.5347, "step": 1512 }, { "epoch": 0.10542452008500854, "grad_norm": 0.6657324139543032, "learning_rate": 6.828793614446273e-07, "loss": 1.4748, "step": 1513 }, { "epoch": 0.10549419921262586, "grad_norm": 0.7217234617981039, "learning_rate": 6.828569301988155e-07, "loss": 1.5803, "step": 1514 }, { "epoch": 0.10556387834024318, "grad_norm": 0.7292235276823041, "learning_rate": 6.82834484679088e-07, "loss": 1.4454, "step": 1515 }, { "epoch": 0.1056335574678605, "grad_norm": 0.6526371966253907, "learning_rate": 6.828120248865206e-07, "loss": 1.415, "step": 1516 }, { "epoch": 0.10570323659547783, "grad_norm": 0.7198666759857738, "learning_rate": 6.827895508221897e-07, "loss": 1.5559, "step": 1517 }, { "epoch": 0.10577291572309515, "grad_norm": 0.7628274611446516, "learning_rate": 6.827670624871721e-07, "loss": 1.5228, "step": 1518 }, { "epoch": 0.10584259485071247, "grad_norm": 0.6985806196410942, "learning_rate": 6.827445598825453e-07, "loss": 1.5399, "step": 1519 }, { "epoch": 0.1059122739783298, "grad_norm": 0.7268289166229313, "learning_rate": 6.827220430093882e-07, "loss": 1.5711, "step": 1520 }, { "epoch": 0.10598195310594712, "grad_norm": 0.7159822726292894, "learning_rate": 6.826995118687796e-07, "loss": 1.3665, "step": 1521 }, { "epoch": 0.10605163223356444, "grad_norm": 0.6799598821150186, "learning_rate": 6.826769664617991e-07, "loss": 1.5734, "step": 1522 }, { "epoch": 0.10612131136118176, "grad_norm": 0.6759158480037062, "learning_rate": 6.826544067895273e-07, "loss": 1.5268, "step": 1523 }, { "epoch": 0.10619099048879908, "grad_norm": 0.6764740596037916, "learning_rate": 6.826318328530453e-07, "loss": 1.5798, "step": 1524 }, { "epoch": 0.1062606696164164, "grad_norm": 0.7544503094902575, "learning_rate": 6.826092446534348e-07, "loss": 1.6701, "step": 1525 }, { "epoch": 0.10633034874403373, "grad_norm": 0.7599345785560571, "learning_rate": 6.825866421917783e-07, "loss": 1.5108, "step": 1526 }, { "epoch": 0.10640002787165105, "grad_norm": 0.6887529792956971, "learning_rate": 6.82564025469159e-07, "loss": 1.4195, "step": 1527 }, { "epoch": 0.10646970699926837, "grad_norm": 0.701570365983734, "learning_rate": 6.825413944866607e-07, "loss": 1.5902, "step": 1528 }, { "epoch": 0.1065393861268857, "grad_norm": 0.6851192165134431, "learning_rate": 6.825187492453679e-07, "loss": 1.5846, "step": 1529 }, { "epoch": 0.10660906525450302, "grad_norm": 0.6547795464181767, "learning_rate": 6.82496089746366e-07, "loss": 1.3953, "step": 1530 }, { "epoch": 0.10667874438212034, "grad_norm": 0.7023921604836459, "learning_rate": 6.824734159907405e-07, "loss": 1.6031, "step": 1531 }, { "epoch": 0.10674842350973766, "grad_norm": 0.7383875135991188, "learning_rate": 6.824507279795784e-07, "loss": 1.5285, "step": 1532 }, { "epoch": 0.10681810263735499, "grad_norm": 0.7322590444258, "learning_rate": 6.824280257139667e-07, "loss": 1.4706, "step": 1533 }, { "epoch": 0.10688778176497231, "grad_norm": 0.6595049248009317, "learning_rate": 6.824053091949933e-07, "loss": 1.4327, "step": 1534 }, { "epoch": 0.10695746089258963, "grad_norm": 0.7165800718934179, "learning_rate": 6.823825784237471e-07, "loss": 1.5829, "step": 1535 }, { "epoch": 0.10702714002020695, "grad_norm": 0.704731120016156, "learning_rate": 6.82359833401317e-07, "loss": 1.5255, "step": 1536 }, { "epoch": 0.10709681914782426, "grad_norm": 0.6997247632243365, "learning_rate": 6.823370741287933e-07, "loss": 1.6407, "step": 1537 }, { "epoch": 0.10716649827544159, "grad_norm": 0.7023753654540732, "learning_rate": 6.823143006072667e-07, "loss": 1.5886, "step": 1538 }, { "epoch": 0.10723617740305891, "grad_norm": 0.6743001705870307, "learning_rate": 6.822915128378284e-07, "loss": 1.572, "step": 1539 }, { "epoch": 0.10730585653067623, "grad_norm": 0.6750188030153429, "learning_rate": 6.822687108215704e-07, "loss": 1.5842, "step": 1540 }, { "epoch": 0.10737553565829355, "grad_norm": 0.7262010807988295, "learning_rate": 6.822458945595856e-07, "loss": 1.5688, "step": 1541 }, { "epoch": 0.10744521478591088, "grad_norm": 0.7212282946377836, "learning_rate": 6.822230640529671e-07, "loss": 1.5317, "step": 1542 }, { "epoch": 0.1075148939135282, "grad_norm": 0.8885724857888603, "learning_rate": 6.822002193028095e-07, "loss": 1.6592, "step": 1543 }, { "epoch": 0.10758457304114552, "grad_norm": 0.7197806691400269, "learning_rate": 6.82177360310207e-07, "loss": 1.5434, "step": 1544 }, { "epoch": 0.10765425216876284, "grad_norm": 0.7558178313589539, "learning_rate": 6.821544870762554e-07, "loss": 1.6391, "step": 1545 }, { "epoch": 0.10772393129638017, "grad_norm": 0.7987854415241106, "learning_rate": 6.821315996020506e-07, "loss": 1.5524, "step": 1546 }, { "epoch": 0.10779361042399749, "grad_norm": 0.6686455155160611, "learning_rate": 6.821086978886897e-07, "loss": 1.4751, "step": 1547 }, { "epoch": 0.10786328955161481, "grad_norm": 0.7341664427885704, "learning_rate": 6.8208578193727e-07, "loss": 1.5269, "step": 1548 }, { "epoch": 0.10793296867923213, "grad_norm": 0.7003223328477466, "learning_rate": 6.820628517488898e-07, "loss": 1.5568, "step": 1549 }, { "epoch": 0.10800264780684946, "grad_norm": 0.6864701685410314, "learning_rate": 6.820399073246477e-07, "loss": 1.4843, "step": 1550 }, { "epoch": 0.10807232693446678, "grad_norm": 0.8687409341573422, "learning_rate": 6.820169486656435e-07, "loss": 1.5423, "step": 1551 }, { "epoch": 0.1081420060620841, "grad_norm": 0.7607151628672985, "learning_rate": 6.819939757729774e-07, "loss": 1.5437, "step": 1552 }, { "epoch": 0.10821168518970142, "grad_norm": 0.7048985366339954, "learning_rate": 6.819709886477503e-07, "loss": 1.5318, "step": 1553 }, { "epoch": 0.10828136431731875, "grad_norm": 0.7314286735099996, "learning_rate": 6.819479872910638e-07, "loss": 1.5726, "step": 1554 }, { "epoch": 0.10835104344493607, "grad_norm": 0.6962507248591363, "learning_rate": 6.8192497170402e-07, "loss": 1.4723, "step": 1555 }, { "epoch": 0.10842072257255339, "grad_norm": 0.7785074386756621, "learning_rate": 6.819019418877221e-07, "loss": 1.6421, "step": 1556 }, { "epoch": 0.10849040170017071, "grad_norm": 0.7954582117985666, "learning_rate": 6.818788978432735e-07, "loss": 1.664, "step": 1557 }, { "epoch": 0.10856008082778804, "grad_norm": 0.7367228514312243, "learning_rate": 6.818558395717786e-07, "loss": 1.5606, "step": 1558 }, { "epoch": 0.10862975995540536, "grad_norm": 0.745909368111922, "learning_rate": 6.818327670743425e-07, "loss": 1.4566, "step": 1559 }, { "epoch": 0.10869943908302268, "grad_norm": 0.7600253418884132, "learning_rate": 6.818096803520709e-07, "loss": 1.6048, "step": 1560 }, { "epoch": 0.10876911821064, "grad_norm": 0.7373741671607602, "learning_rate": 6.817865794060699e-07, "loss": 1.423, "step": 1561 }, { "epoch": 0.10883879733825733, "grad_norm": 0.6896732729151899, "learning_rate": 6.817634642374468e-07, "loss": 1.5513, "step": 1562 }, { "epoch": 0.10890847646587465, "grad_norm": 0.6983623496494458, "learning_rate": 6.817403348473094e-07, "loss": 1.5651, "step": 1563 }, { "epoch": 0.10897815559349197, "grad_norm": 0.7525447676994738, "learning_rate": 6.817171912367657e-07, "loss": 1.6091, "step": 1564 }, { "epoch": 0.1090478347211093, "grad_norm": 0.7532040188348436, "learning_rate": 6.816940334069252e-07, "loss": 1.5547, "step": 1565 }, { "epoch": 0.10911751384872662, "grad_norm": 0.7377763746387574, "learning_rate": 6.816708613588975e-07, "loss": 1.6717, "step": 1566 }, { "epoch": 0.10918719297634394, "grad_norm": 0.7404993105111815, "learning_rate": 6.816476750937931e-07, "loss": 1.5567, "step": 1567 }, { "epoch": 0.10925687210396126, "grad_norm": 0.6607513401827204, "learning_rate": 6.816244746127231e-07, "loss": 1.4863, "step": 1568 }, { "epoch": 0.10932655123157858, "grad_norm": 0.7327524646568696, "learning_rate": 6.816012599167993e-07, "loss": 1.4312, "step": 1569 }, { "epoch": 0.1093962303591959, "grad_norm": 0.7166557657811868, "learning_rate": 6.815780310071341e-07, "loss": 1.5816, "step": 1570 }, { "epoch": 0.10946590948681323, "grad_norm": 0.695293401858971, "learning_rate": 6.81554787884841e-07, "loss": 1.5191, "step": 1571 }, { "epoch": 0.10953558861443055, "grad_norm": 0.7131918332055402, "learning_rate": 6.815315305510336e-07, "loss": 1.6284, "step": 1572 }, { "epoch": 0.10960526774204787, "grad_norm": 0.7076583193113377, "learning_rate": 6.815082590068264e-07, "loss": 1.535, "step": 1573 }, { "epoch": 0.1096749468696652, "grad_norm": 0.7135512952756091, "learning_rate": 6.814849732533347e-07, "loss": 1.5333, "step": 1574 }, { "epoch": 0.10974462599728252, "grad_norm": 0.7538494298477586, "learning_rate": 6.814616732916744e-07, "loss": 1.4118, "step": 1575 }, { "epoch": 0.10981430512489984, "grad_norm": 0.7178907661850447, "learning_rate": 6.814383591229622e-07, "loss": 1.3534, "step": 1576 }, { "epoch": 0.10988398425251716, "grad_norm": 0.7308851126536335, "learning_rate": 6.814150307483151e-07, "loss": 1.5599, "step": 1577 }, { "epoch": 0.10995366338013449, "grad_norm": 0.7669543415808457, "learning_rate": 6.813916881688513e-07, "loss": 1.54, "step": 1578 }, { "epoch": 0.11002334250775181, "grad_norm": 0.8240784223589613, "learning_rate": 6.813683313856894e-07, "loss": 1.5358, "step": 1579 }, { "epoch": 0.11009302163536913, "grad_norm": 0.7221931116839967, "learning_rate": 6.813449603999485e-07, "loss": 1.6244, "step": 1580 }, { "epoch": 0.11016270076298645, "grad_norm": 0.7189519304364052, "learning_rate": 6.813215752127488e-07, "loss": 1.4958, "step": 1581 }, { "epoch": 0.11023237989060376, "grad_norm": 0.7511549610377619, "learning_rate": 6.812981758252108e-07, "loss": 1.6661, "step": 1582 }, { "epoch": 0.11030205901822109, "grad_norm": 0.7497943083182442, "learning_rate": 6.81274762238456e-07, "loss": 1.5513, "step": 1583 }, { "epoch": 0.11037173814583841, "grad_norm": 0.7112454154563912, "learning_rate": 6.812513344536063e-07, "loss": 1.5281, "step": 1584 }, { "epoch": 0.11044141727345573, "grad_norm": 0.7163312744702844, "learning_rate": 6.812278924717844e-07, "loss": 1.5135, "step": 1585 }, { "epoch": 0.11051109640107305, "grad_norm": 0.7529791816288604, "learning_rate": 6.812044362941139e-07, "loss": 1.6745, "step": 1586 }, { "epoch": 0.11058077552869038, "grad_norm": 0.6813105813519863, "learning_rate": 6.811809659217186e-07, "loss": 1.3124, "step": 1587 }, { "epoch": 0.1106504546563077, "grad_norm": 0.6772757264130072, "learning_rate": 6.811574813557234e-07, "loss": 1.5298, "step": 1588 }, { "epoch": 0.11072013378392502, "grad_norm": 0.6824509840298503, "learning_rate": 6.811339825972538e-07, "loss": 1.6369, "step": 1589 }, { "epoch": 0.11078981291154234, "grad_norm": 0.7296519362331932, "learning_rate": 6.811104696474356e-07, "loss": 1.5423, "step": 1590 }, { "epoch": 0.11085949203915967, "grad_norm": 0.718482315453647, "learning_rate": 6.810869425073959e-07, "loss": 1.6452, "step": 1591 }, { "epoch": 0.11092917116677699, "grad_norm": 0.722378080045864, "learning_rate": 6.81063401178262e-07, "loss": 1.4309, "step": 1592 }, { "epoch": 0.11099885029439431, "grad_norm": 0.6985374571080087, "learning_rate": 6.810398456611623e-07, "loss": 1.5046, "step": 1593 }, { "epoch": 0.11106852942201163, "grad_norm": 0.7735897379532881, "learning_rate": 6.810162759572252e-07, "loss": 1.4598, "step": 1594 }, { "epoch": 0.11113820854962896, "grad_norm": 0.7141646454049914, "learning_rate": 6.809926920675806e-07, "loss": 1.4788, "step": 1595 }, { "epoch": 0.11120788767724628, "grad_norm": 0.7004030395772681, "learning_rate": 6.809690939933585e-07, "loss": 1.5618, "step": 1596 }, { "epoch": 0.1112775668048636, "grad_norm": 0.7195195106477383, "learning_rate": 6.809454817356897e-07, "loss": 1.5963, "step": 1597 }, { "epoch": 0.11134724593248092, "grad_norm": 0.6591270162532643, "learning_rate": 6.80921855295706e-07, "loss": 1.4725, "step": 1598 }, { "epoch": 0.11141692506009825, "grad_norm": 0.7034075202200033, "learning_rate": 6.808982146745393e-07, "loss": 1.5809, "step": 1599 }, { "epoch": 0.11148660418771557, "grad_norm": 0.677382514036192, "learning_rate": 6.808745598733229e-07, "loss": 1.4277, "step": 1600 }, { "epoch": 0.11155628331533289, "grad_norm": 0.6988131968542942, "learning_rate": 6.8085089089319e-07, "loss": 1.4795, "step": 1601 }, { "epoch": 0.11162596244295021, "grad_norm": 0.6558486580469053, "learning_rate": 6.808272077352751e-07, "loss": 1.5151, "step": 1602 }, { "epoch": 0.11169564157056754, "grad_norm": 0.7466317982625242, "learning_rate": 6.808035104007131e-07, "loss": 1.5656, "step": 1603 }, { "epoch": 0.11176532069818486, "grad_norm": 0.7023736399262505, "learning_rate": 6.807797988906397e-07, "loss": 1.5659, "step": 1604 }, { "epoch": 0.11183499982580218, "grad_norm": 0.6949112857794784, "learning_rate": 6.807560732061909e-07, "loss": 1.6362, "step": 1605 }, { "epoch": 0.1119046789534195, "grad_norm": 0.7026342000850446, "learning_rate": 6.807323333485041e-07, "loss": 1.4273, "step": 1606 }, { "epoch": 0.11197435808103683, "grad_norm": 0.6892676901830094, "learning_rate": 6.807085793187167e-07, "loss": 1.6011, "step": 1607 }, { "epoch": 0.11204403720865415, "grad_norm": 0.7217360369844035, "learning_rate": 6.80684811117967e-07, "loss": 1.5082, "step": 1608 }, { "epoch": 0.11211371633627147, "grad_norm": 0.7305733477997847, "learning_rate": 6.806610287473942e-07, "loss": 1.4838, "step": 1609 }, { "epoch": 0.1121833954638888, "grad_norm": 0.7356577124965377, "learning_rate": 6.806372322081379e-07, "loss": 1.5469, "step": 1610 }, { "epoch": 0.11225307459150612, "grad_norm": 0.678810220355509, "learning_rate": 6.806134215013386e-07, "loss": 1.4224, "step": 1611 }, { "epoch": 0.11232275371912344, "grad_norm": 0.7015202486376189, "learning_rate": 6.80589596628137e-07, "loss": 1.5777, "step": 1612 }, { "epoch": 0.11239243284674076, "grad_norm": 0.7235445572079848, "learning_rate": 6.805657575896753e-07, "loss": 1.5127, "step": 1613 }, { "epoch": 0.11246211197435808, "grad_norm": 0.7302277793966115, "learning_rate": 6.805419043870957e-07, "loss": 1.5286, "step": 1614 }, { "epoch": 0.1125317911019754, "grad_norm": 0.7486458593950519, "learning_rate": 6.805180370215413e-07, "loss": 1.6699, "step": 1615 }, { "epoch": 0.11260147022959273, "grad_norm": 0.7586164782838338, "learning_rate": 6.804941554941558e-07, "loss": 1.5705, "step": 1616 }, { "epoch": 0.11267114935721005, "grad_norm": 0.7399086186510679, "learning_rate": 6.804702598060838e-07, "loss": 1.6787, "step": 1617 }, { "epoch": 0.11274082848482737, "grad_norm": 0.6983175095680024, "learning_rate": 6.804463499584704e-07, "loss": 1.7091, "step": 1618 }, { "epoch": 0.1128105076124447, "grad_norm": 0.6907069923114252, "learning_rate": 6.804224259524613e-07, "loss": 1.6168, "step": 1619 }, { "epoch": 0.11288018674006202, "grad_norm": 0.7384786246263818, "learning_rate": 6.803984877892031e-07, "loss": 1.516, "step": 1620 }, { "epoch": 0.11294986586767934, "grad_norm": 0.7134317876278762, "learning_rate": 6.80374535469843e-07, "loss": 1.4856, "step": 1621 }, { "epoch": 0.11301954499529666, "grad_norm": 0.7270747064059724, "learning_rate": 6.803505689955286e-07, "loss": 1.5908, "step": 1622 }, { "epoch": 0.11308922412291399, "grad_norm": 0.7517895848474664, "learning_rate": 6.803265883674087e-07, "loss": 1.5437, "step": 1623 }, { "epoch": 0.11315890325053131, "grad_norm": 0.6932504297112048, "learning_rate": 6.803025935866324e-07, "loss": 1.5541, "step": 1624 }, { "epoch": 0.11322858237814863, "grad_norm": 0.7251577977979223, "learning_rate": 6.802785846543495e-07, "loss": 1.566, "step": 1625 }, { "epoch": 0.11329826150576595, "grad_norm": 0.7127719443354646, "learning_rate": 6.802545615717106e-07, "loss": 1.6241, "step": 1626 }, { "epoch": 0.11336794063338328, "grad_norm": 0.7287839552408284, "learning_rate": 6.80230524339867e-07, "loss": 1.6895, "step": 1627 }, { "epoch": 0.11343761976100059, "grad_norm": 0.7003344394380433, "learning_rate": 6.802064729599706e-07, "loss": 1.5245, "step": 1628 }, { "epoch": 0.11350729888861791, "grad_norm": 0.7174216722950388, "learning_rate": 6.80182407433174e-07, "loss": 1.6864, "step": 1629 }, { "epoch": 0.11357697801623523, "grad_norm": 0.6850433524557609, "learning_rate": 6.801583277606304e-07, "loss": 1.5079, "step": 1630 }, { "epoch": 0.11364665714385255, "grad_norm": 0.6871563613458431, "learning_rate": 6.801342339434937e-07, "loss": 1.4594, "step": 1631 }, { "epoch": 0.11371633627146988, "grad_norm": 0.7170332340740067, "learning_rate": 6.801101259829188e-07, "loss": 1.656, "step": 1632 }, { "epoch": 0.1137860153990872, "grad_norm": 0.7164452063926988, "learning_rate": 6.800860038800607e-07, "loss": 1.5006, "step": 1633 }, { "epoch": 0.11385569452670452, "grad_norm": 0.6937900817795392, "learning_rate": 6.800618676360755e-07, "loss": 1.4742, "step": 1634 }, { "epoch": 0.11392537365432184, "grad_norm": 0.7023556028757573, "learning_rate": 6.800377172521199e-07, "loss": 1.6291, "step": 1635 }, { "epoch": 0.11399505278193917, "grad_norm": 0.7169038482009281, "learning_rate": 6.800135527293511e-07, "loss": 1.5947, "step": 1636 }, { "epoch": 0.11406473190955649, "grad_norm": 0.7442205747389822, "learning_rate": 6.799893740689272e-07, "loss": 1.5385, "step": 1637 }, { "epoch": 0.11413441103717381, "grad_norm": 0.6974269350674046, "learning_rate": 6.79965181272007e-07, "loss": 1.44, "step": 1638 }, { "epoch": 0.11420409016479113, "grad_norm": 0.7202760860472499, "learning_rate": 6.799409743397497e-07, "loss": 1.5094, "step": 1639 }, { "epoch": 0.11427376929240846, "grad_norm": 0.7228977897934166, "learning_rate": 6.799167532733153e-07, "loss": 1.7074, "step": 1640 }, { "epoch": 0.11434344842002578, "grad_norm": 0.7422510209307608, "learning_rate": 6.798925180738649e-07, "loss": 1.4843, "step": 1641 }, { "epoch": 0.1144131275476431, "grad_norm": 0.7648101870166034, "learning_rate": 6.798682687425594e-07, "loss": 1.6436, "step": 1642 }, { "epoch": 0.11448280667526042, "grad_norm": 0.7794274991636997, "learning_rate": 6.798440052805611e-07, "loss": 1.6088, "step": 1643 }, { "epoch": 0.11455248580287775, "grad_norm": 0.7468761912839483, "learning_rate": 6.79819727689033e-07, "loss": 1.6533, "step": 1644 }, { "epoch": 0.11462216493049507, "grad_norm": 0.674230679158213, "learning_rate": 6.79795435969138e-07, "loss": 1.5058, "step": 1645 }, { "epoch": 0.11469184405811239, "grad_norm": 0.7430350345094352, "learning_rate": 6.797711301220406e-07, "loss": 1.545, "step": 1646 }, { "epoch": 0.11476152318572971, "grad_norm": 0.7276113930617449, "learning_rate": 6.797468101489056e-07, "loss": 1.5441, "step": 1647 }, { "epoch": 0.11483120231334704, "grad_norm": 0.7398915067049099, "learning_rate": 6.797224760508984e-07, "loss": 1.6512, "step": 1648 }, { "epoch": 0.11490088144096436, "grad_norm": 0.68940533642839, "learning_rate": 6.796981278291849e-07, "loss": 1.6151, "step": 1649 }, { "epoch": 0.11497056056858168, "grad_norm": 0.7555918014240699, "learning_rate": 6.796737654849322e-07, "loss": 1.5668, "step": 1650 }, { "epoch": 0.115040239696199, "grad_norm": 0.7099296673799076, "learning_rate": 6.796493890193077e-07, "loss": 1.4325, "step": 1651 }, { "epoch": 0.11510991882381633, "grad_norm": 0.7595492751199421, "learning_rate": 6.796249984334797e-07, "loss": 1.6228, "step": 1652 }, { "epoch": 0.11517959795143365, "grad_norm": 0.7632915208578668, "learning_rate": 6.796005937286167e-07, "loss": 1.5813, "step": 1653 }, { "epoch": 0.11524927707905097, "grad_norm": 0.6922449395291023, "learning_rate": 6.795761749058885e-07, "loss": 1.5088, "step": 1654 }, { "epoch": 0.1153189562066683, "grad_norm": 0.7099657991630075, "learning_rate": 6.795517419664653e-07, "loss": 1.5136, "step": 1655 }, { "epoch": 0.11538863533428562, "grad_norm": 0.7613853045497073, "learning_rate": 6.795272949115179e-07, "loss": 1.538, "step": 1656 }, { "epoch": 0.11545831446190294, "grad_norm": 0.7195462148384297, "learning_rate": 6.795028337422179e-07, "loss": 1.6404, "step": 1657 }, { "epoch": 0.11552799358952026, "grad_norm": 0.6747873760990837, "learning_rate": 6.794783584597375e-07, "loss": 1.5352, "step": 1658 }, { "epoch": 0.11559767271713758, "grad_norm": 0.6519250279592536, "learning_rate": 6.794538690652497e-07, "loss": 1.5127, "step": 1659 }, { "epoch": 0.1156673518447549, "grad_norm": 0.7162973540123703, "learning_rate": 6.794293655599279e-07, "loss": 1.4985, "step": 1660 }, { "epoch": 0.11573703097237223, "grad_norm": 0.7358283562766068, "learning_rate": 6.794048479449463e-07, "loss": 1.4902, "step": 1661 }, { "epoch": 0.11580671009998955, "grad_norm": 0.718594579591006, "learning_rate": 6.793803162214801e-07, "loss": 1.5252, "step": 1662 }, { "epoch": 0.11587638922760687, "grad_norm": 0.6804222879849213, "learning_rate": 6.793557703907049e-07, "loss": 1.4665, "step": 1663 }, { "epoch": 0.1159460683552242, "grad_norm": 0.6597234535748099, "learning_rate": 6.793312104537968e-07, "loss": 1.5345, "step": 1664 }, { "epoch": 0.11601574748284152, "grad_norm": 0.6858789541075233, "learning_rate": 6.793066364119327e-07, "loss": 1.3725, "step": 1665 }, { "epoch": 0.11608542661045884, "grad_norm": 0.7023154283430406, "learning_rate": 6.792820482662906e-07, "loss": 1.6363, "step": 1666 }, { "epoch": 0.11615510573807616, "grad_norm": 0.6987195535137968, "learning_rate": 6.792574460180486e-07, "loss": 1.5657, "step": 1667 }, { "epoch": 0.11622478486569349, "grad_norm": 0.6864641357349911, "learning_rate": 6.792328296683856e-07, "loss": 1.4052, "step": 1668 }, { "epoch": 0.11629446399331081, "grad_norm": 0.7222713490050489, "learning_rate": 6.792081992184813e-07, "loss": 1.5152, "step": 1669 }, { "epoch": 0.11636414312092813, "grad_norm": 0.7287387894977275, "learning_rate": 6.791835546695162e-07, "loss": 1.5372, "step": 1670 }, { "epoch": 0.11643382224854545, "grad_norm": 0.7969883890704471, "learning_rate": 6.791588960226712e-07, "loss": 1.4504, "step": 1671 }, { "epoch": 0.11650350137616278, "grad_norm": 0.6806299876221195, "learning_rate": 6.79134223279128e-07, "loss": 1.4165, "step": 1672 }, { "epoch": 0.1165731805037801, "grad_norm": 0.7626172771312602, "learning_rate": 6.791095364400689e-07, "loss": 1.6344, "step": 1673 }, { "epoch": 0.11664285963139741, "grad_norm": 0.7140725948962551, "learning_rate": 6.790848355066771e-07, "loss": 1.5458, "step": 1674 }, { "epoch": 0.11671253875901473, "grad_norm": 0.7015644476735204, "learning_rate": 6.790601204801361e-07, "loss": 1.5793, "step": 1675 }, { "epoch": 0.11678221788663205, "grad_norm": 0.7850028189623065, "learning_rate": 6.790353913616307e-07, "loss": 1.503, "step": 1676 }, { "epoch": 0.11685189701424938, "grad_norm": 0.7580222452334741, "learning_rate": 6.790106481523455e-07, "loss": 1.5288, "step": 1677 }, { "epoch": 0.1169215761418667, "grad_norm": 0.8206009490254131, "learning_rate": 6.789858908534665e-07, "loss": 1.5293, "step": 1678 }, { "epoch": 0.11699125526948402, "grad_norm": 0.7647388384602489, "learning_rate": 6.789611194661801e-07, "loss": 1.5605, "step": 1679 }, { "epoch": 0.11706093439710134, "grad_norm": 0.7159767052729963, "learning_rate": 6.789363339916733e-07, "loss": 1.6455, "step": 1680 }, { "epoch": 0.11713061352471867, "grad_norm": 0.77035475571459, "learning_rate": 6.78911534431134e-07, "loss": 1.6127, "step": 1681 }, { "epoch": 0.11720029265233599, "grad_norm": 0.6771402586909827, "learning_rate": 6.788867207857505e-07, "loss": 1.4831, "step": 1682 }, { "epoch": 0.11726997177995331, "grad_norm": 0.7260329904784459, "learning_rate": 6.78861893056712e-07, "loss": 1.5792, "step": 1683 }, { "epoch": 0.11733965090757063, "grad_norm": 0.6889949040174258, "learning_rate": 6.788370512452083e-07, "loss": 1.5853, "step": 1684 }, { "epoch": 0.11740933003518796, "grad_norm": 0.7097739042353229, "learning_rate": 6.7881219535243e-07, "loss": 1.6495, "step": 1685 }, { "epoch": 0.11747900916280528, "grad_norm": 0.76869404066403, "learning_rate": 6.78787325379568e-07, "loss": 1.5965, "step": 1686 }, { "epoch": 0.1175486882904226, "grad_norm": 0.7519933050837854, "learning_rate": 6.787624413278143e-07, "loss": 1.536, "step": 1687 }, { "epoch": 0.11761836741803992, "grad_norm": 0.6665676367181234, "learning_rate": 6.787375431983613e-07, "loss": 1.4836, "step": 1688 }, { "epoch": 0.11768804654565725, "grad_norm": 0.6976889194202481, "learning_rate": 6.787126309924023e-07, "loss": 1.5132, "step": 1689 }, { "epoch": 0.11775772567327457, "grad_norm": 0.6997377024825535, "learning_rate": 6.786877047111309e-07, "loss": 1.6681, "step": 1690 }, { "epoch": 0.11782740480089189, "grad_norm": 0.7204082985724929, "learning_rate": 6.786627643557416e-07, "loss": 1.5786, "step": 1691 }, { "epoch": 0.11789708392850921, "grad_norm": 0.6949032262013504, "learning_rate": 6.7863780992743e-07, "loss": 1.5075, "step": 1692 }, { "epoch": 0.11796676305612654, "grad_norm": 0.7113507414950234, "learning_rate": 6.786128414273917e-07, "loss": 1.5558, "step": 1693 }, { "epoch": 0.11803644218374386, "grad_norm": 0.7369337445932507, "learning_rate": 6.785878588568232e-07, "loss": 1.7454, "step": 1694 }, { "epoch": 0.11810612131136118, "grad_norm": 0.7139653769172889, "learning_rate": 6.785628622169219e-07, "loss": 1.5648, "step": 1695 }, { "epoch": 0.1181758004389785, "grad_norm": 0.7636368697269599, "learning_rate": 6.785378515088854e-07, "loss": 1.6432, "step": 1696 }, { "epoch": 0.11824547956659583, "grad_norm": 0.6979859259064816, "learning_rate": 6.785128267339125e-07, "loss": 1.6527, "step": 1697 }, { "epoch": 0.11831515869421315, "grad_norm": 0.7449511856669205, "learning_rate": 6.784877878932024e-07, "loss": 1.5515, "step": 1698 }, { "epoch": 0.11838483782183047, "grad_norm": 0.7035936657977012, "learning_rate": 6.784627349879551e-07, "loss": 1.476, "step": 1699 }, { "epoch": 0.1184545169494478, "grad_norm": 0.7051512194691226, "learning_rate": 6.784376680193709e-07, "loss": 1.5173, "step": 1700 }, { "epoch": 0.11852419607706512, "grad_norm": 0.6909050176678639, "learning_rate": 6.784125869886512e-07, "loss": 1.5744, "step": 1701 }, { "epoch": 0.11859387520468244, "grad_norm": 0.7165979408686158, "learning_rate": 6.78387491896998e-07, "loss": 1.4485, "step": 1702 }, { "epoch": 0.11866355433229976, "grad_norm": 0.7430454803926803, "learning_rate": 6.783623827456139e-07, "loss": 1.601, "step": 1703 }, { "epoch": 0.11873323345991708, "grad_norm": 0.7117476169672308, "learning_rate": 6.783372595357023e-07, "loss": 1.3338, "step": 1704 }, { "epoch": 0.1188029125875344, "grad_norm": 0.7970341950051524, "learning_rate": 6.783121222684668e-07, "loss": 1.5163, "step": 1705 }, { "epoch": 0.11887259171515173, "grad_norm": 0.7299219462336711, "learning_rate": 6.782869709451125e-07, "loss": 1.527, "step": 1706 }, { "epoch": 0.11894227084276905, "grad_norm": 0.7140113860452822, "learning_rate": 6.782618055668442e-07, "loss": 1.5946, "step": 1707 }, { "epoch": 0.11901194997038637, "grad_norm": 0.8280777358865281, "learning_rate": 6.782366261348682e-07, "loss": 1.5883, "step": 1708 }, { "epoch": 0.1190816290980037, "grad_norm": 0.761620219181658, "learning_rate": 6.782114326503911e-07, "loss": 1.5823, "step": 1709 }, { "epoch": 0.11915130822562102, "grad_norm": 0.8154486412727795, "learning_rate": 6.781862251146201e-07, "loss": 1.639, "step": 1710 }, { "epoch": 0.11922098735323834, "grad_norm": 0.729402725558841, "learning_rate": 6.781610035287634e-07, "loss": 1.4666, "step": 1711 }, { "epoch": 0.11929066648085566, "grad_norm": 0.7089244714070104, "learning_rate": 6.781357678940296e-07, "loss": 1.612, "step": 1712 }, { "epoch": 0.11936034560847299, "grad_norm": 0.663561610229027, "learning_rate": 6.781105182116277e-07, "loss": 1.4386, "step": 1713 }, { "epoch": 0.11943002473609031, "grad_norm": 0.7533233166790155, "learning_rate": 6.780852544827683e-07, "loss": 1.543, "step": 1714 }, { "epoch": 0.11949970386370763, "grad_norm": 0.6484805021471591, "learning_rate": 6.780599767086617e-07, "loss": 1.4806, "step": 1715 }, { "epoch": 0.11956938299132495, "grad_norm": 0.7670450412169437, "learning_rate": 6.780346848905196e-07, "loss": 1.569, "step": 1716 }, { "epoch": 0.11963906211894228, "grad_norm": 0.6593231059169545, "learning_rate": 6.780093790295537e-07, "loss": 1.4938, "step": 1717 }, { "epoch": 0.1197087412465596, "grad_norm": 0.7301933487162761, "learning_rate": 6.779840591269766e-07, "loss": 1.5712, "step": 1718 }, { "epoch": 0.11977842037417692, "grad_norm": 0.703158568302166, "learning_rate": 6.779587251840021e-07, "loss": 1.6006, "step": 1719 }, { "epoch": 0.11984809950179423, "grad_norm": 0.7672500156829497, "learning_rate": 6.779333772018441e-07, "loss": 1.5412, "step": 1720 }, { "epoch": 0.11991777862941155, "grad_norm": 0.7695414524452814, "learning_rate": 6.779080151817172e-07, "loss": 1.6895, "step": 1721 }, { "epoch": 0.11998745775702888, "grad_norm": 0.677453250708521, "learning_rate": 6.778826391248369e-07, "loss": 1.5178, "step": 1722 }, { "epoch": 0.1200571368846462, "grad_norm": 0.707782732326732, "learning_rate": 6.778572490324192e-07, "loss": 1.5283, "step": 1723 }, { "epoch": 0.12012681601226352, "grad_norm": 0.6959585255791797, "learning_rate": 6.778318449056811e-07, "loss": 1.4923, "step": 1724 }, { "epoch": 0.12019649513988084, "grad_norm": 0.7472007496068623, "learning_rate": 6.778064267458396e-07, "loss": 1.5479, "step": 1725 }, { "epoch": 0.12026617426749817, "grad_norm": 0.7676470925470961, "learning_rate": 6.77780994554113e-07, "loss": 1.62, "step": 1726 }, { "epoch": 0.12033585339511549, "grad_norm": 0.6976928226879325, "learning_rate": 6.777555483317201e-07, "loss": 1.5113, "step": 1727 }, { "epoch": 0.12040553252273281, "grad_norm": 0.748334145125312, "learning_rate": 6.777300880798806e-07, "loss": 1.539, "step": 1728 }, { "epoch": 0.12047521165035013, "grad_norm": 0.677784509849018, "learning_rate": 6.777046137998139e-07, "loss": 1.4158, "step": 1729 }, { "epoch": 0.12054489077796746, "grad_norm": 0.6416320964645821, "learning_rate": 6.776791254927415e-07, "loss": 1.4458, "step": 1730 }, { "epoch": 0.12061456990558478, "grad_norm": 0.7232355551723145, "learning_rate": 6.776536231598843e-07, "loss": 1.5007, "step": 1731 }, { "epoch": 0.1206842490332021, "grad_norm": 0.6940114252253201, "learning_rate": 6.776281068024648e-07, "loss": 1.5597, "step": 1732 }, { "epoch": 0.12075392816081942, "grad_norm": 0.766955235541318, "learning_rate": 6.776025764217057e-07, "loss": 1.6602, "step": 1733 }, { "epoch": 0.12082360728843675, "grad_norm": 0.6672225734828413, "learning_rate": 6.775770320188304e-07, "loss": 1.5247, "step": 1734 }, { "epoch": 0.12089328641605407, "grad_norm": 0.7381716051530299, "learning_rate": 6.77551473595063e-07, "loss": 1.5521, "step": 1735 }, { "epoch": 0.12096296554367139, "grad_norm": 0.6818041083316355, "learning_rate": 6.775259011516285e-07, "loss": 1.5988, "step": 1736 }, { "epoch": 0.12103264467128871, "grad_norm": 0.7532425489494298, "learning_rate": 6.775003146897523e-07, "loss": 1.5768, "step": 1737 }, { "epoch": 0.12110232379890604, "grad_norm": 0.7004792129217193, "learning_rate": 6.774747142106604e-07, "loss": 1.4175, "step": 1738 }, { "epoch": 0.12117200292652336, "grad_norm": 0.6979999270979104, "learning_rate": 6.774490997155799e-07, "loss": 1.5503, "step": 1739 }, { "epoch": 0.12124168205414068, "grad_norm": 0.7309248475402028, "learning_rate": 6.774234712057381e-07, "loss": 1.5828, "step": 1740 }, { "epoch": 0.121311361181758, "grad_norm": 0.7698670180342181, "learning_rate": 6.773978286823632e-07, "loss": 1.6536, "step": 1741 }, { "epoch": 0.12138104030937533, "grad_norm": 0.751428111254726, "learning_rate": 6.773721721466841e-07, "loss": 1.7044, "step": 1742 }, { "epoch": 0.12145071943699265, "grad_norm": 0.6960299296307737, "learning_rate": 6.773465015999302e-07, "loss": 1.5352, "step": 1743 }, { "epoch": 0.12152039856460997, "grad_norm": 0.7043981294939846, "learning_rate": 6.773208170433319e-07, "loss": 1.5553, "step": 1744 }, { "epoch": 0.1215900776922273, "grad_norm": 0.7442626020263271, "learning_rate": 6.772951184781199e-07, "loss": 1.4759, "step": 1745 }, { "epoch": 0.12165975681984462, "grad_norm": 0.7031340655574903, "learning_rate": 6.772694059055255e-07, "loss": 1.5555, "step": 1746 }, { "epoch": 0.12172943594746194, "grad_norm": 0.7117663767653726, "learning_rate": 6.772436793267814e-07, "loss": 1.5111, "step": 1747 }, { "epoch": 0.12179911507507926, "grad_norm": 0.6919303229240988, "learning_rate": 6.772179387431202e-07, "loss": 1.5557, "step": 1748 }, { "epoch": 0.12186879420269658, "grad_norm": 0.7151382209745435, "learning_rate": 6.771921841557755e-07, "loss": 1.5395, "step": 1749 }, { "epoch": 0.1219384733303139, "grad_norm": 0.7159206817537367, "learning_rate": 6.771664155659814e-07, "loss": 1.5215, "step": 1750 }, { "epoch": 0.12200815245793123, "grad_norm": 0.7647443834562503, "learning_rate": 6.771406329749728e-07, "loss": 1.5614, "step": 1751 }, { "epoch": 0.12207783158554855, "grad_norm": 0.7242339466839115, "learning_rate": 6.771148363839854e-07, "loss": 1.4939, "step": 1752 }, { "epoch": 0.12214751071316587, "grad_norm": 0.6616100019536842, "learning_rate": 6.770890257942553e-07, "loss": 1.461, "step": 1753 }, { "epoch": 0.1222171898407832, "grad_norm": 0.688560043683067, "learning_rate": 6.770632012070195e-07, "loss": 1.5556, "step": 1754 }, { "epoch": 0.12228686896840052, "grad_norm": 0.7160076848018615, "learning_rate": 6.770373626235155e-07, "loss": 1.5213, "step": 1755 }, { "epoch": 0.12235654809601784, "grad_norm": 0.7547550213528791, "learning_rate": 6.770115100449814e-07, "loss": 1.6089, "step": 1756 }, { "epoch": 0.12242622722363516, "grad_norm": 0.6926471414125789, "learning_rate": 6.769856434726564e-07, "loss": 1.6533, "step": 1757 }, { "epoch": 0.12249590635125249, "grad_norm": 0.8418470820434377, "learning_rate": 6.769597629077799e-07, "loss": 1.616, "step": 1758 }, { "epoch": 0.12256558547886981, "grad_norm": 0.7122046268206644, "learning_rate": 6.769338683515921e-07, "loss": 1.4934, "step": 1759 }, { "epoch": 0.12263526460648713, "grad_norm": 0.6997019866605317, "learning_rate": 6.76907959805334e-07, "loss": 1.5801, "step": 1760 }, { "epoch": 0.12270494373410445, "grad_norm": 0.7229950358350811, "learning_rate": 6.768820372702473e-07, "loss": 1.5317, "step": 1761 }, { "epoch": 0.12277462286172178, "grad_norm": 0.6836420011968071, "learning_rate": 6.768561007475743e-07, "loss": 1.5504, "step": 1762 }, { "epoch": 0.1228443019893391, "grad_norm": 0.729122942892889, "learning_rate": 6.768301502385575e-07, "loss": 1.544, "step": 1763 }, { "epoch": 0.12291398111695642, "grad_norm": 0.7293140061308643, "learning_rate": 6.768041857444408e-07, "loss": 1.524, "step": 1764 }, { "epoch": 0.12298366024457374, "grad_norm": 0.7587027647685078, "learning_rate": 6.767782072664686e-07, "loss": 1.6564, "step": 1765 }, { "epoch": 0.12305333937219105, "grad_norm": 0.9828600493808015, "learning_rate": 6.767522148058857e-07, "loss": 1.7059, "step": 1766 }, { "epoch": 0.12312301849980838, "grad_norm": 0.7534924840916932, "learning_rate": 6.767262083639376e-07, "loss": 1.5926, "step": 1767 }, { "epoch": 0.1231926976274257, "grad_norm": 0.6925293310330989, "learning_rate": 6.767001879418707e-07, "loss": 1.6045, "step": 1768 }, { "epoch": 0.12326237675504302, "grad_norm": 0.6414623823691792, "learning_rate": 6.76674153540932e-07, "loss": 1.458, "step": 1769 }, { "epoch": 0.12333205588266034, "grad_norm": 0.6865414673148647, "learning_rate": 6.766481051623689e-07, "loss": 1.6308, "step": 1770 }, { "epoch": 0.12340173501027767, "grad_norm": 0.7751892288650261, "learning_rate": 6.766220428074302e-07, "loss": 1.5195, "step": 1771 }, { "epoch": 0.12347141413789499, "grad_norm": 0.757617472740173, "learning_rate": 6.765959664773643e-07, "loss": 1.5832, "step": 1772 }, { "epoch": 0.12354109326551231, "grad_norm": 0.6845490688631454, "learning_rate": 6.76569876173421e-07, "loss": 1.4396, "step": 1773 }, { "epoch": 0.12361077239312963, "grad_norm": 0.704065341495557, "learning_rate": 6.765437718968508e-07, "loss": 1.5786, "step": 1774 }, { "epoch": 0.12368045152074696, "grad_norm": 0.7294914988615427, "learning_rate": 6.765176536489044e-07, "loss": 1.573, "step": 1775 }, { "epoch": 0.12375013064836428, "grad_norm": 0.7297804629562296, "learning_rate": 6.764915214308337e-07, "loss": 1.4654, "step": 1776 }, { "epoch": 0.1238198097759816, "grad_norm": 0.7017524980244302, "learning_rate": 6.764653752438906e-07, "loss": 1.6069, "step": 1777 }, { "epoch": 0.12388948890359892, "grad_norm": 0.6905428204511646, "learning_rate": 6.764392150893287e-07, "loss": 1.5557, "step": 1778 }, { "epoch": 0.12395916803121625, "grad_norm": 0.6926181291094297, "learning_rate": 6.764130409684011e-07, "loss": 1.6095, "step": 1779 }, { "epoch": 0.12402884715883357, "grad_norm": 0.6707933571259431, "learning_rate": 6.763868528823623e-07, "loss": 1.4735, "step": 1780 }, { "epoch": 0.12409852628645089, "grad_norm": 0.7107882072617481, "learning_rate": 6.763606508324675e-07, "loss": 1.5258, "step": 1781 }, { "epoch": 0.12416820541406821, "grad_norm": 0.6739566888923207, "learning_rate": 6.76334434819972e-07, "loss": 1.4255, "step": 1782 }, { "epoch": 0.12423788454168554, "grad_norm": 0.710791285317531, "learning_rate": 6.763082048461322e-07, "loss": 1.5633, "step": 1783 }, { "epoch": 0.12430756366930286, "grad_norm": 0.7549679673837194, "learning_rate": 6.762819609122052e-07, "loss": 1.6048, "step": 1784 }, { "epoch": 0.12437724279692018, "grad_norm": 0.6970338314567243, "learning_rate": 6.762557030194489e-07, "loss": 1.638, "step": 1785 }, { "epoch": 0.1244469219245375, "grad_norm": 0.7329681304954031, "learning_rate": 6.762294311691212e-07, "loss": 1.6062, "step": 1786 }, { "epoch": 0.12451660105215483, "grad_norm": 0.7518073218055812, "learning_rate": 6.762031453624812e-07, "loss": 1.5193, "step": 1787 }, { "epoch": 0.12458628017977215, "grad_norm": 0.753125266754797, "learning_rate": 6.761768456007888e-07, "loss": 1.6375, "step": 1788 }, { "epoch": 0.12465595930738947, "grad_norm": 0.7394329680594353, "learning_rate": 6.76150531885304e-07, "loss": 1.6436, "step": 1789 }, { "epoch": 0.1247256384350068, "grad_norm": 0.6387955308763775, "learning_rate": 6.761242042172882e-07, "loss": 1.4788, "step": 1790 }, { "epoch": 0.12479531756262412, "grad_norm": 0.7105153937432336, "learning_rate": 6.760978625980027e-07, "loss": 1.4578, "step": 1791 }, { "epoch": 0.12486499669024144, "grad_norm": 0.7117812369399352, "learning_rate": 6.760715070287101e-07, "loss": 1.4701, "step": 1792 }, { "epoch": 0.12493467581785876, "grad_norm": 0.6869069756247798, "learning_rate": 6.760451375106733e-07, "loss": 1.4755, "step": 1793 }, { "epoch": 0.12500435494547607, "grad_norm": 0.6883046784262724, "learning_rate": 6.76018754045156e-07, "loss": 1.3836, "step": 1794 }, { "epoch": 0.1250740340730934, "grad_norm": 0.7168411586962429, "learning_rate": 6.759923566334225e-07, "loss": 1.4577, "step": 1795 }, { "epoch": 0.12514371320071072, "grad_norm": 0.753246753390711, "learning_rate": 6.75965945276738e-07, "loss": 1.4208, "step": 1796 }, { "epoch": 0.12521339232832804, "grad_norm": 0.7194527123537333, "learning_rate": 6.75939519976368e-07, "loss": 1.5584, "step": 1797 }, { "epoch": 0.12528307145594536, "grad_norm": 0.7148704507883208, "learning_rate": 6.759130807335789e-07, "loss": 1.5267, "step": 1798 }, { "epoch": 0.12535275058356268, "grad_norm": 0.7706359160338018, "learning_rate": 6.758866275496378e-07, "loss": 1.622, "step": 1799 }, { "epoch": 0.12542242971118, "grad_norm": 0.6968279687765971, "learning_rate": 6.758601604258122e-07, "loss": 1.5589, "step": 1800 }, { "epoch": 0.12549210883879733, "grad_norm": 0.7075640465481938, "learning_rate": 6.758336793633707e-07, "loss": 1.5232, "step": 1801 }, { "epoch": 0.12556178796641465, "grad_norm": 0.6986171636247963, "learning_rate": 6.758071843635822e-07, "loss": 1.5965, "step": 1802 }, { "epoch": 0.12563146709403197, "grad_norm": 0.7080541368146622, "learning_rate": 6.757806754277164e-07, "loss": 1.6024, "step": 1803 }, { "epoch": 0.1257011462216493, "grad_norm": 0.6954983639729273, "learning_rate": 6.757541525570436e-07, "loss": 1.4651, "step": 1804 }, { "epoch": 0.12577082534926662, "grad_norm": 0.7513907877344518, "learning_rate": 6.75727615752835e-07, "loss": 1.6319, "step": 1805 }, { "epoch": 0.12584050447688394, "grad_norm": 0.7377134006135804, "learning_rate": 6.757010650163622e-07, "loss": 1.4196, "step": 1806 }, { "epoch": 0.12591018360450126, "grad_norm": 0.7379253766876244, "learning_rate": 6.756745003488975e-07, "loss": 1.6043, "step": 1807 }, { "epoch": 0.12597986273211859, "grad_norm": 0.6780338111507962, "learning_rate": 6.75647921751714e-07, "loss": 1.5364, "step": 1808 }, { "epoch": 0.1260495418597359, "grad_norm": 0.8363290539712473, "learning_rate": 6.756213292260855e-07, "loss": 1.4253, "step": 1809 }, { "epoch": 0.12611922098735323, "grad_norm": 0.7171329233065877, "learning_rate": 6.755947227732862e-07, "loss": 1.6683, "step": 1810 }, { "epoch": 0.12618890011497055, "grad_norm": 0.6956735267493139, "learning_rate": 6.755681023945912e-07, "loss": 1.5402, "step": 1811 }, { "epoch": 0.12625857924258788, "grad_norm": 0.7265898886948043, "learning_rate": 6.755414680912763e-07, "loss": 1.5621, "step": 1812 }, { "epoch": 0.1263282583702052, "grad_norm": 0.7832071974789533, "learning_rate": 6.755148198646176e-07, "loss": 1.6196, "step": 1813 }, { "epoch": 0.12639793749782252, "grad_norm": 0.6980941771921907, "learning_rate": 6.754881577158925e-07, "loss": 1.469, "step": 1814 }, { "epoch": 0.12646761662543984, "grad_norm": 0.7821845449425642, "learning_rate": 6.754614816463783e-07, "loss": 1.5984, "step": 1815 }, { "epoch": 0.12653729575305717, "grad_norm": 0.7196655127524775, "learning_rate": 6.754347916573539e-07, "loss": 1.4687, "step": 1816 }, { "epoch": 0.1266069748806745, "grad_norm": 0.7132709263239155, "learning_rate": 6.754080877500978e-07, "loss": 1.5156, "step": 1817 }, { "epoch": 0.1266766540082918, "grad_norm": 0.7073112967452371, "learning_rate": 6.7538136992589e-07, "loss": 1.5509, "step": 1818 }, { "epoch": 0.12674633313590913, "grad_norm": 0.7151991872285213, "learning_rate": 6.753546381860108e-07, "loss": 1.5542, "step": 1819 }, { "epoch": 0.12681601226352646, "grad_norm": 0.7242367578277304, "learning_rate": 6.753278925317413e-07, "loss": 1.5261, "step": 1820 }, { "epoch": 0.12688569139114378, "grad_norm": 0.6651665644113366, "learning_rate": 6.753011329643631e-07, "loss": 1.4353, "step": 1821 }, { "epoch": 0.1269553705187611, "grad_norm": 0.7588221525761546, "learning_rate": 6.752743594851586e-07, "loss": 1.4917, "step": 1822 }, { "epoch": 0.12702504964637842, "grad_norm": 0.7726293303712665, "learning_rate": 6.75247572095411e-07, "loss": 1.6404, "step": 1823 }, { "epoch": 0.12709472877399575, "grad_norm": 0.7105867827958886, "learning_rate": 6.752207707964037e-07, "loss": 1.484, "step": 1824 }, { "epoch": 0.12716440790161307, "grad_norm": 0.6953351793350567, "learning_rate": 6.751939555894213e-07, "loss": 1.531, "step": 1825 }, { "epoch": 0.1272340870292304, "grad_norm": 0.6677024686574735, "learning_rate": 6.75167126475749e-07, "loss": 1.5333, "step": 1826 }, { "epoch": 0.12730376615684771, "grad_norm": 0.7551124751250401, "learning_rate": 6.751402834566721e-07, "loss": 1.5653, "step": 1827 }, { "epoch": 0.12737344528446504, "grad_norm": 0.7104248422758904, "learning_rate": 6.751134265334772e-07, "loss": 1.5667, "step": 1828 }, { "epoch": 0.12744312441208236, "grad_norm": 0.6763604752203057, "learning_rate": 6.750865557074514e-07, "loss": 1.4499, "step": 1829 }, { "epoch": 0.12751280353969968, "grad_norm": 0.6766323797495721, "learning_rate": 6.750596709798822e-07, "loss": 1.3995, "step": 1830 }, { "epoch": 0.127582482667317, "grad_norm": 0.7736093226697133, "learning_rate": 6.750327723520581e-07, "loss": 1.5276, "step": 1831 }, { "epoch": 0.12765216179493433, "grad_norm": 0.6719858842850682, "learning_rate": 6.750058598252682e-07, "loss": 1.4956, "step": 1832 }, { "epoch": 0.12772184092255165, "grad_norm": 0.7346381580963557, "learning_rate": 6.74978933400802e-07, "loss": 1.5993, "step": 1833 }, { "epoch": 0.12779152005016897, "grad_norm": 0.6808106123344357, "learning_rate": 6.749519930799501e-07, "loss": 1.5483, "step": 1834 }, { "epoch": 0.1278611991777863, "grad_norm": 0.6727911376600302, "learning_rate": 6.749250388640033e-07, "loss": 1.5679, "step": 1835 }, { "epoch": 0.12793087830540362, "grad_norm": 0.6908490917326462, "learning_rate": 6.748980707542537e-07, "loss": 1.4939, "step": 1836 }, { "epoch": 0.12800055743302094, "grad_norm": 0.6991194309070923, "learning_rate": 6.748710887519931e-07, "loss": 1.5277, "step": 1837 }, { "epoch": 0.12807023656063826, "grad_norm": 0.7535462584673673, "learning_rate": 6.748440928585151e-07, "loss": 1.5031, "step": 1838 }, { "epoch": 0.12813991568825558, "grad_norm": 0.7257701505696328, "learning_rate": 6.748170830751129e-07, "loss": 1.6171, "step": 1839 }, { "epoch": 0.1282095948158729, "grad_norm": 0.7197206514629374, "learning_rate": 6.747900594030811e-07, "loss": 1.5134, "step": 1840 }, { "epoch": 0.12827927394349023, "grad_norm": 0.6863960162086155, "learning_rate": 6.747630218437149e-07, "loss": 1.6063, "step": 1841 }, { "epoch": 0.12834895307110755, "grad_norm": 0.7451035417247105, "learning_rate": 6.747359703983097e-07, "loss": 1.5635, "step": 1842 }, { "epoch": 0.12841863219872487, "grad_norm": 0.7056216450550601, "learning_rate": 6.747089050681621e-07, "loss": 1.4805, "step": 1843 }, { "epoch": 0.1284883113263422, "grad_norm": 0.6961347423164467, "learning_rate": 6.746818258545689e-07, "loss": 1.4463, "step": 1844 }, { "epoch": 0.12855799045395952, "grad_norm": 0.732709513808762, "learning_rate": 6.746547327588279e-07, "loss": 1.6429, "step": 1845 }, { "epoch": 0.12862766958157684, "grad_norm": 0.7357288567627138, "learning_rate": 6.746276257822375e-07, "loss": 1.4988, "step": 1846 }, { "epoch": 0.12869734870919416, "grad_norm": 0.6706891475934581, "learning_rate": 6.746005049260967e-07, "loss": 1.5309, "step": 1847 }, { "epoch": 0.1287670278368115, "grad_norm": 0.7319525874852473, "learning_rate": 6.745733701917052e-07, "loss": 1.5278, "step": 1848 }, { "epoch": 0.1288367069644288, "grad_norm": 0.6639104121057977, "learning_rate": 6.745462215803632e-07, "loss": 1.3677, "step": 1849 }, { "epoch": 0.12890638609204613, "grad_norm": 0.7085480744877006, "learning_rate": 6.745190590933719e-07, "loss": 1.518, "step": 1850 }, { "epoch": 0.12897606521966345, "grad_norm": 0.7505374113469124, "learning_rate": 6.744918827320328e-07, "loss": 1.5333, "step": 1851 }, { "epoch": 0.12904574434728078, "grad_norm": 0.7352795678485466, "learning_rate": 6.744646924976485e-07, "loss": 1.5475, "step": 1852 }, { "epoch": 0.1291154234748981, "grad_norm": 0.7035551100291901, "learning_rate": 6.744374883915218e-07, "loss": 1.5256, "step": 1853 }, { "epoch": 0.12918510260251542, "grad_norm": 0.7153574641045704, "learning_rate": 6.744102704149565e-07, "loss": 1.5735, "step": 1854 }, { "epoch": 0.12925478173013275, "grad_norm": 0.6577428673649858, "learning_rate": 6.743830385692569e-07, "loss": 1.4839, "step": 1855 }, { "epoch": 0.12932446085775007, "grad_norm": 0.7310828390027627, "learning_rate": 6.743557928557279e-07, "loss": 1.5818, "step": 1856 }, { "epoch": 0.1293941399853674, "grad_norm": 0.6714264380020178, "learning_rate": 6.743285332756753e-07, "loss": 1.4134, "step": 1857 }, { "epoch": 0.1294638191129847, "grad_norm": 0.7398456859492963, "learning_rate": 6.743012598304055e-07, "loss": 1.5288, "step": 1858 }, { "epoch": 0.12953349824060204, "grad_norm": 0.7655971284671793, "learning_rate": 6.742739725212255e-07, "loss": 1.6144, "step": 1859 }, { "epoch": 0.12960317736821936, "grad_norm": 0.7045382170108533, "learning_rate": 6.742466713494427e-07, "loss": 1.5161, "step": 1860 }, { "epoch": 0.12967285649583668, "grad_norm": 0.7037565360640923, "learning_rate": 6.742193563163656e-07, "loss": 1.5649, "step": 1861 }, { "epoch": 0.129742535623454, "grad_norm": 0.6883733361202018, "learning_rate": 6.741920274233033e-07, "loss": 1.5637, "step": 1862 }, { "epoch": 0.12981221475107133, "grad_norm": 0.7285700573399307, "learning_rate": 6.741646846715651e-07, "loss": 1.6719, "step": 1863 }, { "epoch": 0.12988189387868865, "grad_norm": 0.7301447510701851, "learning_rate": 6.741373280624618e-07, "loss": 1.5617, "step": 1864 }, { "epoch": 0.12995157300630597, "grad_norm": 0.7031543215374925, "learning_rate": 6.741099575973041e-07, "loss": 1.4602, "step": 1865 }, { "epoch": 0.1300212521339233, "grad_norm": 0.7094167448128131, "learning_rate": 6.740825732774036e-07, "loss": 1.616, "step": 1866 }, { "epoch": 0.13009093126154062, "grad_norm": 0.7651565895226398, "learning_rate": 6.740551751040729e-07, "loss": 1.5392, "step": 1867 }, { "epoch": 0.13016061038915794, "grad_norm": 0.7735065918268992, "learning_rate": 6.740277630786246e-07, "loss": 1.5778, "step": 1868 }, { "epoch": 0.13023028951677526, "grad_norm": 0.7967705631098297, "learning_rate": 6.740003372023727e-07, "loss": 1.7259, "step": 1869 }, { "epoch": 0.13029996864439258, "grad_norm": 0.6792582135508917, "learning_rate": 6.739728974766312e-07, "loss": 1.6366, "step": 1870 }, { "epoch": 0.1303696477720099, "grad_norm": 0.730703176577263, "learning_rate": 6.739454439027153e-07, "loss": 1.5665, "step": 1871 }, { "epoch": 0.13043932689962723, "grad_norm": 0.6920045189349746, "learning_rate": 6.739179764819405e-07, "loss": 1.5654, "step": 1872 }, { "epoch": 0.13050900602724455, "grad_norm": 0.7136204029101176, "learning_rate": 6.738904952156231e-07, "loss": 1.5337, "step": 1873 }, { "epoch": 0.13057868515486187, "grad_norm": 0.6619804892238528, "learning_rate": 6.738630001050801e-07, "loss": 1.5429, "step": 1874 }, { "epoch": 0.1306483642824792, "grad_norm": 0.7257253083228685, "learning_rate": 6.738354911516292e-07, "loss": 1.5279, "step": 1875 }, { "epoch": 0.13071804341009652, "grad_norm": 0.7053782168665625, "learning_rate": 6.738079683565885e-07, "loss": 1.5864, "step": 1876 }, { "epoch": 0.13078772253771384, "grad_norm": 0.763390428555883, "learning_rate": 6.73780431721277e-07, "loss": 1.5123, "step": 1877 }, { "epoch": 0.13085740166533116, "grad_norm": 0.7091548271863509, "learning_rate": 6.737528812470145e-07, "loss": 1.4875, "step": 1878 }, { "epoch": 0.13092708079294849, "grad_norm": 0.705405057687693, "learning_rate": 6.737253169351209e-07, "loss": 1.4578, "step": 1879 }, { "epoch": 0.13099675992056578, "grad_norm": 0.6716338179893901, "learning_rate": 6.736977387869176e-07, "loss": 1.4275, "step": 1880 }, { "epoch": 0.1310664390481831, "grad_norm": 0.6687999558803702, "learning_rate": 6.736701468037259e-07, "loss": 1.4048, "step": 1881 }, { "epoch": 0.13113611817580043, "grad_norm": 0.7167984046454882, "learning_rate": 6.736425409868682e-07, "loss": 1.7738, "step": 1882 }, { "epoch": 0.13120579730341775, "grad_norm": 0.730282334768594, "learning_rate": 6.736149213376672e-07, "loss": 1.5536, "step": 1883 }, { "epoch": 0.13127547643103507, "grad_norm": 0.7103695925549198, "learning_rate": 6.735872878574467e-07, "loss": 1.553, "step": 1884 }, { "epoch": 0.1313451555586524, "grad_norm": 0.6854489314675175, "learning_rate": 6.73559640547531e-07, "loss": 1.5653, "step": 1885 }, { "epoch": 0.13141483468626972, "grad_norm": 0.7505672021974592, "learning_rate": 6.735319794092449e-07, "loss": 1.5383, "step": 1886 }, { "epoch": 0.13148451381388704, "grad_norm": 0.732423515927833, "learning_rate": 6.73504304443914e-07, "loss": 1.4525, "step": 1887 }, { "epoch": 0.13155419294150436, "grad_norm": 0.685880905255038, "learning_rate": 6.734766156528645e-07, "loss": 1.616, "step": 1888 }, { "epoch": 0.13162387206912168, "grad_norm": 0.6960458179247873, "learning_rate": 6.734489130374234e-07, "loss": 1.5326, "step": 1889 }, { "epoch": 0.131693551196739, "grad_norm": 0.6866922904575083, "learning_rate": 6.734211965989182e-07, "loss": 1.6037, "step": 1890 }, { "epoch": 0.13176323032435633, "grad_norm": 0.673778268440158, "learning_rate": 6.73393466338677e-07, "loss": 1.535, "step": 1891 }, { "epoch": 0.13183290945197365, "grad_norm": 0.690886200558052, "learning_rate": 6.73365722258029e-07, "loss": 1.4375, "step": 1892 }, { "epoch": 0.13190258857959097, "grad_norm": 0.6994952752280379, "learning_rate": 6.733379643583036e-07, "loss": 1.5044, "step": 1893 }, { "epoch": 0.1319722677072083, "grad_norm": 0.7264263810645032, "learning_rate": 6.733101926408308e-07, "loss": 1.6547, "step": 1894 }, { "epoch": 0.13204194683482562, "grad_norm": 0.7103588905968264, "learning_rate": 6.732824071069419e-07, "loss": 1.5439, "step": 1895 }, { "epoch": 0.13211162596244294, "grad_norm": 0.672222207099807, "learning_rate": 6.732546077579681e-07, "loss": 1.491, "step": 1896 }, { "epoch": 0.13218130509006026, "grad_norm": 0.751933233368762, "learning_rate": 6.732267945952418e-07, "loss": 1.6748, "step": 1897 }, { "epoch": 0.1322509842176776, "grad_norm": 0.7135892877523559, "learning_rate": 6.731989676200958e-07, "loss": 1.6594, "step": 1898 }, { "epoch": 0.1323206633452949, "grad_norm": 0.6841008584696798, "learning_rate": 6.731711268338635e-07, "loss": 1.555, "step": 1899 }, { "epoch": 0.13239034247291223, "grad_norm": 0.7548485821820744, "learning_rate": 6.731432722378794e-07, "loss": 1.5304, "step": 1900 }, { "epoch": 0.13246002160052955, "grad_norm": 0.7137250878688367, "learning_rate": 6.73115403833478e-07, "loss": 1.4984, "step": 1901 }, { "epoch": 0.13252970072814688, "grad_norm": 0.6812981296452358, "learning_rate": 6.730875216219948e-07, "loss": 1.5644, "step": 1902 }, { "epoch": 0.1325993798557642, "grad_norm": 0.7216405903372481, "learning_rate": 6.730596256047663e-07, "loss": 1.5853, "step": 1903 }, { "epoch": 0.13266905898338152, "grad_norm": 0.7696458881454737, "learning_rate": 6.730317157831293e-07, "loss": 1.5555, "step": 1904 }, { "epoch": 0.13273873811099884, "grad_norm": 0.7513839173466563, "learning_rate": 6.730037921584209e-07, "loss": 1.5812, "step": 1905 }, { "epoch": 0.13280841723861617, "grad_norm": 0.7038080392696825, "learning_rate": 6.729758547319796e-07, "loss": 1.5747, "step": 1906 }, { "epoch": 0.1328780963662335, "grad_norm": 0.7021482922132364, "learning_rate": 6.729479035051443e-07, "loss": 1.6157, "step": 1907 }, { "epoch": 0.1329477754938508, "grad_norm": 0.7128968760411135, "learning_rate": 6.729199384792542e-07, "loss": 1.5565, "step": 1908 }, { "epoch": 0.13301745462146813, "grad_norm": 0.7591474416194975, "learning_rate": 6.728919596556496e-07, "loss": 1.5124, "step": 1909 }, { "epoch": 0.13308713374908546, "grad_norm": 0.731997601252689, "learning_rate": 6.728639670356711e-07, "loss": 1.5297, "step": 1910 }, { "epoch": 0.13315681287670278, "grad_norm": 0.7360419831946642, "learning_rate": 6.728359606206605e-07, "loss": 1.7696, "step": 1911 }, { "epoch": 0.1332264920043201, "grad_norm": 0.7143694573394039, "learning_rate": 6.728079404119597e-07, "loss": 1.4961, "step": 1912 }, { "epoch": 0.13329617113193742, "grad_norm": 0.6871856694114608, "learning_rate": 6.727799064109116e-07, "loss": 1.5909, "step": 1913 }, { "epoch": 0.13336585025955475, "grad_norm": 0.7120372611141601, "learning_rate": 6.727518586188593e-07, "loss": 1.6738, "step": 1914 }, { "epoch": 0.13343552938717207, "grad_norm": 0.7250648448379915, "learning_rate": 6.727237970371475e-07, "loss": 1.4843, "step": 1915 }, { "epoch": 0.1335052085147894, "grad_norm": 0.7316783552643227, "learning_rate": 6.726957216671206e-07, "loss": 1.537, "step": 1916 }, { "epoch": 0.13357488764240671, "grad_norm": 0.7110209139763414, "learning_rate": 6.72667632510124e-07, "loss": 1.5645, "step": 1917 }, { "epoch": 0.13364456677002404, "grad_norm": 0.7095671591176976, "learning_rate": 6.72639529567504e-07, "loss": 1.5458, "step": 1918 }, { "epoch": 0.13371424589764136, "grad_norm": 0.7720388159693496, "learning_rate": 6.726114128406072e-07, "loss": 1.4622, "step": 1919 }, { "epoch": 0.13378392502525868, "grad_norm": 0.7088699128516145, "learning_rate": 6.72583282330781e-07, "loss": 1.476, "step": 1920 }, { "epoch": 0.133853604152876, "grad_norm": 0.6908728730889724, "learning_rate": 6.725551380393735e-07, "loss": 1.5533, "step": 1921 }, { "epoch": 0.13392328328049333, "grad_norm": 0.6820942269733361, "learning_rate": 6.725269799677335e-07, "loss": 1.5603, "step": 1922 }, { "epoch": 0.13399296240811065, "grad_norm": 0.6923947732216261, "learning_rate": 6.724988081172102e-07, "loss": 1.5315, "step": 1923 }, { "epoch": 0.13406264153572797, "grad_norm": 0.7251093460202046, "learning_rate": 6.72470622489154e-07, "loss": 1.5488, "step": 1924 }, { "epoch": 0.1341323206633453, "grad_norm": 0.7012512826060395, "learning_rate": 6.724424230849153e-07, "loss": 1.4948, "step": 1925 }, { "epoch": 0.13420199979096262, "grad_norm": 0.7212684831182445, "learning_rate": 6.724142099058455e-07, "loss": 1.56, "step": 1926 }, { "epoch": 0.13427167891857994, "grad_norm": 0.7048665231478228, "learning_rate": 6.723859829532968e-07, "loss": 1.5274, "step": 1927 }, { "epoch": 0.13434135804619726, "grad_norm": 0.7718463262225225, "learning_rate": 6.723577422286217e-07, "loss": 1.7148, "step": 1928 }, { "epoch": 0.13441103717381458, "grad_norm": 0.6792581126537035, "learning_rate": 6.723294877331739e-07, "loss": 1.4344, "step": 1929 }, { "epoch": 0.1344807163014319, "grad_norm": 0.7081966504199131, "learning_rate": 6.723012194683071e-07, "loss": 1.5669, "step": 1930 }, { "epoch": 0.13455039542904923, "grad_norm": 0.7534230117240622, "learning_rate": 6.722729374353759e-07, "loss": 1.4576, "step": 1931 }, { "epoch": 0.13462007455666655, "grad_norm": 0.6962594541305029, "learning_rate": 6.722446416357359e-07, "loss": 1.5744, "step": 1932 }, { "epoch": 0.13468975368428387, "grad_norm": 0.7580060974065307, "learning_rate": 6.722163320707429e-07, "loss": 1.6539, "step": 1933 }, { "epoch": 0.1347594328119012, "grad_norm": 0.673830372373551, "learning_rate": 6.721880087417536e-07, "loss": 1.4916, "step": 1934 }, { "epoch": 0.13482911193951852, "grad_norm": 0.7218253612280532, "learning_rate": 6.721596716501253e-07, "loss": 1.4981, "step": 1935 }, { "epoch": 0.13489879106713584, "grad_norm": 0.7531583849102892, "learning_rate": 6.721313207972162e-07, "loss": 1.6348, "step": 1936 }, { "epoch": 0.13496847019475317, "grad_norm": 0.7429401300806515, "learning_rate": 6.721029561843847e-07, "loss": 1.6212, "step": 1937 }, { "epoch": 0.1350381493223705, "grad_norm": 0.6828835052353207, "learning_rate": 6.720745778129899e-07, "loss": 1.3943, "step": 1938 }, { "epoch": 0.1351078284499878, "grad_norm": 0.7085091052785595, "learning_rate": 6.720461856843922e-07, "loss": 1.5354, "step": 1939 }, { "epoch": 0.13517750757760513, "grad_norm": 0.7364242437633206, "learning_rate": 6.720177797999519e-07, "loss": 1.7678, "step": 1940 }, { "epoch": 0.13524718670522246, "grad_norm": 0.6570866689949861, "learning_rate": 6.719893601610304e-07, "loss": 1.5072, "step": 1941 }, { "epoch": 0.13531686583283978, "grad_norm": 0.7943585246747022, "learning_rate": 6.719609267689896e-07, "loss": 1.6721, "step": 1942 }, { "epoch": 0.1353865449604571, "grad_norm": 0.7188461240489994, "learning_rate": 6.71932479625192e-07, "loss": 1.5127, "step": 1943 }, { "epoch": 0.13545622408807442, "grad_norm": 0.7475198285216123, "learning_rate": 6.719040187310009e-07, "loss": 1.6445, "step": 1944 }, { "epoch": 0.13552590321569175, "grad_norm": 0.7258450468992407, "learning_rate": 6.718755440877802e-07, "loss": 1.5219, "step": 1945 }, { "epoch": 0.13559558234330907, "grad_norm": 0.7468264368633342, "learning_rate": 6.718470556968946e-07, "loss": 1.5828, "step": 1946 }, { "epoch": 0.1356652614709264, "grad_norm": 0.7125015542157345, "learning_rate": 6.718185535597091e-07, "loss": 1.5189, "step": 1947 }, { "epoch": 0.1357349405985437, "grad_norm": 0.7036505846596398, "learning_rate": 6.717900376775899e-07, "loss": 1.5386, "step": 1948 }, { "epoch": 0.13580461972616104, "grad_norm": 0.7382895590391404, "learning_rate": 6.71761508051903e-07, "loss": 1.5655, "step": 1949 }, { "epoch": 0.13587429885377836, "grad_norm": 0.7511916226599143, "learning_rate": 6.717329646840162e-07, "loss": 1.6689, "step": 1950 }, { "epoch": 0.13594397798139568, "grad_norm": 0.7217544848044565, "learning_rate": 6.717044075752969e-07, "loss": 1.5589, "step": 1951 }, { "epoch": 0.136013657109013, "grad_norm": 0.6802227867126514, "learning_rate": 6.716758367271138e-07, "loss": 1.4658, "step": 1952 }, { "epoch": 0.13608333623663033, "grad_norm": 0.6972733413609539, "learning_rate": 6.716472521408362e-07, "loss": 1.5688, "step": 1953 }, { "epoch": 0.13615301536424765, "grad_norm": 0.7289731126070428, "learning_rate": 6.716186538178338e-07, "loss": 1.572, "step": 1954 }, { "epoch": 0.13622269449186497, "grad_norm": 0.7096236736018127, "learning_rate": 6.715900417594769e-07, "loss": 1.5759, "step": 1955 }, { "epoch": 0.1362923736194823, "grad_norm": 0.7300802470602689, "learning_rate": 6.71561415967137e-07, "loss": 1.5944, "step": 1956 }, { "epoch": 0.13636205274709962, "grad_norm": 0.7424757456231104, "learning_rate": 6.715327764421858e-07, "loss": 1.5659, "step": 1957 }, { "epoch": 0.13643173187471694, "grad_norm": 0.6961374129664488, "learning_rate": 6.715041231859956e-07, "loss": 1.6426, "step": 1958 }, { "epoch": 0.13650141100233426, "grad_norm": 0.7139600080889801, "learning_rate": 6.714754561999395e-07, "loss": 1.4646, "step": 1959 }, { "epoch": 0.13657109012995158, "grad_norm": 0.737591170989514, "learning_rate": 6.714467754853917e-07, "loss": 1.5856, "step": 1960 }, { "epoch": 0.1366407692575689, "grad_norm": 0.7319158356469768, "learning_rate": 6.714180810437263e-07, "loss": 1.5179, "step": 1961 }, { "epoch": 0.13671044838518623, "grad_norm": 0.8217089235096635, "learning_rate": 6.713893728763184e-07, "loss": 1.575, "step": 1962 }, { "epoch": 0.13678012751280355, "grad_norm": 0.7349260888101283, "learning_rate": 6.713606509845437e-07, "loss": 1.4672, "step": 1963 }, { "epoch": 0.13684980664042087, "grad_norm": 0.6870022968581666, "learning_rate": 6.713319153697788e-07, "loss": 1.611, "step": 1964 }, { "epoch": 0.1369194857680382, "grad_norm": 0.6992649168712028, "learning_rate": 6.713031660334007e-07, "loss": 1.5757, "step": 1965 }, { "epoch": 0.13698916489565552, "grad_norm": 0.7178246262660336, "learning_rate": 6.712744029767871e-07, "loss": 1.4761, "step": 1966 }, { "epoch": 0.13705884402327284, "grad_norm": 0.7437799778592202, "learning_rate": 6.712456262013164e-07, "loss": 1.5656, "step": 1967 }, { "epoch": 0.13712852315089016, "grad_norm": 0.7408121743553396, "learning_rate": 6.712168357083677e-07, "loss": 1.4855, "step": 1968 }, { "epoch": 0.1371982022785075, "grad_norm": 0.7172543395443857, "learning_rate": 6.711880314993205e-07, "loss": 1.4648, "step": 1969 }, { "epoch": 0.1372678814061248, "grad_norm": 0.7059469068330578, "learning_rate": 6.711592135755555e-07, "loss": 1.5729, "step": 1970 }, { "epoch": 0.13733756053374213, "grad_norm": 0.7208503647318539, "learning_rate": 6.711303819384533e-07, "loss": 1.5759, "step": 1971 }, { "epoch": 0.13740723966135943, "grad_norm": 0.7529947820776219, "learning_rate": 6.711015365893959e-07, "loss": 1.5245, "step": 1972 }, { "epoch": 0.13747691878897675, "grad_norm": 0.7036672482791874, "learning_rate": 6.710726775297655e-07, "loss": 1.5135, "step": 1973 }, { "epoch": 0.13754659791659407, "grad_norm": 0.7944363990655092, "learning_rate": 6.710438047609452e-07, "loss": 1.6168, "step": 1974 }, { "epoch": 0.1376162770442114, "grad_norm": 0.7280065937367366, "learning_rate": 6.710149182843183e-07, "loss": 1.4074, "step": 1975 }, { "epoch": 0.13768595617182872, "grad_norm": 0.727312215451071, "learning_rate": 6.709860181012695e-07, "loss": 1.6429, "step": 1976 }, { "epoch": 0.13775563529944604, "grad_norm": 0.6953250639382146, "learning_rate": 6.709571042131836e-07, "loss": 1.596, "step": 1977 }, { "epoch": 0.13782531442706336, "grad_norm": 0.7968221935128836, "learning_rate": 6.709281766214462e-07, "loss": 1.4812, "step": 1978 }, { "epoch": 0.13789499355468068, "grad_norm": 0.731561061631931, "learning_rate": 6.708992353274434e-07, "loss": 1.5383, "step": 1979 }, { "epoch": 0.137964672682298, "grad_norm": 0.697983322450153, "learning_rate": 6.708702803325626e-07, "loss": 1.4938, "step": 1980 }, { "epoch": 0.13803435180991533, "grad_norm": 0.6999587134752484, "learning_rate": 6.70841311638191e-07, "loss": 1.5703, "step": 1981 }, { "epoch": 0.13810403093753265, "grad_norm": 0.7522221971120099, "learning_rate": 6.708123292457168e-07, "loss": 1.4194, "step": 1982 }, { "epoch": 0.13817371006514997, "grad_norm": 0.7996483404297469, "learning_rate": 6.707833331565289e-07, "loss": 1.3953, "step": 1983 }, { "epoch": 0.1382433891927673, "grad_norm": 0.7338395677152775, "learning_rate": 6.707543233720173e-07, "loss": 1.6296, "step": 1984 }, { "epoch": 0.13831306832038462, "grad_norm": 0.7426771851411896, "learning_rate": 6.707252998935717e-07, "loss": 1.5608, "step": 1985 }, { "epoch": 0.13838274744800194, "grad_norm": 0.6530700467546554, "learning_rate": 6.706962627225833e-07, "loss": 1.5599, "step": 1986 }, { "epoch": 0.13845242657561926, "grad_norm": 0.777687245902078, "learning_rate": 6.706672118604433e-07, "loss": 1.5721, "step": 1987 }, { "epoch": 0.1385221057032366, "grad_norm": 0.6768719388702751, "learning_rate": 6.706381473085441e-07, "loss": 1.5406, "step": 1988 }, { "epoch": 0.1385917848308539, "grad_norm": 0.7222488532422539, "learning_rate": 6.706090690682784e-07, "loss": 1.4633, "step": 1989 }, { "epoch": 0.13866146395847123, "grad_norm": 0.723866634938114, "learning_rate": 6.705799771410399e-07, "loss": 1.5605, "step": 1990 }, { "epoch": 0.13873114308608855, "grad_norm": 0.7870541821766887, "learning_rate": 6.705508715282225e-07, "loss": 1.5404, "step": 1991 }, { "epoch": 0.13880082221370588, "grad_norm": 0.7334998345863415, "learning_rate": 6.705217522312213e-07, "loss": 1.6006, "step": 1992 }, { "epoch": 0.1388705013413232, "grad_norm": 0.7960625479446524, "learning_rate": 6.704926192514313e-07, "loss": 1.6352, "step": 1993 }, { "epoch": 0.13894018046894052, "grad_norm": 0.7140325267209116, "learning_rate": 6.70463472590249e-07, "loss": 1.5576, "step": 1994 }, { "epoch": 0.13900985959655784, "grad_norm": 0.6927480097785106, "learning_rate": 6.70434312249071e-07, "loss": 1.4991, "step": 1995 }, { "epoch": 0.13907953872417517, "grad_norm": 0.6642520213604098, "learning_rate": 6.70405138229295e-07, "loss": 1.4621, "step": 1996 }, { "epoch": 0.1391492178517925, "grad_norm": 0.6805635598441102, "learning_rate": 6.703759505323186e-07, "loss": 1.6133, "step": 1997 }, { "epoch": 0.1392188969794098, "grad_norm": 0.7558783219492097, "learning_rate": 6.703467491595409e-07, "loss": 1.6302, "step": 1998 }, { "epoch": 0.13928857610702713, "grad_norm": 0.7137225258702494, "learning_rate": 6.703175341123611e-07, "loss": 1.546, "step": 1999 }, { "epoch": 0.13935825523464446, "grad_norm": 0.6896011638861811, "learning_rate": 6.702883053921793e-07, "loss": 1.5004, "step": 2000 }, { "epoch": 0.13942793436226178, "grad_norm": 0.7740936820333882, "learning_rate": 6.702590630003963e-07, "loss": 1.5578, "step": 2001 }, { "epoch": 0.1394976134898791, "grad_norm": 0.7113997941083866, "learning_rate": 6.702298069384134e-07, "loss": 1.5332, "step": 2002 }, { "epoch": 0.13956729261749642, "grad_norm": 0.6916645083605428, "learning_rate": 6.702005372076325e-07, "loss": 1.7124, "step": 2003 }, { "epoch": 0.13963697174511375, "grad_norm": 0.8047028801866378, "learning_rate": 6.701712538094564e-07, "loss": 1.5595, "step": 2004 }, { "epoch": 0.13970665087273107, "grad_norm": 0.6812523150401025, "learning_rate": 6.701419567452884e-07, "loss": 1.4507, "step": 2005 }, { "epoch": 0.1397763300003484, "grad_norm": 0.7103344035535895, "learning_rate": 6.701126460165324e-07, "loss": 1.5772, "step": 2006 }, { "epoch": 0.13984600912796571, "grad_norm": 0.7102780550548473, "learning_rate": 6.700833216245931e-07, "loss": 1.5876, "step": 2007 }, { "epoch": 0.13991568825558304, "grad_norm": 0.7369837252532143, "learning_rate": 6.700539835708757e-07, "loss": 1.5054, "step": 2008 }, { "epoch": 0.13998536738320036, "grad_norm": 0.74091795158049, "learning_rate": 6.700246318567862e-07, "loss": 1.547, "step": 2009 }, { "epoch": 0.14005504651081768, "grad_norm": 0.8230971961676586, "learning_rate": 6.699952664837312e-07, "loss": 1.6581, "step": 2010 }, { "epoch": 0.140124725638435, "grad_norm": 0.6696729949760143, "learning_rate": 6.699658874531181e-07, "loss": 1.5002, "step": 2011 }, { "epoch": 0.14019440476605233, "grad_norm": 0.6628325796270378, "learning_rate": 6.699364947663546e-07, "loss": 1.5461, "step": 2012 }, { "epoch": 0.14026408389366965, "grad_norm": 0.7288767149466291, "learning_rate": 6.699070884248492e-07, "loss": 1.674, "step": 2013 }, { "epoch": 0.14033376302128697, "grad_norm": 0.6900581440512029, "learning_rate": 6.698776684300113e-07, "loss": 1.539, "step": 2014 }, { "epoch": 0.1404034421489043, "grad_norm": 0.7013138547776667, "learning_rate": 6.698482347832506e-07, "loss": 1.519, "step": 2015 }, { "epoch": 0.14047312127652162, "grad_norm": 0.7269407616493614, "learning_rate": 6.698187874859778e-07, "loss": 1.4797, "step": 2016 }, { "epoch": 0.14054280040413894, "grad_norm": 0.7810767647483735, "learning_rate": 6.69789326539604e-07, "loss": 1.6596, "step": 2017 }, { "epoch": 0.14061247953175626, "grad_norm": 0.7286918267880533, "learning_rate": 6.697598519455409e-07, "loss": 1.4667, "step": 2018 }, { "epoch": 0.14068215865937359, "grad_norm": 0.6977287876601965, "learning_rate": 6.697303637052011e-07, "loss": 1.3941, "step": 2019 }, { "epoch": 0.1407518377869909, "grad_norm": 0.6633872121941339, "learning_rate": 6.697008618199978e-07, "loss": 1.4926, "step": 2020 }, { "epoch": 0.14082151691460823, "grad_norm": 0.6908639722598806, "learning_rate": 6.696713462913447e-07, "loss": 1.5853, "step": 2021 }, { "epoch": 0.14089119604222555, "grad_norm": 0.7069874421515819, "learning_rate": 6.696418171206563e-07, "loss": 1.4058, "step": 2022 }, { "epoch": 0.14096087516984288, "grad_norm": 0.7027241913436704, "learning_rate": 6.696122743093476e-07, "loss": 1.5894, "step": 2023 }, { "epoch": 0.1410305542974602, "grad_norm": 0.7560903773846185, "learning_rate": 6.695827178588346e-07, "loss": 1.6811, "step": 2024 }, { "epoch": 0.14110023342507752, "grad_norm": 0.7511736752192385, "learning_rate": 6.695531477705333e-07, "loss": 1.6184, "step": 2025 }, { "epoch": 0.14116991255269484, "grad_norm": 0.6690392393806831, "learning_rate": 6.69523564045861e-07, "loss": 1.4891, "step": 2026 }, { "epoch": 0.14123959168031217, "grad_norm": 0.6912196783448034, "learning_rate": 6.694939666862355e-07, "loss": 1.4651, "step": 2027 }, { "epoch": 0.1413092708079295, "grad_norm": 0.77137577829856, "learning_rate": 6.69464355693075e-07, "loss": 1.5403, "step": 2028 }, { "epoch": 0.1413789499355468, "grad_norm": 0.6865381749824687, "learning_rate": 6.694347310677985e-07, "loss": 1.6508, "step": 2029 }, { "epoch": 0.14144862906316413, "grad_norm": 0.7566844700374754, "learning_rate": 6.69405092811826e-07, "loss": 1.5167, "step": 2030 }, { "epoch": 0.14151830819078146, "grad_norm": 0.6952455915837616, "learning_rate": 6.693754409265774e-07, "loss": 1.4989, "step": 2031 }, { "epoch": 0.14158798731839878, "grad_norm": 0.7130424330099704, "learning_rate": 6.693457754134739e-07, "loss": 1.4692, "step": 2032 }, { "epoch": 0.1416576664460161, "grad_norm": 0.8873037536282933, "learning_rate": 6.693160962739372e-07, "loss": 1.4678, "step": 2033 }, { "epoch": 0.14172734557363342, "grad_norm": 0.7036011332421183, "learning_rate": 6.692864035093892e-07, "loss": 1.5706, "step": 2034 }, { "epoch": 0.14179702470125075, "grad_norm": 0.834179522399948, "learning_rate": 6.692566971212533e-07, "loss": 1.5409, "step": 2035 }, { "epoch": 0.14186670382886807, "grad_norm": 0.6949574416710086, "learning_rate": 6.69226977110953e-07, "loss": 1.5529, "step": 2036 }, { "epoch": 0.1419363829564854, "grad_norm": 0.7135623739474563, "learning_rate": 6.691972434799122e-07, "loss": 1.6741, "step": 2037 }, { "epoch": 0.1420060620841027, "grad_norm": 0.6687369374009454, "learning_rate": 6.691674962295562e-07, "loss": 1.525, "step": 2038 }, { "epoch": 0.14207574121172004, "grad_norm": 0.7433925112314416, "learning_rate": 6.691377353613104e-07, "loss": 1.4498, "step": 2039 }, { "epoch": 0.14214542033933736, "grad_norm": 0.6745110392616911, "learning_rate": 6.691079608766009e-07, "loss": 1.6143, "step": 2040 }, { "epoch": 0.14221509946695468, "grad_norm": 0.7382616845616087, "learning_rate": 6.690781727768548e-07, "loss": 1.541, "step": 2041 }, { "epoch": 0.142284778594572, "grad_norm": 0.7045004156463536, "learning_rate": 6.690483710634993e-07, "loss": 1.4801, "step": 2042 }, { "epoch": 0.14235445772218933, "grad_norm": 0.6965897989333067, "learning_rate": 6.690185557379629e-07, "loss": 1.489, "step": 2043 }, { "epoch": 0.14242413684980665, "grad_norm": 0.7438196665174391, "learning_rate": 6.689887268016741e-07, "loss": 1.5633, "step": 2044 }, { "epoch": 0.14249381597742397, "grad_norm": 0.7105715238696366, "learning_rate": 6.689588842560625e-07, "loss": 1.5686, "step": 2045 }, { "epoch": 0.1425634951050413, "grad_norm": 0.7049215190610151, "learning_rate": 6.689290281025582e-07, "loss": 1.6438, "step": 2046 }, { "epoch": 0.14263317423265862, "grad_norm": 0.6744774059408442, "learning_rate": 6.68899158342592e-07, "loss": 1.5095, "step": 2047 }, { "epoch": 0.14270285336027594, "grad_norm": 0.680417356610093, "learning_rate": 6.688692749775953e-07, "loss": 1.5074, "step": 2048 }, { "epoch": 0.14277253248789326, "grad_norm": 0.7151033904243572, "learning_rate": 6.688393780090002e-07, "loss": 1.604, "step": 2049 }, { "epoch": 0.14284221161551058, "grad_norm": 0.724966418349942, "learning_rate": 6.688094674382394e-07, "loss": 1.5373, "step": 2050 }, { "epoch": 0.1429118907431279, "grad_norm": 0.7360886176646285, "learning_rate": 6.687795432667462e-07, "loss": 1.5455, "step": 2051 }, { "epoch": 0.14298156987074523, "grad_norm": 0.7172255708598817, "learning_rate": 6.687496054959548e-07, "loss": 1.6347, "step": 2052 }, { "epoch": 0.14305124899836255, "grad_norm": 0.7699884407032171, "learning_rate": 6.687196541272997e-07, "loss": 1.6606, "step": 2053 }, { "epoch": 0.14312092812597987, "grad_norm": 0.7014245175284832, "learning_rate": 6.686896891622164e-07, "loss": 1.6195, "step": 2054 }, { "epoch": 0.1431906072535972, "grad_norm": 0.7050391085105184, "learning_rate": 6.686597106021406e-07, "loss": 1.4778, "step": 2055 }, { "epoch": 0.14326028638121452, "grad_norm": 0.7475241606305587, "learning_rate": 6.686297184485093e-07, "loss": 1.522, "step": 2056 }, { "epoch": 0.14332996550883184, "grad_norm": 0.7224955938632416, "learning_rate": 6.685997127027597e-07, "loss": 1.4863, "step": 2057 }, { "epoch": 0.14339964463644916, "grad_norm": 0.7205299342896766, "learning_rate": 6.685696933663295e-07, "loss": 1.451, "step": 2058 }, { "epoch": 0.1434693237640665, "grad_norm": 0.7375433453122535, "learning_rate": 6.685396604406574e-07, "loss": 1.5236, "step": 2059 }, { "epoch": 0.1435390028916838, "grad_norm": 0.6683998772094839, "learning_rate": 6.685096139271827e-07, "loss": 1.519, "step": 2060 }, { "epoch": 0.14360868201930113, "grad_norm": 0.7362126432095132, "learning_rate": 6.684795538273452e-07, "loss": 1.578, "step": 2061 }, { "epoch": 0.14367836114691845, "grad_norm": 0.7395787426618212, "learning_rate": 6.684494801425857e-07, "loss": 1.5395, "step": 2062 }, { "epoch": 0.14374804027453578, "grad_norm": 0.6894804607087299, "learning_rate": 6.684193928743451e-07, "loss": 1.5361, "step": 2063 }, { "epoch": 0.14381771940215307, "grad_norm": 0.7135583941854905, "learning_rate": 6.683892920240653e-07, "loss": 1.4923, "step": 2064 }, { "epoch": 0.1438873985297704, "grad_norm": 0.7829148162615202, "learning_rate": 6.683591775931889e-07, "loss": 1.5336, "step": 2065 }, { "epoch": 0.14395707765738772, "grad_norm": 0.7303266441004318, "learning_rate": 6.683290495831589e-07, "loss": 1.6255, "step": 2066 }, { "epoch": 0.14402675678500504, "grad_norm": 0.7839802506351182, "learning_rate": 6.682989079954192e-07, "loss": 1.5698, "step": 2067 }, { "epoch": 0.14409643591262236, "grad_norm": 0.6629158355551916, "learning_rate": 6.682687528314144e-07, "loss": 1.5633, "step": 2068 }, { "epoch": 0.14416611504023968, "grad_norm": 0.7241216398074919, "learning_rate": 6.682385840925893e-07, "loss": 1.545, "step": 2069 }, { "epoch": 0.144235794167857, "grad_norm": 0.7164951634406063, "learning_rate": 6.682084017803897e-07, "loss": 1.4695, "step": 2070 }, { "epoch": 0.14430547329547433, "grad_norm": 1.0271724115304646, "learning_rate": 6.681782058962621e-07, "loss": 1.4634, "step": 2071 }, { "epoch": 0.14437515242309165, "grad_norm": 0.7823040178808669, "learning_rate": 6.681479964416536e-07, "loss": 1.5905, "step": 2072 }, { "epoch": 0.14444483155070897, "grad_norm": 0.657817420957194, "learning_rate": 6.681177734180118e-07, "loss": 1.3752, "step": 2073 }, { "epoch": 0.1445145106783263, "grad_norm": 0.6852262198461414, "learning_rate": 6.68087536826785e-07, "loss": 1.6397, "step": 2074 }, { "epoch": 0.14458418980594362, "grad_norm": 0.756558074647055, "learning_rate": 6.680572866694223e-07, "loss": 1.635, "step": 2075 }, { "epoch": 0.14465386893356094, "grad_norm": 0.7433246997372718, "learning_rate": 6.680270229473733e-07, "loss": 1.5365, "step": 2076 }, { "epoch": 0.14472354806117826, "grad_norm": 0.78026047334549, "learning_rate": 6.679967456620883e-07, "loss": 1.5682, "step": 2077 }, { "epoch": 0.1447932271887956, "grad_norm": 0.7072922446822634, "learning_rate": 6.679664548150184e-07, "loss": 1.4905, "step": 2078 }, { "epoch": 0.1448629063164129, "grad_norm": 0.68509156320556, "learning_rate": 6.67936150407615e-07, "loss": 1.613, "step": 2079 }, { "epoch": 0.14493258544403023, "grad_norm": 0.7336974814746486, "learning_rate": 6.679058324413304e-07, "loss": 1.6432, "step": 2080 }, { "epoch": 0.14500226457164755, "grad_norm": 0.7801610741726437, "learning_rate": 6.678755009176175e-07, "loss": 1.7378, "step": 2081 }, { "epoch": 0.14507194369926488, "grad_norm": 0.7463363382104677, "learning_rate": 6.678451558379299e-07, "loss": 1.5998, "step": 2082 }, { "epoch": 0.1451416228268822, "grad_norm": 0.7695298804194249, "learning_rate": 6.678147972037217e-07, "loss": 1.4838, "step": 2083 }, { "epoch": 0.14521130195449952, "grad_norm": 0.7298021201070521, "learning_rate": 6.677844250164478e-07, "loss": 1.7206, "step": 2084 }, { "epoch": 0.14528098108211684, "grad_norm": 0.7045264264551108, "learning_rate": 6.677540392775638e-07, "loss": 1.6674, "step": 2085 }, { "epoch": 0.14535066020973417, "grad_norm": 0.7080820049733552, "learning_rate": 6.677236399885257e-07, "loss": 1.3703, "step": 2086 }, { "epoch": 0.1454203393373515, "grad_norm": 0.7405645499986991, "learning_rate": 6.676932271507905e-07, "loss": 1.6083, "step": 2087 }, { "epoch": 0.1454900184649688, "grad_norm": 0.6755073635241531, "learning_rate": 6.676628007658152e-07, "loss": 1.4891, "step": 2088 }, { "epoch": 0.14555969759258613, "grad_norm": 0.735196926318889, "learning_rate": 6.676323608350584e-07, "loss": 1.5538, "step": 2089 }, { "epoch": 0.14562937672020346, "grad_norm": 0.7158832810121535, "learning_rate": 6.676019073599786e-07, "loss": 1.4395, "step": 2090 }, { "epoch": 0.14569905584782078, "grad_norm": 0.8233995506467929, "learning_rate": 6.675714403420352e-07, "loss": 1.5909, "step": 2091 }, { "epoch": 0.1457687349754381, "grad_norm": 0.6981047452018204, "learning_rate": 6.675409597826883e-07, "loss": 1.5379, "step": 2092 }, { "epoch": 0.14583841410305542, "grad_norm": 0.7318049866634618, "learning_rate": 6.675104656833985e-07, "loss": 1.5167, "step": 2093 }, { "epoch": 0.14590809323067275, "grad_norm": 0.6711256547636123, "learning_rate": 6.674799580456273e-07, "loss": 1.4706, "step": 2094 }, { "epoch": 0.14597777235829007, "grad_norm": 0.7807090030494048, "learning_rate": 6.674494368708365e-07, "loss": 1.6408, "step": 2095 }, { "epoch": 0.1460474514859074, "grad_norm": 0.7560459522384863, "learning_rate": 6.674189021604889e-07, "loss": 1.5771, "step": 2096 }, { "epoch": 0.14611713061352471, "grad_norm": 0.7340658045296877, "learning_rate": 6.673883539160478e-07, "loss": 1.7691, "step": 2097 }, { "epoch": 0.14618680974114204, "grad_norm": 0.7166206168364373, "learning_rate": 6.673577921389768e-07, "loss": 1.5528, "step": 2098 }, { "epoch": 0.14625648886875936, "grad_norm": 0.7247772846536584, "learning_rate": 6.673272168307408e-07, "loss": 1.5739, "step": 2099 }, { "epoch": 0.14632616799637668, "grad_norm": 0.7122755245520854, "learning_rate": 6.672966279928051e-07, "loss": 1.517, "step": 2100 }, { "epoch": 0.146395847123994, "grad_norm": 0.709517658767935, "learning_rate": 6.672660256266353e-07, "loss": 1.5352, "step": 2101 }, { "epoch": 0.14646552625161133, "grad_norm": 0.7526325805880144, "learning_rate": 6.672354097336982e-07, "loss": 1.4883, "step": 2102 }, { "epoch": 0.14653520537922865, "grad_norm": 0.6888521999954886, "learning_rate": 6.672047803154608e-07, "loss": 1.6047, "step": 2103 }, { "epoch": 0.14660488450684597, "grad_norm": 0.7384235333319291, "learning_rate": 6.671741373733909e-07, "loss": 1.5317, "step": 2104 }, { "epoch": 0.1466745636344633, "grad_norm": 0.6736086119358556, "learning_rate": 6.671434809089571e-07, "loss": 1.5422, "step": 2105 }, { "epoch": 0.14674424276208062, "grad_norm": 0.6661337493931547, "learning_rate": 6.671128109236285e-07, "loss": 1.501, "step": 2106 }, { "epoch": 0.14681392188969794, "grad_norm": 0.7206096510228985, "learning_rate": 6.670821274188747e-07, "loss": 1.5317, "step": 2107 }, { "epoch": 0.14688360101731526, "grad_norm": 0.6859249482438138, "learning_rate": 6.670514303961664e-07, "loss": 1.5287, "step": 2108 }, { "epoch": 0.14695328014493259, "grad_norm": 0.7183963613016431, "learning_rate": 6.670207198569745e-07, "loss": 1.5517, "step": 2109 }, { "epoch": 0.1470229592725499, "grad_norm": 0.7341898396673695, "learning_rate": 6.669899958027707e-07, "loss": 1.6989, "step": 2110 }, { "epoch": 0.14709263840016723, "grad_norm": 0.7019688944440516, "learning_rate": 6.669592582350273e-07, "loss": 1.5366, "step": 2111 }, { "epoch": 0.14716231752778455, "grad_norm": 0.7247432024634127, "learning_rate": 6.669285071552174e-07, "loss": 1.5756, "step": 2112 }, { "epoch": 0.14723199665540188, "grad_norm": 0.7513673208989898, "learning_rate": 6.668977425648149e-07, "loss": 1.5718, "step": 2113 }, { "epoch": 0.1473016757830192, "grad_norm": 0.7819729777404506, "learning_rate": 6.668669644652937e-07, "loss": 1.5458, "step": 2114 }, { "epoch": 0.14737135491063652, "grad_norm": 0.750342648657007, "learning_rate": 6.668361728581288e-07, "loss": 1.4844, "step": 2115 }, { "epoch": 0.14744103403825384, "grad_norm": 0.6908345407203063, "learning_rate": 6.66805367744796e-07, "loss": 1.6642, "step": 2116 }, { "epoch": 0.14751071316587117, "grad_norm": 0.7344250712154198, "learning_rate": 6.667745491267713e-07, "loss": 1.6705, "step": 2117 }, { "epoch": 0.1475803922934885, "grad_norm": 0.7510819008972629, "learning_rate": 6.667437170055319e-07, "loss": 1.5469, "step": 2118 }, { "epoch": 0.1476500714211058, "grad_norm": 0.7555665446815104, "learning_rate": 6.66712871382555e-07, "loss": 1.6687, "step": 2119 }, { "epoch": 0.14771975054872313, "grad_norm": 0.6604616741494789, "learning_rate": 6.666820122593192e-07, "loss": 1.5087, "step": 2120 }, { "epoch": 0.14778942967634046, "grad_norm": 0.7348443193736736, "learning_rate": 6.666511396373029e-07, "loss": 1.5052, "step": 2121 }, { "epoch": 0.14785910880395778, "grad_norm": 0.672958863560216, "learning_rate": 6.666202535179859e-07, "loss": 1.4961, "step": 2122 }, { "epoch": 0.1479287879315751, "grad_norm": 0.7414274787253741, "learning_rate": 6.665893539028481e-07, "loss": 1.6306, "step": 2123 }, { "epoch": 0.14799846705919242, "grad_norm": 0.727858108215168, "learning_rate": 6.665584407933704e-07, "loss": 1.5347, "step": 2124 }, { "epoch": 0.14806814618680975, "grad_norm": 0.7294702840040114, "learning_rate": 6.665275141910343e-07, "loss": 1.3667, "step": 2125 }, { "epoch": 0.14813782531442707, "grad_norm": 0.7349044745056671, "learning_rate": 6.664965740973216e-07, "loss": 1.7463, "step": 2126 }, { "epoch": 0.1482075044420444, "grad_norm": 0.7758881380439382, "learning_rate": 6.664656205137151e-07, "loss": 1.6216, "step": 2127 }, { "epoch": 0.1482771835696617, "grad_norm": 0.7495364666245434, "learning_rate": 6.664346534416984e-07, "loss": 1.6273, "step": 2128 }, { "epoch": 0.14834686269727904, "grad_norm": 0.7040089705837504, "learning_rate": 6.664036728827553e-07, "loss": 1.5802, "step": 2129 }, { "epoch": 0.14841654182489636, "grad_norm": 0.6667731454990864, "learning_rate": 6.663726788383704e-07, "loss": 1.5177, "step": 2130 }, { "epoch": 0.14848622095251368, "grad_norm": 0.6997476564626672, "learning_rate": 6.663416713100291e-07, "loss": 1.5506, "step": 2131 }, { "epoch": 0.148555900080131, "grad_norm": 0.6911327597765583, "learning_rate": 6.663106502992175e-07, "loss": 1.4742, "step": 2132 }, { "epoch": 0.14862557920774833, "grad_norm": 0.7187628023471965, "learning_rate": 6.662796158074218e-07, "loss": 1.5424, "step": 2133 }, { "epoch": 0.14869525833536565, "grad_norm": 0.8128369317563799, "learning_rate": 6.662485678361296e-07, "loss": 1.5732, "step": 2134 }, { "epoch": 0.14876493746298297, "grad_norm": 0.782738410518543, "learning_rate": 6.662175063868286e-07, "loss": 1.6439, "step": 2135 }, { "epoch": 0.1488346165906003, "grad_norm": 0.7095268270329291, "learning_rate": 6.661864314610073e-07, "loss": 1.5901, "step": 2136 }, { "epoch": 0.14890429571821762, "grad_norm": 0.6888819253469545, "learning_rate": 6.66155343060155e-07, "loss": 1.4262, "step": 2137 }, { "epoch": 0.14897397484583494, "grad_norm": 0.698034927176739, "learning_rate": 6.661242411857614e-07, "loss": 1.4935, "step": 2138 }, { "epoch": 0.14904365397345226, "grad_norm": 0.7034207813846144, "learning_rate": 6.660931258393171e-07, "loss": 1.6056, "step": 2139 }, { "epoch": 0.14911333310106958, "grad_norm": 0.714720004406207, "learning_rate": 6.66061997022313e-07, "loss": 1.4297, "step": 2140 }, { "epoch": 0.1491830122286869, "grad_norm": 0.70720621104776, "learning_rate": 6.660308547362412e-07, "loss": 1.5657, "step": 2141 }, { "epoch": 0.14925269135630423, "grad_norm": 0.6818808254535282, "learning_rate": 6.659996989825938e-07, "loss": 1.4919, "step": 2142 }, { "epoch": 0.14932237048392155, "grad_norm": 1.2047689714013203, "learning_rate": 6.65968529762864e-07, "loss": 1.6194, "step": 2143 }, { "epoch": 0.14939204961153887, "grad_norm": 0.7234531463009618, "learning_rate": 6.659373470785454e-07, "loss": 1.6053, "step": 2144 }, { "epoch": 0.1494617287391562, "grad_norm": 0.8116627746519786, "learning_rate": 6.659061509311322e-07, "loss": 1.5547, "step": 2145 }, { "epoch": 0.14953140786677352, "grad_norm": 0.7010744160731812, "learning_rate": 6.658749413221197e-07, "loss": 1.6412, "step": 2146 }, { "epoch": 0.14960108699439084, "grad_norm": 0.7281161646233438, "learning_rate": 6.658437182530034e-07, "loss": 1.6422, "step": 2147 }, { "epoch": 0.14967076612200816, "grad_norm": 0.7123633183041211, "learning_rate": 6.658124817252796e-07, "loss": 1.6012, "step": 2148 }, { "epoch": 0.1497404452496255, "grad_norm": 0.7779085968751687, "learning_rate": 6.657812317404451e-07, "loss": 1.5819, "step": 2149 }, { "epoch": 0.1498101243772428, "grad_norm": 0.7245364414178849, "learning_rate": 6.657499682999976e-07, "loss": 1.6811, "step": 2150 }, { "epoch": 0.14987980350486013, "grad_norm": 0.7291548999158717, "learning_rate": 6.657186914054353e-07, "loss": 1.5303, "step": 2151 }, { "epoch": 0.14994948263247745, "grad_norm": 0.6850518007923204, "learning_rate": 6.656874010582568e-07, "loss": 1.6017, "step": 2152 }, { "epoch": 0.15001916176009478, "grad_norm": 0.7636515207105723, "learning_rate": 6.65656097259962e-07, "loss": 1.6767, "step": 2153 }, { "epoch": 0.1500888408877121, "grad_norm": 0.7095907892985733, "learning_rate": 6.656247800120508e-07, "loss": 1.5909, "step": 2154 }, { "epoch": 0.15015852001532942, "grad_norm": 0.8017043567634776, "learning_rate": 6.65593449316024e-07, "loss": 1.6075, "step": 2155 }, { "epoch": 0.15022819914294672, "grad_norm": 0.7075063766905534, "learning_rate": 6.655621051733831e-07, "loss": 1.5024, "step": 2156 }, { "epoch": 0.15029787827056404, "grad_norm": 0.7010902957564245, "learning_rate": 6.6553074758563e-07, "loss": 1.5442, "step": 2157 }, { "epoch": 0.15036755739818136, "grad_norm": 0.6911690892841524, "learning_rate": 6.654993765542677e-07, "loss": 1.539, "step": 2158 }, { "epoch": 0.15043723652579868, "grad_norm": 0.707345315596661, "learning_rate": 6.654679920807994e-07, "loss": 1.4807, "step": 2159 }, { "epoch": 0.150506915653416, "grad_norm": 0.7415392921869378, "learning_rate": 6.654365941667291e-07, "loss": 1.5474, "step": 2160 }, { "epoch": 0.15057659478103333, "grad_norm": 0.706173361060631, "learning_rate": 6.654051828135615e-07, "loss": 1.4426, "step": 2161 }, { "epoch": 0.15064627390865065, "grad_norm": 0.7303397511035342, "learning_rate": 6.653737580228018e-07, "loss": 1.5252, "step": 2162 }, { "epoch": 0.15071595303626797, "grad_norm": 0.6801700281353229, "learning_rate": 6.653423197959561e-07, "loss": 1.6305, "step": 2163 }, { "epoch": 0.1507856321638853, "grad_norm": 0.7319612660717305, "learning_rate": 6.653108681345308e-07, "loss": 1.5535, "step": 2164 }, { "epoch": 0.15085531129150262, "grad_norm": 0.739665723589392, "learning_rate": 6.652794030400334e-07, "loss": 1.4885, "step": 2165 }, { "epoch": 0.15092499041911994, "grad_norm": 0.7300207068686901, "learning_rate": 6.652479245139715e-07, "loss": 1.556, "step": 2166 }, { "epoch": 0.15099466954673726, "grad_norm": 0.7478877415893503, "learning_rate": 6.652164325578538e-07, "loss": 1.5387, "step": 2167 }, { "epoch": 0.1510643486743546, "grad_norm": 0.7583353301827843, "learning_rate": 6.651849271731893e-07, "loss": 1.5861, "step": 2168 }, { "epoch": 0.1511340278019719, "grad_norm": 0.7393233041700598, "learning_rate": 6.651534083614879e-07, "loss": 1.6991, "step": 2169 }, { "epoch": 0.15120370692958923, "grad_norm": 0.6878120430029538, "learning_rate": 6.6512187612426e-07, "loss": 1.5472, "step": 2170 }, { "epoch": 0.15127338605720655, "grad_norm": 0.7431748916767359, "learning_rate": 6.650903304630168e-07, "loss": 1.5066, "step": 2171 }, { "epoch": 0.15134306518482388, "grad_norm": 0.7058577241054244, "learning_rate": 6.650587713792698e-07, "loss": 1.4571, "step": 2172 }, { "epoch": 0.1514127443124412, "grad_norm": 0.7446431893400042, "learning_rate": 6.650271988745317e-07, "loss": 1.4761, "step": 2173 }, { "epoch": 0.15148242344005852, "grad_norm": 0.7038821399942174, "learning_rate": 6.649956129503152e-07, "loss": 1.4445, "step": 2174 }, { "epoch": 0.15155210256767584, "grad_norm": 0.8191464308394847, "learning_rate": 6.649640136081343e-07, "loss": 1.5595, "step": 2175 }, { "epoch": 0.15162178169529317, "grad_norm": 0.7133519258464575, "learning_rate": 6.64932400849503e-07, "loss": 1.5781, "step": 2176 }, { "epoch": 0.1516914608229105, "grad_norm": 0.6795663922248834, "learning_rate": 6.649007746759362e-07, "loss": 1.6141, "step": 2177 }, { "epoch": 0.1517611399505278, "grad_norm": 0.69403858097068, "learning_rate": 6.648691350889498e-07, "loss": 1.4282, "step": 2178 }, { "epoch": 0.15183081907814514, "grad_norm": 0.7119608330094656, "learning_rate": 6.6483748209006e-07, "loss": 1.3755, "step": 2179 }, { "epoch": 0.15190049820576246, "grad_norm": 0.7564469568638418, "learning_rate": 6.648058156807836e-07, "loss": 1.5567, "step": 2180 }, { "epoch": 0.15197017733337978, "grad_norm": 0.7486844475397654, "learning_rate": 6.647741358626378e-07, "loss": 1.5791, "step": 2181 }, { "epoch": 0.1520398564609971, "grad_norm": 0.7055998936778343, "learning_rate": 6.647424426371411e-07, "loss": 1.5442, "step": 2182 }, { "epoch": 0.15210953558861443, "grad_norm": 0.7470502207739791, "learning_rate": 6.647107360058124e-07, "loss": 1.5793, "step": 2183 }, { "epoch": 0.15217921471623175, "grad_norm": 0.7341460938876649, "learning_rate": 6.64679015970171e-07, "loss": 1.5151, "step": 2184 }, { "epoch": 0.15224889384384907, "grad_norm": 0.7346400065000422, "learning_rate": 6.646472825317368e-07, "loss": 1.5545, "step": 2185 }, { "epoch": 0.1523185729714664, "grad_norm": 0.7201065762380195, "learning_rate": 6.646155356920309e-07, "loss": 1.5169, "step": 2186 }, { "epoch": 0.15238825209908372, "grad_norm": 0.7393498689538193, "learning_rate": 6.645837754525743e-07, "loss": 1.5674, "step": 2187 }, { "epoch": 0.15245793122670104, "grad_norm": 0.7785664711016976, "learning_rate": 6.645520018148894e-07, "loss": 1.5922, "step": 2188 }, { "epoch": 0.15252761035431836, "grad_norm": 0.7293345596385744, "learning_rate": 6.645202147804986e-07, "loss": 1.5481, "step": 2189 }, { "epoch": 0.15259728948193568, "grad_norm": 0.6894072142217631, "learning_rate": 6.644884143509253e-07, "loss": 1.389, "step": 2190 }, { "epoch": 0.152666968609553, "grad_norm": 0.6740333835131167, "learning_rate": 6.644566005276932e-07, "loss": 1.572, "step": 2191 }, { "epoch": 0.15273664773717033, "grad_norm": 0.6956811615086338, "learning_rate": 6.644247733123273e-07, "loss": 1.5829, "step": 2192 }, { "epoch": 0.15280632686478765, "grad_norm": 0.7320923923783303, "learning_rate": 6.643929327063526e-07, "loss": 1.628, "step": 2193 }, { "epoch": 0.15287600599240497, "grad_norm": 0.7024646754235628, "learning_rate": 6.643610787112949e-07, "loss": 1.4922, "step": 2194 }, { "epoch": 0.1529456851200223, "grad_norm": 0.7097415544166261, "learning_rate": 6.643292113286809e-07, "loss": 1.5992, "step": 2195 }, { "epoch": 0.15301536424763962, "grad_norm": 0.6985595433662738, "learning_rate": 6.642973305600375e-07, "loss": 1.4852, "step": 2196 }, { "epoch": 0.15308504337525694, "grad_norm": 0.6991483533810473, "learning_rate": 6.642654364068927e-07, "loss": 1.4809, "step": 2197 }, { "epoch": 0.15315472250287426, "grad_norm": 0.7908856281336831, "learning_rate": 6.642335288707749e-07, "loss": 1.5996, "step": 2198 }, { "epoch": 0.15322440163049159, "grad_norm": 0.696976024748579, "learning_rate": 6.642016079532131e-07, "loss": 1.5017, "step": 2199 }, { "epoch": 0.1532940807581089, "grad_norm": 0.6691993666994807, "learning_rate": 6.64169673655737e-07, "loss": 1.5449, "step": 2200 }, { "epoch": 0.15336375988572623, "grad_norm": 0.738678893957917, "learning_rate": 6.641377259798771e-07, "loss": 1.5894, "step": 2201 }, { "epoch": 0.15343343901334355, "grad_norm": 0.6999886972654056, "learning_rate": 6.641057649271644e-07, "loss": 1.5552, "step": 2202 }, { "epoch": 0.15350311814096088, "grad_norm": 0.7839373696668602, "learning_rate": 6.640737904991303e-07, "loss": 1.628, "step": 2203 }, { "epoch": 0.1535727972685782, "grad_norm": 0.6958760832939552, "learning_rate": 6.640418026973073e-07, "loss": 1.5806, "step": 2204 }, { "epoch": 0.15364247639619552, "grad_norm": 0.6935626553735474, "learning_rate": 6.640098015232282e-07, "loss": 1.5275, "step": 2205 }, { "epoch": 0.15371215552381284, "grad_norm": 0.7681046260488631, "learning_rate": 6.639777869784266e-07, "loss": 1.6231, "step": 2206 }, { "epoch": 0.15378183465143017, "grad_norm": 0.6894945589980458, "learning_rate": 6.639457590644367e-07, "loss": 1.4775, "step": 2207 }, { "epoch": 0.1538515137790475, "grad_norm": 0.7261850091840004, "learning_rate": 6.639137177827935e-07, "loss": 1.5407, "step": 2208 }, { "epoch": 0.1539211929066648, "grad_norm": 0.6606501584232484, "learning_rate": 6.638816631350324e-07, "loss": 1.4989, "step": 2209 }, { "epoch": 0.15399087203428213, "grad_norm": 0.7152784548111163, "learning_rate": 6.638495951226891e-07, "loss": 1.4855, "step": 2210 }, { "epoch": 0.15406055116189946, "grad_norm": 0.7522656609768341, "learning_rate": 6.638175137473011e-07, "loss": 1.5111, "step": 2211 }, { "epoch": 0.15413023028951678, "grad_norm": 0.7745867847921484, "learning_rate": 6.637854190104053e-07, "loss": 1.534, "step": 2212 }, { "epoch": 0.1541999094171341, "grad_norm": 0.7233835267335126, "learning_rate": 6.637533109135399e-07, "loss": 1.623, "step": 2213 }, { "epoch": 0.15426958854475142, "grad_norm": 0.7270410330528998, "learning_rate": 6.637211894582435e-07, "loss": 1.6425, "step": 2214 }, { "epoch": 0.15433926767236875, "grad_norm": 0.7697491418177266, "learning_rate": 6.636890546460556e-07, "loss": 1.5283, "step": 2215 }, { "epoch": 0.15440894679998607, "grad_norm": 0.7117412043799898, "learning_rate": 6.636569064785162e-07, "loss": 1.5912, "step": 2216 }, { "epoch": 0.1544786259276034, "grad_norm": 0.7505816377254368, "learning_rate": 6.636247449571654e-07, "loss": 1.458, "step": 2217 }, { "epoch": 0.1545483050552207, "grad_norm": 0.6735296173568921, "learning_rate": 6.63592570083545e-07, "loss": 1.5044, "step": 2218 }, { "epoch": 0.15461798418283804, "grad_norm": 0.6846941209657414, "learning_rate": 6.635603818591967e-07, "loss": 1.5567, "step": 2219 }, { "epoch": 0.15468766331045536, "grad_norm": 0.6929598112898522, "learning_rate": 6.635281802856632e-07, "loss": 1.5509, "step": 2220 }, { "epoch": 0.15475734243807268, "grad_norm": 0.6792500496086945, "learning_rate": 6.634959653644873e-07, "loss": 1.5822, "step": 2221 }, { "epoch": 0.15482702156569, "grad_norm": 0.6842858576959354, "learning_rate": 6.634637370972131e-07, "loss": 1.5215, "step": 2222 }, { "epoch": 0.15489670069330733, "grad_norm": 0.6924953687899594, "learning_rate": 6.634314954853847e-07, "loss": 1.5317, "step": 2223 }, { "epoch": 0.15496637982092465, "grad_norm": 0.7730565766794356, "learning_rate": 6.633992405305477e-07, "loss": 1.5001, "step": 2224 }, { "epoch": 0.15503605894854197, "grad_norm": 0.7204812325831732, "learning_rate": 6.633669722342475e-07, "loss": 1.5332, "step": 2225 }, { "epoch": 0.1551057380761593, "grad_norm": 0.6750704593792517, "learning_rate": 6.633346905980304e-07, "loss": 1.5395, "step": 2226 }, { "epoch": 0.15517541720377662, "grad_norm": 0.7399175686766039, "learning_rate": 6.633023956234436e-07, "loss": 1.6439, "step": 2227 }, { "epoch": 0.15524509633139394, "grad_norm": 0.6596536831519212, "learning_rate": 6.632700873120346e-07, "loss": 1.4794, "step": 2228 }, { "epoch": 0.15531477545901126, "grad_norm": 0.6992076941173299, "learning_rate": 6.632377656653518e-07, "loss": 1.5523, "step": 2229 }, { "epoch": 0.15538445458662858, "grad_norm": 0.680191070215852, "learning_rate": 6.63205430684944e-07, "loss": 1.7234, "step": 2230 }, { "epoch": 0.1554541337142459, "grad_norm": 0.7405456874943854, "learning_rate": 6.631730823723609e-07, "loss": 1.6782, "step": 2231 }, { "epoch": 0.15552381284186323, "grad_norm": 0.7827715409835366, "learning_rate": 6.631407207291526e-07, "loss": 1.5043, "step": 2232 }, { "epoch": 0.15559349196948055, "grad_norm": 0.7125663974576547, "learning_rate": 6.631083457568699e-07, "loss": 1.5413, "step": 2233 }, { "epoch": 0.15566317109709787, "grad_norm": 0.7187003784037695, "learning_rate": 6.630759574570644e-07, "loss": 1.4771, "step": 2234 }, { "epoch": 0.1557328502247152, "grad_norm": 0.6781974858369967, "learning_rate": 6.63043555831288e-07, "loss": 1.523, "step": 2235 }, { "epoch": 0.15580252935233252, "grad_norm": 0.6951594427619806, "learning_rate": 6.630111408810937e-07, "loss": 1.6086, "step": 2236 }, { "epoch": 0.15587220847994984, "grad_norm": 0.7338407170382629, "learning_rate": 6.629787126080348e-07, "loss": 1.6122, "step": 2237 }, { "epoch": 0.15594188760756716, "grad_norm": 0.7159255072884011, "learning_rate": 6.629462710136653e-07, "loss": 1.5316, "step": 2238 }, { "epoch": 0.1560115667351845, "grad_norm": 0.7532205783926076, "learning_rate": 6.629138160995399e-07, "loss": 1.7628, "step": 2239 }, { "epoch": 0.1560812458628018, "grad_norm": 0.6981258987523404, "learning_rate": 6.628813478672139e-07, "loss": 1.4858, "step": 2240 }, { "epoch": 0.15615092499041913, "grad_norm": 0.6879762245284705, "learning_rate": 6.628488663182431e-07, "loss": 1.5243, "step": 2241 }, { "epoch": 0.15622060411803645, "grad_norm": 0.7106885629753927, "learning_rate": 6.628163714541844e-07, "loss": 1.4806, "step": 2242 }, { "epoch": 0.15629028324565378, "grad_norm": 0.774813261180234, "learning_rate": 6.627838632765946e-07, "loss": 1.5369, "step": 2243 }, { "epoch": 0.1563599623732711, "grad_norm": 0.7211700093562131, "learning_rate": 6.627513417870319e-07, "loss": 1.5611, "step": 2244 }, { "epoch": 0.15642964150088842, "grad_norm": 0.6932231497601744, "learning_rate": 6.627188069870548e-07, "loss": 1.4099, "step": 2245 }, { "epoch": 0.15649932062850574, "grad_norm": 0.7036873950506464, "learning_rate": 6.626862588782221e-07, "loss": 1.5305, "step": 2246 }, { "epoch": 0.15656899975612304, "grad_norm": 0.7089690133313291, "learning_rate": 6.626536974620939e-07, "loss": 1.4183, "step": 2247 }, { "epoch": 0.15663867888374036, "grad_norm": 0.7105684077832143, "learning_rate": 6.626211227402306e-07, "loss": 1.5593, "step": 2248 }, { "epoch": 0.15670835801135768, "grad_norm": 0.6832997394422372, "learning_rate": 6.625885347141931e-07, "loss": 1.4509, "step": 2249 }, { "epoch": 0.156778037138975, "grad_norm": 0.6977345495829199, "learning_rate": 6.625559333855431e-07, "loss": 1.6193, "step": 2250 }, { "epoch": 0.15684771626659233, "grad_norm": 0.6908184277057992, "learning_rate": 6.625233187558431e-07, "loss": 1.5806, "step": 2251 }, { "epoch": 0.15691739539420965, "grad_norm": 0.7326095761597486, "learning_rate": 6.624906908266557e-07, "loss": 1.6088, "step": 2252 }, { "epoch": 0.15698707452182697, "grad_norm": 0.650882245462365, "learning_rate": 6.624580495995447e-07, "loss": 1.4173, "step": 2253 }, { "epoch": 0.1570567536494443, "grad_norm": 0.7035964201999952, "learning_rate": 6.624253950760746e-07, "loss": 1.6664, "step": 2254 }, { "epoch": 0.15712643277706162, "grad_norm": 0.7171947278524486, "learning_rate": 6.623927272578098e-07, "loss": 1.5436, "step": 2255 }, { "epoch": 0.15719611190467894, "grad_norm": 0.7582978366124182, "learning_rate": 6.623600461463162e-07, "loss": 1.5061, "step": 2256 }, { "epoch": 0.15726579103229626, "grad_norm": 0.7197080632735179, "learning_rate": 6.623273517431597e-07, "loss": 1.4875, "step": 2257 }, { "epoch": 0.1573354701599136, "grad_norm": 0.6975155150989609, "learning_rate": 6.622946440499072e-07, "loss": 1.375, "step": 2258 }, { "epoch": 0.1574051492875309, "grad_norm": 0.7211246058188371, "learning_rate": 6.62261923068126e-07, "loss": 1.5137, "step": 2259 }, { "epoch": 0.15747482841514823, "grad_norm": 0.6786300156106961, "learning_rate": 6.622291887993843e-07, "loss": 1.4291, "step": 2260 }, { "epoch": 0.15754450754276556, "grad_norm": 0.7286101497808452, "learning_rate": 6.621964412452507e-07, "loss": 1.4559, "step": 2261 }, { "epoch": 0.15761418667038288, "grad_norm": 0.6679841831373213, "learning_rate": 6.621636804072947e-07, "loss": 1.4893, "step": 2262 }, { "epoch": 0.1576838657980002, "grad_norm": 0.7239015658745319, "learning_rate": 6.621309062870859e-07, "loss": 1.5985, "step": 2263 }, { "epoch": 0.15775354492561752, "grad_norm": 0.8033552658475741, "learning_rate": 6.620981188861952e-07, "loss": 1.5316, "step": 2264 }, { "epoch": 0.15782322405323485, "grad_norm": 0.8488918942742487, "learning_rate": 6.620653182061938e-07, "loss": 1.6772, "step": 2265 }, { "epoch": 0.15789290318085217, "grad_norm": 0.7367837946918524, "learning_rate": 6.620325042486537e-07, "loss": 1.5063, "step": 2266 }, { "epoch": 0.1579625823084695, "grad_norm": 0.7017635363309525, "learning_rate": 6.619996770151471e-07, "loss": 1.3885, "step": 2267 }, { "epoch": 0.1580322614360868, "grad_norm": 0.7708473358379301, "learning_rate": 6.619668365072472e-07, "loss": 1.5045, "step": 2268 }, { "epoch": 0.15810194056370414, "grad_norm": 0.6922310385911277, "learning_rate": 6.619339827265281e-07, "loss": 1.445, "step": 2269 }, { "epoch": 0.15817161969132146, "grad_norm": 0.7504965139466662, "learning_rate": 6.619011156745639e-07, "loss": 1.5939, "step": 2270 }, { "epoch": 0.15824129881893878, "grad_norm": 0.7091372028940393, "learning_rate": 6.618682353529299e-07, "loss": 1.5027, "step": 2271 }, { "epoch": 0.1583109779465561, "grad_norm": 0.6747431666388362, "learning_rate": 6.618353417632015e-07, "loss": 1.5093, "step": 2272 }, { "epoch": 0.15838065707417343, "grad_norm": 0.7506009039438517, "learning_rate": 6.618024349069553e-07, "loss": 1.5443, "step": 2273 }, { "epoch": 0.15845033620179075, "grad_norm": 0.7136774347676139, "learning_rate": 6.617695147857681e-07, "loss": 1.4922, "step": 2274 }, { "epoch": 0.15852001532940807, "grad_norm": 0.6607762672674673, "learning_rate": 6.617365814012176e-07, "loss": 1.4119, "step": 2275 }, { "epoch": 0.1585896944570254, "grad_norm": 0.7219437275521969, "learning_rate": 6.61703634754882e-07, "loss": 1.5683, "step": 2276 }, { "epoch": 0.15865937358464272, "grad_norm": 0.7557107319667966, "learning_rate": 6.616706748483401e-07, "loss": 1.6325, "step": 2277 }, { "epoch": 0.15872905271226004, "grad_norm": 0.7296816006462254, "learning_rate": 6.616377016831715e-07, "loss": 1.6051, "step": 2278 }, { "epoch": 0.15879873183987736, "grad_norm": 0.6952674966414605, "learning_rate": 6.616047152609562e-07, "loss": 1.484, "step": 2279 }, { "epoch": 0.15886841096749468, "grad_norm": 0.7239655613929008, "learning_rate": 6.615717155832753e-07, "loss": 1.4567, "step": 2280 }, { "epoch": 0.158938090095112, "grad_norm": 0.6928217629736545, "learning_rate": 6.615387026517097e-07, "loss": 1.6081, "step": 2281 }, { "epoch": 0.15900776922272933, "grad_norm": 0.6737837113780031, "learning_rate": 6.61505676467842e-07, "loss": 1.5322, "step": 2282 }, { "epoch": 0.15907744835034665, "grad_norm": 0.7578826835208271, "learning_rate": 6.614726370332546e-07, "loss": 1.5239, "step": 2283 }, { "epoch": 0.15914712747796397, "grad_norm": 0.707541021895692, "learning_rate": 6.614395843495308e-07, "loss": 1.4852, "step": 2284 }, { "epoch": 0.1592168066055813, "grad_norm": 0.7077574571137548, "learning_rate": 6.614065184182546e-07, "loss": 1.6096, "step": 2285 }, { "epoch": 0.15928648573319862, "grad_norm": 0.7182478632727132, "learning_rate": 6.613734392410106e-07, "loss": 1.5395, "step": 2286 }, { "epoch": 0.15935616486081594, "grad_norm": 0.7367290616820319, "learning_rate": 6.61340346819384e-07, "loss": 1.5181, "step": 2287 }, { "epoch": 0.15942584398843326, "grad_norm": 0.7042290216704357, "learning_rate": 6.613072411549606e-07, "loss": 1.5792, "step": 2288 }, { "epoch": 0.15949552311605059, "grad_norm": 0.7080735049459942, "learning_rate": 6.61274122249327e-07, "loss": 1.5474, "step": 2289 }, { "epoch": 0.1595652022436679, "grad_norm": 0.8896374246651081, "learning_rate": 6.612409901040703e-07, "loss": 1.697, "step": 2290 }, { "epoch": 0.15963488137128523, "grad_norm": 0.7105735026902991, "learning_rate": 6.612078447207782e-07, "loss": 1.5946, "step": 2291 }, { "epoch": 0.15970456049890255, "grad_norm": 0.6961245254676526, "learning_rate": 6.611746861010392e-07, "loss": 1.4702, "step": 2292 }, { "epoch": 0.15977423962651988, "grad_norm": 0.7613831462750842, "learning_rate": 6.611415142464423e-07, "loss": 1.4611, "step": 2293 }, { "epoch": 0.1598439187541372, "grad_norm": 0.7766601055554404, "learning_rate": 6.61108329158577e-07, "loss": 1.5536, "step": 2294 }, { "epoch": 0.15991359788175452, "grad_norm": 0.7216761614181849, "learning_rate": 6.610751308390338e-07, "loss": 1.62, "step": 2295 }, { "epoch": 0.15998327700937184, "grad_norm": 0.7602123159942696, "learning_rate": 6.610419192894035e-07, "loss": 1.5354, "step": 2296 }, { "epoch": 0.16005295613698917, "grad_norm": 0.7208680007068906, "learning_rate": 6.610086945112779e-07, "loss": 1.507, "step": 2297 }, { "epoch": 0.1601226352646065, "grad_norm": 0.7271009203166914, "learning_rate": 6.609754565062488e-07, "loss": 1.5454, "step": 2298 }, { "epoch": 0.1601923143922238, "grad_norm": 0.7155591566547312, "learning_rate": 6.609422052759093e-07, "loss": 1.5072, "step": 2299 }, { "epoch": 0.16026199351984113, "grad_norm": 0.7646577990143016, "learning_rate": 6.60908940821853e-07, "loss": 1.5958, "step": 2300 }, { "epoch": 0.16033167264745846, "grad_norm": 0.8432643486292445, "learning_rate": 6.608756631456737e-07, "loss": 1.5313, "step": 2301 }, { "epoch": 0.16040135177507578, "grad_norm": 0.7165983860712775, "learning_rate": 6.608423722489663e-07, "loss": 1.5866, "step": 2302 }, { "epoch": 0.1604710309026931, "grad_norm": 0.6948385437059152, "learning_rate": 6.608090681333261e-07, "loss": 1.4906, "step": 2303 }, { "epoch": 0.16054071003031042, "grad_norm": 0.6881623948883927, "learning_rate": 6.607757508003492e-07, "loss": 1.6308, "step": 2304 }, { "epoch": 0.16061038915792775, "grad_norm": 0.7202743271110196, "learning_rate": 6.60742420251632e-07, "loss": 1.5586, "step": 2305 }, { "epoch": 0.16068006828554507, "grad_norm": 0.728905182320733, "learning_rate": 6.607090764887721e-07, "loss": 1.382, "step": 2306 }, { "epoch": 0.1607497474131624, "grad_norm": 0.7111857908738513, "learning_rate": 6.606757195133672e-07, "loss": 1.5649, "step": 2307 }, { "epoch": 0.16081942654077971, "grad_norm": 0.7302731077054829, "learning_rate": 6.606423493270158e-07, "loss": 1.5731, "step": 2308 }, { "epoch": 0.16088910566839704, "grad_norm": 0.6801391548735751, "learning_rate": 6.606089659313172e-07, "loss": 1.5673, "step": 2309 }, { "epoch": 0.16095878479601436, "grad_norm": 0.7827230409718977, "learning_rate": 6.605755693278711e-07, "loss": 1.502, "step": 2310 }, { "epoch": 0.16102846392363168, "grad_norm": 0.7035852984193754, "learning_rate": 6.605421595182779e-07, "loss": 1.5612, "step": 2311 }, { "epoch": 0.161098143051249, "grad_norm": 0.6825492690800449, "learning_rate": 6.605087365041389e-07, "loss": 1.5236, "step": 2312 }, { "epoch": 0.16116782217886633, "grad_norm": 0.6942928057626722, "learning_rate": 6.604753002870555e-07, "loss": 1.5146, "step": 2313 }, { "epoch": 0.16123750130648365, "grad_norm": 0.8494657143717437, "learning_rate": 6.604418508686302e-07, "loss": 1.7396, "step": 2314 }, { "epoch": 0.16130718043410097, "grad_norm": 0.6673471103730584, "learning_rate": 6.604083882504659e-07, "loss": 1.5284, "step": 2315 }, { "epoch": 0.1613768595617183, "grad_norm": 0.7528593628272984, "learning_rate": 6.603749124341663e-07, "loss": 1.6444, "step": 2316 }, { "epoch": 0.16144653868933562, "grad_norm": 0.7210025295412614, "learning_rate": 6.603414234213357e-07, "loss": 1.6882, "step": 2317 }, { "epoch": 0.16151621781695294, "grad_norm": 0.7069438472104946, "learning_rate": 6.603079212135785e-07, "loss": 1.6581, "step": 2318 }, { "epoch": 0.16158589694457026, "grad_norm": 0.7138732731517777, "learning_rate": 6.602744058125009e-07, "loss": 1.5197, "step": 2319 }, { "epoch": 0.16165557607218758, "grad_norm": 0.7168132849247152, "learning_rate": 6.602408772197084e-07, "loss": 1.4548, "step": 2320 }, { "epoch": 0.1617252551998049, "grad_norm": 0.7326189178797672, "learning_rate": 6.60207335436808e-07, "loss": 1.6001, "step": 2321 }, { "epoch": 0.16179493432742223, "grad_norm": 0.6927914280769315, "learning_rate": 6.601737804654071e-07, "loss": 1.6246, "step": 2322 }, { "epoch": 0.16186461345503955, "grad_norm": 0.7548539239584744, "learning_rate": 6.601402123071138e-07, "loss": 1.7135, "step": 2323 }, { "epoch": 0.16193429258265687, "grad_norm": 0.8231932734792815, "learning_rate": 6.601066309635366e-07, "loss": 1.6542, "step": 2324 }, { "epoch": 0.1620039717102742, "grad_norm": 0.7076281356457971, "learning_rate": 6.600730364362849e-07, "loss": 1.6272, "step": 2325 }, { "epoch": 0.16207365083789152, "grad_norm": 0.7152242206236852, "learning_rate": 6.600394287269687e-07, "loss": 1.4146, "step": 2326 }, { "epoch": 0.16214332996550884, "grad_norm": 0.7079726966566244, "learning_rate": 6.600058078371983e-07, "loss": 1.4889, "step": 2327 }, { "epoch": 0.16221300909312616, "grad_norm": 0.7254514558992783, "learning_rate": 6.599721737685849e-07, "loss": 1.502, "step": 2328 }, { "epoch": 0.1622826882207435, "grad_norm": 0.6421208663306851, "learning_rate": 6.599385265227405e-07, "loss": 1.4715, "step": 2329 }, { "epoch": 0.1623523673483608, "grad_norm": 0.7134578161355969, "learning_rate": 6.599048661012776e-07, "loss": 1.4987, "step": 2330 }, { "epoch": 0.16242204647597813, "grad_norm": 0.6944470963507515, "learning_rate": 6.598711925058091e-07, "loss": 1.518, "step": 2331 }, { "epoch": 0.16249172560359545, "grad_norm": 0.7024672276158266, "learning_rate": 6.598375057379487e-07, "loss": 1.6774, "step": 2332 }, { "epoch": 0.16256140473121278, "grad_norm": 0.7529700252137836, "learning_rate": 6.598038057993109e-07, "loss": 1.5766, "step": 2333 }, { "epoch": 0.1626310838588301, "grad_norm": 0.7632965108002485, "learning_rate": 6.597700926915103e-07, "loss": 1.6279, "step": 2334 }, { "epoch": 0.16270076298644742, "grad_norm": 0.7201730099281007, "learning_rate": 6.59736366416163e-07, "loss": 1.5469, "step": 2335 }, { "epoch": 0.16277044211406475, "grad_norm": 0.6996857758852515, "learning_rate": 6.59702626974885e-07, "loss": 1.5442, "step": 2336 }, { "epoch": 0.16284012124168207, "grad_norm": 0.6917112721568993, "learning_rate": 6.596688743692931e-07, "loss": 1.563, "step": 2337 }, { "epoch": 0.1629098003692994, "grad_norm": 0.666913569996407, "learning_rate": 6.596351086010048e-07, "loss": 1.638, "step": 2338 }, { "epoch": 0.16297947949691668, "grad_norm": 0.6802046539952517, "learning_rate": 6.596013296716384e-07, "loss": 1.5565, "step": 2339 }, { "epoch": 0.163049158624534, "grad_norm": 0.7063793504298344, "learning_rate": 6.595675375828124e-07, "loss": 1.5484, "step": 2340 }, { "epoch": 0.16311883775215133, "grad_norm": 0.7332190383926808, "learning_rate": 6.595337323361465e-07, "loss": 1.5243, "step": 2341 }, { "epoch": 0.16318851687976865, "grad_norm": 0.7329461018523288, "learning_rate": 6.594999139332605e-07, "loss": 1.7221, "step": 2342 }, { "epoch": 0.16325819600738598, "grad_norm": 0.7419199688652642, "learning_rate": 6.594660823757749e-07, "loss": 1.5138, "step": 2343 }, { "epoch": 0.1633278751350033, "grad_norm": 0.7639302342058242, "learning_rate": 6.594322376653114e-07, "loss": 1.6771, "step": 2344 }, { "epoch": 0.16339755426262062, "grad_norm": 0.7472337265405039, "learning_rate": 6.593983798034915e-07, "loss": 1.518, "step": 2345 }, { "epoch": 0.16346723339023794, "grad_norm": 0.7281142288983273, "learning_rate": 6.59364508791938e-07, "loss": 1.5627, "step": 2346 }, { "epoch": 0.16353691251785527, "grad_norm": 0.7110147549449994, "learning_rate": 6.593306246322739e-07, "loss": 1.5052, "step": 2347 }, { "epoch": 0.1636065916454726, "grad_norm": 0.7074364175708336, "learning_rate": 6.592967273261232e-07, "loss": 1.5836, "step": 2348 }, { "epoch": 0.1636762707730899, "grad_norm": 0.7421192254211886, "learning_rate": 6.592628168751102e-07, "loss": 1.5003, "step": 2349 }, { "epoch": 0.16374594990070723, "grad_norm": 0.7168279539068398, "learning_rate": 6.592288932808598e-07, "loss": 1.5503, "step": 2350 }, { "epoch": 0.16381562902832456, "grad_norm": 0.7188929197436819, "learning_rate": 6.591949565449979e-07, "loss": 1.5518, "step": 2351 }, { "epoch": 0.16388530815594188, "grad_norm": 0.762876427581947, "learning_rate": 6.591610066691508e-07, "loss": 1.5161, "step": 2352 }, { "epoch": 0.1639549872835592, "grad_norm": 0.764966904869516, "learning_rate": 6.591270436549456e-07, "loss": 1.5519, "step": 2353 }, { "epoch": 0.16402466641117652, "grad_norm": 0.7005514577699263, "learning_rate": 6.590930675040095e-07, "loss": 1.5517, "step": 2354 }, { "epoch": 0.16409434553879385, "grad_norm": 0.6770300769048003, "learning_rate": 6.59059078217971e-07, "loss": 1.4233, "step": 2355 }, { "epoch": 0.16416402466641117, "grad_norm": 0.7150779632036103, "learning_rate": 6.590250757984588e-07, "loss": 1.5995, "step": 2356 }, { "epoch": 0.1642337037940285, "grad_norm": 0.6993402330838724, "learning_rate": 6.589910602471023e-07, "loss": 1.4289, "step": 2357 }, { "epoch": 0.1643033829216458, "grad_norm": 0.7129939019203483, "learning_rate": 6.589570315655318e-07, "loss": 1.6153, "step": 2358 }, { "epoch": 0.16437306204926314, "grad_norm": 0.7046647737874796, "learning_rate": 6.589229897553779e-07, "loss": 1.582, "step": 2359 }, { "epoch": 0.16444274117688046, "grad_norm": 0.7290829227675538, "learning_rate": 6.58888934818272e-07, "loss": 1.4972, "step": 2360 }, { "epoch": 0.16451242030449778, "grad_norm": 0.7072365195859416, "learning_rate": 6.588548667558461e-07, "loss": 1.4581, "step": 2361 }, { "epoch": 0.1645820994321151, "grad_norm": 0.735679978353686, "learning_rate": 6.588207855697326e-07, "loss": 1.5999, "step": 2362 }, { "epoch": 0.16465177855973243, "grad_norm": 0.7120299813416344, "learning_rate": 6.587866912615651e-07, "loss": 1.5704, "step": 2363 }, { "epoch": 0.16472145768734975, "grad_norm": 0.6820387705988862, "learning_rate": 6.587525838329772e-07, "loss": 1.4342, "step": 2364 }, { "epoch": 0.16479113681496707, "grad_norm": 0.7107393107714317, "learning_rate": 6.587184632856035e-07, "loss": 1.544, "step": 2365 }, { "epoch": 0.1648608159425844, "grad_norm": 0.7535777040315426, "learning_rate": 6.58684329621079e-07, "loss": 1.6942, "step": 2366 }, { "epoch": 0.16493049507020172, "grad_norm": 0.6771116719963444, "learning_rate": 6.586501828410397e-07, "loss": 1.5631, "step": 2367 }, { "epoch": 0.16500017419781904, "grad_norm": 0.6617367548227207, "learning_rate": 6.586160229471219e-07, "loss": 1.4529, "step": 2368 }, { "epoch": 0.16506985332543636, "grad_norm": 0.756471565795342, "learning_rate": 6.585818499409624e-07, "loss": 1.6085, "step": 2369 }, { "epoch": 0.16513953245305368, "grad_norm": 0.7215067440748905, "learning_rate": 6.585476638241991e-07, "loss": 1.5597, "step": 2370 }, { "epoch": 0.165209211580671, "grad_norm": 0.6715639533999088, "learning_rate": 6.585134645984701e-07, "loss": 1.5579, "step": 2371 }, { "epoch": 0.16527889070828833, "grad_norm": 0.7528494030685173, "learning_rate": 6.584792522654144e-07, "loss": 1.613, "step": 2372 }, { "epoch": 0.16534856983590565, "grad_norm": 0.9070409370704179, "learning_rate": 6.584450268266715e-07, "loss": 1.5739, "step": 2373 }, { "epoch": 0.16541824896352297, "grad_norm": 0.7127053800060373, "learning_rate": 6.584107882838815e-07, "loss": 1.5985, "step": 2374 }, { "epoch": 0.1654879280911403, "grad_norm": 0.7285186039275199, "learning_rate": 6.583765366386853e-07, "loss": 1.5165, "step": 2375 }, { "epoch": 0.16555760721875762, "grad_norm": 0.7571340368115554, "learning_rate": 6.583422718927242e-07, "loss": 1.4696, "step": 2376 }, { "epoch": 0.16562728634637494, "grad_norm": 0.6658658814069529, "learning_rate": 6.583079940476402e-07, "loss": 1.4661, "step": 2377 }, { "epoch": 0.16569696547399226, "grad_norm": 0.8030332451545158, "learning_rate": 6.582737031050761e-07, "loss": 1.8509, "step": 2378 }, { "epoch": 0.1657666446016096, "grad_norm": 0.6966252141405037, "learning_rate": 6.582393990666751e-07, "loss": 1.5828, "step": 2379 }, { "epoch": 0.1658363237292269, "grad_norm": 0.7364229221914593, "learning_rate": 6.582050819340812e-07, "loss": 1.6218, "step": 2380 }, { "epoch": 0.16590600285684423, "grad_norm": 0.7196295616760052, "learning_rate": 6.581707517089389e-07, "loss": 1.5562, "step": 2381 }, { "epoch": 0.16597568198446155, "grad_norm": 0.7378354060426576, "learning_rate": 6.581364083928932e-07, "loss": 1.6569, "step": 2382 }, { "epoch": 0.16604536111207888, "grad_norm": 0.7020021368288268, "learning_rate": 6.581020519875903e-07, "loss": 1.4947, "step": 2383 }, { "epoch": 0.1661150402396962, "grad_norm": 23.011605268027264, "learning_rate": 6.580676824946763e-07, "loss": 1.4042, "step": 2384 }, { "epoch": 0.16618471936731352, "grad_norm": 0.7040826666782872, "learning_rate": 6.580332999157983e-07, "loss": 1.469, "step": 2385 }, { "epoch": 0.16625439849493084, "grad_norm": 0.7218140697486265, "learning_rate": 6.579989042526041e-07, "loss": 1.5228, "step": 2386 }, { "epoch": 0.16632407762254817, "grad_norm": 0.7123392833850649, "learning_rate": 6.579644955067421e-07, "loss": 1.5075, "step": 2387 }, { "epoch": 0.1663937567501655, "grad_norm": 0.742613976751209, "learning_rate": 6.579300736798608e-07, "loss": 1.6328, "step": 2388 }, { "epoch": 0.1664634358777828, "grad_norm": 0.71508688345372, "learning_rate": 6.578956387736101e-07, "loss": 1.5266, "step": 2389 }, { "epoch": 0.16653311500540013, "grad_norm": 0.7778468846082288, "learning_rate": 6.578611907896403e-07, "loss": 1.6486, "step": 2390 }, { "epoch": 0.16660279413301746, "grad_norm": 0.6972662449499131, "learning_rate": 6.57826729729602e-07, "loss": 1.5245, "step": 2391 }, { "epoch": 0.16667247326063478, "grad_norm": 0.6742424513816168, "learning_rate": 6.577922555951467e-07, "loss": 1.4659, "step": 2392 }, { "epoch": 0.1667421523882521, "grad_norm": 0.7191566959015561, "learning_rate": 6.577577683879266e-07, "loss": 1.6706, "step": 2393 }, { "epoch": 0.16681183151586942, "grad_norm": 0.742215586917538, "learning_rate": 6.577232681095941e-07, "loss": 1.6183, "step": 2394 }, { "epoch": 0.16688151064348675, "grad_norm": 0.7459211762633696, "learning_rate": 6.576887547618028e-07, "loss": 1.6543, "step": 2395 }, { "epoch": 0.16695118977110407, "grad_norm": 0.7406229483744687, "learning_rate": 6.576542283462065e-07, "loss": 1.6511, "step": 2396 }, { "epoch": 0.1670208688987214, "grad_norm": 0.7041449161282237, "learning_rate": 6.576196888644599e-07, "loss": 1.4184, "step": 2397 }, { "epoch": 0.16709054802633871, "grad_norm": 0.705938722848296, "learning_rate": 6.57585136318218e-07, "loss": 1.5844, "step": 2398 }, { "epoch": 0.16716022715395604, "grad_norm": 0.7396198963679226, "learning_rate": 6.575505707091368e-07, "loss": 1.5891, "step": 2399 }, { "epoch": 0.16722990628157336, "grad_norm": 0.7350092724605711, "learning_rate": 6.575159920388728e-07, "loss": 1.6338, "step": 2400 }, { "epoch": 0.16729958540919068, "grad_norm": 0.7163445839743655, "learning_rate": 6.57481400309083e-07, "loss": 1.5543, "step": 2401 }, { "epoch": 0.167369264536808, "grad_norm": 0.7149076387617239, "learning_rate": 6.574467955214251e-07, "loss": 1.5448, "step": 2402 }, { "epoch": 0.16743894366442533, "grad_norm": 0.7125466696460487, "learning_rate": 6.574121776775573e-07, "loss": 1.3838, "step": 2403 }, { "epoch": 0.16750862279204265, "grad_norm": 0.7253541486554131, "learning_rate": 6.573775467791388e-07, "loss": 1.6541, "step": 2404 }, { "epoch": 0.16757830191965997, "grad_norm": 0.6888729373269321, "learning_rate": 6.57342902827829e-07, "loss": 1.4966, "step": 2405 }, { "epoch": 0.1676479810472773, "grad_norm": 0.7242788546055735, "learning_rate": 6.573082458252883e-07, "loss": 1.4608, "step": 2406 }, { "epoch": 0.16771766017489462, "grad_norm": 0.719095591694848, "learning_rate": 6.572735757731774e-07, "loss": 1.5231, "step": 2407 }, { "epoch": 0.16778733930251194, "grad_norm": 0.7062431253120159, "learning_rate": 6.572388926731578e-07, "loss": 1.5036, "step": 2408 }, { "epoch": 0.16785701843012926, "grad_norm": 0.7380879998828161, "learning_rate": 6.572041965268916e-07, "loss": 1.6707, "step": 2409 }, { "epoch": 0.16792669755774658, "grad_norm": 0.6863769124791635, "learning_rate": 6.571694873360414e-07, "loss": 1.5437, "step": 2410 }, { "epoch": 0.1679963766853639, "grad_norm": 0.708848587877779, "learning_rate": 6.571347651022706e-07, "loss": 1.5714, "step": 2411 }, { "epoch": 0.16806605581298123, "grad_norm": 0.7038223757104983, "learning_rate": 6.571000298272432e-07, "loss": 1.5171, "step": 2412 }, { "epoch": 0.16813573494059855, "grad_norm": 0.760760658766724, "learning_rate": 6.570652815126238e-07, "loss": 1.5314, "step": 2413 }, { "epoch": 0.16820541406821587, "grad_norm": 0.7801390262080484, "learning_rate": 6.570305201600774e-07, "loss": 1.6455, "step": 2414 }, { "epoch": 0.1682750931958332, "grad_norm": 0.6874636309530708, "learning_rate": 6.569957457712702e-07, "loss": 1.4671, "step": 2415 }, { "epoch": 0.16834477232345052, "grad_norm": 0.7394775403102596, "learning_rate": 6.569609583478684e-07, "loss": 1.5806, "step": 2416 }, { "epoch": 0.16841445145106784, "grad_norm": 0.7229398082360742, "learning_rate": 6.56926157891539e-07, "loss": 1.6871, "step": 2417 }, { "epoch": 0.16848413057868517, "grad_norm": 0.6976349597214885, "learning_rate": 6.568913444039502e-07, "loss": 1.4284, "step": 2418 }, { "epoch": 0.1685538097063025, "grad_norm": 0.7426082913327527, "learning_rate": 6.568565178867696e-07, "loss": 1.6139, "step": 2419 }, { "epoch": 0.1686234888339198, "grad_norm": 0.7408162730120007, "learning_rate": 6.568216783416669e-07, "loss": 1.4382, "step": 2420 }, { "epoch": 0.16869316796153713, "grad_norm": 0.7843645939651729, "learning_rate": 6.567868257703112e-07, "loss": 1.5234, "step": 2421 }, { "epoch": 0.16876284708915446, "grad_norm": 0.7106920389232463, "learning_rate": 6.567519601743728e-07, "loss": 1.5792, "step": 2422 }, { "epoch": 0.16883252621677178, "grad_norm": 0.7380262499470066, "learning_rate": 6.567170815555226e-07, "loss": 1.5791, "step": 2423 }, { "epoch": 0.1689022053443891, "grad_norm": 0.7456240829771935, "learning_rate": 6.56682189915432e-07, "loss": 1.5671, "step": 2424 }, { "epoch": 0.16897188447200642, "grad_norm": 0.7274486962987895, "learning_rate": 6.566472852557731e-07, "loss": 1.5414, "step": 2425 }, { "epoch": 0.16904156359962375, "grad_norm": 0.7512202796475355, "learning_rate": 6.566123675782187e-07, "loss": 1.5159, "step": 2426 }, { "epoch": 0.16911124272724107, "grad_norm": 0.709986504523135, "learning_rate": 6.56577436884442e-07, "loss": 1.5121, "step": 2427 }, { "epoch": 0.1691809218548584, "grad_norm": 0.714474528207041, "learning_rate": 6.56542493176117e-07, "loss": 1.4527, "step": 2428 }, { "epoch": 0.1692506009824757, "grad_norm": 0.6975956737888491, "learning_rate": 6.565075364549182e-07, "loss": 1.5378, "step": 2429 }, { "epoch": 0.16932028011009304, "grad_norm": 0.7889336946448815, "learning_rate": 6.564725667225209e-07, "loss": 1.5498, "step": 2430 }, { "epoch": 0.16938995923771033, "grad_norm": 0.7181576447687493, "learning_rate": 6.564375839806009e-07, "loss": 1.6174, "step": 2431 }, { "epoch": 0.16945963836532765, "grad_norm": 0.7157683881150451, "learning_rate": 6.564025882308345e-07, "loss": 1.5108, "step": 2432 }, { "epoch": 0.16952931749294498, "grad_norm": 0.719495796453952, "learning_rate": 6.56367579474899e-07, "loss": 1.5718, "step": 2433 }, { "epoch": 0.1695989966205623, "grad_norm": 0.7061695225090542, "learning_rate": 6.56332557714472e-07, "loss": 1.4915, "step": 2434 }, { "epoch": 0.16966867574817962, "grad_norm": 0.7577425760099268, "learning_rate": 6.562975229512317e-07, "loss": 1.5643, "step": 2435 }, { "epoch": 0.16973835487579694, "grad_norm": 0.6952801720982502, "learning_rate": 6.562624751868573e-07, "loss": 1.49, "step": 2436 }, { "epoch": 0.16980803400341427, "grad_norm": 0.7197087596785499, "learning_rate": 6.562274144230281e-07, "loss": 1.6057, "step": 2437 }, { "epoch": 0.1698777131310316, "grad_norm": 0.6958464094196392, "learning_rate": 6.561923406614243e-07, "loss": 1.5574, "step": 2438 }, { "epoch": 0.1699473922586489, "grad_norm": 0.7366661054000561, "learning_rate": 6.56157253903727e-07, "loss": 1.5172, "step": 2439 }, { "epoch": 0.17001707138626623, "grad_norm": 0.6770931017295103, "learning_rate": 6.561221541516173e-07, "loss": 1.5862, "step": 2440 }, { "epoch": 0.17008675051388356, "grad_norm": 0.7110420060229062, "learning_rate": 6.560870414067773e-07, "loss": 1.597, "step": 2441 }, { "epoch": 0.17015642964150088, "grad_norm": 0.7620049791494978, "learning_rate": 6.560519156708898e-07, "loss": 1.6246, "step": 2442 }, { "epoch": 0.1702261087691182, "grad_norm": 0.7140259867968641, "learning_rate": 6.560167769456381e-07, "loss": 1.5858, "step": 2443 }, { "epoch": 0.17029578789673552, "grad_norm": 0.7617281906746146, "learning_rate": 6.559816252327059e-07, "loss": 1.6849, "step": 2444 }, { "epoch": 0.17036546702435285, "grad_norm": 0.7099678487031629, "learning_rate": 6.55946460533778e-07, "loss": 1.5603, "step": 2445 }, { "epoch": 0.17043514615197017, "grad_norm": 0.8066709045974286, "learning_rate": 6.559112828505397e-07, "loss": 1.6058, "step": 2446 }, { "epoch": 0.1705048252795875, "grad_norm": 0.7135349848853826, "learning_rate": 6.558760921846762e-07, "loss": 1.6007, "step": 2447 }, { "epoch": 0.1705745044072048, "grad_norm": 0.7441499343871727, "learning_rate": 6.558408885378744e-07, "loss": 1.5822, "step": 2448 }, { "epoch": 0.17064418353482214, "grad_norm": 0.7321585891025582, "learning_rate": 6.558056719118212e-07, "loss": 1.6208, "step": 2449 }, { "epoch": 0.17071386266243946, "grad_norm": 0.7988542772381881, "learning_rate": 6.557704423082042e-07, "loss": 1.6834, "step": 2450 }, { "epoch": 0.17078354179005678, "grad_norm": 0.7160620851253273, "learning_rate": 6.557351997287118e-07, "loss": 1.5184, "step": 2451 }, { "epoch": 0.1708532209176741, "grad_norm": 0.6947991412978013, "learning_rate": 6.556999441750328e-07, "loss": 1.547, "step": 2452 }, { "epoch": 0.17092290004529143, "grad_norm": 0.7031959563327964, "learning_rate": 6.556646756488567e-07, "loss": 1.4695, "step": 2453 }, { "epoch": 0.17099257917290875, "grad_norm": 0.7041369358140038, "learning_rate": 6.556293941518737e-07, "loss": 1.5845, "step": 2454 }, { "epoch": 0.17106225830052607, "grad_norm": 0.7228048984189448, "learning_rate": 6.555940996857746e-07, "loss": 1.5521, "step": 2455 }, { "epoch": 0.1711319374281434, "grad_norm": 0.763823425561266, "learning_rate": 6.555587922522507e-07, "loss": 1.514, "step": 2456 }, { "epoch": 0.17120161655576072, "grad_norm": 0.7060298331702053, "learning_rate": 6.55523471852994e-07, "loss": 1.6084, "step": 2457 }, { "epoch": 0.17127129568337804, "grad_norm": 0.7791863111355747, "learning_rate": 6.554881384896971e-07, "loss": 1.6451, "step": 2458 }, { "epoch": 0.17134097481099536, "grad_norm": 0.7548442713753598, "learning_rate": 6.554527921640534e-07, "loss": 1.5738, "step": 2459 }, { "epoch": 0.17141065393861268, "grad_norm": 0.7318943333159449, "learning_rate": 6.554174328777566e-07, "loss": 1.5405, "step": 2460 }, { "epoch": 0.17148033306623, "grad_norm": 0.7107952467450837, "learning_rate": 6.553820606325013e-07, "loss": 1.5816, "step": 2461 }, { "epoch": 0.17155001219384733, "grad_norm": 0.7148920719538931, "learning_rate": 6.553466754299825e-07, "loss": 1.4621, "step": 2462 }, { "epoch": 0.17161969132146465, "grad_norm": 15.284258988235981, "learning_rate": 6.553112772718961e-07, "loss": 1.4982, "step": 2463 }, { "epoch": 0.17168937044908197, "grad_norm": 0.7606998847724221, "learning_rate": 6.552758661599384e-07, "loss": 1.505, "step": 2464 }, { "epoch": 0.1717590495766993, "grad_norm": 0.737717860072526, "learning_rate": 6.552404420958061e-07, "loss": 1.6274, "step": 2465 }, { "epoch": 0.17182872870431662, "grad_norm": 0.6936147594438409, "learning_rate": 6.552050050811973e-07, "loss": 1.4483, "step": 2466 }, { "epoch": 0.17189840783193394, "grad_norm": 0.6821739277267969, "learning_rate": 6.551695551178097e-07, "loss": 1.5747, "step": 2467 }, { "epoch": 0.17196808695955126, "grad_norm": 0.7209446081125167, "learning_rate": 6.551340922073425e-07, "loss": 1.6935, "step": 2468 }, { "epoch": 0.1720377660871686, "grad_norm": 0.706917031810206, "learning_rate": 6.55098616351495e-07, "loss": 1.5429, "step": 2469 }, { "epoch": 0.1721074452147859, "grad_norm": 0.6657369029274698, "learning_rate": 6.550631275519674e-07, "loss": 1.6344, "step": 2470 }, { "epoch": 0.17217712434240323, "grad_norm": 0.7300297982250075, "learning_rate": 6.550276258104601e-07, "loss": 1.7434, "step": 2471 }, { "epoch": 0.17224680347002055, "grad_norm": 0.7218093987525189, "learning_rate": 6.549921111286748e-07, "loss": 1.5561, "step": 2472 }, { "epoch": 0.17231648259763788, "grad_norm": 0.7178449444982237, "learning_rate": 6.549565835083131e-07, "loss": 1.4582, "step": 2473 }, { "epoch": 0.1723861617252552, "grad_norm": 0.7341473296783434, "learning_rate": 6.549210429510778e-07, "loss": 1.5608, "step": 2474 }, { "epoch": 0.17245584085287252, "grad_norm": 0.7524545461753286, "learning_rate": 6.548854894586719e-07, "loss": 1.5571, "step": 2475 }, { "epoch": 0.17252551998048984, "grad_norm": 0.7265542621338041, "learning_rate": 6.548499230327993e-07, "loss": 1.524, "step": 2476 }, { "epoch": 0.17259519910810717, "grad_norm": 0.7105786157504762, "learning_rate": 6.548143436751646e-07, "loss": 1.4915, "step": 2477 }, { "epoch": 0.1726648782357245, "grad_norm": 0.7174301051303112, "learning_rate": 6.547787513874723e-07, "loss": 1.4659, "step": 2478 }, { "epoch": 0.1727345573633418, "grad_norm": 0.7503192380533867, "learning_rate": 6.547431461714286e-07, "loss": 1.5658, "step": 2479 }, { "epoch": 0.17280423649095913, "grad_norm": 0.6907154911256304, "learning_rate": 6.547075280287396e-07, "loss": 1.5539, "step": 2480 }, { "epoch": 0.17287391561857646, "grad_norm": 0.7794515823710214, "learning_rate": 6.546718969611121e-07, "loss": 1.7881, "step": 2481 }, { "epoch": 0.17294359474619378, "grad_norm": 0.6603868909619415, "learning_rate": 6.546362529702536e-07, "loss": 1.4557, "step": 2482 }, { "epoch": 0.1730132738738111, "grad_norm": 0.7586159790450311, "learning_rate": 6.546005960578724e-07, "loss": 1.5587, "step": 2483 }, { "epoch": 0.17308295300142842, "grad_norm": 0.7404883917977331, "learning_rate": 6.545649262256771e-07, "loss": 1.483, "step": 2484 }, { "epoch": 0.17315263212904575, "grad_norm": 0.7526067704071594, "learning_rate": 6.545292434753772e-07, "loss": 1.4999, "step": 2485 }, { "epoch": 0.17322231125666307, "grad_norm": 0.799844700462213, "learning_rate": 6.544935478086825e-07, "loss": 1.6473, "step": 2486 }, { "epoch": 0.1732919903842804, "grad_norm": 0.7884391841826116, "learning_rate": 6.544578392273038e-07, "loss": 1.5843, "step": 2487 }, { "epoch": 0.17336166951189771, "grad_norm": 0.722546423895997, "learning_rate": 6.544221177329522e-07, "loss": 1.5647, "step": 2488 }, { "epoch": 0.17343134863951504, "grad_norm": 0.6995711761145036, "learning_rate": 6.543863833273397e-07, "loss": 1.4946, "step": 2489 }, { "epoch": 0.17350102776713236, "grad_norm": 0.7608256105047294, "learning_rate": 6.543506360121787e-07, "loss": 1.5278, "step": 2490 }, { "epoch": 0.17357070689474968, "grad_norm": 0.7060114399131842, "learning_rate": 6.543148757891821e-07, "loss": 1.4342, "step": 2491 }, { "epoch": 0.173640386022367, "grad_norm": 0.7038526017797239, "learning_rate": 6.542791026600639e-07, "loss": 1.5103, "step": 2492 }, { "epoch": 0.17371006514998433, "grad_norm": 0.7454708545350585, "learning_rate": 6.542433166265382e-07, "loss": 1.5699, "step": 2493 }, { "epoch": 0.17377974427760165, "grad_norm": 0.7241434175996339, "learning_rate": 6.5420751769032e-07, "loss": 1.5849, "step": 2494 }, { "epoch": 0.17384942340521897, "grad_norm": 0.6828214180093903, "learning_rate": 6.54171705853125e-07, "loss": 1.5575, "step": 2495 }, { "epoch": 0.1739191025328363, "grad_norm": 0.7645534413737075, "learning_rate": 6.541358811166691e-07, "loss": 1.7047, "step": 2496 }, { "epoch": 0.17398878166045362, "grad_norm": 0.7599757807395299, "learning_rate": 6.541000434826694e-07, "loss": 1.5909, "step": 2497 }, { "epoch": 0.17405846078807094, "grad_norm": 0.8077318342213113, "learning_rate": 6.540641929528431e-07, "loss": 1.6113, "step": 2498 }, { "epoch": 0.17412813991568826, "grad_norm": 0.7421508001687054, "learning_rate": 6.540283295289082e-07, "loss": 1.5622, "step": 2499 }, { "epoch": 0.17419781904330559, "grad_norm": 0.7088253814000094, "learning_rate": 6.539924532125835e-07, "loss": 1.6426, "step": 2500 }, { "epoch": 0.1742674981709229, "grad_norm": 0.7474674019877895, "learning_rate": 6.539565640055884e-07, "loss": 1.5854, "step": 2501 }, { "epoch": 0.17433717729854023, "grad_norm": 0.7365714423846182, "learning_rate": 6.539206619096424e-07, "loss": 1.6054, "step": 2502 }, { "epoch": 0.17440685642615755, "grad_norm": 0.7123103017402581, "learning_rate": 6.538847469264663e-07, "loss": 1.6297, "step": 2503 }, { "epoch": 0.17447653555377488, "grad_norm": 0.7224845735928211, "learning_rate": 6.53848819057781e-07, "loss": 1.6559, "step": 2504 }, { "epoch": 0.1745462146813922, "grad_norm": 0.6926473364257885, "learning_rate": 6.538128783053084e-07, "loss": 1.6328, "step": 2505 }, { "epoch": 0.17461589380900952, "grad_norm": 0.7662335923901936, "learning_rate": 6.537769246707708e-07, "loss": 1.405, "step": 2506 }, { "epoch": 0.17468557293662684, "grad_norm": 0.6971301204534728, "learning_rate": 6.537409581558911e-07, "loss": 1.4508, "step": 2507 }, { "epoch": 0.17475525206424417, "grad_norm": 0.7439320230916382, "learning_rate": 6.537049787623931e-07, "loss": 1.5743, "step": 2508 }, { "epoch": 0.1748249311918615, "grad_norm": 0.7336682024026748, "learning_rate": 6.536689864920009e-07, "loss": 1.6968, "step": 2509 }, { "epoch": 0.1748946103194788, "grad_norm": 0.7449360351785614, "learning_rate": 6.536329813464393e-07, "loss": 1.6851, "step": 2510 }, { "epoch": 0.17496428944709613, "grad_norm": 0.7593962246800922, "learning_rate": 6.535969633274337e-07, "loss": 1.5848, "step": 2511 }, { "epoch": 0.17503396857471346, "grad_norm": 0.6892811934428639, "learning_rate": 6.535609324367102e-07, "loss": 1.5004, "step": 2512 }, { "epoch": 0.17510364770233078, "grad_norm": 0.7357891038175344, "learning_rate": 6.535248886759954e-07, "loss": 1.5088, "step": 2513 }, { "epoch": 0.1751733268299481, "grad_norm": 0.7200528123000481, "learning_rate": 6.534888320470168e-07, "loss": 1.5738, "step": 2514 }, { "epoch": 0.17524300595756542, "grad_norm": 0.6873780049343139, "learning_rate": 6.53452762551502e-07, "loss": 1.4716, "step": 2515 }, { "epoch": 0.17531268508518275, "grad_norm": 0.724561950813103, "learning_rate": 6.534166801911799e-07, "loss": 1.434, "step": 2516 }, { "epoch": 0.17538236421280007, "grad_norm": 0.7489153459676924, "learning_rate": 6.533805849677792e-07, "loss": 1.4961, "step": 2517 }, { "epoch": 0.1754520433404174, "grad_norm": 0.7401673701928653, "learning_rate": 6.533444768830302e-07, "loss": 1.5407, "step": 2518 }, { "epoch": 0.1755217224680347, "grad_norm": 0.6934330185732609, "learning_rate": 6.533083559386627e-07, "loss": 1.4262, "step": 2519 }, { "epoch": 0.17559140159565204, "grad_norm": 0.7294097563407264, "learning_rate": 6.532722221364081e-07, "loss": 1.6563, "step": 2520 }, { "epoch": 0.17566108072326936, "grad_norm": 0.6607520492198753, "learning_rate": 6.532360754779977e-07, "loss": 1.4277, "step": 2521 }, { "epoch": 0.17573075985088665, "grad_norm": 0.734449690451649, "learning_rate": 6.53199915965164e-07, "loss": 1.5041, "step": 2522 }, { "epoch": 0.17580043897850398, "grad_norm": 0.7278895608033406, "learning_rate": 6.531637435996397e-07, "loss": 1.5348, "step": 2523 }, { "epoch": 0.1758701181061213, "grad_norm": 0.6880733498662125, "learning_rate": 6.531275583831584e-07, "loss": 1.5598, "step": 2524 }, { "epoch": 0.17593979723373862, "grad_norm": 0.7219542430932584, "learning_rate": 6.530913603174539e-07, "loss": 1.6169, "step": 2525 }, { "epoch": 0.17600947636135594, "grad_norm": 0.6768504677014932, "learning_rate": 6.530551494042611e-07, "loss": 1.3468, "step": 2526 }, { "epoch": 0.17607915548897327, "grad_norm": 0.7628140093864075, "learning_rate": 6.530189256453151e-07, "loss": 1.4331, "step": 2527 }, { "epoch": 0.1761488346165906, "grad_norm": 0.6937139477500073, "learning_rate": 6.529826890423521e-07, "loss": 1.5771, "step": 2528 }, { "epoch": 0.1762185137442079, "grad_norm": 0.7446668334736429, "learning_rate": 6.529464395971085e-07, "loss": 1.5671, "step": 2529 }, { "epoch": 0.17628819287182523, "grad_norm": 0.8951513009342587, "learning_rate": 6.529101773113213e-07, "loss": 1.6593, "step": 2530 }, { "epoch": 0.17635787199944256, "grad_norm": 0.7501370933617739, "learning_rate": 6.528739021867285e-07, "loss": 1.5608, "step": 2531 }, { "epoch": 0.17642755112705988, "grad_norm": 0.7259327370195917, "learning_rate": 6.528376142250684e-07, "loss": 1.6285, "step": 2532 }, { "epoch": 0.1764972302546772, "grad_norm": 0.6513213101252865, "learning_rate": 6.528013134280799e-07, "loss": 1.4204, "step": 2533 }, { "epoch": 0.17656690938229452, "grad_norm": 0.6998756742851133, "learning_rate": 6.527649997975026e-07, "loss": 1.495, "step": 2534 }, { "epoch": 0.17663658850991185, "grad_norm": 0.7206725602051455, "learning_rate": 6.52728673335077e-07, "loss": 1.6582, "step": 2535 }, { "epoch": 0.17670626763752917, "grad_norm": 0.725602973875328, "learning_rate": 6.526923340425437e-07, "loss": 1.5383, "step": 2536 }, { "epoch": 0.1767759467651465, "grad_norm": 0.7314479526711624, "learning_rate": 6.52655981921644e-07, "loss": 1.378, "step": 2537 }, { "epoch": 0.1768456258927638, "grad_norm": 0.7116441177034473, "learning_rate": 6.526196169741202e-07, "loss": 1.4627, "step": 2538 }, { "epoch": 0.17691530502038114, "grad_norm": 0.7290784007685396, "learning_rate": 6.525832392017151e-07, "loss": 1.5596, "step": 2539 }, { "epoch": 0.17698498414799846, "grad_norm": 0.6934353435568296, "learning_rate": 6.525468486061717e-07, "loss": 1.4273, "step": 2540 }, { "epoch": 0.17705466327561578, "grad_norm": 0.7232881714758217, "learning_rate": 6.525104451892341e-07, "loss": 1.5556, "step": 2541 }, { "epoch": 0.1771243424032331, "grad_norm": 0.7330226056875261, "learning_rate": 6.524740289526469e-07, "loss": 1.5676, "step": 2542 }, { "epoch": 0.17719402153085043, "grad_norm": 0.6942435985096442, "learning_rate": 6.524375998981549e-07, "loss": 1.4992, "step": 2543 }, { "epoch": 0.17726370065846775, "grad_norm": 0.7087192076326799, "learning_rate": 6.524011580275041e-07, "loss": 1.5234, "step": 2544 }, { "epoch": 0.17733337978608507, "grad_norm": 0.6906658016292048, "learning_rate": 6.52364703342441e-07, "loss": 1.5446, "step": 2545 }, { "epoch": 0.1774030589137024, "grad_norm": 0.6590162253016592, "learning_rate": 6.523282358447123e-07, "loss": 1.4942, "step": 2546 }, { "epoch": 0.17747273804131972, "grad_norm": 0.7374886336643569, "learning_rate": 6.522917555360658e-07, "loss": 1.6685, "step": 2547 }, { "epoch": 0.17754241716893704, "grad_norm": 0.7288316061604521, "learning_rate": 6.522552624182495e-07, "loss": 1.5612, "step": 2548 }, { "epoch": 0.17761209629655436, "grad_norm": 0.7986604299216298, "learning_rate": 6.522187564930125e-07, "loss": 1.5161, "step": 2549 }, { "epoch": 0.17768177542417168, "grad_norm": 0.7490611352894104, "learning_rate": 6.521822377621042e-07, "loss": 1.571, "step": 2550 }, { "epoch": 0.177751454551789, "grad_norm": 0.7096302346445801, "learning_rate": 6.521457062272743e-07, "loss": 1.4839, "step": 2551 }, { "epoch": 0.17782113367940633, "grad_norm": 0.7300140825304802, "learning_rate": 6.521091618902738e-07, "loss": 1.615, "step": 2552 }, { "epoch": 0.17789081280702365, "grad_norm": 0.6966168480177054, "learning_rate": 6.520726047528539e-07, "loss": 1.5791, "step": 2553 }, { "epoch": 0.17796049193464097, "grad_norm": 0.7468143622329642, "learning_rate": 6.520360348167666e-07, "loss": 1.5228, "step": 2554 }, { "epoch": 0.1780301710622583, "grad_norm": 0.6872146413398952, "learning_rate": 6.519994520837641e-07, "loss": 1.4354, "step": 2555 }, { "epoch": 0.17809985018987562, "grad_norm": 0.7369132879346765, "learning_rate": 6.519628565555998e-07, "loss": 1.564, "step": 2556 }, { "epoch": 0.17816952931749294, "grad_norm": 0.7484987827082581, "learning_rate": 6.519262482340275e-07, "loss": 1.4987, "step": 2557 }, { "epoch": 0.17823920844511026, "grad_norm": 0.7590198399272611, "learning_rate": 6.518896271208012e-07, "loss": 1.7448, "step": 2558 }, { "epoch": 0.1783088875727276, "grad_norm": 0.6911442734510137, "learning_rate": 6.518529932176761e-07, "loss": 1.6123, "step": 2559 }, { "epoch": 0.1783785667003449, "grad_norm": 0.6705004696348974, "learning_rate": 6.518163465264078e-07, "loss": 1.5614, "step": 2560 }, { "epoch": 0.17844824582796223, "grad_norm": 0.750693147635284, "learning_rate": 6.517796870487524e-07, "loss": 1.5328, "step": 2561 }, { "epoch": 0.17851792495557955, "grad_norm": 0.6856127610822711, "learning_rate": 6.517430147864667e-07, "loss": 1.48, "step": 2562 }, { "epoch": 0.17858760408319688, "grad_norm": 0.7275820774071743, "learning_rate": 6.51706329741308e-07, "loss": 1.5447, "step": 2563 }, { "epoch": 0.1786572832108142, "grad_norm": 0.6711195393492667, "learning_rate": 6.516696319150345e-07, "loss": 1.5349, "step": 2564 }, { "epoch": 0.17872696233843152, "grad_norm": 0.7017299635771622, "learning_rate": 6.51632921309405e-07, "loss": 1.4836, "step": 2565 }, { "epoch": 0.17879664146604884, "grad_norm": 0.6998012714366909, "learning_rate": 6.515961979261782e-07, "loss": 1.4723, "step": 2566 }, { "epoch": 0.17886632059366617, "grad_norm": 0.7104918945327922, "learning_rate": 6.515594617671145e-07, "loss": 1.582, "step": 2567 }, { "epoch": 0.1789359997212835, "grad_norm": 0.6731653151031656, "learning_rate": 6.515227128339741e-07, "loss": 1.5919, "step": 2568 }, { "epoch": 0.1790056788489008, "grad_norm": 0.703887895548082, "learning_rate": 6.51485951128518e-07, "loss": 1.5629, "step": 2569 }, { "epoch": 0.17907535797651813, "grad_norm": 0.7025048472660292, "learning_rate": 6.514491766525082e-07, "loss": 1.5729, "step": 2570 }, { "epoch": 0.17914503710413546, "grad_norm": 0.6551133693668119, "learning_rate": 6.514123894077068e-07, "loss": 1.5205, "step": 2571 }, { "epoch": 0.17921471623175278, "grad_norm": 0.7821354033326472, "learning_rate": 6.513755893958768e-07, "loss": 1.5875, "step": 2572 }, { "epoch": 0.1792843953593701, "grad_norm": 0.7426558766874091, "learning_rate": 6.513387766187816e-07, "loss": 1.5528, "step": 2573 }, { "epoch": 0.17935407448698742, "grad_norm": 0.7372215085776161, "learning_rate": 6.513019510781856e-07, "loss": 1.6203, "step": 2574 }, { "epoch": 0.17942375361460475, "grad_norm": 0.7242908564459493, "learning_rate": 6.512651127758533e-07, "loss": 1.564, "step": 2575 }, { "epoch": 0.17949343274222207, "grad_norm": 0.7324473423283194, "learning_rate": 6.512282617135501e-07, "loss": 1.5448, "step": 2576 }, { "epoch": 0.1795631118698394, "grad_norm": 0.6708021000359229, "learning_rate": 6.511913978930421e-07, "loss": 1.4548, "step": 2577 }, { "epoch": 0.17963279099745671, "grad_norm": 0.7622906166024526, "learning_rate": 6.511545213160959e-07, "loss": 1.72, "step": 2578 }, { "epoch": 0.17970247012507404, "grad_norm": 0.8097203764903231, "learning_rate": 6.511176319844786e-07, "loss": 1.5922, "step": 2579 }, { "epoch": 0.17977214925269136, "grad_norm": 0.7167045481979863, "learning_rate": 6.51080729899958e-07, "loss": 1.5534, "step": 2580 }, { "epoch": 0.17984182838030868, "grad_norm": 0.7130025941415435, "learning_rate": 6.510438150643025e-07, "loss": 1.4931, "step": 2581 }, { "epoch": 0.179911507507926, "grad_norm": 0.7206872658927816, "learning_rate": 6.510068874792813e-07, "loss": 1.549, "step": 2582 }, { "epoch": 0.17998118663554333, "grad_norm": 0.6855445976689892, "learning_rate": 6.50969947146664e-07, "loss": 1.5097, "step": 2583 }, { "epoch": 0.18005086576316065, "grad_norm": 0.8358535909689165, "learning_rate": 6.509329940682207e-07, "loss": 1.6702, "step": 2584 }, { "epoch": 0.18012054489077797, "grad_norm": 0.7251461113964899, "learning_rate": 6.508960282457224e-07, "loss": 1.5584, "step": 2585 }, { "epoch": 0.1801902240183953, "grad_norm": 0.731726399487568, "learning_rate": 6.508590496809407e-07, "loss": 1.6408, "step": 2586 }, { "epoch": 0.18025990314601262, "grad_norm": 0.771355055088224, "learning_rate": 6.508220583756474e-07, "loss": 1.6582, "step": 2587 }, { "epoch": 0.18032958227362994, "grad_norm": 0.6991176945437412, "learning_rate": 6.507850543316153e-07, "loss": 1.5338, "step": 2588 }, { "epoch": 0.18039926140124726, "grad_norm": 0.6942418119782533, "learning_rate": 6.507480375506177e-07, "loss": 1.6089, "step": 2589 }, { "epoch": 0.18046894052886459, "grad_norm": 0.7744690223635904, "learning_rate": 6.507110080344287e-07, "loss": 1.6037, "step": 2590 }, { "epoch": 0.1805386196564819, "grad_norm": 0.748844004383975, "learning_rate": 6.506739657848227e-07, "loss": 1.5866, "step": 2591 }, { "epoch": 0.18060829878409923, "grad_norm": 0.7649898548673842, "learning_rate": 6.506369108035749e-07, "loss": 1.5733, "step": 2592 }, { "epoch": 0.18067797791171655, "grad_norm": 0.7134369595094384, "learning_rate": 6.505998430924608e-07, "loss": 1.515, "step": 2593 }, { "epoch": 0.18074765703933388, "grad_norm": 0.678241174097191, "learning_rate": 6.505627626532572e-07, "loss": 1.5061, "step": 2594 }, { "epoch": 0.1808173361669512, "grad_norm": 0.7642942419987621, "learning_rate": 6.505256694877406e-07, "loss": 1.5778, "step": 2595 }, { "epoch": 0.18088701529456852, "grad_norm": 0.7179287288029527, "learning_rate": 6.50488563597689e-07, "loss": 1.6228, "step": 2596 }, { "epoch": 0.18095669442218584, "grad_norm": 0.7435897191353903, "learning_rate": 6.504514449848803e-07, "loss": 1.5228, "step": 2597 }, { "epoch": 0.18102637354980317, "grad_norm": 0.7270413279809006, "learning_rate": 6.504143136510932e-07, "loss": 1.5702, "step": 2598 }, { "epoch": 0.1810960526774205, "grad_norm": 0.6872507120511585, "learning_rate": 6.503771695981076e-07, "loss": 1.4811, "step": 2599 }, { "epoch": 0.1811657318050378, "grad_norm": 0.7803447095575899, "learning_rate": 6.503400128277032e-07, "loss": 1.4736, "step": 2600 }, { "epoch": 0.18123541093265513, "grad_norm": 0.7177509621225837, "learning_rate": 6.503028433416605e-07, "loss": 1.5244, "step": 2601 }, { "epoch": 0.18130509006027246, "grad_norm": 0.7965152976759453, "learning_rate": 6.502656611417609e-07, "loss": 1.4269, "step": 2602 }, { "epoch": 0.18137476918788978, "grad_norm": 0.7101628004895346, "learning_rate": 6.502284662297863e-07, "loss": 1.5922, "step": 2603 }, { "epoch": 0.1814444483155071, "grad_norm": 0.6982929845687866, "learning_rate": 6.50191258607519e-07, "loss": 1.5254, "step": 2604 }, { "epoch": 0.18151412744312442, "grad_norm": 0.6950322548952821, "learning_rate": 6.501540382767424e-07, "loss": 1.4629, "step": 2605 }, { "epoch": 0.18158380657074175, "grad_norm": 0.7202886551001328, "learning_rate": 6.501168052392397e-07, "loss": 1.552, "step": 2606 }, { "epoch": 0.18165348569835907, "grad_norm": 0.7161505166062673, "learning_rate": 6.500795594967954e-07, "loss": 1.6631, "step": 2607 }, { "epoch": 0.1817231648259764, "grad_norm": 0.6878116644197528, "learning_rate": 6.500423010511946e-07, "loss": 1.4918, "step": 2608 }, { "epoch": 0.1817928439535937, "grad_norm": 0.7353866773063502, "learning_rate": 6.500050299042224e-07, "loss": 1.554, "step": 2609 }, { "epoch": 0.18186252308121104, "grad_norm": 0.7164396573550172, "learning_rate": 6.499677460576653e-07, "loss": 1.4531, "step": 2610 }, { "epoch": 0.18193220220882836, "grad_norm": 0.7912418932841199, "learning_rate": 6.499304495133098e-07, "loss": 1.5789, "step": 2611 }, { "epoch": 0.18200188133644568, "grad_norm": 0.763240467980432, "learning_rate": 6.498931402729434e-07, "loss": 1.5829, "step": 2612 }, { "epoch": 0.182071560464063, "grad_norm": 0.7439100105793851, "learning_rate": 6.498558183383537e-07, "loss": 1.5418, "step": 2613 }, { "epoch": 0.1821412395916803, "grad_norm": 0.6859069824967786, "learning_rate": 6.498184837113295e-07, "loss": 1.6184, "step": 2614 }, { "epoch": 0.18221091871929762, "grad_norm": 0.7132179337585111, "learning_rate": 6.497811363936598e-07, "loss": 1.4808, "step": 2615 }, { "epoch": 0.18228059784691494, "grad_norm": 0.7409771501660511, "learning_rate": 6.497437763871346e-07, "loss": 1.4906, "step": 2616 }, { "epoch": 0.18235027697453227, "grad_norm": 0.6944759248214315, "learning_rate": 6.497064036935441e-07, "loss": 1.5897, "step": 2617 }, { "epoch": 0.1824199561021496, "grad_norm": 0.7015003873915269, "learning_rate": 6.496690183146794e-07, "loss": 1.509, "step": 2618 }, { "epoch": 0.1824896352297669, "grad_norm": 0.6690613285222292, "learning_rate": 6.496316202523318e-07, "loss": 1.4293, "step": 2619 }, { "epoch": 0.18255931435738423, "grad_norm": 0.749788872274255, "learning_rate": 6.495942095082938e-07, "loss": 1.5291, "step": 2620 }, { "epoch": 0.18262899348500156, "grad_norm": 0.7473142725831187, "learning_rate": 6.49556786084358e-07, "loss": 1.6343, "step": 2621 }, { "epoch": 0.18269867261261888, "grad_norm": 0.7249410808106185, "learning_rate": 6.49519349982318e-07, "loss": 1.4134, "step": 2622 }, { "epoch": 0.1827683517402362, "grad_norm": 0.6833719220191706, "learning_rate": 6.494819012039675e-07, "loss": 1.5008, "step": 2623 }, { "epoch": 0.18283803086785352, "grad_norm": 0.7862174459510464, "learning_rate": 6.494444397511014e-07, "loss": 1.6386, "step": 2624 }, { "epoch": 0.18290770999547085, "grad_norm": 0.730100169454004, "learning_rate": 6.49406965625515e-07, "loss": 1.6254, "step": 2625 }, { "epoch": 0.18297738912308817, "grad_norm": 0.7313275186062451, "learning_rate": 6.493694788290038e-07, "loss": 1.5602, "step": 2626 }, { "epoch": 0.1830470682507055, "grad_norm": 0.8129828741881585, "learning_rate": 6.493319793633646e-07, "loss": 1.594, "step": 2627 }, { "epoch": 0.18311674737832281, "grad_norm": 0.6898176815693812, "learning_rate": 6.492944672303941e-07, "loss": 1.634, "step": 2628 }, { "epoch": 0.18318642650594014, "grad_norm": 0.708461442459301, "learning_rate": 6.492569424318904e-07, "loss": 1.5954, "step": 2629 }, { "epoch": 0.18325610563355746, "grad_norm": 0.7568109034106955, "learning_rate": 6.492194049696513e-07, "loss": 1.5439, "step": 2630 }, { "epoch": 0.18332578476117478, "grad_norm": 0.6771063848093974, "learning_rate": 6.49181854845476e-07, "loss": 1.5704, "step": 2631 }, { "epoch": 0.1833954638887921, "grad_norm": 0.676580159279378, "learning_rate": 6.491442920611637e-07, "loss": 1.5426, "step": 2632 }, { "epoch": 0.18346514301640943, "grad_norm": 0.6600845607953997, "learning_rate": 6.491067166185148e-07, "loss": 1.4584, "step": 2633 }, { "epoch": 0.18353482214402675, "grad_norm": 0.724526501155734, "learning_rate": 6.490691285193298e-07, "loss": 1.4994, "step": 2634 }, { "epoch": 0.18360450127164407, "grad_norm": 0.7652557824216829, "learning_rate": 6.490315277654101e-07, "loss": 1.5651, "step": 2635 }, { "epoch": 0.1836741803992614, "grad_norm": 0.7187368483648745, "learning_rate": 6.489939143585574e-07, "loss": 1.5492, "step": 2636 }, { "epoch": 0.18374385952687872, "grad_norm": 0.7394868496695424, "learning_rate": 6.489562883005744e-07, "loss": 1.7474, "step": 2637 }, { "epoch": 0.18381353865449604, "grad_norm": 0.7707547565878867, "learning_rate": 6.489186495932642e-07, "loss": 1.6094, "step": 2638 }, { "epoch": 0.18388321778211336, "grad_norm": 0.6817021323889921, "learning_rate": 6.488809982384305e-07, "loss": 1.5874, "step": 2639 }, { "epoch": 0.18395289690973068, "grad_norm": 0.6530574602289403, "learning_rate": 6.488433342378776e-07, "loss": 1.3532, "step": 2640 }, { "epoch": 0.184022576037348, "grad_norm": 0.6891115609332592, "learning_rate": 6.488056575934104e-07, "loss": 1.5033, "step": 2641 }, { "epoch": 0.18409225516496533, "grad_norm": 0.7075654971214155, "learning_rate": 6.487679683068348e-07, "loss": 1.5582, "step": 2642 }, { "epoch": 0.18416193429258265, "grad_norm": 0.6979922146733539, "learning_rate": 6.487302663799565e-07, "loss": 1.5725, "step": 2643 }, { "epoch": 0.18423161342019997, "grad_norm": 0.7038692584176877, "learning_rate": 6.486925518145822e-07, "loss": 1.5976, "step": 2644 }, { "epoch": 0.1843012925478173, "grad_norm": 0.7598408079706039, "learning_rate": 6.486548246125197e-07, "loss": 1.5574, "step": 2645 }, { "epoch": 0.18437097167543462, "grad_norm": 0.6855757317942535, "learning_rate": 6.486170847755767e-07, "loss": 1.4375, "step": 2646 }, { "epoch": 0.18444065080305194, "grad_norm": 0.7631668431699284, "learning_rate": 6.485793323055616e-07, "loss": 1.616, "step": 2647 }, { "epoch": 0.18451032993066926, "grad_norm": 0.7368144191558978, "learning_rate": 6.485415672042839e-07, "loss": 1.5148, "step": 2648 }, { "epoch": 0.1845800090582866, "grad_norm": 0.7685902842120601, "learning_rate": 6.485037894735534e-07, "loss": 1.7105, "step": 2649 }, { "epoch": 0.1846496881859039, "grad_norm": 0.6976902801252264, "learning_rate": 6.484659991151802e-07, "loss": 1.4219, "step": 2650 }, { "epoch": 0.18471936731352123, "grad_norm": 0.7580061537305252, "learning_rate": 6.484281961309754e-07, "loss": 1.5707, "step": 2651 }, { "epoch": 0.18478904644113855, "grad_norm": 0.7500641059710291, "learning_rate": 6.483903805227508e-07, "loss": 1.5827, "step": 2652 }, { "epoch": 0.18485872556875588, "grad_norm": 0.6649438106538588, "learning_rate": 6.483525522923183e-07, "loss": 1.4317, "step": 2653 }, { "epoch": 0.1849284046963732, "grad_norm": 0.7351706654236744, "learning_rate": 6.483147114414908e-07, "loss": 1.5674, "step": 2654 }, { "epoch": 0.18499808382399052, "grad_norm": 0.6680546391703736, "learning_rate": 6.482768579720819e-07, "loss": 1.5953, "step": 2655 }, { "epoch": 0.18506776295160784, "grad_norm": 0.7247126042568591, "learning_rate": 6.482389918859054e-07, "loss": 1.4191, "step": 2656 }, { "epoch": 0.18513744207922517, "grad_norm": 0.6911414934075633, "learning_rate": 6.48201113184776e-07, "loss": 1.438, "step": 2657 }, { "epoch": 0.1852071212068425, "grad_norm": 0.734603021831186, "learning_rate": 6.481632218705089e-07, "loss": 1.5527, "step": 2658 }, { "epoch": 0.1852768003344598, "grad_norm": 0.7504027564292364, "learning_rate": 6.4812531794492e-07, "loss": 1.6085, "step": 2659 }, { "epoch": 0.18534647946207713, "grad_norm": 0.6875098502184053, "learning_rate": 6.480874014098256e-07, "loss": 1.4462, "step": 2660 }, { "epoch": 0.18541615858969446, "grad_norm": 0.7557531183356848, "learning_rate": 6.480494722670429e-07, "loss": 1.5846, "step": 2661 }, { "epoch": 0.18548583771731178, "grad_norm": 0.7337273902395194, "learning_rate": 6.480115305183894e-07, "loss": 1.4645, "step": 2662 }, { "epoch": 0.1855555168449291, "grad_norm": 0.7159247755309575, "learning_rate": 6.479735761656834e-07, "loss": 1.5828, "step": 2663 }, { "epoch": 0.18562519597254643, "grad_norm": 0.7390118659196949, "learning_rate": 6.479356092107438e-07, "loss": 1.62, "step": 2664 }, { "epoch": 0.18569487510016375, "grad_norm": 0.758500507595704, "learning_rate": 6.478976296553899e-07, "loss": 1.5198, "step": 2665 }, { "epoch": 0.18576455422778107, "grad_norm": 0.7302673339657927, "learning_rate": 6.478596375014419e-07, "loss": 1.6104, "step": 2666 }, { "epoch": 0.1858342333553984, "grad_norm": 0.7771031453203399, "learning_rate": 6.478216327507206e-07, "loss": 1.6206, "step": 2667 }, { "epoch": 0.18590391248301572, "grad_norm": 0.7260781373547577, "learning_rate": 6.477836154050468e-07, "loss": 1.5566, "step": 2668 }, { "epoch": 0.18597359161063304, "grad_norm": 0.7006581746625344, "learning_rate": 6.47745585466243e-07, "loss": 1.3939, "step": 2669 }, { "epoch": 0.18604327073825036, "grad_norm": 0.7801900637952093, "learning_rate": 6.477075429361312e-07, "loss": 1.5328, "step": 2670 }, { "epoch": 0.18611294986586768, "grad_norm": 0.7423656847875956, "learning_rate": 6.476694878165345e-07, "loss": 1.6035, "step": 2671 }, { "epoch": 0.186182628993485, "grad_norm": 0.7195463532032887, "learning_rate": 6.476314201092768e-07, "loss": 1.5965, "step": 2672 }, { "epoch": 0.18625230812110233, "grad_norm": 0.7532581893821502, "learning_rate": 6.475933398161822e-07, "loss": 1.6735, "step": 2673 }, { "epoch": 0.18632198724871965, "grad_norm": 0.7769211162227893, "learning_rate": 6.475552469390758e-07, "loss": 1.4756, "step": 2674 }, { "epoch": 0.18639166637633697, "grad_norm": 0.7550947624847407, "learning_rate": 6.475171414797828e-07, "loss": 1.6108, "step": 2675 }, { "epoch": 0.1864613455039543, "grad_norm": 0.6690296420086265, "learning_rate": 6.474790234401296e-07, "loss": 1.4959, "step": 2676 }, { "epoch": 0.18653102463157162, "grad_norm": 0.6832451741796913, "learning_rate": 6.474408928219426e-07, "loss": 1.498, "step": 2677 }, { "epoch": 0.18660070375918894, "grad_norm": 0.747393796002889, "learning_rate": 6.474027496270493e-07, "loss": 1.562, "step": 2678 }, { "epoch": 0.18667038288680626, "grad_norm": 0.7611818415803452, "learning_rate": 6.473645938572774e-07, "loss": 1.6005, "step": 2679 }, { "epoch": 0.18674006201442359, "grad_norm": 0.732431807713295, "learning_rate": 6.473264255144558e-07, "loss": 1.6562, "step": 2680 }, { "epoch": 0.1868097411420409, "grad_norm": 0.8045845891090851, "learning_rate": 6.472882446004132e-07, "loss": 1.5858, "step": 2681 }, { "epoch": 0.18687942026965823, "grad_norm": 0.6645879405992747, "learning_rate": 6.472500511169794e-07, "loss": 1.4494, "step": 2682 }, { "epoch": 0.18694909939727555, "grad_norm": 0.7534293667028573, "learning_rate": 6.472118450659848e-07, "loss": 1.4723, "step": 2683 }, { "epoch": 0.18701877852489288, "grad_norm": 0.7352604989735883, "learning_rate": 6.471736264492604e-07, "loss": 1.431, "step": 2684 }, { "epoch": 0.1870884576525102, "grad_norm": 0.7933364996076561, "learning_rate": 6.471353952686373e-07, "loss": 1.524, "step": 2685 }, { "epoch": 0.18715813678012752, "grad_norm": 0.7600850522766464, "learning_rate": 6.470971515259481e-07, "loss": 1.46, "step": 2686 }, { "epoch": 0.18722781590774484, "grad_norm": 0.7938840091949416, "learning_rate": 6.470588952230254e-07, "loss": 1.5313, "step": 2687 }, { "epoch": 0.18729749503536217, "grad_norm": 0.7195140084978836, "learning_rate": 6.470206263617024e-07, "loss": 1.4697, "step": 2688 }, { "epoch": 0.1873671741629795, "grad_norm": 0.7005601442693249, "learning_rate": 6.469823449438129e-07, "loss": 1.5984, "step": 2689 }, { "epoch": 0.1874368532905968, "grad_norm": 0.7700719550441382, "learning_rate": 6.469440509711918e-07, "loss": 1.5505, "step": 2690 }, { "epoch": 0.18750653241821413, "grad_norm": 0.6955391533447876, "learning_rate": 6.469057444456739e-07, "loss": 1.5746, "step": 2691 }, { "epoch": 0.18757621154583146, "grad_norm": 0.7533355959862432, "learning_rate": 6.46867425369095e-07, "loss": 1.4649, "step": 2692 }, { "epoch": 0.18764589067344878, "grad_norm": 0.6861651730926349, "learning_rate": 6.468290937432916e-07, "loss": 1.556, "step": 2693 }, { "epoch": 0.1877155698010661, "grad_norm": 0.6969686824642832, "learning_rate": 6.467907495701004e-07, "loss": 1.4597, "step": 2694 }, { "epoch": 0.18778524892868342, "grad_norm": 0.7780056460087894, "learning_rate": 6.467523928513592e-07, "loss": 1.584, "step": 2695 }, { "epoch": 0.18785492805630075, "grad_norm": 0.6971351165916029, "learning_rate": 6.467140235889058e-07, "loss": 1.3735, "step": 2696 }, { "epoch": 0.18792460718391807, "grad_norm": 0.7640130727336821, "learning_rate": 6.466756417845792e-07, "loss": 1.6152, "step": 2697 }, { "epoch": 0.1879942863115354, "grad_norm": 0.7174718505159229, "learning_rate": 6.466372474402185e-07, "loss": 1.5299, "step": 2698 }, { "epoch": 0.1880639654391527, "grad_norm": 0.705437048563619, "learning_rate": 6.465988405576638e-07, "loss": 1.5884, "step": 2699 }, { "epoch": 0.18813364456677004, "grad_norm": 0.8496788834128441, "learning_rate": 6.465604211387557e-07, "loss": 1.6326, "step": 2700 }, { "epoch": 0.18820332369438736, "grad_norm": 0.7183308884189702, "learning_rate": 6.465219891853353e-07, "loss": 1.4403, "step": 2701 }, { "epoch": 0.18827300282200468, "grad_norm": 0.7363655214283799, "learning_rate": 6.464835446992441e-07, "loss": 1.502, "step": 2702 }, { "epoch": 0.188342681949622, "grad_norm": 0.7137203184429629, "learning_rate": 6.464450876823248e-07, "loss": 1.5203, "step": 2703 }, { "epoch": 0.18841236107723933, "grad_norm": 0.7252300369350988, "learning_rate": 6.464066181364201e-07, "loss": 1.5046, "step": 2704 }, { "epoch": 0.18848204020485665, "grad_norm": 0.7117919170248129, "learning_rate": 6.463681360633735e-07, "loss": 1.6556, "step": 2705 }, { "epoch": 0.18855171933247394, "grad_norm": 0.720869879698167, "learning_rate": 6.463296414650294e-07, "loss": 1.5998, "step": 2706 }, { "epoch": 0.18862139846009127, "grad_norm": 0.7449188955522009, "learning_rate": 6.462911343432322e-07, "loss": 1.4649, "step": 2707 }, { "epoch": 0.1886910775877086, "grad_norm": 0.7439890979647477, "learning_rate": 6.462526146998275e-07, "loss": 1.6382, "step": 2708 }, { "epoch": 0.1887607567153259, "grad_norm": 0.7126331244413916, "learning_rate": 6.462140825366612e-07, "loss": 1.5312, "step": 2709 }, { "epoch": 0.18883043584294323, "grad_norm": 0.7735940649273119, "learning_rate": 6.461755378555798e-07, "loss": 1.5014, "step": 2710 }, { "epoch": 0.18890011497056056, "grad_norm": 0.7440600101186636, "learning_rate": 6.461369806584305e-07, "loss": 1.6966, "step": 2711 }, { "epoch": 0.18896979409817788, "grad_norm": 0.6759130137405256, "learning_rate": 6.460984109470609e-07, "loss": 1.5484, "step": 2712 }, { "epoch": 0.1890394732257952, "grad_norm": 0.8264161895423117, "learning_rate": 6.460598287233195e-07, "loss": 1.5834, "step": 2713 }, { "epoch": 0.18910915235341252, "grad_norm": 0.7491457334304705, "learning_rate": 6.46021233989055e-07, "loss": 1.5877, "step": 2714 }, { "epoch": 0.18917883148102985, "grad_norm": 0.6864192440850518, "learning_rate": 6.459826267461172e-07, "loss": 1.5166, "step": 2715 }, { "epoch": 0.18924851060864717, "grad_norm": 0.7259227539963868, "learning_rate": 6.459440069963562e-07, "loss": 1.5007, "step": 2716 }, { "epoch": 0.1893181897362645, "grad_norm": 0.7092268754966882, "learning_rate": 6.459053747416225e-07, "loss": 1.5473, "step": 2717 }, { "epoch": 0.18938786886388181, "grad_norm": 0.6882956952095184, "learning_rate": 6.458667299837677e-07, "loss": 1.5654, "step": 2718 }, { "epoch": 0.18945754799149914, "grad_norm": 0.8355331625656581, "learning_rate": 6.458280727246438e-07, "loss": 1.5154, "step": 2719 }, { "epoch": 0.18952722711911646, "grad_norm": 0.7352037316311917, "learning_rate": 6.45789402966103e-07, "loss": 1.6327, "step": 2720 }, { "epoch": 0.18959690624673378, "grad_norm": 0.6941993455689607, "learning_rate": 6.457507207099988e-07, "loss": 1.4324, "step": 2721 }, { "epoch": 0.1896665853743511, "grad_norm": 0.7649191465845473, "learning_rate": 6.457120259581845e-07, "loss": 1.6697, "step": 2722 }, { "epoch": 0.18973626450196843, "grad_norm": 0.7466998683828353, "learning_rate": 6.456733187125149e-07, "loss": 1.659, "step": 2723 }, { "epoch": 0.18980594362958575, "grad_norm": 0.7588182856390634, "learning_rate": 6.456345989748445e-07, "loss": 1.5306, "step": 2724 }, { "epoch": 0.18987562275720307, "grad_norm": 0.6838571523104907, "learning_rate": 6.455958667470293e-07, "loss": 1.6308, "step": 2725 }, { "epoch": 0.1899453018848204, "grad_norm": 0.7125291705245504, "learning_rate": 6.455571220309251e-07, "loss": 1.5757, "step": 2726 }, { "epoch": 0.19001498101243772, "grad_norm": 0.7950404350336909, "learning_rate": 6.455183648283886e-07, "loss": 1.5175, "step": 2727 }, { "epoch": 0.19008466014005504, "grad_norm": 0.6832770487165002, "learning_rate": 6.454795951412773e-07, "loss": 1.5996, "step": 2728 }, { "epoch": 0.19015433926767236, "grad_norm": 0.723433406934566, "learning_rate": 6.45440812971449e-07, "loss": 1.5254, "step": 2729 }, { "epoch": 0.19022401839528968, "grad_norm": 0.7054317859702796, "learning_rate": 6.454020183207623e-07, "loss": 1.5587, "step": 2730 }, { "epoch": 0.190293697522907, "grad_norm": 0.7909451789471312, "learning_rate": 6.453632111910763e-07, "loss": 1.6194, "step": 2731 }, { "epoch": 0.19036337665052433, "grad_norm": 0.6953562567380657, "learning_rate": 6.453243915842507e-07, "loss": 1.503, "step": 2732 }, { "epoch": 0.19043305577814165, "grad_norm": 0.7637019576203848, "learning_rate": 6.452855595021457e-07, "loss": 1.6011, "step": 2733 }, { "epoch": 0.19050273490575897, "grad_norm": 0.6822582597425052, "learning_rate": 6.452467149466225e-07, "loss": 1.3143, "step": 2734 }, { "epoch": 0.1905724140333763, "grad_norm": 0.7178449079461839, "learning_rate": 6.452078579195424e-07, "loss": 1.5419, "step": 2735 }, { "epoch": 0.19064209316099362, "grad_norm": 0.7138948499344089, "learning_rate": 6.451689884227674e-07, "loss": 1.4824, "step": 2736 }, { "epoch": 0.19071177228861094, "grad_norm": 0.7322828001630005, "learning_rate": 6.451301064581605e-07, "loss": 1.506, "step": 2737 }, { "epoch": 0.19078145141622826, "grad_norm": 0.811654935475173, "learning_rate": 6.45091212027585e-07, "loss": 1.5939, "step": 2738 }, { "epoch": 0.1908511305438456, "grad_norm": 0.6802507343095897, "learning_rate": 6.450523051329044e-07, "loss": 1.5206, "step": 2739 }, { "epoch": 0.1909208096714629, "grad_norm": 0.7181069784358606, "learning_rate": 6.450133857759836e-07, "loss": 1.566, "step": 2740 }, { "epoch": 0.19099048879908023, "grad_norm": 0.7119050567279736, "learning_rate": 6.449744539586875e-07, "loss": 1.4723, "step": 2741 }, { "epoch": 0.19106016792669755, "grad_norm": 0.7359612711039888, "learning_rate": 6.449355096828818e-07, "loss": 1.4862, "step": 2742 }, { "epoch": 0.19112984705431488, "grad_norm": 0.7097328511153883, "learning_rate": 6.44896552950433e-07, "loss": 1.5173, "step": 2743 }, { "epoch": 0.1911995261819322, "grad_norm": 0.6929533047668857, "learning_rate": 6.448575837632079e-07, "loss": 1.5577, "step": 2744 }, { "epoch": 0.19126920530954952, "grad_norm": 0.7041347408340864, "learning_rate": 6.448186021230737e-07, "loss": 1.452, "step": 2745 }, { "epoch": 0.19133888443716685, "grad_norm": 0.72942054200097, "learning_rate": 6.447796080318986e-07, "loss": 1.5466, "step": 2746 }, { "epoch": 0.19140856356478417, "grad_norm": 0.7194334581281807, "learning_rate": 6.447406014915515e-07, "loss": 1.5971, "step": 2747 }, { "epoch": 0.1914782426924015, "grad_norm": 0.746922395656889, "learning_rate": 6.447015825039016e-07, "loss": 1.6671, "step": 2748 }, { "epoch": 0.1915479218200188, "grad_norm": 0.6766416878379267, "learning_rate": 6.446625510708187e-07, "loss": 1.5329, "step": 2749 }, { "epoch": 0.19161760094763614, "grad_norm": 0.7661740967695121, "learning_rate": 6.446235071941732e-07, "loss": 1.6282, "step": 2750 }, { "epoch": 0.19168728007525346, "grad_norm": 0.7270021181227755, "learning_rate": 6.445844508758363e-07, "loss": 1.4649, "step": 2751 }, { "epoch": 0.19175695920287078, "grad_norm": 0.6904466633241558, "learning_rate": 6.445453821176795e-07, "loss": 1.4696, "step": 2752 }, { "epoch": 0.1918266383304881, "grad_norm": 0.7594226179740771, "learning_rate": 6.445063009215751e-07, "loss": 1.5229, "step": 2753 }, { "epoch": 0.19189631745810543, "grad_norm": 0.7340481003476917, "learning_rate": 6.444672072893962e-07, "loss": 1.5785, "step": 2754 }, { "epoch": 0.19196599658572275, "grad_norm": 0.7553905406477419, "learning_rate": 6.444281012230159e-07, "loss": 1.5664, "step": 2755 }, { "epoch": 0.19203567571334007, "grad_norm": 0.7126604908145795, "learning_rate": 6.443889827243085e-07, "loss": 1.5051, "step": 2756 }, { "epoch": 0.1921053548409574, "grad_norm": 0.7123761884060794, "learning_rate": 6.443498517951485e-07, "loss": 1.4743, "step": 2757 }, { "epoch": 0.19217503396857472, "grad_norm": 0.7877452122261493, "learning_rate": 6.443107084374112e-07, "loss": 1.5403, "step": 2758 }, { "epoch": 0.19224471309619204, "grad_norm": 0.6537712332432835, "learning_rate": 6.442715526529724e-07, "loss": 1.3929, "step": 2759 }, { "epoch": 0.19231439222380936, "grad_norm": 0.6982635432356034, "learning_rate": 6.442323844437085e-07, "loss": 1.5458, "step": 2760 }, { "epoch": 0.19238407135142668, "grad_norm": 0.7007628799980677, "learning_rate": 6.441932038114964e-07, "loss": 1.4499, "step": 2761 }, { "epoch": 0.192453750479044, "grad_norm": 0.6998289345209359, "learning_rate": 6.441540107582141e-07, "loss": 1.527, "step": 2762 }, { "epoch": 0.19252342960666133, "grad_norm": 0.7224707964363176, "learning_rate": 6.441148052857394e-07, "loss": 1.5791, "step": 2763 }, { "epoch": 0.19259310873427865, "grad_norm": 0.7549588551015194, "learning_rate": 6.440755873959513e-07, "loss": 1.5293, "step": 2764 }, { "epoch": 0.19266278786189597, "grad_norm": 0.8271235962565269, "learning_rate": 6.440363570907294e-07, "loss": 1.6131, "step": 2765 }, { "epoch": 0.1927324669895133, "grad_norm": 0.7332975981376336, "learning_rate": 6.439971143719531e-07, "loss": 1.5622, "step": 2766 }, { "epoch": 0.19280214611713062, "grad_norm": 0.7431066568250002, "learning_rate": 6.439578592415036e-07, "loss": 1.4984, "step": 2767 }, { "epoch": 0.19287182524474794, "grad_norm": 0.7062334001251804, "learning_rate": 6.43918591701262e-07, "loss": 1.5469, "step": 2768 }, { "epoch": 0.19294150437236526, "grad_norm": 0.8259805932604753, "learning_rate": 6.438793117531097e-07, "loss": 1.813, "step": 2769 }, { "epoch": 0.19301118349998259, "grad_norm": 0.7265022203713082, "learning_rate": 6.438400193989292e-07, "loss": 1.5044, "step": 2770 }, { "epoch": 0.1930808626275999, "grad_norm": 0.7800710904673711, "learning_rate": 6.438007146406037e-07, "loss": 1.5255, "step": 2771 }, { "epoch": 0.19315054175521723, "grad_norm": 0.6915373563261974, "learning_rate": 6.437613974800168e-07, "loss": 1.4791, "step": 2772 }, { "epoch": 0.19322022088283455, "grad_norm": 0.7455382855726259, "learning_rate": 6.437220679190524e-07, "loss": 1.6197, "step": 2773 }, { "epoch": 0.19328990001045188, "grad_norm": 0.741388638904269, "learning_rate": 6.436827259595954e-07, "loss": 1.5326, "step": 2774 }, { "epoch": 0.1933595791380692, "grad_norm": 0.7274532124482767, "learning_rate": 6.436433716035309e-07, "loss": 1.6042, "step": 2775 }, { "epoch": 0.19342925826568652, "grad_norm": 0.6613153079163728, "learning_rate": 6.436040048527453e-07, "loss": 1.423, "step": 2776 }, { "epoch": 0.19349893739330384, "grad_norm": 0.7526717406315007, "learning_rate": 6.435646257091248e-07, "loss": 1.6477, "step": 2777 }, { "epoch": 0.19356861652092117, "grad_norm": 0.7457469971419062, "learning_rate": 6.435252341745566e-07, "loss": 1.7472, "step": 2778 }, { "epoch": 0.1936382956485385, "grad_norm": 0.7283337615791947, "learning_rate": 6.434858302509284e-07, "loss": 1.6787, "step": 2779 }, { "epoch": 0.1937079747761558, "grad_norm": 0.7479235036434875, "learning_rate": 6.434464139401287e-07, "loss": 1.8808, "step": 2780 }, { "epoch": 0.19377765390377313, "grad_norm": 0.6914895910578951, "learning_rate": 6.434069852440461e-07, "loss": 1.5673, "step": 2781 }, { "epoch": 0.19384733303139046, "grad_norm": 0.7254723360609027, "learning_rate": 6.433675441645703e-07, "loss": 1.539, "step": 2782 }, { "epoch": 0.19391701215900778, "grad_norm": 0.6833639219823902, "learning_rate": 6.433280907035914e-07, "loss": 1.5052, "step": 2783 }, { "epoch": 0.1939866912866251, "grad_norm": 0.7420464590441822, "learning_rate": 6.432886248630001e-07, "loss": 1.5594, "step": 2784 }, { "epoch": 0.19405637041424242, "grad_norm": 0.6652408829333618, "learning_rate": 6.432491466446876e-07, "loss": 1.2722, "step": 2785 }, { "epoch": 0.19412604954185975, "grad_norm": 0.6626300147201509, "learning_rate": 6.432096560505458e-07, "loss": 1.398, "step": 2786 }, { "epoch": 0.19419572866947707, "grad_norm": 0.7465600044204658, "learning_rate": 6.431701530824671e-07, "loss": 1.5446, "step": 2787 }, { "epoch": 0.1942654077970944, "grad_norm": 0.7559501299835806, "learning_rate": 6.431306377423449e-07, "loss": 1.4106, "step": 2788 }, { "epoch": 0.19433508692471171, "grad_norm": 0.7012209472341381, "learning_rate": 6.430911100320723e-07, "loss": 1.492, "step": 2789 }, { "epoch": 0.19440476605232904, "grad_norm": 0.711451679800713, "learning_rate": 6.430515699535441e-07, "loss": 1.516, "step": 2790 }, { "epoch": 0.19447444517994636, "grad_norm": 0.7340907534836132, "learning_rate": 6.430120175086548e-07, "loss": 1.5429, "step": 2791 }, { "epoch": 0.19454412430756368, "grad_norm": 0.7246947540430131, "learning_rate": 6.429724526993e-07, "loss": 1.4757, "step": 2792 }, { "epoch": 0.194613803435181, "grad_norm": 0.6721144443775302, "learning_rate": 6.429328755273755e-07, "loss": 1.4574, "step": 2793 }, { "epoch": 0.19468348256279833, "grad_norm": 0.7162298227034134, "learning_rate": 6.428932859947782e-07, "loss": 1.5135, "step": 2794 }, { "epoch": 0.19475316169041565, "grad_norm": 0.6931706305166699, "learning_rate": 6.428536841034052e-07, "loss": 1.6258, "step": 2795 }, { "epoch": 0.19482284081803297, "grad_norm": 0.6696857466893444, "learning_rate": 6.428140698551542e-07, "loss": 1.4967, "step": 2796 }, { "epoch": 0.19489251994565027, "grad_norm": 0.7079042834337671, "learning_rate": 6.427744432519237e-07, "loss": 1.4456, "step": 2797 }, { "epoch": 0.1949621990732676, "grad_norm": 0.7021521523597029, "learning_rate": 6.427348042956129e-07, "loss": 1.6123, "step": 2798 }, { "epoch": 0.1950318782008849, "grad_norm": 0.7125268105966331, "learning_rate": 6.426951529881209e-07, "loss": 1.4722, "step": 2799 }, { "epoch": 0.19510155732850223, "grad_norm": 0.6802108240355292, "learning_rate": 6.426554893313482e-07, "loss": 1.5581, "step": 2800 }, { "epoch": 0.19517123645611956, "grad_norm": 0.7984301225935575, "learning_rate": 6.426158133271956e-07, "loss": 1.7121, "step": 2801 }, { "epoch": 0.19524091558373688, "grad_norm": 0.6879133160196618, "learning_rate": 6.425761249775642e-07, "loss": 1.5569, "step": 2802 }, { "epoch": 0.1953105947113542, "grad_norm": 0.6831021054413613, "learning_rate": 6.425364242843561e-07, "loss": 1.6133, "step": 2803 }, { "epoch": 0.19538027383897152, "grad_norm": 0.7194943179719568, "learning_rate": 6.42496711249474e-07, "loss": 1.6279, "step": 2804 }, { "epoch": 0.19544995296658885, "grad_norm": 0.7155004699065798, "learning_rate": 6.424569858748207e-07, "loss": 1.5022, "step": 2805 }, { "epoch": 0.19551963209420617, "grad_norm": 0.7372121801210776, "learning_rate": 6.424172481623002e-07, "loss": 1.4252, "step": 2806 }, { "epoch": 0.1955893112218235, "grad_norm": 0.7147709147935137, "learning_rate": 6.423774981138167e-07, "loss": 1.6929, "step": 2807 }, { "epoch": 0.19565899034944081, "grad_norm": 0.7087472475940669, "learning_rate": 6.423377357312749e-07, "loss": 1.6249, "step": 2808 }, { "epoch": 0.19572866947705814, "grad_norm": 0.6918451314684043, "learning_rate": 6.422979610165808e-07, "loss": 1.5191, "step": 2809 }, { "epoch": 0.19579834860467546, "grad_norm": 0.7084086537536417, "learning_rate": 6.422581739716399e-07, "loss": 1.5604, "step": 2810 }, { "epoch": 0.19586802773229278, "grad_norm": 0.703091479351792, "learning_rate": 6.422183745983593e-07, "loss": 1.6192, "step": 2811 }, { "epoch": 0.1959377068599101, "grad_norm": 0.7999940834598073, "learning_rate": 6.421785628986463e-07, "loss": 1.6106, "step": 2812 }, { "epoch": 0.19600738598752743, "grad_norm": 0.7439364790535018, "learning_rate": 6.421387388744083e-07, "loss": 1.4405, "step": 2813 }, { "epoch": 0.19607706511514475, "grad_norm": 0.7334064002022052, "learning_rate": 6.420989025275542e-07, "loss": 1.2867, "step": 2814 }, { "epoch": 0.19614674424276207, "grad_norm": 0.746628498475421, "learning_rate": 6.420590538599928e-07, "loss": 1.4626, "step": 2815 }, { "epoch": 0.1962164233703794, "grad_norm": 0.6954573088788314, "learning_rate": 6.420191928736339e-07, "loss": 1.5406, "step": 2816 }, { "epoch": 0.19628610249799672, "grad_norm": 0.6996811523016951, "learning_rate": 6.419793195703875e-07, "loss": 1.6512, "step": 2817 }, { "epoch": 0.19635578162561404, "grad_norm": 0.7488194125821913, "learning_rate": 6.419394339521647e-07, "loss": 1.5551, "step": 2818 }, { "epoch": 0.19642546075323136, "grad_norm": 0.6858185427092187, "learning_rate": 6.418995360208766e-07, "loss": 1.5719, "step": 2819 }, { "epoch": 0.19649513988084868, "grad_norm": 0.8012233152006635, "learning_rate": 6.418596257784354e-07, "loss": 1.5205, "step": 2820 }, { "epoch": 0.196564819008466, "grad_norm": 0.6869950460759762, "learning_rate": 6.418197032267536e-07, "loss": 1.4701, "step": 2821 }, { "epoch": 0.19663449813608333, "grad_norm": 0.7558716905890024, "learning_rate": 6.417797683677444e-07, "loss": 1.5402, "step": 2822 }, { "epoch": 0.19670417726370065, "grad_norm": 0.6739914935874416, "learning_rate": 6.417398212033214e-07, "loss": 1.5758, "step": 2823 }, { "epoch": 0.19677385639131798, "grad_norm": 0.7447417175089804, "learning_rate": 6.416998617353993e-07, "loss": 1.6096, "step": 2824 }, { "epoch": 0.1968435355189353, "grad_norm": 0.6751254016745756, "learning_rate": 6.416598899658926e-07, "loss": 1.4143, "step": 2825 }, { "epoch": 0.19691321464655262, "grad_norm": 0.7045687553583755, "learning_rate": 6.416199058967173e-07, "loss": 1.6254, "step": 2826 }, { "epoch": 0.19698289377416994, "grad_norm": 0.7201577315390923, "learning_rate": 6.41579909529789e-07, "loss": 1.561, "step": 2827 }, { "epoch": 0.19705257290178727, "grad_norm": 0.6772138874740977, "learning_rate": 6.41539900867025e-07, "loss": 1.6012, "step": 2828 }, { "epoch": 0.1971222520294046, "grad_norm": 0.7547835224780833, "learning_rate": 6.414998799103421e-07, "loss": 1.6242, "step": 2829 }, { "epoch": 0.1971919311570219, "grad_norm": 0.7656065190946523, "learning_rate": 6.414598466616585e-07, "loss": 1.6035, "step": 2830 }, { "epoch": 0.19726161028463923, "grad_norm": 0.7244976383852213, "learning_rate": 6.414198011228923e-07, "loss": 1.6974, "step": 2831 }, { "epoch": 0.19733128941225656, "grad_norm": 0.7259803282047949, "learning_rate": 6.413797432959631e-07, "loss": 1.5899, "step": 2832 }, { "epoch": 0.19740096853987388, "grad_norm": 0.7657140131144465, "learning_rate": 6.413396731827901e-07, "loss": 1.5469, "step": 2833 }, { "epoch": 0.1974706476674912, "grad_norm": 0.7295398215507435, "learning_rate": 6.412995907852937e-07, "loss": 1.5622, "step": 2834 }, { "epoch": 0.19754032679510852, "grad_norm": 0.7318433019876475, "learning_rate": 6.412594961053948e-07, "loss": 1.5309, "step": 2835 }, { "epoch": 0.19761000592272585, "grad_norm": 0.71192020542836, "learning_rate": 6.412193891450147e-07, "loss": 1.5897, "step": 2836 }, { "epoch": 0.19767968505034317, "grad_norm": 0.7021056662939148, "learning_rate": 6.411792699060755e-07, "loss": 1.5287, "step": 2837 }, { "epoch": 0.1977493641779605, "grad_norm": 0.7681891994041536, "learning_rate": 6.411391383904998e-07, "loss": 1.5876, "step": 2838 }, { "epoch": 0.1978190433055778, "grad_norm": 0.7493583117634097, "learning_rate": 6.410989946002107e-07, "loss": 1.5165, "step": 2839 }, { "epoch": 0.19788872243319514, "grad_norm": 0.7378067482423463, "learning_rate": 6.41058838537132e-07, "loss": 1.4552, "step": 2840 }, { "epoch": 0.19795840156081246, "grad_norm": 0.7445427788569985, "learning_rate": 6.41018670203188e-07, "loss": 1.5561, "step": 2841 }, { "epoch": 0.19802808068842978, "grad_norm": 0.7622629977423278, "learning_rate": 6.409784896003039e-07, "loss": 1.5894, "step": 2842 }, { "epoch": 0.1980977598160471, "grad_norm": 0.7665002701824735, "learning_rate": 6.40938296730405e-07, "loss": 1.5411, "step": 2843 }, { "epoch": 0.19816743894366443, "grad_norm": 0.7254647373761564, "learning_rate": 6.408980915954175e-07, "loss": 1.464, "step": 2844 }, { "epoch": 0.19823711807128175, "grad_norm": 0.7271343977692023, "learning_rate": 6.40857874197268e-07, "loss": 1.6614, "step": 2845 }, { "epoch": 0.19830679719889907, "grad_norm": 0.7139000862151373, "learning_rate": 6.40817644537884e-07, "loss": 1.5599, "step": 2846 }, { "epoch": 0.1983764763265164, "grad_norm": 0.6681638182102779, "learning_rate": 6.407774026191933e-07, "loss": 1.6665, "step": 2847 }, { "epoch": 0.19844615545413372, "grad_norm": 0.7868727777360315, "learning_rate": 6.407371484431243e-07, "loss": 1.5568, "step": 2848 }, { "epoch": 0.19851583458175104, "grad_norm": 0.7553355394120074, "learning_rate": 6.406968820116061e-07, "loss": 1.551, "step": 2849 }, { "epoch": 0.19858551370936836, "grad_norm": 0.745667391752033, "learning_rate": 6.406566033265686e-07, "loss": 1.467, "step": 2850 }, { "epoch": 0.19865519283698568, "grad_norm": 0.7214889502095606, "learning_rate": 6.406163123899416e-07, "loss": 1.5626, "step": 2851 }, { "epoch": 0.198724871964603, "grad_norm": 0.7039722794357991, "learning_rate": 6.405760092036561e-07, "loss": 1.4811, "step": 2852 }, { "epoch": 0.19879455109222033, "grad_norm": 0.7028228796727578, "learning_rate": 6.405356937696437e-07, "loss": 1.5365, "step": 2853 }, { "epoch": 0.19886423021983765, "grad_norm": 0.7327098572778575, "learning_rate": 6.404953660898362e-07, "loss": 1.6272, "step": 2854 }, { "epoch": 0.19893390934745497, "grad_norm": 0.6640798875637417, "learning_rate": 6.404550261661662e-07, "loss": 1.4853, "step": 2855 }, { "epoch": 0.1990035884750723, "grad_norm": 0.6993347936293075, "learning_rate": 6.40414674000567e-07, "loss": 1.565, "step": 2856 }, { "epoch": 0.19907326760268962, "grad_norm": 0.68104277668191, "learning_rate": 6.403743095949722e-07, "loss": 1.4548, "step": 2857 }, { "epoch": 0.19914294673030694, "grad_norm": 0.7617610054585428, "learning_rate": 6.403339329513161e-07, "loss": 1.6688, "step": 2858 }, { "epoch": 0.19921262585792426, "grad_norm": 0.7131057683123566, "learning_rate": 6.402935440715339e-07, "loss": 1.6218, "step": 2859 }, { "epoch": 0.1992823049855416, "grad_norm": 0.6968176358706246, "learning_rate": 6.402531429575609e-07, "loss": 1.4128, "step": 2860 }, { "epoch": 0.1993519841131589, "grad_norm": 0.718108124628976, "learning_rate": 6.402127296113334e-07, "loss": 1.6234, "step": 2861 }, { "epoch": 0.19942166324077623, "grad_norm": 0.7445450879925423, "learning_rate": 6.401723040347878e-07, "loss": 1.4594, "step": 2862 }, { "epoch": 0.19949134236839355, "grad_norm": 0.7261122364370047, "learning_rate": 6.401318662298615e-07, "loss": 1.5517, "step": 2863 }, { "epoch": 0.19956102149601088, "grad_norm": 0.7220852520629888, "learning_rate": 6.400914161984925e-07, "loss": 1.5947, "step": 2864 }, { "epoch": 0.1996307006236282, "grad_norm": 0.6652206452519076, "learning_rate": 6.400509539426191e-07, "loss": 1.4174, "step": 2865 }, { "epoch": 0.19970037975124552, "grad_norm": 0.7005410182147795, "learning_rate": 6.400104794641804e-07, "loss": 1.6145, "step": 2866 }, { "epoch": 0.19977005887886284, "grad_norm": 0.7164367875663481, "learning_rate": 6.39969992765116e-07, "loss": 1.6245, "step": 2867 }, { "epoch": 0.19983973800648017, "grad_norm": 0.7140554642769725, "learning_rate": 6.399294938473661e-07, "loss": 1.5329, "step": 2868 }, { "epoch": 0.1999094171340975, "grad_norm": 0.7436557780926554, "learning_rate": 6.398889827128715e-07, "loss": 1.6935, "step": 2869 }, { "epoch": 0.1999790962617148, "grad_norm": 0.7231366816956215, "learning_rate": 6.398484593635735e-07, "loss": 1.5485, "step": 2870 }, { "epoch": 0.20004877538933213, "grad_norm": 0.66648456054266, "learning_rate": 6.398079238014141e-07, "loss": 1.4717, "step": 2871 }, { "epoch": 0.20011845451694946, "grad_norm": 0.7575185945561026, "learning_rate": 6.39767376028336e-07, "loss": 1.6266, "step": 2872 }, { "epoch": 0.20018813364456678, "grad_norm": 0.7215300676555968, "learning_rate": 6.397268160462822e-07, "loss": 1.5363, "step": 2873 }, { "epoch": 0.2002578127721841, "grad_norm": 0.7612344619035825, "learning_rate": 6.396862438571965e-07, "loss": 1.5018, "step": 2874 }, { "epoch": 0.20032749189980142, "grad_norm": 0.7581568365524771, "learning_rate": 6.39645659463023e-07, "loss": 1.5225, "step": 2875 }, { "epoch": 0.20039717102741875, "grad_norm": 0.7129849329155351, "learning_rate": 6.396050628657068e-07, "loss": 1.5566, "step": 2876 }, { "epoch": 0.20046685015503607, "grad_norm": 0.7277241962059409, "learning_rate": 6.395644540671932e-07, "loss": 1.5693, "step": 2877 }, { "epoch": 0.2005365292826534, "grad_norm": 0.7268033481810099, "learning_rate": 6.395238330694284e-07, "loss": 1.7033, "step": 2878 }, { "epoch": 0.20060620841027071, "grad_norm": 0.7578464136236305, "learning_rate": 6.39483199874359e-07, "loss": 1.5682, "step": 2879 }, { "epoch": 0.20067588753788804, "grad_norm": 0.685464342009214, "learning_rate": 6.394425544839323e-07, "loss": 1.5303, "step": 2880 }, { "epoch": 0.20074556666550536, "grad_norm": 0.7182341780133301, "learning_rate": 6.394018969000958e-07, "loss": 1.592, "step": 2881 }, { "epoch": 0.20081524579312268, "grad_norm": 0.6970355774318064, "learning_rate": 6.393612271247984e-07, "loss": 1.5237, "step": 2882 }, { "epoch": 0.20088492492074, "grad_norm": 0.692437523420572, "learning_rate": 6.393205451599887e-07, "loss": 1.5745, "step": 2883 }, { "epoch": 0.20095460404835733, "grad_norm": 0.7421810031357134, "learning_rate": 6.392798510076162e-07, "loss": 1.5122, "step": 2884 }, { "epoch": 0.20102428317597465, "grad_norm": 0.7247718274256905, "learning_rate": 6.392391446696315e-07, "loss": 1.5325, "step": 2885 }, { "epoch": 0.20109396230359197, "grad_norm": 0.7015610226696939, "learning_rate": 6.391984261479848e-07, "loss": 1.4387, "step": 2886 }, { "epoch": 0.2011636414312093, "grad_norm": 0.7136527685788008, "learning_rate": 6.391576954446278e-07, "loss": 1.5067, "step": 2887 }, { "epoch": 0.20123332055882662, "grad_norm": 0.751014147824683, "learning_rate": 6.391169525615121e-07, "loss": 1.6104, "step": 2888 }, { "epoch": 0.2013029996864439, "grad_norm": 0.8270177555880811, "learning_rate": 6.390761975005905e-07, "loss": 1.6917, "step": 2889 }, { "epoch": 0.20137267881406123, "grad_norm": 0.7686620792228451, "learning_rate": 6.39035430263816e-07, "loss": 1.4402, "step": 2890 }, { "epoch": 0.20144235794167856, "grad_norm": 0.6852734847807493, "learning_rate": 6.38994650853142e-07, "loss": 1.4847, "step": 2891 }, { "epoch": 0.20151203706929588, "grad_norm": 0.6861463442143995, "learning_rate": 6.389538592705229e-07, "loss": 1.56, "step": 2892 }, { "epoch": 0.2015817161969132, "grad_norm": 0.7185857414585152, "learning_rate": 6.389130555179134e-07, "loss": 1.4896, "step": 2893 }, { "epoch": 0.20165139532453052, "grad_norm": 0.7038209840042945, "learning_rate": 6.388722395972692e-07, "loss": 1.4935, "step": 2894 }, { "epoch": 0.20172107445214785, "grad_norm": 0.7107114427154652, "learning_rate": 6.38831411510546e-07, "loss": 1.6031, "step": 2895 }, { "epoch": 0.20179075357976517, "grad_norm": 0.6991671503850788, "learning_rate": 6.387905712597004e-07, "loss": 1.5549, "step": 2896 }, { "epoch": 0.2018604327073825, "grad_norm": 0.7530770913699526, "learning_rate": 6.387497188466897e-07, "loss": 1.4468, "step": 2897 }, { "epoch": 0.20193011183499981, "grad_norm": 0.6670243465782024, "learning_rate": 6.387088542734715e-07, "loss": 1.6758, "step": 2898 }, { "epoch": 0.20199979096261714, "grad_norm": 0.7092271431291487, "learning_rate": 6.386679775420042e-07, "loss": 1.4933, "step": 2899 }, { "epoch": 0.20206947009023446, "grad_norm": 0.736826431597258, "learning_rate": 6.386270886542466e-07, "loss": 1.4696, "step": 2900 }, { "epoch": 0.20213914921785178, "grad_norm": 0.6884689205263212, "learning_rate": 6.385861876121582e-07, "loss": 1.5714, "step": 2901 }, { "epoch": 0.2022088283454691, "grad_norm": 0.7191280188616926, "learning_rate": 6.385452744176991e-07, "loss": 1.4575, "step": 2902 }, { "epoch": 0.20227850747308643, "grad_norm": 0.7246993662960771, "learning_rate": 6.385043490728299e-07, "loss": 1.5546, "step": 2903 }, { "epoch": 0.20234818660070375, "grad_norm": 0.7135160908956892, "learning_rate": 6.384634115795119e-07, "loss": 1.5166, "step": 2904 }, { "epoch": 0.20241786572832107, "grad_norm": 0.7142107711582788, "learning_rate": 6.384224619397069e-07, "loss": 1.4995, "step": 2905 }, { "epoch": 0.2024875448559384, "grad_norm": 0.7530747506999542, "learning_rate": 6.383815001553772e-07, "loss": 1.5028, "step": 2906 }, { "epoch": 0.20255722398355572, "grad_norm": 0.6930300478756766, "learning_rate": 6.383405262284859e-07, "loss": 1.6004, "step": 2907 }, { "epoch": 0.20262690311117304, "grad_norm": 0.8030841056649302, "learning_rate": 6.382995401609964e-07, "loss": 1.5417, "step": 2908 }, { "epoch": 0.20269658223879036, "grad_norm": 0.7682765721959441, "learning_rate": 6.38258541954873e-07, "loss": 1.6121, "step": 2909 }, { "epoch": 0.20276626136640769, "grad_norm": 0.7125907867109845, "learning_rate": 6.382175316120802e-07, "loss": 1.4901, "step": 2910 }, { "epoch": 0.202835940494025, "grad_norm": 0.7415556985743339, "learning_rate": 6.381765091345836e-07, "loss": 1.624, "step": 2911 }, { "epoch": 0.20290561962164233, "grad_norm": 0.7215852980031595, "learning_rate": 6.381354745243489e-07, "loss": 1.5852, "step": 2912 }, { "epoch": 0.20297529874925965, "grad_norm": 0.6972105029569049, "learning_rate": 6.380944277833425e-07, "loss": 1.4699, "step": 2913 }, { "epoch": 0.20304497787687698, "grad_norm": 0.7509877613104287, "learning_rate": 6.380533689135316e-07, "loss": 1.5867, "step": 2914 }, { "epoch": 0.2031146570044943, "grad_norm": 0.7736493844250907, "learning_rate": 6.380122979168837e-07, "loss": 1.5804, "step": 2915 }, { "epoch": 0.20318433613211162, "grad_norm": 0.689680312040028, "learning_rate": 6.379712147953671e-07, "loss": 1.5172, "step": 2916 }, { "epoch": 0.20325401525972894, "grad_norm": 0.7474879041731053, "learning_rate": 6.379301195509505e-07, "loss": 1.4659, "step": 2917 }, { "epoch": 0.20332369438734627, "grad_norm": 0.7394318317131775, "learning_rate": 6.378890121856034e-07, "loss": 1.5584, "step": 2918 }, { "epoch": 0.2033933735149636, "grad_norm": 0.6932945340619577, "learning_rate": 6.378478927012956e-07, "loss": 1.5079, "step": 2919 }, { "epoch": 0.2034630526425809, "grad_norm": 0.7306806137581724, "learning_rate": 6.378067610999978e-07, "loss": 1.6069, "step": 2920 }, { "epoch": 0.20353273177019823, "grad_norm": 0.7651741778475234, "learning_rate": 6.377656173836808e-07, "loss": 1.4645, "step": 2921 }, { "epoch": 0.20360241089781556, "grad_norm": 0.7220245915728065, "learning_rate": 6.377244615543167e-07, "loss": 1.5917, "step": 2922 }, { "epoch": 0.20367209002543288, "grad_norm": 0.7149824274459259, "learning_rate": 6.376832936138774e-07, "loss": 1.6134, "step": 2923 }, { "epoch": 0.2037417691530502, "grad_norm": 0.7180373040033633, "learning_rate": 6.37642113564336e-07, "loss": 1.4994, "step": 2924 }, { "epoch": 0.20381144828066752, "grad_norm": 0.7100774209110662, "learning_rate": 6.376009214076658e-07, "loss": 1.4923, "step": 2925 }, { "epoch": 0.20388112740828485, "grad_norm": 0.6672838981463166, "learning_rate": 6.37559717145841e-07, "loss": 1.5192, "step": 2926 }, { "epoch": 0.20395080653590217, "grad_norm": 0.7585267274723554, "learning_rate": 6.37518500780836e-07, "loss": 1.556, "step": 2927 }, { "epoch": 0.2040204856635195, "grad_norm": 0.7175807603218348, "learning_rate": 6.374772723146259e-07, "loss": 1.592, "step": 2928 }, { "epoch": 0.2040901647911368, "grad_norm": 0.7037496395350076, "learning_rate": 6.374360317491867e-07, "loss": 1.5354, "step": 2929 }, { "epoch": 0.20415984391875414, "grad_norm": 0.691830573824489, "learning_rate": 6.373947790864947e-07, "loss": 1.5469, "step": 2930 }, { "epoch": 0.20422952304637146, "grad_norm": 0.7167616804294645, "learning_rate": 6.373535143285267e-07, "loss": 1.6573, "step": 2931 }, { "epoch": 0.20429920217398878, "grad_norm": 0.6779235031573106, "learning_rate": 6.373122374772601e-07, "loss": 1.5974, "step": 2932 }, { "epoch": 0.2043688813016061, "grad_norm": 0.6919311558080987, "learning_rate": 6.372709485346731e-07, "loss": 1.5194, "step": 2933 }, { "epoch": 0.20443856042922343, "grad_norm": 0.6804788103429456, "learning_rate": 6.372296475027444e-07, "loss": 1.4618, "step": 2934 }, { "epoch": 0.20450823955684075, "grad_norm": 0.7378684110679562, "learning_rate": 6.371883343834532e-07, "loss": 1.591, "step": 2935 }, { "epoch": 0.20457791868445807, "grad_norm": 0.7033761641976377, "learning_rate": 6.371470091787792e-07, "loss": 1.4276, "step": 2936 }, { "epoch": 0.2046475978120754, "grad_norm": 0.7226159322335646, "learning_rate": 6.371056718907029e-07, "loss": 1.4797, "step": 2937 }, { "epoch": 0.20471727693969272, "grad_norm": 0.7733144725121075, "learning_rate": 6.370643225212054e-07, "loss": 1.6505, "step": 2938 }, { "epoch": 0.20478695606731004, "grad_norm": 0.7425310543293385, "learning_rate": 6.370229610722679e-07, "loss": 1.5771, "step": 2939 }, { "epoch": 0.20485663519492736, "grad_norm": 0.6828119499786712, "learning_rate": 6.369815875458727e-07, "loss": 1.5115, "step": 2940 }, { "epoch": 0.20492631432254468, "grad_norm": 0.7025329013795072, "learning_rate": 6.369402019440027e-07, "loss": 1.5258, "step": 2941 }, { "epoch": 0.204995993450162, "grad_norm": 0.7426863118585209, "learning_rate": 6.368988042686408e-07, "loss": 1.4511, "step": 2942 }, { "epoch": 0.20506567257777933, "grad_norm": 0.6975590020208378, "learning_rate": 6.368573945217712e-07, "loss": 1.5915, "step": 2943 }, { "epoch": 0.20513535170539665, "grad_norm": 0.6555378771055111, "learning_rate": 6.368159727053781e-07, "loss": 1.53, "step": 2944 }, { "epoch": 0.20520503083301397, "grad_norm": 0.730334550632792, "learning_rate": 6.367745388214467e-07, "loss": 1.5767, "step": 2945 }, { "epoch": 0.2052747099606313, "grad_norm": 0.6796967601212938, "learning_rate": 6.367330928719625e-07, "loss": 1.5181, "step": 2946 }, { "epoch": 0.20534438908824862, "grad_norm": 0.7077642966530233, "learning_rate": 6.366916348589116e-07, "loss": 1.3541, "step": 2947 }, { "epoch": 0.20541406821586594, "grad_norm": 0.739817028672939, "learning_rate": 6.36650164784281e-07, "loss": 1.5237, "step": 2948 }, { "epoch": 0.20548374734348326, "grad_norm": 0.7232713294038303, "learning_rate": 6.366086826500578e-07, "loss": 1.642, "step": 2949 }, { "epoch": 0.2055534264711006, "grad_norm": 0.7529785742217403, "learning_rate": 6.3656718845823e-07, "loss": 1.5518, "step": 2950 }, { "epoch": 0.2056231055987179, "grad_norm": 0.7295660194953272, "learning_rate": 6.365256822107859e-07, "loss": 1.4517, "step": 2951 }, { "epoch": 0.20569278472633523, "grad_norm": 0.6964765639169348, "learning_rate": 6.36484163909715e-07, "loss": 1.6233, "step": 2952 }, { "epoch": 0.20576246385395255, "grad_norm": 0.710909846668233, "learning_rate": 6.364426335570066e-07, "loss": 1.6418, "step": 2953 }, { "epoch": 0.20583214298156988, "grad_norm": 0.7652461920615087, "learning_rate": 6.364010911546508e-07, "loss": 1.5322, "step": 2954 }, { "epoch": 0.2059018221091872, "grad_norm": 0.7045875156326461, "learning_rate": 6.363595367046389e-07, "loss": 1.5305, "step": 2955 }, { "epoch": 0.20597150123680452, "grad_norm": 0.7563366210538794, "learning_rate": 6.363179702089618e-07, "loss": 1.5893, "step": 2956 }, { "epoch": 0.20604118036442184, "grad_norm": 0.7511868496946549, "learning_rate": 6.362763916696117e-07, "loss": 1.732, "step": 2957 }, { "epoch": 0.20611085949203917, "grad_norm": 0.6697718587186111, "learning_rate": 6.362348010885809e-07, "loss": 1.5072, "step": 2958 }, { "epoch": 0.2061805386196565, "grad_norm": 0.7530888567410823, "learning_rate": 6.361931984678628e-07, "loss": 1.5611, "step": 2959 }, { "epoch": 0.2062502177472738, "grad_norm": 0.7185438842375362, "learning_rate": 6.361515838094509e-07, "loss": 1.6223, "step": 2960 }, { "epoch": 0.20631989687489113, "grad_norm": 0.7036427853275619, "learning_rate": 6.361099571153395e-07, "loss": 1.4914, "step": 2961 }, { "epoch": 0.20638957600250846, "grad_norm": 0.7408387503191232, "learning_rate": 6.360683183875235e-07, "loss": 1.5372, "step": 2962 }, { "epoch": 0.20645925513012578, "grad_norm": 0.7178825080999842, "learning_rate": 6.360266676279981e-07, "loss": 1.6065, "step": 2963 }, { "epoch": 0.2065289342577431, "grad_norm": 0.7429861155708916, "learning_rate": 6.359850048387594e-07, "loss": 1.4552, "step": 2964 }, { "epoch": 0.20659861338536042, "grad_norm": 0.7202664843056981, "learning_rate": 6.359433300218042e-07, "loss": 1.594, "step": 2965 }, { "epoch": 0.20666829251297775, "grad_norm": 0.7222321118467498, "learning_rate": 6.359016431791294e-07, "loss": 1.5255, "step": 2966 }, { "epoch": 0.20673797164059507, "grad_norm": 0.734344864117076, "learning_rate": 6.358599443127328e-07, "loss": 1.5508, "step": 2967 }, { "epoch": 0.2068076507682124, "grad_norm": 0.7206494532948653, "learning_rate": 6.358182334246125e-07, "loss": 1.4947, "step": 2968 }, { "epoch": 0.20687732989582971, "grad_norm": 0.7140087905352821, "learning_rate": 6.357765105167676e-07, "loss": 1.5102, "step": 2969 }, { "epoch": 0.20694700902344704, "grad_norm": 0.7852221694206425, "learning_rate": 6.357347755911976e-07, "loss": 1.6091, "step": 2970 }, { "epoch": 0.20701668815106436, "grad_norm": 0.7048526642376749, "learning_rate": 6.356930286499024e-07, "loss": 1.5422, "step": 2971 }, { "epoch": 0.20708636727868168, "grad_norm": 0.8007350555344261, "learning_rate": 6.356512696948825e-07, "loss": 1.5874, "step": 2972 }, { "epoch": 0.207156046406299, "grad_norm": 0.7615620997332188, "learning_rate": 6.356094987281391e-07, "loss": 1.4809, "step": 2973 }, { "epoch": 0.20722572553391633, "grad_norm": 0.7434804810145779, "learning_rate": 6.355677157516743e-07, "loss": 1.5758, "step": 2974 }, { "epoch": 0.20729540466153365, "grad_norm": 0.7457304333301299, "learning_rate": 6.355259207674899e-07, "loss": 1.4783, "step": 2975 }, { "epoch": 0.20736508378915097, "grad_norm": 0.706990856124059, "learning_rate": 6.354841137775891e-07, "loss": 1.4879, "step": 2976 }, { "epoch": 0.2074347629167683, "grad_norm": 0.6955401431087637, "learning_rate": 6.354422947839755e-07, "loss": 1.5454, "step": 2977 }, { "epoch": 0.20750444204438562, "grad_norm": 0.7384175680602987, "learning_rate": 6.354004637886528e-07, "loss": 1.5092, "step": 2978 }, { "epoch": 0.20757412117200294, "grad_norm": 0.685234971809416, "learning_rate": 6.35358620793626e-07, "loss": 1.4753, "step": 2979 }, { "epoch": 0.20764380029962026, "grad_norm": 0.70711394306406, "learning_rate": 6.353167658009e-07, "loss": 1.4555, "step": 2980 }, { "epoch": 0.20771347942723756, "grad_norm": 0.7962597663635689, "learning_rate": 6.352748988124805e-07, "loss": 1.4569, "step": 2981 }, { "epoch": 0.20778315855485488, "grad_norm": 0.6868913171560058, "learning_rate": 6.352330198303742e-07, "loss": 1.4446, "step": 2982 }, { "epoch": 0.2078528376824722, "grad_norm": 0.7735566971642617, "learning_rate": 6.35191128856588e-07, "loss": 1.4883, "step": 2983 }, { "epoch": 0.20792251681008952, "grad_norm": 0.7079499524333197, "learning_rate": 6.351492258931292e-07, "loss": 1.5655, "step": 2984 }, { "epoch": 0.20799219593770685, "grad_norm": 0.7512965571925899, "learning_rate": 6.351073109420059e-07, "loss": 1.4857, "step": 2985 }, { "epoch": 0.20806187506532417, "grad_norm": 0.7695609293964266, "learning_rate": 6.350653840052269e-07, "loss": 1.5553, "step": 2986 }, { "epoch": 0.2081315541929415, "grad_norm": 0.7377077462933005, "learning_rate": 6.350234450848013e-07, "loss": 1.5265, "step": 2987 }, { "epoch": 0.20820123332055882, "grad_norm": 0.7476217154283284, "learning_rate": 6.349814941827387e-07, "loss": 1.6797, "step": 2988 }, { "epoch": 0.20827091244817614, "grad_norm": 0.7456407850803368, "learning_rate": 6.3493953130105e-07, "loss": 1.4848, "step": 2989 }, { "epoch": 0.20834059157579346, "grad_norm": 0.7976077275272218, "learning_rate": 6.348975564417456e-07, "loss": 1.583, "step": 2990 }, { "epoch": 0.20841027070341078, "grad_norm": 0.6623779094432293, "learning_rate": 6.348555696068374e-07, "loss": 1.4389, "step": 2991 }, { "epoch": 0.2084799498310281, "grad_norm": 0.7602196162699404, "learning_rate": 6.348135707983374e-07, "loss": 1.6182, "step": 2992 }, { "epoch": 0.20854962895864543, "grad_norm": 0.7172420272508857, "learning_rate": 6.347715600182582e-07, "loss": 1.5597, "step": 2993 }, { "epoch": 0.20861930808626275, "grad_norm": 0.7716121956224239, "learning_rate": 6.347295372686129e-07, "loss": 1.574, "step": 2994 }, { "epoch": 0.20868898721388007, "grad_norm": 0.6863157372578493, "learning_rate": 6.346875025514157e-07, "loss": 1.5149, "step": 2995 }, { "epoch": 0.2087586663414974, "grad_norm": 0.7010956595448394, "learning_rate": 6.346454558686807e-07, "loss": 1.491, "step": 2996 }, { "epoch": 0.20882834546911472, "grad_norm": 0.7242536207410032, "learning_rate": 6.34603397222423e-07, "loss": 1.4427, "step": 2997 }, { "epoch": 0.20889802459673204, "grad_norm": 0.7040783271824286, "learning_rate": 6.345613266146581e-07, "loss": 1.4716, "step": 2998 }, { "epoch": 0.20896770372434936, "grad_norm": 0.7624020600820641, "learning_rate": 6.34519244047402e-07, "loss": 1.7078, "step": 2999 }, { "epoch": 0.20903738285196669, "grad_norm": 0.7175278942824543, "learning_rate": 6.344771495226715e-07, "loss": 1.5841, "step": 3000 }, { "epoch": 0.209107061979584, "grad_norm": 0.7130038274068573, "learning_rate": 6.344350430424838e-07, "loss": 1.5632, "step": 3001 }, { "epoch": 0.20917674110720133, "grad_norm": 0.7453698321614705, "learning_rate": 6.343929246088567e-07, "loss": 1.6019, "step": 3002 }, { "epoch": 0.20924642023481865, "grad_norm": 0.7151850979286338, "learning_rate": 6.343507942238088e-07, "loss": 1.6559, "step": 3003 }, { "epoch": 0.20931609936243598, "grad_norm": 0.6936345747851341, "learning_rate": 6.343086518893588e-07, "loss": 1.4821, "step": 3004 }, { "epoch": 0.2093857784900533, "grad_norm": 0.7956254510612235, "learning_rate": 6.342664976075265e-07, "loss": 1.5881, "step": 3005 }, { "epoch": 0.20945545761767062, "grad_norm": 0.7429121066816823, "learning_rate": 6.342243313803317e-07, "loss": 1.5633, "step": 3006 }, { "epoch": 0.20952513674528794, "grad_norm": 0.8037190129898167, "learning_rate": 6.341821532097956e-07, "loss": 1.5909, "step": 3007 }, { "epoch": 0.20959481587290527, "grad_norm": 0.6477652783679634, "learning_rate": 6.34139963097939e-07, "loss": 1.488, "step": 3008 }, { "epoch": 0.2096644950005226, "grad_norm": 0.7000979494433776, "learning_rate": 6.340977610467839e-07, "loss": 1.4087, "step": 3009 }, { "epoch": 0.2097341741281399, "grad_norm": 0.7648185849056666, "learning_rate": 6.340555470583526e-07, "loss": 1.5636, "step": 3010 }, { "epoch": 0.20980385325575723, "grad_norm": 0.6881245272099351, "learning_rate": 6.340133211346684e-07, "loss": 1.4254, "step": 3011 }, { "epoch": 0.20987353238337456, "grad_norm": 0.7170054857436351, "learning_rate": 6.339710832777545e-07, "loss": 1.5242, "step": 3012 }, { "epoch": 0.20994321151099188, "grad_norm": 0.7091403344071238, "learning_rate": 6.339288334896352e-07, "loss": 1.5227, "step": 3013 }, { "epoch": 0.2100128906386092, "grad_norm": 0.7188824298263891, "learning_rate": 6.338865717723351e-07, "loss": 1.5106, "step": 3014 }, { "epoch": 0.21008256976622652, "grad_norm": 0.7123102173208373, "learning_rate": 6.338442981278796e-07, "loss": 1.6579, "step": 3015 }, { "epoch": 0.21015224889384385, "grad_norm": 0.7421057665043513, "learning_rate": 6.338020125582944e-07, "loss": 1.5459, "step": 3016 }, { "epoch": 0.21022192802146117, "grad_norm": 0.726883902227082, "learning_rate": 6.33759715065606e-07, "loss": 1.493, "step": 3017 }, { "epoch": 0.2102916071490785, "grad_norm": 0.7538909742546808, "learning_rate": 6.337174056518413e-07, "loss": 1.6548, "step": 3018 }, { "epoch": 0.2103612862766958, "grad_norm": 0.6874902631466682, "learning_rate": 6.336750843190281e-07, "loss": 1.5101, "step": 3019 }, { "epoch": 0.21043096540431314, "grad_norm": 0.6991743258217981, "learning_rate": 6.336327510691941e-07, "loss": 1.4517, "step": 3020 }, { "epoch": 0.21050064453193046, "grad_norm": 0.7216212073939949, "learning_rate": 6.335904059043684e-07, "loss": 1.5649, "step": 3021 }, { "epoch": 0.21057032365954778, "grad_norm": 0.6953455603901181, "learning_rate": 6.335480488265801e-07, "loss": 1.5648, "step": 3022 }, { "epoch": 0.2106400027871651, "grad_norm": 0.7269944245240746, "learning_rate": 6.33505679837859e-07, "loss": 1.5551, "step": 3023 }, { "epoch": 0.21070968191478243, "grad_norm": 0.7521226273898199, "learning_rate": 6.334632989402356e-07, "loss": 1.5345, "step": 3024 }, { "epoch": 0.21077936104239975, "grad_norm": 0.7288849333564393, "learning_rate": 6.334209061357407e-07, "loss": 1.5542, "step": 3025 }, { "epoch": 0.21084904017001707, "grad_norm": 0.723684896541463, "learning_rate": 6.333785014264061e-07, "loss": 1.4942, "step": 3026 }, { "epoch": 0.2109187192976344, "grad_norm": 0.8354732057230474, "learning_rate": 6.333360848142637e-07, "loss": 1.5765, "step": 3027 }, { "epoch": 0.21098839842525172, "grad_norm": 0.7266486063179814, "learning_rate": 6.332936563013465e-07, "loss": 1.5522, "step": 3028 }, { "epoch": 0.21105807755286904, "grad_norm": 0.7620578829901353, "learning_rate": 6.332512158896873e-07, "loss": 1.6742, "step": 3029 }, { "epoch": 0.21112775668048636, "grad_norm": 0.6453483203498609, "learning_rate": 6.332087635813202e-07, "loss": 1.4459, "step": 3030 }, { "epoch": 0.21119743580810368, "grad_norm": 0.7671250472225403, "learning_rate": 6.331662993782797e-07, "loss": 1.608, "step": 3031 }, { "epoch": 0.211267114935721, "grad_norm": 0.7297600261040783, "learning_rate": 6.331238232826006e-07, "loss": 1.5218, "step": 3032 }, { "epoch": 0.21133679406333833, "grad_norm": 0.6976240359205483, "learning_rate": 6.330813352963186e-07, "loss": 1.5028, "step": 3033 }, { "epoch": 0.21140647319095565, "grad_norm": 0.7366466923541604, "learning_rate": 6.330388354214697e-07, "loss": 1.5963, "step": 3034 }, { "epoch": 0.21147615231857297, "grad_norm": 0.743045908625905, "learning_rate": 6.329963236600905e-07, "loss": 1.5822, "step": 3035 }, { "epoch": 0.2115458314461903, "grad_norm": 0.7694559232108855, "learning_rate": 6.329538000142183e-07, "loss": 1.5745, "step": 3036 }, { "epoch": 0.21161551057380762, "grad_norm": 0.7136032068936781, "learning_rate": 6.329112644858911e-07, "loss": 1.6823, "step": 3037 }, { "epoch": 0.21168518970142494, "grad_norm": 0.7209064494933916, "learning_rate": 6.328687170771472e-07, "loss": 1.5415, "step": 3038 }, { "epoch": 0.21175486882904226, "grad_norm": 0.7601206403956654, "learning_rate": 6.328261577900253e-07, "loss": 1.531, "step": 3039 }, { "epoch": 0.2118245479566596, "grad_norm": 0.6959998472222233, "learning_rate": 6.327835866265652e-07, "loss": 1.4477, "step": 3040 }, { "epoch": 0.2118942270842769, "grad_norm": 0.6953710184416528, "learning_rate": 6.327410035888071e-07, "loss": 1.58, "step": 3041 }, { "epoch": 0.21196390621189423, "grad_norm": 0.6943533580699583, "learning_rate": 6.326984086787913e-07, "loss": 1.5223, "step": 3042 }, { "epoch": 0.21203358533951155, "grad_norm": 0.7408013816517577, "learning_rate": 6.326558018985593e-07, "loss": 1.5385, "step": 3043 }, { "epoch": 0.21210326446712888, "grad_norm": 0.7128173593393061, "learning_rate": 6.32613183250153e-07, "loss": 1.5008, "step": 3044 }, { "epoch": 0.2121729435947462, "grad_norm": 0.7578937862434474, "learning_rate": 6.325705527356144e-07, "loss": 1.5314, "step": 3045 }, { "epoch": 0.21224262272236352, "grad_norm": 0.7242230650402078, "learning_rate": 6.325279103569868e-07, "loss": 1.5105, "step": 3046 }, { "epoch": 0.21231230184998084, "grad_norm": 0.7271388530049001, "learning_rate": 6.324852561163136e-07, "loss": 1.56, "step": 3047 }, { "epoch": 0.21238198097759817, "grad_norm": 0.7251757959446895, "learning_rate": 6.324425900156387e-07, "loss": 1.6189, "step": 3048 }, { "epoch": 0.2124516601052155, "grad_norm": 0.6949230910117429, "learning_rate": 6.32399912057007e-07, "loss": 1.4936, "step": 3049 }, { "epoch": 0.2125213392328328, "grad_norm": 0.7083185802548545, "learning_rate": 6.323572222424636e-07, "loss": 1.6193, "step": 3050 }, { "epoch": 0.21259101836045013, "grad_norm": 0.7188362586038636, "learning_rate": 6.323145205740543e-07, "loss": 1.5033, "step": 3051 }, { "epoch": 0.21266069748806746, "grad_norm": 0.6807201669919434, "learning_rate": 6.322718070538254e-07, "loss": 1.4721, "step": 3052 }, { "epoch": 0.21273037661568478, "grad_norm": 0.7290543207491141, "learning_rate": 6.32229081683824e-07, "loss": 1.464, "step": 3053 }, { "epoch": 0.2128000557433021, "grad_norm": 0.7049188321594511, "learning_rate": 6.321863444660972e-07, "loss": 1.5481, "step": 3054 }, { "epoch": 0.21286973487091942, "grad_norm": 0.7531590172268701, "learning_rate": 6.321435954026935e-07, "loss": 1.5502, "step": 3055 }, { "epoch": 0.21293941399853675, "grad_norm": 0.7204791442591438, "learning_rate": 6.321008344956612e-07, "loss": 1.5385, "step": 3056 }, { "epoch": 0.21300909312615407, "grad_norm": 0.766737235051107, "learning_rate": 6.320580617470498e-07, "loss": 1.5535, "step": 3057 }, { "epoch": 0.2130787722537714, "grad_norm": 0.6949576898262453, "learning_rate": 6.320152771589089e-07, "loss": 1.5787, "step": 3058 }, { "epoch": 0.21314845138138871, "grad_norm": 0.7854280947188981, "learning_rate": 6.319724807332886e-07, "loss": 1.5751, "step": 3059 }, { "epoch": 0.21321813050900604, "grad_norm": 0.7310738000268694, "learning_rate": 6.319296724722401e-07, "loss": 1.4912, "step": 3060 }, { "epoch": 0.21328780963662336, "grad_norm": 0.710144969632082, "learning_rate": 6.318868523778147e-07, "loss": 1.501, "step": 3061 }, { "epoch": 0.21335748876424068, "grad_norm": 0.7624755707474923, "learning_rate": 6.318440204520646e-07, "loss": 1.5925, "step": 3062 }, { "epoch": 0.213427167891858, "grad_norm": 0.6779707671850002, "learning_rate": 6.318011766970422e-07, "loss": 1.566, "step": 3063 }, { "epoch": 0.21349684701947533, "grad_norm": 0.7282347315793222, "learning_rate": 6.317583211148007e-07, "loss": 1.637, "step": 3064 }, { "epoch": 0.21356652614709265, "grad_norm": 0.6777857214243174, "learning_rate": 6.31715453707394e-07, "loss": 1.5141, "step": 3065 }, { "epoch": 0.21363620527470997, "grad_norm": 0.7409112879146345, "learning_rate": 6.316725744768762e-07, "loss": 1.5923, "step": 3066 }, { "epoch": 0.2137058844023273, "grad_norm": 0.7024204744147935, "learning_rate": 6.316296834253022e-07, "loss": 1.5464, "step": 3067 }, { "epoch": 0.21377556352994462, "grad_norm": 0.6877856023309812, "learning_rate": 6.315867805547275e-07, "loss": 1.5618, "step": 3068 }, { "epoch": 0.21384524265756194, "grad_norm": 0.7212486356758446, "learning_rate": 6.31543865867208e-07, "loss": 1.5316, "step": 3069 }, { "epoch": 0.21391492178517926, "grad_norm": 0.776152740751751, "learning_rate": 6.315009393648003e-07, "loss": 1.6051, "step": 3070 }, { "epoch": 0.21398460091279659, "grad_norm": 0.7071509959023832, "learning_rate": 6.314580010495615e-07, "loss": 1.5317, "step": 3071 }, { "epoch": 0.2140542800404139, "grad_norm": 0.6884549731762428, "learning_rate": 6.314150509235494e-07, "loss": 1.4914, "step": 3072 }, { "epoch": 0.2141239591680312, "grad_norm": 0.7408015065712721, "learning_rate": 6.313720889888221e-07, "loss": 1.5193, "step": 3073 }, { "epoch": 0.21419363829564853, "grad_norm": 0.7518489520556682, "learning_rate": 6.313291152474385e-07, "loss": 1.5984, "step": 3074 }, { "epoch": 0.21426331742326585, "grad_norm": 0.6931740419432002, "learning_rate": 6.312861297014581e-07, "loss": 1.5873, "step": 3075 }, { "epoch": 0.21433299655088317, "grad_norm": 0.7460738164111779, "learning_rate": 6.312431323529407e-07, "loss": 1.5159, "step": 3076 }, { "epoch": 0.2144026756785005, "grad_norm": 0.7066855472272406, "learning_rate": 6.312001232039468e-07, "loss": 1.4636, "step": 3077 }, { "epoch": 0.21447235480611782, "grad_norm": 0.7111733820215189, "learning_rate": 6.311571022565376e-07, "loss": 1.6072, "step": 3078 }, { "epoch": 0.21454203393373514, "grad_norm": 0.7146743692003119, "learning_rate": 6.311140695127748e-07, "loss": 1.5797, "step": 3079 }, { "epoch": 0.21461171306135246, "grad_norm": 0.7428280015586619, "learning_rate": 6.310710249747204e-07, "loss": 1.5731, "step": 3080 }, { "epoch": 0.21468139218896978, "grad_norm": 0.7142132227243223, "learning_rate": 6.310279686444374e-07, "loss": 1.5772, "step": 3081 }, { "epoch": 0.2147510713165871, "grad_norm": 0.7367688211537811, "learning_rate": 6.30984900523989e-07, "loss": 1.5302, "step": 3082 }, { "epoch": 0.21482075044420443, "grad_norm": 0.7051161284853323, "learning_rate": 6.309418206154392e-07, "loss": 1.3608, "step": 3083 }, { "epoch": 0.21489042957182175, "grad_norm": 0.6561827725117855, "learning_rate": 6.308987289208523e-07, "loss": 1.442, "step": 3084 }, { "epoch": 0.21496010869943907, "grad_norm": 0.7060953414362706, "learning_rate": 6.308556254422937e-07, "loss": 1.5811, "step": 3085 }, { "epoch": 0.2150297878270564, "grad_norm": 0.712986152298592, "learning_rate": 6.308125101818287e-07, "loss": 1.4907, "step": 3086 }, { "epoch": 0.21509946695467372, "grad_norm": 0.7064297350621125, "learning_rate": 6.307693831415236e-07, "loss": 1.4074, "step": 3087 }, { "epoch": 0.21516914608229104, "grad_norm": 0.6978571997252455, "learning_rate": 6.307262443234451e-07, "loss": 1.5763, "step": 3088 }, { "epoch": 0.21523882520990836, "grad_norm": 0.7204214153527864, "learning_rate": 6.306830937296605e-07, "loss": 1.5953, "step": 3089 }, { "epoch": 0.21530850433752569, "grad_norm": 0.7251712825238252, "learning_rate": 6.306399313622376e-07, "loss": 1.4922, "step": 3090 }, { "epoch": 0.215378183465143, "grad_norm": 0.789317477528938, "learning_rate": 6.30596757223245e-07, "loss": 1.5323, "step": 3091 }, { "epoch": 0.21544786259276033, "grad_norm": 0.7018313808348219, "learning_rate": 6.305535713147515e-07, "loss": 1.5525, "step": 3092 }, { "epoch": 0.21551754172037765, "grad_norm": 0.6641796971823418, "learning_rate": 6.305103736388268e-07, "loss": 1.4296, "step": 3093 }, { "epoch": 0.21558722084799498, "grad_norm": 0.7333376762555791, "learning_rate": 6.30467164197541e-07, "loss": 1.5683, "step": 3094 }, { "epoch": 0.2156568999756123, "grad_norm": 0.715162682097904, "learning_rate": 6.304239429929647e-07, "loss": 1.5447, "step": 3095 }, { "epoch": 0.21572657910322962, "grad_norm": 0.705125764957599, "learning_rate": 6.303807100271694e-07, "loss": 1.5388, "step": 3096 }, { "epoch": 0.21579625823084694, "grad_norm": 0.6932910886448272, "learning_rate": 6.303374653022264e-07, "loss": 1.3299, "step": 3097 }, { "epoch": 0.21586593735846427, "grad_norm": 0.7285862258034365, "learning_rate": 6.302942088202086e-07, "loss": 1.5698, "step": 3098 }, { "epoch": 0.2159356164860816, "grad_norm": 0.6525649404815276, "learning_rate": 6.302509405831887e-07, "loss": 1.4322, "step": 3099 }, { "epoch": 0.2160052956136989, "grad_norm": 0.7069636227599213, "learning_rate": 6.302076605932402e-07, "loss": 1.5997, "step": 3100 }, { "epoch": 0.21607497474131623, "grad_norm": 0.7333395774155352, "learning_rate": 6.301643688524372e-07, "loss": 1.5292, "step": 3101 }, { "epoch": 0.21614465386893356, "grad_norm": 0.7868825512640599, "learning_rate": 6.301210653628545e-07, "loss": 1.5164, "step": 3102 }, { "epoch": 0.21621433299655088, "grad_norm": 0.6477530449996382, "learning_rate": 6.300777501265669e-07, "loss": 1.4932, "step": 3103 }, { "epoch": 0.2162840121241682, "grad_norm": 0.7382703043519941, "learning_rate": 6.300344231456505e-07, "loss": 1.5096, "step": 3104 }, { "epoch": 0.21635369125178552, "grad_norm": 0.7197826816393428, "learning_rate": 6.299910844221815e-07, "loss": 1.5127, "step": 3105 }, { "epoch": 0.21642337037940285, "grad_norm": 0.7162601287647885, "learning_rate": 6.299477339582367e-07, "loss": 1.5613, "step": 3106 }, { "epoch": 0.21649304950702017, "grad_norm": 0.6993308172485544, "learning_rate": 6.299043717558937e-07, "loss": 1.3961, "step": 3107 }, { "epoch": 0.2165627286346375, "grad_norm": 0.7050100563001422, "learning_rate": 6.298609978172303e-07, "loss": 1.522, "step": 3108 }, { "epoch": 0.2166324077622548, "grad_norm": 0.7030621104722599, "learning_rate": 6.298176121443252e-07, "loss": 1.5093, "step": 3109 }, { "epoch": 0.21670208688987214, "grad_norm": 0.6658177312948771, "learning_rate": 6.297742147392577e-07, "loss": 1.5186, "step": 3110 }, { "epoch": 0.21677176601748946, "grad_norm": 0.6695545637176513, "learning_rate": 6.297308056041072e-07, "loss": 1.4912, "step": 3111 }, { "epoch": 0.21684144514510678, "grad_norm": 0.6955784740818475, "learning_rate": 6.296873847409542e-07, "loss": 1.5496, "step": 3112 }, { "epoch": 0.2169111242727241, "grad_norm": 0.6878524383136772, "learning_rate": 6.296439521518792e-07, "loss": 1.5043, "step": 3113 }, { "epoch": 0.21698080340034143, "grad_norm": 0.7220128624196103, "learning_rate": 6.296005078389639e-07, "loss": 1.5356, "step": 3114 }, { "epoch": 0.21705048252795875, "grad_norm": 0.6693205323609417, "learning_rate": 6.2955705180429e-07, "loss": 1.3807, "step": 3115 }, { "epoch": 0.21712016165557607, "grad_norm": 0.68769141490313, "learning_rate": 6.295135840499401e-07, "loss": 1.6205, "step": 3116 }, { "epoch": 0.2171898407831934, "grad_norm": 0.7197082361841207, "learning_rate": 6.294701045779974e-07, "loss": 1.4412, "step": 3117 }, { "epoch": 0.21725951991081072, "grad_norm": 0.7310208796664356, "learning_rate": 6.294266133905453e-07, "loss": 1.612, "step": 3118 }, { "epoch": 0.21732919903842804, "grad_norm": 0.7665599518177661, "learning_rate": 6.293831104896682e-07, "loss": 1.5117, "step": 3119 }, { "epoch": 0.21739887816604536, "grad_norm": 0.7444580585825434, "learning_rate": 6.293395958774507e-07, "loss": 1.6397, "step": 3120 }, { "epoch": 0.21746855729366268, "grad_norm": 0.7523077640557353, "learning_rate": 6.29296069555978e-07, "loss": 1.5666, "step": 3121 }, { "epoch": 0.21753823642128, "grad_norm": 0.7124509029071153, "learning_rate": 6.292525315273363e-07, "loss": 1.3989, "step": 3122 }, { "epoch": 0.21760791554889733, "grad_norm": 0.6772900926590302, "learning_rate": 6.292089817936119e-07, "loss": 1.4556, "step": 3123 }, { "epoch": 0.21767759467651465, "grad_norm": 0.6820042166343526, "learning_rate": 6.291654203568915e-07, "loss": 1.4178, "step": 3124 }, { "epoch": 0.21774727380413197, "grad_norm": 0.7656657737383707, "learning_rate": 6.29121847219263e-07, "loss": 1.5057, "step": 3125 }, { "epoch": 0.2178169529317493, "grad_norm": 0.7702721728012195, "learning_rate": 6.290782623828146e-07, "loss": 1.7169, "step": 3126 }, { "epoch": 0.21788663205936662, "grad_norm": 0.7139401052066671, "learning_rate": 6.290346658496345e-07, "loss": 1.4959, "step": 3127 }, { "epoch": 0.21795631118698394, "grad_norm": 0.6650157900893546, "learning_rate": 6.289910576218124e-07, "loss": 1.5168, "step": 3128 }, { "epoch": 0.21802599031460126, "grad_norm": 0.7229572770264344, "learning_rate": 6.289474377014378e-07, "loss": 1.5193, "step": 3129 }, { "epoch": 0.2180956694422186, "grad_norm": 0.7516357460383902, "learning_rate": 6.289038060906011e-07, "loss": 1.6042, "step": 3130 }, { "epoch": 0.2181653485698359, "grad_norm": 0.6990707991385572, "learning_rate": 6.288601627913935e-07, "loss": 1.5118, "step": 3131 }, { "epoch": 0.21823502769745323, "grad_norm": 0.7402279645798079, "learning_rate": 6.288165078059062e-07, "loss": 1.5159, "step": 3132 }, { "epoch": 0.21830470682507055, "grad_norm": 0.7071125829282526, "learning_rate": 6.287728411362312e-07, "loss": 1.4857, "step": 3133 }, { "epoch": 0.21837438595268788, "grad_norm": 0.7145022706649667, "learning_rate": 6.287291627844613e-07, "loss": 1.4423, "step": 3134 }, { "epoch": 0.2184440650803052, "grad_norm": 0.7153095213061711, "learning_rate": 6.286854727526895e-07, "loss": 1.5401, "step": 3135 }, { "epoch": 0.21851374420792252, "grad_norm": 0.7388320724188436, "learning_rate": 6.286417710430096e-07, "loss": 1.5551, "step": 3136 }, { "epoch": 0.21858342333553984, "grad_norm": 0.7351605398058645, "learning_rate": 6.285980576575158e-07, "loss": 1.5743, "step": 3137 }, { "epoch": 0.21865310246315717, "grad_norm": 0.7103834284273812, "learning_rate": 6.28554332598303e-07, "loss": 1.62, "step": 3138 }, { "epoch": 0.2187227815907745, "grad_norm": 0.7124917000500621, "learning_rate": 6.285105958674667e-07, "loss": 1.5462, "step": 3139 }, { "epoch": 0.2187924607183918, "grad_norm": 0.6719455946928833, "learning_rate": 6.284668474671026e-07, "loss": 1.4454, "step": 3140 }, { "epoch": 0.21886213984600913, "grad_norm": 0.7342723618991475, "learning_rate": 6.284230873993073e-07, "loss": 1.5297, "step": 3141 }, { "epoch": 0.21893181897362646, "grad_norm": 0.7050821767435831, "learning_rate": 6.28379315666178e-07, "loss": 1.4949, "step": 3142 }, { "epoch": 0.21900149810124378, "grad_norm": 0.7305929985653432, "learning_rate": 6.283355322698121e-07, "loss": 1.5449, "step": 3143 }, { "epoch": 0.2190711772288611, "grad_norm": 0.7355604484531012, "learning_rate": 6.282917372123081e-07, "loss": 1.631, "step": 3144 }, { "epoch": 0.21914085635647843, "grad_norm": 0.6723855105370937, "learning_rate": 6.282479304957646e-07, "loss": 1.521, "step": 3145 }, { "epoch": 0.21921053548409575, "grad_norm": 0.7500049197897934, "learning_rate": 6.282041121222808e-07, "loss": 1.4776, "step": 3146 }, { "epoch": 0.21928021461171307, "grad_norm": 0.705067231005872, "learning_rate": 6.281602820939566e-07, "loss": 1.4577, "step": 3147 }, { "epoch": 0.2193498937393304, "grad_norm": 0.7447843195035967, "learning_rate": 6.281164404128927e-07, "loss": 1.464, "step": 3148 }, { "epoch": 0.21941957286694772, "grad_norm": 0.6669666094624056, "learning_rate": 6.280725870811896e-07, "loss": 1.4811, "step": 3149 }, { "epoch": 0.21948925199456504, "grad_norm": 0.7372591631824635, "learning_rate": 6.280287221009493e-07, "loss": 1.4874, "step": 3150 }, { "epoch": 0.21955893112218236, "grad_norm": 0.7400195760354873, "learning_rate": 6.279848454742736e-07, "loss": 1.5233, "step": 3151 }, { "epoch": 0.21962861024979968, "grad_norm": 0.7100152026287823, "learning_rate": 6.279409572032653e-07, "loss": 1.5391, "step": 3152 }, { "epoch": 0.219698289377417, "grad_norm": 0.6998698777309963, "learning_rate": 6.278970572900277e-07, "loss": 1.5045, "step": 3153 }, { "epoch": 0.21976796850503433, "grad_norm": 0.8130378158487237, "learning_rate": 6.278531457366644e-07, "loss": 1.5623, "step": 3154 }, { "epoch": 0.21983764763265165, "grad_norm": 0.7189054680097751, "learning_rate": 6.278092225452796e-07, "loss": 1.4549, "step": 3155 }, { "epoch": 0.21990732676026897, "grad_norm": 0.7060714272932562, "learning_rate": 6.277652877179786e-07, "loss": 1.6297, "step": 3156 }, { "epoch": 0.2199770058878863, "grad_norm": 0.7851668596549601, "learning_rate": 6.277213412568665e-07, "loss": 1.5901, "step": 3157 }, { "epoch": 0.22004668501550362, "grad_norm": 0.721129749016274, "learning_rate": 6.276773831640495e-07, "loss": 1.5617, "step": 3158 }, { "epoch": 0.22011636414312094, "grad_norm": 0.7291404792430866, "learning_rate": 6.276334134416341e-07, "loss": 1.6845, "step": 3159 }, { "epoch": 0.22018604327073826, "grad_norm": 0.6621504202587892, "learning_rate": 6.275894320917273e-07, "loss": 1.5213, "step": 3160 }, { "epoch": 0.22025572239835559, "grad_norm": 0.7520204029846614, "learning_rate": 6.27545439116437e-07, "loss": 1.5872, "step": 3161 }, { "epoch": 0.2203254015259729, "grad_norm": 0.7445862369015398, "learning_rate": 6.275014345178713e-07, "loss": 1.5763, "step": 3162 }, { "epoch": 0.22039508065359023, "grad_norm": 0.7290777915425307, "learning_rate": 6.27457418298139e-07, "loss": 1.6284, "step": 3163 }, { "epoch": 0.22046475978120753, "grad_norm": 0.752459227931593, "learning_rate": 6.274133904593496e-07, "loss": 1.6027, "step": 3164 }, { "epoch": 0.22053443890882485, "grad_norm": 0.6618546415584925, "learning_rate": 6.273693510036126e-07, "loss": 1.5229, "step": 3165 }, { "epoch": 0.22060411803644217, "grad_norm": 0.6864959676253173, "learning_rate": 6.273252999330389e-07, "loss": 1.6194, "step": 3166 }, { "epoch": 0.2206737971640595, "grad_norm": 0.8145056745835206, "learning_rate": 6.272812372497392e-07, "loss": 1.3994, "step": 3167 }, { "epoch": 0.22074347629167682, "grad_norm": 0.7224351871692737, "learning_rate": 6.272371629558254e-07, "loss": 1.6291, "step": 3168 }, { "epoch": 0.22081315541929414, "grad_norm": 0.752897078662678, "learning_rate": 6.271930770534093e-07, "loss": 1.6119, "step": 3169 }, { "epoch": 0.22088283454691146, "grad_norm": 0.70856599701562, "learning_rate": 6.271489795446038e-07, "loss": 1.6155, "step": 3170 }, { "epoch": 0.22095251367452878, "grad_norm": 0.755885503203361, "learning_rate": 6.27104870431522e-07, "loss": 1.6431, "step": 3171 }, { "epoch": 0.2210221928021461, "grad_norm": 0.7865730300687626, "learning_rate": 6.27060749716278e-07, "loss": 1.4305, "step": 3172 }, { "epoch": 0.22109187192976343, "grad_norm": 0.7013495707737089, "learning_rate": 6.270166174009857e-07, "loss": 1.4782, "step": 3173 }, { "epoch": 0.22116155105738075, "grad_norm": 0.6739764223939262, "learning_rate": 6.269724734877604e-07, "loss": 1.5228, "step": 3174 }, { "epoch": 0.22123123018499807, "grad_norm": 0.7092554794529752, "learning_rate": 6.269283179787171e-07, "loss": 1.5998, "step": 3175 }, { "epoch": 0.2213009093126154, "grad_norm": 0.68071132595703, "learning_rate": 6.268841508759725e-07, "loss": 1.5796, "step": 3176 }, { "epoch": 0.22137058844023272, "grad_norm": 0.6705866455284171, "learning_rate": 6.268399721816427e-07, "loss": 1.5333, "step": 3177 }, { "epoch": 0.22144026756785004, "grad_norm": 0.7316307681953858, "learning_rate": 6.267957818978449e-07, "loss": 1.5732, "step": 3178 }, { "epoch": 0.22150994669546736, "grad_norm": 0.7036388021018205, "learning_rate": 6.267515800266969e-07, "loss": 1.4423, "step": 3179 }, { "epoch": 0.22157962582308469, "grad_norm": 0.701486644574639, "learning_rate": 6.267073665703168e-07, "loss": 1.5843, "step": 3180 }, { "epoch": 0.221649304950702, "grad_norm": 0.7315723398194484, "learning_rate": 6.266631415308236e-07, "loss": 1.5913, "step": 3181 }, { "epoch": 0.22171898407831933, "grad_norm": 0.7610730029702969, "learning_rate": 6.266189049103364e-07, "loss": 1.5173, "step": 3182 }, { "epoch": 0.22178866320593665, "grad_norm": 0.7267234976534973, "learning_rate": 6.265746567109752e-07, "loss": 1.5464, "step": 3183 }, { "epoch": 0.22185834233355398, "grad_norm": 0.7581939561966579, "learning_rate": 6.265303969348606e-07, "loss": 1.5121, "step": 3184 }, { "epoch": 0.2219280214611713, "grad_norm": 0.7818869908905624, "learning_rate": 6.264861255841136e-07, "loss": 1.6076, "step": 3185 }, { "epoch": 0.22199770058878862, "grad_norm": 0.7334237426975201, "learning_rate": 6.264418426608556e-07, "loss": 1.5393, "step": 3186 }, { "epoch": 0.22206737971640594, "grad_norm": 0.666609199162287, "learning_rate": 6.263975481672088e-07, "loss": 1.4628, "step": 3187 }, { "epoch": 0.22213705884402327, "grad_norm": 0.7218983139497597, "learning_rate": 6.26353242105296e-07, "loss": 1.5396, "step": 3188 }, { "epoch": 0.2222067379716406, "grad_norm": 0.7243553909759005, "learning_rate": 6.263089244772403e-07, "loss": 1.4566, "step": 3189 }, { "epoch": 0.2222764170992579, "grad_norm": 0.7754149969874606, "learning_rate": 6.262645952851656e-07, "loss": 1.6057, "step": 3190 }, { "epoch": 0.22234609622687523, "grad_norm": 0.7007605347212531, "learning_rate": 6.262202545311961e-07, "loss": 1.5546, "step": 3191 }, { "epoch": 0.22241577535449256, "grad_norm": 0.7193548432527994, "learning_rate": 6.261759022174569e-07, "loss": 1.5214, "step": 3192 }, { "epoch": 0.22248545448210988, "grad_norm": 0.7430888845944795, "learning_rate": 6.261315383460732e-07, "loss": 1.5679, "step": 3193 }, { "epoch": 0.2225551336097272, "grad_norm": 0.7067278834368642, "learning_rate": 6.260871629191712e-07, "loss": 1.6559, "step": 3194 }, { "epoch": 0.22262481273734452, "grad_norm": 0.679596178691687, "learning_rate": 6.260427759388774e-07, "loss": 1.4959, "step": 3195 }, { "epoch": 0.22269449186496185, "grad_norm": 0.736290631526461, "learning_rate": 6.25998377407319e-07, "loss": 1.4611, "step": 3196 }, { "epoch": 0.22276417099257917, "grad_norm": 0.7230193809098943, "learning_rate": 6.259539673266236e-07, "loss": 1.5939, "step": 3197 }, { "epoch": 0.2228338501201965, "grad_norm": 0.7508187483513579, "learning_rate": 6.259095456989193e-07, "loss": 1.5279, "step": 3198 }, { "epoch": 0.22290352924781381, "grad_norm": 0.7190325739384879, "learning_rate": 6.258651125263351e-07, "loss": 1.5578, "step": 3199 }, { "epoch": 0.22297320837543114, "grad_norm": 0.6868033846137466, "learning_rate": 6.258206678110001e-07, "loss": 1.5303, "step": 3200 }, { "epoch": 0.22304288750304846, "grad_norm": 0.7103286386533224, "learning_rate": 6.257762115550445e-07, "loss": 1.5063, "step": 3201 }, { "epoch": 0.22311256663066578, "grad_norm": 0.7159508152038254, "learning_rate": 6.257317437605984e-07, "loss": 1.5417, "step": 3202 }, { "epoch": 0.2231822457582831, "grad_norm": 0.754030296647133, "learning_rate": 6.256872644297928e-07, "loss": 1.593, "step": 3203 }, { "epoch": 0.22325192488590043, "grad_norm": 0.7129339711853596, "learning_rate": 6.256427735647596e-07, "loss": 1.5354, "step": 3204 }, { "epoch": 0.22332160401351775, "grad_norm": 0.7398839096789644, "learning_rate": 6.255982711676306e-07, "loss": 1.6543, "step": 3205 }, { "epoch": 0.22339128314113507, "grad_norm": 0.7076361329437126, "learning_rate": 6.255537572405385e-07, "loss": 1.5077, "step": 3206 }, { "epoch": 0.2234609622687524, "grad_norm": 0.7558888533563315, "learning_rate": 6.255092317856164e-07, "loss": 1.5365, "step": 3207 }, { "epoch": 0.22353064139636972, "grad_norm": 0.6913289274165797, "learning_rate": 6.254646948049982e-07, "loss": 1.4091, "step": 3208 }, { "epoch": 0.22360032052398704, "grad_norm": 0.6862729311578976, "learning_rate": 6.254201463008183e-07, "loss": 1.5289, "step": 3209 }, { "epoch": 0.22366999965160436, "grad_norm": 0.7032564177413655, "learning_rate": 6.253755862752113e-07, "loss": 1.6496, "step": 3210 }, { "epoch": 0.22373967877922168, "grad_norm": 0.672748516405407, "learning_rate": 6.253310147303128e-07, "loss": 1.3317, "step": 3211 }, { "epoch": 0.223809357906839, "grad_norm": 0.6511809527835895, "learning_rate": 6.252864316682587e-07, "loss": 1.4124, "step": 3212 }, { "epoch": 0.22387903703445633, "grad_norm": 0.785166870644245, "learning_rate": 6.252418370911853e-07, "loss": 1.4795, "step": 3213 }, { "epoch": 0.22394871616207365, "grad_norm": 0.7156469515997128, "learning_rate": 6.2519723100123e-07, "loss": 1.61, "step": 3214 }, { "epoch": 0.22401839528969097, "grad_norm": 0.7322960624451204, "learning_rate": 6.251526134005302e-07, "loss": 1.6137, "step": 3215 }, { "epoch": 0.2240880744173083, "grad_norm": 0.7298373702823864, "learning_rate": 6.251079842912242e-07, "loss": 1.5798, "step": 3216 }, { "epoch": 0.22415775354492562, "grad_norm": 0.7420559329181559, "learning_rate": 6.250633436754507e-07, "loss": 1.569, "step": 3217 }, { "epoch": 0.22422743267254294, "grad_norm": 0.6912194583934528, "learning_rate": 6.25018691555349e-07, "loss": 1.5474, "step": 3218 }, { "epoch": 0.22429711180016026, "grad_norm": 0.8192448526866579, "learning_rate": 6.249740279330586e-07, "loss": 1.691, "step": 3219 }, { "epoch": 0.2243667909277776, "grad_norm": 0.712953611035292, "learning_rate": 6.249293528107203e-07, "loss": 1.5323, "step": 3220 }, { "epoch": 0.2244364700553949, "grad_norm": 0.7296944378767534, "learning_rate": 6.248846661904748e-07, "loss": 1.5539, "step": 3221 }, { "epoch": 0.22450614918301223, "grad_norm": 0.7445076520950352, "learning_rate": 6.248399680744637e-07, "loss": 1.5898, "step": 3222 }, { "epoch": 0.22457582831062955, "grad_norm": 0.7085715501497795, "learning_rate": 6.247952584648289e-07, "loss": 1.6631, "step": 3223 }, { "epoch": 0.22464550743824688, "grad_norm": 0.73313301704967, "learning_rate": 6.247505373637131e-07, "loss": 1.6425, "step": 3224 }, { "epoch": 0.2247151865658642, "grad_norm": 0.7468409271178961, "learning_rate": 6.247058047732591e-07, "loss": 1.4843, "step": 3225 }, { "epoch": 0.22478486569348152, "grad_norm": 0.7406310185623337, "learning_rate": 6.24661060695611e-07, "loss": 1.5649, "step": 3226 }, { "epoch": 0.22485454482109885, "grad_norm": 0.7286940068337966, "learning_rate": 6.246163051329129e-07, "loss": 1.5882, "step": 3227 }, { "epoch": 0.22492422394871617, "grad_norm": 0.7416579984863053, "learning_rate": 6.245715380873094e-07, "loss": 1.5131, "step": 3228 }, { "epoch": 0.2249939030763335, "grad_norm": 0.678153652186507, "learning_rate": 6.245267595609461e-07, "loss": 1.4576, "step": 3229 }, { "epoch": 0.2250635822039508, "grad_norm": 0.7760279305504684, "learning_rate": 6.244819695559686e-07, "loss": 1.4768, "step": 3230 }, { "epoch": 0.22513326133156814, "grad_norm": 0.6735251740482139, "learning_rate": 6.244371680745236e-07, "loss": 1.4129, "step": 3231 }, { "epoch": 0.22520294045918546, "grad_norm": 0.7608296780045913, "learning_rate": 6.243923551187581e-07, "loss": 1.5914, "step": 3232 }, { "epoch": 0.22527261958680278, "grad_norm": 0.7262753496045481, "learning_rate": 6.243475306908191e-07, "loss": 1.5632, "step": 3233 }, { "epoch": 0.2253422987144201, "grad_norm": 0.7168146417665728, "learning_rate": 6.243026947928552e-07, "loss": 1.5634, "step": 3234 }, { "epoch": 0.22541197784203743, "grad_norm": 0.7666596628392616, "learning_rate": 6.24257847427015e-07, "loss": 1.5986, "step": 3235 }, { "epoch": 0.22548165696965475, "grad_norm": 0.7096138543338777, "learning_rate": 6.242129885954475e-07, "loss": 1.5359, "step": 3236 }, { "epoch": 0.22555133609727207, "grad_norm": 0.7387515003643389, "learning_rate": 6.241681183003026e-07, "loss": 1.533, "step": 3237 }, { "epoch": 0.2256210152248894, "grad_norm": 0.6686624505246506, "learning_rate": 6.241232365437303e-07, "loss": 1.4926, "step": 3238 }, { "epoch": 0.22569069435250672, "grad_norm": 0.7244864005371732, "learning_rate": 6.240783433278816e-07, "loss": 1.5109, "step": 3239 }, { "epoch": 0.22576037348012404, "grad_norm": 0.7300898381688603, "learning_rate": 6.240334386549079e-07, "loss": 1.4947, "step": 3240 }, { "epoch": 0.22583005260774136, "grad_norm": 0.694543083655283, "learning_rate": 6.239885225269611e-07, "loss": 1.5542, "step": 3241 }, { "epoch": 0.22589973173535868, "grad_norm": 0.6938706067751353, "learning_rate": 6.239435949461937e-07, "loss": 1.5457, "step": 3242 }, { "epoch": 0.225969410862976, "grad_norm": 0.8120680247426197, "learning_rate": 6.238986559147587e-07, "loss": 1.4822, "step": 3243 }, { "epoch": 0.22603908999059333, "grad_norm": 1.0393662763357927, "learning_rate": 6.238537054348097e-07, "loss": 1.5993, "step": 3244 }, { "epoch": 0.22610876911821065, "grad_norm": 0.6939123246891056, "learning_rate": 6.238087435085006e-07, "loss": 1.5299, "step": 3245 }, { "epoch": 0.22617844824582797, "grad_norm": 0.7103542178040833, "learning_rate": 6.237637701379864e-07, "loss": 1.5642, "step": 3246 }, { "epoch": 0.2262481273734453, "grad_norm": 0.6790085012080815, "learning_rate": 6.237187853254221e-07, "loss": 1.4859, "step": 3247 }, { "epoch": 0.22631780650106262, "grad_norm": 0.7613369388575224, "learning_rate": 6.236737890729635e-07, "loss": 1.5173, "step": 3248 }, { "epoch": 0.22638748562867994, "grad_norm": 0.7266168413309799, "learning_rate": 6.23628781382767e-07, "loss": 1.5771, "step": 3249 }, { "epoch": 0.22645716475629726, "grad_norm": 0.7838417336138501, "learning_rate": 6.235837622569894e-07, "loss": 1.6464, "step": 3250 }, { "epoch": 0.22652684388391459, "grad_norm": 0.6878127982873993, "learning_rate": 6.235387316977881e-07, "loss": 1.4028, "step": 3251 }, { "epoch": 0.2265965230115319, "grad_norm": 0.6759827777027689, "learning_rate": 6.23493689707321e-07, "loss": 1.4111, "step": 3252 }, { "epoch": 0.22666620213914923, "grad_norm": 0.7563518707403992, "learning_rate": 6.234486362877468e-07, "loss": 1.6306, "step": 3253 }, { "epoch": 0.22673588126676655, "grad_norm": 0.705109375720128, "learning_rate": 6.234035714412243e-07, "loss": 1.4413, "step": 3254 }, { "epoch": 0.22680556039438388, "grad_norm": 0.6972099729276918, "learning_rate": 6.233584951699133e-07, "loss": 1.5258, "step": 3255 }, { "epoch": 0.22687523952200117, "grad_norm": 0.7448910365246851, "learning_rate": 6.233134074759739e-07, "loss": 1.5063, "step": 3256 }, { "epoch": 0.2269449186496185, "grad_norm": 0.7259674297447937, "learning_rate": 6.232683083615668e-07, "loss": 1.6128, "step": 3257 }, { "epoch": 0.22701459777723582, "grad_norm": 0.6804071352169611, "learning_rate": 6.23223197828853e-07, "loss": 1.4106, "step": 3258 }, { "epoch": 0.22708427690485314, "grad_norm": 0.7116889143448107, "learning_rate": 6.231780758799946e-07, "loss": 1.628, "step": 3259 }, { "epoch": 0.22715395603247046, "grad_norm": 0.7161916261000766, "learning_rate": 6.231329425171538e-07, "loss": 1.6124, "step": 3260 }, { "epoch": 0.22722363516008778, "grad_norm": 0.7276125930319626, "learning_rate": 6.230877977424936e-07, "loss": 1.5841, "step": 3261 }, { "epoch": 0.2272933142877051, "grad_norm": 0.7186008176701532, "learning_rate": 6.230426415581773e-07, "loss": 1.5515, "step": 3262 }, { "epoch": 0.22736299341532243, "grad_norm": 0.7167681974231933, "learning_rate": 6.229974739663689e-07, "loss": 1.6088, "step": 3263 }, { "epoch": 0.22743267254293975, "grad_norm": 0.706065353428564, "learning_rate": 6.229522949692331e-07, "loss": 1.556, "step": 3264 }, { "epoch": 0.22750235167055707, "grad_norm": 0.7269340461885619, "learning_rate": 6.229071045689346e-07, "loss": 1.5334, "step": 3265 }, { "epoch": 0.2275720307981744, "grad_norm": 0.7456034153946854, "learning_rate": 6.228619027676394e-07, "loss": 1.4739, "step": 3266 }, { "epoch": 0.22764170992579172, "grad_norm": 0.7358562726000603, "learning_rate": 6.228166895675134e-07, "loss": 1.6216, "step": 3267 }, { "epoch": 0.22771138905340904, "grad_norm": 0.7259410956511649, "learning_rate": 6.227714649707234e-07, "loss": 1.5702, "step": 3268 }, { "epoch": 0.22778106818102636, "grad_norm": 0.6678683131356811, "learning_rate": 6.227262289794368e-07, "loss": 1.451, "step": 3269 }, { "epoch": 0.2278507473086437, "grad_norm": 0.8432593479988705, "learning_rate": 6.226809815958212e-07, "loss": 1.749, "step": 3270 }, { "epoch": 0.227920426436261, "grad_norm": 0.6699718251478374, "learning_rate": 6.22635722822045e-07, "loss": 1.3672, "step": 3271 }, { "epoch": 0.22799010556387833, "grad_norm": 0.7134528447425973, "learning_rate": 6.22590452660277e-07, "loss": 1.4964, "step": 3272 }, { "epoch": 0.22805978469149565, "grad_norm": 0.8470823062532795, "learning_rate": 6.22545171112687e-07, "loss": 1.7031, "step": 3273 }, { "epoch": 0.22812946381911298, "grad_norm": 0.7057268115009272, "learning_rate": 6.224998781814445e-07, "loss": 1.5361, "step": 3274 }, { "epoch": 0.2281991429467303, "grad_norm": 0.7369497880685594, "learning_rate": 6.224545738687203e-07, "loss": 1.5211, "step": 3275 }, { "epoch": 0.22826882207434762, "grad_norm": 0.7145201468391852, "learning_rate": 6.224092581766854e-07, "loss": 1.5296, "step": 3276 }, { "epoch": 0.22833850120196494, "grad_norm": 0.7103105050401702, "learning_rate": 6.223639311075114e-07, "loss": 1.4471, "step": 3277 }, { "epoch": 0.22840818032958227, "grad_norm": 0.844762788612772, "learning_rate": 6.223185926633709e-07, "loss": 1.5651, "step": 3278 }, { "epoch": 0.2284778594571996, "grad_norm": 0.7034913972328873, "learning_rate": 6.22273242846436e-07, "loss": 1.5359, "step": 3279 }, { "epoch": 0.2285475385848169, "grad_norm": 0.7461126329176588, "learning_rate": 6.2222788165888e-07, "loss": 1.6537, "step": 3280 }, { "epoch": 0.22861721771243423, "grad_norm": 0.7269639564103746, "learning_rate": 6.221825091028772e-07, "loss": 1.4432, "step": 3281 }, { "epoch": 0.22868689684005156, "grad_norm": 0.6712173022990764, "learning_rate": 6.221371251806014e-07, "loss": 1.5733, "step": 3282 }, { "epoch": 0.22875657596766888, "grad_norm": 0.7475145129410613, "learning_rate": 6.220917298942278e-07, "loss": 1.5138, "step": 3283 }, { "epoch": 0.2288262550952862, "grad_norm": 0.7008954768552739, "learning_rate": 6.220463232459318e-07, "loss": 1.4898, "step": 3284 }, { "epoch": 0.22889593422290352, "grad_norm": 0.7248710827182362, "learning_rate": 6.220009052378892e-07, "loss": 1.5659, "step": 3285 }, { "epoch": 0.22896561335052085, "grad_norm": 0.7064860142782986, "learning_rate": 6.21955475872277e-07, "loss": 1.5297, "step": 3286 }, { "epoch": 0.22903529247813817, "grad_norm": 0.7221219290864086, "learning_rate": 6.219100351512717e-07, "loss": 1.5716, "step": 3287 }, { "epoch": 0.2291049716057555, "grad_norm": 0.7565337053436231, "learning_rate": 6.218645830770511e-07, "loss": 1.6439, "step": 3288 }, { "epoch": 0.22917465073337281, "grad_norm": 0.7723173370820656, "learning_rate": 6.218191196517935e-07, "loss": 1.5419, "step": 3289 }, { "epoch": 0.22924432986099014, "grad_norm": 0.6800261948404732, "learning_rate": 6.217736448776775e-07, "loss": 1.5262, "step": 3290 }, { "epoch": 0.22931400898860746, "grad_norm": 0.6742666990820785, "learning_rate": 6.217281587568823e-07, "loss": 1.4581, "step": 3291 }, { "epoch": 0.22938368811622478, "grad_norm": 0.6924075637187566, "learning_rate": 6.216826612915877e-07, "loss": 1.5268, "step": 3292 }, { "epoch": 0.2294533672438421, "grad_norm": 0.6770195243966041, "learning_rate": 6.216371524839743e-07, "loss": 1.5681, "step": 3293 }, { "epoch": 0.22952304637145943, "grad_norm": 0.6831251809175937, "learning_rate": 6.215916323362225e-07, "loss": 1.4456, "step": 3294 }, { "epoch": 0.22959272549907675, "grad_norm": 0.6829693545829544, "learning_rate": 6.215461008505141e-07, "loss": 1.616, "step": 3295 }, { "epoch": 0.22966240462669407, "grad_norm": 0.6949054818073175, "learning_rate": 6.215005580290309e-07, "loss": 1.5678, "step": 3296 }, { "epoch": 0.2297320837543114, "grad_norm": 0.6812757453937095, "learning_rate": 6.214550038739554e-07, "loss": 1.5277, "step": 3297 }, { "epoch": 0.22980176288192872, "grad_norm": 0.6736556299964148, "learning_rate": 6.214094383874707e-07, "loss": 1.5254, "step": 3298 }, { "epoch": 0.22987144200954604, "grad_norm": 0.6692168534392988, "learning_rate": 6.213638615717605e-07, "loss": 1.5795, "step": 3299 }, { "epoch": 0.22994112113716336, "grad_norm": 0.7090645204789643, "learning_rate": 6.213182734290085e-07, "loss": 1.5528, "step": 3300 }, { "epoch": 0.23001080026478068, "grad_norm": 0.7001362981176051, "learning_rate": 6.212726739613998e-07, "loss": 1.4763, "step": 3301 }, { "epoch": 0.230080479392398, "grad_norm": 0.7586834311900935, "learning_rate": 6.212270631711197e-07, "loss": 1.6276, "step": 3302 }, { "epoch": 0.23015015852001533, "grad_norm": 0.7934441272813372, "learning_rate": 6.211814410603536e-07, "loss": 1.5447, "step": 3303 }, { "epoch": 0.23021983764763265, "grad_norm": 0.7482907591214504, "learning_rate": 6.211358076312881e-07, "loss": 1.4875, "step": 3304 }, { "epoch": 0.23028951677524997, "grad_norm": 0.6763665819146414, "learning_rate": 6.210901628861098e-07, "loss": 1.4554, "step": 3305 }, { "epoch": 0.2303591959028673, "grad_norm": 0.7312207211708315, "learning_rate": 6.210445068270063e-07, "loss": 1.5047, "step": 3306 }, { "epoch": 0.23042887503048462, "grad_norm": 0.7269062813292612, "learning_rate": 6.209988394561652e-07, "loss": 1.6715, "step": 3307 }, { "epoch": 0.23049855415810194, "grad_norm": 0.7149134256932522, "learning_rate": 6.209531607757755e-07, "loss": 1.4913, "step": 3308 }, { "epoch": 0.23056823328571927, "grad_norm": 0.6878175606991938, "learning_rate": 6.209074707880259e-07, "loss": 1.4401, "step": 3309 }, { "epoch": 0.2306379124133366, "grad_norm": 0.7116693886252153, "learning_rate": 6.208617694951059e-07, "loss": 1.4861, "step": 3310 }, { "epoch": 0.2307075915409539, "grad_norm": 0.6677721593746183, "learning_rate": 6.208160568992057e-07, "loss": 1.575, "step": 3311 }, { "epoch": 0.23077727066857123, "grad_norm": 0.7350179713720592, "learning_rate": 6.20770333002516e-07, "loss": 1.5758, "step": 3312 }, { "epoch": 0.23084694979618856, "grad_norm": 0.6928707351994486, "learning_rate": 6.207245978072279e-07, "loss": 1.4518, "step": 3313 }, { "epoch": 0.23091662892380588, "grad_norm": 0.6738940033331204, "learning_rate": 6.206788513155331e-07, "loss": 1.4948, "step": 3314 }, { "epoch": 0.2309863080514232, "grad_norm": 0.7204099383196804, "learning_rate": 6.206330935296239e-07, "loss": 1.5013, "step": 3315 }, { "epoch": 0.23105598717904052, "grad_norm": 0.7320799572268409, "learning_rate": 6.205873244516931e-07, "loss": 1.6149, "step": 3316 }, { "epoch": 0.23112566630665785, "grad_norm": 0.679257456943743, "learning_rate": 6.20541544083934e-07, "loss": 1.5815, "step": 3317 }, { "epoch": 0.23119534543427517, "grad_norm": 0.759769092880823, "learning_rate": 6.204957524285407e-07, "loss": 1.659, "step": 3318 }, { "epoch": 0.2312650245618925, "grad_norm": 0.7434624189053274, "learning_rate": 6.204499494877074e-07, "loss": 1.5217, "step": 3319 }, { "epoch": 0.2313347036895098, "grad_norm": 0.7239122991163198, "learning_rate": 6.204041352636293e-07, "loss": 1.494, "step": 3320 }, { "epoch": 0.23140438281712714, "grad_norm": 0.730206357832636, "learning_rate": 6.203583097585015e-07, "loss": 1.598, "step": 3321 }, { "epoch": 0.23147406194474446, "grad_norm": 0.7236108764408529, "learning_rate": 6.203124729745206e-07, "loss": 1.5986, "step": 3322 }, { "epoch": 0.23154374107236178, "grad_norm": 0.7670495414586959, "learning_rate": 6.202666249138827e-07, "loss": 1.6228, "step": 3323 }, { "epoch": 0.2316134201999791, "grad_norm": 0.7049449713350656, "learning_rate": 6.202207655787851e-07, "loss": 1.4829, "step": 3324 }, { "epoch": 0.23168309932759643, "grad_norm": 0.7305992229489915, "learning_rate": 6.201748949714257e-07, "loss": 1.6744, "step": 3325 }, { "epoch": 0.23175277845521375, "grad_norm": 0.7119919576125097, "learning_rate": 6.201290130940024e-07, "loss": 1.4452, "step": 3326 }, { "epoch": 0.23182245758283107, "grad_norm": 0.6822509435659513, "learning_rate": 6.200831199487141e-07, "loss": 1.4767, "step": 3327 }, { "epoch": 0.2318921367104484, "grad_norm": 0.7289185593864849, "learning_rate": 6.200372155377601e-07, "loss": 1.5892, "step": 3328 }, { "epoch": 0.23196181583806572, "grad_norm": 0.6963543315365277, "learning_rate": 6.199912998633401e-07, "loss": 1.5631, "step": 3329 }, { "epoch": 0.23203149496568304, "grad_norm": 0.6930151776226096, "learning_rate": 6.199453729276547e-07, "loss": 1.6462, "step": 3330 }, { "epoch": 0.23210117409330036, "grad_norm": 0.69238854833818, "learning_rate": 6.198994347329047e-07, "loss": 1.5036, "step": 3331 }, { "epoch": 0.23217085322091768, "grad_norm": 0.7474791474026496, "learning_rate": 6.198534852812916e-07, "loss": 1.4649, "step": 3332 }, { "epoch": 0.232240532348535, "grad_norm": 0.725018393418341, "learning_rate": 6.198075245750173e-07, "loss": 1.5417, "step": 3333 }, { "epoch": 0.23231021147615233, "grad_norm": 0.7522669532923917, "learning_rate": 6.197615526162843e-07, "loss": 1.5881, "step": 3334 }, { "epoch": 0.23237989060376965, "grad_norm": 0.7084064750535294, "learning_rate": 6.197155694072958e-07, "loss": 1.5404, "step": 3335 }, { "epoch": 0.23244956973138697, "grad_norm": 0.7518904247342453, "learning_rate": 6.196695749502553e-07, "loss": 1.6081, "step": 3336 }, { "epoch": 0.2325192488590043, "grad_norm": 0.7462448084328533, "learning_rate": 6.19623569247367e-07, "loss": 1.6945, "step": 3337 }, { "epoch": 0.23258892798662162, "grad_norm": 0.7352790334391689, "learning_rate": 6.195775523008357e-07, "loss": 1.6254, "step": 3338 }, { "epoch": 0.23265860711423894, "grad_norm": 0.692602365608469, "learning_rate": 6.195315241128664e-07, "loss": 1.5174, "step": 3339 }, { "epoch": 0.23272828624185626, "grad_norm": 0.7557983065691252, "learning_rate": 6.19485484685665e-07, "loss": 1.7174, "step": 3340 }, { "epoch": 0.2327979653694736, "grad_norm": 0.7820184701909594, "learning_rate": 6.194394340214378e-07, "loss": 1.629, "step": 3341 }, { "epoch": 0.2328676444970909, "grad_norm": 0.741342894454099, "learning_rate": 6.193933721223916e-07, "loss": 1.5313, "step": 3342 }, { "epoch": 0.23293732362470823, "grad_norm": 0.78097018285349, "learning_rate": 6.193472989907339e-07, "loss": 1.6066, "step": 3343 }, { "epoch": 0.23300700275232555, "grad_norm": 0.7650208326074384, "learning_rate": 6.193012146286725e-07, "loss": 1.5455, "step": 3344 }, { "epoch": 0.23307668187994288, "grad_norm": 0.8015620503451505, "learning_rate": 6.192551190384158e-07, "loss": 1.468, "step": 3345 }, { "epoch": 0.2331463610075602, "grad_norm": 0.7433641051339869, "learning_rate": 6.192090122221729e-07, "loss": 1.5386, "step": 3346 }, { "epoch": 0.23321604013517752, "grad_norm": 0.7174021653327852, "learning_rate": 6.191628941821534e-07, "loss": 1.7446, "step": 3347 }, { "epoch": 0.23328571926279482, "grad_norm": 0.6836406889381682, "learning_rate": 6.191167649205672e-07, "loss": 1.5269, "step": 3348 }, { "epoch": 0.23335539839041214, "grad_norm": 0.7099080109048698, "learning_rate": 6.190706244396251e-07, "loss": 1.5104, "step": 3349 }, { "epoch": 0.23342507751802946, "grad_norm": 0.663174403385495, "learning_rate": 6.19024472741538e-07, "loss": 1.5063, "step": 3350 }, { "epoch": 0.23349475664564678, "grad_norm": 0.7500897550374107, "learning_rate": 6.189783098285178e-07, "loss": 1.5334, "step": 3351 }, { "epoch": 0.2335644357732641, "grad_norm": 0.7258609882745846, "learning_rate": 6.189321357027766e-07, "loss": 1.4554, "step": 3352 }, { "epoch": 0.23363411490088143, "grad_norm": 0.7432031040341716, "learning_rate": 6.188859503665272e-07, "loss": 1.6481, "step": 3353 }, { "epoch": 0.23370379402849875, "grad_norm": 0.7166121656267893, "learning_rate": 6.188397538219829e-07, "loss": 1.595, "step": 3354 }, { "epoch": 0.23377347315611607, "grad_norm": 0.6927380416464944, "learning_rate": 6.187935460713575e-07, "loss": 1.4588, "step": 3355 }, { "epoch": 0.2338431522837334, "grad_norm": 0.7540129503422559, "learning_rate": 6.187473271168655e-07, "loss": 1.5675, "step": 3356 }, { "epoch": 0.23391283141135072, "grad_norm": 0.7308119077873947, "learning_rate": 6.187010969607217e-07, "loss": 1.6251, "step": 3357 }, { "epoch": 0.23398251053896804, "grad_norm": 0.6693944602398552, "learning_rate": 6.186548556051415e-07, "loss": 1.5107, "step": 3358 }, { "epoch": 0.23405218966658536, "grad_norm": 0.7086992977341247, "learning_rate": 6.18608603052341e-07, "loss": 1.5787, "step": 3359 }, { "epoch": 0.2341218687942027, "grad_norm": 0.6669225432306505, "learning_rate": 6.185623393045367e-07, "loss": 1.5375, "step": 3360 }, { "epoch": 0.23419154792182, "grad_norm": 0.6538619423769466, "learning_rate": 6.185160643639454e-07, "loss": 1.4486, "step": 3361 }, { "epoch": 0.23426122704943733, "grad_norm": 0.7317000886747869, "learning_rate": 6.184697782327851e-07, "loss": 1.5035, "step": 3362 }, { "epoch": 0.23433090617705465, "grad_norm": 0.680179715005664, "learning_rate": 6.184234809132737e-07, "loss": 1.4639, "step": 3363 }, { "epoch": 0.23440058530467198, "grad_norm": 0.8368622763484619, "learning_rate": 6.183771724076298e-07, "loss": 1.6463, "step": 3364 }, { "epoch": 0.2344702644322893, "grad_norm": 0.8092067224444279, "learning_rate": 6.183308527180727e-07, "loss": 1.6825, "step": 3365 }, { "epoch": 0.23453994355990662, "grad_norm": 0.727191601732615, "learning_rate": 6.182845218468222e-07, "loss": 1.5721, "step": 3366 }, { "epoch": 0.23460962268752394, "grad_norm": 0.7190081206311841, "learning_rate": 6.182381797960983e-07, "loss": 1.4984, "step": 3367 }, { "epoch": 0.23467930181514127, "grad_norm": 0.721597520938331, "learning_rate": 6.181918265681221e-07, "loss": 1.3929, "step": 3368 }, { "epoch": 0.2347489809427586, "grad_norm": 0.7270928500070124, "learning_rate": 6.181454621651149e-07, "loss": 1.4872, "step": 3369 }, { "epoch": 0.2348186600703759, "grad_norm": 0.7859246873980587, "learning_rate": 6.180990865892984e-07, "loss": 1.6426, "step": 3370 }, { "epoch": 0.23488833919799323, "grad_norm": 0.7658917026133951, "learning_rate": 6.180526998428953e-07, "loss": 1.4258, "step": 3371 }, { "epoch": 0.23495801832561056, "grad_norm": 0.7703297932331463, "learning_rate": 6.180063019281282e-07, "loss": 1.7079, "step": 3372 }, { "epoch": 0.23502769745322788, "grad_norm": 0.7604510847538044, "learning_rate": 6.179598928472208e-07, "loss": 1.6182, "step": 3373 }, { "epoch": 0.2350973765808452, "grad_norm": 0.7085186689342032, "learning_rate": 6.179134726023971e-07, "loss": 1.5748, "step": 3374 }, { "epoch": 0.23516705570846252, "grad_norm": 0.7427333380583586, "learning_rate": 6.178670411958817e-07, "loss": 1.4754, "step": 3375 }, { "epoch": 0.23523673483607985, "grad_norm": 0.7173447702200312, "learning_rate": 6.178205986298996e-07, "loss": 1.5343, "step": 3376 }, { "epoch": 0.23530641396369717, "grad_norm": 0.7043449607322789, "learning_rate": 6.177741449066763e-07, "loss": 1.4961, "step": 3377 }, { "epoch": 0.2353760930913145, "grad_norm": 0.7456152236319041, "learning_rate": 6.177276800284382e-07, "loss": 1.4932, "step": 3378 }, { "epoch": 0.23544577221893181, "grad_norm": 0.7424025461195325, "learning_rate": 6.176812039974119e-07, "loss": 1.4947, "step": 3379 }, { "epoch": 0.23551545134654914, "grad_norm": 0.7232428004855358, "learning_rate": 6.176347168158246e-07, "loss": 1.552, "step": 3380 }, { "epoch": 0.23558513047416646, "grad_norm": 0.6779956331594822, "learning_rate": 6.175882184859041e-07, "loss": 1.4757, "step": 3381 }, { "epoch": 0.23565480960178378, "grad_norm": 0.7171144453890865, "learning_rate": 6.175417090098787e-07, "loss": 1.5146, "step": 3382 }, { "epoch": 0.2357244887294011, "grad_norm": 0.7121967513830976, "learning_rate": 6.174951883899771e-07, "loss": 1.5433, "step": 3383 }, { "epoch": 0.23579416785701843, "grad_norm": 0.6891281985086806, "learning_rate": 6.17448656628429e-07, "loss": 1.6011, "step": 3384 }, { "epoch": 0.23586384698463575, "grad_norm": 0.6951321103009805, "learning_rate": 6.174021137274638e-07, "loss": 1.486, "step": 3385 }, { "epoch": 0.23593352611225307, "grad_norm": 0.7260711549908915, "learning_rate": 6.173555596893123e-07, "loss": 1.4865, "step": 3386 }, { "epoch": 0.2360032052398704, "grad_norm": 0.6964511042381395, "learning_rate": 6.173089945162053e-07, "loss": 1.5472, "step": 3387 }, { "epoch": 0.23607288436748772, "grad_norm": 0.7433377677254371, "learning_rate": 6.172624182103744e-07, "loss": 1.5743, "step": 3388 }, { "epoch": 0.23614256349510504, "grad_norm": 0.7242210025791754, "learning_rate": 6.172158307740517e-07, "loss": 1.5983, "step": 3389 }, { "epoch": 0.23621224262272236, "grad_norm": 0.7789543820644682, "learning_rate": 6.171692322094696e-07, "loss": 1.4545, "step": 3390 }, { "epoch": 0.23628192175033969, "grad_norm": 0.751532733815692, "learning_rate": 6.171226225188612e-07, "loss": 1.5104, "step": 3391 }, { "epoch": 0.236351600877957, "grad_norm": 0.7256233715460338, "learning_rate": 6.170760017044602e-07, "loss": 1.5217, "step": 3392 }, { "epoch": 0.23642128000557433, "grad_norm": 0.695169404445898, "learning_rate": 6.170293697685008e-07, "loss": 1.4637, "step": 3393 }, { "epoch": 0.23649095913319165, "grad_norm": 0.7268883932193495, "learning_rate": 6.169827267132177e-07, "loss": 1.5521, "step": 3394 }, { "epoch": 0.23656063826080898, "grad_norm": 0.8191933964958839, "learning_rate": 6.169360725408461e-07, "loss": 1.5085, "step": 3395 }, { "epoch": 0.2366303173884263, "grad_norm": 0.7463896155748065, "learning_rate": 6.168894072536215e-07, "loss": 1.631, "step": 3396 }, { "epoch": 0.23669999651604362, "grad_norm": 0.7538657089938234, "learning_rate": 6.168427308537807e-07, "loss": 1.6949, "step": 3397 }, { "epoch": 0.23676967564366094, "grad_norm": 0.6922451358544848, "learning_rate": 6.167960433435602e-07, "loss": 1.5991, "step": 3398 }, { "epoch": 0.23683935477127827, "grad_norm": 0.7173957121519011, "learning_rate": 6.167493447251974e-07, "loss": 1.5608, "step": 3399 }, { "epoch": 0.2369090338988956, "grad_norm": 0.6702020251445877, "learning_rate": 6.167026350009302e-07, "loss": 1.5437, "step": 3400 }, { "epoch": 0.2369787130265129, "grad_norm": 0.8055652122242971, "learning_rate": 6.166559141729971e-07, "loss": 1.506, "step": 3401 }, { "epoch": 0.23704839215413023, "grad_norm": 0.7832627703406165, "learning_rate": 6.166091822436371e-07, "loss": 1.6436, "step": 3402 }, { "epoch": 0.23711807128174756, "grad_norm": 0.6791136166363546, "learning_rate": 6.165624392150895e-07, "loss": 1.4786, "step": 3403 }, { "epoch": 0.23718775040936488, "grad_norm": 0.772012569377695, "learning_rate": 6.165156850895944e-07, "loss": 1.56, "step": 3404 }, { "epoch": 0.2372574295369822, "grad_norm": 0.7148878873649895, "learning_rate": 6.164689198693925e-07, "loss": 1.5209, "step": 3405 }, { "epoch": 0.23732710866459952, "grad_norm": 0.7661887521009225, "learning_rate": 6.164221435567247e-07, "loss": 1.4896, "step": 3406 }, { "epoch": 0.23739678779221685, "grad_norm": 0.8144245815031177, "learning_rate": 6.163753561538325e-07, "loss": 1.3957, "step": 3407 }, { "epoch": 0.23746646691983417, "grad_norm": 0.7057628127124091, "learning_rate": 6.163285576629585e-07, "loss": 1.5469, "step": 3408 }, { "epoch": 0.2375361460474515, "grad_norm": 0.7065635665133951, "learning_rate": 6.16281748086345e-07, "loss": 1.6138, "step": 3409 }, { "epoch": 0.2376058251750688, "grad_norm": 0.7702791357963328, "learning_rate": 6.162349274262353e-07, "loss": 1.6062, "step": 3410 }, { "epoch": 0.23767550430268614, "grad_norm": 0.7129163308798244, "learning_rate": 6.161880956848732e-07, "loss": 1.5728, "step": 3411 }, { "epoch": 0.23774518343030346, "grad_norm": 0.7367937664828406, "learning_rate": 6.16141252864503e-07, "loss": 1.5456, "step": 3412 }, { "epoch": 0.23781486255792078, "grad_norm": 0.7393353115134411, "learning_rate": 6.160943989673692e-07, "loss": 1.5298, "step": 3413 }, { "epoch": 0.2378845416855381, "grad_norm": 0.7583212519478039, "learning_rate": 6.160475339957176e-07, "loss": 1.543, "step": 3414 }, { "epoch": 0.23795422081315543, "grad_norm": 0.7226687449053614, "learning_rate": 6.160006579517937e-07, "loss": 1.5804, "step": 3415 }, { "epoch": 0.23802389994077275, "grad_norm": 0.7184778025421876, "learning_rate": 6.159537708378441e-07, "loss": 1.5452, "step": 3416 }, { "epoch": 0.23809357906839007, "grad_norm": 0.7181375940406853, "learning_rate": 6.159068726561158e-07, "loss": 1.418, "step": 3417 }, { "epoch": 0.2381632581960074, "grad_norm": 0.7104117242390996, "learning_rate": 6.158599634088559e-07, "loss": 1.538, "step": 3418 }, { "epoch": 0.23823293732362472, "grad_norm": 0.7020110087325926, "learning_rate": 6.158130430983127e-07, "loss": 1.516, "step": 3419 }, { "epoch": 0.23830261645124204, "grad_norm": 0.705561198667519, "learning_rate": 6.157661117267347e-07, "loss": 1.6158, "step": 3420 }, { "epoch": 0.23837229557885936, "grad_norm": 0.6897473127244094, "learning_rate": 6.157191692963706e-07, "loss": 1.5299, "step": 3421 }, { "epoch": 0.23844197470647668, "grad_norm": 0.7142173166774628, "learning_rate": 6.156722158094705e-07, "loss": 1.6315, "step": 3422 }, { "epoch": 0.238511653834094, "grad_norm": 0.7311353735621938, "learning_rate": 6.156252512682842e-07, "loss": 1.5065, "step": 3423 }, { "epoch": 0.23858133296171133, "grad_norm": 0.7386698453917857, "learning_rate": 6.155782756750624e-07, "loss": 1.5852, "step": 3424 }, { "epoch": 0.23865101208932865, "grad_norm": 0.7174450568898096, "learning_rate": 6.155312890320563e-07, "loss": 1.6653, "step": 3425 }, { "epoch": 0.23872069121694597, "grad_norm": 0.6757530045972093, "learning_rate": 6.154842913415175e-07, "loss": 1.519, "step": 3426 }, { "epoch": 0.2387903703445633, "grad_norm": 0.7123687389973483, "learning_rate": 6.154372826056983e-07, "loss": 1.5688, "step": 3427 }, { "epoch": 0.23886004947218062, "grad_norm": 0.731046582437802, "learning_rate": 6.153902628268514e-07, "loss": 1.6112, "step": 3428 }, { "epoch": 0.23892972859979794, "grad_norm": 0.6917579901963694, "learning_rate": 6.153432320072301e-07, "loss": 1.5759, "step": 3429 }, { "epoch": 0.23899940772741526, "grad_norm": 0.6981507803157918, "learning_rate": 6.152961901490884e-07, "loss": 1.6106, "step": 3430 }, { "epoch": 0.2390690868550326, "grad_norm": 0.7125774274588189, "learning_rate": 6.152491372546804e-07, "loss": 1.5118, "step": 3431 }, { "epoch": 0.2391387659826499, "grad_norm": 0.7006860564252603, "learning_rate": 6.15202073326261e-07, "loss": 1.4689, "step": 3432 }, { "epoch": 0.23920844511026723, "grad_norm": 0.7095641180879917, "learning_rate": 6.151549983660856e-07, "loss": 1.4697, "step": 3433 }, { "epoch": 0.23927812423788455, "grad_norm": 0.7259126769570512, "learning_rate": 6.151079123764104e-07, "loss": 1.5463, "step": 3434 }, { "epoch": 0.23934780336550188, "grad_norm": 0.8038698655154628, "learning_rate": 6.150608153594915e-07, "loss": 1.5724, "step": 3435 }, { "epoch": 0.2394174824931192, "grad_norm": 0.7405163393205098, "learning_rate": 6.150137073175859e-07, "loss": 1.5711, "step": 3436 }, { "epoch": 0.23948716162073652, "grad_norm": 0.697010624031155, "learning_rate": 6.149665882529513e-07, "loss": 1.4704, "step": 3437 }, { "epoch": 0.23955684074835384, "grad_norm": 0.7037022988926063, "learning_rate": 6.149194581678457e-07, "loss": 1.5133, "step": 3438 }, { "epoch": 0.23962651987597114, "grad_norm": 0.7656938726176664, "learning_rate": 6.148723170645277e-07, "loss": 1.5413, "step": 3439 }, { "epoch": 0.23969619900358846, "grad_norm": 0.7258567001381848, "learning_rate": 6.148251649452564e-07, "loss": 1.4647, "step": 3440 }, { "epoch": 0.23976587813120578, "grad_norm": 0.721650592199456, "learning_rate": 6.147780018122912e-07, "loss": 1.5456, "step": 3441 }, { "epoch": 0.2398355572588231, "grad_norm": 0.7469447720018643, "learning_rate": 6.147308276678926e-07, "loss": 1.6895, "step": 3442 }, { "epoch": 0.23990523638644043, "grad_norm": 0.6885474505548068, "learning_rate": 6.14683642514321e-07, "loss": 1.5356, "step": 3443 }, { "epoch": 0.23997491551405775, "grad_norm": 0.7302283015243304, "learning_rate": 6.146364463538377e-07, "loss": 1.6192, "step": 3444 }, { "epoch": 0.24004459464167507, "grad_norm": 0.7402686792467148, "learning_rate": 6.145892391887046e-07, "loss": 1.6829, "step": 3445 }, { "epoch": 0.2401142737692924, "grad_norm": 0.752528153723735, "learning_rate": 6.145420210211837e-07, "loss": 1.5519, "step": 3446 }, { "epoch": 0.24018395289690972, "grad_norm": 0.6629770876275467, "learning_rate": 6.144947918535379e-07, "loss": 1.4805, "step": 3447 }, { "epoch": 0.24025363202452704, "grad_norm": 0.699902908697308, "learning_rate": 6.144475516880307e-07, "loss": 1.4926, "step": 3448 }, { "epoch": 0.24032331115214436, "grad_norm": 0.7410783032423637, "learning_rate": 6.144003005269256e-07, "loss": 1.4714, "step": 3449 }, { "epoch": 0.2403929902797617, "grad_norm": 0.7367406097734193, "learning_rate": 6.143530383724872e-07, "loss": 1.5405, "step": 3450 }, { "epoch": 0.240462669407379, "grad_norm": 0.771099676513623, "learning_rate": 6.143057652269803e-07, "loss": 1.5602, "step": 3451 }, { "epoch": 0.24053234853499633, "grad_norm": 0.7071012761640494, "learning_rate": 6.142584810926704e-07, "loss": 1.5461, "step": 3452 }, { "epoch": 0.24060202766261365, "grad_norm": 0.7170107410005159, "learning_rate": 6.142111859718235e-07, "loss": 1.4895, "step": 3453 }, { "epoch": 0.24067170679023098, "grad_norm": 0.7104958509461615, "learning_rate": 6.141638798667058e-07, "loss": 1.4503, "step": 3454 }, { "epoch": 0.2407413859178483, "grad_norm": 0.8801778611348274, "learning_rate": 6.141165627795848e-07, "loss": 1.6851, "step": 3455 }, { "epoch": 0.24081106504546562, "grad_norm": 0.7737450201788408, "learning_rate": 6.140692347127276e-07, "loss": 1.4547, "step": 3456 }, { "epoch": 0.24088074417308294, "grad_norm": 0.7241405832766921, "learning_rate": 6.140218956684024e-07, "loss": 1.6129, "step": 3457 }, { "epoch": 0.24095042330070027, "grad_norm": 0.7633311415281308, "learning_rate": 6.139745456488778e-07, "loss": 1.6202, "step": 3458 }, { "epoch": 0.2410201024283176, "grad_norm": 0.7854016040714885, "learning_rate": 6.139271846564229e-07, "loss": 1.7454, "step": 3459 }, { "epoch": 0.2410897815559349, "grad_norm": 0.7161975046816076, "learning_rate": 6.138798126933074e-07, "loss": 1.6831, "step": 3460 }, { "epoch": 0.24115946068355223, "grad_norm": 0.6605931548102567, "learning_rate": 6.138324297618012e-07, "loss": 1.5774, "step": 3461 }, { "epoch": 0.24122913981116956, "grad_norm": 0.6311640853920627, "learning_rate": 6.137850358641754e-07, "loss": 1.4281, "step": 3462 }, { "epoch": 0.24129881893878688, "grad_norm": 0.7140109842514096, "learning_rate": 6.137376310027008e-07, "loss": 1.5915, "step": 3463 }, { "epoch": 0.2413684980664042, "grad_norm": 0.7039807459381068, "learning_rate": 6.136902151796495e-07, "loss": 1.5709, "step": 3464 }, { "epoch": 0.24143817719402152, "grad_norm": 0.6721860425881251, "learning_rate": 6.136427883972935e-07, "loss": 1.5871, "step": 3465 }, { "epoch": 0.24150785632163885, "grad_norm": 0.7615669920967632, "learning_rate": 6.135953506579057e-07, "loss": 1.5575, "step": 3466 }, { "epoch": 0.24157753544925617, "grad_norm": 0.6938093239176405, "learning_rate": 6.135479019637593e-07, "loss": 1.5081, "step": 3467 }, { "epoch": 0.2416472145768735, "grad_norm": 0.6998087722630385, "learning_rate": 6.135004423171284e-07, "loss": 1.475, "step": 3468 }, { "epoch": 0.24171689370449082, "grad_norm": 0.7284799755105115, "learning_rate": 6.134529717202873e-07, "loss": 1.5256, "step": 3469 }, { "epoch": 0.24178657283210814, "grad_norm": 0.7339876042097843, "learning_rate": 6.134054901755106e-07, "loss": 1.4791, "step": 3470 }, { "epoch": 0.24185625195972546, "grad_norm": 0.7344813016736725, "learning_rate": 6.133579976850738e-07, "loss": 1.6821, "step": 3471 }, { "epoch": 0.24192593108734278, "grad_norm": 0.8034131484006056, "learning_rate": 6.133104942512532e-07, "loss": 1.5536, "step": 3472 }, { "epoch": 0.2419956102149601, "grad_norm": 0.7091929112333546, "learning_rate": 6.132629798763249e-07, "loss": 1.5343, "step": 3473 }, { "epoch": 0.24206528934257743, "grad_norm": 0.7372457302786409, "learning_rate": 6.13215454562566e-07, "loss": 1.5681, "step": 3474 }, { "epoch": 0.24213496847019475, "grad_norm": 0.7163122501788207, "learning_rate": 6.131679183122539e-07, "loss": 1.5721, "step": 3475 }, { "epoch": 0.24220464759781207, "grad_norm": 0.7644696332870136, "learning_rate": 6.131203711276669e-07, "loss": 1.5467, "step": 3476 }, { "epoch": 0.2422743267254294, "grad_norm": 0.659718083288887, "learning_rate": 6.130728130110833e-07, "loss": 1.5017, "step": 3477 }, { "epoch": 0.24234400585304672, "grad_norm": 0.6883044205305839, "learning_rate": 6.130252439647823e-07, "loss": 1.5113, "step": 3478 }, { "epoch": 0.24241368498066404, "grad_norm": 0.7201350430252552, "learning_rate": 6.129776639910434e-07, "loss": 1.5899, "step": 3479 }, { "epoch": 0.24248336410828136, "grad_norm": 0.7132112424660706, "learning_rate": 6.129300730921468e-07, "loss": 1.4527, "step": 3480 }, { "epoch": 0.24255304323589869, "grad_norm": 0.7118297627685076, "learning_rate": 6.128824712703734e-07, "loss": 1.6241, "step": 3481 }, { "epoch": 0.242622722363516, "grad_norm": 0.7700354394842043, "learning_rate": 6.128348585280039e-07, "loss": 1.5645, "step": 3482 }, { "epoch": 0.24269240149113333, "grad_norm": 0.7532057834209641, "learning_rate": 6.127872348673204e-07, "loss": 1.5646, "step": 3483 }, { "epoch": 0.24276208061875065, "grad_norm": 0.7249802886382719, "learning_rate": 6.127396002906049e-07, "loss": 1.4486, "step": 3484 }, { "epoch": 0.24283175974636798, "grad_norm": 0.7016431329438683, "learning_rate": 6.126919548001403e-07, "loss": 1.6213, "step": 3485 }, { "epoch": 0.2429014388739853, "grad_norm": 0.7030641444243805, "learning_rate": 6.126442983982096e-07, "loss": 1.5588, "step": 3486 }, { "epoch": 0.24297111800160262, "grad_norm": 0.7413791208700851, "learning_rate": 6.125966310870968e-07, "loss": 1.6794, "step": 3487 }, { "epoch": 0.24304079712921994, "grad_norm": 0.6779401090861621, "learning_rate": 6.125489528690863e-07, "loss": 1.6256, "step": 3488 }, { "epoch": 0.24311047625683727, "grad_norm": 0.6852961562158302, "learning_rate": 6.125012637464628e-07, "loss": 1.578, "step": 3489 }, { "epoch": 0.2431801553844546, "grad_norm": 0.7876391800882989, "learning_rate": 6.124535637215116e-07, "loss": 1.6865, "step": 3490 }, { "epoch": 0.2432498345120719, "grad_norm": 0.7089052160927828, "learning_rate": 6.124058527965189e-07, "loss": 1.4728, "step": 3491 }, { "epoch": 0.24331951363968923, "grad_norm": 0.6710252479864719, "learning_rate": 6.123581309737707e-07, "loss": 1.5328, "step": 3492 }, { "epoch": 0.24338919276730656, "grad_norm": 7.417390349939993, "learning_rate": 6.12310398255554e-07, "loss": 1.468, "step": 3493 }, { "epoch": 0.24345887189492388, "grad_norm": 0.6818307687602854, "learning_rate": 6.122626546441567e-07, "loss": 1.6087, "step": 3494 }, { "epoch": 0.2435285510225412, "grad_norm": 0.7113519070432572, "learning_rate": 6.122149001418661e-07, "loss": 1.5702, "step": 3495 }, { "epoch": 0.24359823015015852, "grad_norm": 0.7331933417391607, "learning_rate": 6.121671347509712e-07, "loss": 1.6366, "step": 3496 }, { "epoch": 0.24366790927777585, "grad_norm": 0.7353912377560792, "learning_rate": 6.121193584737607e-07, "loss": 1.4549, "step": 3497 }, { "epoch": 0.24373758840539317, "grad_norm": 0.747218662762241, "learning_rate": 6.120715713125245e-07, "loss": 1.6776, "step": 3498 }, { "epoch": 0.2438072675330105, "grad_norm": 0.7783135396187291, "learning_rate": 6.120237732695521e-07, "loss": 1.5549, "step": 3499 }, { "epoch": 0.2438769466606278, "grad_norm": 0.7866557794969244, "learning_rate": 6.119759643471347e-07, "loss": 1.3877, "step": 3500 }, { "epoch": 0.24394662578824514, "grad_norm": 0.7104409997433944, "learning_rate": 6.11928144547563e-07, "loss": 1.5761, "step": 3501 }, { "epoch": 0.24401630491586246, "grad_norm": 0.7434088447922035, "learning_rate": 6.118803138731287e-07, "loss": 1.513, "step": 3502 }, { "epoch": 0.24408598404347978, "grad_norm": 0.719581279474016, "learning_rate": 6.118324723261241e-07, "loss": 1.4972, "step": 3503 }, { "epoch": 0.2441556631710971, "grad_norm": 0.643472580546137, "learning_rate": 6.117846199088417e-07, "loss": 1.4641, "step": 3504 }, { "epoch": 0.24422534229871443, "grad_norm": 0.7445412858204775, "learning_rate": 6.117367566235748e-07, "loss": 1.5576, "step": 3505 }, { "epoch": 0.24429502142633175, "grad_norm": 0.7239385537409243, "learning_rate": 6.11688882472617e-07, "loss": 1.5952, "step": 3506 }, { "epoch": 0.24436470055394907, "grad_norm": 0.7764917647022519, "learning_rate": 6.116409974582625e-07, "loss": 1.6206, "step": 3507 }, { "epoch": 0.2444343796815664, "grad_norm": 0.74202044266512, "learning_rate": 6.115931015828062e-07, "loss": 1.5645, "step": 3508 }, { "epoch": 0.24450405880918372, "grad_norm": 0.6935767282043237, "learning_rate": 6.115451948485431e-07, "loss": 1.5034, "step": 3509 }, { "epoch": 0.24457373793680104, "grad_norm": 0.7039628910097253, "learning_rate": 6.114972772577693e-07, "loss": 1.5239, "step": 3510 }, { "epoch": 0.24464341706441836, "grad_norm": 0.6874285293399682, "learning_rate": 6.11449348812781e-07, "loss": 1.6456, "step": 3511 }, { "epoch": 0.24471309619203568, "grad_norm": 0.6522379381976608, "learning_rate": 6.11401409515875e-07, "loss": 1.5646, "step": 3512 }, { "epoch": 0.244782775319653, "grad_norm": 0.8171749552425288, "learning_rate": 6.113534593693486e-07, "loss": 1.5602, "step": 3513 }, { "epoch": 0.24485245444727033, "grad_norm": 0.7396199021021814, "learning_rate": 6.113054983754999e-07, "loss": 1.5303, "step": 3514 }, { "epoch": 0.24492213357488765, "grad_norm": 0.7662564325697268, "learning_rate": 6.11257526536627e-07, "loss": 1.5687, "step": 3515 }, { "epoch": 0.24499181270250497, "grad_norm": 0.6908179397521864, "learning_rate": 6.11209543855029e-07, "loss": 1.5637, "step": 3516 }, { "epoch": 0.2450614918301223, "grad_norm": 0.6836038980227871, "learning_rate": 6.111615503330051e-07, "loss": 1.5156, "step": 3517 }, { "epoch": 0.24513117095773962, "grad_norm": 0.73581436271446, "learning_rate": 6.111135459728556e-07, "loss": 1.6565, "step": 3518 }, { "epoch": 0.24520085008535694, "grad_norm": 0.7405195856434742, "learning_rate": 6.110655307768808e-07, "loss": 1.4523, "step": 3519 }, { "epoch": 0.24527052921297426, "grad_norm": 0.7585536397564181, "learning_rate": 6.110175047473816e-07, "loss": 1.3932, "step": 3520 }, { "epoch": 0.2453402083405916, "grad_norm": 0.709276744464445, "learning_rate": 6.109694678866594e-07, "loss": 1.4675, "step": 3521 }, { "epoch": 0.2454098874682089, "grad_norm": 0.750704891947952, "learning_rate": 6.109214201970165e-07, "loss": 1.4458, "step": 3522 }, { "epoch": 0.24547956659582623, "grad_norm": 0.772678393930129, "learning_rate": 6.108733616807554e-07, "loss": 1.6841, "step": 3523 }, { "epoch": 0.24554924572344355, "grad_norm": 0.6817202500029117, "learning_rate": 6.10825292340179e-07, "loss": 1.5587, "step": 3524 }, { "epoch": 0.24561892485106088, "grad_norm": 0.8040107414482522, "learning_rate": 6.10777212177591e-07, "loss": 1.5774, "step": 3525 }, { "epoch": 0.2456886039786782, "grad_norm": 1.796804413019553, "learning_rate": 6.107291211952956e-07, "loss": 1.4213, "step": 3526 }, { "epoch": 0.24575828310629552, "grad_norm": 0.7693183737224776, "learning_rate": 6.106810193955972e-07, "loss": 1.6151, "step": 3527 }, { "epoch": 0.24582796223391284, "grad_norm": 0.7251457431661789, "learning_rate": 6.10632906780801e-07, "loss": 1.5434, "step": 3528 }, { "epoch": 0.24589764136153017, "grad_norm": 0.7261229116118169, "learning_rate": 6.105847833532127e-07, "loss": 1.4882, "step": 3529 }, { "epoch": 0.2459673204891475, "grad_norm": 0.7457010223359175, "learning_rate": 6.105366491151387e-07, "loss": 1.7156, "step": 3530 }, { "epoch": 0.24603699961676478, "grad_norm": 0.7071322583151507, "learning_rate": 6.104885040688851e-07, "loss": 1.5663, "step": 3531 }, { "epoch": 0.2461066787443821, "grad_norm": 0.6879911743671988, "learning_rate": 6.104403482167596e-07, "loss": 1.5783, "step": 3532 }, { "epoch": 0.24617635787199943, "grad_norm": 0.744748622425573, "learning_rate": 6.103921815610699e-07, "loss": 1.6289, "step": 3533 }, { "epoch": 0.24624603699961675, "grad_norm": 0.691139277317366, "learning_rate": 6.10344004104124e-07, "loss": 1.4919, "step": 3534 }, { "epoch": 0.24631571612723407, "grad_norm": 0.6674381484094699, "learning_rate": 6.102958158482309e-07, "loss": 1.344, "step": 3535 }, { "epoch": 0.2463853952548514, "grad_norm": 0.7468365533757317, "learning_rate": 6.102476167956997e-07, "loss": 1.5975, "step": 3536 }, { "epoch": 0.24645507438246872, "grad_norm": 0.7617737698264097, "learning_rate": 6.101994069488403e-07, "loss": 1.6178, "step": 3537 }, { "epoch": 0.24652475351008604, "grad_norm": 0.6924214940752028, "learning_rate": 6.10151186309963e-07, "loss": 1.5002, "step": 3538 }, { "epoch": 0.24659443263770336, "grad_norm": 0.7349604377014846, "learning_rate": 6.101029548813787e-07, "loss": 1.6046, "step": 3539 }, { "epoch": 0.2466641117653207, "grad_norm": 0.7471521985422922, "learning_rate": 6.100547126653986e-07, "loss": 1.5667, "step": 3540 }, { "epoch": 0.246733790892938, "grad_norm": 0.7367598222463619, "learning_rate": 6.100064596643346e-07, "loss": 1.6448, "step": 3541 }, { "epoch": 0.24680347002055533, "grad_norm": 0.7776711532694474, "learning_rate": 6.099581958804993e-07, "loss": 1.4532, "step": 3542 }, { "epoch": 0.24687314914817265, "grad_norm": 0.7806167018788672, "learning_rate": 6.099099213162053e-07, "loss": 1.638, "step": 3543 }, { "epoch": 0.24694282827578998, "grad_norm": 0.7661398733958934, "learning_rate": 6.098616359737661e-07, "loss": 1.6671, "step": 3544 }, { "epoch": 0.2470125074034073, "grad_norm": 0.7234253141818819, "learning_rate": 6.098133398554956e-07, "loss": 1.524, "step": 3545 }, { "epoch": 0.24708218653102462, "grad_norm": 0.7359219209086516, "learning_rate": 6.097650329637085e-07, "loss": 1.6014, "step": 3546 }, { "epoch": 0.24715186565864194, "grad_norm": 0.681303466428281, "learning_rate": 6.097167153007195e-07, "loss": 1.6173, "step": 3547 }, { "epoch": 0.24722154478625927, "grad_norm": 0.6865820922181248, "learning_rate": 6.096683868688443e-07, "loss": 1.5139, "step": 3548 }, { "epoch": 0.2472912239138766, "grad_norm": 0.6920372454919099, "learning_rate": 6.096200476703986e-07, "loss": 1.3954, "step": 3549 }, { "epoch": 0.2473609030414939, "grad_norm": 0.7476041916829527, "learning_rate": 6.095716977076992e-07, "loss": 1.3092, "step": 3550 }, { "epoch": 0.24743058216911124, "grad_norm": 0.8072680023419683, "learning_rate": 6.095233369830628e-07, "loss": 1.5462, "step": 3551 }, { "epoch": 0.24750026129672856, "grad_norm": 0.8213723342959321, "learning_rate": 6.094749654988073e-07, "loss": 1.7721, "step": 3552 }, { "epoch": 0.24756994042434588, "grad_norm": 0.6873792435636162, "learning_rate": 6.094265832572506e-07, "loss": 1.5889, "step": 3553 }, { "epoch": 0.2476396195519632, "grad_norm": 0.7316708923206956, "learning_rate": 6.093781902607114e-07, "loss": 1.523, "step": 3554 }, { "epoch": 0.24770929867958053, "grad_norm": 0.7469485247831148, "learning_rate": 6.093297865115086e-07, "loss": 1.6226, "step": 3555 }, { "epoch": 0.24777897780719785, "grad_norm": 0.6725940287268622, "learning_rate": 6.092813720119618e-07, "loss": 1.435, "step": 3556 }, { "epoch": 0.24784865693481517, "grad_norm": 0.6954422705334324, "learning_rate": 6.092329467643914e-07, "loss": 1.4995, "step": 3557 }, { "epoch": 0.2479183360624325, "grad_norm": 0.7117543686613589, "learning_rate": 6.091845107711177e-07, "loss": 1.384, "step": 3558 }, { "epoch": 0.24798801519004982, "grad_norm": 0.7432403113648027, "learning_rate": 6.091360640344619e-07, "loss": 1.6325, "step": 3559 }, { "epoch": 0.24805769431766714, "grad_norm": 0.7451833768308808, "learning_rate": 6.09087606556746e-07, "loss": 1.6012, "step": 3560 }, { "epoch": 0.24812737344528446, "grad_norm": 1.4392707418778636, "learning_rate": 6.090391383402919e-07, "loss": 1.5517, "step": 3561 }, { "epoch": 0.24819705257290178, "grad_norm": 0.726831617640144, "learning_rate": 6.089906593874222e-07, "loss": 1.492, "step": 3562 }, { "epoch": 0.2482667317005191, "grad_norm": 0.768420711031291, "learning_rate": 6.089421697004604e-07, "loss": 1.6141, "step": 3563 }, { "epoch": 0.24833641082813643, "grad_norm": 0.7361593118586319, "learning_rate": 6.088936692817301e-07, "loss": 1.7136, "step": 3564 }, { "epoch": 0.24840608995575375, "grad_norm": 0.7033722393985278, "learning_rate": 6.088451581335555e-07, "loss": 1.4913, "step": 3565 }, { "epoch": 0.24847576908337107, "grad_norm": 0.7232376051382627, "learning_rate": 6.087966362582614e-07, "loss": 1.5684, "step": 3566 }, { "epoch": 0.2485454482109884, "grad_norm": 0.7350353882547026, "learning_rate": 6.087481036581729e-07, "loss": 1.54, "step": 3567 }, { "epoch": 0.24861512733860572, "grad_norm": 0.7768085786334259, "learning_rate": 6.08699560335616e-07, "loss": 1.6108, "step": 3568 }, { "epoch": 0.24868480646622304, "grad_norm": 0.7023493785204465, "learning_rate": 6.08651006292917e-07, "loss": 1.4761, "step": 3569 }, { "epoch": 0.24875448559384036, "grad_norm": 0.699437447824012, "learning_rate": 6.086024415324025e-07, "loss": 1.5039, "step": 3570 }, { "epoch": 0.24882416472145769, "grad_norm": 0.7063756052529659, "learning_rate": 6.085538660564001e-07, "loss": 1.4976, "step": 3571 }, { "epoch": 0.248893843849075, "grad_norm": 0.7142139878054212, "learning_rate": 6.085052798672374e-07, "loss": 1.3809, "step": 3572 }, { "epoch": 0.24896352297669233, "grad_norm": 0.6959967770546999, "learning_rate": 6.084566829672429e-07, "loss": 1.5112, "step": 3573 }, { "epoch": 0.24903320210430965, "grad_norm": 0.6939186177106823, "learning_rate": 6.084080753587453e-07, "loss": 1.5446, "step": 3574 }, { "epoch": 0.24910288123192698, "grad_norm": 0.7029487486994859, "learning_rate": 6.083594570440742e-07, "loss": 1.4871, "step": 3575 }, { "epoch": 0.2491725603595443, "grad_norm": 0.742816701423201, "learning_rate": 6.083108280255593e-07, "loss": 1.6708, "step": 3576 }, { "epoch": 0.24924223948716162, "grad_norm": 0.7137477992334339, "learning_rate": 6.08262188305531e-07, "loss": 1.431, "step": 3577 }, { "epoch": 0.24931191861477894, "grad_norm": 0.7036883536953713, "learning_rate": 6.082135378863204e-07, "loss": 1.5361, "step": 3578 }, { "epoch": 0.24938159774239627, "grad_norm": 0.807427431662475, "learning_rate": 6.081648767702589e-07, "loss": 1.6449, "step": 3579 }, { "epoch": 0.2494512768700136, "grad_norm": 0.6729778495040805, "learning_rate": 6.081162049596781e-07, "loss": 1.5132, "step": 3580 }, { "epoch": 0.2495209559976309, "grad_norm": 0.6724953431528643, "learning_rate": 6.080675224569108e-07, "loss": 1.4788, "step": 3581 }, { "epoch": 0.24959063512524823, "grad_norm": 0.7154576876166014, "learning_rate": 6.080188292642901e-07, "loss": 1.5311, "step": 3582 }, { "epoch": 0.24966031425286556, "grad_norm": 0.7493444527994712, "learning_rate": 6.07970125384149e-07, "loss": 1.5591, "step": 3583 }, { "epoch": 0.24972999338048288, "grad_norm": 0.7266179891707369, "learning_rate": 6.079214108188219e-07, "loss": 1.5179, "step": 3584 }, { "epoch": 0.2497996725081002, "grad_norm": 0.7217565810210871, "learning_rate": 6.07872685570643e-07, "loss": 1.5389, "step": 3585 }, { "epoch": 0.24986935163571752, "grad_norm": 0.6896161569382101, "learning_rate": 6.078239496419476e-07, "loss": 1.5998, "step": 3586 }, { "epoch": 0.24993903076333485, "grad_norm": 0.7371361263420952, "learning_rate": 6.07775203035071e-07, "loss": 1.5602, "step": 3587 }, { "epoch": 0.25000870989095214, "grad_norm": 0.7334616155478288, "learning_rate": 6.077264457523493e-07, "loss": 1.5357, "step": 3588 }, { "epoch": 0.2500783890185695, "grad_norm": 0.6977451408653291, "learning_rate": 6.076776777961192e-07, "loss": 1.7069, "step": 3589 }, { "epoch": 0.2501480681461868, "grad_norm": 0.6836085763879641, "learning_rate": 6.076288991687174e-07, "loss": 1.5012, "step": 3590 }, { "epoch": 0.25021774727380414, "grad_norm": 0.724668401614909, "learning_rate": 6.075801098724819e-07, "loss": 1.4189, "step": 3591 }, { "epoch": 0.25028742640142143, "grad_norm": 0.7218857976482577, "learning_rate": 6.075313099097505e-07, "loss": 1.533, "step": 3592 }, { "epoch": 0.2503571055290388, "grad_norm": 0.7454390129326977, "learning_rate": 6.07482499282862e-07, "loss": 1.6404, "step": 3593 }, { "epoch": 0.2504267846566561, "grad_norm": 0.7276507976619264, "learning_rate": 6.074336779941551e-07, "loss": 1.5575, "step": 3594 }, { "epoch": 0.2504964637842734, "grad_norm": 0.8181538349091576, "learning_rate": 6.0738484604597e-07, "loss": 1.5305, "step": 3595 }, { "epoch": 0.2505661429118907, "grad_norm": 0.7022901688141522, "learning_rate": 6.073360034406465e-07, "loss": 1.4621, "step": 3596 }, { "epoch": 0.25063582203950807, "grad_norm": 0.7053344078146021, "learning_rate": 6.072871501805251e-07, "loss": 1.6526, "step": 3597 }, { "epoch": 0.25070550116712537, "grad_norm": 0.6812712012110201, "learning_rate": 6.072382862679472e-07, "loss": 1.4954, "step": 3598 }, { "epoch": 0.2507751802947427, "grad_norm": 0.6865562783180164, "learning_rate": 6.071894117052545e-07, "loss": 1.5444, "step": 3599 }, { "epoch": 0.25084485942236, "grad_norm": 0.7130584509089323, "learning_rate": 6.071405264947889e-07, "loss": 1.5434, "step": 3600 }, { "epoch": 0.25091453854997736, "grad_norm": 0.7343554727384859, "learning_rate": 6.070916306388933e-07, "loss": 1.4086, "step": 3601 }, { "epoch": 0.25098421767759466, "grad_norm": 0.7308227185668241, "learning_rate": 6.070427241399108e-07, "loss": 1.533, "step": 3602 }, { "epoch": 0.251053896805212, "grad_norm": 0.7054271106104274, "learning_rate": 6.06993807000185e-07, "loss": 1.5204, "step": 3603 }, { "epoch": 0.2511235759328293, "grad_norm": 0.7238952984007729, "learning_rate": 6.069448792220603e-07, "loss": 1.5857, "step": 3604 }, { "epoch": 0.25119325506044665, "grad_norm": 0.7619163189051473, "learning_rate": 6.068959408078813e-07, "loss": 1.6456, "step": 3605 }, { "epoch": 0.25126293418806395, "grad_norm": 0.7691821142914758, "learning_rate": 6.068469917599934e-07, "loss": 1.6207, "step": 3606 }, { "epoch": 0.2513326133156813, "grad_norm": 0.7844297210575869, "learning_rate": 6.067980320807421e-07, "loss": 1.6005, "step": 3607 }, { "epoch": 0.2514022924432986, "grad_norm": 0.767312581828994, "learning_rate": 6.067490617724739e-07, "loss": 1.5474, "step": 3608 }, { "epoch": 0.25147197157091594, "grad_norm": 0.7466302509493756, "learning_rate": 6.067000808375353e-07, "loss": 1.5245, "step": 3609 }, { "epoch": 0.25154165069853324, "grad_norm": 0.7194713716465715, "learning_rate": 6.066510892782737e-07, "loss": 1.5255, "step": 3610 }, { "epoch": 0.2516113298261506, "grad_norm": 0.6903691577711621, "learning_rate": 6.066020870970368e-07, "loss": 1.5452, "step": 3611 }, { "epoch": 0.2516810089537679, "grad_norm": 0.8469633001258798, "learning_rate": 6.065530742961731e-07, "loss": 1.5305, "step": 3612 }, { "epoch": 0.25175068808138523, "grad_norm": 0.6957640057146053, "learning_rate": 6.065040508780312e-07, "loss": 1.4241, "step": 3613 }, { "epoch": 0.2518203672090025, "grad_norm": 0.7070417869184299, "learning_rate": 6.064550168449603e-07, "loss": 1.5566, "step": 3614 }, { "epoch": 0.2518900463366199, "grad_norm": 0.7573782379837182, "learning_rate": 6.064059721993104e-07, "loss": 1.5595, "step": 3615 }, { "epoch": 0.25195972546423717, "grad_norm": 0.7678449791648668, "learning_rate": 6.063569169434319e-07, "loss": 1.5895, "step": 3616 }, { "epoch": 0.2520294045918545, "grad_norm": 0.7780040963314879, "learning_rate": 6.063078510796754e-07, "loss": 1.5685, "step": 3617 }, { "epoch": 0.2520990837194718, "grad_norm": 0.697366601001487, "learning_rate": 6.062587746103924e-07, "loss": 1.6299, "step": 3618 }, { "epoch": 0.25216876284708917, "grad_norm": 0.6918393842439775, "learning_rate": 6.062096875379346e-07, "loss": 1.6065, "step": 3619 }, { "epoch": 0.25223844197470646, "grad_norm": 0.7765055919439487, "learning_rate": 6.061605898646545e-07, "loss": 1.6526, "step": 3620 }, { "epoch": 0.2523081211023238, "grad_norm": 0.7472761617400331, "learning_rate": 6.06111481592905e-07, "loss": 1.542, "step": 3621 }, { "epoch": 0.2523778002299411, "grad_norm": 0.7486342805260572, "learning_rate": 6.060623627250391e-07, "loss": 1.6035, "step": 3622 }, { "epoch": 0.25244747935755846, "grad_norm": 0.7500589843074995, "learning_rate": 6.060132332634111e-07, "loss": 1.5478, "step": 3623 }, { "epoch": 0.25251715848517575, "grad_norm": 0.7331824331258279, "learning_rate": 6.059640932103753e-07, "loss": 1.4816, "step": 3624 }, { "epoch": 0.2525868376127931, "grad_norm": 0.6785392683089051, "learning_rate": 6.059149425682865e-07, "loss": 1.4475, "step": 3625 }, { "epoch": 0.2526565167404104, "grad_norm": 0.7152394572979834, "learning_rate": 6.058657813395e-07, "loss": 1.6863, "step": 3626 }, { "epoch": 0.25272619586802775, "grad_norm": 0.7231210956234625, "learning_rate": 6.058166095263719e-07, "loss": 1.583, "step": 3627 }, { "epoch": 0.25279587499564504, "grad_norm": 0.7245911280542969, "learning_rate": 6.057674271312585e-07, "loss": 1.64, "step": 3628 }, { "epoch": 0.2528655541232624, "grad_norm": 0.7097081774448422, "learning_rate": 6.057182341565169e-07, "loss": 1.5683, "step": 3629 }, { "epoch": 0.2529352332508797, "grad_norm": 0.7191293124979857, "learning_rate": 6.056690306045043e-07, "loss": 1.5606, "step": 3630 }, { "epoch": 0.25300491237849704, "grad_norm": 0.7614871453223097, "learning_rate": 6.056198164775786e-07, "loss": 1.5594, "step": 3631 }, { "epoch": 0.25307459150611433, "grad_norm": 0.8002734672962935, "learning_rate": 6.055705917780987e-07, "loss": 1.5608, "step": 3632 }, { "epoch": 0.2531442706337317, "grad_norm": 0.7754927308854186, "learning_rate": 6.055213565084229e-07, "loss": 1.678, "step": 3633 }, { "epoch": 0.253213949761349, "grad_norm": 0.832975102500679, "learning_rate": 6.054721106709111e-07, "loss": 1.5249, "step": 3634 }, { "epoch": 0.25328362888896633, "grad_norm": 0.697784227911405, "learning_rate": 6.054228542679231e-07, "loss": 1.5183, "step": 3635 }, { "epoch": 0.2533533080165836, "grad_norm": 0.7001080333414058, "learning_rate": 6.053735873018195e-07, "loss": 1.5209, "step": 3636 }, { "epoch": 0.253422987144201, "grad_norm": 0.6923807006499639, "learning_rate": 6.053243097749611e-07, "loss": 1.5297, "step": 3637 }, { "epoch": 0.25349266627181827, "grad_norm": 0.8379293298094277, "learning_rate": 6.052750216897095e-07, "loss": 1.5739, "step": 3638 }, { "epoch": 0.2535623453994356, "grad_norm": 0.6882683163202826, "learning_rate": 6.052257230484266e-07, "loss": 1.4844, "step": 3639 }, { "epoch": 0.2536320245270529, "grad_norm": 0.7033906157479533, "learning_rate": 6.05176413853475e-07, "loss": 1.435, "step": 3640 }, { "epoch": 0.25370170365467026, "grad_norm": 0.6733432624515764, "learning_rate": 6.051270941072176e-07, "loss": 1.3151, "step": 3641 }, { "epoch": 0.25377138278228756, "grad_norm": 0.6875760437295031, "learning_rate": 6.050777638120179e-07, "loss": 1.5586, "step": 3642 }, { "epoch": 0.2538410619099049, "grad_norm": 0.7383449477336569, "learning_rate": 6.050284229702399e-07, "loss": 1.468, "step": 3643 }, { "epoch": 0.2539107410375222, "grad_norm": 0.7189865672271544, "learning_rate": 6.049790715842483e-07, "loss": 1.5838, "step": 3644 }, { "epoch": 0.25398042016513955, "grad_norm": 0.6813274745603186, "learning_rate": 6.04929709656408e-07, "loss": 1.4486, "step": 3645 }, { "epoch": 0.25405009929275685, "grad_norm": 0.7344695622304153, "learning_rate": 6.048803371890844e-07, "loss": 1.499, "step": 3646 }, { "epoch": 0.2541197784203742, "grad_norm": 0.7489876891373174, "learning_rate": 6.048309541846436e-07, "loss": 1.573, "step": 3647 }, { "epoch": 0.2541894575479915, "grad_norm": 0.6704671719396704, "learning_rate": 6.047815606454523e-07, "loss": 1.4899, "step": 3648 }, { "epoch": 0.25425913667560884, "grad_norm": 0.7009858120136003, "learning_rate": 6.047321565738773e-07, "loss": 1.5259, "step": 3649 }, { "epoch": 0.25432881580322614, "grad_norm": 0.6893590317823148, "learning_rate": 6.046827419722863e-07, "loss": 1.5622, "step": 3650 }, { "epoch": 0.2543984949308435, "grad_norm": 0.7623085036945566, "learning_rate": 6.046333168430474e-07, "loss": 1.7747, "step": 3651 }, { "epoch": 0.2544681740584608, "grad_norm": 0.6939366234602085, "learning_rate": 6.045838811885289e-07, "loss": 1.6092, "step": 3652 }, { "epoch": 0.25453785318607813, "grad_norm": 0.7396889519355708, "learning_rate": 6.045344350111001e-07, "loss": 1.5682, "step": 3653 }, { "epoch": 0.25460753231369543, "grad_norm": 0.7759652144680148, "learning_rate": 6.044849783131304e-07, "loss": 1.6867, "step": 3654 }, { "epoch": 0.2546772114413128, "grad_norm": 0.6993831801702053, "learning_rate": 6.044355110969901e-07, "loss": 1.5669, "step": 3655 }, { "epoch": 0.2547468905689301, "grad_norm": 0.738306542563742, "learning_rate": 6.043860333650495e-07, "loss": 1.4476, "step": 3656 }, { "epoch": 0.2548165696965474, "grad_norm": 0.6945902157434075, "learning_rate": 6.043365451196799e-07, "loss": 1.5581, "step": 3657 }, { "epoch": 0.2548862488241647, "grad_norm": 0.7798903180473131, "learning_rate": 6.042870463632525e-07, "loss": 1.5781, "step": 3658 }, { "epoch": 0.25495592795178207, "grad_norm": 0.6992002259989656, "learning_rate": 6.042375370981399e-07, "loss": 1.5645, "step": 3659 }, { "epoch": 0.25502560707939936, "grad_norm": 0.6678133223554386, "learning_rate": 6.041880173267144e-07, "loss": 1.4591, "step": 3660 }, { "epoch": 0.2550952862070167, "grad_norm": 0.6941086204495733, "learning_rate": 6.041384870513491e-07, "loss": 1.5182, "step": 3661 }, { "epoch": 0.255164965334634, "grad_norm": 0.7269003400483975, "learning_rate": 6.040889462744175e-07, "loss": 1.4942, "step": 3662 }, { "epoch": 0.25523464446225136, "grad_norm": 0.6982954121554564, "learning_rate": 6.04039394998294e-07, "loss": 1.5595, "step": 3663 }, { "epoch": 0.25530432358986865, "grad_norm": 0.7186993415671693, "learning_rate": 6.039898332253529e-07, "loss": 1.4928, "step": 3664 }, { "epoch": 0.255374002717486, "grad_norm": 0.7386453946777918, "learning_rate": 6.039402609579695e-07, "loss": 1.514, "step": 3665 }, { "epoch": 0.2554436818451033, "grad_norm": 0.7891239561661717, "learning_rate": 6.038906781985192e-07, "loss": 1.5981, "step": 3666 }, { "epoch": 0.25551336097272065, "grad_norm": 0.7333305917018511, "learning_rate": 6.038410849493784e-07, "loss": 1.6627, "step": 3667 }, { "epoch": 0.25558304010033794, "grad_norm": 0.7238548352809335, "learning_rate": 6.037914812129233e-07, "loss": 1.5441, "step": 3668 }, { "epoch": 0.25565271922795524, "grad_norm": 0.7443351369453468, "learning_rate": 6.037418669915314e-07, "loss": 1.5688, "step": 3669 }, { "epoch": 0.2557223983555726, "grad_norm": 0.766995135505678, "learning_rate": 6.036922422875802e-07, "loss": 1.6102, "step": 3670 }, { "epoch": 0.2557920774831899, "grad_norm": 0.7051706694725376, "learning_rate": 6.036426071034477e-07, "loss": 1.6172, "step": 3671 }, { "epoch": 0.25586175661080723, "grad_norm": 0.7193831318040109, "learning_rate": 6.035929614415127e-07, "loss": 1.4728, "step": 3672 }, { "epoch": 0.25593143573842453, "grad_norm": 0.6910373279699918, "learning_rate": 6.035433053041542e-07, "loss": 1.4988, "step": 3673 }, { "epoch": 0.2560011148660419, "grad_norm": 0.6740834430678302, "learning_rate": 6.034936386937517e-07, "loss": 1.4256, "step": 3674 }, { "epoch": 0.2560707939936592, "grad_norm": 0.7120350209786269, "learning_rate": 6.034439616126855e-07, "loss": 1.4566, "step": 3675 }, { "epoch": 0.2561404731212765, "grad_norm": 0.7113209360044545, "learning_rate": 6.033942740633364e-07, "loss": 1.5773, "step": 3676 }, { "epoch": 0.2562101522488938, "grad_norm": 0.7035221439888028, "learning_rate": 6.033445760480852e-07, "loss": 1.5885, "step": 3677 }, { "epoch": 0.25627983137651117, "grad_norm": 0.7274637005286824, "learning_rate": 6.032948675693137e-07, "loss": 1.6685, "step": 3678 }, { "epoch": 0.25634951050412846, "grad_norm": 0.7882564844836323, "learning_rate": 6.03245148629404e-07, "loss": 1.5647, "step": 3679 }, { "epoch": 0.2564191896317458, "grad_norm": 0.7194757461164577, "learning_rate": 6.031954192307387e-07, "loss": 1.6198, "step": 3680 }, { "epoch": 0.2564888687593631, "grad_norm": 0.7119046320308019, "learning_rate": 6.031456793757009e-07, "loss": 1.5605, "step": 3681 }, { "epoch": 0.25655854788698046, "grad_norm": 0.7037393192757397, "learning_rate": 6.030959290666744e-07, "loss": 1.5774, "step": 3682 }, { "epoch": 0.25662822701459775, "grad_norm": 0.7657401184144824, "learning_rate": 6.030461683060431e-07, "loss": 1.5446, "step": 3683 }, { "epoch": 0.2566979061422151, "grad_norm": 0.7131206536769255, "learning_rate": 6.02996397096192e-07, "loss": 1.5325, "step": 3684 }, { "epoch": 0.2567675852698324, "grad_norm": 0.9433307003881891, "learning_rate": 6.029466154395059e-07, "loss": 1.5525, "step": 3685 }, { "epoch": 0.25683726439744975, "grad_norm": 0.7386253299710522, "learning_rate": 6.028968233383705e-07, "loss": 1.4944, "step": 3686 }, { "epoch": 0.25690694352506704, "grad_norm": 0.7490420571721698, "learning_rate": 6.028470207951719e-07, "loss": 1.5285, "step": 3687 }, { "epoch": 0.2569766226526844, "grad_norm": 0.7557014064188572, "learning_rate": 6.027972078122972e-07, "loss": 1.6712, "step": 3688 }, { "epoch": 0.2570463017803017, "grad_norm": 0.7693733995532998, "learning_rate": 6.027473843921329e-07, "loss": 1.6772, "step": 3689 }, { "epoch": 0.25711598090791904, "grad_norm": 0.7186790659646887, "learning_rate": 6.026975505370669e-07, "loss": 1.5178, "step": 3690 }, { "epoch": 0.25718566003553633, "grad_norm": 0.7160997653437787, "learning_rate": 6.026477062494874e-07, "loss": 1.4966, "step": 3691 }, { "epoch": 0.2572553391631537, "grad_norm": 0.758441640620066, "learning_rate": 6.025978515317831e-07, "loss": 1.6479, "step": 3692 }, { "epoch": 0.257325018290771, "grad_norm": 0.7598611067484194, "learning_rate": 6.025479863863429e-07, "loss": 1.6224, "step": 3693 }, { "epoch": 0.25739469741838833, "grad_norm": 0.7295528775548727, "learning_rate": 6.024981108155564e-07, "loss": 1.6253, "step": 3694 }, { "epoch": 0.2574643765460056, "grad_norm": 0.8061244876510675, "learning_rate": 6.024482248218143e-07, "loss": 1.5635, "step": 3695 }, { "epoch": 0.257534055673623, "grad_norm": 0.7410224621117784, "learning_rate": 6.023983284075067e-07, "loss": 1.5478, "step": 3696 }, { "epoch": 0.25760373480124027, "grad_norm": 0.784543350091465, "learning_rate": 6.023484215750249e-07, "loss": 1.498, "step": 3697 }, { "epoch": 0.2576734139288576, "grad_norm": 0.7045651266569742, "learning_rate": 6.022985043267604e-07, "loss": 1.4791, "step": 3698 }, { "epoch": 0.2577430930564749, "grad_norm": 0.6606350336266129, "learning_rate": 6.022485766651056e-07, "loss": 1.54, "step": 3699 }, { "epoch": 0.25781277218409226, "grad_norm": 0.707716525899006, "learning_rate": 6.02198638592453e-07, "loss": 1.4594, "step": 3700 }, { "epoch": 0.25788245131170956, "grad_norm": 0.7500573443667033, "learning_rate": 6.021486901111957e-07, "loss": 1.6371, "step": 3701 }, { "epoch": 0.2579521304393269, "grad_norm": 0.7185889496885267, "learning_rate": 6.020987312237273e-07, "loss": 1.5289, "step": 3702 }, { "epoch": 0.2580218095669442, "grad_norm": 0.7717476468655174, "learning_rate": 6.020487619324421e-07, "loss": 1.5658, "step": 3703 }, { "epoch": 0.25809148869456155, "grad_norm": 0.7144107425409536, "learning_rate": 6.019987822397346e-07, "loss": 1.4534, "step": 3704 }, { "epoch": 0.25816116782217885, "grad_norm": 0.6853109997445458, "learning_rate": 6.019487921480001e-07, "loss": 1.5396, "step": 3705 }, { "epoch": 0.2582308469497962, "grad_norm": 0.6840861044462495, "learning_rate": 6.018987916596339e-07, "loss": 1.5586, "step": 3706 }, { "epoch": 0.2583005260774135, "grad_norm": 0.7329357026479503, "learning_rate": 6.018487807770325e-07, "loss": 1.5688, "step": 3707 }, { "epoch": 0.25837020520503085, "grad_norm": 0.7701244503623086, "learning_rate": 6.017987595025921e-07, "loss": 1.6769, "step": 3708 }, { "epoch": 0.25843988433264814, "grad_norm": 0.7615865858349096, "learning_rate": 6.017487278387103e-07, "loss": 1.6313, "step": 3709 }, { "epoch": 0.2585095634602655, "grad_norm": 0.7481709300334196, "learning_rate": 6.016986857877844e-07, "loss": 1.6588, "step": 3710 }, { "epoch": 0.2585792425878828, "grad_norm": 0.7067990272665308, "learning_rate": 6.016486333522125e-07, "loss": 1.6063, "step": 3711 }, { "epoch": 0.25864892171550014, "grad_norm": 0.7049759736878695, "learning_rate": 6.015985705343935e-07, "loss": 1.6585, "step": 3712 }, { "epoch": 0.25871860084311743, "grad_norm": 0.6755596694999774, "learning_rate": 6.015484973367262e-07, "loss": 1.4303, "step": 3713 }, { "epoch": 0.2587882799707348, "grad_norm": 0.7357990096575269, "learning_rate": 6.014984137616104e-07, "loss": 1.5035, "step": 3714 }, { "epoch": 0.2588579590983521, "grad_norm": 0.7120439456701165, "learning_rate": 6.014483198114461e-07, "loss": 1.5557, "step": 3715 }, { "epoch": 0.2589276382259694, "grad_norm": 0.7012771032934041, "learning_rate": 6.01398215488634e-07, "loss": 1.5811, "step": 3716 }, { "epoch": 0.2589973173535867, "grad_norm": 0.7305842165014204, "learning_rate": 6.013481007955752e-07, "loss": 1.5853, "step": 3717 }, { "epoch": 0.25906699648120407, "grad_norm": 0.7919793887127279, "learning_rate": 6.012979757346712e-07, "loss": 1.5073, "step": 3718 }, { "epoch": 0.25913667560882137, "grad_norm": 0.7068150998297997, "learning_rate": 6.012478403083242e-07, "loss": 1.5391, "step": 3719 }, { "epoch": 0.2592063547364387, "grad_norm": 0.6993516094342813, "learning_rate": 6.011976945189366e-07, "loss": 1.6703, "step": 3720 }, { "epoch": 0.259276033864056, "grad_norm": 0.7608863004195396, "learning_rate": 6.011475383689118e-07, "loss": 1.5703, "step": 3721 }, { "epoch": 0.25934571299167336, "grad_norm": 0.6914592273320355, "learning_rate": 6.010973718606531e-07, "loss": 1.4446, "step": 3722 }, { "epoch": 0.25941539211929066, "grad_norm": 0.6983881594270946, "learning_rate": 6.010471949965648e-07, "loss": 1.4893, "step": 3723 }, { "epoch": 0.259485071246908, "grad_norm": 0.7046492246974446, "learning_rate": 6.009970077790513e-07, "loss": 1.5758, "step": 3724 }, { "epoch": 0.2595547503745253, "grad_norm": 0.7441364234564669, "learning_rate": 6.009468102105178e-07, "loss": 1.4596, "step": 3725 }, { "epoch": 0.25962442950214265, "grad_norm": 0.7656422173680353, "learning_rate": 6.008966022933698e-07, "loss": 1.6614, "step": 3726 }, { "epoch": 0.25969410862975995, "grad_norm": 0.7559299822870974, "learning_rate": 6.008463840300134e-07, "loss": 1.5062, "step": 3727 }, { "epoch": 0.2597637877573773, "grad_norm": 0.7669212531621459, "learning_rate": 6.007961554228552e-07, "loss": 1.7258, "step": 3728 }, { "epoch": 0.2598334668849946, "grad_norm": 0.6772176348456944, "learning_rate": 6.007459164743022e-07, "loss": 1.4701, "step": 3729 }, { "epoch": 0.25990314601261194, "grad_norm": 0.7499056635774635, "learning_rate": 6.006956671867618e-07, "loss": 1.6034, "step": 3730 }, { "epoch": 0.25997282514022924, "grad_norm": 0.7225704922652993, "learning_rate": 6.006454075626425e-07, "loss": 1.5875, "step": 3731 }, { "epoch": 0.2600425042678466, "grad_norm": 0.6980878984816783, "learning_rate": 6.005951376043523e-07, "loss": 1.5183, "step": 3732 }, { "epoch": 0.2601121833954639, "grad_norm": 0.7491400607437037, "learning_rate": 6.005448573143007e-07, "loss": 1.5805, "step": 3733 }, { "epoch": 0.26018186252308123, "grad_norm": 0.8043793521506405, "learning_rate": 6.004945666948968e-07, "loss": 1.5908, "step": 3734 }, { "epoch": 0.2602515416506985, "grad_norm": 0.691452771123545, "learning_rate": 6.004442657485511e-07, "loss": 1.4322, "step": 3735 }, { "epoch": 0.2603212207783159, "grad_norm": 0.7392515048446673, "learning_rate": 6.003939544776738e-07, "loss": 1.4845, "step": 3736 }, { "epoch": 0.26039089990593317, "grad_norm": 0.7362991468882549, "learning_rate": 6.003436328846759e-07, "loss": 1.6551, "step": 3737 }, { "epoch": 0.2604605790335505, "grad_norm": 0.7294508844401375, "learning_rate": 6.002933009719691e-07, "loss": 1.5713, "step": 3738 }, { "epoch": 0.2605302581611678, "grad_norm": 0.7223413768091034, "learning_rate": 6.002429587419654e-07, "loss": 1.6509, "step": 3739 }, { "epoch": 0.26059993728878517, "grad_norm": 0.7098332952131358, "learning_rate": 6.001926061970771e-07, "loss": 1.5559, "step": 3740 }, { "epoch": 0.26066961641640246, "grad_norm": 0.683662666270985, "learning_rate": 6.001422433397174e-07, "loss": 1.5261, "step": 3741 }, { "epoch": 0.2607392955440198, "grad_norm": 0.7534065865215693, "learning_rate": 6.000918701722998e-07, "loss": 1.498, "step": 3742 }, { "epoch": 0.2608089746716371, "grad_norm": 0.6924815806601603, "learning_rate": 6.00041486697238e-07, "loss": 1.5978, "step": 3743 }, { "epoch": 0.26087865379925446, "grad_norm": 0.739100535066475, "learning_rate": 5.99991092916947e-07, "loss": 1.5646, "step": 3744 }, { "epoch": 0.26094833292687175, "grad_norm": 0.7083409366002702, "learning_rate": 5.999406888338412e-07, "loss": 1.5164, "step": 3745 }, { "epoch": 0.2610180120544891, "grad_norm": 0.6906008402718623, "learning_rate": 5.998902744503363e-07, "loss": 1.5732, "step": 3746 }, { "epoch": 0.2610876911821064, "grad_norm": 0.8148269080140913, "learning_rate": 5.998398497688484e-07, "loss": 1.5443, "step": 3747 }, { "epoch": 0.26115737030972375, "grad_norm": 0.7406162957615721, "learning_rate": 5.99789414791794e-07, "loss": 1.5242, "step": 3748 }, { "epoch": 0.26122704943734104, "grad_norm": 0.7937614876731822, "learning_rate": 5.997389695215896e-07, "loss": 1.5805, "step": 3749 }, { "epoch": 0.2612967285649584, "grad_norm": 0.7573554802703404, "learning_rate": 5.99688513960653e-07, "loss": 1.5713, "step": 3750 }, { "epoch": 0.2613664076925757, "grad_norm": 0.7646561069072213, "learning_rate": 5.996380481114021e-07, "loss": 1.5567, "step": 3751 }, { "epoch": 0.26143608682019304, "grad_norm": 0.6612253926929647, "learning_rate": 5.995875719762554e-07, "loss": 1.5712, "step": 3752 }, { "epoch": 0.26150576594781033, "grad_norm": 0.692424855543187, "learning_rate": 5.995370855576315e-07, "loss": 1.579, "step": 3753 }, { "epoch": 0.2615754450754277, "grad_norm": 0.7442513843985062, "learning_rate": 5.994865888579501e-07, "loss": 1.4847, "step": 3754 }, { "epoch": 0.261645124203045, "grad_norm": 0.7325976152475112, "learning_rate": 5.994360818796312e-07, "loss": 1.2739, "step": 3755 }, { "epoch": 0.2617148033306623, "grad_norm": 0.6912059227547824, "learning_rate": 5.993855646250948e-07, "loss": 1.4752, "step": 3756 }, { "epoch": 0.2617844824582796, "grad_norm": 0.7352347757664525, "learning_rate": 5.993350370967621e-07, "loss": 1.5596, "step": 3757 }, { "epoch": 0.26185416158589697, "grad_norm": 0.6756060946917455, "learning_rate": 5.992844992970544e-07, "loss": 1.6212, "step": 3758 }, { "epoch": 0.26192384071351427, "grad_norm": 0.6798226503928068, "learning_rate": 5.992339512283936e-07, "loss": 1.4912, "step": 3759 }, { "epoch": 0.26199351984113156, "grad_norm": 0.7513251410438295, "learning_rate": 5.991833928932022e-07, "loss": 1.6829, "step": 3760 }, { "epoch": 0.2620631989687489, "grad_norm": 0.6865988084084071, "learning_rate": 5.991328242939027e-07, "loss": 1.4194, "step": 3761 }, { "epoch": 0.2621328780963662, "grad_norm": 0.6832901524123712, "learning_rate": 5.990822454329185e-07, "loss": 1.4876, "step": 3762 }, { "epoch": 0.26220255722398356, "grad_norm": 0.7563513546614019, "learning_rate": 5.990316563126739e-07, "loss": 1.5627, "step": 3763 }, { "epoch": 0.26227223635160085, "grad_norm": 0.7891804562004142, "learning_rate": 5.989810569355928e-07, "loss": 1.5799, "step": 3764 }, { "epoch": 0.2623419154792182, "grad_norm": 0.7740143811084153, "learning_rate": 5.989304473041002e-07, "loss": 1.5068, "step": 3765 }, { "epoch": 0.2624115946068355, "grad_norm": 0.6755385725152862, "learning_rate": 5.988798274206213e-07, "loss": 1.5628, "step": 3766 }, { "epoch": 0.26248127373445285, "grad_norm": 0.67862127255677, "learning_rate": 5.98829197287582e-07, "loss": 1.4632, "step": 3767 }, { "epoch": 0.26255095286207014, "grad_norm": 0.7277909881984042, "learning_rate": 5.987785569074086e-07, "loss": 1.4731, "step": 3768 }, { "epoch": 0.2626206319896875, "grad_norm": 0.7746379657241323, "learning_rate": 5.987279062825278e-07, "loss": 1.5755, "step": 3769 }, { "epoch": 0.2626903111173048, "grad_norm": 0.7188812385609231, "learning_rate": 5.986772454153671e-07, "loss": 1.5729, "step": 3770 }, { "epoch": 0.26275999024492214, "grad_norm": 0.7012551759451631, "learning_rate": 5.98626574308354e-07, "loss": 1.579, "step": 3771 }, { "epoch": 0.26282966937253943, "grad_norm": 0.6879359396906378, "learning_rate": 5.985758929639171e-07, "loss": 1.5858, "step": 3772 }, { "epoch": 0.2628993485001568, "grad_norm": 0.7367205083171565, "learning_rate": 5.985252013844848e-07, "loss": 1.6155, "step": 3773 }, { "epoch": 0.2629690276277741, "grad_norm": 0.6755010882852025, "learning_rate": 5.984744995724865e-07, "loss": 1.544, "step": 3774 }, { "epoch": 0.2630387067553914, "grad_norm": 0.7220361856984341, "learning_rate": 5.984237875303518e-07, "loss": 1.5533, "step": 3775 }, { "epoch": 0.2631083858830087, "grad_norm": 0.6939224696872534, "learning_rate": 5.983730652605112e-07, "loss": 1.5252, "step": 3776 }, { "epoch": 0.26317806501062607, "grad_norm": 0.7790602795147022, "learning_rate": 5.983223327653953e-07, "loss": 1.5188, "step": 3777 }, { "epoch": 0.26324774413824337, "grad_norm": 0.6997604006917725, "learning_rate": 5.982715900474351e-07, "loss": 1.5381, "step": 3778 }, { "epoch": 0.2633174232658607, "grad_norm": 0.7261413109184857, "learning_rate": 5.982208371090626e-07, "loss": 1.5845, "step": 3779 }, { "epoch": 0.263387102393478, "grad_norm": 0.7014290816886654, "learning_rate": 5.981700739527099e-07, "loss": 1.4236, "step": 3780 }, { "epoch": 0.26345678152109536, "grad_norm": 0.7262834750193699, "learning_rate": 5.981193005808095e-07, "loss": 1.5169, "step": 3781 }, { "epoch": 0.26352646064871266, "grad_norm": 0.7254743030652147, "learning_rate": 5.980685169957948e-07, "loss": 1.5322, "step": 3782 }, { "epoch": 0.26359613977633, "grad_norm": 0.7169497660477526, "learning_rate": 5.980177232000992e-07, "loss": 1.6798, "step": 3783 }, { "epoch": 0.2636658189039473, "grad_norm": 0.7078883148268486, "learning_rate": 5.979669191961572e-07, "loss": 1.5295, "step": 3784 }, { "epoch": 0.26373549803156465, "grad_norm": 0.6876144944709754, "learning_rate": 5.979161049864031e-07, "loss": 1.4183, "step": 3785 }, { "epoch": 0.26380517715918195, "grad_norm": 0.7007535327284083, "learning_rate": 5.978652805732721e-07, "loss": 1.5592, "step": 3786 }, { "epoch": 0.2638748562867993, "grad_norm": 0.7329176497548947, "learning_rate": 5.978144459591999e-07, "loss": 1.6272, "step": 3787 }, { "epoch": 0.2639445354144166, "grad_norm": 0.7552545836313325, "learning_rate": 5.977636011466225e-07, "loss": 1.6171, "step": 3788 }, { "epoch": 0.26401421454203394, "grad_norm": 0.7909895832044823, "learning_rate": 5.977127461379767e-07, "loss": 1.586, "step": 3789 }, { "epoch": 0.26408389366965124, "grad_norm": 0.7172742828183447, "learning_rate": 5.976618809356991e-07, "loss": 1.5546, "step": 3790 }, { "epoch": 0.2641535727972686, "grad_norm": 0.7377383764434615, "learning_rate": 5.976110055422278e-07, "loss": 1.592, "step": 3791 }, { "epoch": 0.2642232519248859, "grad_norm": 0.7335537641666481, "learning_rate": 5.975601199600006e-07, "loss": 1.7052, "step": 3792 }, { "epoch": 0.26429293105250323, "grad_norm": 0.7697742371966286, "learning_rate": 5.975092241914562e-07, "loss": 1.5435, "step": 3793 }, { "epoch": 0.2643626101801205, "grad_norm": 0.7501305244135283, "learning_rate": 5.974583182390333e-07, "loss": 1.4065, "step": 3794 }, { "epoch": 0.2644322893077379, "grad_norm": 0.721222982117427, "learning_rate": 5.974074021051717e-07, "loss": 1.6465, "step": 3795 }, { "epoch": 0.2645019684353552, "grad_norm": 0.7255111363489318, "learning_rate": 5.973564757923113e-07, "loss": 1.5097, "step": 3796 }, { "epoch": 0.2645716475629725, "grad_norm": 0.7725503160769095, "learning_rate": 5.973055393028927e-07, "loss": 1.5885, "step": 3797 }, { "epoch": 0.2646413266905898, "grad_norm": 0.722794112667975, "learning_rate": 5.972545926393567e-07, "loss": 1.4518, "step": 3798 }, { "epoch": 0.26471100581820717, "grad_norm": 0.750610046876159, "learning_rate": 5.97203635804145e-07, "loss": 1.4518, "step": 3799 }, { "epoch": 0.26478068494582446, "grad_norm": 0.7373683509483345, "learning_rate": 5.971526687996992e-07, "loss": 1.6069, "step": 3800 }, { "epoch": 0.2648503640734418, "grad_norm": 0.7197788095293496, "learning_rate": 5.971016916284623e-07, "loss": 1.4767, "step": 3801 }, { "epoch": 0.2649200432010591, "grad_norm": 0.7489669567408815, "learning_rate": 5.970507042928765e-07, "loss": 1.5974, "step": 3802 }, { "epoch": 0.26498972232867646, "grad_norm": 0.7200758831805548, "learning_rate": 5.969997067953859e-07, "loss": 1.4463, "step": 3803 }, { "epoch": 0.26505940145629375, "grad_norm": 0.7146080622081296, "learning_rate": 5.969486991384342e-07, "loss": 1.4771, "step": 3804 }, { "epoch": 0.2651290805839111, "grad_norm": 0.712843526792142, "learning_rate": 5.968976813244654e-07, "loss": 1.5082, "step": 3805 }, { "epoch": 0.2651987597115284, "grad_norm": 0.7322880790478435, "learning_rate": 5.968466533559249e-07, "loss": 1.517, "step": 3806 }, { "epoch": 0.26526843883914575, "grad_norm": 0.7157357524960127, "learning_rate": 5.967956152352578e-07, "loss": 1.6056, "step": 3807 }, { "epoch": 0.26533811796676304, "grad_norm": 0.7047036158964357, "learning_rate": 5.967445669649101e-07, "loss": 1.4682, "step": 3808 }, { "epoch": 0.2654077970943804, "grad_norm": 0.7234814253316819, "learning_rate": 5.96693508547328e-07, "loss": 1.4715, "step": 3809 }, { "epoch": 0.2654774762219977, "grad_norm": 0.7030356586208922, "learning_rate": 5.966424399849583e-07, "loss": 1.5992, "step": 3810 }, { "epoch": 0.26554715534961504, "grad_norm": 0.7439713742620705, "learning_rate": 5.965913612802485e-07, "loss": 1.5361, "step": 3811 }, { "epoch": 0.26561683447723233, "grad_norm": 0.739432309777115, "learning_rate": 5.965402724356462e-07, "loss": 1.6648, "step": 3812 }, { "epoch": 0.2656865136048497, "grad_norm": 0.7282252553032253, "learning_rate": 5.964891734535997e-07, "loss": 1.5186, "step": 3813 }, { "epoch": 0.265756192732467, "grad_norm": 0.7404702107542114, "learning_rate": 5.964380643365579e-07, "loss": 1.6763, "step": 3814 }, { "epoch": 0.26582587186008433, "grad_norm": 0.7245406478505013, "learning_rate": 5.9638694508697e-07, "loss": 1.5752, "step": 3815 }, { "epoch": 0.2658955509877016, "grad_norm": 0.7015576237292017, "learning_rate": 5.963358157072858e-07, "loss": 1.5034, "step": 3816 }, { "epoch": 0.265965230115319, "grad_norm": 0.7258510311401303, "learning_rate": 5.962846761999553e-07, "loss": 1.499, "step": 3817 }, { "epoch": 0.26603490924293627, "grad_norm": 0.7092976051769906, "learning_rate": 5.962335265674295e-07, "loss": 1.5071, "step": 3818 }, { "epoch": 0.2661045883705536, "grad_norm": 0.7026549998783386, "learning_rate": 5.961823668121593e-07, "loss": 1.444, "step": 3819 }, { "epoch": 0.2661742674981709, "grad_norm": 0.7836415542286314, "learning_rate": 5.961311969365966e-07, "loss": 1.6607, "step": 3820 }, { "epoch": 0.26624394662578826, "grad_norm": 0.7517676503653571, "learning_rate": 5.960800169431935e-07, "loss": 1.6165, "step": 3821 }, { "epoch": 0.26631362575340556, "grad_norm": 0.7071949248379115, "learning_rate": 5.960288268344027e-07, "loss": 1.6095, "step": 3822 }, { "epoch": 0.2663833048810229, "grad_norm": 0.7524462940338275, "learning_rate": 5.959776266126772e-07, "loss": 1.5655, "step": 3823 }, { "epoch": 0.2664529840086402, "grad_norm": 0.7905471950668109, "learning_rate": 5.959264162804707e-07, "loss": 1.5676, "step": 3824 }, { "epoch": 0.26652266313625755, "grad_norm": 0.7576424220733925, "learning_rate": 5.958751958402374e-07, "loss": 1.5781, "step": 3825 }, { "epoch": 0.26659234226387485, "grad_norm": 0.7559598623005291, "learning_rate": 5.958239652944317e-07, "loss": 1.3891, "step": 3826 }, { "epoch": 0.2666620213914922, "grad_norm": 0.7388353146062195, "learning_rate": 5.957727246455088e-07, "loss": 1.399, "step": 3827 }, { "epoch": 0.2667317005191095, "grad_norm": 0.7097430840949527, "learning_rate": 5.957214738959243e-07, "loss": 1.4865, "step": 3828 }, { "epoch": 0.26680137964672684, "grad_norm": 0.7271421482613403, "learning_rate": 5.95670213048134e-07, "loss": 1.4648, "step": 3829 }, { "epoch": 0.26687105877434414, "grad_norm": 0.7505684797204005, "learning_rate": 5.956189421045947e-07, "loss": 1.5283, "step": 3830 }, { "epoch": 0.2669407379019615, "grad_norm": 0.7260488787468974, "learning_rate": 5.955676610677633e-07, "loss": 1.4497, "step": 3831 }, { "epoch": 0.2670104170295788, "grad_norm": 0.7848335820145739, "learning_rate": 5.955163699400973e-07, "loss": 1.8227, "step": 3832 }, { "epoch": 0.26708009615719613, "grad_norm": 0.6623961844248426, "learning_rate": 5.954650687240547e-07, "loss": 1.5249, "step": 3833 }, { "epoch": 0.26714977528481343, "grad_norm": 0.6783606484283297, "learning_rate": 5.954137574220939e-07, "loss": 1.5641, "step": 3834 }, { "epoch": 0.2672194544124308, "grad_norm": 0.6901063180908451, "learning_rate": 5.953624360366739e-07, "loss": 1.4457, "step": 3835 }, { "epoch": 0.2672891335400481, "grad_norm": 0.6583457935445118, "learning_rate": 5.95311104570254e-07, "loss": 1.481, "step": 3836 }, { "epoch": 0.2673588126676654, "grad_norm": 0.7459974223450113, "learning_rate": 5.952597630252943e-07, "loss": 1.6124, "step": 3837 }, { "epoch": 0.2674284917952827, "grad_norm": 0.7200644745261434, "learning_rate": 5.952084114042551e-07, "loss": 1.5149, "step": 3838 }, { "epoch": 0.26749817092290007, "grad_norm": 0.8150337193713256, "learning_rate": 5.951570497095973e-07, "loss": 1.5111, "step": 3839 }, { "epoch": 0.26756785005051736, "grad_norm": 0.6647113024228307, "learning_rate": 5.951056779437821e-07, "loss": 1.5736, "step": 3840 }, { "epoch": 0.2676375291781347, "grad_norm": 0.6977037781613992, "learning_rate": 5.950542961092715e-07, "loss": 1.4829, "step": 3841 }, { "epoch": 0.267707208305752, "grad_norm": 0.7263040973123264, "learning_rate": 5.950029042085279e-07, "loss": 1.4914, "step": 3842 }, { "epoch": 0.26777688743336936, "grad_norm": 0.7059006018144998, "learning_rate": 5.949515022440141e-07, "loss": 1.4649, "step": 3843 }, { "epoch": 0.26784656656098665, "grad_norm": 0.7146294619304026, "learning_rate": 5.949000902181932e-07, "loss": 1.5275, "step": 3844 }, { "epoch": 0.267916245688604, "grad_norm": 0.7528834166368202, "learning_rate": 5.948486681335289e-07, "loss": 1.4976, "step": 3845 }, { "epoch": 0.2679859248162213, "grad_norm": 0.6506852625680217, "learning_rate": 5.947972359924857e-07, "loss": 1.4088, "step": 3846 }, { "epoch": 0.26805560394383865, "grad_norm": 0.7112999509070153, "learning_rate": 5.947457937975282e-07, "loss": 1.5822, "step": 3847 }, { "epoch": 0.26812528307145594, "grad_norm": 0.6937904601326073, "learning_rate": 5.946943415511218e-07, "loss": 1.5623, "step": 3848 }, { "epoch": 0.2681949621990733, "grad_norm": 0.7137558963987013, "learning_rate": 5.946428792557321e-07, "loss": 1.535, "step": 3849 }, { "epoch": 0.2682646413266906, "grad_norm": 0.6923843161735229, "learning_rate": 5.94591406913825e-07, "loss": 1.4935, "step": 3850 }, { "epoch": 0.26833432045430794, "grad_norm": 0.6818469415482451, "learning_rate": 5.945399245278675e-07, "loss": 1.5496, "step": 3851 }, { "epoch": 0.26840399958192523, "grad_norm": 0.6965188523204926, "learning_rate": 5.944884321003267e-07, "loss": 1.4331, "step": 3852 }, { "epoch": 0.26847367870954253, "grad_norm": 0.7738072123350715, "learning_rate": 5.944369296336701e-07, "loss": 1.782, "step": 3853 }, { "epoch": 0.2685433578371599, "grad_norm": 0.7172882849130879, "learning_rate": 5.943854171303659e-07, "loss": 1.5333, "step": 3854 }, { "epoch": 0.2686130369647772, "grad_norm": 0.7399299275792292, "learning_rate": 5.943338945928827e-07, "loss": 1.4818, "step": 3855 }, { "epoch": 0.2686827160923945, "grad_norm": 0.7494982746550088, "learning_rate": 5.942823620236894e-07, "loss": 1.5849, "step": 3856 }, { "epoch": 0.2687523952200118, "grad_norm": 1.3592300358320584, "learning_rate": 5.942308194252557e-07, "loss": 1.6021, "step": 3857 }, { "epoch": 0.26882207434762917, "grad_norm": 0.6870898200709619, "learning_rate": 5.941792668000517e-07, "loss": 1.5074, "step": 3858 }, { "epoch": 0.26889175347524646, "grad_norm": 0.7256808114459625, "learning_rate": 5.941277041505477e-07, "loss": 1.5594, "step": 3859 }, { "epoch": 0.2689614326028638, "grad_norm": 0.6883037203550751, "learning_rate": 5.940761314792148e-07, "loss": 1.5388, "step": 3860 }, { "epoch": 0.2690311117304811, "grad_norm": 0.7187263352455807, "learning_rate": 5.940245487885244e-07, "loss": 1.4854, "step": 3861 }, { "epoch": 0.26910079085809846, "grad_norm": 0.7166388609160896, "learning_rate": 5.939729560809486e-07, "loss": 1.4961, "step": 3862 }, { "epoch": 0.26917046998571575, "grad_norm": 0.7070290799262712, "learning_rate": 5.939213533589596e-07, "loss": 1.5669, "step": 3863 }, { "epoch": 0.2692401491133331, "grad_norm": 0.6967168983725861, "learning_rate": 5.938697406250307e-07, "loss": 1.3717, "step": 3864 }, { "epoch": 0.2693098282409504, "grad_norm": 0.681060189861049, "learning_rate": 5.938181178816349e-07, "loss": 1.5998, "step": 3865 }, { "epoch": 0.26937950736856775, "grad_norm": 0.8336457379589953, "learning_rate": 5.93766485131246e-07, "loss": 1.5647, "step": 3866 }, { "epoch": 0.26944918649618504, "grad_norm": 0.6879300359719382, "learning_rate": 5.937148423763387e-07, "loss": 1.4075, "step": 3867 }, { "epoch": 0.2695188656238024, "grad_norm": 0.7410474852761608, "learning_rate": 5.936631896193877e-07, "loss": 1.5457, "step": 3868 }, { "epoch": 0.2695885447514197, "grad_norm": 0.7548080374598897, "learning_rate": 5.936115268628682e-07, "loss": 1.5415, "step": 3869 }, { "epoch": 0.26965822387903704, "grad_norm": 0.7264228100243976, "learning_rate": 5.935598541092561e-07, "loss": 1.5861, "step": 3870 }, { "epoch": 0.26972790300665433, "grad_norm": 0.7722431128355013, "learning_rate": 5.935081713610277e-07, "loss": 1.6332, "step": 3871 }, { "epoch": 0.2697975821342717, "grad_norm": 0.754648281167634, "learning_rate": 5.934564786206595e-07, "loss": 1.5744, "step": 3872 }, { "epoch": 0.269867261261889, "grad_norm": 0.6926936779814764, "learning_rate": 5.934047758906291e-07, "loss": 1.4116, "step": 3873 }, { "epoch": 0.26993694038950633, "grad_norm": 0.7420917039064506, "learning_rate": 5.933530631734138e-07, "loss": 1.5542, "step": 3874 }, { "epoch": 0.2700066195171236, "grad_norm": 0.7718671864174498, "learning_rate": 5.93301340471492e-07, "loss": 1.5018, "step": 3875 }, { "epoch": 0.270076298644741, "grad_norm": 0.7436769872040874, "learning_rate": 5.932496077873425e-07, "loss": 1.5629, "step": 3876 }, { "epoch": 0.27014597777235827, "grad_norm": 0.7024002295532155, "learning_rate": 5.93197865123444e-07, "loss": 1.5561, "step": 3877 }, { "epoch": 0.2702156568999756, "grad_norm": 0.7036180595380055, "learning_rate": 5.931461124822766e-07, "loss": 1.4487, "step": 3878 }, { "epoch": 0.2702853360275929, "grad_norm": 0.7291033209991897, "learning_rate": 5.9309434986632e-07, "loss": 1.6397, "step": 3879 }, { "epoch": 0.27035501515521027, "grad_norm": 0.7270278064368998, "learning_rate": 5.930425772780551e-07, "loss": 1.6212, "step": 3880 }, { "epoch": 0.27042469428282756, "grad_norm": 0.7196922674297855, "learning_rate": 5.929907947199628e-07, "loss": 1.5628, "step": 3881 }, { "epoch": 0.2704943734104449, "grad_norm": 0.7405096746089026, "learning_rate": 5.929390021945246e-07, "loss": 1.3544, "step": 3882 }, { "epoch": 0.2705640525380622, "grad_norm": 0.7450568923816304, "learning_rate": 5.928871997042224e-07, "loss": 1.4874, "step": 3883 }, { "epoch": 0.27063373166567956, "grad_norm": 0.7056369007831684, "learning_rate": 5.928353872515389e-07, "loss": 1.4927, "step": 3884 }, { "epoch": 0.27070341079329685, "grad_norm": 0.761581874927409, "learning_rate": 5.92783564838957e-07, "loss": 1.5912, "step": 3885 }, { "epoch": 0.2707730899209142, "grad_norm": 0.6921846983017318, "learning_rate": 5.9273173246896e-07, "loss": 1.5517, "step": 3886 }, { "epoch": 0.2708427690485315, "grad_norm": 0.7002922072429691, "learning_rate": 5.926798901440321e-07, "loss": 1.5203, "step": 3887 }, { "epoch": 0.27091244817614885, "grad_norm": 0.683729525033674, "learning_rate": 5.926280378666573e-07, "loss": 1.4792, "step": 3888 }, { "epoch": 0.27098212730376614, "grad_norm": 0.6414599752660434, "learning_rate": 5.925761756393207e-07, "loss": 1.5579, "step": 3889 }, { "epoch": 0.2710518064313835, "grad_norm": 0.7010029702153505, "learning_rate": 5.925243034645077e-07, "loss": 1.3604, "step": 3890 }, { "epoch": 0.2711214855590008, "grad_norm": 0.6772803213294848, "learning_rate": 5.92472421344704e-07, "loss": 1.4497, "step": 3891 }, { "epoch": 0.27119116468661814, "grad_norm": 0.7423859825107686, "learning_rate": 5.92420529282396e-07, "loss": 1.6493, "step": 3892 }, { "epoch": 0.27126084381423543, "grad_norm": 0.7069407783189319, "learning_rate": 5.923686272800703e-07, "loss": 1.4724, "step": 3893 }, { "epoch": 0.2713305229418528, "grad_norm": 0.7288513991064071, "learning_rate": 5.923167153402144e-07, "loss": 1.6363, "step": 3894 }, { "epoch": 0.2714002020694701, "grad_norm": 0.6983283286669905, "learning_rate": 5.922647934653158e-07, "loss": 1.5045, "step": 3895 }, { "epoch": 0.2714698811970874, "grad_norm": 0.729073744684144, "learning_rate": 5.922128616578627e-07, "loss": 1.497, "step": 3896 }, { "epoch": 0.2715395603247047, "grad_norm": 0.6998014105535623, "learning_rate": 5.92160919920344e-07, "loss": 1.6703, "step": 3897 }, { "epoch": 0.27160923945232207, "grad_norm": 0.7285550394088254, "learning_rate": 5.921089682552487e-07, "loss": 1.5291, "step": 3898 }, { "epoch": 0.27167891857993937, "grad_norm": 0.7584854068310711, "learning_rate": 5.920570066650665e-07, "loss": 1.4431, "step": 3899 }, { "epoch": 0.2717485977075567, "grad_norm": 0.7782366181505483, "learning_rate": 5.920050351522874e-07, "loss": 1.5376, "step": 3900 }, { "epoch": 0.271818276835174, "grad_norm": 0.7645158556386534, "learning_rate": 5.91953053719402e-07, "loss": 1.6062, "step": 3901 }, { "epoch": 0.27188795596279136, "grad_norm": 0.6900052460278817, "learning_rate": 5.919010623689015e-07, "loss": 1.5655, "step": 3902 }, { "epoch": 0.27195763509040866, "grad_norm": 0.7540368159165663, "learning_rate": 5.918490611032772e-07, "loss": 1.6374, "step": 3903 }, { "epoch": 0.272027314218026, "grad_norm": 0.7339110598734926, "learning_rate": 5.917970499250214e-07, "loss": 1.6146, "step": 3904 }, { "epoch": 0.2720969933456433, "grad_norm": 0.7138874824396794, "learning_rate": 5.917450288366263e-07, "loss": 1.5524, "step": 3905 }, { "epoch": 0.27216667247326065, "grad_norm": 0.6772495253223015, "learning_rate": 5.916929978405849e-07, "loss": 1.6166, "step": 3906 }, { "epoch": 0.27223635160087795, "grad_norm": 0.7887647513498364, "learning_rate": 5.916409569393909e-07, "loss": 1.5605, "step": 3907 }, { "epoch": 0.2723060307284953, "grad_norm": 0.6988054387986751, "learning_rate": 5.915889061355379e-07, "loss": 1.5141, "step": 3908 }, { "epoch": 0.2723757098561126, "grad_norm": 0.7296767353519555, "learning_rate": 5.915368454315205e-07, "loss": 1.5563, "step": 3909 }, { "epoch": 0.27244538898372994, "grad_norm": 0.7497446074632108, "learning_rate": 5.914847748298333e-07, "loss": 1.6255, "step": 3910 }, { "epoch": 0.27251506811134724, "grad_norm": 0.6639192068050025, "learning_rate": 5.914326943329719e-07, "loss": 1.5285, "step": 3911 }, { "epoch": 0.2725847472389646, "grad_norm": 0.7428035774613251, "learning_rate": 5.913806039434321e-07, "loss": 1.4783, "step": 3912 }, { "epoch": 0.2726544263665819, "grad_norm": 0.7281922760552719, "learning_rate": 5.913285036637098e-07, "loss": 1.5469, "step": 3913 }, { "epoch": 0.27272410549419923, "grad_norm": 0.7333014735087869, "learning_rate": 5.912763934963022e-07, "loss": 1.592, "step": 3914 }, { "epoch": 0.2727937846218165, "grad_norm": 0.7798965806035877, "learning_rate": 5.912242734437064e-07, "loss": 1.6835, "step": 3915 }, { "epoch": 0.2728634637494339, "grad_norm": 0.7536671752599499, "learning_rate": 5.911721435084199e-07, "loss": 1.7395, "step": 3916 }, { "epoch": 0.27293314287705117, "grad_norm": 0.6902780463980486, "learning_rate": 5.911200036929411e-07, "loss": 1.6335, "step": 3917 }, { "epoch": 0.2730028220046685, "grad_norm": 0.6967623275559346, "learning_rate": 5.910678539997686e-07, "loss": 1.5714, "step": 3918 }, { "epoch": 0.2730725011322858, "grad_norm": 0.7397568053887077, "learning_rate": 5.910156944314013e-07, "loss": 1.6657, "step": 3919 }, { "epoch": 0.27314218025990317, "grad_norm": 0.701659478786721, "learning_rate": 5.909635249903391e-07, "loss": 1.5462, "step": 3920 }, { "epoch": 0.27321185938752046, "grad_norm": 0.7725419571633488, "learning_rate": 5.90911345679082e-07, "loss": 1.6394, "step": 3921 }, { "epoch": 0.2732815385151378, "grad_norm": 0.6980051136129517, "learning_rate": 5.908591565001304e-07, "loss": 1.4364, "step": 3922 }, { "epoch": 0.2733512176427551, "grad_norm": 0.8135918312405112, "learning_rate": 5.908069574559854e-07, "loss": 1.5069, "step": 3923 }, { "epoch": 0.27342089677037246, "grad_norm": 0.7053317409211138, "learning_rate": 5.907547485491487e-07, "loss": 1.5591, "step": 3924 }, { "epoch": 0.27349057589798975, "grad_norm": 0.7018480395090504, "learning_rate": 5.907025297821218e-07, "loss": 1.5574, "step": 3925 }, { "epoch": 0.2735602550256071, "grad_norm": 0.7720773415373251, "learning_rate": 5.906503011574075e-07, "loss": 1.4853, "step": 3926 }, { "epoch": 0.2736299341532244, "grad_norm": 0.6976717600818462, "learning_rate": 5.905980626775085e-07, "loss": 1.542, "step": 3927 }, { "epoch": 0.27369961328084175, "grad_norm": 0.7211622466811443, "learning_rate": 5.905458143449282e-07, "loss": 1.6, "step": 3928 }, { "epoch": 0.27376929240845904, "grad_norm": 0.695676948180807, "learning_rate": 5.904935561621705e-07, "loss": 1.61, "step": 3929 }, { "epoch": 0.2738389715360764, "grad_norm": 0.7392444394382608, "learning_rate": 5.904412881317398e-07, "loss": 1.4697, "step": 3930 }, { "epoch": 0.2739086506636937, "grad_norm": 0.7379040097850592, "learning_rate": 5.903890102561409e-07, "loss": 1.6316, "step": 3931 }, { "epoch": 0.27397832979131104, "grad_norm": 0.6838537018000045, "learning_rate": 5.903367225378788e-07, "loss": 1.508, "step": 3932 }, { "epoch": 0.27404800891892833, "grad_norm": 0.723306590283538, "learning_rate": 5.902844249794595e-07, "loss": 1.5367, "step": 3933 }, { "epoch": 0.2741176880465457, "grad_norm": 0.7380752803903438, "learning_rate": 5.902321175833891e-07, "loss": 1.5289, "step": 3934 }, { "epoch": 0.274187367174163, "grad_norm": 0.7063438845989196, "learning_rate": 5.901798003521742e-07, "loss": 1.4684, "step": 3935 }, { "epoch": 0.2742570463017803, "grad_norm": 0.7569904066641995, "learning_rate": 5.901274732883223e-07, "loss": 1.5033, "step": 3936 }, { "epoch": 0.2743267254293976, "grad_norm": 0.7296337737834608, "learning_rate": 5.900751363943405e-07, "loss": 1.5454, "step": 3937 }, { "epoch": 0.274396404557015, "grad_norm": 0.7029267716615997, "learning_rate": 5.900227896727372e-07, "loss": 1.5316, "step": 3938 }, { "epoch": 0.27446608368463227, "grad_norm": 0.7178752281258443, "learning_rate": 5.89970433126021e-07, "loss": 1.5698, "step": 3939 }, { "epoch": 0.2745357628122496, "grad_norm": 0.7567086642839586, "learning_rate": 5.899180667567008e-07, "loss": 1.516, "step": 3940 }, { "epoch": 0.2746054419398669, "grad_norm": 0.6923104599232259, "learning_rate": 5.898656905672861e-07, "loss": 1.5541, "step": 3941 }, { "epoch": 0.27467512106748426, "grad_norm": 0.710308756305562, "learning_rate": 5.89813304560287e-07, "loss": 1.5168, "step": 3942 }, { "epoch": 0.27474480019510156, "grad_norm": 0.7618485460806338, "learning_rate": 5.897609087382139e-07, "loss": 1.5854, "step": 3943 }, { "epoch": 0.27481447932271885, "grad_norm": 0.7897506539988088, "learning_rate": 5.897085031035776e-07, "loss": 1.5347, "step": 3944 }, { "epoch": 0.2748841584503362, "grad_norm": 0.7576815455048657, "learning_rate": 5.896560876588897e-07, "loss": 1.5786, "step": 3945 }, { "epoch": 0.2749538375779535, "grad_norm": 0.6614648949215904, "learning_rate": 5.896036624066618e-07, "loss": 1.429, "step": 3946 }, { "epoch": 0.27502351670557085, "grad_norm": 0.7016203280395538, "learning_rate": 5.895512273494064e-07, "loss": 1.5163, "step": 3947 }, { "epoch": 0.27509319583318814, "grad_norm": 0.6931782798530423, "learning_rate": 5.894987824896362e-07, "loss": 1.5627, "step": 3948 }, { "epoch": 0.2751628749608055, "grad_norm": 0.7451596533117754, "learning_rate": 5.894463278298647e-07, "loss": 1.559, "step": 3949 }, { "epoch": 0.2752325540884228, "grad_norm": 0.8392928248647462, "learning_rate": 5.893938633726052e-07, "loss": 1.5077, "step": 3950 }, { "epoch": 0.27530223321604014, "grad_norm": 0.7496311483780644, "learning_rate": 5.893413891203723e-07, "loss": 1.6871, "step": 3951 }, { "epoch": 0.27537191234365743, "grad_norm": 0.6866597786792935, "learning_rate": 5.892889050756805e-07, "loss": 1.5015, "step": 3952 }, { "epoch": 0.2754415914712748, "grad_norm": 0.7539980443871873, "learning_rate": 5.89236411241045e-07, "loss": 1.5783, "step": 3953 }, { "epoch": 0.2755112705988921, "grad_norm": 0.7120384308434919, "learning_rate": 5.891839076189814e-07, "loss": 1.4073, "step": 3954 }, { "epoch": 0.2755809497265094, "grad_norm": 0.7563875392467487, "learning_rate": 5.891313942120056e-07, "loss": 1.4677, "step": 3955 }, { "epoch": 0.2756506288541267, "grad_norm": 0.6614944369011492, "learning_rate": 5.890788710226344e-07, "loss": 1.5537, "step": 3956 }, { "epoch": 0.2757203079817441, "grad_norm": 0.6610667999394185, "learning_rate": 5.890263380533848e-07, "loss": 1.4159, "step": 3957 }, { "epoch": 0.27578998710936137, "grad_norm": 0.7281030442813066, "learning_rate": 5.889737953067742e-07, "loss": 1.4733, "step": 3958 }, { "epoch": 0.2758596662369787, "grad_norm": 0.7187232399212806, "learning_rate": 5.889212427853205e-07, "loss": 1.5881, "step": 3959 }, { "epoch": 0.275929345364596, "grad_norm": 0.7783928113723274, "learning_rate": 5.888686804915423e-07, "loss": 1.7394, "step": 3960 }, { "epoch": 0.27599902449221336, "grad_norm": 0.7800510926427071, "learning_rate": 5.888161084279584e-07, "loss": 1.5735, "step": 3961 }, { "epoch": 0.27606870361983066, "grad_norm": 0.6975159879900961, "learning_rate": 5.887635265970882e-07, "loss": 1.5884, "step": 3962 }, { "epoch": 0.276138382747448, "grad_norm": 0.8555360238849415, "learning_rate": 5.887109350014513e-07, "loss": 1.5826, "step": 3963 }, { "epoch": 0.2762080618750653, "grad_norm": 0.7028176190290327, "learning_rate": 5.886583336435683e-07, "loss": 1.7248, "step": 3964 }, { "epoch": 0.27627774100268265, "grad_norm": 0.7432092852493237, "learning_rate": 5.886057225259598e-07, "loss": 1.6639, "step": 3965 }, { "epoch": 0.27634742013029995, "grad_norm": 0.6896597838445886, "learning_rate": 5.885531016511472e-07, "loss": 1.447, "step": 3966 }, { "epoch": 0.2764170992579173, "grad_norm": 0.720433857116974, "learning_rate": 5.885004710216519e-07, "loss": 1.6086, "step": 3967 }, { "epoch": 0.2764867783855346, "grad_norm": 0.7668175443909877, "learning_rate": 5.884478306399965e-07, "loss": 1.5574, "step": 3968 }, { "epoch": 0.27655645751315194, "grad_norm": 0.7209226325400172, "learning_rate": 5.883951805087033e-07, "loss": 1.6687, "step": 3969 }, { "epoch": 0.27662613664076924, "grad_norm": 0.714311583829575, "learning_rate": 5.883425206302952e-07, "loss": 1.5147, "step": 3970 }, { "epoch": 0.2766958157683866, "grad_norm": 0.6742840641439894, "learning_rate": 5.882898510072964e-07, "loss": 1.4254, "step": 3971 }, { "epoch": 0.2767654948960039, "grad_norm": 0.7348778247791798, "learning_rate": 5.882371716422306e-07, "loss": 1.4907, "step": 3972 }, { "epoch": 0.27683517402362123, "grad_norm": 0.6579709981742292, "learning_rate": 5.88184482537622e-07, "loss": 1.4232, "step": 3973 }, { "epoch": 0.27690485315123853, "grad_norm": 0.7227626916827766, "learning_rate": 5.881317836959961e-07, "loss": 1.4126, "step": 3974 }, { "epoch": 0.2769745322788559, "grad_norm": 0.699676254107354, "learning_rate": 5.880790751198782e-07, "loss": 1.564, "step": 3975 }, { "epoch": 0.2770442114064732, "grad_norm": 0.6913603310218001, "learning_rate": 5.880263568117939e-07, "loss": 1.6078, "step": 3976 }, { "epoch": 0.2771138905340905, "grad_norm": 0.6826484309669981, "learning_rate": 5.879736287742698e-07, "loss": 1.6187, "step": 3977 }, { "epoch": 0.2771835696617078, "grad_norm": 0.7649106951889587, "learning_rate": 5.879208910098327e-07, "loss": 1.6896, "step": 3978 }, { "epoch": 0.27725324878932517, "grad_norm": 0.686641906084249, "learning_rate": 5.878681435210099e-07, "loss": 1.4748, "step": 3979 }, { "epoch": 0.27732292791694246, "grad_norm": 0.7413788736872214, "learning_rate": 5.878153863103294e-07, "loss": 1.6367, "step": 3980 }, { "epoch": 0.2773926070445598, "grad_norm": 0.7146264681803929, "learning_rate": 5.87762619380319e-07, "loss": 1.423, "step": 3981 }, { "epoch": 0.2774622861721771, "grad_norm": 0.7193547975548729, "learning_rate": 5.877098427335077e-07, "loss": 1.5612, "step": 3982 }, { "epoch": 0.27753196529979446, "grad_norm": 0.6911172013518755, "learning_rate": 5.876570563724246e-07, "loss": 1.6294, "step": 3983 }, { "epoch": 0.27760164442741175, "grad_norm": 0.7974987700336611, "learning_rate": 5.876042602995991e-07, "loss": 1.5826, "step": 3984 }, { "epoch": 0.2776713235550291, "grad_norm": 0.7523075392775425, "learning_rate": 5.875514545175619e-07, "loss": 1.4486, "step": 3985 }, { "epoch": 0.2777410026826464, "grad_norm": 0.7105791555841411, "learning_rate": 5.874986390288428e-07, "loss": 1.526, "step": 3986 }, { "epoch": 0.27781068181026375, "grad_norm": 0.6911026667088176, "learning_rate": 5.874458138359734e-07, "loss": 1.4773, "step": 3987 }, { "epoch": 0.27788036093788104, "grad_norm": 0.769290264633232, "learning_rate": 5.873929789414849e-07, "loss": 1.5241, "step": 3988 }, { "epoch": 0.2779500400654984, "grad_norm": 0.7393921952918916, "learning_rate": 5.873401343479093e-07, "loss": 1.5214, "step": 3989 }, { "epoch": 0.2780197191931157, "grad_norm": 0.6990692068762214, "learning_rate": 5.872872800577792e-07, "loss": 1.4944, "step": 3990 }, { "epoch": 0.27808939832073304, "grad_norm": 0.7482070834918554, "learning_rate": 5.872344160736273e-07, "loss": 1.4291, "step": 3991 }, { "epoch": 0.27815907744835033, "grad_norm": 0.7684926653711424, "learning_rate": 5.87181542397987e-07, "loss": 1.449, "step": 3992 }, { "epoch": 0.2782287565759677, "grad_norm": 0.7400846004615286, "learning_rate": 5.871286590333921e-07, "loss": 1.571, "step": 3993 }, { "epoch": 0.278298435703585, "grad_norm": 0.7117553166829831, "learning_rate": 5.870757659823769e-07, "loss": 1.5885, "step": 3994 }, { "epoch": 0.27836811483120233, "grad_norm": 0.7295296260321572, "learning_rate": 5.870228632474761e-07, "loss": 1.573, "step": 3995 }, { "epoch": 0.2784377939588196, "grad_norm": 0.7096933784710855, "learning_rate": 5.869699508312251e-07, "loss": 1.6313, "step": 3996 }, { "epoch": 0.278507473086437, "grad_norm": 0.74818849404883, "learning_rate": 5.869170287361592e-07, "loss": 1.6776, "step": 3997 }, { "epoch": 0.27857715221405427, "grad_norm": 0.6633573767886243, "learning_rate": 5.868640969648149e-07, "loss": 1.5791, "step": 3998 }, { "epoch": 0.2786468313416716, "grad_norm": 0.7037151501018837, "learning_rate": 5.868111555197287e-07, "loss": 1.5421, "step": 3999 }, { "epoch": 0.2787165104692889, "grad_norm": 0.7677023462476129, "learning_rate": 5.867582044034374e-07, "loss": 1.4824, "step": 4000 }, { "epoch": 0.27878618959690626, "grad_norm": 0.6969241390359108, "learning_rate": 5.86705243618479e-07, "loss": 1.6216, "step": 4001 }, { "epoch": 0.27885586872452356, "grad_norm": 0.6722182109071275, "learning_rate": 5.86652273167391e-07, "loss": 1.5251, "step": 4002 }, { "epoch": 0.2789255478521409, "grad_norm": 0.7489623752803286, "learning_rate": 5.865992930527123e-07, "loss": 1.4817, "step": 4003 }, { "epoch": 0.2789952269797582, "grad_norm": 0.7007812459698798, "learning_rate": 5.865463032769814e-07, "loss": 1.5225, "step": 4004 }, { "epoch": 0.27906490610737555, "grad_norm": 0.7121219333031956, "learning_rate": 5.86493303842738e-07, "loss": 1.5117, "step": 4005 }, { "epoch": 0.27913458523499285, "grad_norm": 0.6799027790844323, "learning_rate": 5.864402947525218e-07, "loss": 1.5251, "step": 4006 }, { "epoch": 0.2792042643626102, "grad_norm": 0.7083461002334056, "learning_rate": 5.863872760088732e-07, "loss": 1.526, "step": 4007 }, { "epoch": 0.2792739434902275, "grad_norm": 0.6869380534638632, "learning_rate": 5.863342476143329e-07, "loss": 1.548, "step": 4008 }, { "epoch": 0.27934362261784484, "grad_norm": 0.7917821268802485, "learning_rate": 5.86281209571442e-07, "loss": 1.4851, "step": 4009 }, { "epoch": 0.27941330174546214, "grad_norm": 0.7288384722578055, "learning_rate": 5.862281618827423e-07, "loss": 1.5221, "step": 4010 }, { "epoch": 0.2794829808730795, "grad_norm": 0.737623419529946, "learning_rate": 5.861751045507761e-07, "loss": 1.5496, "step": 4011 }, { "epoch": 0.2795526600006968, "grad_norm": 0.7023565178847206, "learning_rate": 5.861220375780858e-07, "loss": 1.6295, "step": 4012 }, { "epoch": 0.27962233912831413, "grad_norm": 0.8024061445348584, "learning_rate": 5.860689609672146e-07, "loss": 1.6185, "step": 4013 }, { "epoch": 0.27969201825593143, "grad_norm": 0.7407329543983926, "learning_rate": 5.86015874720706e-07, "loss": 1.6296, "step": 4014 }, { "epoch": 0.2797616973835488, "grad_norm": 0.656469116523215, "learning_rate": 5.85962778841104e-07, "loss": 1.4228, "step": 4015 }, { "epoch": 0.2798313765111661, "grad_norm": 0.6946187772623037, "learning_rate": 5.85909673330953e-07, "loss": 1.4918, "step": 4016 }, { "epoch": 0.2799010556387834, "grad_norm": 0.7373641595783593, "learning_rate": 5.858565581927981e-07, "loss": 1.6121, "step": 4017 }, { "epoch": 0.2799707347664007, "grad_norm": 0.7532752424720006, "learning_rate": 5.858034334291845e-07, "loss": 1.5603, "step": 4018 }, { "epoch": 0.28004041389401807, "grad_norm": 0.7264475887960263, "learning_rate": 5.857502990426582e-07, "loss": 1.4764, "step": 4019 }, { "epoch": 0.28011009302163536, "grad_norm": 0.798784286364674, "learning_rate": 5.856971550357653e-07, "loss": 1.5344, "step": 4020 }, { "epoch": 0.2801797721492527, "grad_norm": 0.7176879232704745, "learning_rate": 5.856440014110529e-07, "loss": 1.6604, "step": 4021 }, { "epoch": 0.28024945127687, "grad_norm": 0.6773894896943677, "learning_rate": 5.855908381710679e-07, "loss": 1.4547, "step": 4022 }, { "epoch": 0.28031913040448736, "grad_norm": 0.7127957867651743, "learning_rate": 5.855376653183582e-07, "loss": 1.5388, "step": 4023 }, { "epoch": 0.28038880953210465, "grad_norm": 0.7166579306263381, "learning_rate": 5.854844828554719e-07, "loss": 1.5743, "step": 4024 }, { "epoch": 0.280458488659722, "grad_norm": 0.7410980916162924, "learning_rate": 5.854312907849575e-07, "loss": 1.4147, "step": 4025 }, { "epoch": 0.2805281677873393, "grad_norm": 0.7268041723692737, "learning_rate": 5.853780891093643e-07, "loss": 1.5175, "step": 4026 }, { "epoch": 0.28059784691495665, "grad_norm": 0.7321423766832356, "learning_rate": 5.853248778312416e-07, "loss": 1.5852, "step": 4027 }, { "epoch": 0.28066752604257394, "grad_norm": 0.6934596891068451, "learning_rate": 5.852716569531395e-07, "loss": 1.5311, "step": 4028 }, { "epoch": 0.2807372051701913, "grad_norm": 0.7081751885804427, "learning_rate": 5.852184264776085e-07, "loss": 1.5793, "step": 4029 }, { "epoch": 0.2808068842978086, "grad_norm": 0.715593124627546, "learning_rate": 5.851651864071994e-07, "loss": 1.5773, "step": 4030 }, { "epoch": 0.28087656342542594, "grad_norm": 0.8029234811798415, "learning_rate": 5.851119367444636e-07, "loss": 1.5563, "step": 4031 }, { "epoch": 0.28094624255304324, "grad_norm": 0.7836241719894106, "learning_rate": 5.850586774919531e-07, "loss": 1.5504, "step": 4032 }, { "epoch": 0.2810159216806606, "grad_norm": 0.7444625174947146, "learning_rate": 5.8500540865222e-07, "loss": 1.5774, "step": 4033 }, { "epoch": 0.2810856008082779, "grad_norm": 0.7682038540781259, "learning_rate": 5.849521302278171e-07, "loss": 1.6068, "step": 4034 }, { "epoch": 0.2811552799358952, "grad_norm": 0.6977564285986588, "learning_rate": 5.848988422212977e-07, "loss": 1.539, "step": 4035 }, { "epoch": 0.2812249590635125, "grad_norm": 0.6861978787885887, "learning_rate": 5.848455446352152e-07, "loss": 1.5767, "step": 4036 }, { "epoch": 0.2812946381911298, "grad_norm": 0.7061641280191499, "learning_rate": 5.847922374721241e-07, "loss": 1.5446, "step": 4037 }, { "epoch": 0.28136431731874717, "grad_norm": 0.7162478499272291, "learning_rate": 5.847389207345788e-07, "loss": 1.5821, "step": 4038 }, { "epoch": 0.28143399644636446, "grad_norm": 0.704977072425101, "learning_rate": 5.846855944251343e-07, "loss": 1.6258, "step": 4039 }, { "epoch": 0.2815036755739818, "grad_norm": 0.7989239527820603, "learning_rate": 5.846322585463462e-07, "loss": 1.5252, "step": 4040 }, { "epoch": 0.2815733547015991, "grad_norm": 0.7620500570883405, "learning_rate": 5.845789131007705e-07, "loss": 1.4945, "step": 4041 }, { "epoch": 0.28164303382921646, "grad_norm": 0.7065137235650111, "learning_rate": 5.845255580909634e-07, "loss": 1.5273, "step": 4042 }, { "epoch": 0.28171271295683376, "grad_norm": 0.72292442941867, "learning_rate": 5.844721935194821e-07, "loss": 1.6439, "step": 4043 }, { "epoch": 0.2817823920844511, "grad_norm": 0.7135494906603791, "learning_rate": 5.844188193888838e-07, "loss": 1.6209, "step": 4044 }, { "epoch": 0.2818520712120684, "grad_norm": 0.7238594633780343, "learning_rate": 5.843654357017261e-07, "loss": 1.6056, "step": 4045 }, { "epoch": 0.28192175033968575, "grad_norm": 0.7195992753862425, "learning_rate": 5.843120424605675e-07, "loss": 1.6093, "step": 4046 }, { "epoch": 0.28199142946730305, "grad_norm": 0.7102552233590842, "learning_rate": 5.842586396679666e-07, "loss": 1.5801, "step": 4047 }, { "epoch": 0.2820611085949204, "grad_norm": 0.7237963977095361, "learning_rate": 5.842052273264828e-07, "loss": 1.599, "step": 4048 }, { "epoch": 0.2821307877225377, "grad_norm": 0.7388017933797084, "learning_rate": 5.841518054386754e-07, "loss": 1.4611, "step": 4049 }, { "epoch": 0.28220046685015504, "grad_norm": 0.7169800257321843, "learning_rate": 5.840983740071046e-07, "loss": 1.5368, "step": 4050 }, { "epoch": 0.28227014597777234, "grad_norm": 0.7591467618718327, "learning_rate": 5.840449330343311e-07, "loss": 1.4682, "step": 4051 }, { "epoch": 0.2823398251053897, "grad_norm": 0.7816992052749729, "learning_rate": 5.839914825229157e-07, "loss": 1.5462, "step": 4052 }, { "epoch": 0.282409504233007, "grad_norm": 0.7678402184087277, "learning_rate": 5.839380224754199e-07, "loss": 1.411, "step": 4053 }, { "epoch": 0.28247918336062433, "grad_norm": 0.7008928225544877, "learning_rate": 5.838845528944057e-07, "loss": 1.5995, "step": 4054 }, { "epoch": 0.2825488624882416, "grad_norm": 0.7036680440880047, "learning_rate": 5.838310737824353e-07, "loss": 1.5138, "step": 4055 }, { "epoch": 0.282618541615859, "grad_norm": 0.6918801095209847, "learning_rate": 5.837775851420719e-07, "loss": 1.5122, "step": 4056 }, { "epoch": 0.28268822074347627, "grad_norm": 0.710151105832471, "learning_rate": 5.837240869758785e-07, "loss": 1.6328, "step": 4057 }, { "epoch": 0.2827578998710936, "grad_norm": 6.128195129119697, "learning_rate": 5.836705792864187e-07, "loss": 1.4178, "step": 4058 }, { "epoch": 0.2828275789987109, "grad_norm": 0.6829741022309734, "learning_rate": 5.83617062076257e-07, "loss": 1.5073, "step": 4059 }, { "epoch": 0.28289725812632827, "grad_norm": 0.715180603423623, "learning_rate": 5.835635353479579e-07, "loss": 1.7047, "step": 4060 }, { "epoch": 0.28296693725394556, "grad_norm": 0.7240989089434184, "learning_rate": 5.835099991040865e-07, "loss": 1.6419, "step": 4061 }, { "epoch": 0.2830366163815629, "grad_norm": 1.4884192988469906, "learning_rate": 5.834564533472084e-07, "loss": 1.5715, "step": 4062 }, { "epoch": 0.2831062955091802, "grad_norm": 0.6869430028687137, "learning_rate": 5.834028980798897e-07, "loss": 1.6057, "step": 4063 }, { "epoch": 0.28317597463679756, "grad_norm": 0.7492810286800211, "learning_rate": 5.833493333046969e-07, "loss": 1.5047, "step": 4064 }, { "epoch": 0.28324565376441485, "grad_norm": 0.6528949631329686, "learning_rate": 5.832957590241967e-07, "loss": 1.4532, "step": 4065 }, { "epoch": 0.2833153328920322, "grad_norm": 0.69702222687072, "learning_rate": 5.832421752409567e-07, "loss": 1.5506, "step": 4066 }, { "epoch": 0.2833850120196495, "grad_norm": 0.7244086971828632, "learning_rate": 5.831885819575447e-07, "loss": 1.5833, "step": 4067 }, { "epoch": 0.28345469114726685, "grad_norm": 0.7019005426267827, "learning_rate": 5.831349791765289e-07, "loss": 1.4391, "step": 4068 }, { "epoch": 0.28352437027488414, "grad_norm": 0.7200978367651489, "learning_rate": 5.830813669004781e-07, "loss": 1.477, "step": 4069 }, { "epoch": 0.2835940494025015, "grad_norm": 0.6700016328199216, "learning_rate": 5.830277451319616e-07, "loss": 1.4821, "step": 4070 }, { "epoch": 0.2836637285301188, "grad_norm": 0.7013166629088055, "learning_rate": 5.829741138735491e-07, "loss": 1.5592, "step": 4071 }, { "epoch": 0.28373340765773614, "grad_norm": 0.7464737988751529, "learning_rate": 5.829204731278105e-07, "loss": 1.5095, "step": 4072 }, { "epoch": 0.28380308678535343, "grad_norm": 0.7175291663947816, "learning_rate": 5.828668228973166e-07, "loss": 1.6289, "step": 4073 }, { "epoch": 0.2838727659129708, "grad_norm": 0.772606784486943, "learning_rate": 5.828131631846383e-07, "loss": 1.4315, "step": 4074 }, { "epoch": 0.2839424450405881, "grad_norm": 0.6821940080682076, "learning_rate": 5.82759493992347e-07, "loss": 1.603, "step": 4075 }, { "epoch": 0.2840121241682054, "grad_norm": 0.6781925194254677, "learning_rate": 5.827058153230149e-07, "loss": 1.4863, "step": 4076 }, { "epoch": 0.2840818032958227, "grad_norm": 0.7357770305547745, "learning_rate": 5.826521271792142e-07, "loss": 1.6178, "step": 4077 }, { "epoch": 0.28415148242344007, "grad_norm": 0.6701447160402433, "learning_rate": 5.825984295635178e-07, "loss": 1.4617, "step": 4078 }, { "epoch": 0.28422116155105737, "grad_norm": 0.7456256029388817, "learning_rate": 5.82544722478499e-07, "loss": 1.5877, "step": 4079 }, { "epoch": 0.2842908406786747, "grad_norm": 0.7150322734011164, "learning_rate": 5.824910059267316e-07, "loss": 1.4595, "step": 4080 }, { "epoch": 0.284360519806292, "grad_norm": 0.7762158243973858, "learning_rate": 5.824372799107898e-07, "loss": 1.4091, "step": 4081 }, { "epoch": 0.28443019893390936, "grad_norm": 0.6999482983696204, "learning_rate": 5.823835444332481e-07, "loss": 1.5803, "step": 4082 }, { "epoch": 0.28449987806152666, "grad_norm": 0.7179465348485711, "learning_rate": 5.823297994966817e-07, "loss": 1.5976, "step": 4083 }, { "epoch": 0.284569557189144, "grad_norm": 0.6886196672918611, "learning_rate": 5.822760451036663e-07, "loss": 1.4657, "step": 4084 }, { "epoch": 0.2846392363167613, "grad_norm": 0.7343911706726971, "learning_rate": 5.822222812567777e-07, "loss": 1.5281, "step": 4085 }, { "epoch": 0.28470891544437865, "grad_norm": 0.7747706894286983, "learning_rate": 5.821685079585925e-07, "loss": 1.6097, "step": 4086 }, { "epoch": 0.28477859457199595, "grad_norm": 0.7459684826116418, "learning_rate": 5.821147252116877e-07, "loss": 1.6015, "step": 4087 }, { "epoch": 0.2848482736996133, "grad_norm": 0.7068888527292315, "learning_rate": 5.820609330186406e-07, "loss": 1.552, "step": 4088 }, { "epoch": 0.2849179528272306, "grad_norm": 0.6849166782257857, "learning_rate": 5.82007131382029e-07, "loss": 1.6113, "step": 4089 }, { "epoch": 0.28498763195484794, "grad_norm": 0.7440776028099806, "learning_rate": 5.819533203044312e-07, "loss": 1.8013, "step": 4090 }, { "epoch": 0.28505731108246524, "grad_norm": 0.6927799473369971, "learning_rate": 5.81899499788426e-07, "loss": 1.5277, "step": 4091 }, { "epoch": 0.2851269902100826, "grad_norm": 0.7629208548117576, "learning_rate": 5.818456698365925e-07, "loss": 1.5684, "step": 4092 }, { "epoch": 0.2851966693376999, "grad_norm": 0.7884306874549732, "learning_rate": 5.817918304515104e-07, "loss": 1.5751, "step": 4093 }, { "epoch": 0.28526634846531723, "grad_norm": 0.7391133979352249, "learning_rate": 5.817379816357597e-07, "loss": 1.5001, "step": 4094 }, { "epoch": 0.2853360275929345, "grad_norm": 0.6698145354407297, "learning_rate": 5.816841233919212e-07, "loss": 1.4165, "step": 4095 }, { "epoch": 0.2854057067205519, "grad_norm": 0.792643478722233, "learning_rate": 5.816302557225756e-07, "loss": 1.5447, "step": 4096 }, { "epoch": 0.28547538584816917, "grad_norm": 0.7136178558225388, "learning_rate": 5.815763786303045e-07, "loss": 1.5373, "step": 4097 }, { "epoch": 0.2855450649757865, "grad_norm": 0.7575039417167665, "learning_rate": 5.815224921176897e-07, "loss": 1.6437, "step": 4098 }, { "epoch": 0.2856147441034038, "grad_norm": 0.7584843652406438, "learning_rate": 5.814685961873138e-07, "loss": 1.583, "step": 4099 }, { "epoch": 0.28568442323102117, "grad_norm": 0.7576203179121972, "learning_rate": 5.814146908417594e-07, "loss": 1.5119, "step": 4100 }, { "epoch": 0.28575410235863846, "grad_norm": 0.7598044895034622, "learning_rate": 5.813607760836097e-07, "loss": 1.574, "step": 4101 }, { "epoch": 0.2858237814862558, "grad_norm": 0.6735244316784529, "learning_rate": 5.813068519154485e-07, "loss": 1.5738, "step": 4102 }, { "epoch": 0.2858934606138731, "grad_norm": 0.6868829077636657, "learning_rate": 5.812529183398598e-07, "loss": 1.5322, "step": 4103 }, { "epoch": 0.28596313974149046, "grad_norm": 0.7419971118801489, "learning_rate": 5.811989753594286e-07, "loss": 1.5351, "step": 4104 }, { "epoch": 0.28603281886910775, "grad_norm": 0.7270043780129994, "learning_rate": 5.811450229767396e-07, "loss": 1.3956, "step": 4105 }, { "epoch": 0.2861024979967251, "grad_norm": 0.7054461678186529, "learning_rate": 5.810910611943784e-07, "loss": 1.5752, "step": 4106 }, { "epoch": 0.2861721771243424, "grad_norm": 0.8449176564524208, "learning_rate": 5.810370900149311e-07, "loss": 1.5074, "step": 4107 }, { "epoch": 0.28624185625195975, "grad_norm": 0.7140677921475737, "learning_rate": 5.809831094409838e-07, "loss": 1.5529, "step": 4108 }, { "epoch": 0.28631153537957704, "grad_norm": 0.7074402998095285, "learning_rate": 5.809291194751236e-07, "loss": 1.4844, "step": 4109 }, { "epoch": 0.2863812145071944, "grad_norm": 0.7303614882913259, "learning_rate": 5.808751201199379e-07, "loss": 1.5742, "step": 4110 }, { "epoch": 0.2864508936348117, "grad_norm": 0.7183991860832905, "learning_rate": 5.808211113780142e-07, "loss": 1.4775, "step": 4111 }, { "epoch": 0.28652057276242904, "grad_norm": 0.8003586154955813, "learning_rate": 5.807670932519409e-07, "loss": 1.5617, "step": 4112 }, { "epoch": 0.28659025189004633, "grad_norm": 0.7474000305808857, "learning_rate": 5.807130657443066e-07, "loss": 1.4609, "step": 4113 }, { "epoch": 0.2866599310176637, "grad_norm": 0.6674786568948675, "learning_rate": 5.806590288577002e-07, "loss": 1.473, "step": 4114 }, { "epoch": 0.286729610145281, "grad_norm": 0.7505481394579682, "learning_rate": 5.806049825947117e-07, "loss": 1.5761, "step": 4115 }, { "epoch": 0.28679928927289833, "grad_norm": 0.7296521930923773, "learning_rate": 5.805509269579308e-07, "loss": 1.4665, "step": 4116 }, { "epoch": 0.2868689684005156, "grad_norm": 0.687910460173145, "learning_rate": 5.804968619499479e-07, "loss": 1.5137, "step": 4117 }, { "epoch": 0.286938647528133, "grad_norm": 0.7760861013120258, "learning_rate": 5.804427875733541e-07, "loss": 1.5681, "step": 4118 }, { "epoch": 0.28700832665575027, "grad_norm": 0.6496291087556836, "learning_rate": 5.803887038307407e-07, "loss": 1.4863, "step": 4119 }, { "epoch": 0.2870780057833676, "grad_norm": 0.6963744044109444, "learning_rate": 5.803346107246995e-07, "loss": 1.4003, "step": 4120 }, { "epoch": 0.2871476849109849, "grad_norm": 0.7441529427156599, "learning_rate": 5.802805082578228e-07, "loss": 1.4934, "step": 4121 }, { "epoch": 0.28721736403860226, "grad_norm": 0.6879418056717342, "learning_rate": 5.802263964327031e-07, "loss": 1.4888, "step": 4122 }, { "epoch": 0.28728704316621956, "grad_norm": 0.7485835173848666, "learning_rate": 5.801722752519338e-07, "loss": 1.5704, "step": 4123 }, { "epoch": 0.2873567222938369, "grad_norm": 0.702037064253478, "learning_rate": 5.801181447181083e-07, "loss": 1.5496, "step": 4124 }, { "epoch": 0.2874264014214542, "grad_norm": 0.7026946239503448, "learning_rate": 5.800640048338209e-07, "loss": 1.5774, "step": 4125 }, { "epoch": 0.28749608054907155, "grad_norm": 0.7491119945124249, "learning_rate": 5.800098556016658e-07, "loss": 1.4153, "step": 4126 }, { "epoch": 0.28756575967668885, "grad_norm": 0.6902498898799309, "learning_rate": 5.79955697024238e-07, "loss": 1.5892, "step": 4127 }, { "epoch": 0.28763543880430614, "grad_norm": 0.7063696011417466, "learning_rate": 5.79901529104133e-07, "loss": 1.5036, "step": 4128 }, { "epoch": 0.2877051179319235, "grad_norm": 0.6764332043828444, "learning_rate": 5.798473518439467e-07, "loss": 1.4542, "step": 4129 }, { "epoch": 0.2877747970595408, "grad_norm": 0.7574873462504658, "learning_rate": 5.797931652462752e-07, "loss": 1.5444, "step": 4130 }, { "epoch": 0.28784447618715814, "grad_norm": 0.6919679634888707, "learning_rate": 5.797389693137154e-07, "loss": 1.5891, "step": 4131 }, { "epoch": 0.28791415531477543, "grad_norm": 0.7380176334601344, "learning_rate": 5.796847640488644e-07, "loss": 1.6671, "step": 4132 }, { "epoch": 0.2879838344423928, "grad_norm": 0.8050340076019923, "learning_rate": 5.796305494543197e-07, "loss": 1.5894, "step": 4133 }, { "epoch": 0.2880535135700101, "grad_norm": 0.7487002362938737, "learning_rate": 5.795763255326796e-07, "loss": 1.4812, "step": 4134 }, { "epoch": 0.28812319269762743, "grad_norm": 0.7369552008147401, "learning_rate": 5.795220922865426e-07, "loss": 1.4874, "step": 4135 }, { "epoch": 0.2881928718252447, "grad_norm": 5.4765373368585175, "learning_rate": 5.794678497185075e-07, "loss": 1.5605, "step": 4136 }, { "epoch": 0.2882625509528621, "grad_norm": 0.7284276944673779, "learning_rate": 5.794135978311737e-07, "loss": 1.5034, "step": 4137 }, { "epoch": 0.28833223008047937, "grad_norm": 0.7995026203041848, "learning_rate": 5.793593366271413e-07, "loss": 1.5816, "step": 4138 }, { "epoch": 0.2884019092080967, "grad_norm": 0.7179892897013607, "learning_rate": 5.793050661090105e-07, "loss": 1.5214, "step": 4139 }, { "epoch": 0.288471588335714, "grad_norm": 0.7446689591066641, "learning_rate": 5.79250786279382e-07, "loss": 1.6026, "step": 4140 }, { "epoch": 0.28854126746333136, "grad_norm": 0.7414451978063824, "learning_rate": 5.791964971408569e-07, "loss": 1.507, "step": 4141 }, { "epoch": 0.28861094659094866, "grad_norm": 0.7118717774250654, "learning_rate": 5.791421986960371e-07, "loss": 1.4607, "step": 4142 }, { "epoch": 0.288680625718566, "grad_norm": 0.7274986192626761, "learning_rate": 5.790878909475246e-07, "loss": 1.6512, "step": 4143 }, { "epoch": 0.2887503048461833, "grad_norm": 0.7470589216705154, "learning_rate": 5.790335738979218e-07, "loss": 1.5379, "step": 4144 }, { "epoch": 0.28881998397380065, "grad_norm": 0.7192847739232004, "learning_rate": 5.789792475498319e-07, "loss": 1.5424, "step": 4145 }, { "epoch": 0.28888966310141795, "grad_norm": 0.7808717081028412, "learning_rate": 5.789249119058582e-07, "loss": 1.5689, "step": 4146 }, { "epoch": 0.2889593422290353, "grad_norm": 0.7165423217935094, "learning_rate": 5.788705669686047e-07, "loss": 1.5309, "step": 4147 }, { "epoch": 0.2890290213566526, "grad_norm": 0.7413403373071283, "learning_rate": 5.788162127406755e-07, "loss": 1.6779, "step": 4148 }, { "epoch": 0.28909870048426994, "grad_norm": 0.7088883743890833, "learning_rate": 5.787618492246754e-07, "loss": 1.638, "step": 4149 }, { "epoch": 0.28916837961188724, "grad_norm": 0.6863263458241512, "learning_rate": 5.787074764232098e-07, "loss": 1.5491, "step": 4150 }, { "epoch": 0.2892380587395046, "grad_norm": 0.7306183676069988, "learning_rate": 5.786530943388843e-07, "loss": 1.5322, "step": 4151 }, { "epoch": 0.2893077378671219, "grad_norm": 0.718996793284283, "learning_rate": 5.78598702974305e-07, "loss": 1.567, "step": 4152 }, { "epoch": 0.28937741699473923, "grad_norm": 0.7430070802419222, "learning_rate": 5.785443023320782e-07, "loss": 1.5341, "step": 4153 }, { "epoch": 0.28944709612235653, "grad_norm": 0.7173099226439454, "learning_rate": 5.784898924148112e-07, "loss": 1.5728, "step": 4154 }, { "epoch": 0.2895167752499739, "grad_norm": 0.7298294434551115, "learning_rate": 5.784354732251114e-07, "loss": 1.5361, "step": 4155 }, { "epoch": 0.2895864543775912, "grad_norm": 0.7063577470300629, "learning_rate": 5.783810447655865e-07, "loss": 1.5852, "step": 4156 }, { "epoch": 0.2896561335052085, "grad_norm": 0.746458774354094, "learning_rate": 5.78326607038845e-07, "loss": 1.5576, "step": 4157 }, { "epoch": 0.2897258126328258, "grad_norm": 0.7408594387198453, "learning_rate": 5.782721600474956e-07, "loss": 1.5814, "step": 4158 }, { "epoch": 0.28979549176044317, "grad_norm": 0.7430836064324008, "learning_rate": 5.782177037941475e-07, "loss": 1.467, "step": 4159 }, { "epoch": 0.28986517088806046, "grad_norm": 0.7198430677931947, "learning_rate": 5.781632382814104e-07, "loss": 1.4687, "step": 4160 }, { "epoch": 0.2899348500156778, "grad_norm": 0.7252227953435738, "learning_rate": 5.781087635118942e-07, "loss": 1.6479, "step": 4161 }, { "epoch": 0.2900045291432951, "grad_norm": 0.686562113644517, "learning_rate": 5.780542794882098e-07, "loss": 1.482, "step": 4162 }, { "epoch": 0.29007420827091246, "grad_norm": 0.7181583474767024, "learning_rate": 5.779997862129679e-07, "loss": 1.5554, "step": 4163 }, { "epoch": 0.29014388739852975, "grad_norm": 0.7422299136780489, "learning_rate": 5.779452836887801e-07, "loss": 1.6733, "step": 4164 }, { "epoch": 0.2902135665261471, "grad_norm": 0.6927016820321847, "learning_rate": 5.77890771918258e-07, "loss": 1.5271, "step": 4165 }, { "epoch": 0.2902832456537644, "grad_norm": 0.6829369165723617, "learning_rate": 5.778362509040143e-07, "loss": 1.5014, "step": 4166 }, { "epoch": 0.29035292478138175, "grad_norm": 0.6620607702711369, "learning_rate": 5.777817206486616e-07, "loss": 1.6308, "step": 4167 }, { "epoch": 0.29042260390899904, "grad_norm": 0.7073017154807985, "learning_rate": 5.77727181154813e-07, "loss": 1.5599, "step": 4168 }, { "epoch": 0.2904922830366164, "grad_norm": 0.8033268829212622, "learning_rate": 5.776726324250822e-07, "loss": 1.5079, "step": 4169 }, { "epoch": 0.2905619621642337, "grad_norm": 0.7209486887046835, "learning_rate": 5.776180744620833e-07, "loss": 1.5049, "step": 4170 }, { "epoch": 0.29063164129185104, "grad_norm": 0.8022012464666788, "learning_rate": 5.775635072684308e-07, "loss": 1.4163, "step": 4171 }, { "epoch": 0.29070132041946833, "grad_norm": 0.7277687745750111, "learning_rate": 5.775089308467398e-07, "loss": 1.5629, "step": 4172 }, { "epoch": 0.2907709995470857, "grad_norm": 0.783412324097584, "learning_rate": 5.774543451996256e-07, "loss": 1.6353, "step": 4173 }, { "epoch": 0.290840678674703, "grad_norm": 0.7209177364651561, "learning_rate": 5.773997503297041e-07, "loss": 1.5835, "step": 4174 }, { "epoch": 0.29091035780232033, "grad_norm": 0.7199756416577356, "learning_rate": 5.773451462395915e-07, "loss": 1.5281, "step": 4175 }, { "epoch": 0.2909800369299376, "grad_norm": 0.7322879678085863, "learning_rate": 5.772905329319047e-07, "loss": 1.3603, "step": 4176 }, { "epoch": 0.291049716057555, "grad_norm": 0.6590412334963223, "learning_rate": 5.772359104092607e-07, "loss": 1.4824, "step": 4177 }, { "epoch": 0.29111939518517227, "grad_norm": 0.7445913105232103, "learning_rate": 5.771812786742773e-07, "loss": 1.5666, "step": 4178 }, { "epoch": 0.2911890743127896, "grad_norm": 0.7165402829789416, "learning_rate": 5.771266377295725e-07, "loss": 1.5071, "step": 4179 }, { "epoch": 0.2912587534404069, "grad_norm": 0.6794848939177912, "learning_rate": 5.770719875777647e-07, "loss": 1.4203, "step": 4180 }, { "epoch": 0.29132843256802426, "grad_norm": 0.7532430788398345, "learning_rate": 5.77017328221473e-07, "loss": 1.6119, "step": 4181 }, { "epoch": 0.29139811169564156, "grad_norm": 0.7048663985537814, "learning_rate": 5.769626596633167e-07, "loss": 1.6076, "step": 4182 }, { "epoch": 0.2914677908232589, "grad_norm": 0.7260710522617362, "learning_rate": 5.769079819059156e-07, "loss": 1.5931, "step": 4183 }, { "epoch": 0.2915374699508762, "grad_norm": 0.7085546777892404, "learning_rate": 5.768532949518901e-07, "loss": 1.5788, "step": 4184 }, { "epoch": 0.29160714907849355, "grad_norm": 0.7351359290443842, "learning_rate": 5.767985988038609e-07, "loss": 1.4791, "step": 4185 }, { "epoch": 0.29167682820611085, "grad_norm": 0.7345803290205484, "learning_rate": 5.767438934644489e-07, "loss": 1.498, "step": 4186 }, { "epoch": 0.2917465073337282, "grad_norm": 0.7275728966569317, "learning_rate": 5.766891789362761e-07, "loss": 1.5793, "step": 4187 }, { "epoch": 0.2918161864613455, "grad_norm": 0.6385572204281225, "learning_rate": 5.766344552219643e-07, "loss": 1.5234, "step": 4188 }, { "epoch": 0.29188586558896285, "grad_norm": 0.6878039246866837, "learning_rate": 5.765797223241358e-07, "loss": 1.529, "step": 4189 }, { "epoch": 0.29195554471658014, "grad_norm": 0.7407162221510748, "learning_rate": 5.765249802454138e-07, "loss": 1.4297, "step": 4190 }, { "epoch": 0.2920252238441975, "grad_norm": 0.6954475217623283, "learning_rate": 5.764702289884216e-07, "loss": 1.5887, "step": 4191 }, { "epoch": 0.2920949029718148, "grad_norm": 0.7438942748646914, "learning_rate": 5.764154685557829e-07, "loss": 1.5754, "step": 4192 }, { "epoch": 0.29216458209943214, "grad_norm": 0.7878280970340501, "learning_rate": 5.763606989501221e-07, "loss": 1.5281, "step": 4193 }, { "epoch": 0.29223426122704943, "grad_norm": 0.6856592601877685, "learning_rate": 5.763059201740636e-07, "loss": 1.5176, "step": 4194 }, { "epoch": 0.2923039403546668, "grad_norm": 0.8063588131594583, "learning_rate": 5.762511322302326e-07, "loss": 1.6245, "step": 4195 }, { "epoch": 0.2923736194822841, "grad_norm": 0.7078649971602377, "learning_rate": 5.761963351212548e-07, "loss": 1.5293, "step": 4196 }, { "epoch": 0.2924432986099014, "grad_norm": 0.6867274360359023, "learning_rate": 5.761415288497562e-07, "loss": 1.4691, "step": 4197 }, { "epoch": 0.2925129777375187, "grad_norm": 0.6938725129579134, "learning_rate": 5.76086713418363e-07, "loss": 1.5709, "step": 4198 }, { "epoch": 0.29258265686513607, "grad_norm": 0.6964301335240399, "learning_rate": 5.760318888297023e-07, "loss": 1.5175, "step": 4199 }, { "epoch": 0.29265233599275337, "grad_norm": 0.7176137893004337, "learning_rate": 5.759770550864012e-07, "loss": 1.6088, "step": 4200 }, { "epoch": 0.2927220151203707, "grad_norm": 0.6600968939310983, "learning_rate": 5.759222121910876e-07, "loss": 1.5252, "step": 4201 }, { "epoch": 0.292791694247988, "grad_norm": 0.7713602795893594, "learning_rate": 5.758673601463897e-07, "loss": 1.7007, "step": 4202 }, { "epoch": 0.29286137337560536, "grad_norm": 0.8521496590271287, "learning_rate": 5.758124989549359e-07, "loss": 1.3938, "step": 4203 }, { "epoch": 0.29293105250322266, "grad_norm": 0.7064840849897108, "learning_rate": 5.757576286193557e-07, "loss": 1.6102, "step": 4204 }, { "epoch": 0.29300073163084, "grad_norm": 0.7073473475058616, "learning_rate": 5.75702749142278e-07, "loss": 1.4099, "step": 4205 }, { "epoch": 0.2930704107584573, "grad_norm": 0.7497971375461359, "learning_rate": 5.756478605263332e-07, "loss": 1.59, "step": 4206 }, { "epoch": 0.29314008988607465, "grad_norm": 0.7483325900051251, "learning_rate": 5.755929627741515e-07, "loss": 1.5683, "step": 4207 }, { "epoch": 0.29320976901369195, "grad_norm": 0.677131842423103, "learning_rate": 5.755380558883638e-07, "loss": 1.5654, "step": 4208 }, { "epoch": 0.2932794481413093, "grad_norm": 0.7090599829679397, "learning_rate": 5.754831398716012e-07, "loss": 1.5853, "step": 4209 }, { "epoch": 0.2933491272689266, "grad_norm": 0.6531789620922398, "learning_rate": 5.754282147264955e-07, "loss": 1.5282, "step": 4210 }, { "epoch": 0.29341880639654394, "grad_norm": 0.7290449241279219, "learning_rate": 5.753732804556789e-07, "loss": 1.5285, "step": 4211 }, { "epoch": 0.29348848552416124, "grad_norm": 0.696260443545441, "learning_rate": 5.753183370617839e-07, "loss": 1.5345, "step": 4212 }, { "epoch": 0.2935581646517786, "grad_norm": 0.7252444210476912, "learning_rate": 5.752633845474433e-07, "loss": 1.5398, "step": 4213 }, { "epoch": 0.2936278437793959, "grad_norm": 0.7312914360002516, "learning_rate": 5.752084229152909e-07, "loss": 1.5642, "step": 4214 }, { "epoch": 0.29369752290701323, "grad_norm": 0.724005882627236, "learning_rate": 5.751534521679603e-07, "loss": 1.4495, "step": 4215 }, { "epoch": 0.2937672020346305, "grad_norm": 0.7720337660286007, "learning_rate": 5.750984723080859e-07, "loss": 1.5852, "step": 4216 }, { "epoch": 0.2938368811622479, "grad_norm": 0.6758714480813982, "learning_rate": 5.750434833383024e-07, "loss": 1.4688, "step": 4217 }, { "epoch": 0.29390656028986517, "grad_norm": 0.6951183763692467, "learning_rate": 5.74988485261245e-07, "loss": 1.5259, "step": 4218 }, { "epoch": 0.29397623941748247, "grad_norm": 0.6941762057762565, "learning_rate": 5.749334780795495e-07, "loss": 1.4688, "step": 4219 }, { "epoch": 0.2940459185450998, "grad_norm": 0.7445950618080178, "learning_rate": 5.748784617958516e-07, "loss": 1.5763, "step": 4220 }, { "epoch": 0.2941155976727171, "grad_norm": 0.7090111805366819, "learning_rate": 5.748234364127881e-07, "loss": 1.6148, "step": 4221 }, { "epoch": 0.29418527680033446, "grad_norm": 0.6931440631837924, "learning_rate": 5.747684019329958e-07, "loss": 1.4726, "step": 4222 }, { "epoch": 0.29425495592795176, "grad_norm": 0.6616793591516503, "learning_rate": 5.747133583591122e-07, "loss": 1.495, "step": 4223 }, { "epoch": 0.2943246350555691, "grad_norm": 0.7047857600329261, "learning_rate": 5.746583056937749e-07, "loss": 1.4379, "step": 4224 }, { "epoch": 0.2943943141831864, "grad_norm": 0.7691171573820788, "learning_rate": 5.746032439396223e-07, "loss": 1.7682, "step": 4225 }, { "epoch": 0.29446399331080375, "grad_norm": 0.6925654078498741, "learning_rate": 5.745481730992929e-07, "loss": 1.5043, "step": 4226 }, { "epoch": 0.29453367243842105, "grad_norm": 0.7204041277305288, "learning_rate": 5.74493093175426e-07, "loss": 1.62, "step": 4227 }, { "epoch": 0.2946033515660384, "grad_norm": 0.784007626003321, "learning_rate": 5.74438004170661e-07, "loss": 1.5427, "step": 4228 }, { "epoch": 0.2946730306936557, "grad_norm": 0.7054496982926721, "learning_rate": 5.743829060876379e-07, "loss": 1.6203, "step": 4229 }, { "epoch": 0.29474270982127304, "grad_norm": 0.7036820710436857, "learning_rate": 5.743277989289972e-07, "loss": 1.4374, "step": 4230 }, { "epoch": 0.29481238894889034, "grad_norm": 0.7043837784856916, "learning_rate": 5.742726826973797e-07, "loss": 1.5413, "step": 4231 }, { "epoch": 0.2948820680765077, "grad_norm": 0.7269167952414226, "learning_rate": 5.742175573954266e-07, "loss": 1.4719, "step": 4232 }, { "epoch": 0.294951747204125, "grad_norm": 0.706476929301725, "learning_rate": 5.741624230257798e-07, "loss": 1.533, "step": 4233 }, { "epoch": 0.29502142633174233, "grad_norm": 0.7356825908807404, "learning_rate": 5.741072795910813e-07, "loss": 1.437, "step": 4234 }, { "epoch": 0.2950911054593596, "grad_norm": 0.7040136084896028, "learning_rate": 5.740521270939737e-07, "loss": 1.4609, "step": 4235 }, { "epoch": 0.295160784586977, "grad_norm": 0.7340794981592375, "learning_rate": 5.739969655371e-07, "loss": 1.5591, "step": 4236 }, { "epoch": 0.29523046371459427, "grad_norm": 0.6603536126599652, "learning_rate": 5.739417949231038e-07, "loss": 1.4947, "step": 4237 }, { "epoch": 0.2953001428422116, "grad_norm": 0.7751036977717093, "learning_rate": 5.73886615254629e-07, "loss": 1.5717, "step": 4238 }, { "epoch": 0.2953698219698289, "grad_norm": 0.791476669810281, "learning_rate": 5.738314265343196e-07, "loss": 1.5748, "step": 4239 }, { "epoch": 0.29543950109744627, "grad_norm": 0.7362906055325384, "learning_rate": 5.737762287648207e-07, "loss": 1.605, "step": 4240 }, { "epoch": 0.29550918022506356, "grad_norm": 0.723257144222618, "learning_rate": 5.737210219487774e-07, "loss": 1.4971, "step": 4241 }, { "epoch": 0.2955788593526809, "grad_norm": 0.6947517250277025, "learning_rate": 5.736658060888352e-07, "loss": 1.5147, "step": 4242 }, { "epoch": 0.2956485384802982, "grad_norm": 0.7270369921297178, "learning_rate": 5.736105811876403e-07, "loss": 1.492, "step": 4243 }, { "epoch": 0.29571821760791556, "grad_norm": 0.708560313274988, "learning_rate": 5.735553472478391e-07, "loss": 1.3891, "step": 4244 }, { "epoch": 0.29578789673553285, "grad_norm": 0.7285901567444775, "learning_rate": 5.735001042720786e-07, "loss": 1.6297, "step": 4245 }, { "epoch": 0.2958575758631502, "grad_norm": 0.7960332124526845, "learning_rate": 5.734448522630062e-07, "loss": 1.5527, "step": 4246 }, { "epoch": 0.2959272549907675, "grad_norm": 0.7020497113259067, "learning_rate": 5.733895912232694e-07, "loss": 1.6506, "step": 4247 }, { "epoch": 0.29599693411838485, "grad_norm": 0.715234323713793, "learning_rate": 5.733343211555169e-07, "loss": 1.5195, "step": 4248 }, { "epoch": 0.29606661324600214, "grad_norm": 0.7032592778948775, "learning_rate": 5.732790420623969e-07, "loss": 1.5182, "step": 4249 }, { "epoch": 0.2961362923736195, "grad_norm": 0.6762321778401789, "learning_rate": 5.732237539465586e-07, "loss": 1.5153, "step": 4250 }, { "epoch": 0.2962059715012368, "grad_norm": 0.6585325758077796, "learning_rate": 5.731684568106518e-07, "loss": 1.4654, "step": 4251 }, { "epoch": 0.29627565062885414, "grad_norm": 0.7659023292111404, "learning_rate": 5.731131506573262e-07, "loss": 1.5343, "step": 4252 }, { "epoch": 0.29634532975647143, "grad_norm": 0.7176805803072737, "learning_rate": 5.730578354892322e-07, "loss": 1.5905, "step": 4253 }, { "epoch": 0.2964150088840888, "grad_norm": 0.698554222722932, "learning_rate": 5.730025113090206e-07, "loss": 1.486, "step": 4254 }, { "epoch": 0.2964846880117061, "grad_norm": 0.7916029679362232, "learning_rate": 5.729471781193427e-07, "loss": 1.5445, "step": 4255 }, { "epoch": 0.2965543671393234, "grad_norm": 0.6874181377702069, "learning_rate": 5.728918359228502e-07, "loss": 1.4352, "step": 4256 }, { "epoch": 0.2966240462669407, "grad_norm": 0.7579198602498406, "learning_rate": 5.728364847221953e-07, "loss": 1.4514, "step": 4257 }, { "epoch": 0.29669372539455807, "grad_norm": 0.7097472989631317, "learning_rate": 5.727811245200302e-07, "loss": 1.5043, "step": 4258 }, { "epoch": 0.29676340452217537, "grad_norm": 0.6564182632603695, "learning_rate": 5.727257553190083e-07, "loss": 1.4526, "step": 4259 }, { "epoch": 0.2968330836497927, "grad_norm": 0.7058523448481508, "learning_rate": 5.726703771217827e-07, "loss": 1.6799, "step": 4260 }, { "epoch": 0.29690276277741, "grad_norm": 0.7245857438315936, "learning_rate": 5.726149899310075e-07, "loss": 1.5592, "step": 4261 }, { "epoch": 0.29697244190502736, "grad_norm": 0.7315326028212079, "learning_rate": 5.725595937493366e-07, "loss": 1.5439, "step": 4262 }, { "epoch": 0.29704212103264466, "grad_norm": 0.7195140259415446, "learning_rate": 5.72504188579425e-07, "loss": 1.4122, "step": 4263 }, { "epoch": 0.297111800160262, "grad_norm": 0.7164306723650375, "learning_rate": 5.724487744239278e-07, "loss": 1.5216, "step": 4264 }, { "epoch": 0.2971814792878793, "grad_norm": 0.7744066165079407, "learning_rate": 5.723933512855005e-07, "loss": 1.5427, "step": 4265 }, { "epoch": 0.29725115841549665, "grad_norm": 0.6820385126931663, "learning_rate": 5.72337919166799e-07, "loss": 1.5005, "step": 4266 }, { "epoch": 0.29732083754311395, "grad_norm": 0.7602095640104902, "learning_rate": 5.7228247807048e-07, "loss": 1.5856, "step": 4267 }, { "epoch": 0.2973905166707313, "grad_norm": 0.7376863216947709, "learning_rate": 5.722270279992e-07, "loss": 1.5361, "step": 4268 }, { "epoch": 0.2974601957983486, "grad_norm": 0.6976848335979555, "learning_rate": 5.721715689556165e-07, "loss": 1.6066, "step": 4269 }, { "epoch": 0.29752987492596594, "grad_norm": 0.7387189895719177, "learning_rate": 5.721161009423872e-07, "loss": 1.5509, "step": 4270 }, { "epoch": 0.29759955405358324, "grad_norm": 0.7470502890559497, "learning_rate": 5.720606239621701e-07, "loss": 1.5248, "step": 4271 }, { "epoch": 0.2976692331812006, "grad_norm": 0.7507968058964529, "learning_rate": 5.72005138017624e-07, "loss": 1.5589, "step": 4272 }, { "epoch": 0.2977389123088179, "grad_norm": 0.7697138737872022, "learning_rate": 5.719496431114077e-07, "loss": 1.4206, "step": 4273 }, { "epoch": 0.29780859143643523, "grad_norm": 0.8122208452859123, "learning_rate": 5.718941392461806e-07, "loss": 1.7525, "step": 4274 }, { "epoch": 0.2978782705640525, "grad_norm": 1.025275901310073, "learning_rate": 5.718386264246029e-07, "loss": 1.6115, "step": 4275 }, { "epoch": 0.2979479496916699, "grad_norm": 0.7000608371774311, "learning_rate": 5.717831046493345e-07, "loss": 1.4787, "step": 4276 }, { "epoch": 0.2980176288192872, "grad_norm": 0.7136322606534233, "learning_rate": 5.717275739230363e-07, "loss": 1.5461, "step": 4277 }, { "epoch": 0.2980873079469045, "grad_norm": 0.747023635229759, "learning_rate": 5.716720342483693e-07, "loss": 1.6262, "step": 4278 }, { "epoch": 0.2981569870745218, "grad_norm": 0.6962780662742107, "learning_rate": 5.716164856279952e-07, "loss": 1.5011, "step": 4279 }, { "epoch": 0.29822666620213917, "grad_norm": 0.7586413862543497, "learning_rate": 5.715609280645762e-07, "loss": 1.5255, "step": 4280 }, { "epoch": 0.29829634532975646, "grad_norm": 0.7048885106327465, "learning_rate": 5.715053615607744e-07, "loss": 1.5426, "step": 4281 }, { "epoch": 0.2983660244573738, "grad_norm": 0.6822702776681451, "learning_rate": 5.714497861192527e-07, "loss": 1.564, "step": 4282 }, { "epoch": 0.2984357035849911, "grad_norm": 0.738493339810172, "learning_rate": 5.713942017426747e-07, "loss": 1.4922, "step": 4283 }, { "epoch": 0.29850538271260846, "grad_norm": 0.7083738712536051, "learning_rate": 5.713386084337038e-07, "loss": 1.3278, "step": 4284 }, { "epoch": 0.29857506184022575, "grad_norm": 0.819880393354702, "learning_rate": 5.712830061950042e-07, "loss": 1.6089, "step": 4285 }, { "epoch": 0.2986447409678431, "grad_norm": 0.7375639649661877, "learning_rate": 5.712273950292404e-07, "loss": 1.5276, "step": 4286 }, { "epoch": 0.2987144200954604, "grad_norm": 0.7537522147243475, "learning_rate": 5.711717749390776e-07, "loss": 1.4919, "step": 4287 }, { "epoch": 0.29878409922307775, "grad_norm": 0.6781966488725063, "learning_rate": 5.711161459271812e-07, "loss": 1.5011, "step": 4288 }, { "epoch": 0.29885377835069504, "grad_norm": 0.8326959141972974, "learning_rate": 5.710605079962171e-07, "loss": 1.5864, "step": 4289 }, { "epoch": 0.2989234574783124, "grad_norm": 0.7750678881838795, "learning_rate": 5.710048611488512e-07, "loss": 1.6775, "step": 4290 }, { "epoch": 0.2989931366059297, "grad_norm": 0.7339000247889513, "learning_rate": 5.709492053877506e-07, "loss": 1.627, "step": 4291 }, { "epoch": 0.29906281573354704, "grad_norm": 0.7341569990588388, "learning_rate": 5.708935407155824e-07, "loss": 1.5164, "step": 4292 }, { "epoch": 0.29913249486116433, "grad_norm": 0.8037081802813852, "learning_rate": 5.708378671350141e-07, "loss": 1.4966, "step": 4293 }, { "epoch": 0.2992021739887817, "grad_norm": 0.6627371432304198, "learning_rate": 5.707821846487136e-07, "loss": 1.3968, "step": 4294 }, { "epoch": 0.299271853116399, "grad_norm": 0.767241188025273, "learning_rate": 5.707264932593494e-07, "loss": 1.6399, "step": 4295 }, { "epoch": 0.29934153224401633, "grad_norm": 0.6914039403317371, "learning_rate": 5.706707929695905e-07, "loss": 1.5115, "step": 4296 }, { "epoch": 0.2994112113716336, "grad_norm": 0.6980323825789414, "learning_rate": 5.706150837821059e-07, "loss": 1.4639, "step": 4297 }, { "epoch": 0.299480890499251, "grad_norm": 0.7171078956726246, "learning_rate": 5.705593656995654e-07, "loss": 1.611, "step": 4298 }, { "epoch": 0.29955056962686827, "grad_norm": 0.7332984331849871, "learning_rate": 5.705036387246393e-07, "loss": 1.6235, "step": 4299 }, { "epoch": 0.2996202487544856, "grad_norm": 0.7008562486112845, "learning_rate": 5.704479028599979e-07, "loss": 1.486, "step": 4300 }, { "epoch": 0.2996899278821029, "grad_norm": 0.764475428172602, "learning_rate": 5.703921581083123e-07, "loss": 1.5944, "step": 4301 }, { "epoch": 0.29975960700972026, "grad_norm": 0.9630472761992942, "learning_rate": 5.703364044722539e-07, "loss": 1.6101, "step": 4302 }, { "epoch": 0.29982928613733756, "grad_norm": 0.7357985164170848, "learning_rate": 5.702806419544945e-07, "loss": 1.5459, "step": 4303 }, { "epoch": 0.2998989652649549, "grad_norm": 0.6860971242574258, "learning_rate": 5.702248705577064e-07, "loss": 1.3803, "step": 4304 }, { "epoch": 0.2999686443925722, "grad_norm": 0.7307420289792195, "learning_rate": 5.701690902845622e-07, "loss": 1.5831, "step": 4305 }, { "epoch": 0.30003832352018955, "grad_norm": 0.7160864235298279, "learning_rate": 5.701133011377349e-07, "loss": 1.4446, "step": 4306 }, { "epoch": 0.30010800264780685, "grad_norm": 0.7127607913585317, "learning_rate": 5.700575031198983e-07, "loss": 1.6148, "step": 4307 }, { "epoch": 0.3001776817754242, "grad_norm": 0.7371661893242732, "learning_rate": 5.700016962337264e-07, "loss": 1.584, "step": 4308 }, { "epoch": 0.3002473609030415, "grad_norm": 0.6862056831220482, "learning_rate": 5.699458804818933e-07, "loss": 1.5426, "step": 4309 }, { "epoch": 0.30031704003065884, "grad_norm": 0.9409326647764209, "learning_rate": 5.698900558670737e-07, "loss": 1.5583, "step": 4310 }, { "epoch": 0.30038671915827614, "grad_norm": 0.707354464843671, "learning_rate": 5.698342223919433e-07, "loss": 1.3867, "step": 4311 }, { "epoch": 0.30045639828589343, "grad_norm": 0.6662497690540862, "learning_rate": 5.697783800591775e-07, "loss": 1.5286, "step": 4312 }, { "epoch": 0.3005260774135108, "grad_norm": 0.6995012600576833, "learning_rate": 5.697225288714523e-07, "loss": 1.4394, "step": 4313 }, { "epoch": 0.3005957565411281, "grad_norm": 0.6587298537731207, "learning_rate": 5.696666688314442e-07, "loss": 1.5089, "step": 4314 }, { "epoch": 0.30066543566874543, "grad_norm": 0.6730697833442947, "learning_rate": 5.696107999418305e-07, "loss": 1.5451, "step": 4315 }, { "epoch": 0.3007351147963627, "grad_norm": 0.7094606803913679, "learning_rate": 5.69554922205288e-07, "loss": 1.4908, "step": 4316 }, { "epoch": 0.3008047939239801, "grad_norm": 0.6509023335460346, "learning_rate": 5.69499035624495e-07, "loss": 1.5202, "step": 4317 }, { "epoch": 0.30087447305159737, "grad_norm": 0.7491001156354492, "learning_rate": 5.694431402021292e-07, "loss": 1.5196, "step": 4318 }, { "epoch": 0.3009441521792147, "grad_norm": 0.6870728473339202, "learning_rate": 5.693872359408696e-07, "loss": 1.4587, "step": 4319 }, { "epoch": 0.301013831306832, "grad_norm": 0.6913259421135415, "learning_rate": 5.69331322843395e-07, "loss": 1.6039, "step": 4320 }, { "epoch": 0.30108351043444936, "grad_norm": 0.7570791386505102, "learning_rate": 5.69275400912385e-07, "loss": 1.6491, "step": 4321 }, { "epoch": 0.30115318956206666, "grad_norm": 0.7419386183508703, "learning_rate": 5.692194701505195e-07, "loss": 1.5979, "step": 4322 }, { "epoch": 0.301222868689684, "grad_norm": 0.7168472397929991, "learning_rate": 5.691635305604789e-07, "loss": 1.6062, "step": 4323 }, { "epoch": 0.3012925478173013, "grad_norm": 0.7273318013404496, "learning_rate": 5.691075821449437e-07, "loss": 1.5357, "step": 4324 }, { "epoch": 0.30136222694491865, "grad_norm": 0.703456514068365, "learning_rate": 5.690516249065953e-07, "loss": 1.7195, "step": 4325 }, { "epoch": 0.30143190607253595, "grad_norm": 0.7146216193702607, "learning_rate": 5.689956588481151e-07, "loss": 1.5897, "step": 4326 }, { "epoch": 0.3015015852001533, "grad_norm": 0.6979947721924662, "learning_rate": 5.689396839721853e-07, "loss": 1.5119, "step": 4327 }, { "epoch": 0.3015712643277706, "grad_norm": 0.7286369517144264, "learning_rate": 5.688837002814881e-07, "loss": 1.5625, "step": 4328 }, { "epoch": 0.30164094345538794, "grad_norm": 0.7002598809033803, "learning_rate": 5.688277077787065e-07, "loss": 1.4626, "step": 4329 }, { "epoch": 0.30171062258300524, "grad_norm": 0.7152829336567289, "learning_rate": 5.687717064665239e-07, "loss": 1.6106, "step": 4330 }, { "epoch": 0.3017803017106226, "grad_norm": 0.7341268521670578, "learning_rate": 5.687156963476236e-07, "loss": 1.5397, "step": 4331 }, { "epoch": 0.3018499808382399, "grad_norm": 0.6993141305033542, "learning_rate": 5.686596774246903e-07, "loss": 1.4845, "step": 4332 }, { "epoch": 0.30191965996585723, "grad_norm": 0.7425939500950345, "learning_rate": 5.686036497004079e-07, "loss": 1.5275, "step": 4333 }, { "epoch": 0.30198933909347453, "grad_norm": 0.8728770492993477, "learning_rate": 5.685476131774617e-07, "loss": 1.617, "step": 4334 }, { "epoch": 0.3020590182210919, "grad_norm": 0.7165920132387991, "learning_rate": 5.684915678585372e-07, "loss": 1.4761, "step": 4335 }, { "epoch": 0.3021286973487092, "grad_norm": 0.7206231125446654, "learning_rate": 5.684355137463201e-07, "loss": 1.6038, "step": 4336 }, { "epoch": 0.3021983764763265, "grad_norm": 0.7461449598093063, "learning_rate": 5.683794508434965e-07, "loss": 1.4806, "step": 4337 }, { "epoch": 0.3022680556039438, "grad_norm": 0.7326703682099187, "learning_rate": 5.683233791527532e-07, "loss": 1.5679, "step": 4338 }, { "epoch": 0.30233773473156117, "grad_norm": 0.7098570289778796, "learning_rate": 5.682672986767771e-07, "loss": 1.6465, "step": 4339 }, { "epoch": 0.30240741385917846, "grad_norm": 0.7338137182161039, "learning_rate": 5.682112094182559e-07, "loss": 1.5681, "step": 4340 }, { "epoch": 0.3024770929867958, "grad_norm": 0.7598766115415722, "learning_rate": 5.681551113798774e-07, "loss": 1.7009, "step": 4341 }, { "epoch": 0.3025467721144131, "grad_norm": 0.6912780506537121, "learning_rate": 5.680990045643299e-07, "loss": 1.4817, "step": 4342 }, { "epoch": 0.30261645124203046, "grad_norm": 0.7008636273971501, "learning_rate": 5.680428889743023e-07, "loss": 1.5617, "step": 4343 }, { "epoch": 0.30268613036964775, "grad_norm": 0.7017393439038174, "learning_rate": 5.679867646124837e-07, "loss": 1.5042, "step": 4344 }, { "epoch": 0.3027558094972651, "grad_norm": 0.7272041861547763, "learning_rate": 5.679306314815636e-07, "loss": 1.4487, "step": 4345 }, { "epoch": 0.3028254886248824, "grad_norm": 0.7117474417769055, "learning_rate": 5.678744895842321e-07, "loss": 1.4574, "step": 4346 }, { "epoch": 0.30289516775249975, "grad_norm": 0.7026478170110361, "learning_rate": 5.678183389231796e-07, "loss": 1.4453, "step": 4347 }, { "epoch": 0.30296484688011704, "grad_norm": 0.713541128071128, "learning_rate": 5.67762179501097e-07, "loss": 1.5474, "step": 4348 }, { "epoch": 0.3030345260077344, "grad_norm": 0.7447825123847247, "learning_rate": 5.677060113206756e-07, "loss": 1.5962, "step": 4349 }, { "epoch": 0.3031042051353517, "grad_norm": 0.6790629074737846, "learning_rate": 5.67649834384607e-07, "loss": 1.629, "step": 4350 }, { "epoch": 0.30317388426296904, "grad_norm": 0.7346346969447306, "learning_rate": 5.675936486955834e-07, "loss": 1.6648, "step": 4351 }, { "epoch": 0.30324356339058633, "grad_norm": 0.7110715667981556, "learning_rate": 5.675374542562973e-07, "loss": 1.5896, "step": 4352 }, { "epoch": 0.3033132425182037, "grad_norm": 0.7142436991764997, "learning_rate": 5.674812510694416e-07, "loss": 1.5098, "step": 4353 }, { "epoch": 0.303382921645821, "grad_norm": 0.7105548896306177, "learning_rate": 5.674250391377097e-07, "loss": 1.5081, "step": 4354 }, { "epoch": 0.30345260077343833, "grad_norm": 0.7322611956071579, "learning_rate": 5.673688184637956e-07, "loss": 1.5588, "step": 4355 }, { "epoch": 0.3035222799010556, "grad_norm": 0.6954733835570996, "learning_rate": 5.673125890503932e-07, "loss": 1.4469, "step": 4356 }, { "epoch": 0.303591959028673, "grad_norm": 0.7341910322709105, "learning_rate": 5.672563509001972e-07, "loss": 1.4872, "step": 4357 }, { "epoch": 0.30366163815629027, "grad_norm": 0.688288685718414, "learning_rate": 5.672001040159026e-07, "loss": 1.4781, "step": 4358 }, { "epoch": 0.3037313172839076, "grad_norm": 0.7411868407592087, "learning_rate": 5.67143848400205e-07, "loss": 1.6026, "step": 4359 }, { "epoch": 0.3038009964115249, "grad_norm": 0.6503515017270078, "learning_rate": 5.670875840558005e-07, "loss": 1.4431, "step": 4360 }, { "epoch": 0.30387067553914227, "grad_norm": 0.7775967475885618, "learning_rate": 5.67031310985385e-07, "loss": 1.5814, "step": 4361 }, { "epoch": 0.30394035466675956, "grad_norm": 0.6685209900171873, "learning_rate": 5.669750291916554e-07, "loss": 1.5307, "step": 4362 }, { "epoch": 0.3040100337943769, "grad_norm": 0.7203373515957084, "learning_rate": 5.669187386773087e-07, "loss": 1.5483, "step": 4363 }, { "epoch": 0.3040797129219942, "grad_norm": 0.7274571102331661, "learning_rate": 5.668624394450428e-07, "loss": 1.4758, "step": 4364 }, { "epoch": 0.30414939204961156, "grad_norm": 0.7206220254266728, "learning_rate": 5.668061314975553e-07, "loss": 1.4899, "step": 4365 }, { "epoch": 0.30421907117722885, "grad_norm": 0.710236911651575, "learning_rate": 5.667498148375447e-07, "loss": 1.642, "step": 4366 }, { "epoch": 0.3042887503048462, "grad_norm": 0.7139976815719326, "learning_rate": 5.666934894677099e-07, "loss": 1.5409, "step": 4367 }, { "epoch": 0.3043584294324635, "grad_norm": 0.6954576104030126, "learning_rate": 5.666371553907501e-07, "loss": 1.434, "step": 4368 }, { "epoch": 0.30442810856008085, "grad_norm": 0.7030492892506666, "learning_rate": 5.665808126093649e-07, "loss": 1.3851, "step": 4369 }, { "epoch": 0.30449778768769814, "grad_norm": 0.6610603452433989, "learning_rate": 5.665244611262543e-07, "loss": 1.4934, "step": 4370 }, { "epoch": 0.3045674668153155, "grad_norm": 0.7353844159537593, "learning_rate": 5.66468100944119e-07, "loss": 1.5959, "step": 4371 }, { "epoch": 0.3046371459429328, "grad_norm": 0.7180170447023938, "learning_rate": 5.664117320656596e-07, "loss": 1.5525, "step": 4372 }, { "epoch": 0.30470682507055014, "grad_norm": 0.7076752320951577, "learning_rate": 5.663553544935777e-07, "loss": 1.4768, "step": 4373 }, { "epoch": 0.30477650419816743, "grad_norm": 0.7305586044258503, "learning_rate": 5.662989682305748e-07, "loss": 1.6246, "step": 4374 }, { "epoch": 0.3048461833257848, "grad_norm": 0.7558819635443185, "learning_rate": 5.662425732793532e-07, "loss": 1.5732, "step": 4375 }, { "epoch": 0.3049158624534021, "grad_norm": 0.7153477087807026, "learning_rate": 5.661861696426154e-07, "loss": 1.4463, "step": 4376 }, { "epoch": 0.3049855415810194, "grad_norm": 0.7263139382865508, "learning_rate": 5.661297573230644e-07, "loss": 1.5023, "step": 4377 }, { "epoch": 0.3050552207086367, "grad_norm": 0.7001941576629662, "learning_rate": 5.660733363234035e-07, "loss": 1.4831, "step": 4378 }, { "epoch": 0.30512489983625407, "grad_norm": 0.7056967216013887, "learning_rate": 5.660169066463367e-07, "loss": 1.4512, "step": 4379 }, { "epoch": 0.30519457896387137, "grad_norm": 0.722321962015521, "learning_rate": 5.659604682945681e-07, "loss": 1.6328, "step": 4380 }, { "epoch": 0.3052642580914887, "grad_norm": 0.7459925387500808, "learning_rate": 5.659040212708024e-07, "loss": 1.5597, "step": 4381 }, { "epoch": 0.305333937219106, "grad_norm": 0.6985403031047768, "learning_rate": 5.658475655777445e-07, "loss": 1.5844, "step": 4382 }, { "epoch": 0.30540361634672336, "grad_norm": 0.6924190337217615, "learning_rate": 5.657911012180999e-07, "loss": 1.4897, "step": 4383 }, { "epoch": 0.30547329547434066, "grad_norm": 0.6857257208268763, "learning_rate": 5.657346281945748e-07, "loss": 1.5364, "step": 4384 }, { "epoch": 0.305542974601958, "grad_norm": 0.6960778169774027, "learning_rate": 5.65678146509875e-07, "loss": 1.6121, "step": 4385 }, { "epoch": 0.3056126537295753, "grad_norm": 0.7042377335398031, "learning_rate": 5.656216561667078e-07, "loss": 1.6275, "step": 4386 }, { "epoch": 0.30568233285719265, "grad_norm": 0.6764060757496547, "learning_rate": 5.655651571677797e-07, "loss": 1.5309, "step": 4387 }, { "epoch": 0.30575201198480995, "grad_norm": 0.7420151820599352, "learning_rate": 5.655086495157989e-07, "loss": 1.515, "step": 4388 }, { "epoch": 0.3058216911124273, "grad_norm": 0.7454176035431609, "learning_rate": 5.654521332134729e-07, "loss": 1.5449, "step": 4389 }, { "epoch": 0.3058913702400446, "grad_norm": 0.7166026979384447, "learning_rate": 5.653956082635102e-07, "loss": 1.4952, "step": 4390 }, { "epoch": 0.30596104936766194, "grad_norm": 0.696252526086783, "learning_rate": 5.653390746686195e-07, "loss": 1.4843, "step": 4391 }, { "epoch": 0.30603072849527924, "grad_norm": 0.7198107998452128, "learning_rate": 5.652825324315103e-07, "loss": 1.5638, "step": 4392 }, { "epoch": 0.3061004076228966, "grad_norm": 0.7111816471169524, "learning_rate": 5.652259815548919e-07, "loss": 1.6544, "step": 4393 }, { "epoch": 0.3061700867505139, "grad_norm": 0.7687614335199426, "learning_rate": 5.651694220414745e-07, "loss": 1.5855, "step": 4394 }, { "epoch": 0.30623976587813123, "grad_norm": 0.7454634904231932, "learning_rate": 5.651128538939687e-07, "loss": 1.6859, "step": 4395 }, { "epoch": 0.3063094450057485, "grad_norm": 0.6789328892800132, "learning_rate": 5.65056277115085e-07, "loss": 1.4511, "step": 4396 }, { "epoch": 0.3063791241333659, "grad_norm": 0.6967181617814054, "learning_rate": 5.649996917075348e-07, "loss": 1.5627, "step": 4397 }, { "epoch": 0.30644880326098317, "grad_norm": 0.7221567523229675, "learning_rate": 5.649430976740299e-07, "loss": 1.4447, "step": 4398 }, { "epoch": 0.3065184823886005, "grad_norm": 0.7419218988374826, "learning_rate": 5.648864950172825e-07, "loss": 1.6038, "step": 4399 }, { "epoch": 0.3065881615162178, "grad_norm": 0.7543528666991685, "learning_rate": 5.648298837400047e-07, "loss": 1.5557, "step": 4400 }, { "epoch": 0.30665784064383517, "grad_norm": 0.672566336166264, "learning_rate": 5.647732638449098e-07, "loss": 1.5224, "step": 4401 }, { "epoch": 0.30672751977145246, "grad_norm": 0.6647018335496818, "learning_rate": 5.64716635334711e-07, "loss": 1.5032, "step": 4402 }, { "epoch": 0.30679719889906976, "grad_norm": 0.7035658712733628, "learning_rate": 5.646599982121222e-07, "loss": 1.499, "step": 4403 }, { "epoch": 0.3068668780266871, "grad_norm": 0.7324652829978336, "learning_rate": 5.646033524798572e-07, "loss": 1.6015, "step": 4404 }, { "epoch": 0.3069365571543044, "grad_norm": 0.6581572710664613, "learning_rate": 5.645466981406311e-07, "loss": 1.4681, "step": 4405 }, { "epoch": 0.30700623628192175, "grad_norm": 0.7379880634218996, "learning_rate": 5.644900351971586e-07, "loss": 1.5125, "step": 4406 }, { "epoch": 0.30707591540953905, "grad_norm": 0.6937990883394584, "learning_rate": 5.644333636521549e-07, "loss": 1.534, "step": 4407 }, { "epoch": 0.3071455945371564, "grad_norm": 0.709535109038696, "learning_rate": 5.643766835083363e-07, "loss": 1.5428, "step": 4408 }, { "epoch": 0.3072152736647737, "grad_norm": 0.7080500790253447, "learning_rate": 5.643199947684187e-07, "loss": 1.6857, "step": 4409 }, { "epoch": 0.30728495279239104, "grad_norm": 0.7306898899751795, "learning_rate": 5.642632974351187e-07, "loss": 1.5052, "step": 4410 }, { "epoch": 0.30735463192000834, "grad_norm": 0.6916557398320974, "learning_rate": 5.642065915111535e-07, "loss": 1.5313, "step": 4411 }, { "epoch": 0.3074243110476257, "grad_norm": 0.7456137117488936, "learning_rate": 5.641498769992406e-07, "loss": 1.6002, "step": 4412 }, { "epoch": 0.307493990175243, "grad_norm": 0.7286947464768576, "learning_rate": 5.640931539020978e-07, "loss": 1.5603, "step": 4413 }, { "epoch": 0.30756366930286033, "grad_norm": 0.7296045551328778, "learning_rate": 5.640364222224435e-07, "loss": 1.472, "step": 4414 }, { "epoch": 0.3076333484304776, "grad_norm": 0.7188974984129131, "learning_rate": 5.63979681962996e-07, "loss": 1.5777, "step": 4415 }, { "epoch": 0.307703027558095, "grad_norm": 0.6987921504744042, "learning_rate": 5.639229331264748e-07, "loss": 1.5582, "step": 4416 }, { "epoch": 0.30777270668571227, "grad_norm": 0.6733705792089942, "learning_rate": 5.638661757155995e-07, "loss": 1.4362, "step": 4417 }, { "epoch": 0.3078423858133296, "grad_norm": 0.7532019974955833, "learning_rate": 5.638094097330898e-07, "loss": 1.5592, "step": 4418 }, { "epoch": 0.3079120649409469, "grad_norm": 0.7362829024275824, "learning_rate": 5.63752635181666e-07, "loss": 1.5044, "step": 4419 }, { "epoch": 0.30798174406856427, "grad_norm": 0.6879131082588306, "learning_rate": 5.63695852064049e-07, "loss": 1.5981, "step": 4420 }, { "epoch": 0.30805142319618156, "grad_norm": 0.7261822695523283, "learning_rate": 5.636390603829599e-07, "loss": 1.7112, "step": 4421 }, { "epoch": 0.3081211023237989, "grad_norm": 0.8396134526299301, "learning_rate": 5.635822601411203e-07, "loss": 1.5046, "step": 4422 }, { "epoch": 0.3081907814514162, "grad_norm": 0.6924912458603729, "learning_rate": 5.635254513412522e-07, "loss": 1.5348, "step": 4423 }, { "epoch": 0.30826046057903356, "grad_norm": 0.7804285234866137, "learning_rate": 5.634686339860779e-07, "loss": 1.5138, "step": 4424 }, { "epoch": 0.30833013970665085, "grad_norm": 0.7762150856399619, "learning_rate": 5.634118080783203e-07, "loss": 1.5775, "step": 4425 }, { "epoch": 0.3083998188342682, "grad_norm": 0.7025050135748799, "learning_rate": 5.633549736207026e-07, "loss": 1.4535, "step": 4426 }, { "epoch": 0.3084694979618855, "grad_norm": 0.7092815664252987, "learning_rate": 5.632981306159483e-07, "loss": 1.5258, "step": 4427 }, { "epoch": 0.30853917708950285, "grad_norm": 0.7130146811467492, "learning_rate": 5.632412790667818e-07, "loss": 1.6053, "step": 4428 }, { "epoch": 0.30860885621712014, "grad_norm": 0.7943626302984487, "learning_rate": 5.631844189759271e-07, "loss": 1.5869, "step": 4429 }, { "epoch": 0.3086785353447375, "grad_norm": 0.7409423035412404, "learning_rate": 5.631275503461091e-07, "loss": 1.4726, "step": 4430 }, { "epoch": 0.3087482144723548, "grad_norm": 0.7429732104868719, "learning_rate": 5.630706731800535e-07, "loss": 1.5541, "step": 4431 }, { "epoch": 0.30881789359997214, "grad_norm": 0.7551340123253788, "learning_rate": 5.630137874804855e-07, "loss": 1.711, "step": 4432 }, { "epoch": 0.30888757272758943, "grad_norm": 0.6698510994317796, "learning_rate": 5.629568932501314e-07, "loss": 1.4954, "step": 4433 }, { "epoch": 0.3089572518552068, "grad_norm": 0.7099811918718965, "learning_rate": 5.628999904917175e-07, "loss": 1.4815, "step": 4434 }, { "epoch": 0.3090269309828241, "grad_norm": 0.7550362727314823, "learning_rate": 5.62843079207971e-07, "loss": 1.5581, "step": 4435 }, { "epoch": 0.3090966101104414, "grad_norm": 0.7185400605901908, "learning_rate": 5.62786159401619e-07, "loss": 1.5663, "step": 4436 }, { "epoch": 0.3091662892380587, "grad_norm": 0.6874944643037497, "learning_rate": 5.627292310753892e-07, "loss": 1.6189, "step": 4437 }, { "epoch": 0.3092359683656761, "grad_norm": 0.6862872862618756, "learning_rate": 5.626722942320098e-07, "loss": 1.526, "step": 4438 }, { "epoch": 0.30930564749329337, "grad_norm": 0.777117696116382, "learning_rate": 5.626153488742094e-07, "loss": 1.609, "step": 4439 }, { "epoch": 0.3093753266209107, "grad_norm": 0.7148010353520651, "learning_rate": 5.625583950047168e-07, "loss": 1.5535, "step": 4440 }, { "epoch": 0.309445005748528, "grad_norm": 0.7045498396728237, "learning_rate": 5.625014326262612e-07, "loss": 1.5452, "step": 4441 }, { "epoch": 0.30951468487614536, "grad_norm": 0.6919278984568086, "learning_rate": 5.624444617415727e-07, "loss": 1.4555, "step": 4442 }, { "epoch": 0.30958436400376266, "grad_norm": 0.6905139175639234, "learning_rate": 5.623874823533813e-07, "loss": 1.5853, "step": 4443 }, { "epoch": 0.30965404313138, "grad_norm": 0.7165338368059605, "learning_rate": 5.623304944644177e-07, "loss": 1.5571, "step": 4444 }, { "epoch": 0.3097237222589973, "grad_norm": 0.7097791827485946, "learning_rate": 5.622734980774126e-07, "loss": 1.4597, "step": 4445 }, { "epoch": 0.30979340138661465, "grad_norm": 0.6880495509954674, "learning_rate": 5.622164931950975e-07, "loss": 1.4994, "step": 4446 }, { "epoch": 0.30986308051423195, "grad_norm": 0.7265350063500531, "learning_rate": 5.621594798202044e-07, "loss": 1.5729, "step": 4447 }, { "epoch": 0.3099327596418493, "grad_norm": 0.7271006631212064, "learning_rate": 5.621024579554652e-07, "loss": 1.6254, "step": 4448 }, { "epoch": 0.3100024387694666, "grad_norm": 0.7172550132449438, "learning_rate": 5.620454276036126e-07, "loss": 1.5689, "step": 4449 }, { "epoch": 0.31007211789708394, "grad_norm": 0.6859933153200322, "learning_rate": 5.619883887673798e-07, "loss": 1.5004, "step": 4450 }, { "epoch": 0.31014179702470124, "grad_norm": 0.747765073619636, "learning_rate": 5.619313414494999e-07, "loss": 1.5306, "step": 4451 }, { "epoch": 0.3102114761523186, "grad_norm": 0.7128484452214556, "learning_rate": 5.618742856527069e-07, "loss": 1.5639, "step": 4452 }, { "epoch": 0.3102811552799359, "grad_norm": 0.7681613031122471, "learning_rate": 5.618172213797351e-07, "loss": 1.6, "step": 4453 }, { "epoch": 0.31035083440755323, "grad_norm": 0.7524879920443474, "learning_rate": 5.617601486333189e-07, "loss": 1.5381, "step": 4454 }, { "epoch": 0.31042051353517053, "grad_norm": 0.7252015588543342, "learning_rate": 5.617030674161936e-07, "loss": 1.6141, "step": 4455 }, { "epoch": 0.3104901926627879, "grad_norm": 0.7135283316475586, "learning_rate": 5.616459777310946e-07, "loss": 1.6824, "step": 4456 }, { "epoch": 0.3105598717904052, "grad_norm": 0.7234761560418117, "learning_rate": 5.615888795807577e-07, "loss": 1.5512, "step": 4457 }, { "epoch": 0.3106295509180225, "grad_norm": 0.6888581693005156, "learning_rate": 5.61531772967919e-07, "loss": 1.4997, "step": 4458 }, { "epoch": 0.3106992300456398, "grad_norm": 0.7150965852736714, "learning_rate": 5.614746578953155e-07, "loss": 1.6749, "step": 4459 }, { "epoch": 0.31076890917325717, "grad_norm": 0.685877969315356, "learning_rate": 5.61417534365684e-07, "loss": 1.5597, "step": 4460 }, { "epoch": 0.31083858830087446, "grad_norm": 0.6710895076279765, "learning_rate": 5.613604023817622e-07, "loss": 1.5663, "step": 4461 }, { "epoch": 0.3109082674284918, "grad_norm": 0.7201758805730143, "learning_rate": 5.613032619462877e-07, "loss": 1.534, "step": 4462 }, { "epoch": 0.3109779465561091, "grad_norm": 0.7009125282431902, "learning_rate": 5.612461130619991e-07, "loss": 1.5826, "step": 4463 }, { "epoch": 0.31104762568372646, "grad_norm": 0.7343800397441643, "learning_rate": 5.611889557316349e-07, "loss": 1.6119, "step": 4464 }, { "epoch": 0.31111730481134375, "grad_norm": 0.6762556387092465, "learning_rate": 5.611317899579342e-07, "loss": 1.5175, "step": 4465 }, { "epoch": 0.3111869839389611, "grad_norm": 0.7320624779852928, "learning_rate": 5.610746157436364e-07, "loss": 1.5275, "step": 4466 }, { "epoch": 0.3112566630665784, "grad_norm": 0.6697280425825357, "learning_rate": 5.610174330914817e-07, "loss": 1.4777, "step": 4467 }, { "epoch": 0.31132634219419575, "grad_norm": 0.7468461994111188, "learning_rate": 5.609602420042102e-07, "loss": 1.6182, "step": 4468 }, { "epoch": 0.31139602132181304, "grad_norm": 0.7355989195989832, "learning_rate": 5.609030424845627e-07, "loss": 1.5292, "step": 4469 }, { "epoch": 0.3114657004494304, "grad_norm": 0.7209632348788007, "learning_rate": 5.608458345352802e-07, "loss": 1.5888, "step": 4470 }, { "epoch": 0.3115353795770477, "grad_norm": 0.6846198121785194, "learning_rate": 5.607886181591043e-07, "loss": 1.487, "step": 4471 }, { "epoch": 0.31160505870466504, "grad_norm": 0.7210029872091488, "learning_rate": 5.60731393358777e-07, "loss": 1.5018, "step": 4472 }, { "epoch": 0.31167473783228233, "grad_norm": 0.7261186538094028, "learning_rate": 5.606741601370406e-07, "loss": 1.4957, "step": 4473 }, { "epoch": 0.3117444169598997, "grad_norm": 0.7004930303444802, "learning_rate": 5.606169184966377e-07, "loss": 1.4821, "step": 4474 }, { "epoch": 0.311814096087517, "grad_norm": 0.6922215262031016, "learning_rate": 5.605596684403115e-07, "loss": 1.4644, "step": 4475 }, { "epoch": 0.31188377521513433, "grad_norm": 0.7123564721272889, "learning_rate": 5.605024099708058e-07, "loss": 1.4204, "step": 4476 }, { "epoch": 0.3119534543427516, "grad_norm": 0.6901484934307104, "learning_rate": 5.60445143090864e-07, "loss": 1.4364, "step": 4477 }, { "epoch": 0.312023133470369, "grad_norm": 0.7434500197807802, "learning_rate": 5.60387867803231e-07, "loss": 1.5128, "step": 4478 }, { "epoch": 0.31209281259798627, "grad_norm": 0.6970836184981862, "learning_rate": 5.603305841106511e-07, "loss": 1.5619, "step": 4479 }, { "epoch": 0.3121624917256036, "grad_norm": 0.7172415376307196, "learning_rate": 5.6027329201587e-07, "loss": 1.6662, "step": 4480 }, { "epoch": 0.3122321708532209, "grad_norm": 0.724545533459188, "learning_rate": 5.602159915216326e-07, "loss": 1.4708, "step": 4481 }, { "epoch": 0.31230184998083826, "grad_norm": 0.6768537107167042, "learning_rate": 5.601586826306853e-07, "loss": 1.5061, "step": 4482 }, { "epoch": 0.31237152910845556, "grad_norm": 0.7007067821148911, "learning_rate": 5.601013653457743e-07, "loss": 1.503, "step": 4483 }, { "epoch": 0.3124412082360729, "grad_norm": 0.7139890398072082, "learning_rate": 5.600440396696465e-07, "loss": 1.5266, "step": 4484 }, { "epoch": 0.3125108873636902, "grad_norm": 0.7267770350004812, "learning_rate": 5.599867056050489e-07, "loss": 1.4927, "step": 4485 }, { "epoch": 0.31258056649130755, "grad_norm": 0.7346092705739481, "learning_rate": 5.599293631547289e-07, "loss": 1.5618, "step": 4486 }, { "epoch": 0.31265024561892485, "grad_norm": 0.6883420305557534, "learning_rate": 5.59872012321435e-07, "loss": 1.4841, "step": 4487 }, { "epoch": 0.3127199247465422, "grad_norm": 0.8690928931163032, "learning_rate": 5.598146531079151e-07, "loss": 1.6252, "step": 4488 }, { "epoch": 0.3127896038741595, "grad_norm": 0.7386897051184834, "learning_rate": 5.597572855169182e-07, "loss": 1.5784, "step": 4489 }, { "epoch": 0.31285928300177684, "grad_norm": 0.7397508406451234, "learning_rate": 5.596999095511935e-07, "loss": 1.5716, "step": 4490 }, { "epoch": 0.31292896212939414, "grad_norm": 0.7126692429917938, "learning_rate": 5.596425252134903e-07, "loss": 1.5979, "step": 4491 }, { "epoch": 0.3129986412570115, "grad_norm": 0.6840682521617562, "learning_rate": 5.595851325065588e-07, "loss": 1.6575, "step": 4492 }, { "epoch": 0.3130683203846288, "grad_norm": 0.7597470315412264, "learning_rate": 5.595277314331495e-07, "loss": 1.5514, "step": 4493 }, { "epoch": 0.3131379995122461, "grad_norm": 0.6596496330675966, "learning_rate": 5.594703219960127e-07, "loss": 1.4768, "step": 4494 }, { "epoch": 0.31320767863986343, "grad_norm": 0.7101073477506992, "learning_rate": 5.594129041979001e-07, "loss": 1.4752, "step": 4495 }, { "epoch": 0.3132773577674807, "grad_norm": 0.8087865878948145, "learning_rate": 5.593554780415632e-07, "loss": 1.5729, "step": 4496 }, { "epoch": 0.3133470368950981, "grad_norm": 0.7625992856773187, "learning_rate": 5.592980435297535e-07, "loss": 1.5422, "step": 4497 }, { "epoch": 0.31341671602271537, "grad_norm": 0.6867096727586379, "learning_rate": 5.592406006652241e-07, "loss": 1.4298, "step": 4498 }, { "epoch": 0.3134863951503327, "grad_norm": 0.7457698816149559, "learning_rate": 5.591831494507271e-07, "loss": 1.5903, "step": 4499 }, { "epoch": 0.31355607427795, "grad_norm": 0.7503797775935128, "learning_rate": 5.591256898890162e-07, "loss": 1.5962, "step": 4500 }, { "epoch": 0.31362575340556736, "grad_norm": 0.7085594299784876, "learning_rate": 5.590682219828446e-07, "loss": 1.5023, "step": 4501 }, { "epoch": 0.31369543253318466, "grad_norm": 0.7243760005682286, "learning_rate": 5.590107457349667e-07, "loss": 1.6518, "step": 4502 }, { "epoch": 0.313765111660802, "grad_norm": 0.7448276470045246, "learning_rate": 5.589532611481363e-07, "loss": 1.6848, "step": 4503 }, { "epoch": 0.3138347907884193, "grad_norm": 0.7494794861319963, "learning_rate": 5.588957682251087e-07, "loss": 1.5629, "step": 4504 }, { "epoch": 0.31390446991603665, "grad_norm": 0.7213716227937492, "learning_rate": 5.588382669686389e-07, "loss": 1.5777, "step": 4505 }, { "epoch": 0.31397414904365395, "grad_norm": 0.7646428526365497, "learning_rate": 5.587807573814824e-07, "loss": 1.5049, "step": 4506 }, { "epoch": 0.3140438281712713, "grad_norm": 0.767120792824794, "learning_rate": 5.587232394663951e-07, "loss": 1.4491, "step": 4507 }, { "epoch": 0.3141135072988886, "grad_norm": 0.7105591775635199, "learning_rate": 5.586657132261337e-07, "loss": 1.4434, "step": 4508 }, { "epoch": 0.31418318642650594, "grad_norm": 0.758203714015006, "learning_rate": 5.586081786634549e-07, "loss": 1.7566, "step": 4509 }, { "epoch": 0.31425286555412324, "grad_norm": 0.7786093682246001, "learning_rate": 5.585506357811156e-07, "loss": 1.5759, "step": 4510 }, { "epoch": 0.3143225446817406, "grad_norm": 0.6442824769449441, "learning_rate": 5.584930845818736e-07, "loss": 1.5383, "step": 4511 }, { "epoch": 0.3143922238093579, "grad_norm": 0.6987033992122678, "learning_rate": 5.584355250684867e-07, "loss": 1.4782, "step": 4512 }, { "epoch": 0.31446190293697523, "grad_norm": 0.7123821044662978, "learning_rate": 5.583779572437135e-07, "loss": 1.4858, "step": 4513 }, { "epoch": 0.31453158206459253, "grad_norm": 0.7612746697091406, "learning_rate": 5.583203811103125e-07, "loss": 1.5794, "step": 4514 }, { "epoch": 0.3146012611922099, "grad_norm": 0.6833845167460846, "learning_rate": 5.582627966710432e-07, "loss": 1.4549, "step": 4515 }, { "epoch": 0.3146709403198272, "grad_norm": 0.7935778077228008, "learning_rate": 5.582052039286649e-07, "loss": 1.6172, "step": 4516 }, { "epoch": 0.3147406194474445, "grad_norm": 0.7301222093535693, "learning_rate": 5.581476028859377e-07, "loss": 1.5544, "step": 4517 }, { "epoch": 0.3148102985750618, "grad_norm": 0.7456619945461759, "learning_rate": 5.580899935456218e-07, "loss": 1.588, "step": 4518 }, { "epoch": 0.31487997770267917, "grad_norm": 0.7179618775554296, "learning_rate": 5.580323759104781e-07, "loss": 1.5222, "step": 4519 }, { "epoch": 0.31494965683029646, "grad_norm": 0.7132286895714686, "learning_rate": 5.579747499832679e-07, "loss": 1.434, "step": 4520 }, { "epoch": 0.3150193359579138, "grad_norm": 0.7465885391719073, "learning_rate": 5.579171157667522e-07, "loss": 1.5557, "step": 4521 }, { "epoch": 0.3150890150855311, "grad_norm": 0.749662806631823, "learning_rate": 5.578594732636936e-07, "loss": 1.6624, "step": 4522 }, { "epoch": 0.31515869421314846, "grad_norm": 0.949124921903201, "learning_rate": 5.578018224768542e-07, "loss": 1.4182, "step": 4523 }, { "epoch": 0.31522837334076576, "grad_norm": 0.744353130865854, "learning_rate": 5.577441634089965e-07, "loss": 1.6209, "step": 4524 }, { "epoch": 0.3152980524683831, "grad_norm": 0.6873792099313955, "learning_rate": 5.576864960628839e-07, "loss": 1.4797, "step": 4525 }, { "epoch": 0.3153677315960004, "grad_norm": 0.6994386653153584, "learning_rate": 5.5762882044128e-07, "loss": 1.4613, "step": 4526 }, { "epoch": 0.31543741072361775, "grad_norm": 0.705522154107607, "learning_rate": 5.575711365469486e-07, "loss": 1.6182, "step": 4527 }, { "epoch": 0.31550708985123505, "grad_norm": 0.7725865357784353, "learning_rate": 5.57513444382654e-07, "loss": 1.569, "step": 4528 }, { "epoch": 0.3155767689788524, "grad_norm": 0.6845243748298593, "learning_rate": 5.574557439511612e-07, "loss": 1.5217, "step": 4529 }, { "epoch": 0.3156464481064697, "grad_norm": 0.7261032424467299, "learning_rate": 5.573980352552348e-07, "loss": 1.6041, "step": 4530 }, { "epoch": 0.31571612723408704, "grad_norm": 0.773279494758788, "learning_rate": 5.573403182976408e-07, "loss": 1.6184, "step": 4531 }, { "epoch": 0.31578580636170434, "grad_norm": 0.9622361724689067, "learning_rate": 5.572825930811449e-07, "loss": 1.6083, "step": 4532 }, { "epoch": 0.3158554854893217, "grad_norm": 0.7432584934801373, "learning_rate": 5.572248596085133e-07, "loss": 1.436, "step": 4533 }, { "epoch": 0.315925164616939, "grad_norm": 0.7427295237224485, "learning_rate": 5.571671178825131e-07, "loss": 1.5585, "step": 4534 }, { "epoch": 0.31599484374455633, "grad_norm": 0.6928269582168132, "learning_rate": 5.57109367905911e-07, "loss": 1.4105, "step": 4535 }, { "epoch": 0.3160645228721736, "grad_norm": 0.6728242767255252, "learning_rate": 5.570516096814747e-07, "loss": 1.5522, "step": 4536 }, { "epoch": 0.316134201999791, "grad_norm": 0.7263367317524648, "learning_rate": 5.569938432119721e-07, "loss": 1.6393, "step": 4537 }, { "epoch": 0.31620388112740827, "grad_norm": 0.715112801174288, "learning_rate": 5.569360685001715e-07, "loss": 1.4807, "step": 4538 }, { "epoch": 0.3162735602550256, "grad_norm": 0.7492932996364073, "learning_rate": 5.568782855488413e-07, "loss": 1.6866, "step": 4539 }, { "epoch": 0.3163432393826429, "grad_norm": 0.6986507984234215, "learning_rate": 5.568204943607508e-07, "loss": 1.5787, "step": 4540 }, { "epoch": 0.31641291851026027, "grad_norm": 0.6888654643145271, "learning_rate": 5.567626949386696e-07, "loss": 1.6045, "step": 4541 }, { "epoch": 0.31648259763787756, "grad_norm": 0.6805162691732184, "learning_rate": 5.567048872853675e-07, "loss": 1.5848, "step": 4542 }, { "epoch": 0.3165522767654949, "grad_norm": 0.6884368305487023, "learning_rate": 5.566470714036145e-07, "loss": 1.55, "step": 4543 }, { "epoch": 0.3166219558931122, "grad_norm": 0.7570329409020771, "learning_rate": 5.565892472961816e-07, "loss": 1.6468, "step": 4544 }, { "epoch": 0.31669163502072956, "grad_norm": 0.7059467717662286, "learning_rate": 5.565314149658398e-07, "loss": 1.4445, "step": 4545 }, { "epoch": 0.31676131414834685, "grad_norm": 0.6657695365455025, "learning_rate": 5.564735744153601e-07, "loss": 1.4957, "step": 4546 }, { "epoch": 0.3168309932759642, "grad_norm": 0.7407102775583433, "learning_rate": 5.56415725647515e-07, "loss": 1.5459, "step": 4547 }, { "epoch": 0.3169006724035815, "grad_norm": 0.7106313167914474, "learning_rate": 5.563578686650763e-07, "loss": 1.5298, "step": 4548 }, { "epoch": 0.31697035153119885, "grad_norm": 0.682333055169887, "learning_rate": 5.563000034708168e-07, "loss": 1.5829, "step": 4549 }, { "epoch": 0.31704003065881614, "grad_norm": 0.7192818302564468, "learning_rate": 5.562421300675094e-07, "loss": 1.419, "step": 4550 }, { "epoch": 0.3171097097864335, "grad_norm": 0.7430656876330435, "learning_rate": 5.561842484579276e-07, "loss": 1.5114, "step": 4551 }, { "epoch": 0.3171793889140508, "grad_norm": 0.7111699564860365, "learning_rate": 5.561263586448452e-07, "loss": 1.6344, "step": 4552 }, { "epoch": 0.31724906804166814, "grad_norm": 0.7041991520578657, "learning_rate": 5.560684606310363e-07, "loss": 1.4087, "step": 4553 }, { "epoch": 0.31731874716928543, "grad_norm": 0.6955203999717197, "learning_rate": 5.560105544192756e-07, "loss": 1.5001, "step": 4554 }, { "epoch": 0.3173884262969028, "grad_norm": 0.642220065364471, "learning_rate": 5.559526400123382e-07, "loss": 1.3332, "step": 4555 }, { "epoch": 0.3174581054245201, "grad_norm": 0.7146233847081206, "learning_rate": 5.558947174129991e-07, "loss": 1.5475, "step": 4556 }, { "epoch": 0.3175277845521374, "grad_norm": 0.6547314881825679, "learning_rate": 5.558367866240346e-07, "loss": 1.451, "step": 4557 }, { "epoch": 0.3175974636797547, "grad_norm": 0.7557513478775116, "learning_rate": 5.557788476482202e-07, "loss": 1.512, "step": 4558 }, { "epoch": 0.31766714280737207, "grad_norm": 0.7504684331261248, "learning_rate": 5.557209004883331e-07, "loss": 1.4838, "step": 4559 }, { "epoch": 0.31773682193498937, "grad_norm": 0.6641025593399148, "learning_rate": 5.556629451471498e-07, "loss": 1.4582, "step": 4560 }, { "epoch": 0.3178065010626067, "grad_norm": 0.6858127362399964, "learning_rate": 5.556049816274479e-07, "loss": 1.4477, "step": 4561 }, { "epoch": 0.317876180190224, "grad_norm": 0.7816268642559431, "learning_rate": 5.555470099320049e-07, "loss": 1.4744, "step": 4562 }, { "epoch": 0.31794585931784136, "grad_norm": 0.7280784746335369, "learning_rate": 5.554890300635992e-07, "loss": 1.602, "step": 4563 }, { "epoch": 0.31801553844545866, "grad_norm": 0.7235496817025429, "learning_rate": 5.554310420250091e-07, "loss": 1.5873, "step": 4564 }, { "epoch": 0.318085217573076, "grad_norm": 0.7145943660212166, "learning_rate": 5.553730458190136e-07, "loss": 1.5606, "step": 4565 }, { "epoch": 0.3181548967006933, "grad_norm": 0.7405716180465298, "learning_rate": 5.55315041448392e-07, "loss": 1.5548, "step": 4566 }, { "epoch": 0.31822457582831065, "grad_norm": 0.699821597252651, "learning_rate": 5.55257028915924e-07, "loss": 1.5007, "step": 4567 }, { "epoch": 0.31829425495592795, "grad_norm": 0.7264404039646922, "learning_rate": 5.551990082243896e-07, "loss": 1.5778, "step": 4568 }, { "epoch": 0.3183639340835453, "grad_norm": 0.7018136293753561, "learning_rate": 5.551409793765692e-07, "loss": 1.5822, "step": 4569 }, { "epoch": 0.3184336132111626, "grad_norm": 0.7085191412090573, "learning_rate": 5.55082942375244e-07, "loss": 1.566, "step": 4570 }, { "epoch": 0.31850329233877994, "grad_norm": 0.6959965920230744, "learning_rate": 5.550248972231949e-07, "loss": 1.424, "step": 4571 }, { "epoch": 0.31857297146639724, "grad_norm": 0.7573753306961281, "learning_rate": 5.549668439232036e-07, "loss": 1.7044, "step": 4572 }, { "epoch": 0.3186426505940146, "grad_norm": 0.7796455158742204, "learning_rate": 5.549087824780523e-07, "loss": 1.4707, "step": 4573 }, { "epoch": 0.3187123297216319, "grad_norm": 0.8073763532584914, "learning_rate": 5.548507128905233e-07, "loss": 1.6353, "step": 4574 }, { "epoch": 0.31878200884924923, "grad_norm": 0.6759903199641083, "learning_rate": 5.547926351633995e-07, "loss": 1.4381, "step": 4575 }, { "epoch": 0.3188516879768665, "grad_norm": 0.8112029098972438, "learning_rate": 5.54734549299464e-07, "loss": 1.4541, "step": 4576 }, { "epoch": 0.3189213671044839, "grad_norm": 0.668927646315414, "learning_rate": 5.546764553015004e-07, "loss": 1.5504, "step": 4577 }, { "epoch": 0.31899104623210117, "grad_norm": 0.728448386639013, "learning_rate": 5.546183531722927e-07, "loss": 1.6189, "step": 4578 }, { "epoch": 0.3190607253597185, "grad_norm": 0.7022708932585651, "learning_rate": 5.545602429146254e-07, "loss": 1.4315, "step": 4579 }, { "epoch": 0.3191304044873358, "grad_norm": 0.7663878863520591, "learning_rate": 5.54502124531283e-07, "loss": 1.5433, "step": 4580 }, { "epoch": 0.31920008361495317, "grad_norm": 0.7464042904379575, "learning_rate": 5.544439980250511e-07, "loss": 1.7276, "step": 4581 }, { "epoch": 0.31926976274257046, "grad_norm": 0.761958715868217, "learning_rate": 5.543858633987147e-07, "loss": 1.4849, "step": 4582 }, { "epoch": 0.3193394418701878, "grad_norm": 0.7402266957550955, "learning_rate": 5.5432772065506e-07, "loss": 1.619, "step": 4583 }, { "epoch": 0.3194091209978051, "grad_norm": 0.676648377109348, "learning_rate": 5.542695697968735e-07, "loss": 1.5384, "step": 4584 }, { "epoch": 0.31947880012542246, "grad_norm": 0.7194002923819981, "learning_rate": 5.542114108269416e-07, "loss": 1.499, "step": 4585 }, { "epoch": 0.31954847925303975, "grad_norm": 0.7094758253955392, "learning_rate": 5.541532437480514e-07, "loss": 1.5803, "step": 4586 }, { "epoch": 0.31961815838065705, "grad_norm": 0.6828922434293256, "learning_rate": 5.540950685629905e-07, "loss": 1.4883, "step": 4587 }, { "epoch": 0.3196878375082744, "grad_norm": 0.7289414195466307, "learning_rate": 5.540368852745469e-07, "loss": 1.6278, "step": 4588 }, { "epoch": 0.3197575166358917, "grad_norm": 0.7408149230732806, "learning_rate": 5.539786938855087e-07, "loss": 1.5954, "step": 4589 }, { "epoch": 0.31982719576350904, "grad_norm": 0.6675256658590901, "learning_rate": 5.539204943986645e-07, "loss": 1.517, "step": 4590 }, { "epoch": 0.31989687489112634, "grad_norm": 0.6800644937580433, "learning_rate": 5.538622868168034e-07, "loss": 1.5081, "step": 4591 }, { "epoch": 0.3199665540187437, "grad_norm": 0.729421915525262, "learning_rate": 5.53804071142715e-07, "loss": 1.609, "step": 4592 }, { "epoch": 0.320036233146361, "grad_norm": 0.6810241293051824, "learning_rate": 5.537458473791889e-07, "loss": 1.4824, "step": 4593 }, { "epoch": 0.32010591227397833, "grad_norm": 0.6577963086195658, "learning_rate": 5.536876155290153e-07, "loss": 1.4177, "step": 4594 }, { "epoch": 0.3201755914015956, "grad_norm": 0.7461821891392836, "learning_rate": 5.53629375594985e-07, "loss": 1.5158, "step": 4595 }, { "epoch": 0.320245270529213, "grad_norm": 0.7391443519888712, "learning_rate": 5.535711275798887e-07, "loss": 1.4028, "step": 4596 }, { "epoch": 0.3203149496568303, "grad_norm": 0.7217676239781258, "learning_rate": 5.53512871486518e-07, "loss": 1.6136, "step": 4597 }, { "epoch": 0.3203846287844476, "grad_norm": 0.7728777338722576, "learning_rate": 5.534546073176645e-07, "loss": 1.4466, "step": 4598 }, { "epoch": 0.3204543079120649, "grad_norm": 0.6624023550793688, "learning_rate": 5.533963350761203e-07, "loss": 1.5125, "step": 4599 }, { "epoch": 0.32052398703968227, "grad_norm": 0.7744005505294767, "learning_rate": 5.533380547646781e-07, "loss": 1.4948, "step": 4600 }, { "epoch": 0.32059366616729956, "grad_norm": 0.6691477498617161, "learning_rate": 5.532797663861307e-07, "loss": 1.5916, "step": 4601 }, { "epoch": 0.3206633452949169, "grad_norm": 0.6918371725583984, "learning_rate": 5.532214699432715e-07, "loss": 1.5005, "step": 4602 }, { "epoch": 0.3207330244225342, "grad_norm": 0.7497768594055559, "learning_rate": 5.53163165438894e-07, "loss": 1.4548, "step": 4603 }, { "epoch": 0.32080270355015156, "grad_norm": 0.7488556365884924, "learning_rate": 5.531048528757924e-07, "loss": 1.5472, "step": 4604 }, { "epoch": 0.32087238267776885, "grad_norm": 0.7579239463318133, "learning_rate": 5.530465322567612e-07, "loss": 1.5825, "step": 4605 }, { "epoch": 0.3209420618053862, "grad_norm": 0.7919011629759329, "learning_rate": 5.529882035845952e-07, "loss": 1.5038, "step": 4606 }, { "epoch": 0.3210117409330035, "grad_norm": 0.7674975520767843, "learning_rate": 5.529298668620894e-07, "loss": 1.6024, "step": 4607 }, { "epoch": 0.32108142006062085, "grad_norm": 0.7096403207271622, "learning_rate": 5.528715220920397e-07, "loss": 1.5253, "step": 4608 }, { "epoch": 0.32115109918823814, "grad_norm": 0.7134317507024582, "learning_rate": 5.528131692772423e-07, "loss": 1.6089, "step": 4609 }, { "epoch": 0.3212207783158555, "grad_norm": 0.7671723571840533, "learning_rate": 5.52754808420493e-07, "loss": 1.6016, "step": 4610 }, { "epoch": 0.3212904574434728, "grad_norm": 0.688495820899462, "learning_rate": 5.52696439524589e-07, "loss": 1.5726, "step": 4611 }, { "epoch": 0.32136013657109014, "grad_norm": 0.6678380593790649, "learning_rate": 5.526380625923274e-07, "loss": 1.5112, "step": 4612 }, { "epoch": 0.32142981569870743, "grad_norm": 0.745762956582063, "learning_rate": 5.525796776265057e-07, "loss": 1.5177, "step": 4613 }, { "epoch": 0.3214994948263248, "grad_norm": 0.7112486718285385, "learning_rate": 5.525212846299217e-07, "loss": 1.5147, "step": 4614 }, { "epoch": 0.3215691739539421, "grad_norm": 0.724836975919717, "learning_rate": 5.524628836053739e-07, "loss": 1.6959, "step": 4615 }, { "epoch": 0.32163885308155943, "grad_norm": 0.6913389692644148, "learning_rate": 5.524044745556608e-07, "loss": 1.5525, "step": 4616 }, { "epoch": 0.3217085322091767, "grad_norm": 0.7223242124116028, "learning_rate": 5.523460574835818e-07, "loss": 1.6182, "step": 4617 }, { "epoch": 0.3217782113367941, "grad_norm": 0.6972269063425547, "learning_rate": 5.52287632391936e-07, "loss": 1.614, "step": 4618 }, { "epoch": 0.32184789046441137, "grad_norm": 0.6855602585457429, "learning_rate": 5.522291992835234e-07, "loss": 1.5322, "step": 4619 }, { "epoch": 0.3219175695920287, "grad_norm": 0.7295488072661531, "learning_rate": 5.521707581611445e-07, "loss": 1.4599, "step": 4620 }, { "epoch": 0.321987248719646, "grad_norm": 0.7602169389635018, "learning_rate": 5.521123090275996e-07, "loss": 1.4392, "step": 4621 }, { "epoch": 0.32205692784726336, "grad_norm": 0.7611538203907603, "learning_rate": 5.520538518856896e-07, "loss": 1.6624, "step": 4622 }, { "epoch": 0.32212660697488066, "grad_norm": 0.7382468747722052, "learning_rate": 5.519953867382163e-07, "loss": 1.5633, "step": 4623 }, { "epoch": 0.322196286102498, "grad_norm": 0.6877266006916888, "learning_rate": 5.51936913587981e-07, "loss": 1.4824, "step": 4624 }, { "epoch": 0.3222659652301153, "grad_norm": 0.7238249354166105, "learning_rate": 5.518784324377861e-07, "loss": 1.6241, "step": 4625 }, { "epoch": 0.32233564435773265, "grad_norm": 0.7876389933251687, "learning_rate": 5.518199432904342e-07, "loss": 1.6126, "step": 4626 }, { "epoch": 0.32240532348534995, "grad_norm": 0.7244708081210876, "learning_rate": 5.517614461487283e-07, "loss": 1.57, "step": 4627 }, { "epoch": 0.3224750026129673, "grad_norm": 0.6877563899579893, "learning_rate": 5.517029410154713e-07, "loss": 1.4658, "step": 4628 }, { "epoch": 0.3225446817405846, "grad_norm": 0.7381068738510921, "learning_rate": 5.516444278934672e-07, "loss": 1.472, "step": 4629 }, { "epoch": 0.32261436086820194, "grad_norm": 0.7661324152373148, "learning_rate": 5.5158590678552e-07, "loss": 1.4738, "step": 4630 }, { "epoch": 0.32268403999581924, "grad_norm": 0.7180663750359986, "learning_rate": 5.515273776944343e-07, "loss": 1.4249, "step": 4631 }, { "epoch": 0.3227537191234366, "grad_norm": 0.7314590378305785, "learning_rate": 5.514688406230145e-07, "loss": 1.5152, "step": 4632 }, { "epoch": 0.3228233982510539, "grad_norm": 0.6801266282448725, "learning_rate": 5.514102955740663e-07, "loss": 1.5268, "step": 4633 }, { "epoch": 0.32289307737867123, "grad_norm": 0.7305814534042752, "learning_rate": 5.51351742550395e-07, "loss": 1.495, "step": 4634 }, { "epoch": 0.32296275650628853, "grad_norm": 0.6815117848190505, "learning_rate": 5.512931815548069e-07, "loss": 1.4861, "step": 4635 }, { "epoch": 0.3230324356339059, "grad_norm": 0.7455133929413991, "learning_rate": 5.512346125901079e-07, "loss": 1.4462, "step": 4636 }, { "epoch": 0.3231021147615232, "grad_norm": 0.7508120732279528, "learning_rate": 5.511760356591052e-07, "loss": 1.5483, "step": 4637 }, { "epoch": 0.3231717938891405, "grad_norm": 0.7014812959074661, "learning_rate": 5.511174507646055e-07, "loss": 1.609, "step": 4638 }, { "epoch": 0.3232414730167578, "grad_norm": 0.7421777110315984, "learning_rate": 5.510588579094168e-07, "loss": 1.4491, "step": 4639 }, { "epoch": 0.32331115214437517, "grad_norm": 0.7827639342952755, "learning_rate": 5.510002570963465e-07, "loss": 1.4477, "step": 4640 }, { "epoch": 0.32338083127199246, "grad_norm": 0.6862468086728646, "learning_rate": 5.50941648328203e-07, "loss": 1.5549, "step": 4641 }, { "epoch": 0.3234505103996098, "grad_norm": 0.7260160638503516, "learning_rate": 5.508830316077952e-07, "loss": 1.4935, "step": 4642 }, { "epoch": 0.3235201895272271, "grad_norm": 0.6932870393561489, "learning_rate": 5.508244069379321e-07, "loss": 1.7095, "step": 4643 }, { "epoch": 0.32358986865484446, "grad_norm": 0.7465469881600842, "learning_rate": 5.507657743214228e-07, "loss": 1.5113, "step": 4644 }, { "epoch": 0.32365954778246175, "grad_norm": 0.724992133322777, "learning_rate": 5.507071337610773e-07, "loss": 1.6181, "step": 4645 }, { "epoch": 0.3237292269100791, "grad_norm": 0.746052557847551, "learning_rate": 5.506484852597058e-07, "loss": 1.6747, "step": 4646 }, { "epoch": 0.3237989060376964, "grad_norm": 0.7450971054334902, "learning_rate": 5.505898288201188e-07, "loss": 1.6415, "step": 4647 }, { "epoch": 0.32386858516531375, "grad_norm": 0.7264260556915378, "learning_rate": 5.505311644451272e-07, "loss": 1.6594, "step": 4648 }, { "epoch": 0.32393826429293104, "grad_norm": 0.7155597727480145, "learning_rate": 5.504724921375425e-07, "loss": 1.4577, "step": 4649 }, { "epoch": 0.3240079434205484, "grad_norm": 0.7669193360173628, "learning_rate": 5.504138119001761e-07, "loss": 1.5443, "step": 4650 }, { "epoch": 0.3240776225481657, "grad_norm": 0.8326790168924357, "learning_rate": 5.503551237358404e-07, "loss": 1.7411, "step": 4651 }, { "epoch": 0.32414730167578304, "grad_norm": 0.7134212535930974, "learning_rate": 5.502964276473477e-07, "loss": 1.5207, "step": 4652 }, { "epoch": 0.32421698080340033, "grad_norm": 0.7164271081334321, "learning_rate": 5.502377236375108e-07, "loss": 1.563, "step": 4653 }, { "epoch": 0.3242866599310177, "grad_norm": 0.7574513017908499, "learning_rate": 5.501790117091429e-07, "loss": 1.5626, "step": 4654 }, { "epoch": 0.324356339058635, "grad_norm": 0.6950388343278109, "learning_rate": 5.501202918650577e-07, "loss": 1.5222, "step": 4655 }, { "epoch": 0.32442601818625233, "grad_norm": 0.7125678417544166, "learning_rate": 5.500615641080691e-07, "loss": 1.6304, "step": 4656 }, { "epoch": 0.3244956973138696, "grad_norm": 0.6998624739958945, "learning_rate": 5.500028284409915e-07, "loss": 1.5324, "step": 4657 }, { "epoch": 0.324565376441487, "grad_norm": 0.7068349491161948, "learning_rate": 5.499440848666395e-07, "loss": 1.5863, "step": 4658 }, { "epoch": 0.32463505556910427, "grad_norm": 0.661201049428343, "learning_rate": 5.498853333878285e-07, "loss": 1.4551, "step": 4659 }, { "epoch": 0.3247047346967216, "grad_norm": 0.7269218043387352, "learning_rate": 5.498265740073738e-07, "loss": 1.5651, "step": 4660 }, { "epoch": 0.3247744138243389, "grad_norm": 0.749285337503175, "learning_rate": 5.497678067280913e-07, "loss": 1.7859, "step": 4661 }, { "epoch": 0.32484409295195626, "grad_norm": 0.7480084626513137, "learning_rate": 5.497090315527971e-07, "loss": 1.5376, "step": 4662 }, { "epoch": 0.32491377207957356, "grad_norm": 0.6844373648201154, "learning_rate": 5.496502484843082e-07, "loss": 1.4983, "step": 4663 }, { "epoch": 0.3249834512071909, "grad_norm": 0.8226873965644259, "learning_rate": 5.495914575254411e-07, "loss": 1.7003, "step": 4664 }, { "epoch": 0.3250531303348082, "grad_norm": 0.6728335863273786, "learning_rate": 5.495326586790137e-07, "loss": 1.5181, "step": 4665 }, { "epoch": 0.32512280946242555, "grad_norm": 0.7469348771533575, "learning_rate": 5.494738519478434e-07, "loss": 1.6188, "step": 4666 }, { "epoch": 0.32519248859004285, "grad_norm": 0.7149477160520245, "learning_rate": 5.494150373347485e-07, "loss": 1.5103, "step": 4667 }, { "epoch": 0.3252621677176602, "grad_norm": 0.7543294043052631, "learning_rate": 5.493562148425475e-07, "loss": 1.531, "step": 4668 }, { "epoch": 0.3253318468452775, "grad_norm": 0.8562125075394144, "learning_rate": 5.492973844740592e-07, "loss": 1.5215, "step": 4669 }, { "epoch": 0.32540152597289485, "grad_norm": 0.7576430297954465, "learning_rate": 5.492385462321028e-07, "loss": 1.4913, "step": 4670 }, { "epoch": 0.32547120510051214, "grad_norm": 0.693823950005331, "learning_rate": 5.491797001194984e-07, "loss": 1.5224, "step": 4671 }, { "epoch": 0.3255408842281295, "grad_norm": 0.7327177721134271, "learning_rate": 5.491208461390654e-07, "loss": 1.4314, "step": 4672 }, { "epoch": 0.3256105633557468, "grad_norm": 0.7566433006080883, "learning_rate": 5.490619842936248e-07, "loss": 1.6511, "step": 4673 }, { "epoch": 0.32568024248336414, "grad_norm": 0.7885667972779885, "learning_rate": 5.490031145859969e-07, "loss": 1.7014, "step": 4674 }, { "epoch": 0.32574992161098143, "grad_norm": 0.7768801147957153, "learning_rate": 5.489442370190032e-07, "loss": 1.5669, "step": 4675 }, { "epoch": 0.3258196007385988, "grad_norm": 0.720878744425241, "learning_rate": 5.488853515954651e-07, "loss": 1.5863, "step": 4676 }, { "epoch": 0.3258892798662161, "grad_norm": 0.6862498975781554, "learning_rate": 5.488264583182043e-07, "loss": 1.6059, "step": 4677 }, { "epoch": 0.32595895899383337, "grad_norm": 0.7409461109224728, "learning_rate": 5.487675571900435e-07, "loss": 1.5253, "step": 4678 }, { "epoch": 0.3260286381214507, "grad_norm": 0.7660810857821033, "learning_rate": 5.48708648213805e-07, "loss": 1.5677, "step": 4679 }, { "epoch": 0.326098317249068, "grad_norm": 0.709707457530693, "learning_rate": 5.486497313923121e-07, "loss": 1.4906, "step": 4680 }, { "epoch": 0.32616799637668537, "grad_norm": 0.7039157582721269, "learning_rate": 5.48590806728388e-07, "loss": 1.642, "step": 4681 }, { "epoch": 0.32623767550430266, "grad_norm": 0.8055219249645671, "learning_rate": 5.485318742248567e-07, "loss": 1.5276, "step": 4682 }, { "epoch": 0.32630735463192, "grad_norm": 0.7082044224744788, "learning_rate": 5.484729338845422e-07, "loss": 1.5328, "step": 4683 }, { "epoch": 0.3263770337595373, "grad_norm": 0.7195194869776044, "learning_rate": 5.484139857102691e-07, "loss": 1.564, "step": 4684 }, { "epoch": 0.32644671288715466, "grad_norm": 0.7260360051203139, "learning_rate": 5.483550297048624e-07, "loss": 1.5654, "step": 4685 }, { "epoch": 0.32651639201477195, "grad_norm": 0.7095863416019655, "learning_rate": 5.482960658711472e-07, "loss": 1.6385, "step": 4686 }, { "epoch": 0.3265860711423893, "grad_norm": 0.7089680351775876, "learning_rate": 5.482370942119494e-07, "loss": 1.5918, "step": 4687 }, { "epoch": 0.3266557502700066, "grad_norm": 0.7754253458635536, "learning_rate": 5.481781147300948e-07, "loss": 1.5457, "step": 4688 }, { "epoch": 0.32672542939762395, "grad_norm": 0.7133933783068345, "learning_rate": 5.481191274284101e-07, "loss": 1.4844, "step": 4689 }, { "epoch": 0.32679510852524124, "grad_norm": 0.7290333114124878, "learning_rate": 5.480601323097218e-07, "loss": 1.6388, "step": 4690 }, { "epoch": 0.3268647876528586, "grad_norm": 0.7342510780107464, "learning_rate": 5.480011293768572e-07, "loss": 1.4667, "step": 4691 }, { "epoch": 0.3269344667804759, "grad_norm": 0.691124208728813, "learning_rate": 5.479421186326439e-07, "loss": 1.4353, "step": 4692 }, { "epoch": 0.32700414590809324, "grad_norm": 0.7409084935108537, "learning_rate": 5.478831000799098e-07, "loss": 1.4188, "step": 4693 }, { "epoch": 0.32707382503571053, "grad_norm": 0.7581250592188127, "learning_rate": 5.478240737214831e-07, "loss": 1.4854, "step": 4694 }, { "epoch": 0.3271435041633279, "grad_norm": 0.6916383712026993, "learning_rate": 5.477650395601926e-07, "loss": 1.6155, "step": 4695 }, { "epoch": 0.3272131832909452, "grad_norm": 0.7525082735725286, "learning_rate": 5.477059975988671e-07, "loss": 1.6562, "step": 4696 }, { "epoch": 0.3272828624185625, "grad_norm": 0.6850325687097222, "learning_rate": 5.476469478403363e-07, "loss": 1.5095, "step": 4697 }, { "epoch": 0.3273525415461798, "grad_norm": 0.6911370545439388, "learning_rate": 5.475878902874298e-07, "loss": 1.5014, "step": 4698 }, { "epoch": 0.32742222067379717, "grad_norm": 0.6779251995747619, "learning_rate": 5.475288249429777e-07, "loss": 1.521, "step": 4699 }, { "epoch": 0.32749189980141447, "grad_norm": 0.6777351655241748, "learning_rate": 5.474697518098108e-07, "loss": 1.5156, "step": 4700 }, { "epoch": 0.3275615789290318, "grad_norm": 0.6809433325320242, "learning_rate": 5.4741067089076e-07, "loss": 1.5013, "step": 4701 }, { "epoch": 0.3276312580566491, "grad_norm": 0.7119144281537158, "learning_rate": 5.47351582188656e-07, "loss": 1.5415, "step": 4702 }, { "epoch": 0.32770093718426646, "grad_norm": 0.6613717045837818, "learning_rate": 5.472924857063311e-07, "loss": 1.4926, "step": 4703 }, { "epoch": 0.32777061631188376, "grad_norm": 0.7288042034986972, "learning_rate": 5.472333814466173e-07, "loss": 1.4969, "step": 4704 }, { "epoch": 0.3278402954395011, "grad_norm": 0.6995801218556058, "learning_rate": 5.471742694123465e-07, "loss": 1.4725, "step": 4705 }, { "epoch": 0.3279099745671184, "grad_norm": 0.6891604297815477, "learning_rate": 5.471151496063519e-07, "loss": 1.4852, "step": 4706 }, { "epoch": 0.32797965369473575, "grad_norm": 0.7077304381998237, "learning_rate": 5.470560220314666e-07, "loss": 1.5236, "step": 4707 }, { "epoch": 0.32804933282235305, "grad_norm": 0.6457256925404681, "learning_rate": 5.469968866905239e-07, "loss": 1.5301, "step": 4708 }, { "epoch": 0.3281190119499704, "grad_norm": 0.7383356695325461, "learning_rate": 5.469377435863577e-07, "loss": 1.5252, "step": 4709 }, { "epoch": 0.3281886910775877, "grad_norm": 0.7226731345944863, "learning_rate": 5.468785927218026e-07, "loss": 1.5984, "step": 4710 }, { "epoch": 0.32825837020520504, "grad_norm": 0.7351157644434121, "learning_rate": 5.468194340996929e-07, "loss": 1.662, "step": 4711 }, { "epoch": 0.32832804933282234, "grad_norm": 0.6957818071174824, "learning_rate": 5.467602677228638e-07, "loss": 1.4365, "step": 4712 }, { "epoch": 0.3283977284604397, "grad_norm": 0.7196864910738779, "learning_rate": 5.467010935941507e-07, "loss": 1.5894, "step": 4713 }, { "epoch": 0.328467407588057, "grad_norm": 0.7146083912133652, "learning_rate": 5.466419117163889e-07, "loss": 1.4484, "step": 4714 }, { "epoch": 0.32853708671567433, "grad_norm": 0.8106128842589259, "learning_rate": 5.465827220924151e-07, "loss": 1.4741, "step": 4715 }, { "epoch": 0.3286067658432916, "grad_norm": 0.7103171764326403, "learning_rate": 5.465235247250653e-07, "loss": 1.508, "step": 4716 }, { "epoch": 0.328676444970909, "grad_norm": 0.6812060402187873, "learning_rate": 5.464643196171767e-07, "loss": 1.4331, "step": 4717 }, { "epoch": 0.32874612409852627, "grad_norm": 0.7177732946700219, "learning_rate": 5.464051067715865e-07, "loss": 1.4702, "step": 4718 }, { "epoch": 0.3288158032261436, "grad_norm": 0.6958049083294975, "learning_rate": 5.463458861911322e-07, "loss": 1.4755, "step": 4719 }, { "epoch": 0.3288854823537609, "grad_norm": 0.7325285859162157, "learning_rate": 5.462866578786518e-07, "loss": 1.4601, "step": 4720 }, { "epoch": 0.32895516148137827, "grad_norm": 0.7582230823199709, "learning_rate": 5.462274218369836e-07, "loss": 1.4437, "step": 4721 }, { "epoch": 0.32902484060899556, "grad_norm": 0.7210199222494685, "learning_rate": 5.461681780689663e-07, "loss": 1.5113, "step": 4722 }, { "epoch": 0.3290945197366129, "grad_norm": 0.6795180257214272, "learning_rate": 5.461089265774391e-07, "loss": 1.4581, "step": 4723 }, { "epoch": 0.3291641988642302, "grad_norm": 0.7355181900180564, "learning_rate": 5.460496673652414e-07, "loss": 1.5793, "step": 4724 }, { "epoch": 0.32923387799184756, "grad_norm": 0.7418505821521018, "learning_rate": 5.45990400435213e-07, "loss": 1.5742, "step": 4725 }, { "epoch": 0.32930355711946485, "grad_norm": 0.8148378311162706, "learning_rate": 5.459311257901941e-07, "loss": 1.5995, "step": 4726 }, { "epoch": 0.3293732362470822, "grad_norm": 0.9894964109825274, "learning_rate": 5.458718434330252e-07, "loss": 1.4232, "step": 4727 }, { "epoch": 0.3294429153746995, "grad_norm": 0.701725083846248, "learning_rate": 5.458125533665475e-07, "loss": 1.5014, "step": 4728 }, { "epoch": 0.32951259450231685, "grad_norm": 0.7596048302356802, "learning_rate": 5.45753255593602e-07, "loss": 1.5457, "step": 4729 }, { "epoch": 0.32958227362993414, "grad_norm": 0.6908157411025175, "learning_rate": 5.456939501170304e-07, "loss": 1.5558, "step": 4730 }, { "epoch": 0.3296519527575515, "grad_norm": 0.7496144153051155, "learning_rate": 5.456346369396747e-07, "loss": 1.578, "step": 4731 }, { "epoch": 0.3297216318851688, "grad_norm": 0.7164436063279995, "learning_rate": 5.455753160643777e-07, "loss": 1.6629, "step": 4732 }, { "epoch": 0.32979131101278614, "grad_norm": 0.7559462022136434, "learning_rate": 5.455159874939819e-07, "loss": 1.4736, "step": 4733 }, { "epoch": 0.32986099014040343, "grad_norm": 0.7600862407544624, "learning_rate": 5.454566512313302e-07, "loss": 1.4862, "step": 4734 }, { "epoch": 0.3299306692680208, "grad_norm": 0.7004081297809731, "learning_rate": 5.453973072792665e-07, "loss": 1.5029, "step": 4735 }, { "epoch": 0.3300003483956381, "grad_norm": 0.72591322289454, "learning_rate": 5.453379556406344e-07, "loss": 1.5313, "step": 4736 }, { "epoch": 0.3300700275232554, "grad_norm": 0.7659965365810644, "learning_rate": 5.452785963182786e-07, "loss": 1.5111, "step": 4737 }, { "epoch": 0.3301397066508727, "grad_norm": 0.7075696002326214, "learning_rate": 5.452192293150432e-07, "loss": 1.6228, "step": 4738 }, { "epoch": 0.33020938577849007, "grad_norm": 0.7345339015777442, "learning_rate": 5.451598546337734e-07, "loss": 1.6043, "step": 4739 }, { "epoch": 0.33027906490610737, "grad_norm": 0.7963071700771978, "learning_rate": 5.451004722773148e-07, "loss": 1.6095, "step": 4740 }, { "epoch": 0.3303487440337247, "grad_norm": 0.7525640730979843, "learning_rate": 5.450410822485126e-07, "loss": 1.4938, "step": 4741 }, { "epoch": 0.330418423161342, "grad_norm": 0.6891761133669421, "learning_rate": 5.449816845502132e-07, "loss": 1.4215, "step": 4742 }, { "epoch": 0.33048810228895936, "grad_norm": 0.7469893291500803, "learning_rate": 5.449222791852631e-07, "loss": 1.5721, "step": 4743 }, { "epoch": 0.33055778141657666, "grad_norm": 0.7269189267880387, "learning_rate": 5.448628661565092e-07, "loss": 1.5862, "step": 4744 }, { "epoch": 0.330627460544194, "grad_norm": 0.7483789383148026, "learning_rate": 5.448034454667984e-07, "loss": 1.6084, "step": 4745 }, { "epoch": 0.3306971396718113, "grad_norm": 0.7177898667623517, "learning_rate": 5.447440171189784e-07, "loss": 1.5663, "step": 4746 }, { "epoch": 0.33076681879942865, "grad_norm": 0.7687765020088722, "learning_rate": 5.446845811158973e-07, "loss": 1.5396, "step": 4747 }, { "epoch": 0.33083649792704595, "grad_norm": 0.7445506289197298, "learning_rate": 5.446251374604032e-07, "loss": 1.6004, "step": 4748 }, { "epoch": 0.3309061770546633, "grad_norm": 0.7628599743997581, "learning_rate": 5.445656861553449e-07, "loss": 1.5991, "step": 4749 }, { "epoch": 0.3309758561822806, "grad_norm": 0.7690295768355484, "learning_rate": 5.44506227203571e-07, "loss": 1.608, "step": 4750 }, { "epoch": 0.33104553530989794, "grad_norm": 0.7066359165688519, "learning_rate": 5.444467606079316e-07, "loss": 1.553, "step": 4751 }, { "epoch": 0.33111521443751524, "grad_norm": 0.7107118246361575, "learning_rate": 5.443872863712759e-07, "loss": 1.445, "step": 4752 }, { "epoch": 0.3311848935651326, "grad_norm": 0.7189520145837976, "learning_rate": 5.443278044964542e-07, "loss": 1.5686, "step": 4753 }, { "epoch": 0.3312545726927499, "grad_norm": 0.6934783281831515, "learning_rate": 5.442683149863171e-07, "loss": 1.4495, "step": 4754 }, { "epoch": 0.33132425182036723, "grad_norm": 0.7524447895980935, "learning_rate": 5.442088178437154e-07, "loss": 1.6211, "step": 4755 }, { "epoch": 0.3313939309479845, "grad_norm": 0.7002606597250953, "learning_rate": 5.441493130715002e-07, "loss": 1.5382, "step": 4756 }, { "epoch": 0.3314636100756019, "grad_norm": 0.7542379695219257, "learning_rate": 5.440898006725234e-07, "loss": 1.6374, "step": 4757 }, { "epoch": 0.3315332892032192, "grad_norm": 0.7312472818227834, "learning_rate": 5.440302806496365e-07, "loss": 1.4442, "step": 4758 }, { "epoch": 0.3316029683308365, "grad_norm": 0.6765765400729704, "learning_rate": 5.439707530056922e-07, "loss": 1.5379, "step": 4759 }, { "epoch": 0.3316726474584538, "grad_norm": 0.7145871013427026, "learning_rate": 5.43911217743543e-07, "loss": 1.4743, "step": 4760 }, { "epoch": 0.33174232658607117, "grad_norm": 0.7534020290273412, "learning_rate": 5.438516748660421e-07, "loss": 1.5155, "step": 4761 }, { "epoch": 0.33181200571368846, "grad_norm": 0.701107642439635, "learning_rate": 5.437921243760427e-07, "loss": 1.5799, "step": 4762 }, { "epoch": 0.3318816848413058, "grad_norm": 0.8062590359838392, "learning_rate": 5.437325662763987e-07, "loss": 1.6094, "step": 4763 }, { "epoch": 0.3319513639689231, "grad_norm": 0.7264396881824076, "learning_rate": 5.436730005699644e-07, "loss": 1.5333, "step": 4764 }, { "epoch": 0.33202104309654046, "grad_norm": 0.7164683710272938, "learning_rate": 5.436134272595941e-07, "loss": 1.643, "step": 4765 }, { "epoch": 0.33209072222415775, "grad_norm": 0.7681061882158766, "learning_rate": 5.435538463481427e-07, "loss": 1.7029, "step": 4766 }, { "epoch": 0.3321604013517751, "grad_norm": 0.68673542098374, "learning_rate": 5.434942578384654e-07, "loss": 1.4273, "step": 4767 }, { "epoch": 0.3322300804793924, "grad_norm": 0.7630700252413264, "learning_rate": 5.43434661733418e-07, "loss": 1.6314, "step": 4768 }, { "epoch": 0.3322997596070097, "grad_norm": 0.7378555507316659, "learning_rate": 5.433750580358563e-07, "loss": 1.5883, "step": 4769 }, { "epoch": 0.33236943873462704, "grad_norm": 0.7184196608437186, "learning_rate": 5.433154467486367e-07, "loss": 1.553, "step": 4770 }, { "epoch": 0.33243911786224434, "grad_norm": 0.7610107203023418, "learning_rate": 5.43255827874616e-07, "loss": 1.5474, "step": 4771 }, { "epoch": 0.3325087969898617, "grad_norm": 0.6578353625178328, "learning_rate": 5.43196201416651e-07, "loss": 1.5826, "step": 4772 }, { "epoch": 0.332578476117479, "grad_norm": 0.7332684613958692, "learning_rate": 5.43136567377599e-07, "loss": 1.5332, "step": 4773 }, { "epoch": 0.33264815524509633, "grad_norm": 0.7403565072313969, "learning_rate": 5.430769257603185e-07, "loss": 1.6311, "step": 4774 }, { "epoch": 0.33271783437271363, "grad_norm": 0.7207657489954281, "learning_rate": 5.43017276567667e-07, "loss": 1.6177, "step": 4775 }, { "epoch": 0.332787513500331, "grad_norm": 0.7303985385444817, "learning_rate": 5.429576198025032e-07, "loss": 1.5329, "step": 4776 }, { "epoch": 0.3328571926279483, "grad_norm": 0.7035495502689031, "learning_rate": 5.428979554676861e-07, "loss": 1.4916, "step": 4777 }, { "epoch": 0.3329268717555656, "grad_norm": 0.7003566219294264, "learning_rate": 5.428382835660746e-07, "loss": 1.5497, "step": 4778 }, { "epoch": 0.3329965508831829, "grad_norm": 0.7612676386996001, "learning_rate": 5.427786041005286e-07, "loss": 1.5206, "step": 4779 }, { "epoch": 0.33306623001080027, "grad_norm": 0.7094601748268057, "learning_rate": 5.42718917073908e-07, "loss": 1.5688, "step": 4780 }, { "epoch": 0.33313590913841756, "grad_norm": 0.728996130018037, "learning_rate": 5.426592224890731e-07, "loss": 1.5695, "step": 4781 }, { "epoch": 0.3332055882660349, "grad_norm": 0.6826206661442296, "learning_rate": 5.425995203488846e-07, "loss": 1.5038, "step": 4782 }, { "epoch": 0.3332752673936522, "grad_norm": 0.6850069680455925, "learning_rate": 5.425398106562034e-07, "loss": 1.4939, "step": 4783 }, { "epoch": 0.33334494652126956, "grad_norm": 0.7198182130651017, "learning_rate": 5.424800934138913e-07, "loss": 1.6911, "step": 4784 }, { "epoch": 0.33341462564888685, "grad_norm": 0.7031776478007508, "learning_rate": 5.424203686248098e-07, "loss": 1.3148, "step": 4785 }, { "epoch": 0.3334843047765042, "grad_norm": 0.7033765004420741, "learning_rate": 5.423606362918209e-07, "loss": 1.5843, "step": 4786 }, { "epoch": 0.3335539839041215, "grad_norm": 0.7277452680681038, "learning_rate": 5.423008964177873e-07, "loss": 1.6939, "step": 4787 }, { "epoch": 0.33362366303173885, "grad_norm": 0.721356540917203, "learning_rate": 5.422411490055717e-07, "loss": 1.5653, "step": 4788 }, { "epoch": 0.33369334215935614, "grad_norm": 0.7036008560433303, "learning_rate": 5.421813940580377e-07, "loss": 1.6107, "step": 4789 }, { "epoch": 0.3337630212869735, "grad_norm": 0.7458747498855014, "learning_rate": 5.421216315780484e-07, "loss": 1.6373, "step": 4790 }, { "epoch": 0.3338327004145908, "grad_norm": 0.7173976160596333, "learning_rate": 5.420618615684681e-07, "loss": 1.5497, "step": 4791 }, { "epoch": 0.33390237954220814, "grad_norm": 0.7257908124277479, "learning_rate": 5.420020840321608e-07, "loss": 1.6506, "step": 4792 }, { "epoch": 0.33397205866982543, "grad_norm": 0.7276705606366335, "learning_rate": 5.419422989719914e-07, "loss": 1.5947, "step": 4793 }, { "epoch": 0.3340417377974428, "grad_norm": 0.7124819335777425, "learning_rate": 5.418825063908247e-07, "loss": 1.5129, "step": 4794 }, { "epoch": 0.3341114169250601, "grad_norm": 0.6973185583977857, "learning_rate": 5.418227062915263e-07, "loss": 1.4769, "step": 4795 }, { "epoch": 0.33418109605267743, "grad_norm": 0.7171103000694339, "learning_rate": 5.417628986769621e-07, "loss": 1.528, "step": 4796 }, { "epoch": 0.3342507751802947, "grad_norm": 0.7212360242606413, "learning_rate": 5.417030835499978e-07, "loss": 1.4938, "step": 4797 }, { "epoch": 0.3343204543079121, "grad_norm": 0.7517464170152247, "learning_rate": 5.416432609135e-07, "loss": 1.609, "step": 4798 }, { "epoch": 0.33439013343552937, "grad_norm": 0.8652168233618581, "learning_rate": 5.415834307703356e-07, "loss": 1.5062, "step": 4799 }, { "epoch": 0.3344598125631467, "grad_norm": 0.7218739889842337, "learning_rate": 5.415235931233716e-07, "loss": 1.4711, "step": 4800 }, { "epoch": 0.334529491690764, "grad_norm": 0.7094515975723757, "learning_rate": 5.414637479754757e-07, "loss": 1.5788, "step": 4801 }, { "epoch": 0.33459917081838136, "grad_norm": 0.7393768229184177, "learning_rate": 5.414038953295158e-07, "loss": 1.5629, "step": 4802 }, { "epoch": 0.33466884994599866, "grad_norm": 0.7302062209177451, "learning_rate": 5.413440351883602e-07, "loss": 1.6523, "step": 4803 }, { "epoch": 0.334738529073616, "grad_norm": 0.6845338065322232, "learning_rate": 5.412841675548776e-07, "loss": 1.522, "step": 4804 }, { "epoch": 0.3348082082012333, "grad_norm": 0.7235354663832575, "learning_rate": 5.412242924319366e-07, "loss": 1.5268, "step": 4805 }, { "epoch": 0.33487788732885065, "grad_norm": 0.7613419830759305, "learning_rate": 5.411644098224069e-07, "loss": 1.4734, "step": 4806 }, { "epoch": 0.33494756645646795, "grad_norm": 0.6741423643970372, "learning_rate": 5.411045197291581e-07, "loss": 1.5251, "step": 4807 }, { "epoch": 0.3350172455840853, "grad_norm": 0.7105937235840101, "learning_rate": 5.410446221550603e-07, "loss": 1.5714, "step": 4808 }, { "epoch": 0.3350869247117026, "grad_norm": 0.7391867622954359, "learning_rate": 5.409847171029837e-07, "loss": 1.5494, "step": 4809 }, { "epoch": 0.33515660383931994, "grad_norm": 0.697675218773251, "learning_rate": 5.409248045757993e-07, "loss": 1.5154, "step": 4810 }, { "epoch": 0.33522628296693724, "grad_norm": 0.6927014456831924, "learning_rate": 5.408648845763781e-07, "loss": 1.5613, "step": 4811 }, { "epoch": 0.3352959620945546, "grad_norm": 0.7007622265131535, "learning_rate": 5.408049571075917e-07, "loss": 1.515, "step": 4812 }, { "epoch": 0.3353656412221719, "grad_norm": 0.6629006984199853, "learning_rate": 5.40745022172312e-07, "loss": 1.4397, "step": 4813 }, { "epoch": 0.33543532034978923, "grad_norm": 0.7762079077753187, "learning_rate": 5.406850797734109e-07, "loss": 1.7242, "step": 4814 }, { "epoch": 0.33550499947740653, "grad_norm": 0.705869108994762, "learning_rate": 5.406251299137613e-07, "loss": 1.5714, "step": 4815 }, { "epoch": 0.3355746786050239, "grad_norm": 0.6860445057284045, "learning_rate": 5.405651725962358e-07, "loss": 1.5411, "step": 4816 }, { "epoch": 0.3356443577326412, "grad_norm": 0.6967122024785454, "learning_rate": 5.405052078237082e-07, "loss": 1.6579, "step": 4817 }, { "epoch": 0.3357140368602585, "grad_norm": 0.7647204058296255, "learning_rate": 5.404452355990515e-07, "loss": 1.5023, "step": 4818 }, { "epoch": 0.3357837159878758, "grad_norm": 0.7108929741768184, "learning_rate": 5.403852559251401e-07, "loss": 1.5517, "step": 4819 }, { "epoch": 0.33585339511549317, "grad_norm": 0.8140504410329782, "learning_rate": 5.403252688048482e-07, "loss": 1.5006, "step": 4820 }, { "epoch": 0.33592307424311046, "grad_norm": 0.6860079696803548, "learning_rate": 5.402652742410505e-07, "loss": 1.6411, "step": 4821 }, { "epoch": 0.3359927533707278, "grad_norm": 0.7650567940545888, "learning_rate": 5.402052722366221e-07, "loss": 1.3765, "step": 4822 }, { "epoch": 0.3360624324983451, "grad_norm": 0.7242517343750577, "learning_rate": 5.401452627944387e-07, "loss": 1.4242, "step": 4823 }, { "epoch": 0.33613211162596246, "grad_norm": 0.7431924617453302, "learning_rate": 5.400852459173754e-07, "loss": 1.586, "step": 4824 }, { "epoch": 0.33620179075357975, "grad_norm": 0.698467639711488, "learning_rate": 5.40025221608309e-07, "loss": 1.5904, "step": 4825 }, { "epoch": 0.3362714698811971, "grad_norm": 0.7824488496164661, "learning_rate": 5.399651898701156e-07, "loss": 1.4601, "step": 4826 }, { "epoch": 0.3363411490088144, "grad_norm": 0.7380126909494652, "learning_rate": 5.399051507056722e-07, "loss": 1.421, "step": 4827 }, { "epoch": 0.33641082813643175, "grad_norm": 0.7554036049745274, "learning_rate": 5.39845104117856e-07, "loss": 1.5047, "step": 4828 }, { "epoch": 0.33648050726404904, "grad_norm": 0.6730427825408142, "learning_rate": 5.397850501095445e-07, "loss": 1.5169, "step": 4829 }, { "epoch": 0.3365501863916664, "grad_norm": 0.7522968508915672, "learning_rate": 5.397249886836155e-07, "loss": 1.579, "step": 4830 }, { "epoch": 0.3366198655192837, "grad_norm": 0.7656384123478277, "learning_rate": 5.396649198429476e-07, "loss": 1.5836, "step": 4831 }, { "epoch": 0.33668954464690104, "grad_norm": 0.7095621969140741, "learning_rate": 5.396048435904192e-07, "loss": 1.5901, "step": 4832 }, { "epoch": 0.33675922377451833, "grad_norm": 0.6984903216291455, "learning_rate": 5.395447599289092e-07, "loss": 1.5424, "step": 4833 }, { "epoch": 0.3368289029021357, "grad_norm": 0.7296764226484795, "learning_rate": 5.394846688612969e-07, "loss": 1.5648, "step": 4834 }, { "epoch": 0.336898582029753, "grad_norm": 0.6635513910837075, "learning_rate": 5.394245703904623e-07, "loss": 1.4931, "step": 4835 }, { "epoch": 0.33696826115737033, "grad_norm": 0.7388466868414264, "learning_rate": 5.393644645192853e-07, "loss": 1.4121, "step": 4836 }, { "epoch": 0.3370379402849876, "grad_norm": 0.7112377558521906, "learning_rate": 5.393043512506462e-07, "loss": 1.4546, "step": 4837 }, { "epoch": 0.337107619412605, "grad_norm": 0.727315967629537, "learning_rate": 5.392442305874258e-07, "loss": 1.6098, "step": 4838 }, { "epoch": 0.33717729854022227, "grad_norm": 0.8099460689341998, "learning_rate": 5.391841025325051e-07, "loss": 1.6303, "step": 4839 }, { "epoch": 0.3372469776678396, "grad_norm": 0.7051935142489242, "learning_rate": 5.391239670887659e-07, "loss": 1.4497, "step": 4840 }, { "epoch": 0.3373166567954569, "grad_norm": 0.6783397479396385, "learning_rate": 5.390638242590897e-07, "loss": 1.5105, "step": 4841 }, { "epoch": 0.33738633592307427, "grad_norm": 0.7347089211759783, "learning_rate": 5.390036740463587e-07, "loss": 1.6473, "step": 4842 }, { "epoch": 0.33745601505069156, "grad_norm": 0.6732402289305454, "learning_rate": 5.389435164534555e-07, "loss": 1.4919, "step": 4843 }, { "epoch": 0.3375256941783089, "grad_norm": 0.7148083262660647, "learning_rate": 5.38883351483263e-07, "loss": 1.5546, "step": 4844 }, { "epoch": 0.3375953733059262, "grad_norm": 0.6836003305145799, "learning_rate": 5.388231791386643e-07, "loss": 1.4778, "step": 4845 }, { "epoch": 0.33766505243354356, "grad_norm": 0.8084013190577042, "learning_rate": 5.387629994225432e-07, "loss": 1.6094, "step": 4846 }, { "epoch": 0.33773473156116085, "grad_norm": 0.662801305334541, "learning_rate": 5.387028123377832e-07, "loss": 1.4713, "step": 4847 }, { "epoch": 0.3378044106887782, "grad_norm": 0.6791407537391915, "learning_rate": 5.386426178872692e-07, "loss": 1.5404, "step": 4848 }, { "epoch": 0.3378740898163955, "grad_norm": 0.7335026978467318, "learning_rate": 5.385824160738854e-07, "loss": 1.5911, "step": 4849 }, { "epoch": 0.33794376894401285, "grad_norm": 0.6875140299288989, "learning_rate": 5.385222069005169e-07, "loss": 1.5747, "step": 4850 }, { "epoch": 0.33801344807163014, "grad_norm": 0.6989430758131691, "learning_rate": 5.384619903700492e-07, "loss": 1.4246, "step": 4851 }, { "epoch": 0.3380831271992475, "grad_norm": 0.6590948742130245, "learning_rate": 5.384017664853677e-07, "loss": 1.5146, "step": 4852 }, { "epoch": 0.3381528063268648, "grad_norm": 0.706352782049481, "learning_rate": 5.383415352493587e-07, "loss": 1.571, "step": 4853 }, { "epoch": 0.33822248545448214, "grad_norm": 0.7519551620740152, "learning_rate": 5.382812966649086e-07, "loss": 1.5446, "step": 4854 }, { "epoch": 0.33829216458209943, "grad_norm": 0.7099021233898958, "learning_rate": 5.38221050734904e-07, "loss": 1.5529, "step": 4855 }, { "epoch": 0.3383618437097168, "grad_norm": 0.7199442639569562, "learning_rate": 5.38160797462232e-07, "loss": 1.534, "step": 4856 }, { "epoch": 0.3384315228373341, "grad_norm": 0.7144789348289255, "learning_rate": 5.381005368497803e-07, "loss": 1.4376, "step": 4857 }, { "epoch": 0.3385012019649514, "grad_norm": 0.7403548398371584, "learning_rate": 5.380402689004365e-07, "loss": 1.5743, "step": 4858 }, { "epoch": 0.3385708810925687, "grad_norm": 0.7627039850404922, "learning_rate": 5.379799936170888e-07, "loss": 1.5054, "step": 4859 }, { "epoch": 0.33864056022018607, "grad_norm": 0.677840868673094, "learning_rate": 5.379197110026258e-07, "loss": 1.5203, "step": 4860 }, { "epoch": 0.33871023934780337, "grad_norm": 0.7394880148402937, "learning_rate": 5.378594210599363e-07, "loss": 1.61, "step": 4861 }, { "epoch": 0.33877991847542066, "grad_norm": 0.7543058613191993, "learning_rate": 5.377991237919096e-07, "loss": 1.5824, "step": 4862 }, { "epoch": 0.338849597603038, "grad_norm": 0.665265338341848, "learning_rate": 5.377388192014351e-07, "loss": 1.4639, "step": 4863 }, { "epoch": 0.3389192767306553, "grad_norm": 0.711462501878098, "learning_rate": 5.376785072914029e-07, "loss": 1.5846, "step": 4864 }, { "epoch": 0.33898895585827266, "grad_norm": 0.7270116627711709, "learning_rate": 5.37618188064703e-07, "loss": 1.5409, "step": 4865 }, { "epoch": 0.33905863498588995, "grad_norm": 0.6769787698247857, "learning_rate": 5.375578615242263e-07, "loss": 1.5406, "step": 4866 }, { "epoch": 0.3391283141135073, "grad_norm": 0.7193369604108305, "learning_rate": 5.374975276728638e-07, "loss": 1.5786, "step": 4867 }, { "epoch": 0.3391979932411246, "grad_norm": 0.7083441966840394, "learning_rate": 5.374371865135067e-07, "loss": 1.4636, "step": 4868 }, { "epoch": 0.33926767236874195, "grad_norm": 0.6917558146241541, "learning_rate": 5.373768380490466e-07, "loss": 1.4435, "step": 4869 }, { "epoch": 0.33933735149635924, "grad_norm": 0.714922470007537, "learning_rate": 5.373164822823755e-07, "loss": 1.4841, "step": 4870 }, { "epoch": 0.3394070306239766, "grad_norm": 0.717829779405514, "learning_rate": 5.37256119216386e-07, "loss": 1.5644, "step": 4871 }, { "epoch": 0.3394767097515939, "grad_norm": 0.7743959381513768, "learning_rate": 5.371957488539706e-07, "loss": 1.5185, "step": 4872 }, { "epoch": 0.33954638887921124, "grad_norm": 0.8131617550648077, "learning_rate": 5.371353711980225e-07, "loss": 1.5316, "step": 4873 }, { "epoch": 0.33961606800682853, "grad_norm": 0.7709352415864421, "learning_rate": 5.370749862514352e-07, "loss": 1.5742, "step": 4874 }, { "epoch": 0.3396857471344459, "grad_norm": 0.7807457832872728, "learning_rate": 5.370145940171022e-07, "loss": 1.6104, "step": 4875 }, { "epoch": 0.3397554262620632, "grad_norm": 0.7267607315291826, "learning_rate": 5.369541944979178e-07, "loss": 1.4927, "step": 4876 }, { "epoch": 0.3398251053896805, "grad_norm": 0.7361558349717697, "learning_rate": 5.368937876967765e-07, "loss": 1.4753, "step": 4877 }, { "epoch": 0.3398947845172978, "grad_norm": 0.6852432333809807, "learning_rate": 5.36833373616573e-07, "loss": 1.5538, "step": 4878 }, { "epoch": 0.33996446364491517, "grad_norm": 0.7199238314597366, "learning_rate": 5.367729522602026e-07, "loss": 1.475, "step": 4879 }, { "epoch": 0.34003414277253247, "grad_norm": 0.7732480003051626, "learning_rate": 5.367125236305607e-07, "loss": 1.6253, "step": 4880 }, { "epoch": 0.3401038219001498, "grad_norm": 0.690556485623902, "learning_rate": 5.366520877305433e-07, "loss": 1.6229, "step": 4881 }, { "epoch": 0.3401735010277671, "grad_norm": 0.7113155930404748, "learning_rate": 5.365916445630464e-07, "loss": 1.5381, "step": 4882 }, { "epoch": 0.34024318015538446, "grad_norm": 0.6147503521616917, "learning_rate": 5.365311941309667e-07, "loss": 1.4189, "step": 4883 }, { "epoch": 0.34031285928300176, "grad_norm": 0.714706834724729, "learning_rate": 5.36470736437201e-07, "loss": 1.5128, "step": 4884 }, { "epoch": 0.3403825384106191, "grad_norm": 0.7150201217564616, "learning_rate": 5.364102714846469e-07, "loss": 1.5691, "step": 4885 }, { "epoch": 0.3404522175382364, "grad_norm": 0.7304098295016737, "learning_rate": 5.363497992762015e-07, "loss": 1.432, "step": 4886 }, { "epoch": 0.34052189666585375, "grad_norm": 0.7570206498289689, "learning_rate": 5.362893198147631e-07, "loss": 1.728, "step": 4887 }, { "epoch": 0.34059157579347105, "grad_norm": 0.7143465756664162, "learning_rate": 5.3622883310323e-07, "loss": 1.5685, "step": 4888 }, { "epoch": 0.3406612549210884, "grad_norm": 0.8122541734007522, "learning_rate": 5.361683391445006e-07, "loss": 1.5325, "step": 4889 }, { "epoch": 0.3407309340487057, "grad_norm": 0.7734092568022763, "learning_rate": 5.361078379414741e-07, "loss": 1.4959, "step": 4890 }, { "epoch": 0.34080061317632304, "grad_norm": 0.7251845358695838, "learning_rate": 5.360473294970499e-07, "loss": 1.5793, "step": 4891 }, { "epoch": 0.34087029230394034, "grad_norm": 0.7678069911342846, "learning_rate": 5.359868138141274e-07, "loss": 1.4537, "step": 4892 }, { "epoch": 0.3409399714315577, "grad_norm": 0.723545658098318, "learning_rate": 5.359262908956068e-07, "loss": 1.4833, "step": 4893 }, { "epoch": 0.341009650559175, "grad_norm": 0.710062730643498, "learning_rate": 5.358657607443887e-07, "loss": 1.5221, "step": 4894 }, { "epoch": 0.34107932968679233, "grad_norm": 0.7061472317372298, "learning_rate": 5.358052233633734e-07, "loss": 1.5833, "step": 4895 }, { "epoch": 0.3411490088144096, "grad_norm": 0.7442311521043259, "learning_rate": 5.357446787554623e-07, "loss": 1.5627, "step": 4896 }, { "epoch": 0.341218687942027, "grad_norm": 0.6977710712490137, "learning_rate": 5.356841269235568e-07, "loss": 1.5103, "step": 4897 }, { "epoch": 0.34128836706964427, "grad_norm": 0.6829996598763791, "learning_rate": 5.356235678705584e-07, "loss": 1.539, "step": 4898 }, { "epoch": 0.3413580461972616, "grad_norm": 0.7041108868124177, "learning_rate": 5.355630015993696e-07, "loss": 1.4318, "step": 4899 }, { "epoch": 0.3414277253248789, "grad_norm": 0.739511445992145, "learning_rate": 5.355024281128926e-07, "loss": 1.6007, "step": 4900 }, { "epoch": 0.34149740445249627, "grad_norm": 0.752034002439808, "learning_rate": 5.354418474140302e-07, "loss": 1.6381, "step": 4901 }, { "epoch": 0.34156708358011356, "grad_norm": 0.7260501221496931, "learning_rate": 5.353812595056856e-07, "loss": 1.6835, "step": 4902 }, { "epoch": 0.3416367627077309, "grad_norm": 0.6857671150292761, "learning_rate": 5.353206643907624e-07, "loss": 1.6081, "step": 4903 }, { "epoch": 0.3417064418353482, "grad_norm": 0.7102916293714224, "learning_rate": 5.352600620721644e-07, "loss": 1.4413, "step": 4904 }, { "epoch": 0.34177612096296556, "grad_norm": 0.6528079878598496, "learning_rate": 5.351994525527957e-07, "loss": 1.5078, "step": 4905 }, { "epoch": 0.34184580009058285, "grad_norm": 0.765193785606818, "learning_rate": 5.351388358355609e-07, "loss": 1.4708, "step": 4906 }, { "epoch": 0.3419154792182002, "grad_norm": 0.7233681669435182, "learning_rate": 5.350782119233648e-07, "loss": 1.5652, "step": 4907 }, { "epoch": 0.3419851583458175, "grad_norm": 0.6973226611884625, "learning_rate": 5.350175808191127e-07, "loss": 1.5033, "step": 4908 }, { "epoch": 0.34205483747343485, "grad_norm": 0.7136416717450348, "learning_rate": 5.349569425257101e-07, "loss": 1.4198, "step": 4909 }, { "epoch": 0.34212451660105214, "grad_norm": 0.7693775797382657, "learning_rate": 5.348962970460631e-07, "loss": 1.409, "step": 4910 }, { "epoch": 0.3421941957286695, "grad_norm": 0.7336409853595478, "learning_rate": 5.348356443830777e-07, "loss": 1.4816, "step": 4911 }, { "epoch": 0.3422638748562868, "grad_norm": 0.6823577987189021, "learning_rate": 5.347749845396606e-07, "loss": 1.5111, "step": 4912 }, { "epoch": 0.34233355398390414, "grad_norm": 0.7167651243573857, "learning_rate": 5.347143175187188e-07, "loss": 1.3348, "step": 4913 }, { "epoch": 0.34240323311152143, "grad_norm": 0.7398765728960008, "learning_rate": 5.346536433231596e-07, "loss": 1.465, "step": 4914 }, { "epoch": 0.3424729122391388, "grad_norm": 0.7291279264814856, "learning_rate": 5.345929619558905e-07, "loss": 1.4537, "step": 4915 }, { "epoch": 0.3425425913667561, "grad_norm": 0.6960763279406099, "learning_rate": 5.345322734198196e-07, "loss": 1.5036, "step": 4916 }, { "epoch": 0.3426122704943734, "grad_norm": 0.7419002594743389, "learning_rate": 5.344715777178551e-07, "loss": 1.6412, "step": 4917 }, { "epoch": 0.3426819496219907, "grad_norm": 0.7595820712509183, "learning_rate": 5.344108748529058e-07, "loss": 1.6397, "step": 4918 }, { "epoch": 0.3427516287496081, "grad_norm": 0.7694766737856703, "learning_rate": 5.343501648278807e-07, "loss": 1.6078, "step": 4919 }, { "epoch": 0.34282130787722537, "grad_norm": 0.6807981058044563, "learning_rate": 5.342894476456889e-07, "loss": 1.4479, "step": 4920 }, { "epoch": 0.3428909870048427, "grad_norm": 0.754974484289456, "learning_rate": 5.342287233092405e-07, "loss": 1.6514, "step": 4921 }, { "epoch": 0.34296066613246, "grad_norm": 0.7535510628217905, "learning_rate": 5.341679918214452e-07, "loss": 1.5696, "step": 4922 }, { "epoch": 0.34303034526007736, "grad_norm": 0.7357029495555406, "learning_rate": 5.341072531852134e-07, "loss": 1.521, "step": 4923 }, { "epoch": 0.34310002438769466, "grad_norm": 0.7082023447167447, "learning_rate": 5.340465074034562e-07, "loss": 1.5007, "step": 4924 }, { "epoch": 0.343169703515312, "grad_norm": 0.7097395892281676, "learning_rate": 5.339857544790843e-07, "loss": 1.4334, "step": 4925 }, { "epoch": 0.3432393826429293, "grad_norm": 0.7437771128644068, "learning_rate": 5.33924994415009e-07, "loss": 1.6869, "step": 4926 }, { "epoch": 0.34330906177054665, "grad_norm": 0.7222212332837278, "learning_rate": 5.338642272141424e-07, "loss": 1.6559, "step": 4927 }, { "epoch": 0.34337874089816395, "grad_norm": 0.774011346047693, "learning_rate": 5.338034528793963e-07, "loss": 1.6342, "step": 4928 }, { "epoch": 0.3434484200257813, "grad_norm": 0.6966346592207932, "learning_rate": 5.337426714136832e-07, "loss": 1.4734, "step": 4929 }, { "epoch": 0.3435180991533986, "grad_norm": 0.788871599051374, "learning_rate": 5.33681882819916e-07, "loss": 1.5862, "step": 4930 }, { "epoch": 0.34358777828101594, "grad_norm": 0.7397766374316244, "learning_rate": 5.336210871010078e-07, "loss": 1.6653, "step": 4931 }, { "epoch": 0.34365745740863324, "grad_norm": 0.7180829427756388, "learning_rate": 5.335602842598721e-07, "loss": 1.5283, "step": 4932 }, { "epoch": 0.3437271365362506, "grad_norm": 0.7348660111880068, "learning_rate": 5.334994742994224e-07, "loss": 1.618, "step": 4933 }, { "epoch": 0.3437968156638679, "grad_norm": 0.7752690830193244, "learning_rate": 5.33438657222573e-07, "loss": 1.6631, "step": 4934 }, { "epoch": 0.34386649479148523, "grad_norm": 0.7061573790825465, "learning_rate": 5.333778330322386e-07, "loss": 1.5554, "step": 4935 }, { "epoch": 0.34393617391910253, "grad_norm": 0.6966452925169359, "learning_rate": 5.333170017313336e-07, "loss": 1.4936, "step": 4936 }, { "epoch": 0.3440058530467199, "grad_norm": 0.7108489950765159, "learning_rate": 5.332561633227736e-07, "loss": 1.49, "step": 4937 }, { "epoch": 0.3440755321743372, "grad_norm": 0.7360768060995024, "learning_rate": 5.331953178094737e-07, "loss": 1.5874, "step": 4938 }, { "epoch": 0.3441452113019545, "grad_norm": 0.7061287583659489, "learning_rate": 5.3313446519435e-07, "loss": 1.5182, "step": 4939 }, { "epoch": 0.3442148904295718, "grad_norm": 0.7531667234468807, "learning_rate": 5.330736054803186e-07, "loss": 1.5243, "step": 4940 }, { "epoch": 0.34428456955718917, "grad_norm": 0.649115378193036, "learning_rate": 5.330127386702962e-07, "loss": 1.4874, "step": 4941 }, { "epoch": 0.34435424868480646, "grad_norm": 0.6839245166901333, "learning_rate": 5.329518647671992e-07, "loss": 1.4666, "step": 4942 }, { "epoch": 0.3444239278124238, "grad_norm": 0.6955071209132906, "learning_rate": 5.328909837739454e-07, "loss": 1.6033, "step": 4943 }, { "epoch": 0.3444936069400411, "grad_norm": 0.7160416982988688, "learning_rate": 5.328300956934519e-07, "loss": 1.4956, "step": 4944 }, { "epoch": 0.34456328606765846, "grad_norm": 0.6908646469509884, "learning_rate": 5.327692005286366e-07, "loss": 1.5294, "step": 4945 }, { "epoch": 0.34463296519527575, "grad_norm": 0.7472362365311507, "learning_rate": 5.327082982824181e-07, "loss": 1.5895, "step": 4946 }, { "epoch": 0.3447026443228931, "grad_norm": 0.7505746601450485, "learning_rate": 5.326473889577145e-07, "loss": 1.4966, "step": 4947 }, { "epoch": 0.3447723234505104, "grad_norm": 0.6600104860006836, "learning_rate": 5.325864725574451e-07, "loss": 1.4753, "step": 4948 }, { "epoch": 0.34484200257812775, "grad_norm": 0.7586810928023145, "learning_rate": 5.325255490845287e-07, "loss": 1.5685, "step": 4949 }, { "epoch": 0.34491168170574504, "grad_norm": 0.7852011275224828, "learning_rate": 5.324646185418853e-07, "loss": 1.6254, "step": 4950 }, { "epoch": 0.3449813608333624, "grad_norm": 0.7464220138774134, "learning_rate": 5.324036809324347e-07, "loss": 1.6366, "step": 4951 }, { "epoch": 0.3450510399609797, "grad_norm": 0.707679820201954, "learning_rate": 5.32342736259097e-07, "loss": 1.5273, "step": 4952 }, { "epoch": 0.345120719088597, "grad_norm": 0.6649346495566145, "learning_rate": 5.322817845247929e-07, "loss": 1.5056, "step": 4953 }, { "epoch": 0.34519039821621433, "grad_norm": 0.7446126608675369, "learning_rate": 5.322208257324433e-07, "loss": 1.5291, "step": 4954 }, { "epoch": 0.34526007734383163, "grad_norm": 0.7157920582125764, "learning_rate": 5.321598598849695e-07, "loss": 1.5463, "step": 4955 }, { "epoch": 0.345329756471449, "grad_norm": 0.7130418175302066, "learning_rate": 5.32098886985293e-07, "loss": 1.5481, "step": 4956 }, { "epoch": 0.3453994355990663, "grad_norm": 0.6549977386565085, "learning_rate": 5.320379070363359e-07, "loss": 1.38, "step": 4957 }, { "epoch": 0.3454691147266836, "grad_norm": 0.6719591171989239, "learning_rate": 5.319769200410205e-07, "loss": 1.5743, "step": 4958 }, { "epoch": 0.3455387938543009, "grad_norm": 0.7086929840190598, "learning_rate": 5.319159260022694e-07, "loss": 1.5321, "step": 4959 }, { "epoch": 0.34560847298191827, "grad_norm": 0.7158822266473911, "learning_rate": 5.318549249230055e-07, "loss": 1.558, "step": 4960 }, { "epoch": 0.34567815210953556, "grad_norm": 0.7338554111996759, "learning_rate": 5.317939168061521e-07, "loss": 1.6296, "step": 4961 }, { "epoch": 0.3457478312371529, "grad_norm": 0.716688713173419, "learning_rate": 5.317329016546326e-07, "loss": 1.5146, "step": 4962 }, { "epoch": 0.3458175103647702, "grad_norm": 0.6746045493486954, "learning_rate": 5.316718794713716e-07, "loss": 1.4125, "step": 4963 }, { "epoch": 0.34588718949238756, "grad_norm": 0.7673016809121121, "learning_rate": 5.316108502592928e-07, "loss": 1.4457, "step": 4964 }, { "epoch": 0.34595686862000485, "grad_norm": 0.6981807378933002, "learning_rate": 5.315498140213211e-07, "loss": 1.4776, "step": 4965 }, { "epoch": 0.3460265477476222, "grad_norm": 0.7111768030048936, "learning_rate": 5.314887707603814e-07, "loss": 1.4599, "step": 4966 }, { "epoch": 0.3460962268752395, "grad_norm": 0.7897503701287117, "learning_rate": 5.314277204793992e-07, "loss": 1.6354, "step": 4967 }, { "epoch": 0.34616590600285685, "grad_norm": 0.7191071514005091, "learning_rate": 5.313666631813e-07, "loss": 1.5362, "step": 4968 }, { "epoch": 0.34623558513047414, "grad_norm": 0.7216111690405148, "learning_rate": 5.313055988690098e-07, "loss": 1.4787, "step": 4969 }, { "epoch": 0.3463052642580915, "grad_norm": 0.6989929154312301, "learning_rate": 5.312445275454549e-07, "loss": 1.5059, "step": 4970 }, { "epoch": 0.3463749433857088, "grad_norm": 0.7180325961762156, "learning_rate": 5.31183449213562e-07, "loss": 1.5621, "step": 4971 }, { "epoch": 0.34644462251332614, "grad_norm": 0.6972183647561123, "learning_rate": 5.311223638762581e-07, "loss": 1.5391, "step": 4972 }, { "epoch": 0.34651430164094343, "grad_norm": 0.7371303123998308, "learning_rate": 5.310612715364705e-07, "loss": 1.4495, "step": 4973 }, { "epoch": 0.3465839807685608, "grad_norm": 0.7235927075509028, "learning_rate": 5.31000172197127e-07, "loss": 1.66, "step": 4974 }, { "epoch": 0.3466536598961781, "grad_norm": 0.6811158833110971, "learning_rate": 5.309390658611555e-07, "loss": 1.5143, "step": 4975 }, { "epoch": 0.34672333902379543, "grad_norm": 0.7297291177884462, "learning_rate": 5.308779525314844e-07, "loss": 1.6434, "step": 4976 }, { "epoch": 0.3467930181514127, "grad_norm": 0.6648605494038474, "learning_rate": 5.308168322110423e-07, "loss": 1.4774, "step": 4977 }, { "epoch": 0.3468626972790301, "grad_norm": 0.777247005511412, "learning_rate": 5.307557049027582e-07, "loss": 1.4144, "step": 4978 }, { "epoch": 0.34693237640664737, "grad_norm": 0.6929973859668674, "learning_rate": 5.306945706095615e-07, "loss": 1.5942, "step": 4979 }, { "epoch": 0.3470020555342647, "grad_norm": 0.7283578679121835, "learning_rate": 5.30633429334382e-07, "loss": 1.5701, "step": 4980 }, { "epoch": 0.347071734661882, "grad_norm": 0.7320143590031736, "learning_rate": 5.305722810801493e-07, "loss": 1.5392, "step": 4981 }, { "epoch": 0.34714141378949936, "grad_norm": 0.6903230410445872, "learning_rate": 5.305111258497943e-07, "loss": 1.6587, "step": 4982 }, { "epoch": 0.34721109291711666, "grad_norm": 0.6754268076389439, "learning_rate": 5.304499636462473e-07, "loss": 1.5187, "step": 4983 }, { "epoch": 0.347280772044734, "grad_norm": 0.7071926402253579, "learning_rate": 5.303887944724396e-07, "loss": 1.5039, "step": 4984 }, { "epoch": 0.3473504511723513, "grad_norm": 0.7314158959654293, "learning_rate": 5.303276183313022e-07, "loss": 1.4893, "step": 4985 }, { "epoch": 0.34742013029996865, "grad_norm": 0.717579235163545, "learning_rate": 5.30266435225767e-07, "loss": 1.5723, "step": 4986 }, { "epoch": 0.34748980942758595, "grad_norm": 0.670827472100565, "learning_rate": 5.302052451587659e-07, "loss": 1.4619, "step": 4987 }, { "epoch": 0.3475594885552033, "grad_norm": 0.7071414740051439, "learning_rate": 5.301440481332316e-07, "loss": 1.6362, "step": 4988 }, { "epoch": 0.3476291676828206, "grad_norm": 0.7195762826997261, "learning_rate": 5.300828441520965e-07, "loss": 1.5261, "step": 4989 }, { "epoch": 0.34769884681043794, "grad_norm": 0.7269680457866935, "learning_rate": 5.300216332182934e-07, "loss": 1.5192, "step": 4990 }, { "epoch": 0.34776852593805524, "grad_norm": 0.7167959844489366, "learning_rate": 5.299604153347562e-07, "loss": 1.5301, "step": 4991 }, { "epoch": 0.3478382050656726, "grad_norm": 0.7521845229687258, "learning_rate": 5.298991905044182e-07, "loss": 1.5524, "step": 4992 }, { "epoch": 0.3479078841932899, "grad_norm": 0.7313888512216582, "learning_rate": 5.298379587302136e-07, "loss": 1.5929, "step": 4993 }, { "epoch": 0.34797756332090723, "grad_norm": 0.670917170186345, "learning_rate": 5.297767200150765e-07, "loss": 1.6268, "step": 4994 }, { "epoch": 0.34804724244852453, "grad_norm": 0.7296243936097038, "learning_rate": 5.297154743619418e-07, "loss": 1.5761, "step": 4995 }, { "epoch": 0.3481169215761419, "grad_norm": 0.6621500834968012, "learning_rate": 5.296542217737445e-07, "loss": 1.3975, "step": 4996 }, { "epoch": 0.3481866007037592, "grad_norm": 0.6841736192653086, "learning_rate": 5.2959296225342e-07, "loss": 1.4708, "step": 4997 }, { "epoch": 0.3482562798313765, "grad_norm": 0.7506006835561448, "learning_rate": 5.295316958039038e-07, "loss": 1.6007, "step": 4998 }, { "epoch": 0.3483259589589938, "grad_norm": 0.7525641487586818, "learning_rate": 5.294704224281321e-07, "loss": 1.5337, "step": 4999 }, { "epoch": 0.34839563808661117, "grad_norm": 0.8019951480668435, "learning_rate": 5.294091421290412e-07, "loss": 1.6959, "step": 5000 }, { "epoch": 0.34846531721422846, "grad_norm": 0.9289139139040437, "learning_rate": 5.293478549095676e-07, "loss": 1.5733, "step": 5001 }, { "epoch": 0.3485349963418458, "grad_norm": 0.7366925407109421, "learning_rate": 5.292865607726485e-07, "loss": 1.5289, "step": 5002 }, { "epoch": 0.3486046754694631, "grad_norm": 0.6901044411339604, "learning_rate": 5.292252597212212e-07, "loss": 1.5065, "step": 5003 }, { "epoch": 0.34867435459708046, "grad_norm": 0.7858548036787403, "learning_rate": 5.291639517582235e-07, "loss": 1.7448, "step": 5004 }, { "epoch": 0.34874403372469776, "grad_norm": 0.6910046587719392, "learning_rate": 5.291026368865932e-07, "loss": 1.4958, "step": 5005 }, { "epoch": 0.3488137128523151, "grad_norm": 0.7131421363913354, "learning_rate": 5.290413151092685e-07, "loss": 1.5104, "step": 5006 }, { "epoch": 0.3488833919799324, "grad_norm": 0.678645451016774, "learning_rate": 5.289799864291884e-07, "loss": 1.6023, "step": 5007 }, { "epoch": 0.34895307110754975, "grad_norm": 0.7135897875529243, "learning_rate": 5.289186508492918e-07, "loss": 1.5097, "step": 5008 }, { "epoch": 0.34902275023516705, "grad_norm": 0.6925868792365488, "learning_rate": 5.288573083725181e-07, "loss": 1.4276, "step": 5009 }, { "epoch": 0.3490924293627844, "grad_norm": 0.7355852311495054, "learning_rate": 5.287959590018066e-07, "loss": 1.6468, "step": 5010 }, { "epoch": 0.3491621084904017, "grad_norm": 0.6591714591613197, "learning_rate": 5.287346027400978e-07, "loss": 1.4403, "step": 5011 }, { "epoch": 0.34923178761801904, "grad_norm": 0.7384149162418628, "learning_rate": 5.286732395903316e-07, "loss": 1.5242, "step": 5012 }, { "epoch": 0.34930146674563634, "grad_norm": 0.7951440907862842, "learning_rate": 5.286118695554488e-07, "loss": 1.5518, "step": 5013 }, { "epoch": 0.3493711458732537, "grad_norm": 0.690403671698083, "learning_rate": 5.285504926383904e-07, "loss": 1.4066, "step": 5014 }, { "epoch": 0.349440825000871, "grad_norm": 0.734429986958859, "learning_rate": 5.284891088420977e-07, "loss": 1.6652, "step": 5015 }, { "epoch": 0.34951050412848833, "grad_norm": 0.6791322665246944, "learning_rate": 5.284277181695124e-07, "loss": 1.4876, "step": 5016 }, { "epoch": 0.3495801832561056, "grad_norm": 0.7333763237125314, "learning_rate": 5.283663206235762e-07, "loss": 1.558, "step": 5017 }, { "epoch": 0.349649862383723, "grad_norm": 0.7894035079650024, "learning_rate": 5.283049162072316e-07, "loss": 1.471, "step": 5018 }, { "epoch": 0.34971954151134027, "grad_norm": 0.7444643304296316, "learning_rate": 5.282435049234214e-07, "loss": 1.522, "step": 5019 }, { "epoch": 0.3497892206389576, "grad_norm": 0.6643970018185514, "learning_rate": 5.281820867750883e-07, "loss": 1.5659, "step": 5020 }, { "epoch": 0.3498588997665749, "grad_norm": 0.7232823075509024, "learning_rate": 5.281206617651756e-07, "loss": 1.5947, "step": 5021 }, { "epoch": 0.34992857889419227, "grad_norm": 0.7206660363783639, "learning_rate": 5.280592298966271e-07, "loss": 1.5868, "step": 5022 }, { "epoch": 0.34999825802180956, "grad_norm": 0.7874148940637059, "learning_rate": 5.279977911723866e-07, "loss": 1.6011, "step": 5023 }, { "epoch": 0.3500679371494269, "grad_norm": 0.7463976753010189, "learning_rate": 5.279363455953982e-07, "loss": 1.6795, "step": 5024 }, { "epoch": 0.3501376162770442, "grad_norm": 0.7140576677032477, "learning_rate": 5.278748931686068e-07, "loss": 1.4726, "step": 5025 }, { "epoch": 0.35020729540466156, "grad_norm": 0.8032460802640788, "learning_rate": 5.278134338949572e-07, "loss": 1.5765, "step": 5026 }, { "epoch": 0.35027697453227885, "grad_norm": 0.736073108144398, "learning_rate": 5.277519677773946e-07, "loss": 1.6496, "step": 5027 }, { "epoch": 0.3503466536598962, "grad_norm": 0.7433157359642669, "learning_rate": 5.276904948188647e-07, "loss": 1.607, "step": 5028 }, { "epoch": 0.3504163327875135, "grad_norm": 0.7302026377592102, "learning_rate": 5.276290150223133e-07, "loss": 1.5698, "step": 5029 }, { "epoch": 0.35048601191513085, "grad_norm": 0.7156186997183115, "learning_rate": 5.275675283906867e-07, "loss": 1.6206, "step": 5030 }, { "epoch": 0.35055569104274814, "grad_norm": 0.684118999147647, "learning_rate": 5.275060349269315e-07, "loss": 1.5142, "step": 5031 }, { "epoch": 0.3506253701703655, "grad_norm": 0.7293360774488382, "learning_rate": 5.274445346339945e-07, "loss": 1.5432, "step": 5032 }, { "epoch": 0.3506950492979828, "grad_norm": 0.7374356798123075, "learning_rate": 5.273830275148231e-07, "loss": 1.547, "step": 5033 }, { "epoch": 0.35076472842560014, "grad_norm": 0.6984990019219187, "learning_rate": 5.273215135723644e-07, "loss": 1.5509, "step": 5034 }, { "epoch": 0.35083440755321743, "grad_norm": 0.6984423015837139, "learning_rate": 5.272599928095669e-07, "loss": 1.6198, "step": 5035 }, { "epoch": 0.3509040866808348, "grad_norm": 0.7221225311495126, "learning_rate": 5.271984652293784e-07, "loss": 1.5365, "step": 5036 }, { "epoch": 0.3509737658084521, "grad_norm": 0.6832259001328025, "learning_rate": 5.271369308347475e-07, "loss": 1.5944, "step": 5037 }, { "epoch": 0.3510434449360694, "grad_norm": 0.7288757483748387, "learning_rate": 5.27075389628623e-07, "loss": 1.565, "step": 5038 }, { "epoch": 0.3511131240636867, "grad_norm": 0.773464821300105, "learning_rate": 5.270138416139543e-07, "loss": 1.5954, "step": 5039 }, { "epoch": 0.35118280319130407, "grad_norm": 0.719342206905609, "learning_rate": 5.269522867936905e-07, "loss": 1.4334, "step": 5040 }, { "epoch": 0.35125248231892137, "grad_norm": 0.7003743350586481, "learning_rate": 5.268907251707821e-07, "loss": 1.5568, "step": 5041 }, { "epoch": 0.3513221614465387, "grad_norm": 0.6756700295397962, "learning_rate": 5.268291567481786e-07, "loss": 1.4975, "step": 5042 }, { "epoch": 0.351391840574156, "grad_norm": 0.7101359625406757, "learning_rate": 5.267675815288307e-07, "loss": 1.5818, "step": 5043 }, { "epoch": 0.3514615197017733, "grad_norm": 0.7656785077162607, "learning_rate": 5.267059995156894e-07, "loss": 1.4525, "step": 5044 }, { "epoch": 0.35153119882939066, "grad_norm": 0.7206756017968566, "learning_rate": 5.266444107117056e-07, "loss": 1.5277, "step": 5045 }, { "epoch": 0.35160087795700795, "grad_norm": 0.7893166371200515, "learning_rate": 5.265828151198307e-07, "loss": 1.7029, "step": 5046 }, { "epoch": 0.3516705570846253, "grad_norm": 0.7090526225797813, "learning_rate": 5.265212127430169e-07, "loss": 1.5572, "step": 5047 }, { "epoch": 0.3517402362122426, "grad_norm": 0.7587655099716046, "learning_rate": 5.264596035842158e-07, "loss": 1.4987, "step": 5048 }, { "epoch": 0.35180991533985995, "grad_norm": 0.6922136341394148, "learning_rate": 5.263979876463804e-07, "loss": 1.4961, "step": 5049 }, { "epoch": 0.35187959446747724, "grad_norm": 0.7018301647037124, "learning_rate": 5.263363649324629e-07, "loss": 1.5998, "step": 5050 }, { "epoch": 0.3519492735950946, "grad_norm": 0.7100948833017958, "learning_rate": 5.262747354454167e-07, "loss": 1.6238, "step": 5051 }, { "epoch": 0.3520189527227119, "grad_norm": 0.6878432194296483, "learning_rate": 5.262130991881952e-07, "loss": 1.5418, "step": 5052 }, { "epoch": 0.35208863185032924, "grad_norm": 0.739369196023084, "learning_rate": 5.26151456163752e-07, "loss": 1.5088, "step": 5053 }, { "epoch": 0.35215831097794653, "grad_norm": 0.7327168663945245, "learning_rate": 5.260898063750413e-07, "loss": 1.5453, "step": 5054 }, { "epoch": 0.3522279901055639, "grad_norm": 0.7494770476194448, "learning_rate": 5.260281498250174e-07, "loss": 1.5667, "step": 5055 }, { "epoch": 0.3522976692331812, "grad_norm": 0.7003511050728188, "learning_rate": 5.25966486516635e-07, "loss": 1.4696, "step": 5056 }, { "epoch": 0.3523673483607985, "grad_norm": 0.682006424136004, "learning_rate": 5.259048164528492e-07, "loss": 1.5554, "step": 5057 }, { "epoch": 0.3524370274884158, "grad_norm": 0.7261598036764687, "learning_rate": 5.258431396366154e-07, "loss": 1.5927, "step": 5058 }, { "epoch": 0.35250670661603317, "grad_norm": 0.7304805813607402, "learning_rate": 5.257814560708891e-07, "loss": 1.5012, "step": 5059 }, { "epoch": 0.35257638574365047, "grad_norm": 0.701653516106193, "learning_rate": 5.257197657586264e-07, "loss": 1.4971, "step": 5060 }, { "epoch": 0.3526460648712678, "grad_norm": 0.7665366559302987, "learning_rate": 5.256580687027837e-07, "loss": 1.5559, "step": 5061 }, { "epoch": 0.3527157439988851, "grad_norm": 0.6825767430611114, "learning_rate": 5.255963649063176e-07, "loss": 1.6248, "step": 5062 }, { "epoch": 0.35278542312650246, "grad_norm": 0.7166776263768039, "learning_rate": 5.255346543721849e-07, "loss": 1.5674, "step": 5063 }, { "epoch": 0.35285510225411976, "grad_norm": 0.6688400141973269, "learning_rate": 5.254729371033433e-07, "loss": 1.5386, "step": 5064 }, { "epoch": 0.3529247813817371, "grad_norm": 0.7341742905821448, "learning_rate": 5.2541121310275e-07, "loss": 1.5394, "step": 5065 }, { "epoch": 0.3529944605093544, "grad_norm": 0.7139855856855144, "learning_rate": 5.25349482373363e-07, "loss": 1.5032, "step": 5066 }, { "epoch": 0.35306413963697175, "grad_norm": 0.7377531592463131, "learning_rate": 5.252877449181409e-07, "loss": 1.6183, "step": 5067 }, { "epoch": 0.35313381876458905, "grad_norm": 0.6992765237751483, "learning_rate": 5.25226000740042e-07, "loss": 1.5182, "step": 5068 }, { "epoch": 0.3532034978922064, "grad_norm": 0.8121552367033361, "learning_rate": 5.251642498420253e-07, "loss": 1.5733, "step": 5069 }, { "epoch": 0.3532731770198237, "grad_norm": 0.7225162064445212, "learning_rate": 5.251024922270498e-07, "loss": 1.6745, "step": 5070 }, { "epoch": 0.35334285614744104, "grad_norm": 0.7220584431415679, "learning_rate": 5.250407278980753e-07, "loss": 1.6101, "step": 5071 }, { "epoch": 0.35341253527505834, "grad_norm": 0.7338355042291222, "learning_rate": 5.249789568580618e-07, "loss": 1.522, "step": 5072 }, { "epoch": 0.3534822144026757, "grad_norm": 0.6781864282217835, "learning_rate": 5.249171791099692e-07, "loss": 1.4984, "step": 5073 }, { "epoch": 0.353551893530293, "grad_norm": 0.7579599868943648, "learning_rate": 5.248553946567581e-07, "loss": 1.6135, "step": 5074 }, { "epoch": 0.35362157265791033, "grad_norm": 0.740562375568996, "learning_rate": 5.247936035013895e-07, "loss": 1.5535, "step": 5075 }, { "epoch": 0.3536912517855276, "grad_norm": 0.6684099288907697, "learning_rate": 5.247318056468243e-07, "loss": 1.5008, "step": 5076 }, { "epoch": 0.353760930913145, "grad_norm": 0.6961783128265661, "learning_rate": 5.246700010960242e-07, "loss": 1.3987, "step": 5077 }, { "epoch": 0.3538306100407623, "grad_norm": 0.7742250152885914, "learning_rate": 5.246081898519508e-07, "loss": 1.4998, "step": 5078 }, { "epoch": 0.3539002891683796, "grad_norm": 0.6974290524914329, "learning_rate": 5.245463719175663e-07, "loss": 1.4631, "step": 5079 }, { "epoch": 0.3539699682959969, "grad_norm": 0.7394144947732425, "learning_rate": 5.244845472958334e-07, "loss": 1.6719, "step": 5080 }, { "epoch": 0.35403964742361427, "grad_norm": 0.7306324314278728, "learning_rate": 5.244227159897145e-07, "loss": 1.4915, "step": 5081 }, { "epoch": 0.35410932655123156, "grad_norm": 0.742438727557881, "learning_rate": 5.243608780021729e-07, "loss": 1.5282, "step": 5082 }, { "epoch": 0.3541790056788489, "grad_norm": 0.7491554405441359, "learning_rate": 5.242990333361718e-07, "loss": 1.4513, "step": 5083 }, { "epoch": 0.3542486848064662, "grad_norm": 0.7174024426449469, "learning_rate": 5.242371819946751e-07, "loss": 1.4468, "step": 5084 }, { "epoch": 0.35431836393408356, "grad_norm": 0.6926765842468269, "learning_rate": 5.241753239806468e-07, "loss": 1.6201, "step": 5085 }, { "epoch": 0.35438804306170085, "grad_norm": 0.6878017168512975, "learning_rate": 5.241134592970512e-07, "loss": 1.4277, "step": 5086 }, { "epoch": 0.3544577221893182, "grad_norm": 0.7167457202174242, "learning_rate": 5.24051587946853e-07, "loss": 1.4938, "step": 5087 }, { "epoch": 0.3545274013169355, "grad_norm": 0.6964352455294602, "learning_rate": 5.239897099330175e-07, "loss": 1.6145, "step": 5088 }, { "epoch": 0.35459708044455285, "grad_norm": 0.6974430427662163, "learning_rate": 5.239278252585096e-07, "loss": 1.4977, "step": 5089 }, { "epoch": 0.35466675957217014, "grad_norm": 0.6855351540727951, "learning_rate": 5.23865933926295e-07, "loss": 1.5572, "step": 5090 }, { "epoch": 0.3547364386997875, "grad_norm": 0.7201666934289629, "learning_rate": 5.238040359393399e-07, "loss": 1.6433, "step": 5091 }, { "epoch": 0.3548061178274048, "grad_norm": 0.7277989750215804, "learning_rate": 5.237421313006103e-07, "loss": 1.536, "step": 5092 }, { "epoch": 0.35487579695502214, "grad_norm": 0.6634341796241707, "learning_rate": 5.236802200130731e-07, "loss": 1.4078, "step": 5093 }, { "epoch": 0.35494547608263943, "grad_norm": 0.7052333618869744, "learning_rate": 5.23618302079695e-07, "loss": 1.4558, "step": 5094 }, { "epoch": 0.3550151552102568, "grad_norm": 0.7597778563956493, "learning_rate": 5.235563775034431e-07, "loss": 1.5985, "step": 5095 }, { "epoch": 0.3550848343378741, "grad_norm": 0.7706519972191562, "learning_rate": 5.234944462872853e-07, "loss": 1.4622, "step": 5096 }, { "epoch": 0.35515451346549143, "grad_norm": 0.7272603173122851, "learning_rate": 5.234325084341893e-07, "loss": 1.4587, "step": 5097 }, { "epoch": 0.3552241925931087, "grad_norm": 0.7338825767979815, "learning_rate": 5.233705639471233e-07, "loss": 1.7211, "step": 5098 }, { "epoch": 0.3552938717207261, "grad_norm": 0.7069141949193741, "learning_rate": 5.233086128290559e-07, "loss": 1.4935, "step": 5099 }, { "epoch": 0.35536355084834337, "grad_norm": 0.6440283358521028, "learning_rate": 5.232466550829557e-07, "loss": 1.391, "step": 5100 }, { "epoch": 0.3554332299759607, "grad_norm": 0.7555141567575172, "learning_rate": 5.231846907117919e-07, "loss": 1.6087, "step": 5101 }, { "epoch": 0.355502909103578, "grad_norm": 0.7162415478121203, "learning_rate": 5.231227197185342e-07, "loss": 1.5383, "step": 5102 }, { "epoch": 0.35557258823119536, "grad_norm": 0.731361586378461, "learning_rate": 5.230607421061522e-07, "loss": 1.4528, "step": 5103 }, { "epoch": 0.35564226735881266, "grad_norm": 0.7519791657504464, "learning_rate": 5.22998757877616e-07, "loss": 1.5298, "step": 5104 }, { "epoch": 0.35571194648643, "grad_norm": 0.7158025830973831, "learning_rate": 5.22936767035896e-07, "loss": 1.494, "step": 5105 }, { "epoch": 0.3557816256140473, "grad_norm": 0.723748809683638, "learning_rate": 5.228747695839628e-07, "loss": 1.6235, "step": 5106 }, { "epoch": 0.35585130474166465, "grad_norm": 0.690675333061282, "learning_rate": 5.228127655247878e-07, "loss": 1.5151, "step": 5107 }, { "epoch": 0.35592098386928195, "grad_norm": 0.7119021276041587, "learning_rate": 5.227507548613421e-07, "loss": 1.5736, "step": 5108 }, { "epoch": 0.3559906629968993, "grad_norm": 0.7377717095815133, "learning_rate": 5.226887375965974e-07, "loss": 1.6437, "step": 5109 }, { "epoch": 0.3560603421245166, "grad_norm": 0.6893478182531352, "learning_rate": 5.226267137335256e-07, "loss": 1.4684, "step": 5110 }, { "epoch": 0.35613002125213394, "grad_norm": 0.7497735677098524, "learning_rate": 5.225646832750993e-07, "loss": 1.6747, "step": 5111 }, { "epoch": 0.35619970037975124, "grad_norm": 0.7102877627281015, "learning_rate": 5.225026462242909e-07, "loss": 1.5834, "step": 5112 }, { "epoch": 0.3562693795073686, "grad_norm": 0.7598662761974709, "learning_rate": 5.224406025840734e-07, "loss": 1.5745, "step": 5113 }, { "epoch": 0.3563390586349859, "grad_norm": 0.690830365671091, "learning_rate": 5.223785523574201e-07, "loss": 1.5295, "step": 5114 }, { "epoch": 0.35640873776260323, "grad_norm": 0.7349684708292723, "learning_rate": 5.223164955473045e-07, "loss": 1.739, "step": 5115 }, { "epoch": 0.35647841689022053, "grad_norm": 0.7050326698210355, "learning_rate": 5.222544321567006e-07, "loss": 1.4603, "step": 5116 }, { "epoch": 0.3565480960178379, "grad_norm": 0.744957758056315, "learning_rate": 5.221923621885824e-07, "loss": 1.6169, "step": 5117 }, { "epoch": 0.3566177751454552, "grad_norm": 0.7146331562092086, "learning_rate": 5.221302856459247e-07, "loss": 1.5036, "step": 5118 }, { "epoch": 0.3566874542730725, "grad_norm": 0.7084897847748508, "learning_rate": 5.220682025317022e-07, "loss": 1.5428, "step": 5119 }, { "epoch": 0.3567571334006898, "grad_norm": 0.768911401979755, "learning_rate": 5.220061128488898e-07, "loss": 1.4999, "step": 5120 }, { "epoch": 0.35682681252830717, "grad_norm": 0.7568365121723222, "learning_rate": 5.219440166004635e-07, "loss": 1.48, "step": 5121 }, { "epoch": 0.35689649165592446, "grad_norm": 0.754288265747282, "learning_rate": 5.218819137893987e-07, "loss": 1.6032, "step": 5122 }, { "epoch": 0.3569661707835418, "grad_norm": 0.7313637670227049, "learning_rate": 5.218198044186714e-07, "loss": 1.528, "step": 5123 }, { "epoch": 0.3570358499111591, "grad_norm": 0.7476164464044086, "learning_rate": 5.217576884912583e-07, "loss": 1.41, "step": 5124 }, { "epoch": 0.35710552903877646, "grad_norm": 0.7170855090682579, "learning_rate": 5.216955660101362e-07, "loss": 1.4268, "step": 5125 }, { "epoch": 0.35717520816639375, "grad_norm": 0.7142896772084316, "learning_rate": 5.216334369782816e-07, "loss": 1.5978, "step": 5126 }, { "epoch": 0.3572448872940111, "grad_norm": 0.7358415994670768, "learning_rate": 5.215713013986725e-07, "loss": 1.4269, "step": 5127 }, { "epoch": 0.3573145664216284, "grad_norm": 0.7153570108585909, "learning_rate": 5.215091592742861e-07, "loss": 1.5301, "step": 5128 }, { "epoch": 0.35738424554924575, "grad_norm": 0.7447373193332415, "learning_rate": 5.214470106081006e-07, "loss": 1.5988, "step": 5129 }, { "epoch": 0.35745392467686304, "grad_norm": 0.7478977331229874, "learning_rate": 5.213848554030942e-07, "loss": 1.5723, "step": 5130 }, { "epoch": 0.3575236038044804, "grad_norm": 0.6830375549123991, "learning_rate": 5.213226936622456e-07, "loss": 1.5305, "step": 5131 }, { "epoch": 0.3575932829320977, "grad_norm": 0.7308156057281666, "learning_rate": 5.212605253885334e-07, "loss": 1.5576, "step": 5132 }, { "epoch": 0.35766296205971504, "grad_norm": 0.6936867476883012, "learning_rate": 5.211983505849374e-07, "loss": 1.4978, "step": 5133 }, { "epoch": 0.35773264118733233, "grad_norm": 0.7707040281173906, "learning_rate": 5.211361692544366e-07, "loss": 1.5393, "step": 5134 }, { "epoch": 0.3578023203149497, "grad_norm": 0.7024155180503071, "learning_rate": 5.210739814000112e-07, "loss": 1.5802, "step": 5135 }, { "epoch": 0.357871999442567, "grad_norm": 0.7394402935977779, "learning_rate": 5.210117870246413e-07, "loss": 1.5669, "step": 5136 }, { "epoch": 0.3579416785701843, "grad_norm": 0.8847985010981289, "learning_rate": 5.209495861313073e-07, "loss": 1.6315, "step": 5137 }, { "epoch": 0.3580113576978016, "grad_norm": 0.6932795155767438, "learning_rate": 5.208873787229901e-07, "loss": 1.5393, "step": 5138 }, { "epoch": 0.3580810368254189, "grad_norm": 0.7381773345843451, "learning_rate": 5.208251648026706e-07, "loss": 1.4413, "step": 5139 }, { "epoch": 0.35815071595303627, "grad_norm": 0.7475210673169992, "learning_rate": 5.207629443733305e-07, "loss": 1.5271, "step": 5140 }, { "epoch": 0.35822039508065356, "grad_norm": 0.6821769502467249, "learning_rate": 5.207007174379512e-07, "loss": 1.3818, "step": 5141 }, { "epoch": 0.3582900742082709, "grad_norm": 0.7540507964274135, "learning_rate": 5.206384839995151e-07, "loss": 1.6267, "step": 5142 }, { "epoch": 0.3583597533358882, "grad_norm": 0.7189622914498229, "learning_rate": 5.205762440610043e-07, "loss": 1.465, "step": 5143 }, { "epoch": 0.35842943246350556, "grad_norm": 0.7217356184482514, "learning_rate": 5.205139976254017e-07, "loss": 1.3787, "step": 5144 }, { "epoch": 0.35849911159112285, "grad_norm": 0.727465508665904, "learning_rate": 5.204517446956899e-07, "loss": 1.5033, "step": 5145 }, { "epoch": 0.3585687907187402, "grad_norm": 0.7359901406990895, "learning_rate": 5.203894852748525e-07, "loss": 1.593, "step": 5146 }, { "epoch": 0.3586384698463575, "grad_norm": 0.7027153744924136, "learning_rate": 5.203272193658731e-07, "loss": 1.5325, "step": 5147 }, { "epoch": 0.35870814897397485, "grad_norm": 0.7257847487940218, "learning_rate": 5.202649469717355e-07, "loss": 1.6293, "step": 5148 }, { "epoch": 0.35877782810159214, "grad_norm": 0.7370632429557675, "learning_rate": 5.202026680954239e-07, "loss": 1.5385, "step": 5149 }, { "epoch": 0.3588475072292095, "grad_norm": 0.6892655930676715, "learning_rate": 5.201403827399229e-07, "loss": 1.4966, "step": 5150 }, { "epoch": 0.3589171863568268, "grad_norm": 0.6988742124124329, "learning_rate": 5.200780909082172e-07, "loss": 1.4691, "step": 5151 }, { "epoch": 0.35898686548444414, "grad_norm": 0.760811010008508, "learning_rate": 5.200157926032923e-07, "loss": 1.6241, "step": 5152 }, { "epoch": 0.35905654461206143, "grad_norm": 0.7212598757194405, "learning_rate": 5.199534878281334e-07, "loss": 1.5798, "step": 5153 }, { "epoch": 0.3591262237396788, "grad_norm": 0.7060841907531608, "learning_rate": 5.198911765857262e-07, "loss": 1.5388, "step": 5154 }, { "epoch": 0.3591959028672961, "grad_norm": 0.7837864246835402, "learning_rate": 5.198288588790569e-07, "loss": 1.5413, "step": 5155 }, { "epoch": 0.35926558199491343, "grad_norm": 0.7216877555907196, "learning_rate": 5.197665347111119e-07, "loss": 1.5262, "step": 5156 }, { "epoch": 0.3593352611225307, "grad_norm": 0.804075556812053, "learning_rate": 5.19704204084878e-07, "loss": 1.5869, "step": 5157 }, { "epoch": 0.3594049402501481, "grad_norm": 0.7217025913661967, "learning_rate": 5.19641867003342e-07, "loss": 1.5733, "step": 5158 }, { "epoch": 0.35947461937776537, "grad_norm": 0.6582452744365979, "learning_rate": 5.195795234694912e-07, "loss": 1.6061, "step": 5159 }, { "epoch": 0.3595442985053827, "grad_norm": 0.7737437639055407, "learning_rate": 5.195171734863135e-07, "loss": 1.5443, "step": 5160 }, { "epoch": 0.359613977633, "grad_norm": 0.7250214544500239, "learning_rate": 5.194548170567967e-07, "loss": 1.5808, "step": 5161 }, { "epoch": 0.35968365676061737, "grad_norm": 0.6857607962575639, "learning_rate": 5.19392454183929e-07, "loss": 1.4723, "step": 5162 }, { "epoch": 0.35975333588823466, "grad_norm": 0.6663403161373356, "learning_rate": 5.19330084870699e-07, "loss": 1.4604, "step": 5163 }, { "epoch": 0.359823015015852, "grad_norm": 0.7785922329800811, "learning_rate": 5.192677091200955e-07, "loss": 1.5168, "step": 5164 }, { "epoch": 0.3598926941434693, "grad_norm": 0.69477412998931, "learning_rate": 5.192053269351078e-07, "loss": 1.5047, "step": 5165 }, { "epoch": 0.35996237327108666, "grad_norm": 0.7145866167341475, "learning_rate": 5.191429383187252e-07, "loss": 1.7196, "step": 5166 }, { "epoch": 0.36003205239870395, "grad_norm": 0.8216013176843149, "learning_rate": 5.190805432739378e-07, "loss": 1.6393, "step": 5167 }, { "epoch": 0.3601017315263213, "grad_norm": 0.7794476392517875, "learning_rate": 5.190181418037354e-07, "loss": 1.4782, "step": 5168 }, { "epoch": 0.3601714106539386, "grad_norm": 0.6724797600349622, "learning_rate": 5.189557339111084e-07, "loss": 1.4706, "step": 5169 }, { "epoch": 0.36024108978155595, "grad_norm": 0.7334712166799854, "learning_rate": 5.188933195990478e-07, "loss": 1.6242, "step": 5170 }, { "epoch": 0.36031076890917324, "grad_norm": 0.7208270609468629, "learning_rate": 5.188308988705443e-07, "loss": 1.5976, "step": 5171 }, { "epoch": 0.3603804480367906, "grad_norm": 0.8019105316309917, "learning_rate": 5.187684717285897e-07, "loss": 1.5433, "step": 5172 }, { "epoch": 0.3604501271644079, "grad_norm": 0.7227317280229342, "learning_rate": 5.187060381761749e-07, "loss": 1.6401, "step": 5173 }, { "epoch": 0.36051980629202524, "grad_norm": 0.7261853178782461, "learning_rate": 5.186435982162924e-07, "loss": 1.6069, "step": 5174 }, { "epoch": 0.36058948541964253, "grad_norm": 0.731713234324846, "learning_rate": 5.185811518519344e-07, "loss": 1.4512, "step": 5175 }, { "epoch": 0.3606591645472599, "grad_norm": 0.6683510801178882, "learning_rate": 5.185186990860932e-07, "loss": 1.4891, "step": 5176 }, { "epoch": 0.3607288436748772, "grad_norm": 0.7496938877958745, "learning_rate": 5.184562399217621e-07, "loss": 1.4466, "step": 5177 }, { "epoch": 0.3607985228024945, "grad_norm": 0.733168705759652, "learning_rate": 5.183937743619337e-07, "loss": 1.5857, "step": 5178 }, { "epoch": 0.3608682019301118, "grad_norm": 0.7430949705009791, "learning_rate": 5.18331302409602e-07, "loss": 1.5315, "step": 5179 }, { "epoch": 0.36093788105772917, "grad_norm": 0.7773572657707958, "learning_rate": 5.182688240677605e-07, "loss": 1.5201, "step": 5180 }, { "epoch": 0.36100756018534647, "grad_norm": 0.7536228512940022, "learning_rate": 5.182063393394033e-07, "loss": 1.5811, "step": 5181 }, { "epoch": 0.3610772393129638, "grad_norm": 0.6928947809986236, "learning_rate": 5.181438482275249e-07, "loss": 1.4702, "step": 5182 }, { "epoch": 0.3611469184405811, "grad_norm": 0.7192975853115967, "learning_rate": 5.1808135073512e-07, "loss": 1.451, "step": 5183 }, { "epoch": 0.36121659756819846, "grad_norm": 0.693654126443667, "learning_rate": 5.180188468651835e-07, "loss": 1.4676, "step": 5184 }, { "epoch": 0.36128627669581576, "grad_norm": 0.7256757119654305, "learning_rate": 5.179563366207108e-07, "loss": 1.6045, "step": 5185 }, { "epoch": 0.3613559558234331, "grad_norm": 0.6875294331327294, "learning_rate": 5.178938200046974e-07, "loss": 1.513, "step": 5186 }, { "epoch": 0.3614256349510504, "grad_norm": 0.739944485913754, "learning_rate": 5.178312970201394e-07, "loss": 1.4669, "step": 5187 }, { "epoch": 0.36149531407866775, "grad_norm": 0.6832797355399127, "learning_rate": 5.17768767670033e-07, "loss": 1.4283, "step": 5188 }, { "epoch": 0.36156499320628505, "grad_norm": 0.6876615223406541, "learning_rate": 5.177062319573746e-07, "loss": 1.5305, "step": 5189 }, { "epoch": 0.3616346723339024, "grad_norm": 0.701091374244738, "learning_rate": 5.176436898851611e-07, "loss": 1.4617, "step": 5190 }, { "epoch": 0.3617043514615197, "grad_norm": 0.723571517664063, "learning_rate": 5.175811414563897e-07, "loss": 1.5976, "step": 5191 }, { "epoch": 0.36177403058913704, "grad_norm": 0.7259446504081081, "learning_rate": 5.17518586674058e-07, "loss": 1.6222, "step": 5192 }, { "epoch": 0.36184370971675434, "grad_norm": 0.7117002981964684, "learning_rate": 5.174560255411634e-07, "loss": 1.5056, "step": 5193 }, { "epoch": 0.3619133888443717, "grad_norm": 0.7279042262192607, "learning_rate": 5.173934580607041e-07, "loss": 1.6258, "step": 5194 }, { "epoch": 0.361983067971989, "grad_norm": 0.7104886630481225, "learning_rate": 5.173308842356785e-07, "loss": 1.5851, "step": 5195 }, { "epoch": 0.36205274709960633, "grad_norm": 0.6871358125170827, "learning_rate": 5.172683040690853e-07, "loss": 1.5191, "step": 5196 }, { "epoch": 0.3621224262272236, "grad_norm": 0.7192980784986389, "learning_rate": 5.172057175639234e-07, "loss": 1.6021, "step": 5197 }, { "epoch": 0.362192105354841, "grad_norm": 0.6654693323072031, "learning_rate": 5.171431247231921e-07, "loss": 1.5323, "step": 5198 }, { "epoch": 0.36226178448245827, "grad_norm": 0.7188297136953314, "learning_rate": 5.17080525549891e-07, "loss": 1.6149, "step": 5199 }, { "epoch": 0.3623314636100756, "grad_norm": 0.7728183408864958, "learning_rate": 5.170179200470199e-07, "loss": 1.6164, "step": 5200 }, { "epoch": 0.3624011427376929, "grad_norm": 0.7382453104563416, "learning_rate": 5.169553082175792e-07, "loss": 1.5468, "step": 5201 }, { "epoch": 0.36247082186531027, "grad_norm": 0.7664313372593619, "learning_rate": 5.16892690064569e-07, "loss": 1.5538, "step": 5202 }, { "epoch": 0.36254050099292756, "grad_norm": 0.7403909821642547, "learning_rate": 5.168300655909905e-07, "loss": 1.5414, "step": 5203 }, { "epoch": 0.3626101801205449, "grad_norm": 0.7610636702682715, "learning_rate": 5.167674347998446e-07, "loss": 1.6746, "step": 5204 }, { "epoch": 0.3626798592481622, "grad_norm": 0.7008708794040567, "learning_rate": 5.167047976941327e-07, "loss": 1.4871, "step": 5205 }, { "epoch": 0.36274953837577956, "grad_norm": 0.6827672523246823, "learning_rate": 5.166421542768564e-07, "loss": 1.5816, "step": 5206 }, { "epoch": 0.36281921750339685, "grad_norm": 0.7860676509331375, "learning_rate": 5.165795045510179e-07, "loss": 1.5773, "step": 5207 }, { "epoch": 0.3628888966310142, "grad_norm": 0.7178660737374489, "learning_rate": 5.165168485196194e-07, "loss": 1.466, "step": 5208 }, { "epoch": 0.3629585757586315, "grad_norm": 0.7093634056240848, "learning_rate": 5.164541861856636e-07, "loss": 1.3871, "step": 5209 }, { "epoch": 0.36302825488624885, "grad_norm": 0.6972079077503902, "learning_rate": 5.163915175521532e-07, "loss": 1.449, "step": 5210 }, { "epoch": 0.36309793401386614, "grad_norm": 0.747745907435438, "learning_rate": 5.163288426220918e-07, "loss": 1.5072, "step": 5211 }, { "epoch": 0.3631676131414835, "grad_norm": 0.733427906317132, "learning_rate": 5.162661613984823e-07, "loss": 1.5162, "step": 5212 }, { "epoch": 0.3632372922691008, "grad_norm": 0.7233838960156477, "learning_rate": 5.162034738843291e-07, "loss": 1.66, "step": 5213 }, { "epoch": 0.36330697139671814, "grad_norm": 0.7357691997602305, "learning_rate": 5.161407800826359e-07, "loss": 1.6048, "step": 5214 }, { "epoch": 0.36337665052433543, "grad_norm": 0.7419119044198975, "learning_rate": 5.160780799964074e-07, "loss": 1.5819, "step": 5215 }, { "epoch": 0.3634463296519528, "grad_norm": 0.7333257219084258, "learning_rate": 5.160153736286481e-07, "loss": 1.5136, "step": 5216 }, { "epoch": 0.3635160087795701, "grad_norm": 0.7561954088937832, "learning_rate": 5.159526609823633e-07, "loss": 1.532, "step": 5217 }, { "epoch": 0.3635856879071874, "grad_norm": 0.7266071236575034, "learning_rate": 5.15889942060558e-07, "loss": 1.6245, "step": 5218 }, { "epoch": 0.3636553670348047, "grad_norm": 0.7365917391566773, "learning_rate": 5.15827216866238e-07, "loss": 1.6322, "step": 5219 }, { "epoch": 0.36372504616242207, "grad_norm": 0.7040763190740109, "learning_rate": 5.157644854024093e-07, "loss": 1.49, "step": 5220 }, { "epoch": 0.36379472529003937, "grad_norm": 0.7142262462047746, "learning_rate": 5.15701747672078e-07, "loss": 1.5597, "step": 5221 }, { "epoch": 0.3638644044176567, "grad_norm": 0.6858746531213153, "learning_rate": 5.156390036782504e-07, "loss": 1.5639, "step": 5222 }, { "epoch": 0.363934083545274, "grad_norm": 0.6759937525398312, "learning_rate": 5.155762534239337e-07, "loss": 1.5822, "step": 5223 }, { "epoch": 0.36400376267289136, "grad_norm": 0.7509133714833661, "learning_rate": 5.155134969121349e-07, "loss": 1.5394, "step": 5224 }, { "epoch": 0.36407344180050866, "grad_norm": 0.7170835709751526, "learning_rate": 5.154507341458613e-07, "loss": 1.5826, "step": 5225 }, { "epoch": 0.364143120928126, "grad_norm": 0.7065609504484022, "learning_rate": 5.153879651281208e-07, "loss": 1.5208, "step": 5226 }, { "epoch": 0.3642128000557433, "grad_norm": 0.6860528121460813, "learning_rate": 5.153251898619212e-07, "loss": 1.5345, "step": 5227 }, { "epoch": 0.3642824791833606, "grad_norm": 0.8400742404674034, "learning_rate": 5.152624083502711e-07, "loss": 1.4601, "step": 5228 }, { "epoch": 0.36435215831097795, "grad_norm": 0.7068731002179544, "learning_rate": 5.151996205961789e-07, "loss": 1.5634, "step": 5229 }, { "epoch": 0.36442183743859524, "grad_norm": 0.682862701091438, "learning_rate": 5.151368266026535e-07, "loss": 1.5034, "step": 5230 }, { "epoch": 0.3644915165662126, "grad_norm": 0.740431488301115, "learning_rate": 5.150740263727043e-07, "loss": 1.6257, "step": 5231 }, { "epoch": 0.3645611956938299, "grad_norm": 0.6693329964335967, "learning_rate": 5.150112199093407e-07, "loss": 1.4643, "step": 5232 }, { "epoch": 0.36463087482144724, "grad_norm": 0.7470365567305248, "learning_rate": 5.149484072155725e-07, "loss": 1.5566, "step": 5233 }, { "epoch": 0.36470055394906453, "grad_norm": 0.7473497564826113, "learning_rate": 5.1488558829441e-07, "loss": 1.4871, "step": 5234 }, { "epoch": 0.3647702330766819, "grad_norm": 0.6911032507873867, "learning_rate": 5.148227631488632e-07, "loss": 1.4324, "step": 5235 }, { "epoch": 0.3648399122042992, "grad_norm": 0.6816650784423868, "learning_rate": 5.147599317819434e-07, "loss": 1.5158, "step": 5236 }, { "epoch": 0.3649095913319165, "grad_norm": 0.7410861631268022, "learning_rate": 5.14697094196661e-07, "loss": 1.5026, "step": 5237 }, { "epoch": 0.3649792704595338, "grad_norm": 0.6936484383081856, "learning_rate": 5.146342503960276e-07, "loss": 1.5705, "step": 5238 }, { "epoch": 0.3650489495871512, "grad_norm": 0.6786652114843855, "learning_rate": 5.14571400383055e-07, "loss": 1.5508, "step": 5239 }, { "epoch": 0.36511862871476847, "grad_norm": 0.7273301299834638, "learning_rate": 5.145085441607548e-07, "loss": 1.4541, "step": 5240 }, { "epoch": 0.3651883078423858, "grad_norm": 0.7934277087687172, "learning_rate": 5.144456817321391e-07, "loss": 1.5811, "step": 5241 }, { "epoch": 0.3652579869700031, "grad_norm": 0.7129202993259002, "learning_rate": 5.143828131002207e-07, "loss": 1.6266, "step": 5242 }, { "epoch": 0.36532766609762046, "grad_norm": 0.662891493603992, "learning_rate": 5.143199382680122e-07, "loss": 1.4635, "step": 5243 }, { "epoch": 0.36539734522523776, "grad_norm": 0.7154475325660717, "learning_rate": 5.14257057238527e-07, "loss": 1.5247, "step": 5244 }, { "epoch": 0.3654670243528551, "grad_norm": 0.7066260380609907, "learning_rate": 5.141941700147782e-07, "loss": 1.5311, "step": 5245 }, { "epoch": 0.3655367034804724, "grad_norm": 0.6989214833213997, "learning_rate": 5.141312765997793e-07, "loss": 1.2925, "step": 5246 }, { "epoch": 0.36560638260808975, "grad_norm": 0.7044441524364802, "learning_rate": 5.140683769965448e-07, "loss": 1.61, "step": 5247 }, { "epoch": 0.36567606173570705, "grad_norm": 0.6607704920750958, "learning_rate": 5.140054712080886e-07, "loss": 1.444, "step": 5248 }, { "epoch": 0.3657457408633244, "grad_norm": 0.7357333603325759, "learning_rate": 5.139425592374253e-07, "loss": 1.5409, "step": 5249 }, { "epoch": 0.3658154199909417, "grad_norm": 0.6514266738332384, "learning_rate": 5.138796410875699e-07, "loss": 1.4404, "step": 5250 }, { "epoch": 0.36588509911855904, "grad_norm": 0.6992053094386647, "learning_rate": 5.138167167615376e-07, "loss": 1.5441, "step": 5251 }, { "epoch": 0.36595477824617634, "grad_norm": 0.7186567293557066, "learning_rate": 5.137537862623437e-07, "loss": 1.5629, "step": 5252 }, { "epoch": 0.3660244573737937, "grad_norm": 0.7064096116081595, "learning_rate": 5.136908495930041e-07, "loss": 1.353, "step": 5253 }, { "epoch": 0.366094136501411, "grad_norm": 0.694342282292615, "learning_rate": 5.136279067565347e-07, "loss": 1.6093, "step": 5254 }, { "epoch": 0.36616381562902833, "grad_norm": 0.695106825117824, "learning_rate": 5.135649577559519e-07, "loss": 1.53, "step": 5255 }, { "epoch": 0.36623349475664563, "grad_norm": 0.6721407293211693, "learning_rate": 5.135020025942726e-07, "loss": 1.5514, "step": 5256 }, { "epoch": 0.366303173884263, "grad_norm": 0.7433143801168884, "learning_rate": 5.134390412745134e-07, "loss": 1.6664, "step": 5257 }, { "epoch": 0.3663728530118803, "grad_norm": 0.7759049168554641, "learning_rate": 5.133760737996916e-07, "loss": 1.6917, "step": 5258 }, { "epoch": 0.3664425321394976, "grad_norm": 0.7287911458113379, "learning_rate": 5.133131001728248e-07, "loss": 1.6547, "step": 5259 }, { "epoch": 0.3665122112671149, "grad_norm": 0.7351234554888396, "learning_rate": 5.132501203969309e-07, "loss": 1.5529, "step": 5260 }, { "epoch": 0.36658189039473227, "grad_norm": 0.7453393320312598, "learning_rate": 5.131871344750279e-07, "loss": 1.5328, "step": 5261 }, { "epoch": 0.36665156952234956, "grad_norm": 0.6918766579616535, "learning_rate": 5.131241424101342e-07, "loss": 1.5461, "step": 5262 }, { "epoch": 0.3667212486499669, "grad_norm": 0.7194318357060998, "learning_rate": 5.130611442052686e-07, "loss": 1.6265, "step": 5263 }, { "epoch": 0.3667909277775842, "grad_norm": 0.7300076987686653, "learning_rate": 5.129981398634502e-07, "loss": 1.5558, "step": 5264 }, { "epoch": 0.36686060690520156, "grad_norm": 0.7009279205137522, "learning_rate": 5.129351293876979e-07, "loss": 1.4367, "step": 5265 }, { "epoch": 0.36693028603281885, "grad_norm": 0.8297625632738989, "learning_rate": 5.128721127810318e-07, "loss": 1.7773, "step": 5266 }, { "epoch": 0.3669999651604362, "grad_norm": 0.6939962844924814, "learning_rate": 5.128090900464713e-07, "loss": 1.5233, "step": 5267 }, { "epoch": 0.3670696442880535, "grad_norm": 0.7366202778643759, "learning_rate": 5.12746061187037e-07, "loss": 1.6129, "step": 5268 }, { "epoch": 0.36713932341567085, "grad_norm": 0.7558579968863058, "learning_rate": 5.126830262057491e-07, "loss": 1.5278, "step": 5269 }, { "epoch": 0.36720900254328814, "grad_norm": 0.718857236429046, "learning_rate": 5.126199851056285e-07, "loss": 1.4735, "step": 5270 }, { "epoch": 0.3672786816709055, "grad_norm": 0.7972347547964148, "learning_rate": 5.125569378896962e-07, "loss": 1.6827, "step": 5271 }, { "epoch": 0.3673483607985228, "grad_norm": 0.6899496063426461, "learning_rate": 5.124938845609736e-07, "loss": 1.58, "step": 5272 }, { "epoch": 0.36741803992614014, "grad_norm": 0.7818174794056979, "learning_rate": 5.124308251224824e-07, "loss": 1.5645, "step": 5273 }, { "epoch": 0.36748771905375743, "grad_norm": 0.7124659133872496, "learning_rate": 5.123677595772443e-07, "loss": 1.5349, "step": 5274 }, { "epoch": 0.3675573981813748, "grad_norm": 0.7511031303630104, "learning_rate": 5.123046879282817e-07, "loss": 1.5803, "step": 5275 }, { "epoch": 0.3676270773089921, "grad_norm": 0.6851223267795419, "learning_rate": 5.122416101786171e-07, "loss": 1.5072, "step": 5276 }, { "epoch": 0.36769675643660943, "grad_norm": 0.806850869793742, "learning_rate": 5.121785263312734e-07, "loss": 1.6434, "step": 5277 }, { "epoch": 0.3677664355642267, "grad_norm": 0.7254644946888305, "learning_rate": 5.121154363892735e-07, "loss": 1.5579, "step": 5278 }, { "epoch": 0.3678361146918441, "grad_norm": 0.7252493972658083, "learning_rate": 5.12052340355641e-07, "loss": 1.5835, "step": 5279 }, { "epoch": 0.36790579381946137, "grad_norm": 0.7662299719465424, "learning_rate": 5.119892382333996e-07, "loss": 1.5148, "step": 5280 }, { "epoch": 0.3679754729470787, "grad_norm": 0.7165871659616716, "learning_rate": 5.119261300255731e-07, "loss": 1.5447, "step": 5281 }, { "epoch": 0.368045152074696, "grad_norm": 0.6639950924743402, "learning_rate": 5.118630157351859e-07, "loss": 1.554, "step": 5282 }, { "epoch": 0.36811483120231336, "grad_norm": 0.7302210295216659, "learning_rate": 5.117998953652625e-07, "loss": 1.5355, "step": 5283 }, { "epoch": 0.36818451032993066, "grad_norm": 0.7491105839545834, "learning_rate": 5.117367689188279e-07, "loss": 1.6475, "step": 5284 }, { "epoch": 0.368254189457548, "grad_norm": 0.7506643071993163, "learning_rate": 5.11673636398907e-07, "loss": 1.6575, "step": 5285 }, { "epoch": 0.3683238685851653, "grad_norm": 0.6812138419849934, "learning_rate": 5.116104978085254e-07, "loss": 1.4553, "step": 5286 }, { "epoch": 0.36839354771278265, "grad_norm": 0.7854900925563119, "learning_rate": 5.11547353150709e-07, "loss": 1.5519, "step": 5287 }, { "epoch": 0.36846322684039995, "grad_norm": 0.7203783971396994, "learning_rate": 5.114842024284834e-07, "loss": 1.6115, "step": 5288 }, { "epoch": 0.3685329059680173, "grad_norm": 0.7243845386114148, "learning_rate": 5.114210456448753e-07, "loss": 1.5813, "step": 5289 }, { "epoch": 0.3686025850956346, "grad_norm": 0.713318574120179, "learning_rate": 5.113578828029111e-07, "loss": 1.5824, "step": 5290 }, { "epoch": 0.36867226422325194, "grad_norm": 0.7536484451835456, "learning_rate": 5.112947139056177e-07, "loss": 1.5102, "step": 5291 }, { "epoch": 0.36874194335086924, "grad_norm": 0.7081609936285251, "learning_rate": 5.112315389560226e-07, "loss": 1.4296, "step": 5292 }, { "epoch": 0.3688116224784866, "grad_norm": 0.6799279862368306, "learning_rate": 5.111683579571528e-07, "loss": 1.5083, "step": 5293 }, { "epoch": 0.3688813016061039, "grad_norm": 0.890442651971021, "learning_rate": 5.111051709120361e-07, "loss": 1.6717, "step": 5294 }, { "epoch": 0.36895098073372123, "grad_norm": 0.6787398243474793, "learning_rate": 5.11041977823701e-07, "loss": 1.5357, "step": 5295 }, { "epoch": 0.36902065986133853, "grad_norm": 0.6934921564234446, "learning_rate": 5.109787786951755e-07, "loss": 1.5096, "step": 5296 }, { "epoch": 0.3690903389889559, "grad_norm": 0.6968589312259752, "learning_rate": 5.109155735294882e-07, "loss": 1.4032, "step": 5297 }, { "epoch": 0.3691600181165732, "grad_norm": 0.819070605787555, "learning_rate": 5.108523623296682e-07, "loss": 1.5379, "step": 5298 }, { "epoch": 0.3692296972441905, "grad_norm": 0.7192037379184335, "learning_rate": 5.107891450987445e-07, "loss": 1.5684, "step": 5299 }, { "epoch": 0.3692993763718078, "grad_norm": 0.7157852409802509, "learning_rate": 5.107259218397469e-07, "loss": 1.5831, "step": 5300 }, { "epoch": 0.36936905549942517, "grad_norm": 0.7492510878850005, "learning_rate": 5.10662692555705e-07, "loss": 1.5437, "step": 5301 }, { "epoch": 0.36943873462704246, "grad_norm": 0.7286317846247485, "learning_rate": 5.105994572496488e-07, "loss": 1.5993, "step": 5302 }, { "epoch": 0.3695084137546598, "grad_norm": 0.7648274962276388, "learning_rate": 5.105362159246089e-07, "loss": 1.6396, "step": 5303 }, { "epoch": 0.3695780928822771, "grad_norm": 0.7420079599949096, "learning_rate": 5.104729685836156e-07, "loss": 1.6214, "step": 5304 }, { "epoch": 0.36964777200989446, "grad_norm": 0.692661841317161, "learning_rate": 5.104097152297002e-07, "loss": 1.502, "step": 5305 }, { "epoch": 0.36971745113751175, "grad_norm": 0.6671121171209912, "learning_rate": 5.103464558658937e-07, "loss": 1.484, "step": 5306 }, { "epoch": 0.3697871302651291, "grad_norm": 0.698266599735423, "learning_rate": 5.102831904952278e-07, "loss": 1.5238, "step": 5307 }, { "epoch": 0.3698568093927464, "grad_norm": 0.721316187644341, "learning_rate": 5.102199191207341e-07, "loss": 1.5747, "step": 5308 }, { "epoch": 0.36992648852036375, "grad_norm": 0.7555051596431045, "learning_rate": 5.10156641745445e-07, "loss": 1.4998, "step": 5309 }, { "epoch": 0.36999616764798104, "grad_norm": 0.7378153963986179, "learning_rate": 5.100933583723924e-07, "loss": 1.5894, "step": 5310 }, { "epoch": 0.3700658467755984, "grad_norm": 0.7299552421467299, "learning_rate": 5.100300690046095e-07, "loss": 1.4737, "step": 5311 }, { "epoch": 0.3701355259032157, "grad_norm": 0.7222764229223232, "learning_rate": 5.099667736451289e-07, "loss": 1.534, "step": 5312 }, { "epoch": 0.37020520503083304, "grad_norm": 0.6816801189726973, "learning_rate": 5.099034722969839e-07, "loss": 1.473, "step": 5313 }, { "epoch": 0.37027488415845033, "grad_norm": 0.6548112913225141, "learning_rate": 5.098401649632083e-07, "loss": 1.4501, "step": 5314 }, { "epoch": 0.3703445632860677, "grad_norm": 0.6909094462608453, "learning_rate": 5.097768516468354e-07, "loss": 1.6325, "step": 5315 }, { "epoch": 0.370414242413685, "grad_norm": 0.745114108887267, "learning_rate": 5.097135323508998e-07, "loss": 1.4192, "step": 5316 }, { "epoch": 0.37048392154130233, "grad_norm": 0.7094163660025467, "learning_rate": 5.096502070784358e-07, "loss": 1.5063, "step": 5317 }, { "epoch": 0.3705536006689196, "grad_norm": 0.6850285597934616, "learning_rate": 5.095868758324777e-07, "loss": 1.5345, "step": 5318 }, { "epoch": 0.3706232797965369, "grad_norm": 0.689281312430016, "learning_rate": 5.095235386160609e-07, "loss": 1.4471, "step": 5319 }, { "epoch": 0.37069295892415427, "grad_norm": 0.6915170853407353, "learning_rate": 5.094601954322205e-07, "loss": 1.5534, "step": 5320 }, { "epoch": 0.37076263805177156, "grad_norm": 0.7047793583439973, "learning_rate": 5.09396846283992e-07, "loss": 1.4784, "step": 5321 }, { "epoch": 0.3708323171793889, "grad_norm": 0.6663159334094043, "learning_rate": 5.093334911744112e-07, "loss": 1.4606, "step": 5322 }, { "epoch": 0.3709019963070062, "grad_norm": 0.7150983337368044, "learning_rate": 5.092701301065143e-07, "loss": 1.5738, "step": 5323 }, { "epoch": 0.37097167543462356, "grad_norm": 0.7746113045884914, "learning_rate": 5.092067630833376e-07, "loss": 1.5458, "step": 5324 }, { "epoch": 0.37104135456224085, "grad_norm": 0.726557554264369, "learning_rate": 5.091433901079178e-07, "loss": 1.5695, "step": 5325 }, { "epoch": 0.3711110336898582, "grad_norm": 0.7372150181593171, "learning_rate": 5.09080011183292e-07, "loss": 1.5355, "step": 5326 }, { "epoch": 0.3711807128174755, "grad_norm": 0.6940705110176282, "learning_rate": 5.090166263124972e-07, "loss": 1.6131, "step": 5327 }, { "epoch": 0.37125039194509285, "grad_norm": 0.6706521663019233, "learning_rate": 5.089532354985712e-07, "loss": 1.4164, "step": 5328 }, { "epoch": 0.37132007107271015, "grad_norm": 0.7522207421573274, "learning_rate": 5.088898387445516e-07, "loss": 1.6635, "step": 5329 }, { "epoch": 0.3713897502003275, "grad_norm": 0.7628608894232416, "learning_rate": 5.088264360534766e-07, "loss": 1.4664, "step": 5330 }, { "epoch": 0.3714594293279448, "grad_norm": 0.7187722963470405, "learning_rate": 5.087630274283846e-07, "loss": 1.4531, "step": 5331 }, { "epoch": 0.37152910845556214, "grad_norm": 0.6801481781174705, "learning_rate": 5.086996128723142e-07, "loss": 1.4878, "step": 5332 }, { "epoch": 0.37159878758317944, "grad_norm": 0.7645501547843065, "learning_rate": 5.086361923883045e-07, "loss": 1.4992, "step": 5333 }, { "epoch": 0.3716684667107968, "grad_norm": 0.711836763750871, "learning_rate": 5.085727659793944e-07, "loss": 1.4532, "step": 5334 }, { "epoch": 0.3717381458384141, "grad_norm": 0.712627458827013, "learning_rate": 5.085093336486239e-07, "loss": 1.5743, "step": 5335 }, { "epoch": 0.37180782496603143, "grad_norm": 0.6921801368603905, "learning_rate": 5.084458953990325e-07, "loss": 1.5584, "step": 5336 }, { "epoch": 0.3718775040936487, "grad_norm": 0.7227212206111542, "learning_rate": 5.083824512336604e-07, "loss": 1.4999, "step": 5337 }, { "epoch": 0.3719471832212661, "grad_norm": 0.7828431028231742, "learning_rate": 5.083190011555478e-07, "loss": 1.6068, "step": 5338 }, { "epoch": 0.37201686234888337, "grad_norm": 0.6628573302677304, "learning_rate": 5.082555451677356e-07, "loss": 1.5878, "step": 5339 }, { "epoch": 0.3720865414765007, "grad_norm": 0.6802179570076206, "learning_rate": 5.081920832732647e-07, "loss": 1.4912, "step": 5340 }, { "epoch": 0.372156220604118, "grad_norm": 0.728418405218629, "learning_rate": 5.081286154751763e-07, "loss": 1.5246, "step": 5341 }, { "epoch": 0.37222589973173537, "grad_norm": 0.697994481641133, "learning_rate": 5.080651417765117e-07, "loss": 1.4452, "step": 5342 }, { "epoch": 0.37229557885935266, "grad_norm": 0.7055931388578118, "learning_rate": 5.080016621803128e-07, "loss": 1.4946, "step": 5343 }, { "epoch": 0.37236525798697, "grad_norm": 0.7065869550826593, "learning_rate": 5.079381766896219e-07, "loss": 1.6107, "step": 5344 }, { "epoch": 0.3724349371145873, "grad_norm": 0.7410511984098631, "learning_rate": 5.078746853074811e-07, "loss": 1.5031, "step": 5345 }, { "epoch": 0.37250461624220466, "grad_norm": 0.7901975002588718, "learning_rate": 5.078111880369331e-07, "loss": 1.7529, "step": 5346 }, { "epoch": 0.37257429536982195, "grad_norm": 0.6930274846449783, "learning_rate": 5.07747684881021e-07, "loss": 1.649, "step": 5347 }, { "epoch": 0.3726439744974393, "grad_norm": 0.8041954663566083, "learning_rate": 5.076841758427877e-07, "loss": 1.6462, "step": 5348 }, { "epoch": 0.3727136536250566, "grad_norm": 0.7305914510607037, "learning_rate": 5.07620660925277e-07, "loss": 1.4538, "step": 5349 }, { "epoch": 0.37278333275267395, "grad_norm": 0.7158559747595667, "learning_rate": 5.075571401315321e-07, "loss": 1.4756, "step": 5350 }, { "epoch": 0.37285301188029124, "grad_norm": 0.7073775288961177, "learning_rate": 5.074936134645978e-07, "loss": 1.5127, "step": 5351 }, { "epoch": 0.3729226910079086, "grad_norm": 0.7430699639842753, "learning_rate": 5.074300809275181e-07, "loss": 1.5496, "step": 5352 }, { "epoch": 0.3729923701355259, "grad_norm": 0.7641833104167214, "learning_rate": 5.073665425233374e-07, "loss": 1.4541, "step": 5353 }, { "epoch": 0.37306204926314324, "grad_norm": 0.7180671749293043, "learning_rate": 5.073029982551009e-07, "loss": 1.5463, "step": 5354 }, { "epoch": 0.37313172839076053, "grad_norm": 0.6599128583126527, "learning_rate": 5.072394481258535e-07, "loss": 1.3976, "step": 5355 }, { "epoch": 0.3732014075183779, "grad_norm": 0.7688131761567236, "learning_rate": 5.071758921386409e-07, "loss": 1.5714, "step": 5356 }, { "epoch": 0.3732710866459952, "grad_norm": 0.7072828006472733, "learning_rate": 5.071123302965086e-07, "loss": 1.5208, "step": 5357 }, { "epoch": 0.3733407657736125, "grad_norm": 0.7187426792443632, "learning_rate": 5.070487626025027e-07, "loss": 1.6456, "step": 5358 }, { "epoch": 0.3734104449012298, "grad_norm": 0.7180845966367558, "learning_rate": 5.069851890596698e-07, "loss": 1.5107, "step": 5359 }, { "epoch": 0.37348012402884717, "grad_norm": 0.6708545748911989, "learning_rate": 5.06921609671056e-07, "loss": 1.4611, "step": 5360 }, { "epoch": 0.37354980315646447, "grad_norm": 0.7028485609419693, "learning_rate": 5.068580244397085e-07, "loss": 1.4579, "step": 5361 }, { "epoch": 0.3736194822840818, "grad_norm": 0.6713681496765652, "learning_rate": 5.067944333686743e-07, "loss": 1.4134, "step": 5362 }, { "epoch": 0.3736891614116991, "grad_norm": 0.6680925713652732, "learning_rate": 5.067308364610006e-07, "loss": 1.5172, "step": 5363 }, { "epoch": 0.37375884053931646, "grad_norm": 0.7842330472219948, "learning_rate": 5.066672337197354e-07, "loss": 1.5071, "step": 5364 }, { "epoch": 0.37382851966693376, "grad_norm": 0.7628791637111324, "learning_rate": 5.066036251479266e-07, "loss": 1.5212, "step": 5365 }, { "epoch": 0.3738981987945511, "grad_norm": 0.6813664665615757, "learning_rate": 5.065400107486226e-07, "loss": 1.5094, "step": 5366 }, { "epoch": 0.3739678779221684, "grad_norm": 0.7181594596254843, "learning_rate": 5.064763905248716e-07, "loss": 1.4813, "step": 5367 }, { "epoch": 0.37403755704978575, "grad_norm": 0.7657310789918401, "learning_rate": 5.064127644797227e-07, "loss": 1.6721, "step": 5368 }, { "epoch": 0.37410723617740305, "grad_norm": 0.7271926630736468, "learning_rate": 5.063491326162248e-07, "loss": 1.5984, "step": 5369 }, { "epoch": 0.3741769153050204, "grad_norm": 0.7156351904619168, "learning_rate": 5.062854949374273e-07, "loss": 1.6321, "step": 5370 }, { "epoch": 0.3742465944326377, "grad_norm": 0.6973806824549705, "learning_rate": 5.062218514463799e-07, "loss": 1.5303, "step": 5371 }, { "epoch": 0.37431627356025504, "grad_norm": 0.7268627075391219, "learning_rate": 5.061582021461325e-07, "loss": 1.6009, "step": 5372 }, { "epoch": 0.37438595268787234, "grad_norm": 0.7435202270708391, "learning_rate": 5.060945470397354e-07, "loss": 1.5263, "step": 5373 }, { "epoch": 0.3744556318154897, "grad_norm": 0.7061226191508438, "learning_rate": 5.060308861302389e-07, "loss": 1.5312, "step": 5374 }, { "epoch": 0.374525310943107, "grad_norm": 0.6935908957909679, "learning_rate": 5.059672194206938e-07, "loss": 1.4977, "step": 5375 }, { "epoch": 0.37459499007072433, "grad_norm": 0.8044608362031292, "learning_rate": 5.059035469141515e-07, "loss": 1.5661, "step": 5376 }, { "epoch": 0.3746646691983416, "grad_norm": 0.6649746679278241, "learning_rate": 5.058398686136628e-07, "loss": 1.5039, "step": 5377 }, { "epoch": 0.374734348325959, "grad_norm": 0.7565675239565676, "learning_rate": 5.057761845222795e-07, "loss": 1.5466, "step": 5378 }, { "epoch": 0.37480402745357627, "grad_norm": 0.6988450148686387, "learning_rate": 5.057124946430535e-07, "loss": 1.524, "step": 5379 }, { "epoch": 0.3748737065811936, "grad_norm": 0.6789677269566424, "learning_rate": 5.056487989790371e-07, "loss": 1.5258, "step": 5380 }, { "epoch": 0.3749433857088109, "grad_norm": 0.7113732310328157, "learning_rate": 5.055850975332823e-07, "loss": 1.5414, "step": 5381 }, { "epoch": 0.37501306483642827, "grad_norm": 0.6976581810589285, "learning_rate": 5.055213903088424e-07, "loss": 1.5575, "step": 5382 }, { "epoch": 0.37508274396404556, "grad_norm": 0.6854275337810213, "learning_rate": 5.054576773087697e-07, "loss": 1.5032, "step": 5383 }, { "epoch": 0.3751524230916629, "grad_norm": 0.7527863161147542, "learning_rate": 5.05393958536118e-07, "loss": 1.5894, "step": 5384 }, { "epoch": 0.3752221022192802, "grad_norm": 0.6747886537395279, "learning_rate": 5.053302339939407e-07, "loss": 1.4749, "step": 5385 }, { "epoch": 0.37529178134689756, "grad_norm": 0.6837579373025371, "learning_rate": 5.052665036852914e-07, "loss": 1.5364, "step": 5386 }, { "epoch": 0.37536146047451485, "grad_norm": 0.7873639942673621, "learning_rate": 5.052027676132245e-07, "loss": 1.7117, "step": 5387 }, { "epoch": 0.3754311396021322, "grad_norm": 0.7919474795693494, "learning_rate": 5.051390257807941e-07, "loss": 1.6618, "step": 5388 }, { "epoch": 0.3755008187297495, "grad_norm": 0.7103425061197322, "learning_rate": 5.05075278191055e-07, "loss": 1.6743, "step": 5389 }, { "epoch": 0.37557049785736685, "grad_norm": 0.7161819553104973, "learning_rate": 5.05011524847062e-07, "loss": 1.566, "step": 5390 }, { "epoch": 0.37564017698498414, "grad_norm": 0.7334385643113612, "learning_rate": 5.049477657518704e-07, "loss": 1.5287, "step": 5391 }, { "epoch": 0.3757098561126015, "grad_norm": 0.7572351651031275, "learning_rate": 5.048840009085356e-07, "loss": 1.5927, "step": 5392 }, { "epoch": 0.3757795352402188, "grad_norm": 0.7124113602854409, "learning_rate": 5.048202303201134e-07, "loss": 1.5382, "step": 5393 }, { "epoch": 0.37584921436783614, "grad_norm": 0.6855279268109093, "learning_rate": 5.047564539896597e-07, "loss": 1.489, "step": 5394 }, { "epoch": 0.37591889349545343, "grad_norm": 0.7122062077638511, "learning_rate": 5.046926719202309e-07, "loss": 1.4946, "step": 5395 }, { "epoch": 0.3759885726230708, "grad_norm": 0.7091079314088186, "learning_rate": 5.046288841148835e-07, "loss": 1.6135, "step": 5396 }, { "epoch": 0.3760582517506881, "grad_norm": 0.6740079747894211, "learning_rate": 5.045650905766745e-07, "loss": 1.553, "step": 5397 }, { "epoch": 0.3761279308783054, "grad_norm": 0.6702936126540127, "learning_rate": 5.045012913086607e-07, "loss": 1.4964, "step": 5398 }, { "epoch": 0.3761976100059227, "grad_norm": 0.6985064459949539, "learning_rate": 5.044374863138998e-07, "loss": 1.4901, "step": 5399 }, { "epoch": 0.3762672891335401, "grad_norm": 0.7301075792224525, "learning_rate": 5.043736755954493e-07, "loss": 1.5574, "step": 5400 }, { "epoch": 0.37633696826115737, "grad_norm": 0.749172825026058, "learning_rate": 5.043098591563673e-07, "loss": 1.5362, "step": 5401 }, { "epoch": 0.3764066473887747, "grad_norm": 0.7094520771114469, "learning_rate": 5.042460369997119e-07, "loss": 1.5059, "step": 5402 }, { "epoch": 0.376476326516392, "grad_norm": 0.7207987192677225, "learning_rate": 5.041822091285415e-07, "loss": 1.4182, "step": 5403 }, { "epoch": 0.37654600564400936, "grad_norm": 0.7707547850189875, "learning_rate": 5.041183755459151e-07, "loss": 1.619, "step": 5404 }, { "epoch": 0.37661568477162666, "grad_norm": 0.6830636923143021, "learning_rate": 5.040545362548915e-07, "loss": 1.5678, "step": 5405 }, { "epoch": 0.376685363899244, "grad_norm": 0.732725900627606, "learning_rate": 5.039906912585302e-07, "loss": 1.5759, "step": 5406 }, { "epoch": 0.3767550430268613, "grad_norm": 0.7422161056576552, "learning_rate": 5.039268405598906e-07, "loss": 1.4853, "step": 5407 }, { "epoch": 0.37682472215447865, "grad_norm": 0.7256022665248492, "learning_rate": 5.038629841620327e-07, "loss": 1.5363, "step": 5408 }, { "epoch": 0.37689440128209595, "grad_norm": 0.6892575042707771, "learning_rate": 5.037991220680166e-07, "loss": 1.6261, "step": 5409 }, { "epoch": 0.3769640804097133, "grad_norm": 0.6804116027311646, "learning_rate": 5.037352542809025e-07, "loss": 1.5511, "step": 5410 }, { "epoch": 0.3770337595373306, "grad_norm": 0.7689597898444267, "learning_rate": 5.036713808037515e-07, "loss": 1.5802, "step": 5411 }, { "epoch": 0.3771034386649479, "grad_norm": 0.6964237941047433, "learning_rate": 5.036075016396242e-07, "loss": 1.4965, "step": 5412 }, { "epoch": 0.37717311779256524, "grad_norm": 0.7435068845298077, "learning_rate": 5.035436167915819e-07, "loss": 1.5212, "step": 5413 }, { "epoch": 0.37724279692018253, "grad_norm": 0.7349257406525216, "learning_rate": 5.034797262626861e-07, "loss": 1.4656, "step": 5414 }, { "epoch": 0.3773124760477999, "grad_norm": 0.6882927821800732, "learning_rate": 5.034158300559986e-07, "loss": 1.5055, "step": 5415 }, { "epoch": 0.3773821551754172, "grad_norm": 0.7189670232247855, "learning_rate": 5.033519281745815e-07, "loss": 1.6791, "step": 5416 }, { "epoch": 0.37745183430303453, "grad_norm": 0.7508315288135808, "learning_rate": 5.032880206214968e-07, "loss": 1.5512, "step": 5417 }, { "epoch": 0.3775215134306518, "grad_norm": 0.7366512193101076, "learning_rate": 5.032241073998076e-07, "loss": 1.5568, "step": 5418 }, { "epoch": 0.3775911925582692, "grad_norm": 0.6640423914093597, "learning_rate": 5.031601885125763e-07, "loss": 1.487, "step": 5419 }, { "epoch": 0.37766087168588647, "grad_norm": 0.6553708894293232, "learning_rate": 5.030962639628663e-07, "loss": 1.3905, "step": 5420 }, { "epoch": 0.3777305508135038, "grad_norm": 0.738643876142048, "learning_rate": 5.030323337537408e-07, "loss": 1.5277, "step": 5421 }, { "epoch": 0.3778002299411211, "grad_norm": 0.6964778026790148, "learning_rate": 5.029683978882635e-07, "loss": 1.4764, "step": 5422 }, { "epoch": 0.37786990906873846, "grad_norm": 0.7305960630310033, "learning_rate": 5.029044563694985e-07, "loss": 1.5922, "step": 5423 }, { "epoch": 0.37793958819635576, "grad_norm": 0.7575001757201274, "learning_rate": 5.028405092005098e-07, "loss": 1.5197, "step": 5424 }, { "epoch": 0.3780092673239731, "grad_norm": 0.6845732719679103, "learning_rate": 5.02776556384362e-07, "loss": 1.6039, "step": 5425 }, { "epoch": 0.3780789464515904, "grad_norm": 0.7388090099334985, "learning_rate": 5.0271259792412e-07, "loss": 1.4985, "step": 5426 }, { "epoch": 0.37814862557920775, "grad_norm": 0.6893090869935965, "learning_rate": 5.026486338228484e-07, "loss": 1.5317, "step": 5427 }, { "epoch": 0.37821830470682505, "grad_norm": 0.7481567809018613, "learning_rate": 5.025846640836129e-07, "loss": 1.5365, "step": 5428 }, { "epoch": 0.3782879838344424, "grad_norm": 0.7477294337521692, "learning_rate": 5.025206887094789e-07, "loss": 1.6067, "step": 5429 }, { "epoch": 0.3783576629620597, "grad_norm": 0.702638609667168, "learning_rate": 5.02456707703512e-07, "loss": 1.5787, "step": 5430 }, { "epoch": 0.37842734208967704, "grad_norm": 0.6619744765586045, "learning_rate": 5.023927210687788e-07, "loss": 1.5176, "step": 5431 }, { "epoch": 0.37849702121729434, "grad_norm": 0.6782202426529567, "learning_rate": 5.023287288083453e-07, "loss": 1.3905, "step": 5432 }, { "epoch": 0.3785667003449117, "grad_norm": 0.7090382071085164, "learning_rate": 5.022647309252783e-07, "loss": 1.548, "step": 5433 }, { "epoch": 0.378636379472529, "grad_norm": 0.8299623002044955, "learning_rate": 5.022007274226446e-07, "loss": 1.6024, "step": 5434 }, { "epoch": 0.37870605860014633, "grad_norm": 0.7610548390668402, "learning_rate": 5.021367183035114e-07, "loss": 1.6044, "step": 5435 }, { "epoch": 0.37877573772776363, "grad_norm": 0.7751485981500597, "learning_rate": 5.020727035709463e-07, "loss": 1.5679, "step": 5436 }, { "epoch": 0.378845416855381, "grad_norm": 0.7063453389555776, "learning_rate": 5.020086832280168e-07, "loss": 1.5361, "step": 5437 }, { "epoch": 0.3789150959829983, "grad_norm": 0.6902667458465007, "learning_rate": 5.019446572777909e-07, "loss": 1.5486, "step": 5438 }, { "epoch": 0.3789847751106156, "grad_norm": 0.6808435381311487, "learning_rate": 5.018806257233372e-07, "loss": 1.5548, "step": 5439 }, { "epoch": 0.3790544542382329, "grad_norm": 0.715213686595762, "learning_rate": 5.018165885677238e-07, "loss": 1.4849, "step": 5440 }, { "epoch": 0.37912413336585027, "grad_norm": 0.6856158512743733, "learning_rate": 5.017525458140196e-07, "loss": 1.4843, "step": 5441 }, { "epoch": 0.37919381249346756, "grad_norm": 0.7319426863719979, "learning_rate": 5.016884974652937e-07, "loss": 1.5292, "step": 5442 }, { "epoch": 0.3792634916210849, "grad_norm": 0.7207250460074514, "learning_rate": 5.016244435246157e-07, "loss": 1.563, "step": 5443 }, { "epoch": 0.3793331707487022, "grad_norm": 0.7791614239074859, "learning_rate": 5.015603839950547e-07, "loss": 1.5604, "step": 5444 }, { "epoch": 0.37940284987631956, "grad_norm": 0.7509861266693079, "learning_rate": 5.014963188796808e-07, "loss": 1.6997, "step": 5445 }, { "epoch": 0.37947252900393685, "grad_norm": 0.7855757249315607, "learning_rate": 5.014322481815643e-07, "loss": 1.3949, "step": 5446 }, { "epoch": 0.3795422081315542, "grad_norm": 0.7625165655696754, "learning_rate": 5.013681719037753e-07, "loss": 1.6439, "step": 5447 }, { "epoch": 0.3796118872591715, "grad_norm": 0.7524121252006589, "learning_rate": 5.013040900493848e-07, "loss": 1.5756, "step": 5448 }, { "epoch": 0.37968156638678885, "grad_norm": 0.7053803505432666, "learning_rate": 5.012400026214633e-07, "loss": 1.409, "step": 5449 }, { "epoch": 0.37975124551440614, "grad_norm": 0.7577344283390356, "learning_rate": 5.011759096230823e-07, "loss": 1.6402, "step": 5450 }, { "epoch": 0.3798209246420235, "grad_norm": 0.6913724292869309, "learning_rate": 5.011118110573133e-07, "loss": 1.5397, "step": 5451 }, { "epoch": 0.3798906037696408, "grad_norm": 0.656322013225874, "learning_rate": 5.010477069272278e-07, "loss": 1.4744, "step": 5452 }, { "epoch": 0.37996028289725814, "grad_norm": 0.6962733401306395, "learning_rate": 5.009835972358981e-07, "loss": 1.6503, "step": 5453 }, { "epoch": 0.38002996202487543, "grad_norm": 0.7618166591802068, "learning_rate": 5.009194819863962e-07, "loss": 1.5865, "step": 5454 }, { "epoch": 0.3800996411524928, "grad_norm": 0.7562107443197053, "learning_rate": 5.008553611817948e-07, "loss": 1.5157, "step": 5455 }, { "epoch": 0.3801693202801101, "grad_norm": 0.7074325420993474, "learning_rate": 5.007912348251666e-07, "loss": 1.4497, "step": 5456 }, { "epoch": 0.38023899940772743, "grad_norm": 0.7242519999159883, "learning_rate": 5.007271029195848e-07, "loss": 1.4, "step": 5457 }, { "epoch": 0.3803086785353447, "grad_norm": 0.6342168243414719, "learning_rate": 5.006629654681224e-07, "loss": 1.3542, "step": 5458 }, { "epoch": 0.3803783576629621, "grad_norm": 0.7126075656711214, "learning_rate": 5.005988224738535e-07, "loss": 1.4551, "step": 5459 }, { "epoch": 0.38044803679057937, "grad_norm": 0.7270437470510306, "learning_rate": 5.005346739398517e-07, "loss": 1.5397, "step": 5460 }, { "epoch": 0.3805177159181967, "grad_norm": 0.7624831018762762, "learning_rate": 5.00470519869191e-07, "loss": 1.6236, "step": 5461 }, { "epoch": 0.380587395045814, "grad_norm": 0.6992172062429126, "learning_rate": 5.00406360264946e-07, "loss": 1.4604, "step": 5462 }, { "epoch": 0.38065707417343136, "grad_norm": 0.6566970192760084, "learning_rate": 5.003421951301914e-07, "loss": 1.4452, "step": 5463 }, { "epoch": 0.38072675330104866, "grad_norm": 0.6998814130100979, "learning_rate": 5.002780244680018e-07, "loss": 1.5508, "step": 5464 }, { "epoch": 0.380796432428666, "grad_norm": 0.7732539204937761, "learning_rate": 5.002138482814528e-07, "loss": 1.7181, "step": 5465 }, { "epoch": 0.3808661115562833, "grad_norm": 0.6567647406485111, "learning_rate": 5.001496665736194e-07, "loss": 1.5205, "step": 5466 }, { "epoch": 0.38093579068390065, "grad_norm": 0.7193635427827528, "learning_rate": 5.000854793475778e-07, "loss": 1.6246, "step": 5467 }, { "epoch": 0.38100546981151795, "grad_norm": 0.6951657989880955, "learning_rate": 5.000212866064038e-07, "loss": 1.6509, "step": 5468 }, { "epoch": 0.3810751489391353, "grad_norm": 0.7213532599547762, "learning_rate": 4.999570883531735e-07, "loss": 1.4985, "step": 5469 }, { "epoch": 0.3811448280667526, "grad_norm": 0.754431340305897, "learning_rate": 4.998928845909635e-07, "loss": 1.5693, "step": 5470 }, { "epoch": 0.38121450719436994, "grad_norm": 0.6989133775278445, "learning_rate": 4.998286753228507e-07, "loss": 1.5648, "step": 5471 }, { "epoch": 0.38128418632198724, "grad_norm": 0.7707962594955189, "learning_rate": 4.99764460551912e-07, "loss": 1.6039, "step": 5472 }, { "epoch": 0.3813538654496046, "grad_norm": 0.7054262814931245, "learning_rate": 4.997002402812248e-07, "loss": 1.4979, "step": 5473 }, { "epoch": 0.3814235445772219, "grad_norm": 0.7334950363402816, "learning_rate": 4.996360145138664e-07, "loss": 1.5707, "step": 5474 }, { "epoch": 0.38149322370483923, "grad_norm": 0.7876729845840897, "learning_rate": 4.99571783252915e-07, "loss": 1.7612, "step": 5475 }, { "epoch": 0.38156290283245653, "grad_norm": 0.658733357304968, "learning_rate": 4.995075465014486e-07, "loss": 1.4626, "step": 5476 }, { "epoch": 0.3816325819600739, "grad_norm": 0.7316933855652641, "learning_rate": 4.994433042625454e-07, "loss": 1.5544, "step": 5477 }, { "epoch": 0.3817022610876912, "grad_norm": 0.6762053571770205, "learning_rate": 4.99379056539284e-07, "loss": 1.4745, "step": 5478 }, { "epoch": 0.3817719402153085, "grad_norm": 0.7084236910699324, "learning_rate": 4.993148033347437e-07, "loss": 1.5001, "step": 5479 }, { "epoch": 0.3818416193429258, "grad_norm": 0.7271727538874175, "learning_rate": 4.992505446520031e-07, "loss": 1.5476, "step": 5480 }, { "epoch": 0.38191129847054317, "grad_norm": 0.8292686404246161, "learning_rate": 4.991862804941419e-07, "loss": 1.5489, "step": 5481 }, { "epoch": 0.38198097759816046, "grad_norm": 0.7133473578558076, "learning_rate": 4.991220108642397e-07, "loss": 1.5058, "step": 5482 }, { "epoch": 0.3820506567257778, "grad_norm": 0.8080700827951963, "learning_rate": 4.990577357653765e-07, "loss": 1.566, "step": 5483 }, { "epoch": 0.3821203358533951, "grad_norm": 0.6708760556900807, "learning_rate": 4.989934552006323e-07, "loss": 1.5028, "step": 5484 }, { "epoch": 0.38219001498101246, "grad_norm": 0.7097547017774847, "learning_rate": 4.989291691730879e-07, "loss": 1.4198, "step": 5485 }, { "epoch": 0.38225969410862976, "grad_norm": 0.7369934710042176, "learning_rate": 4.988648776858237e-07, "loss": 1.4532, "step": 5486 }, { "epoch": 0.3823293732362471, "grad_norm": 0.6939493609803657, "learning_rate": 4.98800580741921e-07, "loss": 1.4274, "step": 5487 }, { "epoch": 0.3823990523638644, "grad_norm": 0.6749067587849705, "learning_rate": 4.987362783444606e-07, "loss": 1.5632, "step": 5488 }, { "epoch": 0.38246873149148175, "grad_norm": 0.6991470780685725, "learning_rate": 4.986719704965244e-07, "loss": 1.4716, "step": 5489 }, { "epoch": 0.38253841061909905, "grad_norm": 0.7311252973539579, "learning_rate": 4.98607657201194e-07, "loss": 1.5337, "step": 5490 }, { "epoch": 0.3826080897467164, "grad_norm": 0.746727609531956, "learning_rate": 4.985433384615513e-07, "loss": 1.563, "step": 5491 }, { "epoch": 0.3826777688743337, "grad_norm": 0.6725022234464829, "learning_rate": 4.984790142806788e-07, "loss": 1.4125, "step": 5492 }, { "epoch": 0.38274744800195104, "grad_norm": 0.7290834568894429, "learning_rate": 4.98414684661659e-07, "loss": 1.4797, "step": 5493 }, { "epoch": 0.38281712712956834, "grad_norm": 0.8232159581467848, "learning_rate": 4.983503496075746e-07, "loss": 1.5075, "step": 5494 }, { "epoch": 0.3828868062571857, "grad_norm": 0.7540441799845059, "learning_rate": 4.982860091215087e-07, "loss": 1.6719, "step": 5495 }, { "epoch": 0.382956485384803, "grad_norm": 0.6928721393975205, "learning_rate": 4.982216632065449e-07, "loss": 1.4631, "step": 5496 }, { "epoch": 0.38302616451242033, "grad_norm": 0.6788751573672828, "learning_rate": 4.981573118657665e-07, "loss": 1.3759, "step": 5497 }, { "epoch": 0.3830958436400376, "grad_norm": 0.7214898466660512, "learning_rate": 4.980929551022572e-07, "loss": 1.6067, "step": 5498 }, { "epoch": 0.383165522767655, "grad_norm": 0.7308153122274459, "learning_rate": 4.980285929191015e-07, "loss": 1.4594, "step": 5499 }, { "epoch": 0.38323520189527227, "grad_norm": 0.6670540192625501, "learning_rate": 4.979642253193835e-07, "loss": 1.4535, "step": 5500 }, { "epoch": 0.3833048810228896, "grad_norm": 0.7395483755910071, "learning_rate": 4.978998523061879e-07, "loss": 1.539, "step": 5501 }, { "epoch": 0.3833745601505069, "grad_norm": 0.7695363996801382, "learning_rate": 4.978354738825996e-07, "loss": 1.4483, "step": 5502 }, { "epoch": 0.3834442392781242, "grad_norm": 0.718100408377757, "learning_rate": 4.977710900517039e-07, "loss": 1.6256, "step": 5503 }, { "epoch": 0.38351391840574156, "grad_norm": 0.69109645152382, "learning_rate": 4.977067008165859e-07, "loss": 1.5024, "step": 5504 }, { "epoch": 0.38358359753335886, "grad_norm": 0.7116426387946586, "learning_rate": 4.976423061803315e-07, "loss": 1.6046, "step": 5505 }, { "epoch": 0.3836532766609762, "grad_norm": 0.7852170766850769, "learning_rate": 4.975779061460264e-07, "loss": 1.7033, "step": 5506 }, { "epoch": 0.3837229557885935, "grad_norm": 0.7689308983734927, "learning_rate": 4.97513500716757e-07, "loss": 1.557, "step": 5507 }, { "epoch": 0.38379263491621085, "grad_norm": 0.7233222068190561, "learning_rate": 4.974490898956097e-07, "loss": 1.5805, "step": 5508 }, { "epoch": 0.38386231404382815, "grad_norm": 0.7177217070825982, "learning_rate": 4.973846736856711e-07, "loss": 1.5573, "step": 5509 }, { "epoch": 0.3839319931714455, "grad_norm": 0.7513702094061874, "learning_rate": 4.973202520900282e-07, "loss": 1.5063, "step": 5510 }, { "epoch": 0.3840016722990628, "grad_norm": 0.7713853038917127, "learning_rate": 4.972558251117684e-07, "loss": 1.5505, "step": 5511 }, { "epoch": 0.38407135142668014, "grad_norm": 0.7090530504996623, "learning_rate": 4.971913927539787e-07, "loss": 1.4762, "step": 5512 }, { "epoch": 0.38414103055429744, "grad_norm": 0.7231100039969972, "learning_rate": 4.971269550197474e-07, "loss": 1.5707, "step": 5513 }, { "epoch": 0.3842107096819148, "grad_norm": 0.705309164694651, "learning_rate": 4.970625119121621e-07, "loss": 1.5038, "step": 5514 }, { "epoch": 0.3842803888095321, "grad_norm": 0.6751102981403432, "learning_rate": 4.969980634343112e-07, "loss": 1.6372, "step": 5515 }, { "epoch": 0.38435006793714943, "grad_norm": 0.7449367266167386, "learning_rate": 4.969336095892832e-07, "loss": 1.5536, "step": 5516 }, { "epoch": 0.3844197470647667, "grad_norm": 0.8001794393337616, "learning_rate": 4.968691503801668e-07, "loss": 1.5196, "step": 5517 }, { "epoch": 0.3844894261923841, "grad_norm": 0.8006640674300962, "learning_rate": 4.968046858100511e-07, "loss": 1.5362, "step": 5518 }, { "epoch": 0.38455910532000137, "grad_norm": 0.7143062739796038, "learning_rate": 4.967402158820253e-07, "loss": 1.5016, "step": 5519 }, { "epoch": 0.3846287844476187, "grad_norm": 0.7395199641524862, "learning_rate": 4.966757405991789e-07, "loss": 1.5336, "step": 5520 }, { "epoch": 0.384698463575236, "grad_norm": 0.7173154576633266, "learning_rate": 4.966112599646018e-07, "loss": 1.473, "step": 5521 }, { "epoch": 0.38476814270285337, "grad_norm": 0.6862498887428835, "learning_rate": 4.965467739813839e-07, "loss": 1.5229, "step": 5522 }, { "epoch": 0.38483782183047066, "grad_norm": 0.7432309321652748, "learning_rate": 4.964822826526156e-07, "loss": 1.4813, "step": 5523 }, { "epoch": 0.384907500958088, "grad_norm": 0.744188463216907, "learning_rate": 4.964177859813877e-07, "loss": 1.4979, "step": 5524 }, { "epoch": 0.3849771800857053, "grad_norm": 0.6818187295241545, "learning_rate": 4.963532839707905e-07, "loss": 1.5359, "step": 5525 }, { "epoch": 0.38504685921332266, "grad_norm": 0.7178209922409182, "learning_rate": 4.962887766239153e-07, "loss": 1.6431, "step": 5526 }, { "epoch": 0.38511653834093995, "grad_norm": 0.7332062253347716, "learning_rate": 4.962242639438536e-07, "loss": 1.5583, "step": 5527 }, { "epoch": 0.3851862174685573, "grad_norm": 0.6889685000442761, "learning_rate": 4.961597459336968e-07, "loss": 1.5706, "step": 5528 }, { "epoch": 0.3852558965961746, "grad_norm": 0.8052842084542119, "learning_rate": 4.960952225965369e-07, "loss": 1.5433, "step": 5529 }, { "epoch": 0.38532557572379195, "grad_norm": 0.766408402464758, "learning_rate": 4.960306939354656e-07, "loss": 1.5682, "step": 5530 }, { "epoch": 0.38539525485140924, "grad_norm": 0.7484794652400308, "learning_rate": 4.959661599535756e-07, "loss": 1.5582, "step": 5531 }, { "epoch": 0.3854649339790266, "grad_norm": 0.7820263373509483, "learning_rate": 4.959016206539595e-07, "loss": 1.7689, "step": 5532 }, { "epoch": 0.3855346131066439, "grad_norm": 0.7259786737233994, "learning_rate": 4.9583707603971e-07, "loss": 1.6318, "step": 5533 }, { "epoch": 0.38560429223426124, "grad_norm": 0.7676985737076506, "learning_rate": 4.957725261139201e-07, "loss": 1.6155, "step": 5534 }, { "epoch": 0.38567397136187853, "grad_norm": 0.7069887072622391, "learning_rate": 4.957079708796835e-07, "loss": 1.3996, "step": 5535 }, { "epoch": 0.3857436504894959, "grad_norm": 0.7030376049130003, "learning_rate": 4.956434103400937e-07, "loss": 1.5679, "step": 5536 }, { "epoch": 0.3858133296171132, "grad_norm": 0.6855415717927498, "learning_rate": 4.955788444982444e-07, "loss": 1.5555, "step": 5537 }, { "epoch": 0.3858830087447305, "grad_norm": 0.7614205686727051, "learning_rate": 4.9551427335723e-07, "loss": 1.6741, "step": 5538 }, { "epoch": 0.3859526878723478, "grad_norm": 0.6989562304165375, "learning_rate": 4.954496969201446e-07, "loss": 1.6391, "step": 5539 }, { "epoch": 0.38602236699996517, "grad_norm": 0.695698775967819, "learning_rate": 4.953851151900831e-07, "loss": 1.5328, "step": 5540 }, { "epoch": 0.38609204612758247, "grad_norm": 0.7417673904195509, "learning_rate": 4.953205281701402e-07, "loss": 1.6011, "step": 5541 }, { "epoch": 0.3861617252551998, "grad_norm": 0.6907559477471068, "learning_rate": 4.95255935863411e-07, "loss": 1.4323, "step": 5542 }, { "epoch": 0.3862314043828171, "grad_norm": 0.6779292317473243, "learning_rate": 4.951913382729913e-07, "loss": 1.563, "step": 5543 }, { "epoch": 0.38630108351043446, "grad_norm": 0.7723058973909813, "learning_rate": 4.951267354019762e-07, "loss": 1.5341, "step": 5544 }, { "epoch": 0.38637076263805176, "grad_norm": 0.7137990992337757, "learning_rate": 4.95062127253462e-07, "loss": 1.5635, "step": 5545 }, { "epoch": 0.3864404417656691, "grad_norm": 0.704918015158124, "learning_rate": 4.949975138305446e-07, "loss": 1.4581, "step": 5546 }, { "epoch": 0.3865101208932864, "grad_norm": 0.7155688218885253, "learning_rate": 4.949328951363204e-07, "loss": 1.5916, "step": 5547 }, { "epoch": 0.38657980002090375, "grad_norm": 0.7932583910439622, "learning_rate": 4.948682711738865e-07, "loss": 1.5528, "step": 5548 }, { "epoch": 0.38664947914852105, "grad_norm": 0.8083960035017587, "learning_rate": 4.948036419463393e-07, "loss": 1.5914, "step": 5549 }, { "epoch": 0.3867191582761384, "grad_norm": 0.667764867736382, "learning_rate": 4.947390074567761e-07, "loss": 1.5833, "step": 5550 }, { "epoch": 0.3867888374037557, "grad_norm": 0.7419549291652704, "learning_rate": 4.946743677082945e-07, "loss": 1.5124, "step": 5551 }, { "epoch": 0.38685851653137304, "grad_norm": 0.6870025872836284, "learning_rate": 4.946097227039921e-07, "loss": 1.5114, "step": 5552 }, { "epoch": 0.38692819565899034, "grad_norm": 0.6755742379070345, "learning_rate": 4.945450724469665e-07, "loss": 1.5062, "step": 5553 }, { "epoch": 0.3869978747866077, "grad_norm": 0.7192605944563015, "learning_rate": 4.944804169403164e-07, "loss": 1.5299, "step": 5554 }, { "epoch": 0.387067553914225, "grad_norm": 0.7074686788750786, "learning_rate": 4.944157561871397e-07, "loss": 1.4861, "step": 5555 }, { "epoch": 0.38713723304184233, "grad_norm": 0.7303712587132282, "learning_rate": 4.943510901905356e-07, "loss": 1.5789, "step": 5556 }, { "epoch": 0.3872069121694596, "grad_norm": 0.70092718611266, "learning_rate": 4.942864189536027e-07, "loss": 1.5269, "step": 5557 }, { "epoch": 0.387276591297077, "grad_norm": 0.7749207884282524, "learning_rate": 4.942217424794401e-07, "loss": 1.5306, "step": 5558 }, { "epoch": 0.3873462704246943, "grad_norm": 0.7612048856830113, "learning_rate": 4.941570607711474e-07, "loss": 1.6048, "step": 5559 }, { "epoch": 0.3874159495523116, "grad_norm": 0.7291029938509181, "learning_rate": 4.940923738318245e-07, "loss": 1.4984, "step": 5560 }, { "epoch": 0.3874856286799289, "grad_norm": 0.6677895182838004, "learning_rate": 4.940276816645708e-07, "loss": 1.4915, "step": 5561 }, { "epoch": 0.38755530780754627, "grad_norm": 0.7735121442224963, "learning_rate": 4.939629842724868e-07, "loss": 1.5627, "step": 5562 }, { "epoch": 0.38762498693516356, "grad_norm": 0.7790175853606958, "learning_rate": 4.93898281658673e-07, "loss": 1.4801, "step": 5563 }, { "epoch": 0.3876946660627809, "grad_norm": 0.7145681800827799, "learning_rate": 4.9383357382623e-07, "loss": 1.4691, "step": 5564 }, { "epoch": 0.3877643451903982, "grad_norm": 0.7432846857234464, "learning_rate": 4.937688607782586e-07, "loss": 1.5028, "step": 5565 }, { "epoch": 0.38783402431801556, "grad_norm": 0.7159183915911248, "learning_rate": 4.937041425178601e-07, "loss": 1.5037, "step": 5566 }, { "epoch": 0.38790370344563285, "grad_norm": 0.7269668278784979, "learning_rate": 4.936394190481361e-07, "loss": 1.4085, "step": 5567 }, { "epoch": 0.3879733825732502, "grad_norm": 0.6866858671417458, "learning_rate": 4.93574690372188e-07, "loss": 1.4342, "step": 5568 }, { "epoch": 0.3880430617008675, "grad_norm": 0.7145490114822929, "learning_rate": 4.935099564931177e-07, "loss": 1.5746, "step": 5569 }, { "epoch": 0.38811274082848485, "grad_norm": 0.7138045405297827, "learning_rate": 4.934452174140276e-07, "loss": 1.5896, "step": 5570 }, { "epoch": 0.38818241995610214, "grad_norm": 0.7095926528479751, "learning_rate": 4.933804731380201e-07, "loss": 1.5292, "step": 5571 }, { "epoch": 0.3882520990837195, "grad_norm": 0.7129684749761618, "learning_rate": 4.933157236681978e-07, "loss": 1.5525, "step": 5572 }, { "epoch": 0.3883217782113368, "grad_norm": 0.7328684505244791, "learning_rate": 4.932509690076638e-07, "loss": 1.7226, "step": 5573 }, { "epoch": 0.38839145733895414, "grad_norm": 0.7339828615577176, "learning_rate": 4.931862091595208e-07, "loss": 1.6035, "step": 5574 }, { "epoch": 0.38846113646657143, "grad_norm": 0.6450537560923328, "learning_rate": 4.931214441268729e-07, "loss": 1.5124, "step": 5575 }, { "epoch": 0.3885308155941888, "grad_norm": 0.7300985149976399, "learning_rate": 4.930566739128232e-07, "loss": 1.4279, "step": 5576 }, { "epoch": 0.3886004947218061, "grad_norm": 0.6980864377031916, "learning_rate": 4.92991898520476e-07, "loss": 1.5553, "step": 5577 }, { "epoch": 0.38867017384942343, "grad_norm": 0.7027638402706293, "learning_rate": 4.929271179529351e-07, "loss": 1.5512, "step": 5578 }, { "epoch": 0.3887398529770407, "grad_norm": 0.7548467168888858, "learning_rate": 4.928623322133055e-07, "loss": 1.5324, "step": 5579 }, { "epoch": 0.3888095321046581, "grad_norm": 0.7409166404373027, "learning_rate": 4.927975413046912e-07, "loss": 1.6207, "step": 5580 }, { "epoch": 0.38887921123227537, "grad_norm": 0.6605626587391256, "learning_rate": 4.927327452301975e-07, "loss": 1.502, "step": 5581 }, { "epoch": 0.3889488903598927, "grad_norm": 0.6880962749504176, "learning_rate": 4.926679439929295e-07, "loss": 1.4707, "step": 5582 }, { "epoch": 0.38901856948751, "grad_norm": 0.7802737438340347, "learning_rate": 4.926031375959926e-07, "loss": 1.5877, "step": 5583 }, { "epoch": 0.38908824861512736, "grad_norm": 0.7257829551630415, "learning_rate": 4.925383260424924e-07, "loss": 1.5467, "step": 5584 }, { "epoch": 0.38915792774274466, "grad_norm": 0.6993590557963282, "learning_rate": 4.924735093355348e-07, "loss": 1.5517, "step": 5585 }, { "epoch": 0.389227606870362, "grad_norm": 0.7438495635411839, "learning_rate": 4.92408687478226e-07, "loss": 1.5874, "step": 5586 }, { "epoch": 0.3892972859979793, "grad_norm": 0.733088731946248, "learning_rate": 4.923438604736725e-07, "loss": 1.5717, "step": 5587 }, { "epoch": 0.38936696512559665, "grad_norm": 0.7083801837424353, "learning_rate": 4.922790283249808e-07, "loss": 1.3843, "step": 5588 }, { "epoch": 0.38943664425321395, "grad_norm": 0.717862741840073, "learning_rate": 4.922141910352578e-07, "loss": 1.5743, "step": 5589 }, { "epoch": 0.3895063233808313, "grad_norm": 0.7021056241225214, "learning_rate": 4.921493486076106e-07, "loss": 1.4911, "step": 5590 }, { "epoch": 0.3895760025084486, "grad_norm": 0.7326892044390457, "learning_rate": 4.920845010451468e-07, "loss": 1.5485, "step": 5591 }, { "epoch": 0.38964568163606594, "grad_norm": 0.7167763296787694, "learning_rate": 4.920196483509737e-07, "loss": 1.5422, "step": 5592 }, { "epoch": 0.38971536076368324, "grad_norm": 0.6716757792080215, "learning_rate": 4.919547905281994e-07, "loss": 1.6012, "step": 5593 }, { "epoch": 0.38978503989130053, "grad_norm": 0.7156461246251185, "learning_rate": 4.91889927579932e-07, "loss": 1.5866, "step": 5594 }, { "epoch": 0.3898547190189179, "grad_norm": 0.7419782612066664, "learning_rate": 4.918250595092798e-07, "loss": 1.4934, "step": 5595 }, { "epoch": 0.3899243981465352, "grad_norm": 0.7476547747400746, "learning_rate": 4.917601863193514e-07, "loss": 1.6338, "step": 5596 }, { "epoch": 0.38999407727415253, "grad_norm": 0.728083014760731, "learning_rate": 4.916953080132558e-07, "loss": 1.5645, "step": 5597 }, { "epoch": 0.3900637564017698, "grad_norm": 0.7735797984284867, "learning_rate": 4.916304245941018e-07, "loss": 1.6132, "step": 5598 }, { "epoch": 0.3901334355293872, "grad_norm": 0.7034034881934533, "learning_rate": 4.915655360649992e-07, "loss": 1.5384, "step": 5599 }, { "epoch": 0.39020311465700447, "grad_norm": 0.7489804917635291, "learning_rate": 4.915006424290572e-07, "loss": 1.5392, "step": 5600 }, { "epoch": 0.3902727937846218, "grad_norm": 0.7456332541983953, "learning_rate": 4.914357436893857e-07, "loss": 1.5891, "step": 5601 }, { "epoch": 0.3903424729122391, "grad_norm": 0.7036216749370718, "learning_rate": 4.91370839849095e-07, "loss": 1.6004, "step": 5602 }, { "epoch": 0.39041215203985646, "grad_norm": 0.7193251436793048, "learning_rate": 4.913059309112952e-07, "loss": 1.529, "step": 5603 }, { "epoch": 0.39048183116747376, "grad_norm": 0.7729473570738046, "learning_rate": 4.91241016879097e-07, "loss": 1.5915, "step": 5604 }, { "epoch": 0.3905515102950911, "grad_norm": 0.7334388646784056, "learning_rate": 4.911760977556112e-07, "loss": 1.5761, "step": 5605 }, { "epoch": 0.3906211894227084, "grad_norm": 0.7335809025498045, "learning_rate": 4.911111735439487e-07, "loss": 1.5482, "step": 5606 }, { "epoch": 0.39069086855032575, "grad_norm": 0.7207635907762502, "learning_rate": 4.91046244247221e-07, "loss": 1.4662, "step": 5607 }, { "epoch": 0.39076054767794305, "grad_norm": 0.7091201638463493, "learning_rate": 4.909813098685395e-07, "loss": 1.5795, "step": 5608 }, { "epoch": 0.3908302268055604, "grad_norm": 0.7013771838761281, "learning_rate": 4.909163704110161e-07, "loss": 1.544, "step": 5609 }, { "epoch": 0.3908999059331777, "grad_norm": 0.7074049818275138, "learning_rate": 4.908514258777628e-07, "loss": 1.6141, "step": 5610 }, { "epoch": 0.39096958506079504, "grad_norm": 0.6972315835236457, "learning_rate": 4.907864762718918e-07, "loss": 1.4784, "step": 5611 }, { "epoch": 0.39103926418841234, "grad_norm": 0.715516245329465, "learning_rate": 4.907215215965157e-07, "loss": 1.5359, "step": 5612 }, { "epoch": 0.3911089433160297, "grad_norm": 0.7574553672743075, "learning_rate": 4.906565618547475e-07, "loss": 1.7375, "step": 5613 }, { "epoch": 0.391178622443647, "grad_norm": 0.6502857177607614, "learning_rate": 4.905915970496996e-07, "loss": 1.5163, "step": 5614 }, { "epoch": 0.39124830157126433, "grad_norm": 0.6938368096085746, "learning_rate": 4.905266271844857e-07, "loss": 1.5624, "step": 5615 }, { "epoch": 0.39131798069888163, "grad_norm": 0.6712918492602664, "learning_rate": 4.904616522622193e-07, "loss": 1.5201, "step": 5616 }, { "epoch": 0.391387659826499, "grad_norm": 0.7988195643434735, "learning_rate": 4.903966722860139e-07, "loss": 1.5929, "step": 5617 }, { "epoch": 0.3914573389541163, "grad_norm": 0.7057183780225315, "learning_rate": 4.903316872589836e-07, "loss": 1.648, "step": 5618 }, { "epoch": 0.3915270180817336, "grad_norm": 0.6757523132570389, "learning_rate": 4.902666971842426e-07, "loss": 1.5514, "step": 5619 }, { "epoch": 0.3915966972093509, "grad_norm": 0.6980300975678734, "learning_rate": 4.902017020649053e-07, "loss": 1.6303, "step": 5620 }, { "epoch": 0.39166637633696827, "grad_norm": 0.7461011434866858, "learning_rate": 4.901367019040866e-07, "loss": 1.4192, "step": 5621 }, { "epoch": 0.39173605546458556, "grad_norm": 0.7287549638649695, "learning_rate": 4.900716967049011e-07, "loss": 1.5044, "step": 5622 }, { "epoch": 0.3918057345922029, "grad_norm": 0.6884426807253309, "learning_rate": 4.900066864704644e-07, "loss": 1.4788, "step": 5623 }, { "epoch": 0.3918754137198202, "grad_norm": 0.698307728524486, "learning_rate": 4.899416712038918e-07, "loss": 1.5435, "step": 5624 }, { "epoch": 0.39194509284743756, "grad_norm": 0.6789008299948818, "learning_rate": 4.898766509082986e-07, "loss": 1.5289, "step": 5625 }, { "epoch": 0.39201477197505485, "grad_norm": 0.7970341094255471, "learning_rate": 4.89811625586801e-07, "loss": 1.7357, "step": 5626 }, { "epoch": 0.3920844511026722, "grad_norm": 0.7188738277514154, "learning_rate": 4.897465952425153e-07, "loss": 1.4774, "step": 5627 }, { "epoch": 0.3921541302302895, "grad_norm": 0.8373284142960566, "learning_rate": 4.896815598785576e-07, "loss": 1.6099, "step": 5628 }, { "epoch": 0.39222380935790685, "grad_norm": 0.7088823337013, "learning_rate": 4.896165194980447e-07, "loss": 1.529, "step": 5629 }, { "epoch": 0.39229348848552414, "grad_norm": 0.7223001653765114, "learning_rate": 4.895514741040933e-07, "loss": 1.5641, "step": 5630 }, { "epoch": 0.3923631676131415, "grad_norm": 0.6914220741166752, "learning_rate": 4.894864236998208e-07, "loss": 1.5283, "step": 5631 }, { "epoch": 0.3924328467407588, "grad_norm": 0.7147757332932779, "learning_rate": 4.894213682883443e-07, "loss": 1.521, "step": 5632 }, { "epoch": 0.39250252586837614, "grad_norm": 0.764680925124264, "learning_rate": 4.893563078727815e-07, "loss": 1.5515, "step": 5633 }, { "epoch": 0.39257220499599343, "grad_norm": 0.7822852931923628, "learning_rate": 4.892912424562501e-07, "loss": 1.5652, "step": 5634 }, { "epoch": 0.3926418841236108, "grad_norm": 0.7929478109855593, "learning_rate": 4.892261720418682e-07, "loss": 1.7101, "step": 5635 }, { "epoch": 0.3927115632512281, "grad_norm": 0.7218548026309808, "learning_rate": 4.891610966327543e-07, "loss": 1.652, "step": 5636 }, { "epoch": 0.39278124237884543, "grad_norm": 0.7335248098012389, "learning_rate": 4.890960162320267e-07, "loss": 1.5682, "step": 5637 }, { "epoch": 0.3928509215064627, "grad_norm": 0.7143306188937307, "learning_rate": 4.890309308428044e-07, "loss": 1.5508, "step": 5638 }, { "epoch": 0.3929206006340801, "grad_norm": 0.6688063423914016, "learning_rate": 4.889658404682062e-07, "loss": 1.5865, "step": 5639 }, { "epoch": 0.39299027976169737, "grad_norm": 0.6915994442437262, "learning_rate": 4.889007451113515e-07, "loss": 1.4609, "step": 5640 }, { "epoch": 0.3930599588893147, "grad_norm": 0.7061342396943753, "learning_rate": 4.8883564477536e-07, "loss": 1.5257, "step": 5641 }, { "epoch": 0.393129638016932, "grad_norm": 0.7330023613356081, "learning_rate": 4.88770539463351e-07, "loss": 1.5863, "step": 5642 }, { "epoch": 0.39319931714454937, "grad_norm": 0.6569236724030605, "learning_rate": 4.887054291784448e-07, "loss": 1.5345, "step": 5643 }, { "epoch": 0.39326899627216666, "grad_norm": 0.6997962715930213, "learning_rate": 4.886403139237615e-07, "loss": 1.6101, "step": 5644 }, { "epoch": 0.393338675399784, "grad_norm": 0.6660182692895334, "learning_rate": 4.885751937024216e-07, "loss": 1.4714, "step": 5645 }, { "epoch": 0.3934083545274013, "grad_norm": 0.7775772452706329, "learning_rate": 4.885100685175459e-07, "loss": 1.6419, "step": 5646 }, { "epoch": 0.39347803365501866, "grad_norm": 0.7940753653334305, "learning_rate": 4.88444938372255e-07, "loss": 1.4888, "step": 5647 }, { "epoch": 0.39354771278263595, "grad_norm": 0.6889526655071125, "learning_rate": 4.883798032696704e-07, "loss": 1.5522, "step": 5648 }, { "epoch": 0.3936173919102533, "grad_norm": 0.7216018333181632, "learning_rate": 4.883146632129135e-07, "loss": 1.424, "step": 5649 }, { "epoch": 0.3936870710378706, "grad_norm": 0.7383631566837524, "learning_rate": 4.882495182051056e-07, "loss": 1.5577, "step": 5650 }, { "epoch": 0.39375675016548795, "grad_norm": 0.6910122209800914, "learning_rate": 4.88184368249369e-07, "loss": 1.651, "step": 5651 }, { "epoch": 0.39382642929310524, "grad_norm": 0.7071475670558016, "learning_rate": 4.881192133488256e-07, "loss": 1.4289, "step": 5652 }, { "epoch": 0.3938961084207226, "grad_norm": 0.7940647359092496, "learning_rate": 4.880540535065978e-07, "loss": 1.5281, "step": 5653 }, { "epoch": 0.3939657875483399, "grad_norm": 0.7424024603209955, "learning_rate": 4.879888887258082e-07, "loss": 1.7073, "step": 5654 }, { "epoch": 0.39403546667595724, "grad_norm": 0.7040686230275012, "learning_rate": 4.879237190095795e-07, "loss": 1.6446, "step": 5655 }, { "epoch": 0.39410514580357453, "grad_norm": 0.6804339815316579, "learning_rate": 4.878585443610351e-07, "loss": 1.5443, "step": 5656 }, { "epoch": 0.3941748249311919, "grad_norm": 0.7694443687860312, "learning_rate": 4.877933647832978e-07, "loss": 1.6059, "step": 5657 }, { "epoch": 0.3942445040588092, "grad_norm": 0.7095308678114916, "learning_rate": 4.877281802794917e-07, "loss": 1.5783, "step": 5658 }, { "epoch": 0.3943141831864265, "grad_norm": 0.6834728695375323, "learning_rate": 4.876629908527402e-07, "loss": 1.4167, "step": 5659 }, { "epoch": 0.3943838623140438, "grad_norm": 0.732453498933405, "learning_rate": 4.875977965061674e-07, "loss": 1.5803, "step": 5660 }, { "epoch": 0.39445354144166117, "grad_norm": 0.8493170677261569, "learning_rate": 4.875325972428976e-07, "loss": 1.5688, "step": 5661 }, { "epoch": 0.39452322056927847, "grad_norm": 0.7366435447689659, "learning_rate": 4.874673930660551e-07, "loss": 1.6815, "step": 5662 }, { "epoch": 0.3945928996968958, "grad_norm": 0.6972247175402578, "learning_rate": 4.87402183978765e-07, "loss": 1.3911, "step": 5663 }, { "epoch": 0.3946625788245131, "grad_norm": 0.6724755543519445, "learning_rate": 4.87336969984152e-07, "loss": 1.553, "step": 5664 }, { "epoch": 0.39473225795213046, "grad_norm": 0.7443101644041815, "learning_rate": 4.872717510853411e-07, "loss": 1.4836, "step": 5665 }, { "epoch": 0.39480193707974776, "grad_norm": 0.7105185151933725, "learning_rate": 4.872065272854581e-07, "loss": 1.604, "step": 5666 }, { "epoch": 0.3948716162073651, "grad_norm": 0.7224894163248697, "learning_rate": 4.871412985876283e-07, "loss": 1.5213, "step": 5667 }, { "epoch": 0.3949412953349824, "grad_norm": 0.7109761043079036, "learning_rate": 4.87076064994978e-07, "loss": 1.5168, "step": 5668 }, { "epoch": 0.39501097446259975, "grad_norm": 0.8068347274295018, "learning_rate": 4.870108265106329e-07, "loss": 1.7317, "step": 5669 }, { "epoch": 0.39508065359021705, "grad_norm": 0.7051206301189206, "learning_rate": 4.869455831377196e-07, "loss": 1.5398, "step": 5670 }, { "epoch": 0.3951503327178344, "grad_norm": 0.7227348729953125, "learning_rate": 4.868803348793649e-07, "loss": 1.5696, "step": 5671 }, { "epoch": 0.3952200118454517, "grad_norm": 0.7597406001763358, "learning_rate": 4.868150817386952e-07, "loss": 1.4969, "step": 5672 }, { "epoch": 0.39528969097306904, "grad_norm": 0.8029844792030686, "learning_rate": 4.867498237188378e-07, "loss": 1.5866, "step": 5673 }, { "epoch": 0.39535937010068634, "grad_norm": 0.7330628123186286, "learning_rate": 4.8668456082292e-07, "loss": 1.5821, "step": 5674 }, { "epoch": 0.3954290492283037, "grad_norm": 0.7313312062625753, "learning_rate": 4.866192930540692e-07, "loss": 1.6279, "step": 5675 }, { "epoch": 0.395498728355921, "grad_norm": 0.8043239241069365, "learning_rate": 4.865540204154133e-07, "loss": 1.544, "step": 5676 }, { "epoch": 0.39556840748353833, "grad_norm": 0.7187432972009813, "learning_rate": 4.864887429100803e-07, "loss": 1.4934, "step": 5677 }, { "epoch": 0.3956380866111556, "grad_norm": 0.7673309697309961, "learning_rate": 4.864234605411983e-07, "loss": 1.5899, "step": 5678 }, { "epoch": 0.395707765738773, "grad_norm": 0.7442189649830544, "learning_rate": 4.86358173311896e-07, "loss": 1.5012, "step": 5679 }, { "epoch": 0.39577744486639027, "grad_norm": 0.7633515183291592, "learning_rate": 4.862928812253018e-07, "loss": 1.6551, "step": 5680 }, { "epoch": 0.3958471239940076, "grad_norm": 0.6976117552063694, "learning_rate": 4.862275842845448e-07, "loss": 1.5045, "step": 5681 }, { "epoch": 0.3959168031216249, "grad_norm": 0.7490951654378956, "learning_rate": 4.861622824927543e-07, "loss": 1.5752, "step": 5682 }, { "epoch": 0.39598648224924227, "grad_norm": 0.6767327539185277, "learning_rate": 4.860969758530593e-07, "loss": 1.497, "step": 5683 }, { "epoch": 0.39605616137685956, "grad_norm": 0.6841274638774705, "learning_rate": 4.860316643685898e-07, "loss": 1.5006, "step": 5684 }, { "epoch": 0.3961258405044769, "grad_norm": 0.7178087195736019, "learning_rate": 4.859663480424755e-07, "loss": 1.6026, "step": 5685 }, { "epoch": 0.3961955196320942, "grad_norm": 0.7031296552917827, "learning_rate": 4.859010268778465e-07, "loss": 1.6191, "step": 5686 }, { "epoch": 0.3962651987597115, "grad_norm": 0.70942369444354, "learning_rate": 4.858357008778333e-07, "loss": 1.6875, "step": 5687 }, { "epoch": 0.39633487788732885, "grad_norm": 0.7593496749721073, "learning_rate": 4.857703700455662e-07, "loss": 1.4996, "step": 5688 }, { "epoch": 0.39640455701494615, "grad_norm": 0.7200832749609688, "learning_rate": 4.85705034384176e-07, "loss": 1.5551, "step": 5689 }, { "epoch": 0.3964742361425635, "grad_norm": 0.6920762660051075, "learning_rate": 4.856396938967939e-07, "loss": 1.5067, "step": 5690 }, { "epoch": 0.3965439152701808, "grad_norm": 0.7669411676412027, "learning_rate": 4.855743485865511e-07, "loss": 1.4865, "step": 5691 }, { "epoch": 0.39661359439779814, "grad_norm": 0.7247852762545683, "learning_rate": 4.85508998456579e-07, "loss": 1.5194, "step": 5692 }, { "epoch": 0.39668327352541544, "grad_norm": 0.6844608804865571, "learning_rate": 4.854436435100093e-07, "loss": 1.5579, "step": 5693 }, { "epoch": 0.3967529526530328, "grad_norm": 0.8003618335097139, "learning_rate": 4.85378283749974e-07, "loss": 1.5606, "step": 5694 }, { "epoch": 0.3968226317806501, "grad_norm": 0.6849811665507215, "learning_rate": 4.853129191796053e-07, "loss": 1.3802, "step": 5695 }, { "epoch": 0.39689231090826743, "grad_norm": 0.6944625667383478, "learning_rate": 4.852475498020355e-07, "loss": 1.4661, "step": 5696 }, { "epoch": 0.3969619900358847, "grad_norm": 0.7215389063076278, "learning_rate": 4.851821756203975e-07, "loss": 1.5935, "step": 5697 }, { "epoch": 0.3970316691635021, "grad_norm": 0.7034503235896736, "learning_rate": 4.851167966378238e-07, "loss": 1.6066, "step": 5698 }, { "epoch": 0.39710134829111937, "grad_norm": 0.7325183000161554, "learning_rate": 4.850514128574478e-07, "loss": 1.5436, "step": 5699 }, { "epoch": 0.3971710274187367, "grad_norm": 0.7186466732509661, "learning_rate": 4.849860242824026e-07, "loss": 1.4569, "step": 5700 }, { "epoch": 0.397240706546354, "grad_norm": 0.69751502384842, "learning_rate": 4.84920630915822e-07, "loss": 1.4414, "step": 5701 }, { "epoch": 0.39731038567397137, "grad_norm": 0.7034771671737267, "learning_rate": 4.848552327608393e-07, "loss": 1.4511, "step": 5702 }, { "epoch": 0.39738006480158866, "grad_norm": 0.7062965342234896, "learning_rate": 4.847898298205892e-07, "loss": 1.5185, "step": 5703 }, { "epoch": 0.397449743929206, "grad_norm": 0.7433825843030339, "learning_rate": 4.847244220982053e-07, "loss": 1.5063, "step": 5704 }, { "epoch": 0.3975194230568233, "grad_norm": 1.0650085692078823, "learning_rate": 4.846590095968226e-07, "loss": 1.5949, "step": 5705 }, { "epoch": 0.39758910218444066, "grad_norm": 0.7328515507980331, "learning_rate": 4.845935923195755e-07, "loss": 1.543, "step": 5706 }, { "epoch": 0.39765878131205795, "grad_norm": 0.7022710312029437, "learning_rate": 4.845281702695989e-07, "loss": 1.6136, "step": 5707 }, { "epoch": 0.3977284604396753, "grad_norm": 0.712743673405011, "learning_rate": 4.844627434500282e-07, "loss": 1.4946, "step": 5708 }, { "epoch": 0.3977981395672926, "grad_norm": 0.7208989189265302, "learning_rate": 4.843973118639986e-07, "loss": 1.5949, "step": 5709 }, { "epoch": 0.39786781869490995, "grad_norm": 0.6990871375396196, "learning_rate": 4.843318755146456e-07, "loss": 1.5536, "step": 5710 }, { "epoch": 0.39793749782252724, "grad_norm": 0.7238622642027562, "learning_rate": 4.842664344051053e-07, "loss": 1.4782, "step": 5711 }, { "epoch": 0.3980071769501446, "grad_norm": 0.7130800444416632, "learning_rate": 4.842009885385137e-07, "loss": 1.4871, "step": 5712 }, { "epoch": 0.3980768560777619, "grad_norm": 0.660704364407987, "learning_rate": 4.841355379180071e-07, "loss": 1.4377, "step": 5713 }, { "epoch": 0.39814653520537924, "grad_norm": 0.7016372786912725, "learning_rate": 4.840700825467219e-07, "loss": 1.5465, "step": 5714 }, { "epoch": 0.39821621433299653, "grad_norm": 0.8233394310985324, "learning_rate": 4.84004622427795e-07, "loss": 1.5596, "step": 5715 }, { "epoch": 0.3982858934606139, "grad_norm": 0.7431768553667599, "learning_rate": 4.839391575643634e-07, "loss": 1.5784, "step": 5716 }, { "epoch": 0.3983555725882312, "grad_norm": 0.7286515546228858, "learning_rate": 4.838736879595643e-07, "loss": 1.5407, "step": 5717 }, { "epoch": 0.3984252517158485, "grad_norm": 0.6803185223474442, "learning_rate": 4.838082136165349e-07, "loss": 1.5144, "step": 5718 }, { "epoch": 0.3984949308434658, "grad_norm": 0.729071303691758, "learning_rate": 4.837427345384132e-07, "loss": 1.4729, "step": 5719 }, { "epoch": 0.3985646099710832, "grad_norm": 0.7505050057733356, "learning_rate": 4.836772507283369e-07, "loss": 1.6377, "step": 5720 }, { "epoch": 0.39863428909870047, "grad_norm": 0.6596317365173326, "learning_rate": 4.836117621894442e-07, "loss": 1.4454, "step": 5721 }, { "epoch": 0.3987039682263178, "grad_norm": 0.747012650690718, "learning_rate": 4.835462689248733e-07, "loss": 1.4649, "step": 5722 }, { "epoch": 0.3987736473539351, "grad_norm": 0.7566351309737823, "learning_rate": 4.83480770937763e-07, "loss": 1.6035, "step": 5723 }, { "epoch": 0.39884332648155246, "grad_norm": 0.7442740560150342, "learning_rate": 4.83415268231252e-07, "loss": 1.4965, "step": 5724 }, { "epoch": 0.39891300560916976, "grad_norm": 0.7856195341062227, "learning_rate": 4.833497608084793e-07, "loss": 1.5433, "step": 5725 }, { "epoch": 0.3989826847367871, "grad_norm": 0.69840242548124, "learning_rate": 4.83284248672584e-07, "loss": 1.5195, "step": 5726 }, { "epoch": 0.3990523638644044, "grad_norm": 0.6809083026173022, "learning_rate": 4.832187318267059e-07, "loss": 1.4451, "step": 5727 }, { "epoch": 0.39912204299202175, "grad_norm": 0.6750686878569807, "learning_rate": 4.831532102739844e-07, "loss": 1.5211, "step": 5728 }, { "epoch": 0.39919172211963905, "grad_norm": 0.7334443597087285, "learning_rate": 4.830876840175596e-07, "loss": 1.6082, "step": 5729 }, { "epoch": 0.3992614012472564, "grad_norm": 0.68852786930121, "learning_rate": 4.830221530605716e-07, "loss": 1.6109, "step": 5730 }, { "epoch": 0.3993310803748737, "grad_norm": 0.7544369302712011, "learning_rate": 4.829566174061609e-07, "loss": 1.5038, "step": 5731 }, { "epoch": 0.39940075950249104, "grad_norm": 0.7049817535947259, "learning_rate": 4.828910770574679e-07, "loss": 1.5423, "step": 5732 }, { "epoch": 0.39947043863010834, "grad_norm": 0.7385275567962578, "learning_rate": 4.828255320176336e-07, "loss": 1.5723, "step": 5733 }, { "epoch": 0.3995401177577257, "grad_norm": 0.759768274010817, "learning_rate": 4.827599822897988e-07, "loss": 1.5713, "step": 5734 }, { "epoch": 0.399609796885343, "grad_norm": 0.8286752447322427, "learning_rate": 4.826944278771051e-07, "loss": 1.6021, "step": 5735 }, { "epoch": 0.39967947601296033, "grad_norm": 0.7238465828951179, "learning_rate": 4.826288687826939e-07, "loss": 1.6626, "step": 5736 }, { "epoch": 0.39974915514057763, "grad_norm": 0.7770751932847518, "learning_rate": 4.825633050097067e-07, "loss": 1.6797, "step": 5737 }, { "epoch": 0.399818834268195, "grad_norm": 0.7711030533066128, "learning_rate": 4.824977365612857e-07, "loss": 1.6196, "step": 5738 }, { "epoch": 0.3998885133958123, "grad_norm": 0.6875827946985658, "learning_rate": 4.82432163440573e-07, "loss": 1.5406, "step": 5739 }, { "epoch": 0.3999581925234296, "grad_norm": 0.7335197989377081, "learning_rate": 4.82366585650711e-07, "loss": 1.6377, "step": 5740 }, { "epoch": 0.4000278716510469, "grad_norm": 0.6941435526661424, "learning_rate": 4.823010031948425e-07, "loss": 1.6344, "step": 5741 }, { "epoch": 0.40009755077866427, "grad_norm": 0.6797310835665473, "learning_rate": 4.8223541607611e-07, "loss": 1.4352, "step": 5742 } ], "logging_steps": 1, "max_steps": 14351, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 2871, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3010137962840064.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }