| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 1962, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0030581039755351682, |
| "grad_norm": 0.18201100826263428, |
| "learning_rate": 2.0202020202020205e-07, |
| "loss": 1.8941408395767212, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0061162079510703364, |
| "grad_norm": 0.11798238009214401, |
| "learning_rate": 6.060606060606061e-07, |
| "loss": 1.8559235334396362, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.009174311926605505, |
| "grad_norm": 0.22169165313243866, |
| "learning_rate": 1.01010101010101e-06, |
| "loss": 1.8905575275421143, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.012232415902140673, |
| "grad_norm": 0.12514109909534454, |
| "learning_rate": 1.4141414141414143e-06, |
| "loss": 1.8444780111312866, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.01529051987767584, |
| "grad_norm": 0.3371029496192932, |
| "learning_rate": 1.8181818181818183e-06, |
| "loss": 2.009756326675415, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.01834862385321101, |
| "grad_norm": 0.9957485198974609, |
| "learning_rate": 2.222222222222222e-06, |
| "loss": 1.8797118663787842, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.021406727828746176, |
| "grad_norm": 0.3545893132686615, |
| "learning_rate": 2.6262626262626267e-06, |
| "loss": 1.880387544631958, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.024464831804281346, |
| "grad_norm": 0.21946458518505096, |
| "learning_rate": 3.0303030303030305e-06, |
| "loss": 1.9453247785568237, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.027522935779816515, |
| "grad_norm": 0.7614877223968506, |
| "learning_rate": 3.4343434343434347e-06, |
| "loss": 1.829978108406067, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.03058103975535168, |
| "grad_norm": 0.18568752706050873, |
| "learning_rate": 3.8383838383838385e-06, |
| "loss": 1.8531824350357056, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.03363914373088685, |
| "grad_norm": 0.21273010969161987, |
| "learning_rate": 4.242424242424243e-06, |
| "loss": 1.9913711547851562, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.03669724770642202, |
| "grad_norm": 0.28394240140914917, |
| "learning_rate": 4.646464646464647e-06, |
| "loss": 2.005105495452881, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.039755351681957186, |
| "grad_norm": 0.1409633308649063, |
| "learning_rate": 5.0505050505050515e-06, |
| "loss": 1.7043734788894653, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.04281345565749235, |
| "grad_norm": 0.20944717526435852, |
| "learning_rate": 5.4545454545454545e-06, |
| "loss": 1.8792476654052734, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.045871559633027525, |
| "grad_norm": 0.352851003408432, |
| "learning_rate": 5.858585858585859e-06, |
| "loss": 2.1714513301849365, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.04892966360856269, |
| "grad_norm": 0.1783100664615631, |
| "learning_rate": 6.262626262626264e-06, |
| "loss": 1.8468539714813232, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.05198776758409786, |
| "grad_norm": 0.163264200091362, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 1.9490365982055664, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.05504587155963303, |
| "grad_norm": 0.13849298655986786, |
| "learning_rate": 7.070707070707071e-06, |
| "loss": 1.7920082807540894, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.0581039755351682, |
| "grad_norm": 0.15713374316692352, |
| "learning_rate": 7.474747474747476e-06, |
| "loss": 1.8174947500228882, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.06116207951070336, |
| "grad_norm": 0.5049633979797363, |
| "learning_rate": 7.87878787878788e-06, |
| "loss": 1.812342643737793, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.06422018348623854, |
| "grad_norm": 0.17551669478416443, |
| "learning_rate": 8.282828282828283e-06, |
| "loss": 1.860647201538086, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.0672782874617737, |
| "grad_norm": 0.23912598192691803, |
| "learning_rate": 8.686868686868687e-06, |
| "loss": 1.943070650100708, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.07033639143730887, |
| "grad_norm": 0.24952426552772522, |
| "learning_rate": 9.090909090909091e-06, |
| "loss": 2.0365545749664307, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.07339449541284404, |
| "grad_norm": 0.48846498131752014, |
| "learning_rate": 9.494949494949497e-06, |
| "loss": 2.0339832305908203, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.0764525993883792, |
| "grad_norm": 0.1678168624639511, |
| "learning_rate": 9.8989898989899e-06, |
| "loss": 1.83395254611969, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.07951070336391437, |
| "grad_norm": 0.24815726280212402, |
| "learning_rate": 1.0303030303030304e-05, |
| "loss": 1.8216886520385742, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.08256880733944955, |
| "grad_norm": 0.18020159006118774, |
| "learning_rate": 1.0707070707070708e-05, |
| "loss": 1.9608432054519653, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.0856269113149847, |
| "grad_norm": 0.13843238353729248, |
| "learning_rate": 1.1111111111111113e-05, |
| "loss": 1.8869292736053467, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.08868501529051988, |
| "grad_norm": 0.1923639327287674, |
| "learning_rate": 1.1515151515151517e-05, |
| "loss": 1.920361042022705, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.09174311926605505, |
| "grad_norm": 0.42597687244415283, |
| "learning_rate": 1.191919191919192e-05, |
| "loss": 1.854645013809204, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.09480122324159021, |
| "grad_norm": 0.14682704210281372, |
| "learning_rate": 1.2323232323232323e-05, |
| "loss": 1.785409688949585, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.09785932721712538, |
| "grad_norm": 0.17863060534000397, |
| "learning_rate": 1.2727272727272728e-05, |
| "loss": 1.6720788478851318, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.10091743119266056, |
| "grad_norm": 0.19281210005283356, |
| "learning_rate": 1.3131313131313132e-05, |
| "loss": 1.7950663566589355, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.10397553516819572, |
| "grad_norm": 0.18981774151325226, |
| "learning_rate": 1.3535353535353538e-05, |
| "loss": 1.9100995063781738, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.10703363914373089, |
| "grad_norm": 0.12145815044641495, |
| "learning_rate": 1.3939393939393942e-05, |
| "loss": 1.773590087890625, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.11009174311926606, |
| "grad_norm": 0.24626584351062775, |
| "learning_rate": 1.4343434343434344e-05, |
| "loss": 1.9656442403793335, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.11314984709480122, |
| "grad_norm": 0.18296314775943756, |
| "learning_rate": 1.4747474747474747e-05, |
| "loss": 2.0457160472869873, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.1162079510703364, |
| "grad_norm": 0.9755333065986633, |
| "learning_rate": 1.5151515151515153e-05, |
| "loss": 1.8354032039642334, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.11926605504587157, |
| "grad_norm": 0.32339081168174744, |
| "learning_rate": 1.555555555555556e-05, |
| "loss": 2.1483542919158936, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.12232415902140673, |
| "grad_norm": 0.15254797041416168, |
| "learning_rate": 1.595959595959596e-05, |
| "loss": 1.8094338178634644, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.12538226299694188, |
| "grad_norm": 0.2284584939479828, |
| "learning_rate": 1.6363636363636366e-05, |
| "loss": 1.5790177583694458, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.12844036697247707, |
| "grad_norm": 0.14264698326587677, |
| "learning_rate": 1.6767676767676768e-05, |
| "loss": 1.726978063583374, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.13149847094801223, |
| "grad_norm": 0.17337217926979065, |
| "learning_rate": 1.7171717171717173e-05, |
| "loss": 1.7587239742279053, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.1345565749235474, |
| "grad_norm": 0.17287808656692505, |
| "learning_rate": 1.7575757575757576e-05, |
| "loss": 1.6768524646759033, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.13761467889908258, |
| "grad_norm": 0.10812447965145111, |
| "learning_rate": 1.797979797979798e-05, |
| "loss": 1.7222340106964111, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.14067278287461774, |
| "grad_norm": 0.12938623130321503, |
| "learning_rate": 1.8383838383838387e-05, |
| "loss": 1.7632179260253906, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.1437308868501529, |
| "grad_norm": 0.12679119408130646, |
| "learning_rate": 1.8787878787878792e-05, |
| "loss": 1.8440016508102417, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.14678899082568808, |
| "grad_norm": 0.2494174838066101, |
| "learning_rate": 1.9191919191919194e-05, |
| "loss": 1.7742732763290405, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.14984709480122324, |
| "grad_norm": 0.10247253626585007, |
| "learning_rate": 1.9595959595959596e-05, |
| "loss": 1.709675908088684, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.1529051987767584, |
| "grad_norm": 0.12246488779783249, |
| "learning_rate": 2e-05, |
| "loss": 1.8481563329696655, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.1559633027522936, |
| "grad_norm": 0.1887405663728714, |
| "learning_rate": 1.999994881459676e-05, |
| "loss": 1.7512952089309692, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.15902140672782875, |
| "grad_norm": 0.14479632675647736, |
| "learning_rate": 1.9999795258969242e-05, |
| "loss": 1.7125545740127563, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.1620795107033639, |
| "grad_norm": 0.1601811647415161, |
| "learning_rate": 1.9999539334864075e-05, |
| "loss": 1.7362236976623535, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.1651376146788991, |
| "grad_norm": 0.13637490570545197, |
| "learning_rate": 1.9999181045192272e-05, |
| "loss": 1.775484561920166, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.16819571865443425, |
| "grad_norm": 0.12772946059703827, |
| "learning_rate": 1.9998720394029214e-05, |
| "loss": 1.7764359712600708, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.1712538226299694, |
| "grad_norm": 0.3635137975215912, |
| "learning_rate": 1.9998157386614592e-05, |
| "loss": 1.6467103958129883, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.1743119266055046, |
| "grad_norm": 0.13494336605072021, |
| "learning_rate": 1.999749202935236e-05, |
| "loss": 1.7608314752578735, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.17737003058103976, |
| "grad_norm": 0.10225271433591843, |
| "learning_rate": 1.9996724329810635e-05, |
| "loss": 1.6330885887145996, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.18042813455657492, |
| "grad_norm": 0.15280012786388397, |
| "learning_rate": 1.999585429672165e-05, |
| "loss": 1.8569954633712769, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.1834862385321101, |
| "grad_norm": 0.21181870996952057, |
| "learning_rate": 1.999488193998162e-05, |
| "loss": 1.684548020362854, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.18654434250764526, |
| "grad_norm": 0.13093267381191254, |
| "learning_rate": 1.9993807270650653e-05, |
| "loss": 1.7649790048599243, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.18960244648318042, |
| "grad_norm": 0.1822010576725006, |
| "learning_rate": 1.9992630300952616e-05, |
| "loss": 1.772727370262146, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.1926605504587156, |
| "grad_norm": 0.14621378481388092, |
| "learning_rate": 1.9991351044274984e-05, |
| "loss": 1.7147706747055054, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.19571865443425077, |
| "grad_norm": 0.1720152050256729, |
| "learning_rate": 1.9989969515168707e-05, |
| "loss": 1.6797459125518799, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.19877675840978593, |
| "grad_norm": 0.155650332570076, |
| "learning_rate": 1.9988485729348042e-05, |
| "loss": 1.6960707902908325, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.2018348623853211, |
| "grad_norm": 0.1650742143392563, |
| "learning_rate": 1.998689970369035e-05, |
| "loss": 1.6708898544311523, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.20489296636085627, |
| "grad_norm": 0.16835254430770874, |
| "learning_rate": 1.9985211456235943e-05, |
| "loss": 1.7707663774490356, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.20795107033639143, |
| "grad_norm": 0.1444658637046814, |
| "learning_rate": 1.9983421006187847e-05, |
| "loss": 1.6729985475540161, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.21100917431192662, |
| "grad_norm": 0.12244903296232224, |
| "learning_rate": 1.9981528373911593e-05, |
| "loss": 1.5752283334732056, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.21406727828746178, |
| "grad_norm": 0.13254615664482117, |
| "learning_rate": 1.9979533580934997e-05, |
| "loss": 1.5328928232192993, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.21712538226299694, |
| "grad_norm": 0.11054181307554245, |
| "learning_rate": 1.9977436649947894e-05, |
| "loss": 1.6198745965957642, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.22018348623853212, |
| "grad_norm": 0.13271930813789368, |
| "learning_rate": 1.99752376048019e-05, |
| "loss": 1.5421935319900513, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.22324159021406728, |
| "grad_norm": 0.13614420592784882, |
| "learning_rate": 1.997293647051013e-05, |
| "loss": 1.6007068157196045, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.22629969418960244, |
| "grad_norm": 0.18991592526435852, |
| "learning_rate": 1.9970533273246915e-05, |
| "loss": 1.5232698917388916, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.22935779816513763, |
| "grad_norm": 1.61968195438385, |
| "learning_rate": 1.9968028040347495e-05, |
| "loss": 1.6931004524230957, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.2324159021406728, |
| "grad_norm": 0.13931135833263397, |
| "learning_rate": 1.996542080030774e-05, |
| "loss": 1.7459183931350708, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.23547400611620795, |
| "grad_norm": 0.3036521375179291, |
| "learning_rate": 1.9962711582783782e-05, |
| "loss": 1.7661356925964355, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.23853211009174313, |
| "grad_norm": 0.18819795548915863, |
| "learning_rate": 1.995990041859171e-05, |
| "loss": 1.6686315536499023, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.2415902140672783, |
| "grad_norm": 0.21112458407878876, |
| "learning_rate": 1.9956987339707212e-05, |
| "loss": 1.7403184175491333, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.24464831804281345, |
| "grad_norm": 0.15459787845611572, |
| "learning_rate": 1.9953972379265195e-05, |
| "loss": 1.6565409898757935, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.24770642201834864, |
| "grad_norm": 0.1960821896791458, |
| "learning_rate": 1.9950855571559434e-05, |
| "loss": 1.7828407287597656, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.25076452599388377, |
| "grad_norm": 0.27554914355278015, |
| "learning_rate": 1.994763695204216e-05, |
| "loss": 1.5625255107879639, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.25382262996941896, |
| "grad_norm": 0.32836204767227173, |
| "learning_rate": 1.9944316557323676e-05, |
| "loss": 1.7131232023239136, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.25688073394495414, |
| "grad_norm": 0.169178307056427, |
| "learning_rate": 1.9940894425171923e-05, |
| "loss": 1.4519933462142944, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.2599388379204893, |
| "grad_norm": 0.21049527823925018, |
| "learning_rate": 1.9937370594512054e-05, |
| "loss": 1.6664998531341553, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.26299694189602446, |
| "grad_norm": 0.1832091212272644, |
| "learning_rate": 1.9933745105426012e-05, |
| "loss": 1.2968833446502686, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.26605504587155965, |
| "grad_norm": 0.20308178663253784, |
| "learning_rate": 1.9930017999152035e-05, |
| "loss": 1.2826684713363647, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.2691131498470948, |
| "grad_norm": 0.42270585894584656, |
| "learning_rate": 1.9926189318084225e-05, |
| "loss": 1.5076080560684204, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.27217125382262997, |
| "grad_norm": 0.18236956000328064, |
| "learning_rate": 1.992225910577205e-05, |
| "loss": 1.4853439331054688, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.27522935779816515, |
| "grad_norm": 1.155766487121582, |
| "learning_rate": 1.9918227406919834e-05, |
| "loss": 1.6843595504760742, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.2782874617737003, |
| "grad_norm": 0.23917227983474731, |
| "learning_rate": 1.9914094267386282e-05, |
| "loss": 1.6130729913711548, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.28134556574923547, |
| "grad_norm": 0.16265490651130676, |
| "learning_rate": 1.9909859734183922e-05, |
| "loss": 1.3471795320510864, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.28440366972477066, |
| "grad_norm": 0.1218869760632515, |
| "learning_rate": 1.9905523855478605e-05, |
| "loss": 1.2923707962036133, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.2874617737003058, |
| "grad_norm": 0.12545672059059143, |
| "learning_rate": 1.990108668058892e-05, |
| "loss": 1.4676196575164795, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.290519877675841, |
| "grad_norm": 0.21261335909366608, |
| "learning_rate": 1.9896548259985677e-05, |
| "loss": 1.396953821182251, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.29357798165137616, |
| "grad_norm": 0.2187412828207016, |
| "learning_rate": 1.9891908645291285e-05, |
| "loss": 1.4410208463668823, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.2966360856269113, |
| "grad_norm": 0.12113740295171738, |
| "learning_rate": 1.98871678892792e-05, |
| "loss": 1.6053173542022705, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.2996941896024465, |
| "grad_norm": 0.4814097285270691, |
| "learning_rate": 1.9882326045873318e-05, |
| "loss": 1.540165901184082, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.30275229357798167, |
| "grad_norm": 0.1228162944316864, |
| "learning_rate": 1.9877383170147354e-05, |
| "loss": 1.2737184762954712, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.3058103975535168, |
| "grad_norm": 0.1502322554588318, |
| "learning_rate": 1.987233931832421e-05, |
| "loss": 1.4496139287948608, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.308868501529052, |
| "grad_norm": 0.18339720368385315, |
| "learning_rate": 1.9867194547775352e-05, |
| "loss": 1.5935065746307373, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.3119266055045872, |
| "grad_norm": 0.11243823170661926, |
| "learning_rate": 1.9861948917020147e-05, |
| "loss": 1.7492598295211792, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.3149847094801223, |
| "grad_norm": 0.13702593743801117, |
| "learning_rate": 1.98566024857252e-05, |
| "loss": 1.7874646186828613, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.3180428134556575, |
| "grad_norm": 0.13499164581298828, |
| "learning_rate": 1.985115531470368e-05, |
| "loss": 1.877272605895996, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.3211009174311927, |
| "grad_norm": 0.15725181996822357, |
| "learning_rate": 1.9845607465914617e-05, |
| "loss": 1.4388186931610107, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.3241590214067278, |
| "grad_norm": 0.11166012287139893, |
| "learning_rate": 1.9839959002462204e-05, |
| "loss": 1.5455048084259033, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.327217125382263, |
| "grad_norm": 0.5011727809906006, |
| "learning_rate": 1.9834209988595086e-05, |
| "loss": 1.3708510398864746, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.3302752293577982, |
| "grad_norm": 0.09888817369937897, |
| "learning_rate": 1.982836048970561e-05, |
| "loss": 1.289783239364624, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 0.17895136773586273, |
| "learning_rate": 1.9822410572329106e-05, |
| "loss": 1.7309335470199585, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.3363914373088685, |
| "grad_norm": 0.1677575707435608, |
| "learning_rate": 1.9816360304143107e-05, |
| "loss": 1.5802499055862427, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.3394495412844037, |
| "grad_norm": 0.15030327439308167, |
| "learning_rate": 1.98102097539666e-05, |
| "loss": 1.738713026046753, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.3425076452599388, |
| "grad_norm": 0.16578397154808044, |
| "learning_rate": 1.9803958991759223e-05, |
| "loss": 1.2134374380111694, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.345565749235474, |
| "grad_norm": 0.27223920822143555, |
| "learning_rate": 1.979760808862049e-05, |
| "loss": 1.493201732635498, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.3486238532110092, |
| "grad_norm": 0.1591493785381317, |
| "learning_rate": 1.979115711678896e-05, |
| "loss": 1.7763992547988892, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.3516819571865443, |
| "grad_norm": 0.16624890267848969, |
| "learning_rate": 1.9784606149641425e-05, |
| "loss": 1.5089151859283447, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.3547400611620795, |
| "grad_norm": 0.1464773416519165, |
| "learning_rate": 1.9777955261692096e-05, |
| "loss": 1.6361924409866333, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.3577981651376147, |
| "grad_norm": 0.21099774539470673, |
| "learning_rate": 1.977120452859172e-05, |
| "loss": 1.5302180051803589, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.36085626911314983, |
| "grad_norm": 0.13680118322372437, |
| "learning_rate": 1.976435402712674e-05, |
| "loss": 1.4461219310760498, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.363914373088685, |
| "grad_norm": 0.12054964900016785, |
| "learning_rate": 1.9757403835218416e-05, |
| "loss": 1.5349360704421997, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.3669724770642202, |
| "grad_norm": 0.1747889220714569, |
| "learning_rate": 1.9750354031921945e-05, |
| "loss": 1.5896707773208618, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.37003058103975534, |
| "grad_norm": 0.14816083014011383, |
| "learning_rate": 1.9743204697425555e-05, |
| "loss": 1.451588749885559, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.3730886850152905, |
| "grad_norm": 0.1631159782409668, |
| "learning_rate": 1.9735955913049596e-05, |
| "loss": 1.5556179285049438, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.3761467889908257, |
| "grad_norm": 0.22455169260501862, |
| "learning_rate": 1.972860776124561e-05, |
| "loss": 1.828192949295044, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.37920489296636084, |
| "grad_norm": 0.2173646092414856, |
| "learning_rate": 1.97211603255954e-05, |
| "loss": 2.273862838745117, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.382262996941896, |
| "grad_norm": 0.18272121250629425, |
| "learning_rate": 1.971361369081008e-05, |
| "loss": 1.4451524019241333, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.3853211009174312, |
| "grad_norm": 0.18003606796264648, |
| "learning_rate": 1.9705967942729097e-05, |
| "loss": 1.4289908409118652, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.38837920489296635, |
| "grad_norm": 0.1590876430273056, |
| "learning_rate": 1.969822316831928e-05, |
| "loss": 1.4974957704544067, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.39143730886850153, |
| "grad_norm": 0.3142800033092499, |
| "learning_rate": 1.969037945567383e-05, |
| "loss": 1.6579951047897339, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.3944954128440367, |
| "grad_norm": 0.16492165625095367, |
| "learning_rate": 1.9682436894011314e-05, |
| "loss": 1.5667517185211182, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.39755351681957185, |
| "grad_norm": 0.15221764147281647, |
| "learning_rate": 1.9674395573674682e-05, |
| "loss": 1.4063596725463867, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.40061162079510704, |
| "grad_norm": 0.3218546509742737, |
| "learning_rate": 1.9666255586130196e-05, |
| "loss": 1.2971922159194946, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.4036697247706422, |
| "grad_norm": 0.10011743754148483, |
| "learning_rate": 1.9658017023966428e-05, |
| "loss": 1.218963623046875, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.40672782874617736, |
| "grad_norm": 0.12353604286909103, |
| "learning_rate": 1.964967998089318e-05, |
| "loss": 1.3903040885925293, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.40978593272171254, |
| "grad_norm": 0.326667845249176, |
| "learning_rate": 1.9641244551740438e-05, |
| "loss": 1.6172282695770264, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.41284403669724773, |
| "grad_norm": 0.26776137948036194, |
| "learning_rate": 1.9632710832457272e-05, |
| "loss": 1.6435128450393677, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.41590214067278286, |
| "grad_norm": 0.1551942378282547, |
| "learning_rate": 1.9624078920110766e-05, |
| "loss": 1.648958444595337, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.41896024464831805, |
| "grad_norm": 0.17337119579315186, |
| "learning_rate": 1.9615348912884897e-05, |
| "loss": 1.6705131530761719, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.42201834862385323, |
| "grad_norm": 0.2657426595687866, |
| "learning_rate": 1.960652091007944e-05, |
| "loss": 1.5089123249053955, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.42507645259938837, |
| "grad_norm": 0.2546260952949524, |
| "learning_rate": 1.9597595012108797e-05, |
| "loss": 1.6476012468338013, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.42813455657492355, |
| "grad_norm": 0.4865143895149231, |
| "learning_rate": 1.9588571320500914e-05, |
| "loss": 1.5658520460128784, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.43119266055045874, |
| "grad_norm": 0.1443634331226349, |
| "learning_rate": 1.9579449937896067e-05, |
| "loss": 1.4523909091949463, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.43425076452599387, |
| "grad_norm": 0.2230675220489502, |
| "learning_rate": 1.957023096804574e-05, |
| "loss": 1.4425302743911743, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.43730886850152906, |
| "grad_norm": 0.19679437577724457, |
| "learning_rate": 1.9560914515811416e-05, |
| "loss": 1.6431429386138916, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.44036697247706424, |
| "grad_norm": 0.3200703561306, |
| "learning_rate": 1.9551500687163404e-05, |
| "loss": 1.3619184494018555, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.4434250764525994, |
| "grad_norm": 0.2082187533378601, |
| "learning_rate": 1.9541989589179608e-05, |
| "loss": 1.590578317642212, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.44648318042813456, |
| "grad_norm": 0.1061125323176384, |
| "learning_rate": 1.9532381330044346e-05, |
| "loss": 1.4594062566757202, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.44954128440366975, |
| "grad_norm": 0.12709374725818634, |
| "learning_rate": 1.9522676019047084e-05, |
| "loss": 1.4876629114151, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.4525993883792049, |
| "grad_norm": 0.27205580472946167, |
| "learning_rate": 1.9512873766581216e-05, |
| "loss": 1.611258864402771, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.45565749235474007, |
| "grad_norm": 0.21540172398090363, |
| "learning_rate": 1.9502974684142787e-05, |
| "loss": 1.6235052347183228, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.45871559633027525, |
| "grad_norm": 0.15379998087882996, |
| "learning_rate": 1.949297888432926e-05, |
| "loss": 1.4504597187042236, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.4617737003058104, |
| "grad_norm": 0.15325340628623962, |
| "learning_rate": 1.9482886480838193e-05, |
| "loss": 1.3728998899459839, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.4648318042813456, |
| "grad_norm": 0.14561216533184052, |
| "learning_rate": 1.947269758846597e-05, |
| "loss": 1.5321768522262573, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.46788990825688076, |
| "grad_norm": 0.1464478224515915, |
| "learning_rate": 1.9462412323106506e-05, |
| "loss": 1.4658679962158203, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.4709480122324159, |
| "grad_norm": 0.16449898481369019, |
| "learning_rate": 1.945203080174989e-05, |
| "loss": 1.6214468479156494, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.4740061162079511, |
| "grad_norm": 0.11995477229356766, |
| "learning_rate": 1.94415531424811e-05, |
| "loss": 1.4710017442703247, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.47706422018348627, |
| "grad_norm": 0.12246715277433395, |
| "learning_rate": 1.9430979464478618e-05, |
| "loss": 1.4265179634094238, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.4801223241590214, |
| "grad_norm": 0.1928575336933136, |
| "learning_rate": 1.9420309888013115e-05, |
| "loss": 1.4053140878677368, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.4831804281345566, |
| "grad_norm": 0.1138468086719513, |
| "learning_rate": 1.940954453444604e-05, |
| "loss": 1.501997709274292, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.48623853211009177, |
| "grad_norm": 0.18235905468463898, |
| "learning_rate": 1.9398683526228283e-05, |
| "loss": 1.4911972284317017, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.4892966360856269, |
| "grad_norm": 0.25072285532951355, |
| "learning_rate": 1.9387726986898753e-05, |
| "loss": 1.4921306371688843, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.4923547400611621, |
| "grad_norm": 0.14147193729877472, |
| "learning_rate": 1.9376675041082974e-05, |
| "loss": 1.6467393636703491, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.4954128440366973, |
| "grad_norm": 0.16818289458751678, |
| "learning_rate": 1.936552781449168e-05, |
| "loss": 1.8290669918060303, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.4984709480122324, |
| "grad_norm": 0.1758740097284317, |
| "learning_rate": 1.935428543391938e-05, |
| "loss": 1.7090046405792236, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.5015290519877675, |
| "grad_norm": 0.12498500198125839, |
| "learning_rate": 1.9342948027242923e-05, |
| "loss": 1.687024474143982, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.5045871559633027, |
| "grad_norm": 0.10503566265106201, |
| "learning_rate": 1.9331515723420016e-05, |
| "loss": 1.6114351749420166, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.5076452599388379, |
| "grad_norm": 0.2537882626056671, |
| "learning_rate": 1.9319988652487794e-05, |
| "loss": 1.307665228843689, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.5107033639143731, |
| "grad_norm": 0.12492425739765167, |
| "learning_rate": 1.930836694556131e-05, |
| "loss": 1.2778944969177246, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.5137614678899083, |
| "grad_norm": 0.13973812758922577, |
| "learning_rate": 1.929665073483208e-05, |
| "loss": 1.7095977067947388, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.5168195718654435, |
| "grad_norm": 0.17106389999389648, |
| "learning_rate": 1.9284840153566533e-05, |
| "loss": 1.7058213949203491, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.5198776758409785, |
| "grad_norm": 0.20474772155284882, |
| "learning_rate": 1.9272935336104526e-05, |
| "loss": 1.788483738899231, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.5229357798165137, |
| "grad_norm": 0.3535018265247345, |
| "learning_rate": 1.926093641785781e-05, |
| "loss": 1.7168432474136353, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.5259938837920489, |
| "grad_norm": 0.17024751007556915, |
| "learning_rate": 1.9248843535308494e-05, |
| "loss": 1.7606186866760254, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.5290519877675841, |
| "grad_norm": 0.49967944622039795, |
| "learning_rate": 1.9236656826007483e-05, |
| "loss": 1.6816507577896118, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.5321100917431193, |
| "grad_norm": 0.5842476487159729, |
| "learning_rate": 1.9224376428572914e-05, |
| "loss": 1.6088945865631104, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.5351681957186545, |
| "grad_norm": 0.25457754731178284, |
| "learning_rate": 1.9212002482688586e-05, |
| "loss": 1.5031757354736328, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.5382262996941896, |
| "grad_norm": 0.20663785934448242, |
| "learning_rate": 1.919953512910237e-05, |
| "loss": 1.7612838745117188, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.5412844036697247, |
| "grad_norm": 0.2548372447490692, |
| "learning_rate": 1.9186974509624596e-05, |
| "loss": 1.6867271661758423, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.5443425076452599, |
| "grad_norm": 0.8678698539733887, |
| "learning_rate": 1.917432076712647e-05, |
| "loss": 1.2227782011032104, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.5474006116207951, |
| "grad_norm": 0.21147370338439941, |
| "learning_rate": 1.916157404553841e-05, |
| "loss": 1.8059334754943848, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.5504587155963303, |
| "grad_norm": 0.1419752538204193, |
| "learning_rate": 1.914873448984843e-05, |
| "loss": 1.8255892992019653, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.5535168195718655, |
| "grad_norm": 0.16539934277534485, |
| "learning_rate": 1.913580224610051e-05, |
| "loss": 1.8986237049102783, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.5565749235474006, |
| "grad_norm": 0.3205839991569519, |
| "learning_rate": 1.912277746139288e-05, |
| "loss": 1.822392225265503, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.5596330275229358, |
| "grad_norm": 0.29032111167907715, |
| "learning_rate": 1.9109660283876402e-05, |
| "loss": 1.796310305595398, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.5626911314984709, |
| "grad_norm": 0.1845773160457611, |
| "learning_rate": 1.909645086275286e-05, |
| "loss": 1.6601674556732178, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.5657492354740061, |
| "grad_norm": 0.18452957272529602, |
| "learning_rate": 1.9083149348273267e-05, |
| "loss": 1.5303943157196045, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.5688073394495413, |
| "grad_norm": 0.2641131281852722, |
| "learning_rate": 1.906975589173615e-05, |
| "loss": 1.095422387123108, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.5718654434250765, |
| "grad_norm": 0.14679329097270966, |
| "learning_rate": 1.9056270645485832e-05, |
| "loss": 1.0895999670028687, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.5749235474006116, |
| "grad_norm": 0.0957442969083786, |
| "learning_rate": 1.904269376291071e-05, |
| "loss": 1.3464511632919312, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.5779816513761468, |
| "grad_norm": 0.1374763697385788, |
| "learning_rate": 1.9029025398441502e-05, |
| "loss": 1.2797412872314453, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.581039755351682, |
| "grad_norm": 0.13921880722045898, |
| "learning_rate": 1.9015265707549475e-05, |
| "loss": 1.2325642108917236, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.5840978593272171, |
| "grad_norm": 0.09560864418745041, |
| "learning_rate": 1.9001414846744708e-05, |
| "loss": 1.2352911233901978, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.5871559633027523, |
| "grad_norm": 0.374447226524353, |
| "learning_rate": 1.898747297357429e-05, |
| "loss": 1.5163378715515137, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.5902140672782875, |
| "grad_norm": 0.17736363410949707, |
| "learning_rate": 1.8973440246620527e-05, |
| "loss": 1.4300881624221802, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.5932721712538226, |
| "grad_norm": 0.15392173826694489, |
| "learning_rate": 1.895931682549915e-05, |
| "loss": 1.245898962020874, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.5963302752293578, |
| "grad_norm": 0.24752528965473175, |
| "learning_rate": 1.8945102870857502e-05, |
| "loss": 1.5547707080841064, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.599388379204893, |
| "grad_norm": 0.08202062547206879, |
| "learning_rate": 1.8930798544372683e-05, |
| "loss": 1.5418813228607178, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.6024464831804281, |
| "grad_norm": 0.2755289077758789, |
| "learning_rate": 1.891640400874975e-05, |
| "loss": 1.3220683336257935, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.6055045871559633, |
| "grad_norm": 0.2508130669593811, |
| "learning_rate": 1.8901919427719835e-05, |
| "loss": 1.497948169708252, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.6085626911314985, |
| "grad_norm": 0.18379831314086914, |
| "learning_rate": 1.8887344966038293e-05, |
| "loss": 1.6999335289001465, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.6116207951070336, |
| "grad_norm": 0.22920790314674377, |
| "learning_rate": 1.8872680789482847e-05, |
| "loss": 1.2188191413879395, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.6146788990825688, |
| "grad_norm": 0.16860808432102203, |
| "learning_rate": 1.8857927064851663e-05, |
| "loss": 1.4792815446853638, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.617737003058104, |
| "grad_norm": 0.1248302087187767, |
| "learning_rate": 1.8843083959961487e-05, |
| "loss": 1.5485862493515015, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.6207951070336392, |
| "grad_norm": 0.258060097694397, |
| "learning_rate": 1.8828151643645723e-05, |
| "loss": 1.4772653579711914, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.6238532110091743, |
| "grad_norm": 0.11846158653497696, |
| "learning_rate": 1.8813130285752504e-05, |
| "loss": 1.3414539098739624, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.6269113149847095, |
| "grad_norm": 0.4850642681121826, |
| "learning_rate": 1.8798020057142787e-05, |
| "loss": 0.9788084626197815, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.6299694189602446, |
| "grad_norm": 0.15223458409309387, |
| "learning_rate": 1.8782821129688378e-05, |
| "loss": 1.6087661981582642, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.6330275229357798, |
| "grad_norm": 0.239080011844635, |
| "learning_rate": 1.8767533676269994e-05, |
| "loss": 1.5469305515289307, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.636085626911315, |
| "grad_norm": 0.12797395884990692, |
| "learning_rate": 1.8752157870775293e-05, |
| "loss": 1.4467060565948486, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.6391437308868502, |
| "grad_norm": 0.1488076150417328, |
| "learning_rate": 1.87366938880969e-05, |
| "loss": 1.5522997379302979, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.6422018348623854, |
| "grad_norm": 0.11536487936973572, |
| "learning_rate": 1.872114190413041e-05, |
| "loss": 1.4859579801559448, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.6452599388379205, |
| "grad_norm": 0.15605668723583221, |
| "learning_rate": 1.87055020957724e-05, |
| "loss": 1.4567848443984985, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.6483180428134556, |
| "grad_norm": 0.166889950633049, |
| "learning_rate": 1.86897746409184e-05, |
| "loss": 1.6576907634735107, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.6513761467889908, |
| "grad_norm": 0.1812712550163269, |
| "learning_rate": 1.8673959718460877e-05, |
| "loss": 1.3640563488006592, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.654434250764526, |
| "grad_norm": 0.11476635187864304, |
| "learning_rate": 1.865805750828721e-05, |
| "loss": 1.0880959033966064, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.6574923547400612, |
| "grad_norm": 0.19921736419200897, |
| "learning_rate": 1.8642068191277632e-05, |
| "loss": 1.7264765501022339, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.6605504587155964, |
| "grad_norm": 0.21837309002876282, |
| "learning_rate": 1.8625991949303163e-05, |
| "loss": 1.481621503829956, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.6636085626911316, |
| "grad_norm": 0.14236745238304138, |
| "learning_rate": 1.8609828965223577e-05, |
| "loss": 1.6409680843353271, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 0.15380296111106873, |
| "learning_rate": 1.8593579422885282e-05, |
| "loss": 1.4764446020126343, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.6697247706422018, |
| "grad_norm": 0.22593624889850616, |
| "learning_rate": 1.857724350711925e-05, |
| "loss": 1.679504632949829, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.672782874617737, |
| "grad_norm": 0.19800275564193726, |
| "learning_rate": 1.8560821403738913e-05, |
| "loss": 1.5476343631744385, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.6758409785932722, |
| "grad_norm": 0.373758465051651, |
| "learning_rate": 1.854431329953804e-05, |
| "loss": 1.579092264175415, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.6788990825688074, |
| "grad_norm": 0.22904565930366516, |
| "learning_rate": 1.852771938228863e-05, |
| "loss": 1.4693065881729126, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.6819571865443425, |
| "grad_norm": 0.20299802720546722, |
| "learning_rate": 1.851103984073876e-05, |
| "loss": 1.129746675491333, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.6850152905198776, |
| "grad_norm": 0.6228739619255066, |
| "learning_rate": 1.8494274864610442e-05, |
| "loss": 1.571781039237976, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.6880733944954128, |
| "grad_norm": 0.16735827922821045, |
| "learning_rate": 1.8477424644597466e-05, |
| "loss": 1.5506985187530518, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.691131498470948, |
| "grad_norm": 0.18025986850261688, |
| "learning_rate": 1.8460489372363233e-05, |
| "loss": 1.5530122518539429, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.6941896024464832, |
| "grad_norm": 0.16754700243473053, |
| "learning_rate": 1.844346924053858e-05, |
| "loss": 1.5570106506347656, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.6972477064220184, |
| "grad_norm": 0.14615465700626373, |
| "learning_rate": 1.842636444271957e-05, |
| "loss": 1.384924292564392, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.7003058103975535, |
| "grad_norm": 0.1849491447210312, |
| "learning_rate": 1.8409175173465305e-05, |
| "loss": 1.5958186388015747, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.7033639143730887, |
| "grad_norm": 0.3052046000957489, |
| "learning_rate": 1.8391901628295723e-05, |
| "loss": 1.687294602394104, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.7064220183486238, |
| "grad_norm": 0.23065337538719177, |
| "learning_rate": 1.8374544003689346e-05, |
| "loss": 1.4123787879943848, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.709480122324159, |
| "grad_norm": 0.1883080154657364, |
| "learning_rate": 1.8357102497081068e-05, |
| "loss": 1.6628724336624146, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.7125382262996942, |
| "grad_norm": 0.2225079983472824, |
| "learning_rate": 1.8339577306859898e-05, |
| "loss": 1.4429267644882202, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.7155963302752294, |
| "grad_norm": 0.3100110590457916, |
| "learning_rate": 1.832196863236671e-05, |
| "loss": 1.5697983503341675, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.7186544342507645, |
| "grad_norm": 0.2065575122833252, |
| "learning_rate": 1.830427667389197e-05, |
| "loss": 1.507987380027771, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.7217125382262997, |
| "grad_norm": 0.14007803797721863, |
| "learning_rate": 1.8286501632673467e-05, |
| "loss": 1.6372132301330566, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.7247706422018348, |
| "grad_norm": 0.16152538359165192, |
| "learning_rate": 1.8268643710894008e-05, |
| "loss": 1.7255295515060425, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.72782874617737, |
| "grad_norm": 0.16280680894851685, |
| "learning_rate": 1.8250703111679135e-05, |
| "loss": 1.619618535041809, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.7308868501529052, |
| "grad_norm": 0.1520742028951645, |
| "learning_rate": 1.8232680039094807e-05, |
| "loss": 1.4603939056396484, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.7339449541284404, |
| "grad_norm": 0.20179243385791779, |
| "learning_rate": 1.821457469814507e-05, |
| "loss": 1.5757310390472412, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.7370030581039755, |
| "grad_norm": 0.16924500465393066, |
| "learning_rate": 1.8196387294769744e-05, |
| "loss": 1.5796566009521484, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.7400611620795107, |
| "grad_norm": 0.4897945821285248, |
| "learning_rate": 1.8178118035842068e-05, |
| "loss": 1.5773838758468628, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.7431192660550459, |
| "grad_norm": 0.17398764193058014, |
| "learning_rate": 1.8159767129166353e-05, |
| "loss": 1.576639175415039, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.746177370030581, |
| "grad_norm": 0.2364799827337265, |
| "learning_rate": 1.8141334783475608e-05, |
| "loss": 1.576772689819336, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.7492354740061162, |
| "grad_norm": 0.4591314196586609, |
| "learning_rate": 1.8122821208429177e-05, |
| "loss": 1.371825098991394, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.7522935779816514, |
| "grad_norm": 0.220738023519516, |
| "learning_rate": 1.8104226614610355e-05, |
| "loss": 1.4610284566879272, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.7553516819571865, |
| "grad_norm": 0.18214958906173706, |
| "learning_rate": 1.808555121352398e-05, |
| "loss": 1.3964916467666626, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.7584097859327217, |
| "grad_norm": 0.23065899312496185, |
| "learning_rate": 1.806679521759403e-05, |
| "loss": 1.4891163110733032, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.7614678899082569, |
| "grad_norm": 0.18206670880317688, |
| "learning_rate": 1.804795884016123e-05, |
| "loss": 1.5172436237335205, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.764525993883792, |
| "grad_norm": 0.256682813167572, |
| "learning_rate": 1.802904229548059e-05, |
| "loss": 1.343529224395752, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.7675840978593272, |
| "grad_norm": 0.20497728884220123, |
| "learning_rate": 1.8010045798718996e-05, |
| "loss": 1.0024524927139282, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.7706422018348624, |
| "grad_norm": 1.0883042812347412, |
| "learning_rate": 1.7990969565952744e-05, |
| "loss": 1.281205415725708, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.7737003058103975, |
| "grad_norm": 0.1716405302286148, |
| "learning_rate": 1.7971813814165096e-05, |
| "loss": 1.3423949480056763, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.7767584097859327, |
| "grad_norm": 0.18879351019859314, |
| "learning_rate": 1.79525787612438e-05, |
| "loss": 1.388129472732544, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.7798165137614679, |
| "grad_norm": 0.1109057292342186, |
| "learning_rate": 1.793326462597862e-05, |
| "loss": 1.4014989137649536, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.7828746177370031, |
| "grad_norm": 0.08534067124128342, |
| "learning_rate": 1.7913871628058852e-05, |
| "loss": 1.4662851095199585, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.7859327217125383, |
| "grad_norm": 0.106519415974617, |
| "learning_rate": 1.7894399988070804e-05, |
| "loss": 1.3783475160598755, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.7889908256880734, |
| "grad_norm": 0.09660910069942474, |
| "learning_rate": 1.7874849927495312e-05, |
| "loss": 1.4017391204833984, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.7920489296636085, |
| "grad_norm": 0.14216075837612152, |
| "learning_rate": 1.78552216687052e-05, |
| "loss": 1.365959882736206, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.7951070336391437, |
| "grad_norm": 0.1868741363286972, |
| "learning_rate": 1.7835515434962775e-05, |
| "loss": 1.3767143487930298, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.7981651376146789, |
| "grad_norm": 0.08062469214200974, |
| "learning_rate": 1.781573145041726e-05, |
| "loss": 1.330634593963623, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.8012232415902141, |
| "grad_norm": 0.1081153079867363, |
| "learning_rate": 1.7795869940102256e-05, |
| "loss": 1.3757586479187012, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.8042813455657493, |
| "grad_norm": 0.35133278369903564, |
| "learning_rate": 1.77759311299332e-05, |
| "loss": 1.3196890354156494, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.8073394495412844, |
| "grad_norm": 0.05468370392918587, |
| "learning_rate": 1.775591524670475e-05, |
| "loss": 1.183956503868103, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.8103975535168195, |
| "grad_norm": 0.23623742163181305, |
| "learning_rate": 1.773582251808827e-05, |
| "loss": 1.2204488515853882, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.8134556574923547, |
| "grad_norm": 0.0645361989736557, |
| "learning_rate": 1.7715653172629172e-05, |
| "loss": 1.0032373666763306, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.8165137614678899, |
| "grad_norm": 0.12667137384414673, |
| "learning_rate": 1.7695407439744367e-05, |
| "loss": 1.2794767618179321, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.8195718654434251, |
| "grad_norm": 0.10390239208936691, |
| "learning_rate": 1.7675085549719638e-05, |
| "loss": 1.3293739557266235, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.8226299694189603, |
| "grad_norm": 0.1387118250131607, |
| "learning_rate": 1.765468773370701e-05, |
| "loss": 1.3429309129714966, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.8256880733944955, |
| "grad_norm": 0.0737949088215828, |
| "learning_rate": 1.7634214223722136e-05, |
| "loss": 1.3086459636688232, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.8287461773700305, |
| "grad_norm": 0.09957081079483032, |
| "learning_rate": 1.7613665252641656e-05, |
| "loss": 1.3245176076889038, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.8318042813455657, |
| "grad_norm": 0.10918257385492325, |
| "learning_rate": 1.7593041054200535e-05, |
| "loss": 1.2970821857452393, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.8348623853211009, |
| "grad_norm": 0.08030971139669418, |
| "learning_rate": 1.757234186298943e-05, |
| "loss": 1.2689207792282104, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.8379204892966361, |
| "grad_norm": 0.10478982329368591, |
| "learning_rate": 1.7551567914451982e-05, |
| "loss": 1.3340964317321777, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.8409785932721713, |
| "grad_norm": 0.3195957541465759, |
| "learning_rate": 1.7530719444882192e-05, |
| "loss": 1.3039358854293823, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.8440366972477065, |
| "grad_norm": 0.12156535685062408, |
| "learning_rate": 1.7509796691421677e-05, |
| "loss": 1.2771456241607666, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.8470948012232415, |
| "grad_norm": 0.12202958762645721, |
| "learning_rate": 1.7488799892057012e-05, |
| "loss": 1.2614632844924927, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.8501529051987767, |
| "grad_norm": 0.10148178786039352, |
| "learning_rate": 1.746772928561701e-05, |
| "loss": 1.2788347005844116, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.8532110091743119, |
| "grad_norm": 0.14937330782413483, |
| "learning_rate": 1.7446585111769994e-05, |
| "loss": 1.2756626605987549, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.8562691131498471, |
| "grad_norm": 0.0912046805024147, |
| "learning_rate": 1.7425367611021095e-05, |
| "loss": 1.2301669120788574, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.8593272171253823, |
| "grad_norm": 0.09572380036115646, |
| "learning_rate": 1.740407702470949e-05, |
| "loss": 1.2921226024627686, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.8623853211009175, |
| "grad_norm": 0.12295603007078171, |
| "learning_rate": 1.738271359500569e-05, |
| "loss": 1.3243293762207031, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.8654434250764526, |
| "grad_norm": 0.10404468327760696, |
| "learning_rate": 1.7361277564908746e-05, |
| "loss": 1.322561502456665, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.8685015290519877, |
| "grad_norm": 0.16732335090637207, |
| "learning_rate": 1.7339769178243513e-05, |
| "loss": 1.316751480102539, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.8715596330275229, |
| "grad_norm": 0.3713286519050598, |
| "learning_rate": 1.7318188679657868e-05, |
| "loss": 1.3337935209274292, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.8746177370030581, |
| "grad_norm": 0.43876761198043823, |
| "learning_rate": 1.7296536314619927e-05, |
| "loss": 1.3191556930541992, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.8776758409785933, |
| "grad_norm": 0.13432131707668304, |
| "learning_rate": 1.7274812329415256e-05, |
| "loss": 1.3426291942596436, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.8807339449541285, |
| "grad_norm": 0.10545923560857773, |
| "learning_rate": 1.725301697114406e-05, |
| "loss": 1.2635884284973145, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.8837920489296636, |
| "grad_norm": 0.16690693795681, |
| "learning_rate": 1.7231150487718388e-05, |
| "loss": 1.3099809885025024, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.8868501529051988, |
| "grad_norm": 0.10333248227834702, |
| "learning_rate": 1.7209213127859298e-05, |
| "loss": 1.2673903703689575, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.8899082568807339, |
| "grad_norm": 0.11379291117191315, |
| "learning_rate": 1.718720514109404e-05, |
| "loss": 1.2961304187774658, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.8929663608562691, |
| "grad_norm": 0.13509656488895416, |
| "learning_rate": 1.7165126777753205e-05, |
| "loss": 1.3033212423324585, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.8960244648318043, |
| "grad_norm": 0.1093890517950058, |
| "learning_rate": 1.714297828896789e-05, |
| "loss": 1.3269572257995605, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.8990825688073395, |
| "grad_norm": 0.10886257141828537, |
| "learning_rate": 1.7120759926666833e-05, |
| "loss": 1.3087824583053589, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.9021406727828746, |
| "grad_norm": 0.10905133932828903, |
| "learning_rate": 1.7098471943573554e-05, |
| "loss": 1.247659683227539, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.9051987767584098, |
| "grad_norm": 2.196148157119751, |
| "learning_rate": 1.7076114593203477e-05, |
| "loss": 1.2718437910079956, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.908256880733945, |
| "grad_norm": 0.1503346711397171, |
| "learning_rate": 1.7053688129861047e-05, |
| "loss": 1.310463309288025, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.9113149847094801, |
| "grad_norm": 0.17786924540996552, |
| "learning_rate": 1.703119280863683e-05, |
| "loss": 1.3340353965759277, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.9143730886850153, |
| "grad_norm": 0.30375856161117554, |
| "learning_rate": 1.700862888540463e-05, |
| "loss": 1.3078449964523315, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.9174311926605505, |
| "grad_norm": 0.11228691041469574, |
| "learning_rate": 1.698599661681855e-05, |
| "loss": 1.2841458320617676, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.9204892966360856, |
| "grad_norm": 0.09515351802110672, |
| "learning_rate": 1.6963296260310108e-05, |
| "loss": 1.2543302774429321, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.9235474006116208, |
| "grad_norm": 0.12382373213768005, |
| "learning_rate": 1.6940528074085277e-05, |
| "loss": 1.2844551801681519, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.926605504587156, |
| "grad_norm": 0.10872837156057358, |
| "learning_rate": 1.6917692317121574e-05, |
| "loss": 1.3093620538711548, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.9296636085626911, |
| "grad_norm": 0.10462535917758942, |
| "learning_rate": 1.6894789249165088e-05, |
| "loss": 1.2586979866027832, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.9327217125382263, |
| "grad_norm": 0.19008956849575043, |
| "learning_rate": 1.6871819130727543e-05, |
| "loss": 1.2832432985305786, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.9357798165137615, |
| "grad_norm": 0.28194817900657654, |
| "learning_rate": 1.6848782223083346e-05, |
| "loss": 1.2822047472000122, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.9388379204892966, |
| "grad_norm": 0.1536101996898651, |
| "learning_rate": 1.682567878826657e-05, |
| "loss": 1.3289425373077393, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.9418960244648318, |
| "grad_norm": 0.10441045463085175, |
| "learning_rate": 1.6802509089068037e-05, |
| "loss": 1.3459938764572144, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.944954128440367, |
| "grad_norm": 0.12564106285572052, |
| "learning_rate": 1.6779273389032268e-05, |
| "loss": 1.3037604093551636, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.9480122324159022, |
| "grad_norm": 0.1721193939447403, |
| "learning_rate": 1.675597195245453e-05, |
| "loss": 1.3236896991729736, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.9510703363914373, |
| "grad_norm": 0.1637701392173767, |
| "learning_rate": 1.6732605044377804e-05, |
| "loss": 1.290778636932373, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.9541284403669725, |
| "grad_norm": 0.1784583181142807, |
| "learning_rate": 1.670917293058979e-05, |
| "loss": 1.2351161241531372, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.9571865443425076, |
| "grad_norm": 0.16748499870300293, |
| "learning_rate": 1.668567587761985e-05, |
| "loss": 1.263080358505249, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.9602446483180428, |
| "grad_norm": 0.2232455611228943, |
| "learning_rate": 1.6662114152736025e-05, |
| "loss": 1.2841684818267822, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.963302752293578, |
| "grad_norm": 0.16638226807117462, |
| "learning_rate": 1.663848802394195e-05, |
| "loss": 1.258873462677002, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.9663608562691132, |
| "grad_norm": 0.11573649197816849, |
| "learning_rate": 1.6614797759973834e-05, |
| "loss": 1.269798755645752, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.9694189602446484, |
| "grad_norm": 0.1356838494539261, |
| "learning_rate": 1.6591043630297394e-05, |
| "loss": 1.3235660791397095, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.9724770642201835, |
| "grad_norm": 0.25283753871917725, |
| "learning_rate": 1.6567225905104785e-05, |
| "loss": 1.303951382637024, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.9755351681957186, |
| "grad_norm": 0.1507134586572647, |
| "learning_rate": 1.654334485531153e-05, |
| "loss": 1.319949746131897, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.9785932721712538, |
| "grad_norm": 0.18651549518108368, |
| "learning_rate": 1.651940075255345e-05, |
| "loss": 1.274340271949768, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.981651376146789, |
| "grad_norm": 0.11500085890293121, |
| "learning_rate": 1.649539386918355e-05, |
| "loss": 1.2713245153427124, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.9847094801223242, |
| "grad_norm": 0.18969926238059998, |
| "learning_rate": 1.6471324478268946e-05, |
| "loss": 1.2918071746826172, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.9877675840978594, |
| "grad_norm": 0.33583584427833557, |
| "learning_rate": 1.644719285358774e-05, |
| "loss": 1.3432663679122925, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.9908256880733946, |
| "grad_norm": 0.16686177253723145, |
| "learning_rate": 1.642299926962593e-05, |
| "loss": 1.3730149269104004, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.9938837920489296, |
| "grad_norm": 0.2564650774002075, |
| "learning_rate": 1.639874400157425e-05, |
| "loss": 2.0030324459075928, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.9969418960244648, |
| "grad_norm": 0.7696052193641663, |
| "learning_rate": 1.6374427325325078e-05, |
| "loss": 1.9848356246948242, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.39214804768562317, |
| "learning_rate": 1.635004951746927e-05, |
| "loss": 1.952000379562378, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.003058103975535, |
| "grad_norm": 0.26579877734184265, |
| "learning_rate": 1.632561085529304e-05, |
| "loss": 1.4564932584762573, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.0061162079510704, |
| "grad_norm": 0.09789416193962097, |
| "learning_rate": 1.6301111616774778e-05, |
| "loss": 1.418447494506836, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.0091743119266054, |
| "grad_norm": 0.20214584469795227, |
| "learning_rate": 1.6276552080581905e-05, |
| "loss": 1.4369993209838867, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.0122324159021407, |
| "grad_norm": 0.1257963627576828, |
| "learning_rate": 1.6251932526067705e-05, |
| "loss": 1.36565363407135, |
| "step": 662 |
| }, |
| { |
| "epoch": 1.0152905198776758, |
| "grad_norm": 0.11626733839511871, |
| "learning_rate": 1.622725323326814e-05, |
| "loss": 1.5537068843841553, |
| "step": 664 |
| }, |
| { |
| "epoch": 1.018348623853211, |
| "grad_norm": 0.10738101601600647, |
| "learning_rate": 1.6202514482898665e-05, |
| "loss": 1.4378470182418823, |
| "step": 666 |
| }, |
| { |
| "epoch": 1.0214067278287462, |
| "grad_norm": 0.10652212798595428, |
| "learning_rate": 1.617771655635104e-05, |
| "loss": 1.3821481466293335, |
| "step": 668 |
| }, |
| { |
| "epoch": 1.0244648318042813, |
| "grad_norm": 0.1619665026664734, |
| "learning_rate": 1.615285973569012e-05, |
| "loss": 1.4641979932785034, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.0275229357798166, |
| "grad_norm": 0.0878918468952179, |
| "learning_rate": 1.6127944303650665e-05, |
| "loss": 1.3324640989303589, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.0305810397553516, |
| "grad_norm": 0.12292367219924927, |
| "learning_rate": 1.61029705436341e-05, |
| "loss": 1.4031749963760376, |
| "step": 674 |
| }, |
| { |
| "epoch": 1.033639143730887, |
| "grad_norm": 0.201751247048378, |
| "learning_rate": 1.607793873970531e-05, |
| "loss": 1.5163370370864868, |
| "step": 676 |
| }, |
| { |
| "epoch": 1.036697247706422, |
| "grad_norm": 0.1418297439813614, |
| "learning_rate": 1.6052849176589402e-05, |
| "loss": 1.4317665100097656, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.039755351681957, |
| "grad_norm": 0.12598387897014618, |
| "learning_rate": 1.6027702139668467e-05, |
| "loss": 1.2285103797912598, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.0428134556574924, |
| "grad_norm": 0.13934196531772614, |
| "learning_rate": 1.600249791497833e-05, |
| "loss": 1.3943346738815308, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.0458715596330275, |
| "grad_norm": 0.26236793398857117, |
| "learning_rate": 1.5977236789205305e-05, |
| "loss": 1.6373569965362549, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.0489296636085628, |
| "grad_norm": 0.1907891184091568, |
| "learning_rate": 1.595191904968293e-05, |
| "loss": 1.3017511367797852, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.0519877675840978, |
| "grad_norm": 0.14807197451591492, |
| "learning_rate": 1.592654498438869e-05, |
| "loss": 1.4270445108413696, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.0550458715596331, |
| "grad_norm": 0.0998062789440155, |
| "learning_rate": 1.5901114881940755e-05, |
| "loss": 1.318896770477295, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.0581039755351682, |
| "grad_norm": 0.11834441125392914, |
| "learning_rate": 1.5875629031594695e-05, |
| "loss": 1.3081272840499878, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.0611620795107033, |
| "grad_norm": 0.18614527583122253, |
| "learning_rate": 1.585008772324018e-05, |
| "loss": 1.31736159324646, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.0642201834862386, |
| "grad_norm": 0.1135493814945221, |
| "learning_rate": 1.5824491247397693e-05, |
| "loss": 1.3496593236923218, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.0672782874617737, |
| "grad_norm": 0.10874086618423462, |
| "learning_rate": 1.5798839895215222e-05, |
| "loss": 1.533265233039856, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.070336391437309, |
| "grad_norm": 0.2451164424419403, |
| "learning_rate": 1.5773133958464943e-05, |
| "loss": 1.5562183856964111, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.073394495412844, |
| "grad_norm": 0.16339105367660522, |
| "learning_rate": 1.574737372953991e-05, |
| "loss": 1.3957347869873047, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.0764525993883791, |
| "grad_norm": 0.5332375764846802, |
| "learning_rate": 1.5721559501450725e-05, |
| "loss": 1.3628031015396118, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.0795107033639144, |
| "grad_norm": 0.16171111166477203, |
| "learning_rate": 1.56956915678222e-05, |
| "loss": 1.3461381196975708, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.0825688073394495, |
| "grad_norm": 0.2861359417438507, |
| "learning_rate": 1.5669770222890033e-05, |
| "loss": 1.4217514991760254, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.0856269113149848, |
| "grad_norm": 0.28001871705055237, |
| "learning_rate": 1.564379576149744e-05, |
| "loss": 1.3966766595840454, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.0886850152905199, |
| "grad_norm": 0.2379312813282013, |
| "learning_rate": 1.561776847909182e-05, |
| "loss": 1.4265292882919312, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.091743119266055, |
| "grad_norm": 0.17415215075016022, |
| "learning_rate": 1.5591688671721382e-05, |
| "loss": 1.4175716638565063, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.0948012232415902, |
| "grad_norm": 0.1268319934606552, |
| "learning_rate": 1.5565556636031784e-05, |
| "loss": 1.2845792770385742, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.0978593272171253, |
| "grad_norm": 0.11549645662307739, |
| "learning_rate": 1.553937266926275e-05, |
| "loss": 1.2449455261230469, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.1009174311926606, |
| "grad_norm": 0.12862299382686615, |
| "learning_rate": 1.551313706924471e-05, |
| "loss": 1.400179147720337, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.1039755351681957, |
| "grad_norm": 0.219200998544693, |
| "learning_rate": 1.5486850134395386e-05, |
| "loss": 1.521613359451294, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.107033639143731, |
| "grad_norm": 0.24224898219108582, |
| "learning_rate": 1.5460512163716413e-05, |
| "loss": 1.3650974035263062, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.110091743119266, |
| "grad_norm": 0.20005325973033905, |
| "learning_rate": 1.5434123456789935e-05, |
| "loss": 1.5584301948547363, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.1131498470948011, |
| "grad_norm": 0.19992578029632568, |
| "learning_rate": 1.54076843137752e-05, |
| "loss": 1.5724374055862427, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.1162079510703364, |
| "grad_norm": 0.20727121829986572, |
| "learning_rate": 1.5381195035405138e-05, |
| "loss": 1.335442066192627, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.1192660550458715, |
| "grad_norm": 0.23304890096187592, |
| "learning_rate": 1.535465592298295e-05, |
| "loss": 1.309862494468689, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.1223241590214068, |
| "grad_norm": 0.11819928884506226, |
| "learning_rate": 1.5328067278378672e-05, |
| "loss": 1.4315496683120728, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.1253822629969419, |
| "grad_norm": 0.17882581055164337, |
| "learning_rate": 1.5301429404025752e-05, |
| "loss": 1.2392085790634155, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.1284403669724772, |
| "grad_norm": 0.13341587781906128, |
| "learning_rate": 1.5274742602917594e-05, |
| "loss": 1.329017996788025, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.1314984709480123, |
| "grad_norm": 0.7237756848335266, |
| "learning_rate": 1.5248007178604125e-05, |
| "loss": 1.2809618711471558, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.1345565749235473, |
| "grad_norm": 0.17202049493789673, |
| "learning_rate": 1.5221223435188346e-05, |
| "loss": 1.3543680906295776, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.1376146788990826, |
| "grad_norm": 0.207048237323761, |
| "learning_rate": 1.5194391677322852e-05, |
| "loss": 1.2619272470474243, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.1406727828746177, |
| "grad_norm": 0.10717354714870453, |
| "learning_rate": 1.516751221020639e-05, |
| "loss": 1.3127491474151611, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.143730886850153, |
| "grad_norm": 0.21290293335914612, |
| "learning_rate": 1.5140585339580372e-05, |
| "loss": 1.4598865509033203, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.146788990825688, |
| "grad_norm": 0.11727327853441238, |
| "learning_rate": 1.5113611371725405e-05, |
| "loss": 1.3367241621017456, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.1498470948012232, |
| "grad_norm": 0.18146538734436035, |
| "learning_rate": 1.5086590613457808e-05, |
| "loss": 1.2867947816848755, |
| "step": 752 |
| }, |
| { |
| "epoch": 1.1529051987767585, |
| "grad_norm": 0.16245512664318085, |
| "learning_rate": 1.5059523372126112e-05, |
| "loss": 1.509689211845398, |
| "step": 754 |
| }, |
| { |
| "epoch": 1.1559633027522935, |
| "grad_norm": 0.2850814759731293, |
| "learning_rate": 1.5032409955607578e-05, |
| "loss": 1.3928141593933105, |
| "step": 756 |
| }, |
| { |
| "epoch": 1.1590214067278288, |
| "grad_norm": 0.0971517339348793, |
| "learning_rate": 1.5005250672304687e-05, |
| "loss": 1.3027610778808594, |
| "step": 758 |
| }, |
| { |
| "epoch": 1.162079510703364, |
| "grad_norm": 0.14859257638454437, |
| "learning_rate": 1.4978045831141626e-05, |
| "loss": 1.409261703491211, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.165137614678899, |
| "grad_norm": 0.1967761367559433, |
| "learning_rate": 1.4950795741560793e-05, |
| "loss": 1.3910449743270874, |
| "step": 762 |
| }, |
| { |
| "epoch": 1.1681957186544343, |
| "grad_norm": 0.11445457488298416, |
| "learning_rate": 1.4923500713519259e-05, |
| "loss": 1.4382435083389282, |
| "step": 764 |
| }, |
| { |
| "epoch": 1.1712538226299694, |
| "grad_norm": 0.1770360916852951, |
| "learning_rate": 1.4896161057485248e-05, |
| "loss": 1.3225045204162598, |
| "step": 766 |
| }, |
| { |
| "epoch": 1.1743119266055047, |
| "grad_norm": 0.22804246842861176, |
| "learning_rate": 1.4868777084434607e-05, |
| "loss": 1.4178036451339722, |
| "step": 768 |
| }, |
| { |
| "epoch": 1.1773700305810397, |
| "grad_norm": 0.1586090326309204, |
| "learning_rate": 1.4841349105847275e-05, |
| "loss": 1.3072750568389893, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.1804281345565748, |
| "grad_norm": 0.2554394602775574, |
| "learning_rate": 1.4813877433703723e-05, |
| "loss": 1.6272152662277222, |
| "step": 772 |
| }, |
| { |
| "epoch": 1.18348623853211, |
| "grad_norm": 0.2529110610485077, |
| "learning_rate": 1.4786362380481427e-05, |
| "loss": 1.3914612531661987, |
| "step": 774 |
| }, |
| { |
| "epoch": 1.1865443425076452, |
| "grad_norm": 0.13387520611286163, |
| "learning_rate": 1.475880425915129e-05, |
| "loss": 1.4425235986709595, |
| "step": 776 |
| }, |
| { |
| "epoch": 1.1896024464831805, |
| "grad_norm": 0.14389349520206451, |
| "learning_rate": 1.4731203383174109e-05, |
| "loss": 1.4251344203948975, |
| "step": 778 |
| }, |
| { |
| "epoch": 1.1926605504587156, |
| "grad_norm": 0.13777580857276917, |
| "learning_rate": 1.4703560066496982e-05, |
| "loss": 1.3878992795944214, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.1957186544342508, |
| "grad_norm": 0.1566278487443924, |
| "learning_rate": 1.467587462354976e-05, |
| "loss": 1.3862793445587158, |
| "step": 782 |
| }, |
| { |
| "epoch": 1.198776758409786, |
| "grad_norm": 0.13618597388267517, |
| "learning_rate": 1.4648147369241452e-05, |
| "loss": 1.3946163654327393, |
| "step": 784 |
| }, |
| { |
| "epoch": 1.2018348623853212, |
| "grad_norm": 0.192558154463768, |
| "learning_rate": 1.4620378618956663e-05, |
| "loss": 1.3836755752563477, |
| "step": 786 |
| }, |
| { |
| "epoch": 1.2048929663608563, |
| "grad_norm": 0.13239143788814545, |
| "learning_rate": 1.4592568688551982e-05, |
| "loss": 1.4628338813781738, |
| "step": 788 |
| }, |
| { |
| "epoch": 1.2079510703363914, |
| "grad_norm": 0.17559094727039337, |
| "learning_rate": 1.4564717894352414e-05, |
| "loss": 1.3984802961349487, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.2110091743119267, |
| "grad_norm": 0.23624469339847565, |
| "learning_rate": 1.4536826553147762e-05, |
| "loss": 1.2977172136306763, |
| "step": 792 |
| }, |
| { |
| "epoch": 1.2140672782874617, |
| "grad_norm": 0.1116776168346405, |
| "learning_rate": 1.450889498218904e-05, |
| "loss": 1.2677760124206543, |
| "step": 794 |
| }, |
| { |
| "epoch": 1.217125382262997, |
| "grad_norm": 0.12481163442134857, |
| "learning_rate": 1.4480923499184851e-05, |
| "loss": 1.318403720855713, |
| "step": 796 |
| }, |
| { |
| "epoch": 1.2201834862385321, |
| "grad_norm": 0.13348488509655, |
| "learning_rate": 1.4452912422297783e-05, |
| "loss": 1.2807520627975464, |
| "step": 798 |
| }, |
| { |
| "epoch": 1.2232415902140672, |
| "grad_norm": 0.11903538554906845, |
| "learning_rate": 1.4424862070140782e-05, |
| "loss": 1.3351408243179321, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.2262996941896025, |
| "grad_norm": 0.15349438786506653, |
| "learning_rate": 1.439677276177353e-05, |
| "loss": 1.293311357498169, |
| "step": 802 |
| }, |
| { |
| "epoch": 1.2293577981651376, |
| "grad_norm": 0.8263071179389954, |
| "learning_rate": 1.4368644816698831e-05, |
| "loss": 1.5547124147415161, |
| "step": 804 |
| }, |
| { |
| "epoch": 1.2324159021406729, |
| "grad_norm": 0.15303385257720947, |
| "learning_rate": 1.4340478554858948e-05, |
| "loss": 1.5602731704711914, |
| "step": 806 |
| }, |
| { |
| "epoch": 1.235474006116208, |
| "grad_norm": 0.18527425825595856, |
| "learning_rate": 1.4312274296631986e-05, |
| "loss": 1.6077568531036377, |
| "step": 808 |
| }, |
| { |
| "epoch": 1.238532110091743, |
| "grad_norm": 0.1729760617017746, |
| "learning_rate": 1.428403236282824e-05, |
| "loss": 1.5241308212280273, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.2415902140672783, |
| "grad_norm": 0.1840292066335678, |
| "learning_rate": 1.4255753074686554e-05, |
| "loss": 1.5814104080200195, |
| "step": 812 |
| }, |
| { |
| "epoch": 1.2446483180428134, |
| "grad_norm": 0.14792004227638245, |
| "learning_rate": 1.4227436753870645e-05, |
| "loss": 1.469613790512085, |
| "step": 814 |
| }, |
| { |
| "epoch": 1.2477064220183487, |
| "grad_norm": 0.2009243667125702, |
| "learning_rate": 1.4199083722465473e-05, |
| "loss": 1.575390338897705, |
| "step": 816 |
| }, |
| { |
| "epoch": 1.2507645259938838, |
| "grad_norm": 0.18052861094474792, |
| "learning_rate": 1.4170694302973558e-05, |
| "loss": 1.3542118072509766, |
| "step": 818 |
| }, |
| { |
| "epoch": 1.2538226299694188, |
| "grad_norm": 0.25438863039016724, |
| "learning_rate": 1.4142268818311318e-05, |
| "loss": 1.4728432893753052, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.2568807339449541, |
| "grad_norm": 0.1723494827747345, |
| "learning_rate": 1.4113807591805403e-05, |
| "loss": 1.2633655071258545, |
| "step": 822 |
| }, |
| { |
| "epoch": 1.2599388379204892, |
| "grad_norm": 0.1885925829410553, |
| "learning_rate": 1.408531094718899e-05, |
| "loss": 1.4775038957595825, |
| "step": 824 |
| }, |
| { |
| "epoch": 1.2629969418960245, |
| "grad_norm": 0.17333459854125977, |
| "learning_rate": 1.4056779208598148e-05, |
| "loss": 1.0904916524887085, |
| "step": 826 |
| }, |
| { |
| "epoch": 1.2660550458715596, |
| "grad_norm": 0.23691454529762268, |
| "learning_rate": 1.40282127005681e-05, |
| "loss": 1.0575618743896484, |
| "step": 828 |
| }, |
| { |
| "epoch": 1.2691131498470947, |
| "grad_norm": 0.19390061497688293, |
| "learning_rate": 1.3999611748029567e-05, |
| "loss": 1.275180697441101, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.27217125382263, |
| "grad_norm": 0.30520501732826233, |
| "learning_rate": 1.3970976676305057e-05, |
| "loss": 1.3261710405349731, |
| "step": 832 |
| }, |
| { |
| "epoch": 1.2752293577981653, |
| "grad_norm": 0.339356929063797, |
| "learning_rate": 1.3942307811105174e-05, |
| "loss": 1.4489119052886963, |
| "step": 834 |
| }, |
| { |
| "epoch": 1.2782874617737003, |
| "grad_norm": 0.31700074672698975, |
| "learning_rate": 1.3913605478524893e-05, |
| "loss": 1.375788927078247, |
| "step": 836 |
| }, |
| { |
| "epoch": 1.2813455657492354, |
| "grad_norm": 0.1832026243209839, |
| "learning_rate": 1.3884870005039876e-05, |
| "loss": 1.1603834629058838, |
| "step": 838 |
| }, |
| { |
| "epoch": 1.2844036697247707, |
| "grad_norm": 0.16457735002040863, |
| "learning_rate": 1.3856101717502745e-05, |
| "loss": 1.1277961730957031, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.2874617737003058, |
| "grad_norm": 0.40114447474479675, |
| "learning_rate": 1.3827300943139368e-05, |
| "loss": 1.3474359512329102, |
| "step": 842 |
| }, |
| { |
| "epoch": 1.290519877675841, |
| "grad_norm": 0.3017509877681732, |
| "learning_rate": 1.3798468009545132e-05, |
| "loss": 1.258968472480774, |
| "step": 844 |
| }, |
| { |
| "epoch": 1.2935779816513762, |
| "grad_norm": 0.2743600308895111, |
| "learning_rate": 1.3769603244681224e-05, |
| "loss": 1.216719388961792, |
| "step": 846 |
| }, |
| { |
| "epoch": 1.2966360856269112, |
| "grad_norm": 0.28029799461364746, |
| "learning_rate": 1.3740706976870894e-05, |
| "loss": 1.4269287586212158, |
| "step": 848 |
| }, |
| { |
| "epoch": 1.2996941896024465, |
| "grad_norm": 0.34066903591156006, |
| "learning_rate": 1.3711779534795726e-05, |
| "loss": 1.4010690450668335, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.3027522935779816, |
| "grad_norm": 0.1889609545469284, |
| "learning_rate": 1.3682821247491888e-05, |
| "loss": 1.128299355506897, |
| "step": 852 |
| }, |
| { |
| "epoch": 1.305810397553517, |
| "grad_norm": 0.13515017926692963, |
| "learning_rate": 1.365383244434641e-05, |
| "loss": 1.3370952606201172, |
| "step": 854 |
| }, |
| { |
| "epoch": 1.308868501529052, |
| "grad_norm": 0.2397429496049881, |
| "learning_rate": 1.3624813455093426e-05, |
| "loss": 1.4829299449920654, |
| "step": 856 |
| }, |
| { |
| "epoch": 1.311926605504587, |
| "grad_norm": 0.1517391949892044, |
| "learning_rate": 1.3595764609810409e-05, |
| "loss": 1.6207728385925293, |
| "step": 858 |
| }, |
| { |
| "epoch": 1.3149847094801224, |
| "grad_norm": 0.14302465319633484, |
| "learning_rate": 1.3566686238914442e-05, |
| "loss": 1.6425838470458984, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.3180428134556574, |
| "grad_norm": 0.13647185266017914, |
| "learning_rate": 1.3537578673158447e-05, |
| "loss": 1.7624022960662842, |
| "step": 862 |
| }, |
| { |
| "epoch": 1.3211009174311927, |
| "grad_norm": 0.15156084299087524, |
| "learning_rate": 1.3508442243627414e-05, |
| "loss": 1.3077445030212402, |
| "step": 864 |
| }, |
| { |
| "epoch": 1.3241590214067278, |
| "grad_norm": 0.16375960409641266, |
| "learning_rate": 1.3479277281734665e-05, |
| "loss": 1.387640118598938, |
| "step": 866 |
| }, |
| { |
| "epoch": 1.3272171253822629, |
| "grad_norm": 0.13412417471408844, |
| "learning_rate": 1.345008411921804e-05, |
| "loss": 1.2285324335098267, |
| "step": 868 |
| }, |
| { |
| "epoch": 1.3302752293577982, |
| "grad_norm": 0.09673840552568436, |
| "learning_rate": 1.342086308813617e-05, |
| "loss": 1.1523722410202026, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.3333333333333333, |
| "grad_norm": 0.24676595628261566, |
| "learning_rate": 1.3391614520864665e-05, |
| "loss": 1.5277175903320312, |
| "step": 872 |
| }, |
| { |
| "epoch": 1.3363914373088686, |
| "grad_norm": 0.1344609558582306, |
| "learning_rate": 1.3362338750092345e-05, |
| "loss": 1.4308637380599976, |
| "step": 874 |
| }, |
| { |
| "epoch": 1.3394495412844036, |
| "grad_norm": 0.18598589301109314, |
| "learning_rate": 1.3333036108817468e-05, |
| "loss": 1.6210284233093262, |
| "step": 876 |
| }, |
| { |
| "epoch": 1.3425076452599387, |
| "grad_norm": 0.22593791782855988, |
| "learning_rate": 1.330370693034392e-05, |
| "loss": 1.092522144317627, |
| "step": 878 |
| }, |
| { |
| "epoch": 1.345565749235474, |
| "grad_norm": 0.20722009241580963, |
| "learning_rate": 1.3274351548277444e-05, |
| "loss": 1.3288477659225464, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.3486238532110093, |
| "grad_norm": 0.3268890976905823, |
| "learning_rate": 1.3244970296521832e-05, |
| "loss": 1.5945448875427246, |
| "step": 882 |
| }, |
| { |
| "epoch": 1.3516819571865444, |
| "grad_norm": 0.13788160681724548, |
| "learning_rate": 1.3215563509275134e-05, |
| "loss": 1.3352100849151611, |
| "step": 884 |
| }, |
| { |
| "epoch": 1.3547400611620795, |
| "grad_norm": 0.16543197631835938, |
| "learning_rate": 1.3186131521025848e-05, |
| "loss": 1.4955792427062988, |
| "step": 886 |
| }, |
| { |
| "epoch": 1.3577981651376148, |
| "grad_norm": 0.21935272216796875, |
| "learning_rate": 1.3156674666549131e-05, |
| "loss": 1.3688589334487915, |
| "step": 888 |
| }, |
| { |
| "epoch": 1.3608562691131498, |
| "grad_norm": 0.15441736578941345, |
| "learning_rate": 1.3127193280902977e-05, |
| "loss": 1.2942179441452026, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.3639143730886851, |
| "grad_norm": 0.13213297724723816, |
| "learning_rate": 1.3097687699424411e-05, |
| "loss": 1.3552772998809814, |
| "step": 892 |
| }, |
| { |
| "epoch": 1.3669724770642202, |
| "grad_norm": 0.17006580531597137, |
| "learning_rate": 1.306815825772567e-05, |
| "loss": 1.413638949394226, |
| "step": 894 |
| }, |
| { |
| "epoch": 1.3700305810397553, |
| "grad_norm": 0.1837081015110016, |
| "learning_rate": 1.3038605291690401e-05, |
| "loss": 1.2679128646850586, |
| "step": 896 |
| }, |
| { |
| "epoch": 1.3730886850152906, |
| "grad_norm": 0.11259419471025467, |
| "learning_rate": 1.300902913746982e-05, |
| "loss": 1.3821946382522583, |
| "step": 898 |
| }, |
| { |
| "epoch": 1.3761467889908257, |
| "grad_norm": 0.2403029501438141, |
| "learning_rate": 1.2979430131478895e-05, |
| "loss": 1.6863353252410889, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.379204892966361, |
| "grad_norm": 0.31875738501548767, |
| "learning_rate": 1.2949808610392536e-05, |
| "loss": 2.005361318588257, |
| "step": 902 |
| }, |
| { |
| "epoch": 1.382262996941896, |
| "grad_norm": 0.16625094413757324, |
| "learning_rate": 1.2920164911141739e-05, |
| "loss": 1.313822865486145, |
| "step": 904 |
| }, |
| { |
| "epoch": 1.385321100917431, |
| "grad_norm": 0.1672082543373108, |
| "learning_rate": 1.289049937090977e-05, |
| "loss": 1.3048782348632812, |
| "step": 906 |
| }, |
| { |
| "epoch": 1.3883792048929664, |
| "grad_norm": 0.17883723974227905, |
| "learning_rate": 1.2860812327128329e-05, |
| "loss": 1.370732307434082, |
| "step": 908 |
| }, |
| { |
| "epoch": 1.3914373088685015, |
| "grad_norm": 0.34061485528945923, |
| "learning_rate": 1.2831104117473708e-05, |
| "loss": 1.4522621631622314, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.3944954128440368, |
| "grad_norm": 0.2218160331249237, |
| "learning_rate": 1.2801375079862941e-05, |
| "loss": 1.3893041610717773, |
| "step": 912 |
| }, |
| { |
| "epoch": 1.3975535168195719, |
| "grad_norm": 0.19148877263069153, |
| "learning_rate": 1.2771625552449989e-05, |
| "loss": 1.2561511993408203, |
| "step": 914 |
| }, |
| { |
| "epoch": 1.400611620795107, |
| "grad_norm": 0.14251859486103058, |
| "learning_rate": 1.2741855873621853e-05, |
| "loss": 1.1840838193893433, |
| "step": 916 |
| }, |
| { |
| "epoch": 1.4036697247706422, |
| "grad_norm": 0.09337247163057327, |
| "learning_rate": 1.2712066381994771e-05, |
| "loss": 1.1102322340011597, |
| "step": 918 |
| }, |
| { |
| "epoch": 1.4067278287461773, |
| "grad_norm": 0.11131396144628525, |
| "learning_rate": 1.2682257416410324e-05, |
| "loss": 1.2444500923156738, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.4097859327217126, |
| "grad_norm": 0.1941288560628891, |
| "learning_rate": 1.2652429315931607e-05, |
| "loss": 1.4857803583145142, |
| "step": 922 |
| }, |
| { |
| "epoch": 1.4128440366972477, |
| "grad_norm": 0.2464672476053238, |
| "learning_rate": 1.2622582419839364e-05, |
| "loss": 1.5179466009140015, |
| "step": 924 |
| }, |
| { |
| "epoch": 1.4159021406727827, |
| "grad_norm": 0.40092432498931885, |
| "learning_rate": 1.259271706762813e-05, |
| "loss": 1.49838387966156, |
| "step": 926 |
| }, |
| { |
| "epoch": 1.418960244648318, |
| "grad_norm": 0.18188023567199707, |
| "learning_rate": 1.2562833599002376e-05, |
| "loss": 1.5421233177185059, |
| "step": 928 |
| }, |
| { |
| "epoch": 1.4220183486238533, |
| "grad_norm": 0.16129463911056519, |
| "learning_rate": 1.2532932353872626e-05, |
| "loss": 1.3665364980697632, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.4250764525993884, |
| "grad_norm": 0.36101001501083374, |
| "learning_rate": 1.2503013672351614e-05, |
| "loss": 1.4816341400146484, |
| "step": 932 |
| }, |
| { |
| "epoch": 1.4281345565749235, |
| "grad_norm": 0.3823509216308594, |
| "learning_rate": 1.2473077894750406e-05, |
| "loss": 1.381542682647705, |
| "step": 934 |
| }, |
| { |
| "epoch": 1.4311926605504588, |
| "grad_norm": 0.2345127910375595, |
| "learning_rate": 1.2443125361574516e-05, |
| "loss": 1.3099571466445923, |
| "step": 936 |
| }, |
| { |
| "epoch": 1.4342507645259939, |
| "grad_norm": 0.18030600249767303, |
| "learning_rate": 1.241315641352006e-05, |
| "loss": 1.3212041854858398, |
| "step": 938 |
| }, |
| { |
| "epoch": 1.4373088685015292, |
| "grad_norm": 0.2411879003047943, |
| "learning_rate": 1.238317139146985e-05, |
| "loss": 1.51852548122406, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.4403669724770642, |
| "grad_norm": 0.2498498558998108, |
| "learning_rate": 1.235317063648955e-05, |
| "loss": 1.2771378755569458, |
| "step": 942 |
| }, |
| { |
| "epoch": 1.4434250764525993, |
| "grad_norm": 0.29515254497528076, |
| "learning_rate": 1.2323154489823766e-05, |
| "loss": 1.4409904479980469, |
| "step": 944 |
| }, |
| { |
| "epoch": 1.4464831804281346, |
| "grad_norm": 0.23676812648773193, |
| "learning_rate": 1.2293123292892176e-05, |
| "loss": 1.3410083055496216, |
| "step": 946 |
| }, |
| { |
| "epoch": 1.4495412844036697, |
| "grad_norm": 0.20357346534729004, |
| "learning_rate": 1.2263077387285656e-05, |
| "loss": 1.3485143184661865, |
| "step": 948 |
| }, |
| { |
| "epoch": 1.452599388379205, |
| "grad_norm": 0.21395525336265564, |
| "learning_rate": 1.2233017114762383e-05, |
| "loss": 1.5053271055221558, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.45565749235474, |
| "grad_norm": 0.14866068959236145, |
| "learning_rate": 1.2202942817243945e-05, |
| "loss": 1.5033762454986572, |
| "step": 952 |
| }, |
| { |
| "epoch": 1.4587155963302751, |
| "grad_norm": 0.1509505659341812, |
| "learning_rate": 1.217285483681147e-05, |
| "loss": 1.32400381565094, |
| "step": 954 |
| }, |
| { |
| "epoch": 1.4617737003058104, |
| "grad_norm": 0.2283921092748642, |
| "learning_rate": 1.2142753515701715e-05, |
| "loss": 1.2706825733184814, |
| "step": 956 |
| }, |
| { |
| "epoch": 1.4648318042813455, |
| "grad_norm": 0.19454039633274078, |
| "learning_rate": 1.2112639196303177e-05, |
| "loss": 1.403527021408081, |
| "step": 958 |
| }, |
| { |
| "epoch": 1.4678899082568808, |
| "grad_norm": 0.19340156018733978, |
| "learning_rate": 1.2082512221152211e-05, |
| "loss": 1.3496915102005005, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.470948012232416, |
| "grad_norm": 0.28565388917922974, |
| "learning_rate": 1.2052372932929124e-05, |
| "loss": 1.5003544092178345, |
| "step": 962 |
| }, |
| { |
| "epoch": 1.474006116207951, |
| "grad_norm": 0.2407606989145279, |
| "learning_rate": 1.2022221674454276e-05, |
| "loss": 1.3663601875305176, |
| "step": 964 |
| }, |
| { |
| "epoch": 1.4770642201834863, |
| "grad_norm": 0.24161574244499207, |
| "learning_rate": 1.1992058788684178e-05, |
| "loss": 1.311216950416565, |
| "step": 966 |
| }, |
| { |
| "epoch": 1.4801223241590213, |
| "grad_norm": 0.6812675595283508, |
| "learning_rate": 1.1961884618707606e-05, |
| "loss": 1.2701431512832642, |
| "step": 968 |
| }, |
| { |
| "epoch": 1.4831804281345566, |
| "grad_norm": 0.15686868131160736, |
| "learning_rate": 1.1931699507741681e-05, |
| "loss": 1.4032564163208008, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.4862385321100917, |
| "grad_norm": 0.29336854815483093, |
| "learning_rate": 1.1901503799127978e-05, |
| "loss": 1.3898736238479614, |
| "step": 972 |
| }, |
| { |
| "epoch": 1.4892966360856268, |
| "grad_norm": 0.21223317086696625, |
| "learning_rate": 1.1871297836328615e-05, |
| "loss": 1.357151746749878, |
| "step": 974 |
| }, |
| { |
| "epoch": 1.492354740061162, |
| "grad_norm": 0.3396297097206116, |
| "learning_rate": 1.1841081962922339e-05, |
| "loss": 1.5504416227340698, |
| "step": 976 |
| }, |
| { |
| "epoch": 1.4954128440366974, |
| "grad_norm": 0.15393349528312683, |
| "learning_rate": 1.1810856522600633e-05, |
| "loss": 1.7415130138397217, |
| "step": 978 |
| }, |
| { |
| "epoch": 1.4984709480122325, |
| "grad_norm": 0.17905078828334808, |
| "learning_rate": 1.1780621859163799e-05, |
| "loss": 1.6198244094848633, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.5015290519877675, |
| "grad_norm": 0.19629204273223877, |
| "learning_rate": 1.1750378316517042e-05, |
| "loss": 1.6088056564331055, |
| "step": 982 |
| }, |
| { |
| "epoch": 1.5045871559633026, |
| "grad_norm": 0.13399019837379456, |
| "learning_rate": 1.1720126238666574e-05, |
| "loss": 1.5122345685958862, |
| "step": 984 |
| }, |
| { |
| "epoch": 1.507645259938838, |
| "grad_norm": 0.205459326505661, |
| "learning_rate": 1.1689865969715682e-05, |
| "loss": 1.183510661125183, |
| "step": 986 |
| }, |
| { |
| "epoch": 1.5107033639143732, |
| "grad_norm": 0.19832122325897217, |
| "learning_rate": 1.1659597853860822e-05, |
| "loss": 1.1826776266098022, |
| "step": 988 |
| }, |
| { |
| "epoch": 1.5137614678899083, |
| "grad_norm": 0.217342808842659, |
| "learning_rate": 1.1629322235387712e-05, |
| "loss": 1.5985417366027832, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.5168195718654434, |
| "grad_norm": 0.21424347162246704, |
| "learning_rate": 1.1599039458667404e-05, |
| "loss": 1.5874334573745728, |
| "step": 992 |
| }, |
| { |
| "epoch": 1.5198776758409784, |
| "grad_norm": 0.21046607196331024, |
| "learning_rate": 1.1568749868152376e-05, |
| "loss": 1.6973150968551636, |
| "step": 994 |
| }, |
| { |
| "epoch": 1.5229357798165137, |
| "grad_norm": 0.22607657313346863, |
| "learning_rate": 1.1538453808372601e-05, |
| "loss": 1.6257494688034058, |
| "step": 996 |
| }, |
| { |
| "epoch": 1.525993883792049, |
| "grad_norm": 0.2784031927585602, |
| "learning_rate": 1.1508151623931652e-05, |
| "loss": 1.6771817207336426, |
| "step": 998 |
| }, |
| { |
| "epoch": 1.529051987767584, |
| "grad_norm": 0.3850814700126648, |
| "learning_rate": 1.1477843659502748e-05, |
| "loss": 1.5088847875595093, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.5321100917431192, |
| "grad_norm": 0.389369398355484, |
| "learning_rate": 1.1447530259824867e-05, |
| "loss": 1.507793664932251, |
| "step": 1002 |
| }, |
| { |
| "epoch": 1.5351681957186545, |
| "grad_norm": 0.596508800983429, |
| "learning_rate": 1.1417211769698803e-05, |
| "loss": 1.3859407901763916, |
| "step": 1004 |
| }, |
| { |
| "epoch": 1.5382262996941896, |
| "grad_norm": 0.34961917996406555, |
| "learning_rate": 1.1386888533983263e-05, |
| "loss": 1.6432298421859741, |
| "step": 1006 |
| }, |
| { |
| "epoch": 1.5412844036697249, |
| "grad_norm": 0.3803319036960602, |
| "learning_rate": 1.1356560897590914e-05, |
| "loss": 1.4720622301101685, |
| "step": 1008 |
| }, |
| { |
| "epoch": 1.54434250764526, |
| "grad_norm": 0.18877731263637543, |
| "learning_rate": 1.1326229205484494e-05, |
| "loss": 1.1493924856185913, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.547400611620795, |
| "grad_norm": 0.29330477118492126, |
| "learning_rate": 1.1295893802672867e-05, |
| "loss": 1.6615839004516602, |
| "step": 1012 |
| }, |
| { |
| "epoch": 1.5504587155963303, |
| "grad_norm": 0.21881945431232452, |
| "learning_rate": 1.1265555034207103e-05, |
| "loss": 1.749032974243164, |
| "step": 1014 |
| }, |
| { |
| "epoch": 1.5535168195718656, |
| "grad_norm": 0.28221604228019714, |
| "learning_rate": 1.1235213245176564e-05, |
| "loss": 1.8175487518310547, |
| "step": 1016 |
| }, |
| { |
| "epoch": 1.5565749235474007, |
| "grad_norm": 0.2191021740436554, |
| "learning_rate": 1.1204868780704952e-05, |
| "loss": 1.7448463439941406, |
| "step": 1018 |
| }, |
| { |
| "epoch": 1.5596330275229358, |
| "grad_norm": 0.2853221893310547, |
| "learning_rate": 1.117452198594642e-05, |
| "loss": 1.7113560438156128, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.5626911314984708, |
| "grad_norm": 0.30422237515449524, |
| "learning_rate": 1.1144173206081619e-05, |
| "loss": 1.5799381732940674, |
| "step": 1022 |
| }, |
| { |
| "epoch": 1.5657492354740061, |
| "grad_norm": 0.2810017466545105, |
| "learning_rate": 1.111382278631377e-05, |
| "loss": 1.4372574090957642, |
| "step": 1024 |
| }, |
| { |
| "epoch": 1.5688073394495414, |
| "grad_norm": 0.2811414897441864, |
| "learning_rate": 1.1083471071864766e-05, |
| "loss": 0.9963301420211792, |
| "step": 1026 |
| }, |
| { |
| "epoch": 1.5718654434250765, |
| "grad_norm": 0.18028278648853302, |
| "learning_rate": 1.105311840797121e-05, |
| "loss": 0.9933477640151978, |
| "step": 1028 |
| }, |
| { |
| "epoch": 1.5749235474006116, |
| "grad_norm": 0.1357865184545517, |
| "learning_rate": 1.1022765139880517e-05, |
| "loss": 1.221966028213501, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.5779816513761467, |
| "grad_norm": 0.14980663359165192, |
| "learning_rate": 1.0992411612846962e-05, |
| "loss": 1.1657860279083252, |
| "step": 1032 |
| }, |
| { |
| "epoch": 1.581039755351682, |
| "grad_norm": 0.18947246670722961, |
| "learning_rate": 1.0962058172127774e-05, |
| "loss": 1.1021173000335693, |
| "step": 1034 |
| }, |
| { |
| "epoch": 1.5840978593272173, |
| "grad_norm": 0.12680017948150635, |
| "learning_rate": 1.0931705162979203e-05, |
| "loss": 1.1529592275619507, |
| "step": 1036 |
| }, |
| { |
| "epoch": 1.5871559633027523, |
| "grad_norm": 0.31863412261009216, |
| "learning_rate": 1.090135293065258e-05, |
| "loss": 1.38621985912323, |
| "step": 1038 |
| }, |
| { |
| "epoch": 1.5902140672782874, |
| "grad_norm": 0.18417641520500183, |
| "learning_rate": 1.0871001820390406e-05, |
| "loss": 1.3420405387878418, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.5932721712538225, |
| "grad_norm": 0.1281033605337143, |
| "learning_rate": 1.0840652177422418e-05, |
| "loss": 1.1849461793899536, |
| "step": 1042 |
| }, |
| { |
| "epoch": 1.5963302752293578, |
| "grad_norm": 0.3351232409477234, |
| "learning_rate": 1.0810304346961666e-05, |
| "loss": 1.3912733793258667, |
| "step": 1044 |
| }, |
| { |
| "epoch": 1.599388379204893, |
| "grad_norm": 0.15044710040092468, |
| "learning_rate": 1.0779958674200577e-05, |
| "loss": 1.4560588598251343, |
| "step": 1046 |
| }, |
| { |
| "epoch": 1.6024464831804281, |
| "grad_norm": 0.2629024386405945, |
| "learning_rate": 1.0749615504307044e-05, |
| "loss": 1.2233479022979736, |
| "step": 1048 |
| }, |
| { |
| "epoch": 1.6055045871559632, |
| "grad_norm": 0.23600272834300995, |
| "learning_rate": 1.0719275182420484e-05, |
| "loss": 1.4159035682678223, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.6085626911314985, |
| "grad_norm": 1.801208734512329, |
| "learning_rate": 1.0688938053647919e-05, |
| "loss": 1.6346092224121094, |
| "step": 1052 |
| }, |
| { |
| "epoch": 1.6116207951070336, |
| "grad_norm": 0.21306540071964264, |
| "learning_rate": 1.0658604463060059e-05, |
| "loss": 1.1381094455718994, |
| "step": 1054 |
| }, |
| { |
| "epoch": 1.614678899082569, |
| "grad_norm": 0.20768539607524872, |
| "learning_rate": 1.062827475568736e-05, |
| "loss": 1.3859405517578125, |
| "step": 1056 |
| }, |
| { |
| "epoch": 1.617737003058104, |
| "grad_norm": 0.12193287909030914, |
| "learning_rate": 1.059794927651611e-05, |
| "loss": 1.4775490760803223, |
| "step": 1058 |
| }, |
| { |
| "epoch": 1.620795107033639, |
| "grad_norm": 0.2431986778974533, |
| "learning_rate": 1.0567628370484503e-05, |
| "loss": 1.3453044891357422, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.6238532110091743, |
| "grad_norm": 0.1215621829032898, |
| "learning_rate": 1.0537312382478721e-05, |
| "loss": 1.2004613876342773, |
| "step": 1062 |
| }, |
| { |
| "epoch": 1.6269113149847096, |
| "grad_norm": 0.59897381067276, |
| "learning_rate": 1.0507001657329003e-05, |
| "loss": 0.810043215751648, |
| "step": 1064 |
| }, |
| { |
| "epoch": 1.6299694189602447, |
| "grad_norm": 0.19294363260269165, |
| "learning_rate": 1.047669653980572e-05, |
| "loss": 1.5248199701309204, |
| "step": 1066 |
| }, |
| { |
| "epoch": 1.6330275229357798, |
| "grad_norm": 0.27523353695869446, |
| "learning_rate": 1.0446397374615466e-05, |
| "loss": 1.4369803667068481, |
| "step": 1068 |
| }, |
| { |
| "epoch": 1.6360856269113149, |
| "grad_norm": 0.1609538048505783, |
| "learning_rate": 1.0416104506397127e-05, |
| "loss": 1.3286679983139038, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.6391437308868502, |
| "grad_norm": 0.30778738856315613, |
| "learning_rate": 1.0385818279717963e-05, |
| "loss": 1.4519555568695068, |
| "step": 1072 |
| }, |
| { |
| "epoch": 1.6422018348623855, |
| "grad_norm": 0.15987016260623932, |
| "learning_rate": 1.0355539039069692e-05, |
| "loss": 1.389966368675232, |
| "step": 1074 |
| }, |
| { |
| "epoch": 1.6452599388379205, |
| "grad_norm": 0.15622813999652863, |
| "learning_rate": 1.032526712886457e-05, |
| "loss": 1.352725625038147, |
| "step": 1076 |
| }, |
| { |
| "epoch": 1.6483180428134556, |
| "grad_norm": 0.36926910281181335, |
| "learning_rate": 1.0295002893431465e-05, |
| "loss": 1.5491305589675903, |
| "step": 1078 |
| }, |
| { |
| "epoch": 1.6513761467889907, |
| "grad_norm": 0.2159455269575119, |
| "learning_rate": 1.0264746677011957e-05, |
| "loss": 1.1885015964508057, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.654434250764526, |
| "grad_norm": 0.16300049424171448, |
| "learning_rate": 1.0234498823756409e-05, |
| "loss": 0.9729296565055847, |
| "step": 1082 |
| }, |
| { |
| "epoch": 1.6574923547400613, |
| "grad_norm": 0.2133103460073471, |
| "learning_rate": 1.020425967772006e-05, |
| "loss": 1.6012141704559326, |
| "step": 1084 |
| }, |
| { |
| "epoch": 1.6605504587155964, |
| "grad_norm": 0.21847309172153473, |
| "learning_rate": 1.0174029582859104e-05, |
| "loss": 1.391322135925293, |
| "step": 1086 |
| }, |
| { |
| "epoch": 1.6636085626911314, |
| "grad_norm": 0.2506503462791443, |
| "learning_rate": 1.0143808883026785e-05, |
| "loss": 1.5196988582611084, |
| "step": 1088 |
| }, |
| { |
| "epoch": 1.6666666666666665, |
| "grad_norm": 0.19972681999206543, |
| "learning_rate": 1.0113597921969482e-05, |
| "loss": 1.3515968322753906, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.6697247706422018, |
| "grad_norm": 0.20533211529254913, |
| "learning_rate": 1.0083397043322802e-05, |
| "loss": 1.5942573547363281, |
| "step": 1092 |
| }, |
| { |
| "epoch": 1.6727828746177371, |
| "grad_norm": 0.16217419505119324, |
| "learning_rate": 1.0053206590607667e-05, |
| "loss": 1.4510009288787842, |
| "step": 1094 |
| }, |
| { |
| "epoch": 1.6758409785932722, |
| "grad_norm": 0.3597978353500366, |
| "learning_rate": 1.002302690722641e-05, |
| "loss": 1.4673590660095215, |
| "step": 1096 |
| }, |
| { |
| "epoch": 1.6788990825688073, |
| "grad_norm": 0.23254692554473877, |
| "learning_rate": 9.992858336458863e-06, |
| "loss": 1.3361679315567017, |
| "step": 1098 |
| }, |
| { |
| "epoch": 1.6819571865443423, |
| "grad_norm": 0.36849313974380493, |
| "learning_rate": 9.962701221458468e-06, |
| "loss": 1.006578803062439, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.6850152905198776, |
| "grad_norm": 0.8467805981636047, |
| "learning_rate": 9.932555905248359e-06, |
| "loss": 1.468936800956726, |
| "step": 1102 |
| }, |
| { |
| "epoch": 1.688073394495413, |
| "grad_norm": 0.3970109820365906, |
| "learning_rate": 9.902422730717447e-06, |
| "loss": 1.4364168643951416, |
| "step": 1104 |
| }, |
| { |
| "epoch": 1.691131498470948, |
| "grad_norm": 0.2257806360721588, |
| "learning_rate": 9.872302040616564e-06, |
| "loss": 1.458873987197876, |
| "step": 1106 |
| }, |
| { |
| "epoch": 1.694189602446483, |
| "grad_norm": 0.3083685040473938, |
| "learning_rate": 9.842194177554522e-06, |
| "loss": 1.4550821781158447, |
| "step": 1108 |
| }, |
| { |
| "epoch": 1.6972477064220184, |
| "grad_norm": 0.15511758625507355, |
| "learning_rate": 9.812099483994237e-06, |
| "loss": 1.2649579048156738, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.7003058103975535, |
| "grad_norm": 0.18423175811767578, |
| "learning_rate": 9.782018302248823e-06, |
| "loss": 1.490966558456421, |
| "step": 1112 |
| }, |
| { |
| "epoch": 1.7033639143730888, |
| "grad_norm": 0.2790970206260681, |
| "learning_rate": 9.751950974477706e-06, |
| "loss": 1.5698015689849854, |
| "step": 1114 |
| }, |
| { |
| "epoch": 1.7064220183486238, |
| "grad_norm": 0.2892129123210907, |
| "learning_rate": 9.721897842682733e-06, |
| "loss": 1.2804194688796997, |
| "step": 1116 |
| }, |
| { |
| "epoch": 1.709480122324159, |
| "grad_norm": 0.38898563385009766, |
| "learning_rate": 9.691859248704271e-06, |
| "loss": 1.5459824800491333, |
| "step": 1118 |
| }, |
| { |
| "epoch": 1.7125382262996942, |
| "grad_norm": 0.2886713147163391, |
| "learning_rate": 9.661835534217332e-06, |
| "loss": 1.3230183124542236, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.7155963302752295, |
| "grad_norm": 0.1743742972612381, |
| "learning_rate": 9.631827040727679e-06, |
| "loss": 1.4699349403381348, |
| "step": 1122 |
| }, |
| { |
| "epoch": 1.7186544342507646, |
| "grad_norm": 0.23916998505592346, |
| "learning_rate": 9.601834109567942e-06, |
| "loss": 1.4023747444152832, |
| "step": 1124 |
| }, |
| { |
| "epoch": 1.7217125382262997, |
| "grad_norm": 0.16461840271949768, |
| "learning_rate": 9.571857081893739e-06, |
| "loss": 1.5375235080718994, |
| "step": 1126 |
| }, |
| { |
| "epoch": 1.7247706422018347, |
| "grad_norm": 0.1851189136505127, |
| "learning_rate": 9.541896298679794e-06, |
| "loss": 1.6321358680725098, |
| "step": 1128 |
| }, |
| { |
| "epoch": 1.72782874617737, |
| "grad_norm": 0.2205365002155304, |
| "learning_rate": 9.511952100716051e-06, |
| "loss": 1.5237758159637451, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.7308868501529053, |
| "grad_norm": 0.21927416324615479, |
| "learning_rate": 9.482024828603813e-06, |
| "loss": 1.351357340812683, |
| "step": 1132 |
| }, |
| { |
| "epoch": 1.7339449541284404, |
| "grad_norm": 0.14752982556819916, |
| "learning_rate": 9.452114822751854e-06, |
| "loss": 1.465145230293274, |
| "step": 1134 |
| }, |
| { |
| "epoch": 1.7370030581039755, |
| "grad_norm": 0.1750420182943344, |
| "learning_rate": 9.422222423372557e-06, |
| "loss": 1.4611705541610718, |
| "step": 1136 |
| }, |
| { |
| "epoch": 1.7400611620795106, |
| "grad_norm": 0.2836110293865204, |
| "learning_rate": 9.392347970478035e-06, |
| "loss": 1.4611400365829468, |
| "step": 1138 |
| }, |
| { |
| "epoch": 1.7431192660550459, |
| "grad_norm": 0.4955594837665558, |
| "learning_rate": 9.362491803876267e-06, |
| "loss": 1.486251950263977, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.7461773700305812, |
| "grad_norm": 0.1564791202545166, |
| "learning_rate": 9.332654263167242e-06, |
| "loss": 1.4786931276321411, |
| "step": 1142 |
| }, |
| { |
| "epoch": 1.7492354740061162, |
| "grad_norm": 0.21143656969070435, |
| "learning_rate": 9.30283568773908e-06, |
| "loss": 1.2680423259735107, |
| "step": 1144 |
| }, |
| { |
| "epoch": 1.7522935779816513, |
| "grad_norm": 0.21294501423835754, |
| "learning_rate": 9.273036416764182e-06, |
| "loss": 1.3377047777175903, |
| "step": 1146 |
| }, |
| { |
| "epoch": 1.7553516819571864, |
| "grad_norm": 0.6836228966712952, |
| "learning_rate": 9.243256789195374e-06, |
| "loss": 1.2617762088775635, |
| "step": 1148 |
| }, |
| { |
| "epoch": 1.7584097859327217, |
| "grad_norm": 0.21216517686843872, |
| "learning_rate": 9.213497143762036e-06, |
| "loss": 1.3481056690216064, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.761467889908257, |
| "grad_norm": 0.21078504621982574, |
| "learning_rate": 9.18375781896628e-06, |
| "loss": 1.321746587753296, |
| "step": 1152 |
| }, |
| { |
| "epoch": 1.764525993883792, |
| "grad_norm": 0.4202200472354889, |
| "learning_rate": 9.154039153079054e-06, |
| "loss": 1.193610668182373, |
| "step": 1154 |
| }, |
| { |
| "epoch": 1.7675840978593271, |
| "grad_norm": 0.1040918305516243, |
| "learning_rate": 9.12434148413635e-06, |
| "loss": 0.8776851892471313, |
| "step": 1156 |
| }, |
| { |
| "epoch": 1.7706422018348624, |
| "grad_norm": 0.1633862406015396, |
| "learning_rate": 9.094665149935307e-06, |
| "loss": 1.1803946495056152, |
| "step": 1158 |
| }, |
| { |
| "epoch": 1.7737003058103975, |
| "grad_norm": 0.16494464874267578, |
| "learning_rate": 9.065010488030397e-06, |
| "loss": 1.2437876462936401, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.7767584097859328, |
| "grad_norm": 0.15143194794654846, |
| "learning_rate": 9.035377835729588e-06, |
| "loss": 1.2997621297836304, |
| "step": 1162 |
| }, |
| { |
| "epoch": 1.7798165137614679, |
| "grad_norm": 0.18123859167099, |
| "learning_rate": 9.005767530090489e-06, |
| "loss": 1.3120684623718262, |
| "step": 1164 |
| }, |
| { |
| "epoch": 1.782874617737003, |
| "grad_norm": 0.1501539647579193, |
| "learning_rate": 8.976179907916528e-06, |
| "loss": 1.3702399730682373, |
| "step": 1166 |
| }, |
| { |
| "epoch": 1.7859327217125383, |
| "grad_norm": 0.11408812552690506, |
| "learning_rate": 8.946615305753127e-06, |
| "loss": 1.2892541885375977, |
| "step": 1168 |
| }, |
| { |
| "epoch": 1.7889908256880735, |
| "grad_norm": 0.11254674196243286, |
| "learning_rate": 8.917074059883852e-06, |
| "loss": 1.308501124382019, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.7920489296636086, |
| "grad_norm": 0.14027130603790283, |
| "learning_rate": 8.887556506326615e-06, |
| "loss": 1.2796250581741333, |
| "step": 1172 |
| }, |
| { |
| "epoch": 1.7951070336391437, |
| "grad_norm": 0.1700981706380844, |
| "learning_rate": 8.858062980829838e-06, |
| "loss": 1.2928704023361206, |
| "step": 1174 |
| }, |
| { |
| "epoch": 1.7981651376146788, |
| "grad_norm": 0.18721206486225128, |
| "learning_rate": 8.828593818868622e-06, |
| "loss": 1.2547078132629395, |
| "step": 1176 |
| }, |
| { |
| "epoch": 1.801223241590214, |
| "grad_norm": 0.17795971035957336, |
| "learning_rate": 8.799149355640961e-06, |
| "loss": 1.2972519397735596, |
| "step": 1178 |
| }, |
| { |
| "epoch": 1.8042813455657494, |
| "grad_norm": 0.22595000267028809, |
| "learning_rate": 8.769729926063904e-06, |
| "loss": 1.2488983869552612, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.8073394495412844, |
| "grad_norm": 0.07708975672721863, |
| "learning_rate": 8.740335864769747e-06, |
| "loss": 1.1340965032577515, |
| "step": 1182 |
| }, |
| { |
| "epoch": 1.8103975535168195, |
| "grad_norm": 0.28360649943351746, |
| "learning_rate": 8.71096750610225e-06, |
| "loss": 1.1929219961166382, |
| "step": 1184 |
| }, |
| { |
| "epoch": 1.8134556574923546, |
| "grad_norm": 0.06053072586655617, |
| "learning_rate": 8.681625184112803e-06, |
| "loss": 0.976668655872345, |
| "step": 1186 |
| }, |
| { |
| "epoch": 1.81651376146789, |
| "grad_norm": 0.10698408633470535, |
| "learning_rate": 8.652309232556651e-06, |
| "loss": 1.198448896408081, |
| "step": 1188 |
| }, |
| { |
| "epoch": 1.8195718654434252, |
| "grad_norm": 0.08489919453859329, |
| "learning_rate": 8.623019984889078e-06, |
| "loss": 1.2306997776031494, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.8226299694189603, |
| "grad_norm": 0.11401335895061493, |
| "learning_rate": 8.593757774261638e-06, |
| "loss": 1.244828462600708, |
| "step": 1192 |
| }, |
| { |
| "epoch": 1.8256880733944953, |
| "grad_norm": 0.1129307895898819, |
| "learning_rate": 8.56452293351833e-06, |
| "loss": 1.2172696590423584, |
| "step": 1194 |
| }, |
| { |
| "epoch": 1.8287461773700304, |
| "grad_norm": 0.11858973652124405, |
| "learning_rate": 8.535315795191858e-06, |
| "loss": 1.2330491542816162, |
| "step": 1196 |
| }, |
| { |
| "epoch": 1.8318042813455657, |
| "grad_norm": 0.1406945139169693, |
| "learning_rate": 8.506136691499805e-06, |
| "loss": 1.2052946090698242, |
| "step": 1198 |
| }, |
| { |
| "epoch": 1.834862385321101, |
| "grad_norm": 0.09237688034772873, |
| "learning_rate": 8.476985954340877e-06, |
| "loss": 1.1807793378829956, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.837920489296636, |
| "grad_norm": 0.09736914187669754, |
| "learning_rate": 8.447863915291133e-06, |
| "loss": 1.2424228191375732, |
| "step": 1202 |
| }, |
| { |
| "epoch": 1.8409785932721712, |
| "grad_norm": 0.10175871849060059, |
| "learning_rate": 8.418770905600191e-06, |
| "loss": 1.2142945528030396, |
| "step": 1204 |
| }, |
| { |
| "epoch": 1.8440366972477065, |
| "grad_norm": 0.1257796734571457, |
| "learning_rate": 8.389707256187484e-06, |
| "loss": 1.188672661781311, |
| "step": 1206 |
| }, |
| { |
| "epoch": 1.8470948012232415, |
| "grad_norm": 0.12363885343074799, |
| "learning_rate": 8.360673297638484e-06, |
| "loss": 1.177275538444519, |
| "step": 1208 |
| }, |
| { |
| "epoch": 1.8501529051987768, |
| "grad_norm": 0.188978374004364, |
| "learning_rate": 8.331669360200937e-06, |
| "loss": 1.2045999765396118, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.853211009174312, |
| "grad_norm": 0.1187102198600769, |
| "learning_rate": 8.302695773781124e-06, |
| "loss": 1.1938765048980713, |
| "step": 1212 |
| }, |
| { |
| "epoch": 1.856269113149847, |
| "grad_norm": 0.13880708813667297, |
| "learning_rate": 8.273752867940081e-06, |
| "loss": 1.1506744623184204, |
| "step": 1214 |
| }, |
| { |
| "epoch": 1.8593272171253823, |
| "grad_norm": 0.13549721240997314, |
| "learning_rate": 8.244840971889885e-06, |
| "loss": 1.2162597179412842, |
| "step": 1216 |
| }, |
| { |
| "epoch": 1.8623853211009176, |
| "grad_norm": 0.25943684577941895, |
| "learning_rate": 8.215960414489873e-06, |
| "loss": 1.2452492713928223, |
| "step": 1218 |
| }, |
| { |
| "epoch": 1.8654434250764527, |
| "grad_norm": 0.12404591590166092, |
| "learning_rate": 8.187111524242938e-06, |
| "loss": 1.2478712797164917, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.8685015290519877, |
| "grad_norm": 0.13704179227352142, |
| "learning_rate": 8.15829462929176e-06, |
| "loss": 1.2387086153030396, |
| "step": 1222 |
| }, |
| { |
| "epoch": 1.8715596330275228, |
| "grad_norm": 0.11567935347557068, |
| "learning_rate": 8.129510057415091e-06, |
| "loss": 1.257190227508545, |
| "step": 1224 |
| }, |
| { |
| "epoch": 1.8746177370030581, |
| "grad_norm": 0.6642646789550781, |
| "learning_rate": 8.100758136024027e-06, |
| "loss": 1.239441990852356, |
| "step": 1226 |
| }, |
| { |
| "epoch": 1.8776758409785934, |
| "grad_norm": 0.17069149017333984, |
| "learning_rate": 8.072039192158272e-06, |
| "loss": 1.2593530416488647, |
| "step": 1228 |
| }, |
| { |
| "epoch": 1.8807339449541285, |
| "grad_norm": 0.14976222813129425, |
| "learning_rate": 8.043353552482435e-06, |
| "loss": 1.1915827989578247, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.8837920489296636, |
| "grad_norm": 0.2598312497138977, |
| "learning_rate": 8.014701543282302e-06, |
| "loss": 1.2334834337234497, |
| "step": 1232 |
| }, |
| { |
| "epoch": 1.8868501529051986, |
| "grad_norm": 0.1409589648246765, |
| "learning_rate": 7.986083490461124e-06, |
| "loss": 1.1979966163635254, |
| "step": 1234 |
| }, |
| { |
| "epoch": 1.889908256880734, |
| "grad_norm": 0.1519106924533844, |
| "learning_rate": 7.957499719535922e-06, |
| "loss": 1.224788784980774, |
| "step": 1236 |
| }, |
| { |
| "epoch": 1.8929663608562692, |
| "grad_norm": 0.14410492777824402, |
| "learning_rate": 7.928950555633767e-06, |
| "loss": 1.2332391738891602, |
| "step": 1238 |
| }, |
| { |
| "epoch": 1.8960244648318043, |
| "grad_norm": 0.15549832582473755, |
| "learning_rate": 7.900436323488098e-06, |
| "loss": 1.252604603767395, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.8990825688073394, |
| "grad_norm": 0.11451930552721024, |
| "learning_rate": 7.871957347435025e-06, |
| "loss": 1.2335529327392578, |
| "step": 1242 |
| }, |
| { |
| "epoch": 1.9021406727828745, |
| "grad_norm": 0.13380469381809235, |
| "learning_rate": 7.843513951409618e-06, |
| "loss": 1.1709686517715454, |
| "step": 1244 |
| }, |
| { |
| "epoch": 1.9051987767584098, |
| "grad_norm": 0.12940165400505066, |
| "learning_rate": 7.815106458942265e-06, |
| "loss": 1.211228609085083, |
| "step": 1246 |
| }, |
| { |
| "epoch": 1.908256880733945, |
| "grad_norm": 0.1105319932103157, |
| "learning_rate": 7.78673519315495e-06, |
| "loss": 1.2363883256912231, |
| "step": 1248 |
| }, |
| { |
| "epoch": 1.9113149847094801, |
| "grad_norm": 0.10502426326274872, |
| "learning_rate": 7.758400476757609e-06, |
| "loss": 1.262728214263916, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.9143730886850152, |
| "grad_norm": 0.1284359097480774, |
| "learning_rate": 7.73010263204443e-06, |
| "loss": 1.234637975692749, |
| "step": 1252 |
| }, |
| { |
| "epoch": 1.9174311926605505, |
| "grad_norm": 0.19072557985782623, |
| "learning_rate": 7.70184198089022e-06, |
| "loss": 1.2130069732666016, |
| "step": 1254 |
| }, |
| { |
| "epoch": 1.9204892966360856, |
| "grad_norm": 0.15589360892772675, |
| "learning_rate": 7.673618844746709e-06, |
| "loss": 1.1858221292495728, |
| "step": 1256 |
| }, |
| { |
| "epoch": 1.9235474006116209, |
| "grad_norm": 0.12436363846063614, |
| "learning_rate": 7.645433544638926e-06, |
| "loss": 1.2181921005249023, |
| "step": 1258 |
| }, |
| { |
| "epoch": 1.926605504587156, |
| "grad_norm": 0.1155422255396843, |
| "learning_rate": 7.617286401161523e-06, |
| "loss": 1.2427356243133545, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.929663608562691, |
| "grad_norm": 0.10325782001018524, |
| "learning_rate": 7.589177734475148e-06, |
| "loss": 1.192740797996521, |
| "step": 1262 |
| }, |
| { |
| "epoch": 1.9327217125382263, |
| "grad_norm": 0.1549818515777588, |
| "learning_rate": 7.561107864302784e-06, |
| "loss": 1.2155026197433472, |
| "step": 1264 |
| }, |
| { |
| "epoch": 1.9357798165137616, |
| "grad_norm": 0.11942024528980255, |
| "learning_rate": 7.533077109926124e-06, |
| "loss": 1.2154382467269897, |
| "step": 1266 |
| }, |
| { |
| "epoch": 1.9388379204892967, |
| "grad_norm": 0.12078223377466202, |
| "learning_rate": 7.505085790181938e-06, |
| "loss": 1.2634786367416382, |
| "step": 1268 |
| }, |
| { |
| "epoch": 1.9418960244648318, |
| "grad_norm": 0.22967126965522766, |
| "learning_rate": 7.477134223458449e-06, |
| "loss": 1.273752212524414, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.9449541284403669, |
| "grad_norm": 0.11903467774391174, |
| "learning_rate": 7.4492227276917e-06, |
| "loss": 1.234639048576355, |
| "step": 1272 |
| }, |
| { |
| "epoch": 1.9480122324159022, |
| "grad_norm": 0.11574736982584, |
| "learning_rate": 7.421351620361954e-06, |
| "loss": 1.2574361562728882, |
| "step": 1274 |
| }, |
| { |
| "epoch": 1.9510703363914375, |
| "grad_norm": 0.11663772165775299, |
| "learning_rate": 7.39352121849007e-06, |
| "loss": 1.2257782220840454, |
| "step": 1276 |
| }, |
| { |
| "epoch": 1.9541284403669725, |
| "grad_norm": 0.1800021380186081, |
| "learning_rate": 7.3657318386339e-06, |
| "loss": 1.175177812576294, |
| "step": 1278 |
| }, |
| { |
| "epoch": 1.9571865443425076, |
| "grad_norm": 0.17682303488254547, |
| "learning_rate": 7.337983796884694e-06, |
| "loss": 1.2020514011383057, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.9602446483180427, |
| "grad_norm": 0.13004066050052643, |
| "learning_rate": 7.310277408863493e-06, |
| "loss": 1.225712537765503, |
| "step": 1282 |
| }, |
| { |
| "epoch": 1.963302752293578, |
| "grad_norm": 0.396966814994812, |
| "learning_rate": 7.282612989717555e-06, |
| "loss": 1.2043389081954956, |
| "step": 1284 |
| }, |
| { |
| "epoch": 1.9663608562691133, |
| "grad_norm": 0.1417648047208786, |
| "learning_rate": 7.254990854116759e-06, |
| "loss": 1.2117154598236084, |
| "step": 1286 |
| }, |
| { |
| "epoch": 1.9694189602446484, |
| "grad_norm": 0.13368594646453857, |
| "learning_rate": 7.2274113162500285e-06, |
| "loss": 1.2600475549697876, |
| "step": 1288 |
| }, |
| { |
| "epoch": 1.9724770642201834, |
| "grad_norm": 0.1391812562942505, |
| "learning_rate": 7.199874689821744e-06, |
| "loss": 1.2455639839172363, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.9755351681957185, |
| "grad_norm": 0.13156801462173462, |
| "learning_rate": 7.1723812880482114e-06, |
| "loss": 1.258913278579712, |
| "step": 1292 |
| }, |
| { |
| "epoch": 1.9785932721712538, |
| "grad_norm": 0.12165912240743637, |
| "learning_rate": 7.144931423654069e-06, |
| "loss": 1.2114229202270508, |
| "step": 1294 |
| }, |
| { |
| "epoch": 1.981651376146789, |
| "grad_norm": 0.17182543873786926, |
| "learning_rate": 7.117525408868722e-06, |
| "loss": 1.219508409500122, |
| "step": 1296 |
| }, |
| { |
| "epoch": 1.9847094801223242, |
| "grad_norm": 0.1712474673986435, |
| "learning_rate": 7.090163555422824e-06, |
| "loss": 1.2391793727874756, |
| "step": 1298 |
| }, |
| { |
| "epoch": 1.9877675840978593, |
| "grad_norm": 0.14425022900104523, |
| "learning_rate": 7.062846174544713e-06, |
| "loss": 1.2954765558242798, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.9908256880733946, |
| "grad_norm": 0.17153722047805786, |
| "learning_rate": 7.035573576956867e-06, |
| "loss": 1.322161316871643, |
| "step": 1302 |
| }, |
| { |
| "epoch": 1.9938837920489296, |
| "grad_norm": 0.19832384586334229, |
| "learning_rate": 7.008346072872372e-06, |
| "loss": 1.478272557258606, |
| "step": 1304 |
| }, |
| { |
| "epoch": 1.996941896024465, |
| "grad_norm": 0.3216697871685028, |
| "learning_rate": 6.9811639719914004e-06, |
| "loss": 1.4260848760604858, |
| "step": 1306 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.706434428691864, |
| "learning_rate": 6.954027583497691e-06, |
| "loss": 1.4685496091842651, |
| "step": 1308 |
| }, |
| { |
| "epoch": 2.003058103975535, |
| "grad_norm": 0.17712196707725525, |
| "learning_rate": 6.92693721605501e-06, |
| "loss": 1.394515037536621, |
| "step": 1310 |
| }, |
| { |
| "epoch": 2.00611620795107, |
| "grad_norm": 0.12309785187244415, |
| "learning_rate": 6.899893177803667e-06, |
| "loss": 1.3557714223861694, |
| "step": 1312 |
| }, |
| { |
| "epoch": 2.0091743119266057, |
| "grad_norm": 0.1349668651819229, |
| "learning_rate": 6.8728957763570005e-06, |
| "loss": 1.3635830879211426, |
| "step": 1314 |
| }, |
| { |
| "epoch": 2.0122324159021407, |
| "grad_norm": 0.15826421976089478, |
| "learning_rate": 6.8459453187978706e-06, |
| "loss": 1.2972722053527832, |
| "step": 1316 |
| }, |
| { |
| "epoch": 2.015290519877676, |
| "grad_norm": 0.11746937036514282, |
| "learning_rate": 6.819042111675172e-06, |
| "loss": 1.4896833896636963, |
| "step": 1318 |
| }, |
| { |
| "epoch": 2.018348623853211, |
| "grad_norm": 0.10701940208673477, |
| "learning_rate": 6.792186461000352e-06, |
| "loss": 1.3726967573165894, |
| "step": 1320 |
| }, |
| { |
| "epoch": 2.021406727828746, |
| "grad_norm": 0.18223977088928223, |
| "learning_rate": 6.765378672243923e-06, |
| "loss": 1.3199728727340698, |
| "step": 1322 |
| }, |
| { |
| "epoch": 2.0244648318042815, |
| "grad_norm": 0.15080475807189941, |
| "learning_rate": 6.738619050331995e-06, |
| "loss": 1.3884634971618652, |
| "step": 1324 |
| }, |
| { |
| "epoch": 2.0275229357798166, |
| "grad_norm": 0.1777096688747406, |
| "learning_rate": 6.711907899642793e-06, |
| "loss": 1.2657767534255981, |
| "step": 1326 |
| }, |
| { |
| "epoch": 2.0305810397553516, |
| "grad_norm": 0.15124447643756866, |
| "learning_rate": 6.685245524003212e-06, |
| "loss": 1.3411420583724976, |
| "step": 1328 |
| }, |
| { |
| "epoch": 2.0336391437308867, |
| "grad_norm": 0.16805733740329742, |
| "learning_rate": 6.658632226685355e-06, |
| "loss": 1.4487394094467163, |
| "step": 1330 |
| }, |
| { |
| "epoch": 2.036697247706422, |
| "grad_norm": 0.15020951628684998, |
| "learning_rate": 6.632068310403075e-06, |
| "loss": 1.3471310138702393, |
| "step": 1332 |
| }, |
| { |
| "epoch": 2.0397553516819573, |
| "grad_norm": 0.11654051393270493, |
| "learning_rate": 6.605554077308541e-06, |
| "loss": 1.1645305156707764, |
| "step": 1334 |
| }, |
| { |
| "epoch": 2.0428134556574924, |
| "grad_norm": 0.47177380323410034, |
| "learning_rate": 6.579089828988806e-06, |
| "loss": 1.3331588506698608, |
| "step": 1336 |
| }, |
| { |
| "epoch": 2.0458715596330275, |
| "grad_norm": 0.18365582823753357, |
| "learning_rate": 6.552675866462358e-06, |
| "loss": 1.568501353263855, |
| "step": 1338 |
| }, |
| { |
| "epoch": 2.0489296636085625, |
| "grad_norm": 0.16884642839431763, |
| "learning_rate": 6.526312490175719e-06, |
| "loss": 1.2164785861968994, |
| "step": 1340 |
| }, |
| { |
| "epoch": 2.051987767584098, |
| "grad_norm": 0.18424928188323975, |
| "learning_rate": 6.500000000000003e-06, |
| "loss": 1.3639503717422485, |
| "step": 1342 |
| }, |
| { |
| "epoch": 2.055045871559633, |
| "grad_norm": 0.14070400595664978, |
| "learning_rate": 6.473738695227528e-06, |
| "loss": 1.2531183958053589, |
| "step": 1344 |
| }, |
| { |
| "epoch": 2.058103975535168, |
| "grad_norm": 0.1320883184671402, |
| "learning_rate": 6.447528874568403e-06, |
| "loss": 1.2422581911087036, |
| "step": 1346 |
| }, |
| { |
| "epoch": 2.0611620795107033, |
| "grad_norm": 0.31850379705429077, |
| "learning_rate": 6.421370836147125e-06, |
| "loss": 1.2550984621047974, |
| "step": 1348 |
| }, |
| { |
| "epoch": 2.0642201834862384, |
| "grad_norm": 0.12996889650821686, |
| "learning_rate": 6.3952648774991895e-06, |
| "loss": 1.2892459630966187, |
| "step": 1350 |
| }, |
| { |
| "epoch": 2.067278287461774, |
| "grad_norm": 0.11520091444253922, |
| "learning_rate": 6.3692112955677145e-06, |
| "loss": 1.4742932319641113, |
| "step": 1352 |
| }, |
| { |
| "epoch": 2.070336391437309, |
| "grad_norm": 0.20727252960205078, |
| "learning_rate": 6.343210386700056e-06, |
| "loss": 1.4938790798187256, |
| "step": 1354 |
| }, |
| { |
| "epoch": 2.073394495412844, |
| "grad_norm": 0.27067703008651733, |
| "learning_rate": 6.317262446644432e-06, |
| "loss": 1.289554238319397, |
| "step": 1356 |
| }, |
| { |
| "epoch": 2.076452599388379, |
| "grad_norm": 0.20865699648857117, |
| "learning_rate": 6.291367770546576e-06, |
| "loss": 1.3036962747573853, |
| "step": 1358 |
| }, |
| { |
| "epoch": 2.079510703363914, |
| "grad_norm": 0.25244075059890747, |
| "learning_rate": 6.265526652946361e-06, |
| "loss": 1.2870533466339111, |
| "step": 1360 |
| }, |
| { |
| "epoch": 2.0825688073394497, |
| "grad_norm": 0.18456417322158813, |
| "learning_rate": 6.23973938777446e-06, |
| "loss": 1.3479864597320557, |
| "step": 1362 |
| }, |
| { |
| "epoch": 2.085626911314985, |
| "grad_norm": 0.18370430171489716, |
| "learning_rate": 6.214006268348997e-06, |
| "loss": 1.3356621265411377, |
| "step": 1364 |
| }, |
| { |
| "epoch": 2.08868501529052, |
| "grad_norm": 0.19261838495731354, |
| "learning_rate": 6.188327587372216e-06, |
| "loss": 1.3640661239624023, |
| "step": 1366 |
| }, |
| { |
| "epoch": 2.091743119266055, |
| "grad_norm": 0.1763276904821396, |
| "learning_rate": 6.162703636927147e-06, |
| "loss": 1.3478913307189941, |
| "step": 1368 |
| }, |
| { |
| "epoch": 2.09480122324159, |
| "grad_norm": 0.10882332921028137, |
| "learning_rate": 6.137134708474293e-06, |
| "loss": 1.2278797626495361, |
| "step": 1370 |
| }, |
| { |
| "epoch": 2.0978593272171255, |
| "grad_norm": 0.12949886918067932, |
| "learning_rate": 6.111621092848293e-06, |
| "loss": 1.1902759075164795, |
| "step": 1372 |
| }, |
| { |
| "epoch": 2.1009174311926606, |
| "grad_norm": 0.15533407032489777, |
| "learning_rate": 6.086163080254641e-06, |
| "loss": 1.345337152481079, |
| "step": 1374 |
| }, |
| { |
| "epoch": 2.1039755351681957, |
| "grad_norm": 0.20214258134365082, |
| "learning_rate": 6.060760960266372e-06, |
| "loss": 1.4601398706436157, |
| "step": 1376 |
| }, |
| { |
| "epoch": 2.1070336391437308, |
| "grad_norm": 0.13242913782596588, |
| "learning_rate": 6.035415021820756e-06, |
| "loss": 1.2989829778671265, |
| "step": 1378 |
| }, |
| { |
| "epoch": 2.1100917431192663, |
| "grad_norm": 0.2398076355457306, |
| "learning_rate": 6.0101255532160376e-06, |
| "loss": 1.4964780807495117, |
| "step": 1380 |
| }, |
| { |
| "epoch": 2.1131498470948014, |
| "grad_norm": 0.20416386425495148, |
| "learning_rate": 5.984892842108143e-06, |
| "loss": 1.4913971424102783, |
| "step": 1382 |
| }, |
| { |
| "epoch": 2.1162079510703364, |
| "grad_norm": 0.18165673315525055, |
| "learning_rate": 5.959717175507396e-06, |
| "loss": 1.239193320274353, |
| "step": 1384 |
| }, |
| { |
| "epoch": 2.1192660550458715, |
| "grad_norm": 0.2294483482837677, |
| "learning_rate": 5.93459883977528e-06, |
| "loss": 1.1719461679458618, |
| "step": 1386 |
| }, |
| { |
| "epoch": 2.1223241590214066, |
| "grad_norm": 0.30271396040916443, |
| "learning_rate": 5.909538120621155e-06, |
| "loss": 1.3723602294921875, |
| "step": 1388 |
| }, |
| { |
| "epoch": 2.1253822629969417, |
| "grad_norm": 0.13012711703777313, |
| "learning_rate": 5.884535303099026e-06, |
| "loss": 1.186031460762024, |
| "step": 1390 |
| }, |
| { |
| "epoch": 2.128440366972477, |
| "grad_norm": 0.1800045669078827, |
| "learning_rate": 5.859590671604297e-06, |
| "loss": 1.257864236831665, |
| "step": 1392 |
| }, |
| { |
| "epoch": 2.1314984709480123, |
| "grad_norm": 0.1988981068134308, |
| "learning_rate": 5.8347045098705216e-06, |
| "loss": 1.2152053117752075, |
| "step": 1394 |
| }, |
| { |
| "epoch": 2.1345565749235473, |
| "grad_norm": 0.13755300641059875, |
| "learning_rate": 5.809877100966197e-06, |
| "loss": 1.299118995666504, |
| "step": 1396 |
| }, |
| { |
| "epoch": 2.1376146788990824, |
| "grad_norm": 0.1406605839729309, |
| "learning_rate": 5.785108727291532e-06, |
| "loss": 1.2066929340362549, |
| "step": 1398 |
| }, |
| { |
| "epoch": 2.140672782874618, |
| "grad_norm": 0.14786066114902496, |
| "learning_rate": 5.760399670575236e-06, |
| "loss": 1.255595088005066, |
| "step": 1400 |
| }, |
| { |
| "epoch": 2.143730886850153, |
| "grad_norm": 0.14521285891532898, |
| "learning_rate": 5.735750211871316e-06, |
| "loss": 1.3966695070266724, |
| "step": 1402 |
| }, |
| { |
| "epoch": 2.146788990825688, |
| "grad_norm": 0.156136155128479, |
| "learning_rate": 5.711160631555877e-06, |
| "loss": 1.2653884887695312, |
| "step": 1404 |
| }, |
| { |
| "epoch": 2.149847094801223, |
| "grad_norm": 0.11567161232233047, |
| "learning_rate": 5.686631209323941e-06, |
| "loss": 1.227649450302124, |
| "step": 1406 |
| }, |
| { |
| "epoch": 2.1529051987767582, |
| "grad_norm": 0.1110580638051033, |
| "learning_rate": 5.662162224186258e-06, |
| "loss": 1.4484609365463257, |
| "step": 1408 |
| }, |
| { |
| "epoch": 2.1559633027522938, |
| "grad_norm": 0.1445491760969162, |
| "learning_rate": 5.637753954466127e-06, |
| "loss": 1.3130481243133545, |
| "step": 1410 |
| }, |
| { |
| "epoch": 2.159021406727829, |
| "grad_norm": 0.1265811175107956, |
| "learning_rate": 5.613406677796246e-06, |
| "loss": 1.248986840248108, |
| "step": 1412 |
| }, |
| { |
| "epoch": 2.162079510703364, |
| "grad_norm": 0.12633687257766724, |
| "learning_rate": 5.589120671115542e-06, |
| "loss": 1.3525274991989136, |
| "step": 1414 |
| }, |
| { |
| "epoch": 2.165137614678899, |
| "grad_norm": 0.23789754509925842, |
| "learning_rate": 5.564896210666031e-06, |
| "loss": 1.3208624124526978, |
| "step": 1416 |
| }, |
| { |
| "epoch": 2.168195718654434, |
| "grad_norm": 0.13444137573242188, |
| "learning_rate": 5.540733571989654e-06, |
| "loss": 1.3796360492706299, |
| "step": 1418 |
| }, |
| { |
| "epoch": 2.1712538226299696, |
| "grad_norm": 0.18880939483642578, |
| "learning_rate": 5.51663302992517e-06, |
| "loss": 1.2581044435501099, |
| "step": 1420 |
| }, |
| { |
| "epoch": 2.1743119266055047, |
| "grad_norm": 0.12744680047035217, |
| "learning_rate": 5.4925948586050224e-06, |
| "loss": 1.3596972227096558, |
| "step": 1422 |
| }, |
| { |
| "epoch": 2.1773700305810397, |
| "grad_norm": 0.18471305072307587, |
| "learning_rate": 5.4686193314522e-06, |
| "loss": 1.256497859954834, |
| "step": 1424 |
| }, |
| { |
| "epoch": 2.180428134556575, |
| "grad_norm": 0.18038369715213776, |
| "learning_rate": 5.444706721177157e-06, |
| "loss": 1.5690301656723022, |
| "step": 1426 |
| }, |
| { |
| "epoch": 2.18348623853211, |
| "grad_norm": 0.15947416424751282, |
| "learning_rate": 5.420857299774696e-06, |
| "loss": 1.3116461038589478, |
| "step": 1428 |
| }, |
| { |
| "epoch": 2.1865443425076454, |
| "grad_norm": 0.14132535457611084, |
| "learning_rate": 5.397071338520867e-06, |
| "loss": 1.3790884017944336, |
| "step": 1430 |
| }, |
| { |
| "epoch": 2.1896024464831805, |
| "grad_norm": 0.32931429147720337, |
| "learning_rate": 5.373349107969902e-06, |
| "loss": 1.356053113937378, |
| "step": 1432 |
| }, |
| { |
| "epoch": 2.1926605504587156, |
| "grad_norm": 0.14345382153987885, |
| "learning_rate": 5.349690877951115e-06, |
| "loss": 1.3247517347335815, |
| "step": 1434 |
| }, |
| { |
| "epoch": 2.1957186544342506, |
| "grad_norm": 0.15944042801856995, |
| "learning_rate": 5.326096917565853e-06, |
| "loss": 1.3286181688308716, |
| "step": 1436 |
| }, |
| { |
| "epoch": 2.198776758409786, |
| "grad_norm": 0.21919000148773193, |
| "learning_rate": 5.302567495184422e-06, |
| "loss": 1.330199122428894, |
| "step": 1438 |
| }, |
| { |
| "epoch": 2.2018348623853212, |
| "grad_norm": 0.1875789612531662, |
| "learning_rate": 5.279102878443032e-06, |
| "loss": 1.332606554031372, |
| "step": 1440 |
| }, |
| { |
| "epoch": 2.2048929663608563, |
| "grad_norm": 0.33878740668296814, |
| "learning_rate": 5.255703334240774e-06, |
| "loss": 1.3914042711257935, |
| "step": 1442 |
| }, |
| { |
| "epoch": 2.2079510703363914, |
| "grad_norm": 0.5218708515167236, |
| "learning_rate": 5.232369128736553e-06, |
| "loss": 1.3419737815856934, |
| "step": 1444 |
| }, |
| { |
| "epoch": 2.2110091743119265, |
| "grad_norm": 0.11105236411094666, |
| "learning_rate": 5.2091005273460914e-06, |
| "loss": 1.2455813884735107, |
| "step": 1446 |
| }, |
| { |
| "epoch": 2.214067278287462, |
| "grad_norm": 0.11201157420873642, |
| "learning_rate": 5.185897794738881e-06, |
| "loss": 1.2253139019012451, |
| "step": 1448 |
| }, |
| { |
| "epoch": 2.217125382262997, |
| "grad_norm": 0.1912904530763626, |
| "learning_rate": 5.162761194835198e-06, |
| "loss": 1.2669559717178345, |
| "step": 1450 |
| }, |
| { |
| "epoch": 2.220183486238532, |
| "grad_norm": 0.24734018743038177, |
| "learning_rate": 5.139690990803084e-06, |
| "loss": 1.233301043510437, |
| "step": 1452 |
| }, |
| { |
| "epoch": 2.223241590214067, |
| "grad_norm": 0.16191165149211884, |
| "learning_rate": 5.1166874450553635e-06, |
| "loss": 1.2840200662612915, |
| "step": 1454 |
| }, |
| { |
| "epoch": 2.2262996941896023, |
| "grad_norm": 0.12160076200962067, |
| "learning_rate": 5.093750819246648e-06, |
| "loss": 1.2450309991836548, |
| "step": 1456 |
| }, |
| { |
| "epoch": 2.229357798165138, |
| "grad_norm": 0.36477330327033997, |
| "learning_rate": 5.0708813742703666e-06, |
| "loss": 1.4992554187774658, |
| "step": 1458 |
| }, |
| { |
| "epoch": 2.232415902140673, |
| "grad_norm": 0.1379183977842331, |
| "learning_rate": 5.0480793702558085e-06, |
| "loss": 1.5028156042099, |
| "step": 1460 |
| }, |
| { |
| "epoch": 2.235474006116208, |
| "grad_norm": 0.22662517428398132, |
| "learning_rate": 5.025345066565135e-06, |
| "loss": 1.5423188209533691, |
| "step": 1462 |
| }, |
| { |
| "epoch": 2.238532110091743, |
| "grad_norm": 0.22309836745262146, |
| "learning_rate": 5.002678721790462e-06, |
| "loss": 1.4739760160446167, |
| "step": 1464 |
| }, |
| { |
| "epoch": 2.241590214067278, |
| "grad_norm": 0.18657968938350677, |
| "learning_rate": 4.980080593750901e-06, |
| "loss": 1.5198191404342651, |
| "step": 1466 |
| }, |
| { |
| "epoch": 2.2446483180428136, |
| "grad_norm": 0.1312897652387619, |
| "learning_rate": 4.9575509394896306e-06, |
| "loss": 1.412659764289856, |
| "step": 1468 |
| }, |
| { |
| "epoch": 2.2477064220183487, |
| "grad_norm": 0.2041846066713333, |
| "learning_rate": 4.9350900152709644e-06, |
| "loss": 1.501435399055481, |
| "step": 1470 |
| }, |
| { |
| "epoch": 2.2507645259938838, |
| "grad_norm": 0.21802227199077606, |
| "learning_rate": 4.9126980765774535e-06, |
| "loss": 1.2920466661453247, |
| "step": 1472 |
| }, |
| { |
| "epoch": 2.253822629969419, |
| "grad_norm": 0.32734358310699463, |
| "learning_rate": 4.890375378106969e-06, |
| "loss": 1.3840155601501465, |
| "step": 1474 |
| }, |
| { |
| "epoch": 2.2568807339449544, |
| "grad_norm": 0.18576525151729584, |
| "learning_rate": 4.8681221737698e-06, |
| "loss": 1.1896015405654907, |
| "step": 1476 |
| }, |
| { |
| "epoch": 2.2599388379204894, |
| "grad_norm": 0.25952601432800293, |
| "learning_rate": 4.845938716685783e-06, |
| "loss": 1.390250563621521, |
| "step": 1478 |
| }, |
| { |
| "epoch": 2.2629969418960245, |
| "grad_norm": 0.2618364691734314, |
| "learning_rate": 4.8238252591813994e-06, |
| "loss": 0.9939552545547485, |
| "step": 1480 |
| }, |
| { |
| "epoch": 2.2660550458715596, |
| "grad_norm": 0.7130351066589355, |
| "learning_rate": 4.801782052786928e-06, |
| "loss": 0.9591231942176819, |
| "step": 1482 |
| }, |
| { |
| "epoch": 2.2691131498470947, |
| "grad_norm": 0.5174583196640015, |
| "learning_rate": 4.7798093482335736e-06, |
| "loss": 1.1732577085494995, |
| "step": 1484 |
| }, |
| { |
| "epoch": 2.2721712538226297, |
| "grad_norm": 0.1944684386253357, |
| "learning_rate": 4.757907395450607e-06, |
| "loss": 1.254443645477295, |
| "step": 1486 |
| }, |
| { |
| "epoch": 2.2752293577981653, |
| "grad_norm": 0.32820016145706177, |
| "learning_rate": 4.736076443562537e-06, |
| "loss": 1.3228447437286377, |
| "step": 1488 |
| }, |
| { |
| "epoch": 2.2782874617737003, |
| "grad_norm": 0.27493736147880554, |
| "learning_rate": 4.714316740886271e-06, |
| "loss": 1.2684073448181152, |
| "step": 1490 |
| }, |
| { |
| "epoch": 2.2813455657492354, |
| "grad_norm": 0.8473254442214966, |
| "learning_rate": 4.69262853492829e-06, |
| "loss": 1.0940097570419312, |
| "step": 1492 |
| }, |
| { |
| "epoch": 2.2844036697247705, |
| "grad_norm": 0.14267265796661377, |
| "learning_rate": 4.671012072381827e-06, |
| "loss": 1.062652349472046, |
| "step": 1494 |
| }, |
| { |
| "epoch": 2.287461773700306, |
| "grad_norm": 0.14857089519500732, |
| "learning_rate": 4.6494675991240725e-06, |
| "loss": 1.3027708530426025, |
| "step": 1496 |
| }, |
| { |
| "epoch": 2.290519877675841, |
| "grad_norm": 0.2024240493774414, |
| "learning_rate": 4.627995360213372e-06, |
| "loss": 1.1870825290679932, |
| "step": 1498 |
| }, |
| { |
| "epoch": 2.293577981651376, |
| "grad_norm": 0.47134989500045776, |
| "learning_rate": 4.606595599886441e-06, |
| "loss": 1.1387802362442017, |
| "step": 1500 |
| }, |
| { |
| "epoch": 2.2966360856269112, |
| "grad_norm": 0.12811291217803955, |
| "learning_rate": 4.585268561555577e-06, |
| "loss": 1.3463492393493652, |
| "step": 1502 |
| }, |
| { |
| "epoch": 2.2996941896024463, |
| "grad_norm": 0.22761978209018707, |
| "learning_rate": 4.564014487805905e-06, |
| "loss": 1.3002067804336548, |
| "step": 1504 |
| }, |
| { |
| "epoch": 2.302752293577982, |
| "grad_norm": 0.11687177419662476, |
| "learning_rate": 4.542833620392616e-06, |
| "loss": 1.0505836009979248, |
| "step": 1506 |
| }, |
| { |
| "epoch": 2.305810397553517, |
| "grad_norm": 0.5785715579986572, |
| "learning_rate": 4.521726200238199e-06, |
| "loss": 1.2982921600341797, |
| "step": 1508 |
| }, |
| { |
| "epoch": 2.308868501529052, |
| "grad_norm": 0.5210697650909424, |
| "learning_rate": 4.5006924674297285e-06, |
| "loss": 1.4383094310760498, |
| "step": 1510 |
| }, |
| { |
| "epoch": 2.311926605504587, |
| "grad_norm": 0.13508401811122894, |
| "learning_rate": 4.479732661216114e-06, |
| "loss": 1.5722585916519165, |
| "step": 1512 |
| }, |
| { |
| "epoch": 2.314984709480122, |
| "grad_norm": 0.16143162548542023, |
| "learning_rate": 4.458847020005387e-06, |
| "loss": 1.5961930751800537, |
| "step": 1514 |
| }, |
| { |
| "epoch": 2.3180428134556577, |
| "grad_norm": 0.17354567348957062, |
| "learning_rate": 4.43803578136198e-06, |
| "loss": 1.7155699729919434, |
| "step": 1516 |
| }, |
| { |
| "epoch": 2.3211009174311927, |
| "grad_norm": 0.14079655706882477, |
| "learning_rate": 4.4172991820040385e-06, |
| "loss": 1.248162865638733, |
| "step": 1518 |
| }, |
| { |
| "epoch": 2.324159021406728, |
| "grad_norm": 0.4320622980594635, |
| "learning_rate": 4.396637457800717e-06, |
| "loss": 1.3336325883865356, |
| "step": 1520 |
| }, |
| { |
| "epoch": 2.327217125382263, |
| "grad_norm": 0.23424510657787323, |
| "learning_rate": 4.376050843769508e-06, |
| "loss": 1.1769382953643799, |
| "step": 1522 |
| }, |
| { |
| "epoch": 2.330275229357798, |
| "grad_norm": 0.1386423110961914, |
| "learning_rate": 4.355539574073543e-06, |
| "loss": 1.1032593250274658, |
| "step": 1524 |
| }, |
| { |
| "epoch": 2.3333333333333335, |
| "grad_norm": 0.3263895809650421, |
| "learning_rate": 4.3351038820189605e-06, |
| "loss": 1.4229612350463867, |
| "step": 1526 |
| }, |
| { |
| "epoch": 2.3363914373088686, |
| "grad_norm": 0.18773163855075836, |
| "learning_rate": 4.314744000052238e-06, |
| "loss": 1.3791815042495728, |
| "step": 1528 |
| }, |
| { |
| "epoch": 2.3394495412844036, |
| "grad_norm": 0.28852972388267517, |
| "learning_rate": 4.294460159757549e-06, |
| "loss": 1.5685712099075317, |
| "step": 1530 |
| }, |
| { |
| "epoch": 2.3425076452599387, |
| "grad_norm": 0.1875099092721939, |
| "learning_rate": 4.274252591854119e-06, |
| "loss": 1.0389281511306763, |
| "step": 1532 |
| }, |
| { |
| "epoch": 2.3455657492354742, |
| "grad_norm": 0.3121390640735626, |
| "learning_rate": 4.254121526193621e-06, |
| "loss": 1.2694977521896362, |
| "step": 1534 |
| }, |
| { |
| "epoch": 2.3486238532110093, |
| "grad_norm": 0.18623915314674377, |
| "learning_rate": 4.234067191757547e-06, |
| "loss": 1.515195608139038, |
| "step": 1536 |
| }, |
| { |
| "epoch": 2.3516819571865444, |
| "grad_norm": 0.27571722865104675, |
| "learning_rate": 4.2140898166546094e-06, |
| "loss": 1.2839555740356445, |
| "step": 1538 |
| }, |
| { |
| "epoch": 2.3547400611620795, |
| "grad_norm": 0.18172216415405273, |
| "learning_rate": 4.1941896281181345e-06, |
| "loss": 1.4342246055603027, |
| "step": 1540 |
| }, |
| { |
| "epoch": 2.3577981651376145, |
| "grad_norm": 0.1874471753835678, |
| "learning_rate": 4.1743668525035e-06, |
| "loss": 1.302701473236084, |
| "step": 1542 |
| }, |
| { |
| "epoch": 2.3608562691131496, |
| "grad_norm": 0.13664644956588745, |
| "learning_rate": 4.154621715285544e-06, |
| "loss": 1.2458817958831787, |
| "step": 1544 |
| }, |
| { |
| "epoch": 2.363914373088685, |
| "grad_norm": 0.19140246510505676, |
| "learning_rate": 4.134954441055996e-06, |
| "loss": 1.2903972864151, |
| "step": 1546 |
| }, |
| { |
| "epoch": 2.36697247706422, |
| "grad_norm": 0.1618933528661728, |
| "learning_rate": 4.11536525352094e-06, |
| "loss": 1.347403645515442, |
| "step": 1548 |
| }, |
| { |
| "epoch": 2.3700305810397553, |
| "grad_norm": 0.35864803194999695, |
| "learning_rate": 4.0958543754982555e-06, |
| "loss": 1.2057493925094604, |
| "step": 1550 |
| }, |
| { |
| "epoch": 2.3730886850152904, |
| "grad_norm": 0.24179436266422272, |
| "learning_rate": 4.076422028915092e-06, |
| "loss": 1.33254873752594, |
| "step": 1552 |
| }, |
| { |
| "epoch": 2.376146788990826, |
| "grad_norm": 0.8190633654594421, |
| "learning_rate": 4.057068434805334e-06, |
| "loss": 1.6208769083023071, |
| "step": 1554 |
| }, |
| { |
| "epoch": 2.379204892966361, |
| "grad_norm": 0.3764900863170624, |
| "learning_rate": 4.037793813307097e-06, |
| "loss": 1.8496865034103394, |
| "step": 1556 |
| }, |
| { |
| "epoch": 2.382262996941896, |
| "grad_norm": 0.2258998155593872, |
| "learning_rate": 4.018598383660221e-06, |
| "loss": 1.2403137683868408, |
| "step": 1558 |
| }, |
| { |
| "epoch": 2.385321100917431, |
| "grad_norm": 0.2579484283924103, |
| "learning_rate": 3.999482364203777e-06, |
| "loss": 1.2474881410598755, |
| "step": 1560 |
| }, |
| { |
| "epoch": 2.388379204892966, |
| "grad_norm": 0.20246495306491852, |
| "learning_rate": 3.980445972373572e-06, |
| "loss": 1.3078337907791138, |
| "step": 1562 |
| }, |
| { |
| "epoch": 2.3914373088685017, |
| "grad_norm": 0.4740979075431824, |
| "learning_rate": 3.961489424699698e-06, |
| "loss": 1.3576768636703491, |
| "step": 1564 |
| }, |
| { |
| "epoch": 2.3944954128440368, |
| "grad_norm": 0.2766645848751068, |
| "learning_rate": 3.9426129368040525e-06, |
| "loss": 1.3143256902694702, |
| "step": 1566 |
| }, |
| { |
| "epoch": 2.397553516819572, |
| "grad_norm": 0.25092101097106934, |
| "learning_rate": 3.923816723397891e-06, |
| "loss": 1.193142056465149, |
| "step": 1568 |
| }, |
| { |
| "epoch": 2.400611620795107, |
| "grad_norm": 0.17161835730075836, |
| "learning_rate": 3.905100998279378e-06, |
| "loss": 1.1418907642364502, |
| "step": 1570 |
| }, |
| { |
| "epoch": 2.4036697247706424, |
| "grad_norm": 0.11211515963077545, |
| "learning_rate": 3.8864659743311674e-06, |
| "loss": 1.0700639486312866, |
| "step": 1572 |
| }, |
| { |
| "epoch": 2.4067278287461775, |
| "grad_norm": 0.10698743164539337, |
| "learning_rate": 3.867911863517976e-06, |
| "loss": 1.1931581497192383, |
| "step": 1574 |
| }, |
| { |
| "epoch": 2.4097859327217126, |
| "grad_norm": 0.29255542159080505, |
| "learning_rate": 3.849438876884171e-06, |
| "loss": 1.4374973773956299, |
| "step": 1576 |
| }, |
| { |
| "epoch": 2.4128440366972477, |
| "grad_norm": 0.18589414656162262, |
| "learning_rate": 3.831047224551362e-06, |
| "loss": 1.4687811136245728, |
| "step": 1578 |
| }, |
| { |
| "epoch": 2.4159021406727827, |
| "grad_norm": 0.2537822127342224, |
| "learning_rate": 3.8127371157160274e-06, |
| "loss": 1.4451251029968262, |
| "step": 1580 |
| }, |
| { |
| "epoch": 2.418960244648318, |
| "grad_norm": 0.2604762613773346, |
| "learning_rate": 3.794508758647125e-06, |
| "loss": 1.4975537061691284, |
| "step": 1582 |
| }, |
| { |
| "epoch": 2.4220183486238533, |
| "grad_norm": 0.18013572692871094, |
| "learning_rate": 3.776362360683725e-06, |
| "loss": 1.312493085861206, |
| "step": 1584 |
| }, |
| { |
| "epoch": 2.4250764525993884, |
| "grad_norm": 0.26494479179382324, |
| "learning_rate": 3.7582981282326436e-06, |
| "loss": 1.4119056463241577, |
| "step": 1586 |
| }, |
| { |
| "epoch": 2.4281345565749235, |
| "grad_norm": 0.22667403519153595, |
| "learning_rate": 3.74031626676611e-06, |
| "loss": 1.2957755327224731, |
| "step": 1588 |
| }, |
| { |
| "epoch": 2.4311926605504586, |
| "grad_norm": 0.09898483008146286, |
| "learning_rate": 3.7224169808194234e-06, |
| "loss": 1.249149203300476, |
| "step": 1590 |
| }, |
| { |
| "epoch": 2.434250764525994, |
| "grad_norm": 0.25444644689559937, |
| "learning_rate": 3.704600473988616e-06, |
| "loss": 1.2711833715438843, |
| "step": 1592 |
| }, |
| { |
| "epoch": 2.437308868501529, |
| "grad_norm": 0.20375902950763702, |
| "learning_rate": 3.6868669489281526e-06, |
| "loss": 1.457493543624878, |
| "step": 1594 |
| }, |
| { |
| "epoch": 2.4403669724770642, |
| "grad_norm": 0.16325809061527252, |
| "learning_rate": 3.6692166073486207e-06, |
| "loss": 1.2258182764053345, |
| "step": 1596 |
| }, |
| { |
| "epoch": 2.4434250764525993, |
| "grad_norm": 0.31586337089538574, |
| "learning_rate": 3.6516496500144315e-06, |
| "loss": 1.3641780614852905, |
| "step": 1598 |
| }, |
| { |
| "epoch": 2.4464831804281344, |
| "grad_norm": 0.3672979772090912, |
| "learning_rate": 3.6341662767415366e-06, |
| "loss": 1.2819935083389282, |
| "step": 1600 |
| }, |
| { |
| "epoch": 2.44954128440367, |
| "grad_norm": 0.29007235169410706, |
| "learning_rate": 3.616766686395161e-06, |
| "loss": 1.2883577346801758, |
| "step": 1602 |
| }, |
| { |
| "epoch": 2.452599388379205, |
| "grad_norm": 0.22274695336818695, |
| "learning_rate": 3.599451076887539e-06, |
| "loss": 1.4647866487503052, |
| "step": 1604 |
| }, |
| { |
| "epoch": 2.45565749235474, |
| "grad_norm": 0.2635745108127594, |
| "learning_rate": 3.5822196451756617e-06, |
| "loss": 1.4505290985107422, |
| "step": 1606 |
| }, |
| { |
| "epoch": 2.458715596330275, |
| "grad_norm": 0.6367509961128235, |
| "learning_rate": 3.5650725872590343e-06, |
| "loss": 1.266322374343872, |
| "step": 1608 |
| }, |
| { |
| "epoch": 2.46177370030581, |
| "grad_norm": 0.167319193482399, |
| "learning_rate": 3.54801009817745e-06, |
| "loss": 1.228219747543335, |
| "step": 1610 |
| }, |
| { |
| "epoch": 2.4648318042813457, |
| "grad_norm": 0.25401708483695984, |
| "learning_rate": 3.5310323720087747e-06, |
| "loss": 1.3591912984848022, |
| "step": 1612 |
| }, |
| { |
| "epoch": 2.467889908256881, |
| "grad_norm": 0.1403985321521759, |
| "learning_rate": 3.5141396018667327e-06, |
| "loss": 1.3101667165756226, |
| "step": 1614 |
| }, |
| { |
| "epoch": 2.470948012232416, |
| "grad_norm": 0.15048815310001373, |
| "learning_rate": 3.4973319798987075e-06, |
| "loss": 1.4515373706817627, |
| "step": 1616 |
| }, |
| { |
| "epoch": 2.474006116207951, |
| "grad_norm": 0.3306581377983093, |
| "learning_rate": 3.480609697283574e-06, |
| "loss": 1.3238542079925537, |
| "step": 1618 |
| }, |
| { |
| "epoch": 2.477064220183486, |
| "grad_norm": 0.2043750286102295, |
| "learning_rate": 3.463972944229502e-06, |
| "loss": 1.2536604404449463, |
| "step": 1620 |
| }, |
| { |
| "epoch": 2.4801223241590216, |
| "grad_norm": 0.15457271039485931, |
| "learning_rate": 3.4474219099718085e-06, |
| "loss": 1.2132182121276855, |
| "step": 1622 |
| }, |
| { |
| "epoch": 2.4831804281345566, |
| "grad_norm": 0.36772802472114563, |
| "learning_rate": 3.4309567827707936e-06, |
| "loss": 1.3583812713623047, |
| "step": 1624 |
| }, |
| { |
| "epoch": 2.4862385321100917, |
| "grad_norm": 0.137724831700325, |
| "learning_rate": 3.4145777499096066e-06, |
| "loss": 1.347251057624817, |
| "step": 1626 |
| }, |
| { |
| "epoch": 2.489296636085627, |
| "grad_norm": 0.4675873816013336, |
| "learning_rate": 3.3982849976921185e-06, |
| "loss": 1.2851288318634033, |
| "step": 1628 |
| }, |
| { |
| "epoch": 2.4923547400611623, |
| "grad_norm": 0.20795711874961853, |
| "learning_rate": 3.3820787114407927e-06, |
| "loss": 1.5017863512039185, |
| "step": 1630 |
| }, |
| { |
| "epoch": 2.4954128440366974, |
| "grad_norm": 0.24633866548538208, |
| "learning_rate": 3.3659590754945816e-06, |
| "loss": 1.6977734565734863, |
| "step": 1632 |
| }, |
| { |
| "epoch": 2.4984709480122325, |
| "grad_norm": 0.20416969060897827, |
| "learning_rate": 3.349926273206834e-06, |
| "loss": 1.5675647258758545, |
| "step": 1634 |
| }, |
| { |
| "epoch": 2.5015290519877675, |
| "grad_norm": 0.17218895256519318, |
| "learning_rate": 3.3339804869432092e-06, |
| "loss": 1.568835735321045, |
| "step": 1636 |
| }, |
| { |
| "epoch": 2.5045871559633026, |
| "grad_norm": 0.3312041759490967, |
| "learning_rate": 3.3181218980795915e-06, |
| "loss": 1.4730901718139648, |
| "step": 1638 |
| }, |
| { |
| "epoch": 2.5076452599388377, |
| "grad_norm": 0.47724974155426025, |
| "learning_rate": 3.302350687000041e-06, |
| "loss": 1.1118125915527344, |
| "step": 1640 |
| }, |
| { |
| "epoch": 2.510703363914373, |
| "grad_norm": 0.22525407373905182, |
| "learning_rate": 3.2866670330947372e-06, |
| "loss": 1.1350667476654053, |
| "step": 1642 |
| }, |
| { |
| "epoch": 2.5137614678899083, |
| "grad_norm": 0.5922534465789795, |
| "learning_rate": 3.271071114757936e-06, |
| "loss": 1.5340875387191772, |
| "step": 1644 |
| }, |
| { |
| "epoch": 2.5168195718654434, |
| "grad_norm": 0.20079486072063446, |
| "learning_rate": 3.2555631093859376e-06, |
| "loss": 1.5291383266448975, |
| "step": 1646 |
| }, |
| { |
| "epoch": 2.5198776758409784, |
| "grad_norm": 0.2685466408729553, |
| "learning_rate": 3.240143193375079e-06, |
| "loss": 1.650044322013855, |
| "step": 1648 |
| }, |
| { |
| "epoch": 2.522935779816514, |
| "grad_norm": 0.29007527232170105, |
| "learning_rate": 3.2248115421197207e-06, |
| "loss": 1.580130934715271, |
| "step": 1650 |
| }, |
| { |
| "epoch": 2.525993883792049, |
| "grad_norm": 0.17674541473388672, |
| "learning_rate": 3.2095683300102544e-06, |
| "loss": 1.6336654424667358, |
| "step": 1652 |
| }, |
| { |
| "epoch": 2.529051987767584, |
| "grad_norm": 0.6385888457298279, |
| "learning_rate": 3.194413730431111e-06, |
| "loss": 1.4276736974716187, |
| "step": 1654 |
| }, |
| { |
| "epoch": 2.532110091743119, |
| "grad_norm": 0.18316228687763214, |
| "learning_rate": 3.1793479157588e-06, |
| "loss": 1.4557141065597534, |
| "step": 1656 |
| }, |
| { |
| "epoch": 2.5351681957186543, |
| "grad_norm": 0.2798449397087097, |
| "learning_rate": 3.1643710573599484e-06, |
| "loss": 1.3273746967315674, |
| "step": 1658 |
| }, |
| { |
| "epoch": 2.5382262996941893, |
| "grad_norm": 0.25256890058517456, |
| "learning_rate": 3.1494833255893347e-06, |
| "loss": 1.5899957418441772, |
| "step": 1660 |
| }, |
| { |
| "epoch": 2.541284403669725, |
| "grad_norm": 0.22966250777244568, |
| "learning_rate": 3.1346848897879773e-06, |
| "loss": 1.3582658767700195, |
| "step": 1662 |
| }, |
| { |
| "epoch": 2.54434250764526, |
| "grad_norm": 0.23401792347431183, |
| "learning_rate": 3.1199759182811835e-06, |
| "loss": 1.1058764457702637, |
| "step": 1664 |
| }, |
| { |
| "epoch": 2.547400611620795, |
| "grad_norm": 0.23885783553123474, |
| "learning_rate": 3.105356578376652e-06, |
| "loss": 1.587262511253357, |
| "step": 1666 |
| }, |
| { |
| "epoch": 2.5504587155963305, |
| "grad_norm": 0.1748657077550888, |
| "learning_rate": 3.090827036362566e-06, |
| "loss": 1.7135778665542603, |
| "step": 1668 |
| }, |
| { |
| "epoch": 2.5535168195718656, |
| "grad_norm": 0.1924404799938202, |
| "learning_rate": 3.0763874575056897e-06, |
| "loss": 1.7697184085845947, |
| "step": 1670 |
| }, |
| { |
| "epoch": 2.5565749235474007, |
| "grad_norm": 0.22128871083259583, |
| "learning_rate": 3.062038006049509e-06, |
| "loss": 1.6986305713653564, |
| "step": 1672 |
| }, |
| { |
| "epoch": 2.5596330275229358, |
| "grad_norm": 0.3884623646736145, |
| "learning_rate": 3.0477788452123474e-06, |
| "loss": 1.6635832786560059, |
| "step": 1674 |
| }, |
| { |
| "epoch": 2.562691131498471, |
| "grad_norm": 0.193667471408844, |
| "learning_rate": 3.0336101371855132e-06, |
| "loss": 1.5323315858840942, |
| "step": 1676 |
| }, |
| { |
| "epoch": 2.565749235474006, |
| "grad_norm": 0.21260865032672882, |
| "learning_rate": 3.019532043131461e-06, |
| "loss": 1.3841668367385864, |
| "step": 1678 |
| }, |
| { |
| "epoch": 2.5688073394495414, |
| "grad_norm": 0.7379730343818665, |
| "learning_rate": 3.005544723181949e-06, |
| "loss": 0.9397176504135132, |
| "step": 1680 |
| }, |
| { |
| "epoch": 2.5718654434250765, |
| "grad_norm": 0.21575972437858582, |
| "learning_rate": 2.9916483364362273e-06, |
| "loss": 0.9356784820556641, |
| "step": 1682 |
| }, |
| { |
| "epoch": 2.5749235474006116, |
| "grad_norm": 0.18256239593029022, |
| "learning_rate": 2.9778430409592165e-06, |
| "loss": 1.1633483171463013, |
| "step": 1684 |
| }, |
| { |
| "epoch": 2.5779816513761467, |
| "grad_norm": 0.18563584983348846, |
| "learning_rate": 2.964128993779721e-06, |
| "loss": 1.1023343801498413, |
| "step": 1686 |
| }, |
| { |
| "epoch": 2.581039755351682, |
| "grad_norm": 0.17169539630413055, |
| "learning_rate": 2.95050635088864e-06, |
| "loss": 1.0343772172927856, |
| "step": 1688 |
| }, |
| { |
| "epoch": 2.5840978593272173, |
| "grad_norm": 0.11842609941959381, |
| "learning_rate": 2.936975267237188e-06, |
| "loss": 1.114422082901001, |
| "step": 1690 |
| }, |
| { |
| "epoch": 2.5871559633027523, |
| "grad_norm": 0.5754178762435913, |
| "learning_rate": 2.9235358967351346e-06, |
| "loss": 1.3243212699890137, |
| "step": 1692 |
| }, |
| { |
| "epoch": 2.5902140672782874, |
| "grad_norm": 0.22367584705352783, |
| "learning_rate": 2.9101883922490577e-06, |
| "loss": 1.3066273927688599, |
| "step": 1694 |
| }, |
| { |
| "epoch": 2.5932721712538225, |
| "grad_norm": 0.11838769167661667, |
| "learning_rate": 2.8969329056006052e-06, |
| "loss": 1.1478899717330933, |
| "step": 1696 |
| }, |
| { |
| "epoch": 2.5963302752293576, |
| "grad_norm": 0.6228997707366943, |
| "learning_rate": 2.883769587564757e-06, |
| "loss": 1.308228850364685, |
| "step": 1698 |
| }, |
| { |
| "epoch": 2.599388379204893, |
| "grad_norm": 0.14409951865673065, |
| "learning_rate": 2.8706985878681236e-06, |
| "loss": 1.4127944707870483, |
| "step": 1700 |
| }, |
| { |
| "epoch": 2.602446483180428, |
| "grad_norm": 0.2419777512550354, |
| "learning_rate": 2.857720055187237e-06, |
| "loss": 1.1605547666549683, |
| "step": 1702 |
| }, |
| { |
| "epoch": 2.6055045871559632, |
| "grad_norm": 0.17849405109882355, |
| "learning_rate": 2.8448341371468606e-06, |
| "loss": 1.3722493648529053, |
| "step": 1704 |
| }, |
| { |
| "epoch": 2.6085626911314987, |
| "grad_norm": 0.584975004196167, |
| "learning_rate": 2.832040980318304e-06, |
| "loss": 1.5971640348434448, |
| "step": 1706 |
| }, |
| { |
| "epoch": 2.611620795107034, |
| "grad_norm": 0.20485441386699677, |
| "learning_rate": 2.8193407302177696e-06, |
| "loss": 1.0920323133468628, |
| "step": 1708 |
| }, |
| { |
| "epoch": 2.614678899082569, |
| "grad_norm": 0.37276148796081543, |
| "learning_rate": 2.806733531304681e-06, |
| "loss": 1.333469271659851, |
| "step": 1710 |
| }, |
| { |
| "epoch": 2.617737003058104, |
| "grad_norm": 0.16045695543289185, |
| "learning_rate": 2.7942195269800524e-06, |
| "loss": 1.4399563074111938, |
| "step": 1712 |
| }, |
| { |
| "epoch": 2.620795107033639, |
| "grad_norm": 0.29774409532546997, |
| "learning_rate": 2.781798859584855e-06, |
| "loss": 1.2612054347991943, |
| "step": 1714 |
| }, |
| { |
| "epoch": 2.623853211009174, |
| "grad_norm": 0.10159888118505478, |
| "learning_rate": 2.769471670398389e-06, |
| "loss": 1.138002872467041, |
| "step": 1716 |
| }, |
| { |
| "epoch": 2.6269113149847096, |
| "grad_norm": 0.9622639417648315, |
| "learning_rate": 2.757238099636689e-06, |
| "loss": 0.7187841534614563, |
| "step": 1718 |
| }, |
| { |
| "epoch": 2.6299694189602447, |
| "grad_norm": 0.15207388997077942, |
| "learning_rate": 2.7450982864509253e-06, |
| "loss": 1.4917500019073486, |
| "step": 1720 |
| }, |
| { |
| "epoch": 2.63302752293578, |
| "grad_norm": 0.5151544809341431, |
| "learning_rate": 2.7330523689258106e-06, |
| "loss": 1.3835726976394653, |
| "step": 1722 |
| }, |
| { |
| "epoch": 2.636085626911315, |
| "grad_norm": 0.19729852676391602, |
| "learning_rate": 2.721100484078048e-06, |
| "loss": 1.2811280488967896, |
| "step": 1724 |
| }, |
| { |
| "epoch": 2.6391437308868504, |
| "grad_norm": 0.22213585674762726, |
| "learning_rate": 2.709242767854758e-06, |
| "loss": 1.4066439867019653, |
| "step": 1726 |
| }, |
| { |
| "epoch": 2.6422018348623855, |
| "grad_norm": 0.1790645569562912, |
| "learning_rate": 2.6974793551319383e-06, |
| "loss": 1.3520299196243286, |
| "step": 1728 |
| }, |
| { |
| "epoch": 2.6452599388379205, |
| "grad_norm": 0.19265353679656982, |
| "learning_rate": 2.6858103797129246e-06, |
| "loss": 1.3133292198181152, |
| "step": 1730 |
| }, |
| { |
| "epoch": 2.6483180428134556, |
| "grad_norm": 0.23618127405643463, |
| "learning_rate": 2.674235974326878e-06, |
| "loss": 1.5000425577163696, |
| "step": 1732 |
| }, |
| { |
| "epoch": 2.6513761467889907, |
| "grad_norm": 0.17804878950119019, |
| "learning_rate": 2.6627562706272657e-06, |
| "loss": 1.1008257865905762, |
| "step": 1734 |
| }, |
| { |
| "epoch": 2.6544342507645258, |
| "grad_norm": 0.12927761673927307, |
| "learning_rate": 2.6513713991903705e-06, |
| "loss": 0.9210459589958191, |
| "step": 1736 |
| }, |
| { |
| "epoch": 2.6574923547400613, |
| "grad_norm": 0.16620349884033203, |
| "learning_rate": 2.640081489513797e-06, |
| "loss": 1.5412592887878418, |
| "step": 1738 |
| }, |
| { |
| "epoch": 2.6605504587155964, |
| "grad_norm": 0.3116919994354248, |
| "learning_rate": 2.628886670015009e-06, |
| "loss": 1.3480628728866577, |
| "step": 1740 |
| }, |
| { |
| "epoch": 2.6636085626911314, |
| "grad_norm": 0.2775146961212158, |
| "learning_rate": 2.6177870680298624e-06, |
| "loss": 1.4652796983718872, |
| "step": 1742 |
| }, |
| { |
| "epoch": 2.6666666666666665, |
| "grad_norm": 0.17939536273479462, |
| "learning_rate": 2.606782809811155e-06, |
| "loss": 1.297588586807251, |
| "step": 1744 |
| }, |
| { |
| "epoch": 2.669724770642202, |
| "grad_norm": 0.22709611058235168, |
| "learning_rate": 2.5958740205272003e-06, |
| "loss": 1.5550439357757568, |
| "step": 1746 |
| }, |
| { |
| "epoch": 2.672782874617737, |
| "grad_norm": 0.23353682458400726, |
| "learning_rate": 2.5850608242603913e-06, |
| "loss": 1.4042333364486694, |
| "step": 1748 |
| }, |
| { |
| "epoch": 2.675840978593272, |
| "grad_norm": 0.3190506100654602, |
| "learning_rate": 2.5743433440058002e-06, |
| "loss": 1.4088410139083862, |
| "step": 1750 |
| }, |
| { |
| "epoch": 2.6788990825688073, |
| "grad_norm": 0.3655577600002289, |
| "learning_rate": 2.5637217016697663e-06, |
| "loss": 1.2669798135757446, |
| "step": 1752 |
| }, |
| { |
| "epoch": 2.6819571865443423, |
| "grad_norm": 0.20551763474941254, |
| "learning_rate": 2.5531960180685276e-06, |
| "loss": 0.9370750188827515, |
| "step": 1754 |
| }, |
| { |
| "epoch": 2.6850152905198774, |
| "grad_norm": 0.2501385807991028, |
| "learning_rate": 2.5427664129268253e-06, |
| "loss": 1.4236401319503784, |
| "step": 1756 |
| }, |
| { |
| "epoch": 2.688073394495413, |
| "grad_norm": 0.17783670127391815, |
| "learning_rate": 2.5324330048765626e-06, |
| "loss": 1.3898571729660034, |
| "step": 1758 |
| }, |
| { |
| "epoch": 2.691131498470948, |
| "grad_norm": 0.19069063663482666, |
| "learning_rate": 2.522195911455437e-06, |
| "loss": 1.418738842010498, |
| "step": 1760 |
| }, |
| { |
| "epoch": 2.694189602446483, |
| "grad_norm": 0.24323132634162903, |
| "learning_rate": 2.5120552491056197e-06, |
| "loss": 1.4022393226623535, |
| "step": 1762 |
| }, |
| { |
| "epoch": 2.6972477064220186, |
| "grad_norm": 0.1714673787355423, |
| "learning_rate": 2.502011133172418e-06, |
| "loss": 1.213725209236145, |
| "step": 1764 |
| }, |
| { |
| "epoch": 2.7003058103975537, |
| "grad_norm": 0.21391062438488007, |
| "learning_rate": 2.4920636779029736e-06, |
| "loss": 1.4528888463974, |
| "step": 1766 |
| }, |
| { |
| "epoch": 2.7033639143730888, |
| "grad_norm": 0.3517923951148987, |
| "learning_rate": 2.482212996444952e-06, |
| "loss": 1.5154540538787842, |
| "step": 1768 |
| }, |
| { |
| "epoch": 2.706422018348624, |
| "grad_norm": 0.21076905727386475, |
| "learning_rate": 2.4724592008452655e-06, |
| "loss": 1.2251962423324585, |
| "step": 1770 |
| }, |
| { |
| "epoch": 2.709480122324159, |
| "grad_norm": 0.2092084437608719, |
| "learning_rate": 2.4628024020487946e-06, |
| "loss": 1.4913671016693115, |
| "step": 1772 |
| }, |
| { |
| "epoch": 2.712538226299694, |
| "grad_norm": 0.2707628011703491, |
| "learning_rate": 2.4532427098971276e-06, |
| "loss": 1.2693325281143188, |
| "step": 1774 |
| }, |
| { |
| "epoch": 2.7155963302752295, |
| "grad_norm": 0.23994937539100647, |
| "learning_rate": 2.4437802331273052e-06, |
| "loss": 1.426426649093628, |
| "step": 1776 |
| }, |
| { |
| "epoch": 2.7186544342507646, |
| "grad_norm": 0.3275426924228668, |
| "learning_rate": 2.4344150793705944e-06, |
| "loss": 1.3486037254333496, |
| "step": 1778 |
| }, |
| { |
| "epoch": 2.7217125382262997, |
| "grad_norm": 0.1596205085515976, |
| "learning_rate": 2.425147355151254e-06, |
| "loss": 1.4933159351348877, |
| "step": 1780 |
| }, |
| { |
| "epoch": 2.7247706422018347, |
| "grad_norm": 0.3981785476207733, |
| "learning_rate": 2.4159771658853306e-06, |
| "loss": 1.5875604152679443, |
| "step": 1782 |
| }, |
| { |
| "epoch": 2.7278287461773703, |
| "grad_norm": 0.8734309673309326, |
| "learning_rate": 2.406904615879453e-06, |
| "loss": 1.4748692512512207, |
| "step": 1784 |
| }, |
| { |
| "epoch": 2.7308868501529053, |
| "grad_norm": 0.21883857250213623, |
| "learning_rate": 2.3979298083296488e-06, |
| "loss": 1.3090273141860962, |
| "step": 1786 |
| }, |
| { |
| "epoch": 2.7339449541284404, |
| "grad_norm": 0.4178310036659241, |
| "learning_rate": 2.3890528453201756e-06, |
| "loss": 1.419610619544983, |
| "step": 1788 |
| }, |
| { |
| "epoch": 2.7370030581039755, |
| "grad_norm": 0.2704829275608063, |
| "learning_rate": 2.3802738278223474e-06, |
| "loss": 1.4093573093414307, |
| "step": 1790 |
| }, |
| { |
| "epoch": 2.7400611620795106, |
| "grad_norm": 0.37049105763435364, |
| "learning_rate": 2.3715928556934005e-06, |
| "loss": 1.4148246049880981, |
| "step": 1792 |
| }, |
| { |
| "epoch": 2.7431192660550456, |
| "grad_norm": 0.42498084902763367, |
| "learning_rate": 2.3630100276753463e-06, |
| "loss": 1.4419108629226685, |
| "step": 1794 |
| }, |
| { |
| "epoch": 2.746177370030581, |
| "grad_norm": 0.16743339598178864, |
| "learning_rate": 2.354525441393857e-06, |
| "loss": 1.4382030963897705, |
| "step": 1796 |
| }, |
| { |
| "epoch": 2.7492354740061162, |
| "grad_norm": 0.17311187088489532, |
| "learning_rate": 2.346139193357145e-06, |
| "loss": 1.2192634344100952, |
| "step": 1798 |
| }, |
| { |
| "epoch": 2.7522935779816513, |
| "grad_norm": 0.38412225246429443, |
| "learning_rate": 2.337851378954877e-06, |
| "loss": 1.2814158201217651, |
| "step": 1800 |
| }, |
| { |
| "epoch": 2.7553516819571864, |
| "grad_norm": 0.2552420496940613, |
| "learning_rate": 2.3296620924570772e-06, |
| "loss": 1.1930724382400513, |
| "step": 1802 |
| }, |
| { |
| "epoch": 2.758409785932722, |
| "grad_norm": 0.22476616501808167, |
| "learning_rate": 2.3215714270130673e-06, |
| "loss": 1.2869510650634766, |
| "step": 1804 |
| }, |
| { |
| "epoch": 2.761467889908257, |
| "grad_norm": 0.22635005414485931, |
| "learning_rate": 2.3135794746503934e-06, |
| "loss": 1.2187225818634033, |
| "step": 1806 |
| }, |
| { |
| "epoch": 2.764525993883792, |
| "grad_norm": 0.20397242903709412, |
| "learning_rate": 2.3056863262737915e-06, |
| "loss": 1.131676435470581, |
| "step": 1808 |
| }, |
| { |
| "epoch": 2.767584097859327, |
| "grad_norm": 0.16007225215435028, |
| "learning_rate": 2.2978920716641456e-06, |
| "loss": 0.8253338932991028, |
| "step": 1810 |
| }, |
| { |
| "epoch": 2.770642201834862, |
| "grad_norm": 0.2556101679801941, |
| "learning_rate": 2.290196799477473e-06, |
| "loss": 1.1432273387908936, |
| "step": 1812 |
| }, |
| { |
| "epoch": 2.7737003058103973, |
| "grad_norm": 0.15398050844669342, |
| "learning_rate": 2.2826005972439056e-06, |
| "loss": 1.2073218822479248, |
| "step": 1814 |
| }, |
| { |
| "epoch": 2.776758409785933, |
| "grad_norm": 0.14769600331783295, |
| "learning_rate": 2.2751035513667067e-06, |
| "loss": 1.2682545185089111, |
| "step": 1816 |
| }, |
| { |
| "epoch": 2.779816513761468, |
| "grad_norm": 0.15157395601272583, |
| "learning_rate": 2.2677057471212783e-06, |
| "loss": 1.2814412117004395, |
| "step": 1818 |
| }, |
| { |
| "epoch": 2.782874617737003, |
| "grad_norm": 0.11602704972028732, |
| "learning_rate": 2.2604072686541992e-06, |
| "loss": 1.337765097618103, |
| "step": 1820 |
| }, |
| { |
| "epoch": 2.7859327217125385, |
| "grad_norm": 0.11175990104675293, |
| "learning_rate": 2.25320819898226e-06, |
| "loss": 1.2580088376998901, |
| "step": 1822 |
| }, |
| { |
| "epoch": 2.7889908256880735, |
| "grad_norm": 0.2158500850200653, |
| "learning_rate": 2.2461086199915215e-06, |
| "loss": 1.280817985534668, |
| "step": 1824 |
| }, |
| { |
| "epoch": 2.7920489296636086, |
| "grad_norm": 0.10697898268699646, |
| "learning_rate": 2.2391086124363907e-06, |
| "loss": 1.2506331205368042, |
| "step": 1826 |
| }, |
| { |
| "epoch": 2.7951070336391437, |
| "grad_norm": 0.1721104234457016, |
| "learning_rate": 2.232208255938689e-06, |
| "loss": 1.2656058073043823, |
| "step": 1828 |
| }, |
| { |
| "epoch": 2.7981651376146788, |
| "grad_norm": 0.16275018453598022, |
| "learning_rate": 2.2254076289867574e-06, |
| "loss": 1.2296828031539917, |
| "step": 1830 |
| }, |
| { |
| "epoch": 2.801223241590214, |
| "grad_norm": 0.12005674839019775, |
| "learning_rate": 2.218706808934559e-06, |
| "loss": 1.2741050720214844, |
| "step": 1832 |
| }, |
| { |
| "epoch": 2.8042813455657494, |
| "grad_norm": 0.20490582287311554, |
| "learning_rate": 2.2121058720008005e-06, |
| "loss": 1.2242677211761475, |
| "step": 1834 |
| }, |
| { |
| "epoch": 2.8073394495412844, |
| "grad_norm": 0.08420670032501221, |
| "learning_rate": 2.205604893268061e-06, |
| "loss": 1.115134358406067, |
| "step": 1836 |
| }, |
| { |
| "epoch": 2.8103975535168195, |
| "grad_norm": 0.09198635816574097, |
| "learning_rate": 2.1992039466819464e-06, |
| "loss": 1.1836574077606201, |
| "step": 1838 |
| }, |
| { |
| "epoch": 2.8134556574923546, |
| "grad_norm": 0.12914858758449554, |
| "learning_rate": 2.192903105050242e-06, |
| "loss": 0.9696236252784729, |
| "step": 1840 |
| }, |
| { |
| "epoch": 2.81651376146789, |
| "grad_norm": 0.09582311660051346, |
| "learning_rate": 2.186702440042086e-06, |
| "loss": 1.174141764640808, |
| "step": 1842 |
| }, |
| { |
| "epoch": 2.819571865443425, |
| "grad_norm": 0.12264709919691086, |
| "learning_rate": 2.18060202218715e-06, |
| "loss": 1.2044894695281982, |
| "step": 1844 |
| }, |
| { |
| "epoch": 2.8226299694189603, |
| "grad_norm": 0.16786755621433258, |
| "learning_rate": 2.174601920874849e-06, |
| "loss": 1.2202996015548706, |
| "step": 1846 |
| }, |
| { |
| "epoch": 2.8256880733944953, |
| "grad_norm": 0.09281696379184723, |
| "learning_rate": 2.168702204353538e-06, |
| "loss": 1.1890109777450562, |
| "step": 1848 |
| }, |
| { |
| "epoch": 2.8287461773700304, |
| "grad_norm": 0.10081319510936737, |
| "learning_rate": 2.162902939729744e-06, |
| "loss": 1.2033485174179077, |
| "step": 1850 |
| }, |
| { |
| "epoch": 2.8318042813455655, |
| "grad_norm": 0.11747318506240845, |
| "learning_rate": 2.1572041929673983e-06, |
| "loss": 1.1819500923156738, |
| "step": 1852 |
| }, |
| { |
| "epoch": 2.834862385321101, |
| "grad_norm": 0.10887467861175537, |
| "learning_rate": 2.151606028887092e-06, |
| "loss": 1.1508567333221436, |
| "step": 1854 |
| }, |
| { |
| "epoch": 2.837920489296636, |
| "grad_norm": 0.14138160645961761, |
| "learning_rate": 2.146108511165331e-06, |
| "loss": 1.2135510444641113, |
| "step": 1856 |
| }, |
| { |
| "epoch": 2.840978593272171, |
| "grad_norm": 0.11023754626512527, |
| "learning_rate": 2.14071170233382e-06, |
| "loss": 1.1841347217559814, |
| "step": 1858 |
| }, |
| { |
| "epoch": 2.8440366972477067, |
| "grad_norm": 0.20284757018089294, |
| "learning_rate": 2.135415663778743e-06, |
| "loss": 1.157168984413147, |
| "step": 1860 |
| }, |
| { |
| "epoch": 2.8470948012232418, |
| "grad_norm": 0.1384735405445099, |
| "learning_rate": 2.1302204557400727e-06, |
| "loss": 1.1463501453399658, |
| "step": 1862 |
| }, |
| { |
| "epoch": 2.850152905198777, |
| "grad_norm": 0.1709088236093521, |
| "learning_rate": 2.125126137310878e-06, |
| "loss": 1.1752015352249146, |
| "step": 1864 |
| }, |
| { |
| "epoch": 2.853211009174312, |
| "grad_norm": 0.13996273279190063, |
| "learning_rate": 2.1201327664366585e-06, |
| "loss": 1.162741780281067, |
| "step": 1866 |
| }, |
| { |
| "epoch": 2.856269113149847, |
| "grad_norm": 0.13266918063163757, |
| "learning_rate": 2.115240399914681e-06, |
| "loss": 1.1238844394683838, |
| "step": 1868 |
| }, |
| { |
| "epoch": 2.859327217125382, |
| "grad_norm": 0.25243642926216125, |
| "learning_rate": 2.1104490933933357e-06, |
| "loss": 1.188450813293457, |
| "step": 1870 |
| }, |
| { |
| "epoch": 2.8623853211009176, |
| "grad_norm": 0.12690605223178864, |
| "learning_rate": 2.1057589013715016e-06, |
| "loss": 1.2131199836730957, |
| "step": 1872 |
| }, |
| { |
| "epoch": 2.8654434250764527, |
| "grad_norm": 0.22158950567245483, |
| "learning_rate": 2.101169877197926e-06, |
| "loss": 1.222786545753479, |
| "step": 1874 |
| }, |
| { |
| "epoch": 2.8685015290519877, |
| "grad_norm": 0.12401717156171799, |
| "learning_rate": 2.096682073070622e-06, |
| "loss": 1.2122353315353394, |
| "step": 1876 |
| }, |
| { |
| "epoch": 2.871559633027523, |
| "grad_norm": 0.39234742522239685, |
| "learning_rate": 2.092295540036271e-06, |
| "loss": 1.2304370403289795, |
| "step": 1878 |
| }, |
| { |
| "epoch": 2.8746177370030583, |
| "grad_norm": 0.14366573095321655, |
| "learning_rate": 2.088010327989642e-06, |
| "loss": 1.208174228668213, |
| "step": 1880 |
| }, |
| { |
| "epoch": 2.8776758409785934, |
| "grad_norm": 0.13212910294532776, |
| "learning_rate": 2.0838264856730233e-06, |
| "loss": 1.2271817922592163, |
| "step": 1882 |
| }, |
| { |
| "epoch": 2.8807339449541285, |
| "grad_norm": 0.09124042838811874, |
| "learning_rate": 2.0797440606756747e-06, |
| "loss": 1.1685302257537842, |
| "step": 1884 |
| }, |
| { |
| "epoch": 2.8837920489296636, |
| "grad_norm": 0.13218353688716888, |
| "learning_rate": 2.075763099433277e-06, |
| "loss": 1.2074134349822998, |
| "step": 1886 |
| }, |
| { |
| "epoch": 2.8868501529051986, |
| "grad_norm": 0.13880771398544312, |
| "learning_rate": 2.0718836472274094e-06, |
| "loss": 1.1736494302749634, |
| "step": 1888 |
| }, |
| { |
| "epoch": 2.8899082568807337, |
| "grad_norm": 0.0981225073337555, |
| "learning_rate": 2.0681057481850338e-06, |
| "loss": 1.1989068984985352, |
| "step": 1890 |
| }, |
| { |
| "epoch": 2.8929663608562692, |
| "grad_norm": 0.16569402813911438, |
| "learning_rate": 2.0644294452779904e-06, |
| "loss": 1.207861304283142, |
| "step": 1892 |
| }, |
| { |
| "epoch": 2.8960244648318043, |
| "grad_norm": 0.15494751930236816, |
| "learning_rate": 2.060854780322513e-06, |
| "loss": 1.2271397113800049, |
| "step": 1894 |
| }, |
| { |
| "epoch": 2.8990825688073394, |
| "grad_norm": 0.11410652101039886, |
| "learning_rate": 2.05738179397875e-06, |
| "loss": 1.2082901000976562, |
| "step": 1896 |
| }, |
| { |
| "epoch": 2.9021406727828745, |
| "grad_norm": 0.11099483072757721, |
| "learning_rate": 2.054010525750302e-06, |
| "loss": 1.1450985670089722, |
| "step": 1898 |
| }, |
| { |
| "epoch": 2.90519877675841, |
| "grad_norm": 0.12173059582710266, |
| "learning_rate": 2.050741013983773e-06, |
| "loss": 1.187538981437683, |
| "step": 1900 |
| }, |
| { |
| "epoch": 2.908256880733945, |
| "grad_norm": 0.14034679532051086, |
| "learning_rate": 2.0475732958683374e-06, |
| "loss": 1.2137997150421143, |
| "step": 1902 |
| }, |
| { |
| "epoch": 2.91131498470948, |
| "grad_norm": 0.1078169122338295, |
| "learning_rate": 2.0445074074353143e-06, |
| "loss": 1.2389276027679443, |
| "step": 1904 |
| }, |
| { |
| "epoch": 2.914373088685015, |
| "grad_norm": 0.1030258983373642, |
| "learning_rate": 2.0415433835577536e-06, |
| "loss": 1.2125961780548096, |
| "step": 1906 |
| }, |
| { |
| "epoch": 2.9174311926605503, |
| "grad_norm": 0.14901351928710938, |
| "learning_rate": 2.038681257950046e-06, |
| "loss": 1.1889057159423828, |
| "step": 1908 |
| }, |
| { |
| "epoch": 2.9204892966360854, |
| "grad_norm": 0.1964503526687622, |
| "learning_rate": 2.035921063167539e-06, |
| "loss": 1.1632894277572632, |
| "step": 1910 |
| }, |
| { |
| "epoch": 2.923547400611621, |
| "grad_norm": 0.15398406982421875, |
| "learning_rate": 2.0332628306061598e-06, |
| "loss": 1.1950072050094604, |
| "step": 1912 |
| }, |
| { |
| "epoch": 2.926605504587156, |
| "grad_norm": 0.21499764919281006, |
| "learning_rate": 2.0307065905020655e-06, |
| "loss": 1.2189935445785522, |
| "step": 1914 |
| }, |
| { |
| "epoch": 2.929663608562691, |
| "grad_norm": 0.18662309646606445, |
| "learning_rate": 2.028252371931297e-06, |
| "loss": 1.1687815189361572, |
| "step": 1916 |
| }, |
| { |
| "epoch": 2.9327217125382266, |
| "grad_norm": 0.13196606934070587, |
| "learning_rate": 2.025900202809447e-06, |
| "loss": 1.1919829845428467, |
| "step": 1918 |
| }, |
| { |
| "epoch": 2.9357798165137616, |
| "grad_norm": 0.1411059945821762, |
| "learning_rate": 2.0236501098913433e-06, |
| "loss": 1.193024754524231, |
| "step": 1920 |
| }, |
| { |
| "epoch": 2.9388379204892967, |
| "grad_norm": 0.3092004060745239, |
| "learning_rate": 2.021502118770743e-06, |
| "loss": 1.2424871921539307, |
| "step": 1922 |
| }, |
| { |
| "epoch": 2.941896024464832, |
| "grad_norm": 0.12397109717130661, |
| "learning_rate": 2.019456253880047e-06, |
| "loss": 1.2504069805145264, |
| "step": 1924 |
| }, |
| { |
| "epoch": 2.944954128440367, |
| "grad_norm": 0.2362269163131714, |
| "learning_rate": 2.0175125384900125e-06, |
| "loss": 1.2087825536727905, |
| "step": 1926 |
| }, |
| { |
| "epoch": 2.948012232415902, |
| "grad_norm": 0.1304045021533966, |
| "learning_rate": 2.015670994709497e-06, |
| "loss": 1.2350709438323975, |
| "step": 1928 |
| }, |
| { |
| "epoch": 2.9510703363914375, |
| "grad_norm": 0.14016836881637573, |
| "learning_rate": 2.0139316434852034e-06, |
| "loss": 1.203549861907959, |
| "step": 1930 |
| }, |
| { |
| "epoch": 2.9541284403669725, |
| "grad_norm": 0.16034546494483948, |
| "learning_rate": 2.0122945046014427e-06, |
| "loss": 1.1559655666351318, |
| "step": 1932 |
| }, |
| { |
| "epoch": 2.9571865443425076, |
| "grad_norm": 0.11549582332372665, |
| "learning_rate": 2.0107595966799047e-06, |
| "loss": 1.1843830347061157, |
| "step": 1934 |
| }, |
| { |
| "epoch": 2.9602446483180427, |
| "grad_norm": 0.1061682254076004, |
| "learning_rate": 2.009326937179452e-06, |
| "loss": 1.2071914672851562, |
| "step": 1936 |
| }, |
| { |
| "epoch": 2.963302752293578, |
| "grad_norm": 0.13189159333705902, |
| "learning_rate": 2.0079965423959206e-06, |
| "loss": 1.187361240386963, |
| "step": 1938 |
| }, |
| { |
| "epoch": 2.9663608562691133, |
| "grad_norm": 0.16287079453468323, |
| "learning_rate": 2.0067684274619298e-06, |
| "loss": 1.1922651529312134, |
| "step": 1940 |
| }, |
| { |
| "epoch": 2.9694189602446484, |
| "grad_norm": 0.13288040459156036, |
| "learning_rate": 2.0056426063467157e-06, |
| "loss": 1.2393989562988281, |
| "step": 1942 |
| }, |
| { |
| "epoch": 2.9724770642201834, |
| "grad_norm": 0.21218396723270416, |
| "learning_rate": 2.0046190918559676e-06, |
| "loss": 1.229328989982605, |
| "step": 1944 |
| }, |
| { |
| "epoch": 2.9755351681957185, |
| "grad_norm": 0.11764626204967499, |
| "learning_rate": 2.0036978956316867e-06, |
| "loss": 1.2407135963439941, |
| "step": 1946 |
| }, |
| { |
| "epoch": 2.9785932721712536, |
| "grad_norm": 0.23697015643119812, |
| "learning_rate": 2.002879028152051e-06, |
| "loss": 1.1935560703277588, |
| "step": 1948 |
| }, |
| { |
| "epoch": 2.981651376146789, |
| "grad_norm": 0.11776944994926453, |
| "learning_rate": 2.0021624987312975e-06, |
| "loss": 1.204485297203064, |
| "step": 1950 |
| }, |
| { |
| "epoch": 2.984709480122324, |
| "grad_norm": 0.3524181544780731, |
| "learning_rate": 2.001548315519612e-06, |
| "loss": 1.2240840196609497, |
| "step": 1952 |
| }, |
| { |
| "epoch": 2.9877675840978593, |
| "grad_norm": 0.16863545775413513, |
| "learning_rate": 2.0010364855030445e-06, |
| "loss": 1.2831623554229736, |
| "step": 1954 |
| }, |
| { |
| "epoch": 2.9908256880733948, |
| "grad_norm": 0.14657853543758392, |
| "learning_rate": 2.0006270145034217e-06, |
| "loss": 1.3105254173278809, |
| "step": 1956 |
| }, |
| { |
| "epoch": 2.99388379204893, |
| "grad_norm": 0.1979057639837265, |
| "learning_rate": 2.000319907178286e-06, |
| "loss": 1.2615809440612793, |
| "step": 1958 |
| }, |
| { |
| "epoch": 2.996941896024465, |
| "grad_norm": 0.2079269289970398, |
| "learning_rate": 2.00011516702084e-06, |
| "loss": 1.1906858682632446, |
| "step": 1960 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.3183704912662506, |
| "learning_rate": 2.0000127963599083e-06, |
| "loss": 1.2669897079467773, |
| "step": 1962 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 1962, |
| "total_flos": 2.4882019145802056e+18, |
| "train_loss": 1.4038474378721917, |
| "train_runtime": 17010.8289, |
| "train_samples_per_second": 1.845, |
| "train_steps_per_second": 0.115 |
| } |
| ], |
| "logging_steps": 2, |
| "max_steps": 1962, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 9999999, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.4882019145802056e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|