| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 1962, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0030581039755351682, |
| "grad_norm": 0.18179698288440704, |
| "learning_rate": 2.0202020202020205e-07, |
| "loss": 1.8941408395767212, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0061162079510703364, |
| "grad_norm": 0.1191081777215004, |
| "learning_rate": 6.060606060606061e-07, |
| "loss": 1.855010747909546, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.009174311926605505, |
| "grad_norm": 0.14209145307540894, |
| "learning_rate": 1.01010101010101e-06, |
| "loss": 1.8903889656066895, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.012232415902140673, |
| "grad_norm": 0.1455659419298172, |
| "learning_rate": 1.4141414141414143e-06, |
| "loss": 1.8439750671386719, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.01529051987767584, |
| "grad_norm": 0.1313670426607132, |
| "learning_rate": 1.8181818181818183e-06, |
| "loss": 2.0096511840820312, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.01834862385321101, |
| "grad_norm": 0.10032304376363754, |
| "learning_rate": 2.222222222222222e-06, |
| "loss": 1.8792736530303955, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.021406727828746176, |
| "grad_norm": 0.22813281416893005, |
| "learning_rate": 2.6262626262626267e-06, |
| "loss": 1.879863977432251, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.024464831804281346, |
| "grad_norm": 0.25767821073532104, |
| "learning_rate": 3.0303030303030305e-06, |
| "loss": 1.9464257955551147, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.027522935779816515, |
| "grad_norm": 0.13845738768577576, |
| "learning_rate": 3.4343434343434347e-06, |
| "loss": 1.8291980028152466, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.03058103975535168, |
| "grad_norm": 0.1477503925561905, |
| "learning_rate": 3.8383838383838385e-06, |
| "loss": 1.8531450033187866, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.03363914373088685, |
| "grad_norm": 0.3766998052597046, |
| "learning_rate": 4.242424242424243e-06, |
| "loss": 1.9907399415969849, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.03669724770642202, |
| "grad_norm": 1.00423264503479, |
| "learning_rate": 4.646464646464647e-06, |
| "loss": 2.0054450035095215, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.039755351681957186, |
| "grad_norm": 0.21465469896793365, |
| "learning_rate": 5.0505050505050515e-06, |
| "loss": 1.7041486501693726, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.04281345565749235, |
| "grad_norm": 0.19542913138866425, |
| "learning_rate": 5.4545454545454545e-06, |
| "loss": 1.879454493522644, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.045871559633027525, |
| "grad_norm": 0.2842688262462616, |
| "learning_rate": 5.858585858585859e-06, |
| "loss": 2.1720046997070312, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.04892966360856269, |
| "grad_norm": 0.21216483414173126, |
| "learning_rate": 6.262626262626264e-06, |
| "loss": 1.8467118740081787, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.05198776758409786, |
| "grad_norm": 0.17894431948661804, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 1.9493175745010376, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.05504587155963303, |
| "grad_norm": 0.15230964124202728, |
| "learning_rate": 7.070707070707071e-06, |
| "loss": 1.7910125255584717, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.0581039755351682, |
| "grad_norm": 0.14847195148468018, |
| "learning_rate": 7.474747474747476e-06, |
| "loss": 1.8185133934020996, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.06116207951070336, |
| "grad_norm": 0.4041803479194641, |
| "learning_rate": 7.87878787878788e-06, |
| "loss": 1.8133583068847656, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.06422018348623854, |
| "grad_norm": 0.2895216643810272, |
| "learning_rate": 8.282828282828283e-06, |
| "loss": 1.8604023456573486, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.0672782874617737, |
| "grad_norm": 0.17079144716262817, |
| "learning_rate": 8.686868686868687e-06, |
| "loss": 1.943013072013855, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.07033639143730887, |
| "grad_norm": 0.23315830528736115, |
| "learning_rate": 9.090909090909091e-06, |
| "loss": 2.0355608463287354, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.07339449541284404, |
| "grad_norm": 0.27873244881629944, |
| "learning_rate": 9.494949494949497e-06, |
| "loss": 2.0327694416046143, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.0764525993883792, |
| "grad_norm": 0.16263337433338165, |
| "learning_rate": 9.8989898989899e-06, |
| "loss": 1.834035038948059, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.07951070336391437, |
| "grad_norm": 0.34875982999801636, |
| "learning_rate": 1.0303030303030304e-05, |
| "loss": 1.8212206363677979, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.08256880733944955, |
| "grad_norm": 0.13111037015914917, |
| "learning_rate": 1.0707070707070708e-05, |
| "loss": 1.9612611532211304, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.0856269113149847, |
| "grad_norm": 0.196682870388031, |
| "learning_rate": 1.1111111111111113e-05, |
| "loss": 1.8873212337493896, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.08868501529051988, |
| "grad_norm": 0.13784241676330566, |
| "learning_rate": 1.1515151515151517e-05, |
| "loss": 1.92000412940979, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.09174311926605505, |
| "grad_norm": 0.19873078167438507, |
| "learning_rate": 1.191919191919192e-05, |
| "loss": 1.8548063039779663, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.09480122324159021, |
| "grad_norm": 0.17927613854408264, |
| "learning_rate": 1.2323232323232323e-05, |
| "loss": 1.785029411315918, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.09785932721712538, |
| "grad_norm": 0.12024315446615219, |
| "learning_rate": 1.2727272727272728e-05, |
| "loss": 1.6718401908874512, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.10091743119266056, |
| "grad_norm": 0.13896070420742035, |
| "learning_rate": 1.3131313131313132e-05, |
| "loss": 1.7962878942489624, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.10397553516819572, |
| "grad_norm": 0.1479691118001938, |
| "learning_rate": 1.3535353535353538e-05, |
| "loss": 1.9109517335891724, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.10703363914373089, |
| "grad_norm": 0.15656954050064087, |
| "learning_rate": 1.3939393939393942e-05, |
| "loss": 1.7726895809173584, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.11009174311926606, |
| "grad_norm": 0.2845805287361145, |
| "learning_rate": 1.4343434343434344e-05, |
| "loss": 1.9658571481704712, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.11314984709480122, |
| "grad_norm": 0.2494230568408966, |
| "learning_rate": 1.4747474747474747e-05, |
| "loss": 2.046457290649414, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.1162079510703364, |
| "grad_norm": 0.36776164174079895, |
| "learning_rate": 1.5151515151515153e-05, |
| "loss": 1.8348238468170166, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.11926605504587157, |
| "grad_norm": 0.3683006465435028, |
| "learning_rate": 1.555555555555556e-05, |
| "loss": 2.14683198928833, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.12232415902140673, |
| "grad_norm": 0.17250515520572662, |
| "learning_rate": 1.595959595959596e-05, |
| "loss": 1.8108762502670288, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.12538226299694188, |
| "grad_norm": 0.1463659554719925, |
| "learning_rate": 1.6363636363636366e-05, |
| "loss": 1.578155517578125, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.12844036697247707, |
| "grad_norm": 0.15676458179950714, |
| "learning_rate": 1.6767676767676768e-05, |
| "loss": 1.7267800569534302, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.13149847094801223, |
| "grad_norm": 0.24986772239208221, |
| "learning_rate": 1.7171717171717173e-05, |
| "loss": 1.7589430809020996, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.1345565749235474, |
| "grad_norm": 0.17329798638820648, |
| "learning_rate": 1.7575757575757576e-05, |
| "loss": 1.6775318384170532, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.13761467889908258, |
| "grad_norm": 0.1558336615562439, |
| "learning_rate": 1.797979797979798e-05, |
| "loss": 1.7209129333496094, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.14067278287461774, |
| "grad_norm": 0.11867998540401459, |
| "learning_rate": 1.8383838383838387e-05, |
| "loss": 1.762731671333313, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.1437308868501529, |
| "grad_norm": 0.1370459347963333, |
| "learning_rate": 1.8787878787878792e-05, |
| "loss": 1.8421965837478638, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.14678899082568808, |
| "grad_norm": 0.6398494243621826, |
| "learning_rate": 1.9191919191919194e-05, |
| "loss": 1.7744200229644775, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.14984709480122324, |
| "grad_norm": 0.19712288677692413, |
| "learning_rate": 1.9595959595959596e-05, |
| "loss": 1.7096507549285889, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.1529051987767584, |
| "grad_norm": 0.22378289699554443, |
| "learning_rate": 2e-05, |
| "loss": 1.8468637466430664, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.1559633027522936, |
| "grad_norm": 0.17655548453330994, |
| "learning_rate": 1.999994881459676e-05, |
| "loss": 1.7527018785476685, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.15902140672782875, |
| "grad_norm": 0.6452811360359192, |
| "learning_rate": 1.9999795258969242e-05, |
| "loss": 1.7133104801177979, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.1620795107033639, |
| "grad_norm": 0.14039914309978485, |
| "learning_rate": 1.9999539334864075e-05, |
| "loss": 1.735745906829834, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.1651376146788991, |
| "grad_norm": 0.24329495429992676, |
| "learning_rate": 1.9999181045192272e-05, |
| "loss": 1.7769023180007935, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.16819571865443425, |
| "grad_norm": 0.10750233381986618, |
| "learning_rate": 1.9998720394029214e-05, |
| "loss": 1.7763116359710693, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.1712538226299694, |
| "grad_norm": 0.36938831210136414, |
| "learning_rate": 1.9998157386614592e-05, |
| "loss": 1.6457706689834595, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.1743119266055046, |
| "grad_norm": 0.14405888319015503, |
| "learning_rate": 1.999749202935236e-05, |
| "loss": 1.7605836391448975, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.17737003058103976, |
| "grad_norm": 0.11128629744052887, |
| "learning_rate": 1.9996724329810635e-05, |
| "loss": 1.634245753288269, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.18042813455657492, |
| "grad_norm": 0.4720332622528076, |
| "learning_rate": 1.999585429672165e-05, |
| "loss": 1.8564720153808594, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.1834862385321101, |
| "grad_norm": 0.1645146757364273, |
| "learning_rate": 1.999488193998162e-05, |
| "loss": 1.682362675666809, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.18654434250764526, |
| "grad_norm": 0.16431479156017303, |
| "learning_rate": 1.9993807270650653e-05, |
| "loss": 1.7643775939941406, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.18960244648318042, |
| "grad_norm": 0.09633471816778183, |
| "learning_rate": 1.9992630300952616e-05, |
| "loss": 1.7723275423049927, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.1926605504587156, |
| "grad_norm": 0.1628941148519516, |
| "learning_rate": 1.9991351044274984e-05, |
| "loss": 1.7144907712936401, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.19571865443425077, |
| "grad_norm": 0.1856471598148346, |
| "learning_rate": 1.9989969515168707e-05, |
| "loss": 1.6783483028411865, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.19877675840978593, |
| "grad_norm": 0.1167445257306099, |
| "learning_rate": 1.9988485729348042e-05, |
| "loss": 1.6974326372146606, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.2018348623853211, |
| "grad_norm": 0.16413787007331848, |
| "learning_rate": 1.998689970369035e-05, |
| "loss": 1.6718995571136475, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.20489296636085627, |
| "grad_norm": 0.351113885641098, |
| "learning_rate": 1.9985211456235943e-05, |
| "loss": 1.7726083993911743, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.20795107033639143, |
| "grad_norm": 0.14564545452594757, |
| "learning_rate": 1.9983421006187847e-05, |
| "loss": 1.6731343269348145, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.21100917431192662, |
| "grad_norm": 0.21894535422325134, |
| "learning_rate": 1.9981528373911593e-05, |
| "loss": 1.5750818252563477, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.21406727828746178, |
| "grad_norm": 0.28063738346099854, |
| "learning_rate": 1.9979533580934997e-05, |
| "loss": 1.532321572303772, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.21712538226299694, |
| "grad_norm": 0.10466428846120834, |
| "learning_rate": 1.9977436649947894e-05, |
| "loss": 1.619662880897522, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.22018348623853212, |
| "grad_norm": 0.2574918568134308, |
| "learning_rate": 1.99752376048019e-05, |
| "loss": 1.5412100553512573, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.22324159021406728, |
| "grad_norm": 0.13497450947761536, |
| "learning_rate": 1.997293647051013e-05, |
| "loss": 1.6017297506332397, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.22629969418960244, |
| "grad_norm": 0.10458122938871384, |
| "learning_rate": 1.9970533273246915e-05, |
| "loss": 1.523569107055664, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.22935779816513763, |
| "grad_norm": 0.29012468457221985, |
| "learning_rate": 1.9968028040347495e-05, |
| "loss": 1.6920928955078125, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.2324159021406728, |
| "grad_norm": 0.252836138010025, |
| "learning_rate": 1.996542080030774e-05, |
| "loss": 1.7454942464828491, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.23547400611620795, |
| "grad_norm": 0.1512991040945053, |
| "learning_rate": 1.9962711582783782e-05, |
| "loss": 1.7650762796401978, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.23853211009174313, |
| "grad_norm": 0.17810527980327606, |
| "learning_rate": 1.995990041859171e-05, |
| "loss": 1.6707919836044312, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.2415902140672783, |
| "grad_norm": 0.13168492913246155, |
| "learning_rate": 1.9956987339707212e-05, |
| "loss": 1.743039608001709, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.24464831804281345, |
| "grad_norm": 0.1702589988708496, |
| "learning_rate": 1.9953972379265195e-05, |
| "loss": 1.6535260677337646, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.24770642201834864, |
| "grad_norm": 0.19646987318992615, |
| "learning_rate": 1.9950855571559434e-05, |
| "loss": 1.7839441299438477, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.25076452599388377, |
| "grad_norm": 0.2019776999950409, |
| "learning_rate": 1.994763695204216e-05, |
| "loss": 1.5607678890228271, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.25382262996941896, |
| "grad_norm": 0.29837653040885925, |
| "learning_rate": 1.9944316557323676e-05, |
| "loss": 1.7146563529968262, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.25688073394495414, |
| "grad_norm": 0.2998209297657013, |
| "learning_rate": 1.9940894425171923e-05, |
| "loss": 1.4525647163391113, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.2599388379204893, |
| "grad_norm": 0.46811026334762573, |
| "learning_rate": 1.9937370594512054e-05, |
| "loss": 1.6660023927688599, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.26299694189602446, |
| "grad_norm": 0.22554853558540344, |
| "learning_rate": 1.9933745105426012e-05, |
| "loss": 1.2973607778549194, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.26605504587155965, |
| "grad_norm": 0.25682735443115234, |
| "learning_rate": 1.9930017999152035e-05, |
| "loss": 1.2847613096237183, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.2691131498470948, |
| "grad_norm": 0.17357558012008667, |
| "learning_rate": 1.9926189318084225e-05, |
| "loss": 1.5048258304595947, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.27217125382262997, |
| "grad_norm": 0.1701100617647171, |
| "learning_rate": 1.992225910577205e-05, |
| "loss": 1.4852646589279175, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.27522935779816515, |
| "grad_norm": 0.3405025601387024, |
| "learning_rate": 1.9918227406919834e-05, |
| "loss": 1.6833899021148682, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.2782874617737003, |
| "grad_norm": 0.3251838684082031, |
| "learning_rate": 1.9914094267386282e-05, |
| "loss": 1.6130907535552979, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.28134556574923547, |
| "grad_norm": 0.21916796267032623, |
| "learning_rate": 1.9909859734183922e-05, |
| "loss": 1.348628282546997, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.28440366972477066, |
| "grad_norm": 0.1524391770362854, |
| "learning_rate": 1.9905523855478605e-05, |
| "loss": 1.293289303779602, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.2874617737003058, |
| "grad_norm": 0.10658152401447296, |
| "learning_rate": 1.990108668058892e-05, |
| "loss": 1.4680689573287964, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.290519877675841, |
| "grad_norm": 0.35397616028785706, |
| "learning_rate": 1.9896548259985677e-05, |
| "loss": 1.3957206010818481, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.29357798165137616, |
| "grad_norm": 0.19746947288513184, |
| "learning_rate": 1.9891908645291285e-05, |
| "loss": 1.4401719570159912, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.2966360856269113, |
| "grad_norm": 0.21412421762943268, |
| "learning_rate": 1.98871678892792e-05, |
| "loss": 1.6005785465240479, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.2996941896024465, |
| "grad_norm": 0.2620628774166107, |
| "learning_rate": 1.9882326045873318e-05, |
| "loss": 1.5429439544677734, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.30275229357798167, |
| "grad_norm": 0.1319998800754547, |
| "learning_rate": 1.9877383170147354e-05, |
| "loss": 1.2774600982666016, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.3058103975535168, |
| "grad_norm": 0.1912466585636139, |
| "learning_rate": 1.987233931832421e-05, |
| "loss": 1.4500045776367188, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.308868501529052, |
| "grad_norm": 0.1693103313446045, |
| "learning_rate": 1.9867194547775352e-05, |
| "loss": 1.595328450202942, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.3119266055045872, |
| "grad_norm": 0.14706411957740784, |
| "learning_rate": 1.9861948917020147e-05, |
| "loss": 1.7478187084197998, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.3149847094801223, |
| "grad_norm": 0.10898048430681229, |
| "learning_rate": 1.98566024857252e-05, |
| "loss": 1.7878528833389282, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.3180428134556575, |
| "grad_norm": 0.22221213579177856, |
| "learning_rate": 1.985115531470368e-05, |
| "loss": 1.875852108001709, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.3211009174311927, |
| "grad_norm": 0.1595430225133896, |
| "learning_rate": 1.9845607465914617e-05, |
| "loss": 1.440093994140625, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.3241590214067278, |
| "grad_norm": 0.18608379364013672, |
| "learning_rate": 1.9839959002462204e-05, |
| "loss": 1.5460582971572876, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.327217125382263, |
| "grad_norm": 0.130866140127182, |
| "learning_rate": 1.9834209988595086e-05, |
| "loss": 1.3713728189468384, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.3302752293577982, |
| "grad_norm": 0.0909891426563263, |
| "learning_rate": 1.982836048970561e-05, |
| "loss": 1.2875492572784424, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 0.18003132939338684, |
| "learning_rate": 1.9822410572329106e-05, |
| "loss": 1.727955937385559, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.3363914373088685, |
| "grad_norm": 0.11840490251779556, |
| "learning_rate": 1.9816360304143107e-05, |
| "loss": 1.5818911790847778, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.3394495412844037, |
| "grad_norm": 0.16139858961105347, |
| "learning_rate": 1.98102097539666e-05, |
| "loss": 1.735103964805603, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.3425076452599388, |
| "grad_norm": 0.15649574995040894, |
| "learning_rate": 1.9803958991759223e-05, |
| "loss": 1.2130670547485352, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.345565749235474, |
| "grad_norm": 0.12992748618125916, |
| "learning_rate": 1.979760808862049e-05, |
| "loss": 1.4926730394363403, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.3486238532110092, |
| "grad_norm": 0.14847305417060852, |
| "learning_rate": 1.979115711678896e-05, |
| "loss": 1.7769122123718262, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.3516819571865443, |
| "grad_norm": 0.15531417727470398, |
| "learning_rate": 1.9784606149641425e-05, |
| "loss": 1.5090936422348022, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.3547400611620795, |
| "grad_norm": 0.11508797854185104, |
| "learning_rate": 1.9777955261692096e-05, |
| "loss": 1.6367048025131226, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.3577981651376147, |
| "grad_norm": 0.17374970018863678, |
| "learning_rate": 1.977120452859172e-05, |
| "loss": 1.5295836925506592, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.36085626911314983, |
| "grad_norm": 0.10554488748311996, |
| "learning_rate": 1.976435402712674e-05, |
| "loss": 1.4442095756530762, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.363914373088685, |
| "grad_norm": 0.14663386344909668, |
| "learning_rate": 1.9757403835218416e-05, |
| "loss": 1.5376912355422974, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.3669724770642202, |
| "grad_norm": 0.17038187384605408, |
| "learning_rate": 1.9750354031921945e-05, |
| "loss": 1.5929934978485107, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.37003058103975534, |
| "grad_norm": 0.1317462921142578, |
| "learning_rate": 1.9743204697425555e-05, |
| "loss": 1.4519989490509033, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.3730886850152905, |
| "grad_norm": 0.09555897116661072, |
| "learning_rate": 1.9735955913049596e-05, |
| "loss": 1.555535078048706, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.3761467889908257, |
| "grad_norm": 0.18885712325572968, |
| "learning_rate": 1.972860776124561e-05, |
| "loss": 1.8240364789962769, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.37920489296636084, |
| "grad_norm": 0.3851545751094818, |
| "learning_rate": 1.97211603255954e-05, |
| "loss": 2.276089668273926, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.382262996941896, |
| "grad_norm": 0.27271491289138794, |
| "learning_rate": 1.971361369081008e-05, |
| "loss": 1.4479761123657227, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.3853211009174312, |
| "grad_norm": 0.16263516247272491, |
| "learning_rate": 1.9705967942729097e-05, |
| "loss": 1.4307349920272827, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.38837920489296635, |
| "grad_norm": 0.17890089750289917, |
| "learning_rate": 1.969822316831928e-05, |
| "loss": 1.4967151880264282, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.39143730886850153, |
| "grad_norm": 0.9752420783042908, |
| "learning_rate": 1.969037945567383e-05, |
| "loss": 1.6618462800979614, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.3944954128440367, |
| "grad_norm": 0.12987129390239716, |
| "learning_rate": 1.9682436894011314e-05, |
| "loss": 1.5646541118621826, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.39755351681957185, |
| "grad_norm": 0.29777029156684875, |
| "learning_rate": 1.9674395573674682e-05, |
| "loss": 1.4057655334472656, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.40061162079510704, |
| "grad_norm": 0.13696956634521484, |
| "learning_rate": 1.9666255586130196e-05, |
| "loss": 1.297253131866455, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.4036697247706422, |
| "grad_norm": 0.09536871314048767, |
| "learning_rate": 1.9658017023966428e-05, |
| "loss": 1.2214246988296509, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.40672782874617736, |
| "grad_norm": 0.10002201050519943, |
| "learning_rate": 1.964967998089318e-05, |
| "loss": 1.3900551795959473, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.40978593272171254, |
| "grad_norm": 0.25706544518470764, |
| "learning_rate": 1.9641244551740438e-05, |
| "loss": 1.6182241439819336, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.41284403669724773, |
| "grad_norm": 0.14891637861728668, |
| "learning_rate": 1.9632710832457272e-05, |
| "loss": 1.6432101726531982, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.41590214067278286, |
| "grad_norm": 0.229234516620636, |
| "learning_rate": 1.9624078920110766e-05, |
| "loss": 1.6490212678909302, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.41896024464831805, |
| "grad_norm": 0.13612765073776245, |
| "learning_rate": 1.9615348912884897e-05, |
| "loss": 1.6697027683258057, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.42201834862385323, |
| "grad_norm": 0.13454599678516388, |
| "learning_rate": 1.960652091007944e-05, |
| "loss": 1.5065478086471558, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.42507645259938837, |
| "grad_norm": 0.2709161937236786, |
| "learning_rate": 1.9597595012108797e-05, |
| "loss": 1.649755597114563, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.42813455657492355, |
| "grad_norm": 0.2529757022857666, |
| "learning_rate": 1.9588571320500914e-05, |
| "loss": 1.5640302896499634, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.43119266055045874, |
| "grad_norm": 0.11685580015182495, |
| "learning_rate": 1.9579449937896067e-05, |
| "loss": 1.4505410194396973, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.43425076452599387, |
| "grad_norm": 0.14383523166179657, |
| "learning_rate": 1.957023096804574e-05, |
| "loss": 1.4420454502105713, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.43730886850152906, |
| "grad_norm": 0.15812362730503082, |
| "learning_rate": 1.9560914515811416e-05, |
| "loss": 1.6419968605041504, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.44036697247706424, |
| "grad_norm": 0.2930157482624054, |
| "learning_rate": 1.9551500687163404e-05, |
| "loss": 1.3646347522735596, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.4434250764525994, |
| "grad_norm": 0.31321683526039124, |
| "learning_rate": 1.9541989589179608e-05, |
| "loss": 1.5918828248977661, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.44648318042813456, |
| "grad_norm": 0.12769058346748352, |
| "learning_rate": 1.9532381330044346e-05, |
| "loss": 1.459935188293457, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.44954128440366975, |
| "grad_norm": 0.1212194636464119, |
| "learning_rate": 1.9522676019047084e-05, |
| "loss": 1.4860665798187256, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.4525993883792049, |
| "grad_norm": 0.16963601112365723, |
| "learning_rate": 1.9512873766581216e-05, |
| "loss": 1.6114251613616943, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.45565749235474007, |
| "grad_norm": 0.15480950474739075, |
| "learning_rate": 1.9502974684142787e-05, |
| "loss": 1.6236363649368286, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.45871559633027525, |
| "grad_norm": 0.1646520048379898, |
| "learning_rate": 1.949297888432926e-05, |
| "loss": 1.4507560729980469, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.4617737003058104, |
| "grad_norm": 0.12311575561761856, |
| "learning_rate": 1.9482886480838193e-05, |
| "loss": 1.3734780550003052, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.4648318042813456, |
| "grad_norm": 0.3230009973049164, |
| "learning_rate": 1.947269758846597e-05, |
| "loss": 1.532796859741211, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.46788990825688076, |
| "grad_norm": 0.160029798746109, |
| "learning_rate": 1.9462412323106506e-05, |
| "loss": 1.4639440774917603, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.4709480122324159, |
| "grad_norm": 0.1815919578075409, |
| "learning_rate": 1.945203080174989e-05, |
| "loss": 1.620712399482727, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.4740061162079511, |
| "grad_norm": 0.12287808954715729, |
| "learning_rate": 1.94415531424811e-05, |
| "loss": 1.4684053659439087, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.47706422018348627, |
| "grad_norm": 0.20326243340969086, |
| "learning_rate": 1.9430979464478618e-05, |
| "loss": 1.4242714643478394, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.4801223241590214, |
| "grad_norm": 0.2062222808599472, |
| "learning_rate": 1.9420309888013115e-05, |
| "loss": 1.4069018363952637, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.4831804281345566, |
| "grad_norm": 0.27631694078445435, |
| "learning_rate": 1.940954453444604e-05, |
| "loss": 1.5006767511367798, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.48623853211009177, |
| "grad_norm": 0.1338021457195282, |
| "learning_rate": 1.9398683526228283e-05, |
| "loss": 1.4913774728775024, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.4892966360856269, |
| "grad_norm": 0.15452533960342407, |
| "learning_rate": 1.9387726986898753e-05, |
| "loss": 1.49446702003479, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.4923547400611621, |
| "grad_norm": 0.23729997873306274, |
| "learning_rate": 1.9376675041082974e-05, |
| "loss": 1.6449737548828125, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.4954128440366973, |
| "grad_norm": 0.18367542326450348, |
| "learning_rate": 1.936552781449168e-05, |
| "loss": 1.8280832767486572, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.4984709480122324, |
| "grad_norm": 0.1884259730577469, |
| "learning_rate": 1.935428543391938e-05, |
| "loss": 1.7086812257766724, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.5015290519877675, |
| "grad_norm": 0.11275501549243927, |
| "learning_rate": 1.9342948027242923e-05, |
| "loss": 1.686227798461914, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.5045871559633027, |
| "grad_norm": 0.18229232728481293, |
| "learning_rate": 1.9331515723420016e-05, |
| "loss": 1.6126062870025635, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.5076452599388379, |
| "grad_norm": 0.1593896448612213, |
| "learning_rate": 1.9319988652487794e-05, |
| "loss": 1.3089760541915894, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.5107033639143731, |
| "grad_norm": 0.42358121275901794, |
| "learning_rate": 1.930836694556131e-05, |
| "loss": 1.2807157039642334, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.5137614678899083, |
| "grad_norm": 0.16546684503555298, |
| "learning_rate": 1.929665073483208e-05, |
| "loss": 1.7080059051513672, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.5168195718654435, |
| "grad_norm": 0.1714116334915161, |
| "learning_rate": 1.9284840153566533e-05, |
| "loss": 1.705456018447876, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.5198776758409785, |
| "grad_norm": 0.17549371719360352, |
| "learning_rate": 1.9272935336104526e-05, |
| "loss": 1.7870020866394043, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.5229357798165137, |
| "grad_norm": 0.26433461904525757, |
| "learning_rate": 1.926093641785781e-05, |
| "loss": 1.7173891067504883, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.5259938837920489, |
| "grad_norm": 0.13767947256565094, |
| "learning_rate": 1.9248843535308494e-05, |
| "loss": 1.759930968284607, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.5290519877675841, |
| "grad_norm": 0.3766266703605652, |
| "learning_rate": 1.9236656826007483e-05, |
| "loss": 1.676863670349121, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.5321100917431193, |
| "grad_norm": 0.18953099846839905, |
| "learning_rate": 1.9224376428572914e-05, |
| "loss": 1.6074869632720947, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.5351681957186545, |
| "grad_norm": 0.4343929588794708, |
| "learning_rate": 1.9212002482688586e-05, |
| "loss": 1.5012247562408447, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.5382262996941896, |
| "grad_norm": 0.3309212327003479, |
| "learning_rate": 1.919953512910237e-05, |
| "loss": 1.7654335498809814, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.5412844036697247, |
| "grad_norm": 0.2186765819787979, |
| "learning_rate": 1.9186974509624596e-05, |
| "loss": 1.6823170185089111, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.5443425076452599, |
| "grad_norm": 0.22455096244812012, |
| "learning_rate": 1.917432076712647e-05, |
| "loss": 1.2209478616714478, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.5474006116207951, |
| "grad_norm": 0.26915469765663147, |
| "learning_rate": 1.916157404553841e-05, |
| "loss": 1.80545175075531, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.5504587155963303, |
| "grad_norm": 0.3115030825138092, |
| "learning_rate": 1.914873448984843e-05, |
| "loss": 1.8234632015228271, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.5535168195718655, |
| "grad_norm": 0.5503238439559937, |
| "learning_rate": 1.913580224610051e-05, |
| "loss": 1.8974953889846802, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.5565749235474006, |
| "grad_norm": 0.48155084252357483, |
| "learning_rate": 1.912277746139288e-05, |
| "loss": 1.821452260017395, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.5596330275229358, |
| "grad_norm": 0.289008766412735, |
| "learning_rate": 1.9109660283876402e-05, |
| "loss": 1.7944821119308472, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.5626911314984709, |
| "grad_norm": 0.1955709308385849, |
| "learning_rate": 1.909645086275286e-05, |
| "loss": 1.6593105792999268, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.5657492354740061, |
| "grad_norm": 0.24237479269504547, |
| "learning_rate": 1.9083149348273267e-05, |
| "loss": 1.5331079959869385, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.5688073394495413, |
| "grad_norm": 0.22706088423728943, |
| "learning_rate": 1.906975589173615e-05, |
| "loss": 1.0948290824890137, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.5718654434250765, |
| "grad_norm": 0.1655234396457672, |
| "learning_rate": 1.9056270645485832e-05, |
| "loss": 1.0862908363342285, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.5749235474006116, |
| "grad_norm": 0.11473622173070908, |
| "learning_rate": 1.904269376291071e-05, |
| "loss": 1.3455995321273804, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.5779816513761468, |
| "grad_norm": 0.11378367990255356, |
| "learning_rate": 1.9029025398441502e-05, |
| "loss": 1.2777307033538818, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.581039755351682, |
| "grad_norm": 0.17563015222549438, |
| "learning_rate": 1.9015265707549475e-05, |
| "loss": 1.228100061416626, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.5840978593272171, |
| "grad_norm": 0.09832898527383804, |
| "learning_rate": 1.9001414846744708e-05, |
| "loss": 1.2333452701568604, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.5871559633027523, |
| "grad_norm": 0.20122288167476654, |
| "learning_rate": 1.898747297357429e-05, |
| "loss": 1.519348382949829, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.5902140672782875, |
| "grad_norm": 0.4179234802722931, |
| "learning_rate": 1.8973440246620527e-05, |
| "loss": 1.4286757707595825, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.5932721712538226, |
| "grad_norm": 0.24806340038776398, |
| "learning_rate": 1.895931682549915e-05, |
| "loss": 1.2454115152359009, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.5963302752293578, |
| "grad_norm": 0.2059653401374817, |
| "learning_rate": 1.8945102870857502e-05, |
| "loss": 1.5481377840042114, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.599388379204893, |
| "grad_norm": 0.08632580935955048, |
| "learning_rate": 1.8930798544372683e-05, |
| "loss": 1.5465701818466187, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.6024464831804281, |
| "grad_norm": 0.20156033337116241, |
| "learning_rate": 1.891640400874975e-05, |
| "loss": 1.3250479698181152, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.6055045871559633, |
| "grad_norm": 0.29586711525917053, |
| "learning_rate": 1.8901919427719835e-05, |
| "loss": 1.4996241331100464, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.6085626911314985, |
| "grad_norm": 0.4265895187854767, |
| "learning_rate": 1.8887344966038293e-05, |
| "loss": 1.6997027397155762, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.6116207951070336, |
| "grad_norm": 0.2118600457906723, |
| "learning_rate": 1.8872680789482847e-05, |
| "loss": 1.2160531282424927, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.6146788990825688, |
| "grad_norm": 0.1629539579153061, |
| "learning_rate": 1.8857927064851663e-05, |
| "loss": 1.4833059310913086, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.617737003058104, |
| "grad_norm": 0.18409305810928345, |
| "learning_rate": 1.8843083959961487e-05, |
| "loss": 1.5481505393981934, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.6207951070336392, |
| "grad_norm": 0.2286827266216278, |
| "learning_rate": 1.8828151643645723e-05, |
| "loss": 1.4782235622406006, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.6238532110091743, |
| "grad_norm": 0.31445422768592834, |
| "learning_rate": 1.8813130285752504e-05, |
| "loss": 1.34043550491333, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.6269113149847095, |
| "grad_norm": 0.3406539857387543, |
| "learning_rate": 1.8798020057142787e-05, |
| "loss": 0.978967010974884, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.6299694189602446, |
| "grad_norm": 0.18329036235809326, |
| "learning_rate": 1.8782821129688378e-05, |
| "loss": 1.6057853698730469, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.6330275229357798, |
| "grad_norm": 0.20872509479522705, |
| "learning_rate": 1.8767533676269994e-05, |
| "loss": 1.548257827758789, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.636085626911315, |
| "grad_norm": 0.13048969209194183, |
| "learning_rate": 1.8752157870775293e-05, |
| "loss": 1.4458184242248535, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.6391437308868502, |
| "grad_norm": 0.2189042568206787, |
| "learning_rate": 1.87366938880969e-05, |
| "loss": 1.551786184310913, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.6422018348623854, |
| "grad_norm": 0.2083597481250763, |
| "learning_rate": 1.872114190413041e-05, |
| "loss": 1.4876255989074707, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.6452599388379205, |
| "grad_norm": 0.23290075361728668, |
| "learning_rate": 1.87055020957724e-05, |
| "loss": 1.4596383571624756, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.6483180428134556, |
| "grad_norm": 0.17102104425430298, |
| "learning_rate": 1.86897746409184e-05, |
| "loss": 1.6562044620513916, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.6513761467889908, |
| "grad_norm": 0.20064817368984222, |
| "learning_rate": 1.8673959718460877e-05, |
| "loss": 1.3577477931976318, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.654434250764526, |
| "grad_norm": 0.214597687125206, |
| "learning_rate": 1.865805750828721e-05, |
| "loss": 1.086013674736023, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.6574923547400612, |
| "grad_norm": 0.2168869823217392, |
| "learning_rate": 1.8642068191277632e-05, |
| "loss": 1.724764108657837, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.6605504587155964, |
| "grad_norm": 0.207820326089859, |
| "learning_rate": 1.8625991949303163e-05, |
| "loss": 1.4837605953216553, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.6636085626911316, |
| "grad_norm": 0.14926370978355408, |
| "learning_rate": 1.8609828965223577e-05, |
| "loss": 1.6416270732879639, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 0.25954487919807434, |
| "learning_rate": 1.8593579422885282e-05, |
| "loss": 1.4783693552017212, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.6697247706422018, |
| "grad_norm": 0.1701032519340515, |
| "learning_rate": 1.857724350711925e-05, |
| "loss": 1.6790128946304321, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.672782874617737, |
| "grad_norm": 0.2131899893283844, |
| "learning_rate": 1.8560821403738913e-05, |
| "loss": 1.5477441549301147, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.6758409785932722, |
| "grad_norm": 0.2975439131259918, |
| "learning_rate": 1.854431329953804e-05, |
| "loss": 1.576401948928833, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.6788990825688074, |
| "grad_norm": 0.2657731771469116, |
| "learning_rate": 1.852771938228863e-05, |
| "loss": 1.467031478881836, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.6819571865443425, |
| "grad_norm": 0.18308323621749878, |
| "learning_rate": 1.851103984073876e-05, |
| "loss": 1.1278611421585083, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.6850152905198776, |
| "grad_norm": 0.18087266385555267, |
| "learning_rate": 1.8494274864610442e-05, |
| "loss": 1.5722160339355469, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.6880733944954128, |
| "grad_norm": 0.12121923267841339, |
| "learning_rate": 1.8477424644597466e-05, |
| "loss": 1.5512651205062866, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.691131498470948, |
| "grad_norm": 0.17362695932388306, |
| "learning_rate": 1.8460489372363233e-05, |
| "loss": 1.5511324405670166, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.6941896024464832, |
| "grad_norm": 0.2894831597805023, |
| "learning_rate": 1.844346924053858e-05, |
| "loss": 1.558009147644043, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.6972477064220184, |
| "grad_norm": 0.19669781625270844, |
| "learning_rate": 1.842636444271957e-05, |
| "loss": 1.3858280181884766, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.7003058103975535, |
| "grad_norm": 0.1768866628408432, |
| "learning_rate": 1.8409175173465305e-05, |
| "loss": 1.594611406326294, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.7033639143730887, |
| "grad_norm": 0.2871125638484955, |
| "learning_rate": 1.8391901628295723e-05, |
| "loss": 1.6866259574890137, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.7064220183486238, |
| "grad_norm": 0.5151063799858093, |
| "learning_rate": 1.8374544003689346e-05, |
| "loss": 1.4172760248184204, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.709480122324159, |
| "grad_norm": 0.5280262231826782, |
| "learning_rate": 1.8357102497081068e-05, |
| "loss": 1.663338303565979, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.7125382262996942, |
| "grad_norm": 0.20690284669399261, |
| "learning_rate": 1.8339577306859898e-05, |
| "loss": 1.4435617923736572, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.7155963302752294, |
| "grad_norm": 0.17216677963733673, |
| "learning_rate": 1.832196863236671e-05, |
| "loss": 1.5643521547317505, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.7186544342507645, |
| "grad_norm": 1.2874486446380615, |
| "learning_rate": 1.830427667389197e-05, |
| "loss": 1.5050475597381592, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.7217125382262997, |
| "grad_norm": 0.18002872169017792, |
| "learning_rate": 1.8286501632673467e-05, |
| "loss": 1.637873649597168, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.7247706422018348, |
| "grad_norm": 0.1843208372592926, |
| "learning_rate": 1.8268643710894008e-05, |
| "loss": 1.7227345705032349, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.72782874617737, |
| "grad_norm": 0.3121058940887451, |
| "learning_rate": 1.8250703111679135e-05, |
| "loss": 1.623226523399353, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.7308868501529052, |
| "grad_norm": 0.12877033650875092, |
| "learning_rate": 1.8232680039094807e-05, |
| "loss": 1.4622364044189453, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.7339449541284404, |
| "grad_norm": 0.41891077160835266, |
| "learning_rate": 1.821457469814507e-05, |
| "loss": 1.5695327520370483, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.7370030581039755, |
| "grad_norm": 0.18470750749111176, |
| "learning_rate": 1.8196387294769744e-05, |
| "loss": 1.5794202089309692, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.7400611620795107, |
| "grad_norm": 0.18489842116832733, |
| "learning_rate": 1.8178118035842068e-05, |
| "loss": 1.573374629020691, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.7431192660550459, |
| "grad_norm": 1.045165777206421, |
| "learning_rate": 1.8159767129166353e-05, |
| "loss": 1.5771058797836304, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.746177370030581, |
| "grad_norm": 0.13188393414020538, |
| "learning_rate": 1.8141334783475608e-05, |
| "loss": 1.574911117553711, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.7492354740061162, |
| "grad_norm": 0.183307483792305, |
| "learning_rate": 1.8122821208429177e-05, |
| "loss": 1.371832013130188, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.7522935779816514, |
| "grad_norm": 0.2250596582889557, |
| "learning_rate": 1.8104226614610355e-05, |
| "loss": 1.4597737789154053, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.7553516819571865, |
| "grad_norm": 0.2038484811782837, |
| "learning_rate": 1.808555121352398e-05, |
| "loss": 1.3941731452941895, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.7584097859327217, |
| "grad_norm": 0.32169243693351746, |
| "learning_rate": 1.806679521759403e-05, |
| "loss": 1.4842920303344727, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.7614678899082569, |
| "grad_norm": 1.4948965311050415, |
| "learning_rate": 1.804795884016123e-05, |
| "loss": 1.5212973356246948, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.764525993883792, |
| "grad_norm": 0.30841079354286194, |
| "learning_rate": 1.802904229548059e-05, |
| "loss": 1.3416656255722046, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.7675840978593272, |
| "grad_norm": 0.1549503058195114, |
| "learning_rate": 1.8010045798718996e-05, |
| "loss": 1.0037795305252075, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.7706422018348624, |
| "grad_norm": 0.17972712218761444, |
| "learning_rate": 1.7990969565952744e-05, |
| "loss": 1.2808337211608887, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.7737003058103975, |
| "grad_norm": 0.15994155406951904, |
| "learning_rate": 1.7971813814165096e-05, |
| "loss": 1.3440696001052856, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.7767584097859327, |
| "grad_norm": 0.442206472158432, |
| "learning_rate": 1.79525787612438e-05, |
| "loss": 1.3879811763763428, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.7798165137614679, |
| "grad_norm": 0.399215430021286, |
| "learning_rate": 1.793326462597862e-05, |
| "loss": 1.4018313884735107, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.7828746177370031, |
| "grad_norm": 0.11791788786649704, |
| "learning_rate": 1.7913871628058852e-05, |
| "loss": 1.466645359992981, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.7859327217125383, |
| "grad_norm": 0.2953292727470398, |
| "learning_rate": 1.7894399988070804e-05, |
| "loss": 1.3789677619934082, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.7889908256880734, |
| "grad_norm": 0.10243761539459229, |
| "learning_rate": 1.7874849927495312e-05, |
| "loss": 1.4025226831436157, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.7920489296636085, |
| "grad_norm": 0.10896909981966019, |
| "learning_rate": 1.78552216687052e-05, |
| "loss": 1.366543173789978, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.7951070336391437, |
| "grad_norm": 0.11357754468917847, |
| "learning_rate": 1.7835515434962775e-05, |
| "loss": 1.3763110637664795, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.7981651376146789, |
| "grad_norm": 0.26994413137435913, |
| "learning_rate": 1.781573145041726e-05, |
| "loss": 1.330032229423523, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.8012232415902141, |
| "grad_norm": 0.15781600773334503, |
| "learning_rate": 1.7795869940102256e-05, |
| "loss": 1.376378059387207, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.8042813455657493, |
| "grad_norm": 0.19157709181308746, |
| "learning_rate": 1.77759311299332e-05, |
| "loss": 1.318992018699646, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.8073394495412844, |
| "grad_norm": 0.05687440186738968, |
| "learning_rate": 1.775591524670475e-05, |
| "loss": 1.1846191883087158, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.8103975535168195, |
| "grad_norm": 0.0644853487610817, |
| "learning_rate": 1.773582251808827e-05, |
| "loss": 1.2165440320968628, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.8134556574923547, |
| "grad_norm": 0.06147017702460289, |
| "learning_rate": 1.7715653172629172e-05, |
| "loss": 1.0007672309875488, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.8165137614678899, |
| "grad_norm": 0.10844209045171738, |
| "learning_rate": 1.7695407439744367e-05, |
| "loss": 1.2789226770401, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.8195718654434251, |
| "grad_norm": 0.12735959887504578, |
| "learning_rate": 1.7675085549719638e-05, |
| "loss": 1.331377625465393, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.8226299694189603, |
| "grad_norm": 0.19527040421962738, |
| "learning_rate": 1.765468773370701e-05, |
| "loss": 1.3414947986602783, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.8256880733944955, |
| "grad_norm": 0.08658599853515625, |
| "learning_rate": 1.7634214223722136e-05, |
| "loss": 1.3086833953857422, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.8287461773700305, |
| "grad_norm": 0.19912293553352356, |
| "learning_rate": 1.7613665252641656e-05, |
| "loss": 1.3237627744674683, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.8318042813455657, |
| "grad_norm": 0.1858687400817871, |
| "learning_rate": 1.7593041054200535e-05, |
| "loss": 1.2972196340560913, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.8348623853211009, |
| "grad_norm": 0.09588196873664856, |
| "learning_rate": 1.757234186298943e-05, |
| "loss": 1.2714868783950806, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.8379204892966361, |
| "grad_norm": 0.09199360013008118, |
| "learning_rate": 1.7551567914451982e-05, |
| "loss": 1.3335715532302856, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.8409785932721713, |
| "grad_norm": 0.13166598975658417, |
| "learning_rate": 1.7530719444882192e-05, |
| "loss": 1.3041012287139893, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.8440366972477065, |
| "grad_norm": 0.1695961356163025, |
| "learning_rate": 1.7509796691421677e-05, |
| "loss": 1.2752689123153687, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.8470948012232415, |
| "grad_norm": 0.15224502980709076, |
| "learning_rate": 1.7488799892057012e-05, |
| "loss": 1.2604331970214844, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.8501529051987767, |
| "grad_norm": 0.16102130711078644, |
| "learning_rate": 1.746772928561701e-05, |
| "loss": 1.2783102989196777, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.8532110091743119, |
| "grad_norm": 0.1706506460905075, |
| "learning_rate": 1.7446585111769994e-05, |
| "loss": 1.2757190465927124, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.8562691131498471, |
| "grad_norm": 0.09909018874168396, |
| "learning_rate": 1.7425367611021095e-05, |
| "loss": 1.2308428287506104, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.8593272171253823, |
| "grad_norm": 0.0939355418086052, |
| "learning_rate": 1.740407702470949e-05, |
| "loss": 1.290454626083374, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.8623853211009175, |
| "grad_norm": 0.15474659204483032, |
| "learning_rate": 1.738271359500569e-05, |
| "loss": 1.3222665786743164, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.8654434250764526, |
| "grad_norm": 0.14680948853492737, |
| "learning_rate": 1.7361277564908746e-05, |
| "loss": 1.3195000886917114, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.8685015290519877, |
| "grad_norm": 0.15361595153808594, |
| "learning_rate": 1.7339769178243513e-05, |
| "loss": 1.3176097869873047, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.8715596330275229, |
| "grad_norm": 0.1914825141429901, |
| "learning_rate": 1.7318188679657868e-05, |
| "loss": 1.3322255611419678, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.8746177370030581, |
| "grad_norm": 0.15622292459011078, |
| "learning_rate": 1.7296536314619927e-05, |
| "loss": 1.3179453611373901, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.8776758409785933, |
| "grad_norm": 0.14867334067821503, |
| "learning_rate": 1.7274812329415256e-05, |
| "loss": 1.3439091444015503, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.8807339449541285, |
| "grad_norm": 0.1103372722864151, |
| "learning_rate": 1.725301697114406e-05, |
| "loss": 1.2625937461853027, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.8837920489296636, |
| "grad_norm": 0.24455446004867554, |
| "learning_rate": 1.7231150487718388e-05, |
| "loss": 1.3101165294647217, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.8868501529051988, |
| "grad_norm": 0.09268049895763397, |
| "learning_rate": 1.7209213127859298e-05, |
| "loss": 1.267791748046875, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.8899082568807339, |
| "grad_norm": 0.24207623302936554, |
| "learning_rate": 1.718720514109404e-05, |
| "loss": 1.295712947845459, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.8929663608562691, |
| "grad_norm": 0.14914532005786896, |
| "learning_rate": 1.7165126777753205e-05, |
| "loss": 1.3040282726287842, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.8960244648318043, |
| "grad_norm": 0.1064625084400177, |
| "learning_rate": 1.714297828896789e-05, |
| "loss": 1.3282526731491089, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.8990825688073395, |
| "grad_norm": 0.10693900287151337, |
| "learning_rate": 1.7120759926666833e-05, |
| "loss": 1.3108649253845215, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.9021406727828746, |
| "grad_norm": 0.14860939979553223, |
| "learning_rate": 1.7098471943573554e-05, |
| "loss": 1.2462693452835083, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.9051987767584098, |
| "grad_norm": 0.10575126856565475, |
| "learning_rate": 1.7076114593203477e-05, |
| "loss": 1.2716872692108154, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.908256880733945, |
| "grad_norm": 0.12260717153549194, |
| "learning_rate": 1.7053688129861047e-05, |
| "loss": 1.3093860149383545, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.9113149847094801, |
| "grad_norm": 0.13889411091804504, |
| "learning_rate": 1.703119280863683e-05, |
| "loss": 1.3331063985824585, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.9143730886850153, |
| "grad_norm": 0.10041464865207672, |
| "learning_rate": 1.700862888540463e-05, |
| "loss": 1.3085261583328247, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.9174311926605505, |
| "grad_norm": 0.16233770549297333, |
| "learning_rate": 1.698599661681855e-05, |
| "loss": 1.2828779220581055, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.9204892966360856, |
| "grad_norm": 0.11468062549829483, |
| "learning_rate": 1.6963296260310108e-05, |
| "loss": 1.2531770467758179, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.9235474006116208, |
| "grad_norm": 0.20973286032676697, |
| "learning_rate": 1.6940528074085277e-05, |
| "loss": 1.2862355709075928, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.926605504587156, |
| "grad_norm": 0.1067318320274353, |
| "learning_rate": 1.6917692317121574e-05, |
| "loss": 1.3118236064910889, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.9296636085626911, |
| "grad_norm": 0.105461984872818, |
| "learning_rate": 1.6894789249165088e-05, |
| "loss": 1.2574189901351929, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.9327217125382263, |
| "grad_norm": 0.2145971655845642, |
| "learning_rate": 1.6871819130727543e-05, |
| "loss": 1.284620761871338, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.9357798165137615, |
| "grad_norm": 0.13296093046665192, |
| "learning_rate": 1.6848782223083346e-05, |
| "loss": 1.28190279006958, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.9388379204892966, |
| "grad_norm": 0.10392415523529053, |
| "learning_rate": 1.682567878826657e-05, |
| "loss": 1.3311290740966797, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.9418960244648318, |
| "grad_norm": 0.16643521189689636, |
| "learning_rate": 1.6802509089068037e-05, |
| "loss": 1.3452017307281494, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.944954128440367, |
| "grad_norm": 0.12805478274822235, |
| "learning_rate": 1.6779273389032268e-05, |
| "loss": 1.3025506734848022, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.9480122324159022, |
| "grad_norm": 0.19968613982200623, |
| "learning_rate": 1.675597195245453e-05, |
| "loss": 1.3243719339370728, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.9510703363914373, |
| "grad_norm": 0.15357588231563568, |
| "learning_rate": 1.6732605044377804e-05, |
| "loss": 1.2921831607818604, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.9541284403669725, |
| "grad_norm": 0.12729910016059875, |
| "learning_rate": 1.670917293058979e-05, |
| "loss": 1.235489845275879, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.9571865443425076, |
| "grad_norm": 0.147382453083992, |
| "learning_rate": 1.668567587761985e-05, |
| "loss": 1.2604652643203735, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.9602446483180428, |
| "grad_norm": 0.15162380039691925, |
| "learning_rate": 1.6662114152736025e-05, |
| "loss": 1.2839949131011963, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.963302752293578, |
| "grad_norm": 0.09574859589338303, |
| "learning_rate": 1.663848802394195e-05, |
| "loss": 1.256510615348816, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.9663608562691132, |
| "grad_norm": 0.1858387142419815, |
| "learning_rate": 1.6614797759973834e-05, |
| "loss": 1.2705796957015991, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.9694189602446484, |
| "grad_norm": 0.19145724177360535, |
| "learning_rate": 1.6591043630297394e-05, |
| "loss": 1.3239952325820923, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.9724770642201835, |
| "grad_norm": 0.11227167397737503, |
| "learning_rate": 1.6567225905104785e-05, |
| "loss": 1.3034477233886719, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.9755351681957186, |
| "grad_norm": 0.23782867193222046, |
| "learning_rate": 1.654334485531153e-05, |
| "loss": 1.3204131126403809, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.9785932721712538, |
| "grad_norm": 0.14150187373161316, |
| "learning_rate": 1.651940075255345e-05, |
| "loss": 1.2745287418365479, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.981651376146789, |
| "grad_norm": 0.1170521229505539, |
| "learning_rate": 1.649539386918355e-05, |
| "loss": 1.2701953649520874, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.9847094801223242, |
| "grad_norm": 0.14334619045257568, |
| "learning_rate": 1.6471324478268946e-05, |
| "loss": 1.292121410369873, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.9877675840978594, |
| "grad_norm": 0.2446807622909546, |
| "learning_rate": 1.644719285358774e-05, |
| "loss": 1.3421876430511475, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.9908256880733946, |
| "grad_norm": 0.2528345584869385, |
| "learning_rate": 1.642299926962593e-05, |
| "loss": 1.370373010635376, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.9938837920489296, |
| "grad_norm": 0.2619344890117645, |
| "learning_rate": 1.639874400157425e-05, |
| "loss": 2.0050365924835205, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.9969418960244648, |
| "grad_norm": 0.253921777009964, |
| "learning_rate": 1.6374427325325078e-05, |
| "loss": 1.9838625192642212, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.3810979127883911, |
| "learning_rate": 1.635004951746927e-05, |
| "loss": 1.95270574092865, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.003058103975535, |
| "grad_norm": 0.12289910018444061, |
| "learning_rate": 1.632561085529304e-05, |
| "loss": 1.4550848007202148, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.0061162079510704, |
| "grad_norm": 0.15277813374996185, |
| "learning_rate": 1.6301111616774778e-05, |
| "loss": 1.4208133220672607, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.0091743119266054, |
| "grad_norm": 0.10656877607107162, |
| "learning_rate": 1.6276552080581905e-05, |
| "loss": 1.4359259605407715, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.0122324159021407, |
| "grad_norm": 0.11480451375246048, |
| "learning_rate": 1.6251932526067705e-05, |
| "loss": 1.3664590120315552, |
| "step": 662 |
| }, |
| { |
| "epoch": 1.0152905198776758, |
| "grad_norm": 0.1647813469171524, |
| "learning_rate": 1.622725323326814e-05, |
| "loss": 1.555678367614746, |
| "step": 664 |
| }, |
| { |
| "epoch": 1.018348623853211, |
| "grad_norm": 0.10670245438814163, |
| "learning_rate": 1.6202514482898665e-05, |
| "loss": 1.4394117593765259, |
| "step": 666 |
| }, |
| { |
| "epoch": 1.0214067278287462, |
| "grad_norm": 0.10963747650384903, |
| "learning_rate": 1.617771655635104e-05, |
| "loss": 1.3825225830078125, |
| "step": 668 |
| }, |
| { |
| "epoch": 1.0244648318042813, |
| "grad_norm": 0.18553870916366577, |
| "learning_rate": 1.615285973569012e-05, |
| "loss": 1.4653931856155396, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.0275229357798166, |
| "grad_norm": 0.10800433158874512, |
| "learning_rate": 1.6127944303650665e-05, |
| "loss": 1.3334200382232666, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.0305810397553516, |
| "grad_norm": 0.10137605667114258, |
| "learning_rate": 1.61029705436341e-05, |
| "loss": 1.4050424098968506, |
| "step": 674 |
| }, |
| { |
| "epoch": 1.033639143730887, |
| "grad_norm": 0.10126882046461105, |
| "learning_rate": 1.607793873970531e-05, |
| "loss": 1.5176122188568115, |
| "step": 676 |
| }, |
| { |
| "epoch": 1.036697247706422, |
| "grad_norm": 0.13856351375579834, |
| "learning_rate": 1.6052849176589402e-05, |
| "loss": 1.4309215545654297, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.039755351681957, |
| "grad_norm": 0.121686190366745, |
| "learning_rate": 1.6027702139668467e-05, |
| "loss": 1.2275919914245605, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.0428134556574924, |
| "grad_norm": 0.12417905777692795, |
| "learning_rate": 1.600249791497833e-05, |
| "loss": 1.3951789140701294, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.0458715596330275, |
| "grad_norm": 0.16497156023979187, |
| "learning_rate": 1.5977236789205305e-05, |
| "loss": 1.6375888586044312, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.0489296636085628, |
| "grad_norm": 0.6940323710441589, |
| "learning_rate": 1.595191904968293e-05, |
| "loss": 1.302648901939392, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.0519877675840978, |
| "grad_norm": 0.1414870023727417, |
| "learning_rate": 1.592654498438869e-05, |
| "loss": 1.430026650428772, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.0550458715596331, |
| "grad_norm": 0.15512694418430328, |
| "learning_rate": 1.5901114881940755e-05, |
| "loss": 1.3186713457107544, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.0581039755351682, |
| "grad_norm": 0.17043672502040863, |
| "learning_rate": 1.5875629031594695e-05, |
| "loss": 1.3079885244369507, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.0611620795107033, |
| "grad_norm": 0.11131853610277176, |
| "learning_rate": 1.585008772324018e-05, |
| "loss": 1.3147637844085693, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.0642201834862386, |
| "grad_norm": 0.39247646927833557, |
| "learning_rate": 1.5824491247397693e-05, |
| "loss": 1.350774884223938, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.0672782874617737, |
| "grad_norm": 0.15416143834590912, |
| "learning_rate": 1.5798839895215222e-05, |
| "loss": 1.5362554788589478, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.070336391437309, |
| "grad_norm": 0.14469249546527863, |
| "learning_rate": 1.5773133958464943e-05, |
| "loss": 1.555711030960083, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.073394495412844, |
| "grad_norm": 0.18093909323215485, |
| "learning_rate": 1.574737372953991e-05, |
| "loss": 1.3956756591796875, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.0764525993883791, |
| "grad_norm": 0.48004454374313354, |
| "learning_rate": 1.5721559501450725e-05, |
| "loss": 1.3599640130996704, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.0795107033639144, |
| "grad_norm": 0.14463584125041962, |
| "learning_rate": 1.56956915678222e-05, |
| "loss": 1.3459385633468628, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.0825688073394495, |
| "grad_norm": 0.14984038472175598, |
| "learning_rate": 1.5669770222890033e-05, |
| "loss": 1.421828031539917, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.0856269113149848, |
| "grad_norm": 0.15900403261184692, |
| "learning_rate": 1.564379576149744e-05, |
| "loss": 1.3967952728271484, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.0886850152905199, |
| "grad_norm": 0.1063733845949173, |
| "learning_rate": 1.561776847909182e-05, |
| "loss": 1.428087592124939, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.091743119266055, |
| "grad_norm": 0.10035966336727142, |
| "learning_rate": 1.5591688671721382e-05, |
| "loss": 1.4178051948547363, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.0948012232415902, |
| "grad_norm": 0.09157714992761612, |
| "learning_rate": 1.5565556636031784e-05, |
| "loss": 1.2836171388626099, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.0978593272171253, |
| "grad_norm": 0.18082380294799805, |
| "learning_rate": 1.553937266926275e-05, |
| "loss": 1.2452894449234009, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.1009174311926606, |
| "grad_norm": 0.10995090752840042, |
| "learning_rate": 1.551313706924471e-05, |
| "loss": 1.4010382890701294, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.1039755351681957, |
| "grad_norm": 0.14674334228038788, |
| "learning_rate": 1.5486850134395386e-05, |
| "loss": 1.521254062652588, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.107033639143731, |
| "grad_norm": 0.12439470738172531, |
| "learning_rate": 1.5460512163716413e-05, |
| "loss": 1.3604636192321777, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.110091743119266, |
| "grad_norm": 0.1851988583803177, |
| "learning_rate": 1.5434123456789935e-05, |
| "loss": 1.5580992698669434, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.1131498470948011, |
| "grad_norm": 0.13281765580177307, |
| "learning_rate": 1.54076843137752e-05, |
| "loss": 1.5704666376113892, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.1162079510703364, |
| "grad_norm": 0.2730847895145416, |
| "learning_rate": 1.5381195035405138e-05, |
| "loss": 1.333621859550476, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.1192660550458715, |
| "grad_norm": 0.20384500920772552, |
| "learning_rate": 1.535465592298295e-05, |
| "loss": 1.3046903610229492, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.1223241590214068, |
| "grad_norm": 0.11985786259174347, |
| "learning_rate": 1.5328067278378672e-05, |
| "loss": 1.4316734075546265, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.1253822629969419, |
| "grad_norm": 0.09865355491638184, |
| "learning_rate": 1.5301429404025752e-05, |
| "loss": 1.239088773727417, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.1284403669724772, |
| "grad_norm": 0.2635539174079895, |
| "learning_rate": 1.5274742602917594e-05, |
| "loss": 1.3266866207122803, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.1314984709480123, |
| "grad_norm": 0.1433660089969635, |
| "learning_rate": 1.5248007178604125e-05, |
| "loss": 1.2814881801605225, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.1345565749235473, |
| "grad_norm": 0.11298327147960663, |
| "learning_rate": 1.5221223435188346e-05, |
| "loss": 1.3507574796676636, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.1376146788990826, |
| "grad_norm": 0.16731660068035126, |
| "learning_rate": 1.5194391677322852e-05, |
| "loss": 1.2645881175994873, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.1406727828746177, |
| "grad_norm": 0.10872001200914383, |
| "learning_rate": 1.516751221020639e-05, |
| "loss": 1.310599684715271, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.143730886850153, |
| "grad_norm": 0.16045227646827698, |
| "learning_rate": 1.5140585339580372e-05, |
| "loss": 1.4600030183792114, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.146788990825688, |
| "grad_norm": 0.14260467886924744, |
| "learning_rate": 1.5113611371725405e-05, |
| "loss": 1.3346760272979736, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.1498470948012232, |
| "grad_norm": 0.10074730217456818, |
| "learning_rate": 1.5086590613457808e-05, |
| "loss": 1.2854725122451782, |
| "step": 752 |
| }, |
| { |
| "epoch": 1.1529051987767585, |
| "grad_norm": 0.23083588480949402, |
| "learning_rate": 1.5059523372126112e-05, |
| "loss": 1.509653925895691, |
| "step": 754 |
| }, |
| { |
| "epoch": 1.1559633027522935, |
| "grad_norm": 0.13861213624477386, |
| "learning_rate": 1.5032409955607578e-05, |
| "loss": 1.3918688297271729, |
| "step": 756 |
| }, |
| { |
| "epoch": 1.1590214067278288, |
| "grad_norm": 0.1264001429080963, |
| "learning_rate": 1.5005250672304687e-05, |
| "loss": 1.3027894496917725, |
| "step": 758 |
| }, |
| { |
| "epoch": 1.162079510703364, |
| "grad_norm": 0.20812299847602844, |
| "learning_rate": 1.4978045831141626e-05, |
| "loss": 1.4077316522598267, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.165137614678899, |
| "grad_norm": 0.19392693042755127, |
| "learning_rate": 1.4950795741560793e-05, |
| "loss": 1.3892260789871216, |
| "step": 762 |
| }, |
| { |
| "epoch": 1.1681957186544343, |
| "grad_norm": 0.14911192655563354, |
| "learning_rate": 1.4923500713519259e-05, |
| "loss": 1.4410245418548584, |
| "step": 764 |
| }, |
| { |
| "epoch": 1.1712538226299694, |
| "grad_norm": 0.39478185772895813, |
| "learning_rate": 1.4896161057485248e-05, |
| "loss": 1.3235844373703003, |
| "step": 766 |
| }, |
| { |
| "epoch": 1.1743119266055047, |
| "grad_norm": 0.24101360142230988, |
| "learning_rate": 1.4868777084434607e-05, |
| "loss": 1.4132270812988281, |
| "step": 768 |
| }, |
| { |
| "epoch": 1.1773700305810397, |
| "grad_norm": 0.1202300488948822, |
| "learning_rate": 1.4841349105847275e-05, |
| "loss": 1.308053970336914, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.1804281345565748, |
| "grad_norm": 0.13351330161094666, |
| "learning_rate": 1.4813877433703723e-05, |
| "loss": 1.6288765668869019, |
| "step": 772 |
| }, |
| { |
| "epoch": 1.18348623853211, |
| "grad_norm": 0.14136730134487152, |
| "learning_rate": 1.4786362380481427e-05, |
| "loss": 1.3889225721359253, |
| "step": 774 |
| }, |
| { |
| "epoch": 1.1865443425076452, |
| "grad_norm": 0.16120900213718414, |
| "learning_rate": 1.475880425915129e-05, |
| "loss": 1.4441417455673218, |
| "step": 776 |
| }, |
| { |
| "epoch": 1.1896024464831805, |
| "grad_norm": 0.29744964838027954, |
| "learning_rate": 1.4731203383174109e-05, |
| "loss": 1.4236780405044556, |
| "step": 778 |
| }, |
| { |
| "epoch": 1.1926605504587156, |
| "grad_norm": 0.15025398135185242, |
| "learning_rate": 1.4703560066496982e-05, |
| "loss": 1.3891468048095703, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.1957186544342508, |
| "grad_norm": 0.21743427217006683, |
| "learning_rate": 1.467587462354976e-05, |
| "loss": 1.3874202966690063, |
| "step": 782 |
| }, |
| { |
| "epoch": 1.198776758409786, |
| "grad_norm": 0.2140037715435028, |
| "learning_rate": 1.4648147369241452e-05, |
| "loss": 1.39767587184906, |
| "step": 784 |
| }, |
| { |
| "epoch": 1.2018348623853212, |
| "grad_norm": 0.15381909906864166, |
| "learning_rate": 1.4620378618956663e-05, |
| "loss": 1.3834893703460693, |
| "step": 786 |
| }, |
| { |
| "epoch": 1.2048929663608563, |
| "grad_norm": 0.1879752278327942, |
| "learning_rate": 1.4592568688551982e-05, |
| "loss": 1.4598149061203003, |
| "step": 788 |
| }, |
| { |
| "epoch": 1.2079510703363914, |
| "grad_norm": 0.13184306025505066, |
| "learning_rate": 1.4564717894352414e-05, |
| "loss": 1.3960065841674805, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.2110091743119267, |
| "grad_norm": 0.1333802342414856, |
| "learning_rate": 1.4536826553147762e-05, |
| "loss": 1.2982501983642578, |
| "step": 792 |
| }, |
| { |
| "epoch": 1.2140672782874617, |
| "grad_norm": 0.11398273706436157, |
| "learning_rate": 1.450889498218904e-05, |
| "loss": 1.2693817615509033, |
| "step": 794 |
| }, |
| { |
| "epoch": 1.217125382262997, |
| "grad_norm": 0.09528715908527374, |
| "learning_rate": 1.4480923499184851e-05, |
| "loss": 1.3175259828567505, |
| "step": 796 |
| }, |
| { |
| "epoch": 1.2201834862385321, |
| "grad_norm": 0.19164840877056122, |
| "learning_rate": 1.4452912422297783e-05, |
| "loss": 1.2807133197784424, |
| "step": 798 |
| }, |
| { |
| "epoch": 1.2232415902140672, |
| "grad_norm": 0.2311921864748001, |
| "learning_rate": 1.4424862070140782e-05, |
| "loss": 1.336983561515808, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.2262996941896025, |
| "grad_norm": 0.12099915742874146, |
| "learning_rate": 1.439677276177353e-05, |
| "loss": 1.2923321723937988, |
| "step": 802 |
| }, |
| { |
| "epoch": 1.2293577981651376, |
| "grad_norm": 0.19206538796424866, |
| "learning_rate": 1.4368644816698831e-05, |
| "loss": 1.5531456470489502, |
| "step": 804 |
| }, |
| { |
| "epoch": 1.2324159021406729, |
| "grad_norm": 0.13806301355361938, |
| "learning_rate": 1.4340478554858948e-05, |
| "loss": 1.5599114894866943, |
| "step": 806 |
| }, |
| { |
| "epoch": 1.235474006116208, |
| "grad_norm": 0.5654169321060181, |
| "learning_rate": 1.4312274296631986e-05, |
| "loss": 1.6073856353759766, |
| "step": 808 |
| }, |
| { |
| "epoch": 1.238532110091743, |
| "grad_norm": 0.1509082019329071, |
| "learning_rate": 1.428403236282824e-05, |
| "loss": 1.5244386196136475, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.2415902140672783, |
| "grad_norm": 0.1720213145017624, |
| "learning_rate": 1.4255753074686554e-05, |
| "loss": 1.5786917209625244, |
| "step": 812 |
| }, |
| { |
| "epoch": 1.2446483180428134, |
| "grad_norm": 0.15120771527290344, |
| "learning_rate": 1.4227436753870645e-05, |
| "loss": 1.4663296937942505, |
| "step": 814 |
| }, |
| { |
| "epoch": 1.2477064220183487, |
| "grad_norm": 0.3516086935997009, |
| "learning_rate": 1.4199083722465473e-05, |
| "loss": 1.5755946636199951, |
| "step": 816 |
| }, |
| { |
| "epoch": 1.2507645259938838, |
| "grad_norm": 0.19253411889076233, |
| "learning_rate": 1.4170694302973558e-05, |
| "loss": 1.3558707237243652, |
| "step": 818 |
| }, |
| { |
| "epoch": 1.2538226299694188, |
| "grad_norm": 1.164911150932312, |
| "learning_rate": 1.4142268818311318e-05, |
| "loss": 1.472469687461853, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.2568807339449541, |
| "grad_norm": 0.1894596666097641, |
| "learning_rate": 1.4113807591805403e-05, |
| "loss": 1.2632770538330078, |
| "step": 822 |
| }, |
| { |
| "epoch": 1.2599388379204892, |
| "grad_norm": 0.2617301642894745, |
| "learning_rate": 1.408531094718899e-05, |
| "loss": 1.481677532196045, |
| "step": 824 |
| }, |
| { |
| "epoch": 1.2629969418960245, |
| "grad_norm": 0.15160690248012543, |
| "learning_rate": 1.4056779208598148e-05, |
| "loss": 1.0913214683532715, |
| "step": 826 |
| }, |
| { |
| "epoch": 1.2660550458715596, |
| "grad_norm": 0.22855854034423828, |
| "learning_rate": 1.40282127005681e-05, |
| "loss": 1.0603516101837158, |
| "step": 828 |
| }, |
| { |
| "epoch": 1.2691131498470947, |
| "grad_norm": 0.2478480339050293, |
| "learning_rate": 1.3999611748029567e-05, |
| "loss": 1.2708483934402466, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.27217125382263, |
| "grad_norm": 0.1977955400943756, |
| "learning_rate": 1.3970976676305057e-05, |
| "loss": 1.3254450559616089, |
| "step": 832 |
| }, |
| { |
| "epoch": 1.2752293577981653, |
| "grad_norm": 0.6160405278205872, |
| "learning_rate": 1.3942307811105174e-05, |
| "loss": 1.438636064529419, |
| "step": 834 |
| }, |
| { |
| "epoch": 1.2782874617737003, |
| "grad_norm": 0.517250657081604, |
| "learning_rate": 1.3913605478524893e-05, |
| "loss": 1.3800345659255981, |
| "step": 836 |
| }, |
| { |
| "epoch": 1.2813455657492354, |
| "grad_norm": 0.14924201369285583, |
| "learning_rate": 1.3884870005039876e-05, |
| "loss": 1.1602931022644043, |
| "step": 838 |
| }, |
| { |
| "epoch": 1.2844036697247707, |
| "grad_norm": 0.11800377070903778, |
| "learning_rate": 1.3856101717502745e-05, |
| "loss": 1.1218430995941162, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.2874617737003058, |
| "grad_norm": 0.19642794132232666, |
| "learning_rate": 1.3827300943139368e-05, |
| "loss": 1.3466745615005493, |
| "step": 842 |
| }, |
| { |
| "epoch": 1.290519877675841, |
| "grad_norm": 0.24130217730998993, |
| "learning_rate": 1.3798468009545132e-05, |
| "loss": 1.259774088859558, |
| "step": 844 |
| }, |
| { |
| "epoch": 1.2935779816513762, |
| "grad_norm": 0.17322690784931183, |
| "learning_rate": 1.3769603244681224e-05, |
| "loss": 1.220116138458252, |
| "step": 846 |
| }, |
| { |
| "epoch": 1.2966360856269112, |
| "grad_norm": 0.1480104923248291, |
| "learning_rate": 1.3740706976870894e-05, |
| "loss": 1.4275568723678589, |
| "step": 848 |
| }, |
| { |
| "epoch": 1.2996941896024465, |
| "grad_norm": 0.17000189423561096, |
| "learning_rate": 1.3711779534795726e-05, |
| "loss": 1.3954261541366577, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.3027522935779816, |
| "grad_norm": 0.26931893825531006, |
| "learning_rate": 1.3682821247491888e-05, |
| "loss": 1.1222623586654663, |
| "step": 852 |
| }, |
| { |
| "epoch": 1.305810397553517, |
| "grad_norm": 0.1886853724718094, |
| "learning_rate": 1.365383244434641e-05, |
| "loss": 1.333132028579712, |
| "step": 854 |
| }, |
| { |
| "epoch": 1.308868501529052, |
| "grad_norm": 0.21062615513801575, |
| "learning_rate": 1.3624813455093426e-05, |
| "loss": 1.4747931957244873, |
| "step": 856 |
| }, |
| { |
| "epoch": 1.311926605504587, |
| "grad_norm": 0.18099278211593628, |
| "learning_rate": 1.3595764609810409e-05, |
| "loss": 1.617061972618103, |
| "step": 858 |
| }, |
| { |
| "epoch": 1.3149847094801224, |
| "grad_norm": 0.19481272995471954, |
| "learning_rate": 1.3566686238914442e-05, |
| "loss": 1.6403002738952637, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.3180428134556574, |
| "grad_norm": 0.15626607835292816, |
| "learning_rate": 1.3537578673158447e-05, |
| "loss": 1.762399673461914, |
| "step": 862 |
| }, |
| { |
| "epoch": 1.3211009174311927, |
| "grad_norm": 0.17738987505435944, |
| "learning_rate": 1.3508442243627414e-05, |
| "loss": 1.3067858219146729, |
| "step": 864 |
| }, |
| { |
| "epoch": 1.3241590214067278, |
| "grad_norm": 0.18935874104499817, |
| "learning_rate": 1.3479277281734665e-05, |
| "loss": 1.3856911659240723, |
| "step": 866 |
| }, |
| { |
| "epoch": 1.3272171253822629, |
| "grad_norm": 0.13359865546226501, |
| "learning_rate": 1.345008411921804e-05, |
| "loss": 1.2296390533447266, |
| "step": 868 |
| }, |
| { |
| "epoch": 1.3302752293577982, |
| "grad_norm": 0.14726310968399048, |
| "learning_rate": 1.342086308813617e-05, |
| "loss": 1.15348482131958, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.3333333333333333, |
| "grad_norm": 0.32481470704078674, |
| "learning_rate": 1.3391614520864665e-05, |
| "loss": 1.5289275646209717, |
| "step": 872 |
| }, |
| { |
| "epoch": 1.3363914373088686, |
| "grad_norm": 0.14799554646015167, |
| "learning_rate": 1.3362338750092345e-05, |
| "loss": 1.428595781326294, |
| "step": 874 |
| }, |
| { |
| "epoch": 1.3394495412844036, |
| "grad_norm": 0.15309157967567444, |
| "learning_rate": 1.3333036108817468e-05, |
| "loss": 1.6211644411087036, |
| "step": 876 |
| }, |
| { |
| "epoch": 1.3425076452599387, |
| "grad_norm": 0.2861718237400055, |
| "learning_rate": 1.330370693034392e-05, |
| "loss": 1.0886820554733276, |
| "step": 878 |
| }, |
| { |
| "epoch": 1.345565749235474, |
| "grad_norm": 0.22317488491535187, |
| "learning_rate": 1.3274351548277444e-05, |
| "loss": 1.330242395401001, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.3486238532110093, |
| "grad_norm": 0.15274080634117126, |
| "learning_rate": 1.3244970296521832e-05, |
| "loss": 1.5976680517196655, |
| "step": 882 |
| }, |
| { |
| "epoch": 1.3516819571865444, |
| "grad_norm": 0.13366302847862244, |
| "learning_rate": 1.3215563509275134e-05, |
| "loss": 1.3342012166976929, |
| "step": 884 |
| }, |
| { |
| "epoch": 1.3547400611620795, |
| "grad_norm": 0.17470058798789978, |
| "learning_rate": 1.3186131521025848e-05, |
| "loss": 1.4997955560684204, |
| "step": 886 |
| }, |
| { |
| "epoch": 1.3577981651376148, |
| "grad_norm": 0.19331222772598267, |
| "learning_rate": 1.3156674666549131e-05, |
| "loss": 1.3701845407485962, |
| "step": 888 |
| }, |
| { |
| "epoch": 1.3608562691131498, |
| "grad_norm": 0.1622903048992157, |
| "learning_rate": 1.3127193280902977e-05, |
| "loss": 1.2937860488891602, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.3639143730886851, |
| "grad_norm": 0.12703940272331238, |
| "learning_rate": 1.3097687699424411e-05, |
| "loss": 1.351706624031067, |
| "step": 892 |
| }, |
| { |
| "epoch": 1.3669724770642202, |
| "grad_norm": 0.24528349936008453, |
| "learning_rate": 1.306815825772567e-05, |
| "loss": 1.4135133028030396, |
| "step": 894 |
| }, |
| { |
| "epoch": 1.3700305810397553, |
| "grad_norm": 0.12702107429504395, |
| "learning_rate": 1.3038605291690401e-05, |
| "loss": 1.265538215637207, |
| "step": 896 |
| }, |
| { |
| "epoch": 1.3730886850152906, |
| "grad_norm": 0.11304288357496262, |
| "learning_rate": 1.300902913746982e-05, |
| "loss": 1.3828136920928955, |
| "step": 898 |
| }, |
| { |
| "epoch": 1.3761467889908257, |
| "grad_norm": 0.7138680219650269, |
| "learning_rate": 1.2979430131478895e-05, |
| "loss": 1.6873407363891602, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.379204892966361, |
| "grad_norm": 0.2656860947608948, |
| "learning_rate": 1.2949808610392536e-05, |
| "loss": 2.001885175704956, |
| "step": 902 |
| }, |
| { |
| "epoch": 1.382262996941896, |
| "grad_norm": 0.502257227897644, |
| "learning_rate": 1.2920164911141739e-05, |
| "loss": 1.3146154880523682, |
| "step": 904 |
| }, |
| { |
| "epoch": 1.385321100917431, |
| "grad_norm": 0.20982535183429718, |
| "learning_rate": 1.289049937090977e-05, |
| "loss": 1.3075802326202393, |
| "step": 906 |
| }, |
| { |
| "epoch": 1.3883792048929664, |
| "grad_norm": 0.22410085797309875, |
| "learning_rate": 1.2860812327128329e-05, |
| "loss": 1.3694846630096436, |
| "step": 908 |
| }, |
| { |
| "epoch": 1.3914373088685015, |
| "grad_norm": 0.5280166864395142, |
| "learning_rate": 1.2831104117473708e-05, |
| "loss": 1.4554123878479004, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.3944954128440368, |
| "grad_norm": 0.1828601062297821, |
| "learning_rate": 1.2801375079862941e-05, |
| "loss": 1.3881217241287231, |
| "step": 912 |
| }, |
| { |
| "epoch": 1.3975535168195719, |
| "grad_norm": 0.17245271801948547, |
| "learning_rate": 1.2771625552449989e-05, |
| "loss": 1.2560186386108398, |
| "step": 914 |
| }, |
| { |
| "epoch": 1.400611620795107, |
| "grad_norm": 0.5974639058113098, |
| "learning_rate": 1.2741855873621853e-05, |
| "loss": 1.182005763053894, |
| "step": 916 |
| }, |
| { |
| "epoch": 1.4036697247706422, |
| "grad_norm": 0.10427405685186386, |
| "learning_rate": 1.2712066381994771e-05, |
| "loss": 1.1084463596343994, |
| "step": 918 |
| }, |
| { |
| "epoch": 1.4067278287461773, |
| "grad_norm": 0.12200755625963211, |
| "learning_rate": 1.2682257416410324e-05, |
| "loss": 1.2430078983306885, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.4097859327217126, |
| "grad_norm": 0.18042518198490143, |
| "learning_rate": 1.2652429315931607e-05, |
| "loss": 1.4864814281463623, |
| "step": 922 |
| }, |
| { |
| "epoch": 1.4128440366972477, |
| "grad_norm": 0.2689650058746338, |
| "learning_rate": 1.2622582419839364e-05, |
| "loss": 1.5187842845916748, |
| "step": 924 |
| }, |
| { |
| "epoch": 1.4159021406727827, |
| "grad_norm": 0.2541566789150238, |
| "learning_rate": 1.259271706762813e-05, |
| "loss": 1.5033844709396362, |
| "step": 926 |
| }, |
| { |
| "epoch": 1.418960244648318, |
| "grad_norm": 0.2005116492509842, |
| "learning_rate": 1.2562833599002376e-05, |
| "loss": 1.5435667037963867, |
| "step": 928 |
| }, |
| { |
| "epoch": 1.4220183486238533, |
| "grad_norm": 0.1511639952659607, |
| "learning_rate": 1.2532932353872626e-05, |
| "loss": 1.3693771362304688, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.4250764525993884, |
| "grad_norm": 0.2612217664718628, |
| "learning_rate": 1.2503013672351614e-05, |
| "loss": 1.4804924726486206, |
| "step": 932 |
| }, |
| { |
| "epoch": 1.4281345565749235, |
| "grad_norm": 1.1553821563720703, |
| "learning_rate": 1.2473077894750406e-05, |
| "loss": 1.384411096572876, |
| "step": 934 |
| }, |
| { |
| "epoch": 1.4311926605504588, |
| "grad_norm": 0.11659958958625793, |
| "learning_rate": 1.2443125361574516e-05, |
| "loss": 1.308059811592102, |
| "step": 936 |
| }, |
| { |
| "epoch": 1.4342507645259939, |
| "grad_norm": 0.1776539385318756, |
| "learning_rate": 1.241315641352006e-05, |
| "loss": 1.3241143226623535, |
| "step": 938 |
| }, |
| { |
| "epoch": 1.4373088685015292, |
| "grad_norm": 0.17549070715904236, |
| "learning_rate": 1.238317139146985e-05, |
| "loss": 1.5167508125305176, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.4403669724770642, |
| "grad_norm": 0.13119763135910034, |
| "learning_rate": 1.235317063648955e-05, |
| "loss": 1.279841661453247, |
| "step": 942 |
| }, |
| { |
| "epoch": 1.4434250764525993, |
| "grad_norm": 0.31867051124572754, |
| "learning_rate": 1.2323154489823766e-05, |
| "loss": 1.4430445432662964, |
| "step": 944 |
| }, |
| { |
| "epoch": 1.4464831804281346, |
| "grad_norm": 0.13233892619609833, |
| "learning_rate": 1.2293123292892176e-05, |
| "loss": 1.3445417881011963, |
| "step": 946 |
| }, |
| { |
| "epoch": 1.4495412844036697, |
| "grad_norm": 0.1906617283821106, |
| "learning_rate": 1.2263077387285656e-05, |
| "loss": 1.3487993478775024, |
| "step": 948 |
| }, |
| { |
| "epoch": 1.452599388379205, |
| "grad_norm": 0.2014293074607849, |
| "learning_rate": 1.2233017114762383e-05, |
| "loss": 1.503042221069336, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.45565749235474, |
| "grad_norm": 0.1651790589094162, |
| "learning_rate": 1.2202942817243945e-05, |
| "loss": 1.501765489578247, |
| "step": 952 |
| }, |
| { |
| "epoch": 1.4587155963302751, |
| "grad_norm": 0.1424659937620163, |
| "learning_rate": 1.217285483681147e-05, |
| "loss": 1.3252204656600952, |
| "step": 954 |
| }, |
| { |
| "epoch": 1.4617737003058104, |
| "grad_norm": 0.12578605115413666, |
| "learning_rate": 1.2142753515701715e-05, |
| "loss": 1.270477056503296, |
| "step": 956 |
| }, |
| { |
| "epoch": 1.4648318042813455, |
| "grad_norm": 0.15729480981826782, |
| "learning_rate": 1.2112639196303177e-05, |
| "loss": 1.3961900472640991, |
| "step": 958 |
| }, |
| { |
| "epoch": 1.4678899082568808, |
| "grad_norm": 0.13876628875732422, |
| "learning_rate": 1.2082512221152211e-05, |
| "loss": 1.351151704788208, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.470948012232416, |
| "grad_norm": 0.16256465017795563, |
| "learning_rate": 1.2052372932929124e-05, |
| "loss": 1.501070261001587, |
| "step": 962 |
| }, |
| { |
| "epoch": 1.474006116207951, |
| "grad_norm": 0.09779898822307587, |
| "learning_rate": 1.2022221674454276e-05, |
| "loss": 1.364396095275879, |
| "step": 964 |
| }, |
| { |
| "epoch": 1.4770642201834863, |
| "grad_norm": 0.14863306283950806, |
| "learning_rate": 1.1992058788684178e-05, |
| "loss": 1.309662103652954, |
| "step": 966 |
| }, |
| { |
| "epoch": 1.4801223241590213, |
| "grad_norm": 0.1444341093301773, |
| "learning_rate": 1.1961884618707606e-05, |
| "loss": 1.2788057327270508, |
| "step": 968 |
| }, |
| { |
| "epoch": 1.4831804281345566, |
| "grad_norm": 1.06325364112854, |
| "learning_rate": 1.1931699507741681e-05, |
| "loss": 1.399967074394226, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.4862385321100917, |
| "grad_norm": 0.23110361397266388, |
| "learning_rate": 1.1901503799127978e-05, |
| "loss": 1.3916560411453247, |
| "step": 972 |
| }, |
| { |
| "epoch": 1.4892966360856268, |
| "grad_norm": 0.22410522401332855, |
| "learning_rate": 1.1871297836328615e-05, |
| "loss": 1.356363296508789, |
| "step": 974 |
| }, |
| { |
| "epoch": 1.492354740061162, |
| "grad_norm": 0.1541077345609665, |
| "learning_rate": 1.1841081962922339e-05, |
| "loss": 1.547667384147644, |
| "step": 976 |
| }, |
| { |
| "epoch": 1.4954128440366974, |
| "grad_norm": 0.24510353803634644, |
| "learning_rate": 1.1810856522600633e-05, |
| "loss": 1.7400732040405273, |
| "step": 978 |
| }, |
| { |
| "epoch": 1.4984709480122325, |
| "grad_norm": 0.2446478307247162, |
| "learning_rate": 1.1780621859163799e-05, |
| "loss": 1.618131160736084, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.5015290519877675, |
| "grad_norm": 0.16440457105636597, |
| "learning_rate": 1.1750378316517042e-05, |
| "loss": 1.6069653034210205, |
| "step": 982 |
| }, |
| { |
| "epoch": 1.5045871559633026, |
| "grad_norm": 0.16012217104434967, |
| "learning_rate": 1.1720126238666574e-05, |
| "loss": 1.5139660835266113, |
| "step": 984 |
| }, |
| { |
| "epoch": 1.507645259938838, |
| "grad_norm": 0.16031064093112946, |
| "learning_rate": 1.1689865969715682e-05, |
| "loss": 1.187379240989685, |
| "step": 986 |
| }, |
| { |
| "epoch": 1.5107033639143732, |
| "grad_norm": 0.1701585203409195, |
| "learning_rate": 1.1659597853860822e-05, |
| "loss": 1.1819970607757568, |
| "step": 988 |
| }, |
| { |
| "epoch": 1.5137614678899083, |
| "grad_norm": 0.41513463854789734, |
| "learning_rate": 1.1629322235387712e-05, |
| "loss": 1.5957657098770142, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.5168195718654434, |
| "grad_norm": 0.1705729365348816, |
| "learning_rate": 1.1599039458667404e-05, |
| "loss": 1.5875592231750488, |
| "step": 992 |
| }, |
| { |
| "epoch": 1.5198776758409784, |
| "grad_norm": 0.5938596725463867, |
| "learning_rate": 1.1568749868152376e-05, |
| "loss": 1.6956580877304077, |
| "step": 994 |
| }, |
| { |
| "epoch": 1.5229357798165137, |
| "grad_norm": 0.23035727441310883, |
| "learning_rate": 1.1538453808372601e-05, |
| "loss": 1.629127025604248, |
| "step": 996 |
| }, |
| { |
| "epoch": 1.525993883792049, |
| "grad_norm": 0.543903112411499, |
| "learning_rate": 1.1508151623931652e-05, |
| "loss": 1.677422285079956, |
| "step": 998 |
| }, |
| { |
| "epoch": 1.529051987767584, |
| "grad_norm": 0.28359514474868774, |
| "learning_rate": 1.1477843659502748e-05, |
| "loss": 1.5048930644989014, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.5321100917431192, |
| "grad_norm": 0.1776496171951294, |
| "learning_rate": 1.1447530259824867e-05, |
| "loss": 1.5032374858856201, |
| "step": 1002 |
| }, |
| { |
| "epoch": 1.5351681957186545, |
| "grad_norm": 0.30241939425468445, |
| "learning_rate": 1.1417211769698803e-05, |
| "loss": 1.3834539651870728, |
| "step": 1004 |
| }, |
| { |
| "epoch": 1.5382262996941896, |
| "grad_norm": 0.2581124007701874, |
| "learning_rate": 1.1386888533983263e-05, |
| "loss": 1.6410409212112427, |
| "step": 1006 |
| }, |
| { |
| "epoch": 1.5412844036697249, |
| "grad_norm": 0.2438291609287262, |
| "learning_rate": 1.1356560897590914e-05, |
| "loss": 1.4713975191116333, |
| "step": 1008 |
| }, |
| { |
| "epoch": 1.54434250764526, |
| "grad_norm": 0.1657806932926178, |
| "learning_rate": 1.1326229205484494e-05, |
| "loss": 1.1473451852798462, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.547400611620795, |
| "grad_norm": 0.21121683716773987, |
| "learning_rate": 1.1295893802672867e-05, |
| "loss": 1.659318208694458, |
| "step": 1012 |
| }, |
| { |
| "epoch": 1.5504587155963303, |
| "grad_norm": 0.14165081083774567, |
| "learning_rate": 1.1265555034207103e-05, |
| "loss": 1.7471133470535278, |
| "step": 1014 |
| }, |
| { |
| "epoch": 1.5535168195718656, |
| "grad_norm": 0.30989158153533936, |
| "learning_rate": 1.1235213245176564e-05, |
| "loss": 1.81640625, |
| "step": 1016 |
| }, |
| { |
| "epoch": 1.5565749235474007, |
| "grad_norm": 0.4983619749546051, |
| "learning_rate": 1.1204868780704952e-05, |
| "loss": 1.7423572540283203, |
| "step": 1018 |
| }, |
| { |
| "epoch": 1.5596330275229358, |
| "grad_norm": 0.2264312505722046, |
| "learning_rate": 1.117452198594642e-05, |
| "loss": 1.7088443040847778, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.5626911314984708, |
| "grad_norm": 0.2084045708179474, |
| "learning_rate": 1.1144173206081619e-05, |
| "loss": 1.5804930925369263, |
| "step": 1022 |
| }, |
| { |
| "epoch": 1.5657492354740061, |
| "grad_norm": 0.5413640737533569, |
| "learning_rate": 1.111382278631377e-05, |
| "loss": 1.4329633712768555, |
| "step": 1024 |
| }, |
| { |
| "epoch": 1.5688073394495414, |
| "grad_norm": 0.19622007012367249, |
| "learning_rate": 1.1083471071864766e-05, |
| "loss": 0.9974356293678284, |
| "step": 1026 |
| }, |
| { |
| "epoch": 1.5718654434250765, |
| "grad_norm": 0.3593452572822571, |
| "learning_rate": 1.105311840797121e-05, |
| "loss": 0.9936216473579407, |
| "step": 1028 |
| }, |
| { |
| "epoch": 1.5749235474006116, |
| "grad_norm": 0.12100227177143097, |
| "learning_rate": 1.1022765139880517e-05, |
| "loss": 1.2237460613250732, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.5779816513761467, |
| "grad_norm": 0.33734220266342163, |
| "learning_rate": 1.0992411612846962e-05, |
| "loss": 1.1613401174545288, |
| "step": 1032 |
| }, |
| { |
| "epoch": 1.581039755351682, |
| "grad_norm": 0.6587600111961365, |
| "learning_rate": 1.0962058172127774e-05, |
| "loss": 1.1073070764541626, |
| "step": 1034 |
| }, |
| { |
| "epoch": 1.5840978593272173, |
| "grad_norm": 0.11212319880723953, |
| "learning_rate": 1.0931705162979203e-05, |
| "loss": 1.1541969776153564, |
| "step": 1036 |
| }, |
| { |
| "epoch": 1.5871559633027523, |
| "grad_norm": 0.24790284037590027, |
| "learning_rate": 1.090135293065258e-05, |
| "loss": 1.386697769165039, |
| "step": 1038 |
| }, |
| { |
| "epoch": 1.5902140672782874, |
| "grad_norm": 0.35141927003860474, |
| "learning_rate": 1.0871001820390406e-05, |
| "loss": 1.3391034603118896, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.5932721712538225, |
| "grad_norm": 0.11842223256826401, |
| "learning_rate": 1.0840652177422418e-05, |
| "loss": 1.1840428113937378, |
| "step": 1042 |
| }, |
| { |
| "epoch": 1.5963302752293578, |
| "grad_norm": 0.3350653648376465, |
| "learning_rate": 1.0810304346961666e-05, |
| "loss": 1.390136480331421, |
| "step": 1044 |
| }, |
| { |
| "epoch": 1.599388379204893, |
| "grad_norm": 0.15494462847709656, |
| "learning_rate": 1.0779958674200577e-05, |
| "loss": 1.457975149154663, |
| "step": 1046 |
| }, |
| { |
| "epoch": 1.6024464831804281, |
| "grad_norm": 0.23394669592380524, |
| "learning_rate": 1.0749615504307044e-05, |
| "loss": 1.2204726934432983, |
| "step": 1048 |
| }, |
| { |
| "epoch": 1.6055045871559632, |
| "grad_norm": 0.15462008118629456, |
| "learning_rate": 1.0719275182420484e-05, |
| "loss": 1.4140408039093018, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.6085626911314985, |
| "grad_norm": 0.2375929355621338, |
| "learning_rate": 1.0688938053647919e-05, |
| "loss": 1.6344470977783203, |
| "step": 1052 |
| }, |
| { |
| "epoch": 1.6116207951070336, |
| "grad_norm": 0.24957749247550964, |
| "learning_rate": 1.0658604463060059e-05, |
| "loss": 1.1358975172042847, |
| "step": 1054 |
| }, |
| { |
| "epoch": 1.614678899082569, |
| "grad_norm": 0.25691115856170654, |
| "learning_rate": 1.062827475568736e-05, |
| "loss": 1.384519338607788, |
| "step": 1056 |
| }, |
| { |
| "epoch": 1.617737003058104, |
| "grad_norm": 0.12493827193975449, |
| "learning_rate": 1.059794927651611e-05, |
| "loss": 1.4746840000152588, |
| "step": 1058 |
| }, |
| { |
| "epoch": 1.620795107033639, |
| "grad_norm": 0.23655423521995544, |
| "learning_rate": 1.0567628370484503e-05, |
| "loss": 1.3424731492996216, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.6238532110091743, |
| "grad_norm": 0.11737493425607681, |
| "learning_rate": 1.0537312382478721e-05, |
| "loss": 1.1979994773864746, |
| "step": 1062 |
| }, |
| { |
| "epoch": 1.6269113149847096, |
| "grad_norm": 0.355948269367218, |
| "learning_rate": 1.0507001657329003e-05, |
| "loss": 0.8049882054328918, |
| "step": 1064 |
| }, |
| { |
| "epoch": 1.6299694189602447, |
| "grad_norm": 0.18187829852104187, |
| "learning_rate": 1.047669653980572e-05, |
| "loss": 1.5212846994400024, |
| "step": 1066 |
| }, |
| { |
| "epoch": 1.6330275229357798, |
| "grad_norm": 0.5914613604545593, |
| "learning_rate": 1.0446397374615466e-05, |
| "loss": 1.4378846883773804, |
| "step": 1068 |
| }, |
| { |
| "epoch": 1.6360856269113149, |
| "grad_norm": 0.20652273297309875, |
| "learning_rate": 1.0416104506397127e-05, |
| "loss": 1.327819585800171, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.6391437308868502, |
| "grad_norm": 0.23796908557415009, |
| "learning_rate": 1.0385818279717963e-05, |
| "loss": 1.451419711112976, |
| "step": 1072 |
| }, |
| { |
| "epoch": 1.6422018348623855, |
| "grad_norm": 0.2503039538860321, |
| "learning_rate": 1.0355539039069692e-05, |
| "loss": 1.3870141506195068, |
| "step": 1074 |
| }, |
| { |
| "epoch": 1.6452599388379205, |
| "grad_norm": 0.21600009500980377, |
| "learning_rate": 1.032526712886457e-05, |
| "loss": 1.3547929525375366, |
| "step": 1076 |
| }, |
| { |
| "epoch": 1.6483180428134556, |
| "grad_norm": 0.42859020829200745, |
| "learning_rate": 1.0295002893431465e-05, |
| "loss": 1.5427790880203247, |
| "step": 1078 |
| }, |
| { |
| "epoch": 1.6513761467889907, |
| "grad_norm": 0.6327798366546631, |
| "learning_rate": 1.0264746677011957e-05, |
| "loss": 1.185137152671814, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.654434250764526, |
| "grad_norm": 0.25891441106796265, |
| "learning_rate": 1.0234498823756409e-05, |
| "loss": 0.9753936529159546, |
| "step": 1082 |
| }, |
| { |
| "epoch": 1.6574923547400613, |
| "grad_norm": 0.23983488976955414, |
| "learning_rate": 1.020425967772006e-05, |
| "loss": 1.6024234294891357, |
| "step": 1084 |
| }, |
| { |
| "epoch": 1.6605504587155964, |
| "grad_norm": 0.1868588924407959, |
| "learning_rate": 1.0174029582859104e-05, |
| "loss": 1.3905179500579834, |
| "step": 1086 |
| }, |
| { |
| "epoch": 1.6636085626911314, |
| "grad_norm": 0.16272062063217163, |
| "learning_rate": 1.0143808883026785e-05, |
| "loss": 1.5206785202026367, |
| "step": 1088 |
| }, |
| { |
| "epoch": 1.6666666666666665, |
| "grad_norm": 0.19251751899719238, |
| "learning_rate": 1.0113597921969482e-05, |
| "loss": 1.3476663827896118, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.6697247706422018, |
| "grad_norm": 0.22667865455150604, |
| "learning_rate": 1.0083397043322802e-05, |
| "loss": 1.5934844017028809, |
| "step": 1092 |
| }, |
| { |
| "epoch": 1.6727828746177371, |
| "grad_norm": 0.18778584897518158, |
| "learning_rate": 1.0053206590607667e-05, |
| "loss": 1.453669548034668, |
| "step": 1094 |
| }, |
| { |
| "epoch": 1.6758409785932722, |
| "grad_norm": 0.22909604012966156, |
| "learning_rate": 1.002302690722641e-05, |
| "loss": 1.4673449993133545, |
| "step": 1096 |
| }, |
| { |
| "epoch": 1.6788990825688073, |
| "grad_norm": 0.2620207369327545, |
| "learning_rate": 9.992858336458863e-06, |
| "loss": 1.3346080780029297, |
| "step": 1098 |
| }, |
| { |
| "epoch": 1.6819571865443423, |
| "grad_norm": 0.21637646853923798, |
| "learning_rate": 9.962701221458468e-06, |
| "loss": 1.0002862215042114, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.6850152905198776, |
| "grad_norm": 0.23026728630065918, |
| "learning_rate": 9.932555905248359e-06, |
| "loss": 1.467809796333313, |
| "step": 1102 |
| }, |
| { |
| "epoch": 1.688073394495413, |
| "grad_norm": 0.2184467613697052, |
| "learning_rate": 9.902422730717447e-06, |
| "loss": 1.437518835067749, |
| "step": 1104 |
| }, |
| { |
| "epoch": 1.691131498470948, |
| "grad_norm": 0.1917940080165863, |
| "learning_rate": 9.872302040616564e-06, |
| "loss": 1.4575303792953491, |
| "step": 1106 |
| }, |
| { |
| "epoch": 1.694189602446483, |
| "grad_norm": 0.17743395268917084, |
| "learning_rate": 9.842194177554522e-06, |
| "loss": 1.4566274881362915, |
| "step": 1108 |
| }, |
| { |
| "epoch": 1.6972477064220184, |
| "grad_norm": 0.1295081377029419, |
| "learning_rate": 9.812099483994237e-06, |
| "loss": 1.26300048828125, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.7003058103975535, |
| "grad_norm": 1.3070435523986816, |
| "learning_rate": 9.782018302248823e-06, |
| "loss": 1.4953501224517822, |
| "step": 1112 |
| }, |
| { |
| "epoch": 1.7033639143730888, |
| "grad_norm": 0.18546514213085175, |
| "learning_rate": 9.751950974477706e-06, |
| "loss": 1.5722805261611938, |
| "step": 1114 |
| }, |
| { |
| "epoch": 1.7064220183486238, |
| "grad_norm": 0.2597721219062805, |
| "learning_rate": 9.721897842682733e-06, |
| "loss": 1.2844600677490234, |
| "step": 1116 |
| }, |
| { |
| "epoch": 1.709480122324159, |
| "grad_norm": 0.29226014018058777, |
| "learning_rate": 9.691859248704271e-06, |
| "loss": 1.5464119911193848, |
| "step": 1118 |
| }, |
| { |
| "epoch": 1.7125382262996942, |
| "grad_norm": 0.23562973737716675, |
| "learning_rate": 9.661835534217332e-06, |
| "loss": 1.3296096324920654, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.7155963302752295, |
| "grad_norm": 0.1732785552740097, |
| "learning_rate": 9.631827040727679e-06, |
| "loss": 1.4681756496429443, |
| "step": 1122 |
| }, |
| { |
| "epoch": 1.7186544342507646, |
| "grad_norm": 0.20363065600395203, |
| "learning_rate": 9.601834109567942e-06, |
| "loss": 1.4041388034820557, |
| "step": 1124 |
| }, |
| { |
| "epoch": 1.7217125382262997, |
| "grad_norm": 0.1860877275466919, |
| "learning_rate": 9.571857081893739e-06, |
| "loss": 1.534124493598938, |
| "step": 1126 |
| }, |
| { |
| "epoch": 1.7247706422018347, |
| "grad_norm": 0.2061803787946701, |
| "learning_rate": 9.541896298679794e-06, |
| "loss": 1.6303178071975708, |
| "step": 1128 |
| }, |
| { |
| "epoch": 1.72782874617737, |
| "grad_norm": 1.1309088468551636, |
| "learning_rate": 9.511952100716051e-06, |
| "loss": 1.5267082452774048, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.7308868501529053, |
| "grad_norm": 0.1478930562734604, |
| "learning_rate": 9.482024828603813e-06, |
| "loss": 1.3527547121047974, |
| "step": 1132 |
| }, |
| { |
| "epoch": 1.7339449541284404, |
| "grad_norm": 0.18034934997558594, |
| "learning_rate": 9.452114822751854e-06, |
| "loss": 1.4615706205368042, |
| "step": 1134 |
| }, |
| { |
| "epoch": 1.7370030581039755, |
| "grad_norm": 0.29384928941726685, |
| "learning_rate": 9.422222423372557e-06, |
| "loss": 1.4624444246292114, |
| "step": 1136 |
| }, |
| { |
| "epoch": 1.7400611620795106, |
| "grad_norm": 0.18714860081672668, |
| "learning_rate": 9.392347970478035e-06, |
| "loss": 1.4573503732681274, |
| "step": 1138 |
| }, |
| { |
| "epoch": 1.7431192660550459, |
| "grad_norm": 0.24465042352676392, |
| "learning_rate": 9.362491803876267e-06, |
| "loss": 1.4855374097824097, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.7461773700305812, |
| "grad_norm": 0.3961007297039032, |
| "learning_rate": 9.332654263167242e-06, |
| "loss": 1.4774978160858154, |
| "step": 1142 |
| }, |
| { |
| "epoch": 1.7492354740061162, |
| "grad_norm": 0.20310214161872864, |
| "learning_rate": 9.30283568773908e-06, |
| "loss": 1.267431378364563, |
| "step": 1144 |
| }, |
| { |
| "epoch": 1.7522935779816513, |
| "grad_norm": 0.22026310861110687, |
| "learning_rate": 9.273036416764182e-06, |
| "loss": 1.3370521068572998, |
| "step": 1146 |
| }, |
| { |
| "epoch": 1.7553516819571864, |
| "grad_norm": 0.23095861077308655, |
| "learning_rate": 9.243256789195374e-06, |
| "loss": 1.2592549324035645, |
| "step": 1148 |
| }, |
| { |
| "epoch": 1.7584097859327217, |
| "grad_norm": 0.23291274905204773, |
| "learning_rate": 9.213497143762036e-06, |
| "loss": 1.340893030166626, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.761467889908257, |
| "grad_norm": 0.21600410342216492, |
| "learning_rate": 9.18375781896628e-06, |
| "loss": 1.3454080820083618, |
| "step": 1152 |
| }, |
| { |
| "epoch": 1.764525993883792, |
| "grad_norm": 0.1855204850435257, |
| "learning_rate": 9.154039153079054e-06, |
| "loss": 1.1888267993927002, |
| "step": 1154 |
| }, |
| { |
| "epoch": 1.7675840978593271, |
| "grad_norm": 0.13759852945804596, |
| "learning_rate": 9.12434148413635e-06, |
| "loss": 0.8760353922843933, |
| "step": 1156 |
| }, |
| { |
| "epoch": 1.7706422018348624, |
| "grad_norm": 0.12697182595729828, |
| "learning_rate": 9.094665149935307e-06, |
| "loss": 1.1793420314788818, |
| "step": 1158 |
| }, |
| { |
| "epoch": 1.7737003058103975, |
| "grad_norm": 0.14431315660476685, |
| "learning_rate": 9.065010488030397e-06, |
| "loss": 1.241503119468689, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.7767584097859328, |
| "grad_norm": 0.12438403815031052, |
| "learning_rate": 9.035377835729588e-06, |
| "loss": 1.3003443479537964, |
| "step": 1162 |
| }, |
| { |
| "epoch": 1.7798165137614679, |
| "grad_norm": 0.2160700112581253, |
| "learning_rate": 9.005767530090489e-06, |
| "loss": 1.3123278617858887, |
| "step": 1164 |
| }, |
| { |
| "epoch": 1.782874617737003, |
| "grad_norm": 0.12084904313087463, |
| "learning_rate": 8.976179907916528e-06, |
| "loss": 1.3691197633743286, |
| "step": 1166 |
| }, |
| { |
| "epoch": 1.7859327217125383, |
| "grad_norm": 0.3914507329463959, |
| "learning_rate": 8.946615305753127e-06, |
| "loss": 1.2905107736587524, |
| "step": 1168 |
| }, |
| { |
| "epoch": 1.7889908256880735, |
| "grad_norm": 0.12264278531074524, |
| "learning_rate": 8.917074059883852e-06, |
| "loss": 1.3113428354263306, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.7920489296636086, |
| "grad_norm": 0.11514458805322647, |
| "learning_rate": 8.887556506326615e-06, |
| "loss": 1.2807221412658691, |
| "step": 1172 |
| }, |
| { |
| "epoch": 1.7951070336391437, |
| "grad_norm": 0.11093078553676605, |
| "learning_rate": 8.858062980829838e-06, |
| "loss": 1.2935819625854492, |
| "step": 1174 |
| }, |
| { |
| "epoch": 1.7981651376146788, |
| "grad_norm": 0.10017645359039307, |
| "learning_rate": 8.828593818868622e-06, |
| "loss": 1.253427267074585, |
| "step": 1176 |
| }, |
| { |
| "epoch": 1.801223241590214, |
| "grad_norm": 0.10647612810134888, |
| "learning_rate": 8.799149355640961e-06, |
| "loss": 1.2969856262207031, |
| "step": 1178 |
| }, |
| { |
| "epoch": 1.8042813455657494, |
| "grad_norm": 0.08907388150691986, |
| "learning_rate": 8.769729926063904e-06, |
| "loss": 1.247278094291687, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.8073394495412844, |
| "grad_norm": 0.05995384231209755, |
| "learning_rate": 8.740335864769747e-06, |
| "loss": 1.1338144540786743, |
| "step": 1182 |
| }, |
| { |
| "epoch": 1.8103975535168195, |
| "grad_norm": 0.14091065526008606, |
| "learning_rate": 8.71096750610225e-06, |
| "loss": 1.1891536712646484, |
| "step": 1184 |
| }, |
| { |
| "epoch": 1.8134556574923546, |
| "grad_norm": 0.11070030927658081, |
| "learning_rate": 8.681625184112803e-06, |
| "loss": 0.9717427492141724, |
| "step": 1186 |
| }, |
| { |
| "epoch": 1.81651376146789, |
| "grad_norm": 0.10197513550519943, |
| "learning_rate": 8.652309232556651e-06, |
| "loss": 1.1956229209899902, |
| "step": 1188 |
| }, |
| { |
| "epoch": 1.8195718654434252, |
| "grad_norm": 0.13055320084095, |
| "learning_rate": 8.623019984889078e-06, |
| "loss": 1.23086416721344, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.8226299694189603, |
| "grad_norm": 0.10850291699171066, |
| "learning_rate": 8.593757774261638e-06, |
| "loss": 1.246797800064087, |
| "step": 1192 |
| }, |
| { |
| "epoch": 1.8256880733944953, |
| "grad_norm": 0.18308448791503906, |
| "learning_rate": 8.56452293351833e-06, |
| "loss": 1.216019630432129, |
| "step": 1194 |
| }, |
| { |
| "epoch": 1.8287461773700304, |
| "grad_norm": 0.0984484925866127, |
| "learning_rate": 8.535315795191858e-06, |
| "loss": 1.234249234199524, |
| "step": 1196 |
| }, |
| { |
| "epoch": 1.8318042813455657, |
| "grad_norm": 0.10241630673408508, |
| "learning_rate": 8.506136691499805e-06, |
| "loss": 1.2063539028167725, |
| "step": 1198 |
| }, |
| { |
| "epoch": 1.834862385321101, |
| "grad_norm": 0.12511692941188812, |
| "learning_rate": 8.476985954340877e-06, |
| "loss": 1.181626319885254, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.837920489296636, |
| "grad_norm": 0.20748142898082733, |
| "learning_rate": 8.447863915291133e-06, |
| "loss": 1.2395812273025513, |
| "step": 1202 |
| }, |
| { |
| "epoch": 1.8409785932721712, |
| "grad_norm": 0.13440315425395966, |
| "learning_rate": 8.418770905600191e-06, |
| "loss": 1.2137912511825562, |
| "step": 1204 |
| }, |
| { |
| "epoch": 1.8440366972477065, |
| "grad_norm": 0.15985599160194397, |
| "learning_rate": 8.389707256187484e-06, |
| "loss": 1.187180757522583, |
| "step": 1206 |
| }, |
| { |
| "epoch": 1.8470948012232415, |
| "grad_norm": 0.13617949187755585, |
| "learning_rate": 8.360673297638484e-06, |
| "loss": 1.1760993003845215, |
| "step": 1208 |
| }, |
| { |
| "epoch": 1.8501529051987768, |
| "grad_norm": 0.16985023021697998, |
| "learning_rate": 8.331669360200937e-06, |
| "loss": 1.2030588388442993, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.853211009174312, |
| "grad_norm": 0.1729569286108017, |
| "learning_rate": 8.302695773781124e-06, |
| "loss": 1.195267915725708, |
| "step": 1212 |
| }, |
| { |
| "epoch": 1.856269113149847, |
| "grad_norm": 0.1228983923792839, |
| "learning_rate": 8.273752867940081e-06, |
| "loss": 1.151159644126892, |
| "step": 1214 |
| }, |
| { |
| "epoch": 1.8593272171253823, |
| "grad_norm": 0.11534194648265839, |
| "learning_rate": 8.244840971889885e-06, |
| "loss": 1.2157320976257324, |
| "step": 1216 |
| }, |
| { |
| "epoch": 1.8623853211009176, |
| "grad_norm": 0.1633436530828476, |
| "learning_rate": 8.215960414489873e-06, |
| "loss": 1.243991732597351, |
| "step": 1218 |
| }, |
| { |
| "epoch": 1.8654434250764527, |
| "grad_norm": 0.12825258076190948, |
| "learning_rate": 8.187111524242938e-06, |
| "loss": 1.2486207485198975, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.8685015290519877, |
| "grad_norm": 0.13412782549858093, |
| "learning_rate": 8.15829462929176e-06, |
| "loss": 1.2371561527252197, |
| "step": 1222 |
| }, |
| { |
| "epoch": 1.8715596330275228, |
| "grad_norm": 0.14021556079387665, |
| "learning_rate": 8.129510057415091e-06, |
| "loss": 1.2565103769302368, |
| "step": 1224 |
| }, |
| { |
| "epoch": 1.8746177370030581, |
| "grad_norm": 0.2174684852361679, |
| "learning_rate": 8.100758136024027e-06, |
| "loss": 1.238993525505066, |
| "step": 1226 |
| }, |
| { |
| "epoch": 1.8776758409785934, |
| "grad_norm": 0.17089374363422394, |
| "learning_rate": 8.072039192158272e-06, |
| "loss": 1.2617957592010498, |
| "step": 1228 |
| }, |
| { |
| "epoch": 1.8807339449541285, |
| "grad_norm": 0.08758228272199631, |
| "learning_rate": 8.043353552482435e-06, |
| "loss": 1.1902625560760498, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.8837920489296636, |
| "grad_norm": 0.1003388836979866, |
| "learning_rate": 8.014701543282302e-06, |
| "loss": 1.2346378564834595, |
| "step": 1232 |
| }, |
| { |
| "epoch": 1.8868501529051986, |
| "grad_norm": 0.09711755067110062, |
| "learning_rate": 7.986083490461124e-06, |
| "loss": 1.197476863861084, |
| "step": 1234 |
| }, |
| { |
| "epoch": 1.889908256880734, |
| "grad_norm": 0.14229439198970795, |
| "learning_rate": 7.957499719535922e-06, |
| "loss": 1.2236450910568237, |
| "step": 1236 |
| }, |
| { |
| "epoch": 1.8929663608562692, |
| "grad_norm": 0.13267415761947632, |
| "learning_rate": 7.928950555633767e-06, |
| "loss": 1.231372594833374, |
| "step": 1238 |
| }, |
| { |
| "epoch": 1.8960244648318043, |
| "grad_norm": 0.1696489304304123, |
| "learning_rate": 7.900436323488098e-06, |
| "loss": 1.2525113821029663, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.8990825688073394, |
| "grad_norm": 0.11210724711418152, |
| "learning_rate": 7.871957347435025e-06, |
| "loss": 1.2359347343444824, |
| "step": 1242 |
| }, |
| { |
| "epoch": 1.9021406727828745, |
| "grad_norm": 0.17409557104110718, |
| "learning_rate": 7.843513951409618e-06, |
| "loss": 1.1706980466842651, |
| "step": 1244 |
| }, |
| { |
| "epoch": 1.9051987767584098, |
| "grad_norm": 0.13088209927082062, |
| "learning_rate": 7.815106458942265e-06, |
| "loss": 1.2025657892227173, |
| "step": 1246 |
| }, |
| { |
| "epoch": 1.908256880733945, |
| "grad_norm": 0.11867424845695496, |
| "learning_rate": 7.78673519315495e-06, |
| "loss": 1.2371224164962769, |
| "step": 1248 |
| }, |
| { |
| "epoch": 1.9113149847094801, |
| "grad_norm": 0.12216029316186905, |
| "learning_rate": 7.758400476757609e-06, |
| "loss": 1.261689305305481, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.9143730886850152, |
| "grad_norm": 0.10635160654783249, |
| "learning_rate": 7.73010263204443e-06, |
| "loss": 1.2365918159484863, |
| "step": 1252 |
| }, |
| { |
| "epoch": 1.9174311926605505, |
| "grad_norm": 0.10856136679649353, |
| "learning_rate": 7.70184198089022e-06, |
| "loss": 1.2102067470550537, |
| "step": 1254 |
| }, |
| { |
| "epoch": 1.9204892966360856, |
| "grad_norm": 0.1211787536740303, |
| "learning_rate": 7.673618844746709e-06, |
| "loss": 1.1856294870376587, |
| "step": 1256 |
| }, |
| { |
| "epoch": 1.9235474006116209, |
| "grad_norm": 0.10200649499893188, |
| "learning_rate": 7.645433544638926e-06, |
| "loss": 1.2183263301849365, |
| "step": 1258 |
| }, |
| { |
| "epoch": 1.926605504587156, |
| "grad_norm": 0.10630739480257034, |
| "learning_rate": 7.617286401161523e-06, |
| "loss": 1.2423368692398071, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.929663608562691, |
| "grad_norm": 0.1024373397231102, |
| "learning_rate": 7.589177734475148e-06, |
| "loss": 1.1908504962921143, |
| "step": 1262 |
| }, |
| { |
| "epoch": 1.9327217125382263, |
| "grad_norm": 0.21826599538326263, |
| "learning_rate": 7.561107864302784e-06, |
| "loss": 1.2161670923233032, |
| "step": 1264 |
| }, |
| { |
| "epoch": 1.9357798165137616, |
| "grad_norm": 0.16537855565547943, |
| "learning_rate": 7.533077109926124e-06, |
| "loss": 1.2151883840560913, |
| "step": 1266 |
| }, |
| { |
| "epoch": 1.9388379204892967, |
| "grad_norm": 0.13985595107078552, |
| "learning_rate": 7.505085790181938e-06, |
| "loss": 1.263318419456482, |
| "step": 1268 |
| }, |
| { |
| "epoch": 1.9418960244648318, |
| "grad_norm": 0.14018817245960236, |
| "learning_rate": 7.477134223458449e-06, |
| "loss": 1.274073600769043, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.9449541284403669, |
| "grad_norm": 0.16601698100566864, |
| "learning_rate": 7.4492227276917e-06, |
| "loss": 1.2327349185943604, |
| "step": 1272 |
| }, |
| { |
| "epoch": 1.9480122324159022, |
| "grad_norm": 0.32889804244041443, |
| "learning_rate": 7.421351620361954e-06, |
| "loss": 1.2563930749893188, |
| "step": 1274 |
| }, |
| { |
| "epoch": 1.9510703363914375, |
| "grad_norm": 0.11310184746980667, |
| "learning_rate": 7.39352121849007e-06, |
| "loss": 1.227959394454956, |
| "step": 1276 |
| }, |
| { |
| "epoch": 1.9541284403669725, |
| "grad_norm": 0.13983066380023956, |
| "learning_rate": 7.3657318386339e-06, |
| "loss": 1.1745449304580688, |
| "step": 1278 |
| }, |
| { |
| "epoch": 1.9571865443425076, |
| "grad_norm": 0.1259947121143341, |
| "learning_rate": 7.337983796884694e-06, |
| "loss": 1.2027474641799927, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.9602446483180427, |
| "grad_norm": 0.12081421166658401, |
| "learning_rate": 7.310277408863493e-06, |
| "loss": 1.2270736694335938, |
| "step": 1282 |
| }, |
| { |
| "epoch": 1.963302752293578, |
| "grad_norm": 0.11816192418336868, |
| "learning_rate": 7.282612989717555e-06, |
| "loss": 1.202258825302124, |
| "step": 1284 |
| }, |
| { |
| "epoch": 1.9663608562691133, |
| "grad_norm": 0.15373268723487854, |
| "learning_rate": 7.254990854116759e-06, |
| "loss": 1.2122732400894165, |
| "step": 1286 |
| }, |
| { |
| "epoch": 1.9694189602446484, |
| "grad_norm": 0.14751672744750977, |
| "learning_rate": 7.2274113162500285e-06, |
| "loss": 1.2614079713821411, |
| "step": 1288 |
| }, |
| { |
| "epoch": 1.9724770642201834, |
| "grad_norm": 0.12097210437059402, |
| "learning_rate": 7.199874689821744e-06, |
| "loss": 1.2466144561767578, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.9755351681957185, |
| "grad_norm": 0.12983256578445435, |
| "learning_rate": 7.1723812880482114e-06, |
| "loss": 1.259993553161621, |
| "step": 1292 |
| }, |
| { |
| "epoch": 1.9785932721712538, |
| "grad_norm": 0.1823742687702179, |
| "learning_rate": 7.144931423654069e-06, |
| "loss": 1.2138053178787231, |
| "step": 1294 |
| }, |
| { |
| "epoch": 1.981651376146789, |
| "grad_norm": 0.162600576877594, |
| "learning_rate": 7.117525408868722e-06, |
| "loss": 1.2149325609207153, |
| "step": 1296 |
| }, |
| { |
| "epoch": 1.9847094801223242, |
| "grad_norm": 0.1520078033208847, |
| "learning_rate": 7.090163555422824e-06, |
| "loss": 1.2373226881027222, |
| "step": 1298 |
| }, |
| { |
| "epoch": 1.9877675840978593, |
| "grad_norm": 0.19275744259357452, |
| "learning_rate": 7.062846174544713e-06, |
| "loss": 1.292237639427185, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.9908256880733946, |
| "grad_norm": 0.14872559905052185, |
| "learning_rate": 7.035573576956867e-06, |
| "loss": 1.321597933769226, |
| "step": 1302 |
| }, |
| { |
| "epoch": 1.9938837920489296, |
| "grad_norm": 0.20485380291938782, |
| "learning_rate": 7.008346072872372e-06, |
| "loss": 1.480501651763916, |
| "step": 1304 |
| }, |
| { |
| "epoch": 1.996941896024465, |
| "grad_norm": 0.18706592917442322, |
| "learning_rate": 6.9811639719914004e-06, |
| "loss": 1.4298397302627563, |
| "step": 1306 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.3856329023838043, |
| "learning_rate": 6.954027583497691e-06, |
| "loss": 1.4748750925064087, |
| "step": 1308 |
| }, |
| { |
| "epoch": 2.003058103975535, |
| "grad_norm": 0.13063521683216095, |
| "learning_rate": 6.92693721605501e-06, |
| "loss": 1.3944097757339478, |
| "step": 1310 |
| }, |
| { |
| "epoch": 2.00611620795107, |
| "grad_norm": 0.11890590935945511, |
| "learning_rate": 6.899893177803667e-06, |
| "loss": 1.3550368547439575, |
| "step": 1312 |
| }, |
| { |
| "epoch": 2.0091743119266057, |
| "grad_norm": 0.12871591746807098, |
| "learning_rate": 6.8728957763570005e-06, |
| "loss": 1.3610459566116333, |
| "step": 1314 |
| }, |
| { |
| "epoch": 2.0122324159021407, |
| "grad_norm": 0.12763932347297668, |
| "learning_rate": 6.8459453187978706e-06, |
| "loss": 1.2988834381103516, |
| "step": 1316 |
| }, |
| { |
| "epoch": 2.015290519877676, |
| "grad_norm": 0.23050488531589508, |
| "learning_rate": 6.819042111675172e-06, |
| "loss": 1.489760398864746, |
| "step": 1318 |
| }, |
| { |
| "epoch": 2.018348623853211, |
| "grad_norm": 0.09902098029851913, |
| "learning_rate": 6.792186461000352e-06, |
| "loss": 1.3719017505645752, |
| "step": 1320 |
| }, |
| { |
| "epoch": 2.021406727828746, |
| "grad_norm": 0.17471402883529663, |
| "learning_rate": 6.765378672243923e-06, |
| "loss": 1.3188618421554565, |
| "step": 1322 |
| }, |
| { |
| "epoch": 2.0244648318042815, |
| "grad_norm": 0.17060087621212006, |
| "learning_rate": 6.738619050331995e-06, |
| "loss": 1.3893766403198242, |
| "step": 1324 |
| }, |
| { |
| "epoch": 2.0275229357798166, |
| "grad_norm": 0.13662482798099518, |
| "learning_rate": 6.711907899642793e-06, |
| "loss": 1.2671425342559814, |
| "step": 1326 |
| }, |
| { |
| "epoch": 2.0305810397553516, |
| "grad_norm": 0.13896261155605316, |
| "learning_rate": 6.685245524003212e-06, |
| "loss": 1.341953992843628, |
| "step": 1328 |
| }, |
| { |
| "epoch": 2.0336391437308867, |
| "grad_norm": 0.15634822845458984, |
| "learning_rate": 6.658632226685355e-06, |
| "loss": 1.4511994123458862, |
| "step": 1330 |
| }, |
| { |
| "epoch": 2.036697247706422, |
| "grad_norm": 0.11488586664199829, |
| "learning_rate": 6.632068310403075e-06, |
| "loss": 1.3489059209823608, |
| "step": 1332 |
| }, |
| { |
| "epoch": 2.0397553516819573, |
| "grad_norm": 0.10222349315881729, |
| "learning_rate": 6.605554077308541e-06, |
| "loss": 1.1634528636932373, |
| "step": 1334 |
| }, |
| { |
| "epoch": 2.0428134556574924, |
| "grad_norm": 0.17645561695098877, |
| "learning_rate": 6.579089828988806e-06, |
| "loss": 1.333652377128601, |
| "step": 1336 |
| }, |
| { |
| "epoch": 2.0458715596330275, |
| "grad_norm": 0.18325085937976837, |
| "learning_rate": 6.552675866462358e-06, |
| "loss": 1.5699204206466675, |
| "step": 1338 |
| }, |
| { |
| "epoch": 2.0489296636085625, |
| "grad_norm": 0.11928494274616241, |
| "learning_rate": 6.526312490175719e-06, |
| "loss": 1.21940016746521, |
| "step": 1340 |
| }, |
| { |
| "epoch": 2.051987767584098, |
| "grad_norm": 0.13842761516571045, |
| "learning_rate": 6.500000000000003e-06, |
| "loss": 1.364502191543579, |
| "step": 1342 |
| }, |
| { |
| "epoch": 2.055045871559633, |
| "grad_norm": 0.32689663767814636, |
| "learning_rate": 6.473738695227528e-06, |
| "loss": 1.2524632215499878, |
| "step": 1344 |
| }, |
| { |
| "epoch": 2.058103975535168, |
| "grad_norm": 0.10303899645805359, |
| "learning_rate": 6.447528874568403e-06, |
| "loss": 1.2425854206085205, |
| "step": 1346 |
| }, |
| { |
| "epoch": 2.0611620795107033, |
| "grad_norm": 0.12432652711868286, |
| "learning_rate": 6.421370836147125e-06, |
| "loss": 1.2536406517028809, |
| "step": 1348 |
| }, |
| { |
| "epoch": 2.0642201834862384, |
| "grad_norm": 0.13824020326137543, |
| "learning_rate": 6.3952648774991895e-06, |
| "loss": 1.2881417274475098, |
| "step": 1350 |
| }, |
| { |
| "epoch": 2.067278287461774, |
| "grad_norm": 0.13316668570041656, |
| "learning_rate": 6.3692112955677145e-06, |
| "loss": 1.474996566772461, |
| "step": 1352 |
| }, |
| { |
| "epoch": 2.070336391437309, |
| "grad_norm": 0.1996292918920517, |
| "learning_rate": 6.343210386700056e-06, |
| "loss": 1.4938266277313232, |
| "step": 1354 |
| }, |
| { |
| "epoch": 2.073394495412844, |
| "grad_norm": 0.14201390743255615, |
| "learning_rate": 6.317262446644432e-06, |
| "loss": 1.2885432243347168, |
| "step": 1356 |
| }, |
| { |
| "epoch": 2.076452599388379, |
| "grad_norm": 0.15462332963943481, |
| "learning_rate": 6.291367770546576e-06, |
| "loss": 1.3016390800476074, |
| "step": 1358 |
| }, |
| { |
| "epoch": 2.079510703363914, |
| "grad_norm": 0.16548824310302734, |
| "learning_rate": 6.265526652946361e-06, |
| "loss": 1.284877061843872, |
| "step": 1360 |
| }, |
| { |
| "epoch": 2.0825688073394497, |
| "grad_norm": 0.21291159093379974, |
| "learning_rate": 6.23973938777446e-06, |
| "loss": 1.3487058877944946, |
| "step": 1362 |
| }, |
| { |
| "epoch": 2.085626911314985, |
| "grad_norm": 0.10997481644153595, |
| "learning_rate": 6.214006268348997e-06, |
| "loss": 1.3343586921691895, |
| "step": 1364 |
| }, |
| { |
| "epoch": 2.08868501529052, |
| "grad_norm": 0.17500963807106018, |
| "learning_rate": 6.188327587372216e-06, |
| "loss": 1.362959384918213, |
| "step": 1366 |
| }, |
| { |
| "epoch": 2.091743119266055, |
| "grad_norm": 0.09822017699480057, |
| "learning_rate": 6.162703636927147e-06, |
| "loss": 1.3468924760818481, |
| "step": 1368 |
| }, |
| { |
| "epoch": 2.09480122324159, |
| "grad_norm": 0.12087301164865494, |
| "learning_rate": 6.137134708474293e-06, |
| "loss": 1.2233235836029053, |
| "step": 1370 |
| }, |
| { |
| "epoch": 2.0978593272171255, |
| "grad_norm": 0.10145165026187897, |
| "learning_rate": 6.111621092848293e-06, |
| "loss": 1.1914565563201904, |
| "step": 1372 |
| }, |
| { |
| "epoch": 2.1009174311926606, |
| "grad_norm": 0.13930702209472656, |
| "learning_rate": 6.086163080254641e-06, |
| "loss": 1.3449511528015137, |
| "step": 1374 |
| }, |
| { |
| "epoch": 2.1039755351681957, |
| "grad_norm": 0.12874501943588257, |
| "learning_rate": 6.060760960266372e-06, |
| "loss": 1.4593555927276611, |
| "step": 1376 |
| }, |
| { |
| "epoch": 2.1070336391437308, |
| "grad_norm": 0.3009782135486603, |
| "learning_rate": 6.035415021820756e-06, |
| "loss": 1.2994122505187988, |
| "step": 1378 |
| }, |
| { |
| "epoch": 2.1100917431192663, |
| "grad_norm": 0.25140684843063354, |
| "learning_rate": 6.0101255532160376e-06, |
| "loss": 1.4939740896224976, |
| "step": 1380 |
| }, |
| { |
| "epoch": 2.1131498470948014, |
| "grad_norm": 0.18365797400474548, |
| "learning_rate": 5.984892842108143e-06, |
| "loss": 1.4886876344680786, |
| "step": 1382 |
| }, |
| { |
| "epoch": 2.1162079510703364, |
| "grad_norm": 0.17874866724014282, |
| "learning_rate": 5.959717175507396e-06, |
| "loss": 1.2372279167175293, |
| "step": 1384 |
| }, |
| { |
| "epoch": 2.1192660550458715, |
| "grad_norm": 0.20835275948047638, |
| "learning_rate": 5.93459883977528e-06, |
| "loss": 1.1672478914260864, |
| "step": 1386 |
| }, |
| { |
| "epoch": 2.1223241590214066, |
| "grad_norm": 0.16295093297958374, |
| "learning_rate": 5.909538120621155e-06, |
| "loss": 1.3735610246658325, |
| "step": 1388 |
| }, |
| { |
| "epoch": 2.1253822629969417, |
| "grad_norm": 0.5294936895370483, |
| "learning_rate": 5.884535303099026e-06, |
| "loss": 1.1844180822372437, |
| "step": 1390 |
| }, |
| { |
| "epoch": 2.128440366972477, |
| "grad_norm": 1.3583707809448242, |
| "learning_rate": 5.859590671604297e-06, |
| "loss": 1.2543091773986816, |
| "step": 1392 |
| }, |
| { |
| "epoch": 2.1314984709480123, |
| "grad_norm": 0.15588292479515076, |
| "learning_rate": 5.8347045098705216e-06, |
| "loss": 1.215010404586792, |
| "step": 1394 |
| }, |
| { |
| "epoch": 2.1345565749235473, |
| "grad_norm": 0.11016454547643661, |
| "learning_rate": 5.809877100966197e-06, |
| "loss": 1.2965039014816284, |
| "step": 1396 |
| }, |
| { |
| "epoch": 2.1376146788990824, |
| "grad_norm": 0.16797038912773132, |
| "learning_rate": 5.785108727291532e-06, |
| "loss": 1.2082189321517944, |
| "step": 1398 |
| }, |
| { |
| "epoch": 2.140672782874618, |
| "grad_norm": 0.11476773768663406, |
| "learning_rate": 5.760399670575236e-06, |
| "loss": 1.2553752660751343, |
| "step": 1400 |
| }, |
| { |
| "epoch": 2.143730886850153, |
| "grad_norm": 0.46541839838027954, |
| "learning_rate": 5.735750211871316e-06, |
| "loss": 1.3943759202957153, |
| "step": 1402 |
| }, |
| { |
| "epoch": 2.146788990825688, |
| "grad_norm": 0.13297665119171143, |
| "learning_rate": 5.711160631555877e-06, |
| "loss": 1.262938380241394, |
| "step": 1404 |
| }, |
| { |
| "epoch": 2.149847094801223, |
| "grad_norm": 0.14411872625350952, |
| "learning_rate": 5.686631209323941e-06, |
| "loss": 1.2249363660812378, |
| "step": 1406 |
| }, |
| { |
| "epoch": 2.1529051987767582, |
| "grad_norm": 0.13139708340168, |
| "learning_rate": 5.662162224186258e-06, |
| "loss": 1.4485572576522827, |
| "step": 1408 |
| }, |
| { |
| "epoch": 2.1559633027522938, |
| "grad_norm": 0.20670275390148163, |
| "learning_rate": 5.637753954466127e-06, |
| "loss": 1.31044602394104, |
| "step": 1410 |
| }, |
| { |
| "epoch": 2.159021406727829, |
| "grad_norm": 0.11347033828496933, |
| "learning_rate": 5.613406677796246e-06, |
| "loss": 1.2472971677780151, |
| "step": 1412 |
| }, |
| { |
| "epoch": 2.162079510703364, |
| "grad_norm": 0.14416420459747314, |
| "learning_rate": 5.589120671115542e-06, |
| "loss": 1.3487032651901245, |
| "step": 1414 |
| }, |
| { |
| "epoch": 2.165137614678899, |
| "grad_norm": 0.2108708918094635, |
| "learning_rate": 5.564896210666031e-06, |
| "loss": 1.3209969997406006, |
| "step": 1416 |
| }, |
| { |
| "epoch": 2.168195718654434, |
| "grad_norm": 0.12039677798748016, |
| "learning_rate": 5.540733571989654e-06, |
| "loss": 1.3818414211273193, |
| "step": 1418 |
| }, |
| { |
| "epoch": 2.1712538226299696, |
| "grad_norm": 0.19275279343128204, |
| "learning_rate": 5.51663302992517e-06, |
| "loss": 1.2611538171768188, |
| "step": 1420 |
| }, |
| { |
| "epoch": 2.1743119266055047, |
| "grad_norm": 0.1626497060060501, |
| "learning_rate": 5.4925948586050224e-06, |
| "loss": 1.3544585704803467, |
| "step": 1422 |
| }, |
| { |
| "epoch": 2.1773700305810397, |
| "grad_norm": 0.13071835041046143, |
| "learning_rate": 5.4686193314522e-06, |
| "loss": 1.2558131217956543, |
| "step": 1424 |
| }, |
| { |
| "epoch": 2.180428134556575, |
| "grad_norm": 0.1628914475440979, |
| "learning_rate": 5.444706721177157e-06, |
| "loss": 1.567854642868042, |
| "step": 1426 |
| }, |
| { |
| "epoch": 2.18348623853211, |
| "grad_norm": 0.18418411910533905, |
| "learning_rate": 5.420857299774696e-06, |
| "loss": 1.3090449571609497, |
| "step": 1428 |
| }, |
| { |
| "epoch": 2.1865443425076454, |
| "grad_norm": 0.12824971973896027, |
| "learning_rate": 5.397071338520867e-06, |
| "loss": 1.377962589263916, |
| "step": 1430 |
| }, |
| { |
| "epoch": 2.1896024464831805, |
| "grad_norm": 0.11358148604631424, |
| "learning_rate": 5.373349107969902e-06, |
| "loss": 1.3557363748550415, |
| "step": 1432 |
| }, |
| { |
| "epoch": 2.1926605504587156, |
| "grad_norm": 0.15211330354213715, |
| "learning_rate": 5.349690877951115e-06, |
| "loss": 1.324013352394104, |
| "step": 1434 |
| }, |
| { |
| "epoch": 2.1957186544342506, |
| "grad_norm": 0.15498948097229004, |
| "learning_rate": 5.326096917565853e-06, |
| "loss": 1.329780101776123, |
| "step": 1436 |
| }, |
| { |
| "epoch": 2.198776758409786, |
| "grad_norm": 0.16712406277656555, |
| "learning_rate": 5.302567495184422e-06, |
| "loss": 1.331944465637207, |
| "step": 1438 |
| }, |
| { |
| "epoch": 2.2018348623853212, |
| "grad_norm": 0.1716017872095108, |
| "learning_rate": 5.279102878443032e-06, |
| "loss": 1.3320518732070923, |
| "step": 1440 |
| }, |
| { |
| "epoch": 2.2048929663608563, |
| "grad_norm": 0.18443110585212708, |
| "learning_rate": 5.255703334240774e-06, |
| "loss": 1.3890141248703003, |
| "step": 1442 |
| }, |
| { |
| "epoch": 2.2079510703363914, |
| "grad_norm": 0.12922358512878418, |
| "learning_rate": 5.232369128736553e-06, |
| "loss": 1.3398292064666748, |
| "step": 1444 |
| }, |
| { |
| "epoch": 2.2110091743119265, |
| "grad_norm": 0.14857889711856842, |
| "learning_rate": 5.2091005273460914e-06, |
| "loss": 1.2449339628219604, |
| "step": 1446 |
| }, |
| { |
| "epoch": 2.214067278287462, |
| "grad_norm": 0.2069442719221115, |
| "learning_rate": 5.185897794738881e-06, |
| "loss": 1.224430799484253, |
| "step": 1448 |
| }, |
| { |
| "epoch": 2.217125382262997, |
| "grad_norm": 0.13191770017147064, |
| "learning_rate": 5.162761194835198e-06, |
| "loss": 1.2660671472549438, |
| "step": 1450 |
| }, |
| { |
| "epoch": 2.220183486238532, |
| "grad_norm": 0.11560212075710297, |
| "learning_rate": 5.139690990803084e-06, |
| "loss": 1.231581449508667, |
| "step": 1452 |
| }, |
| { |
| "epoch": 2.223241590214067, |
| "grad_norm": 0.1220598965883255, |
| "learning_rate": 5.1166874450553635e-06, |
| "loss": 1.2865968942642212, |
| "step": 1454 |
| }, |
| { |
| "epoch": 2.2262996941896023, |
| "grad_norm": 0.12835901975631714, |
| "learning_rate": 5.093750819246648e-06, |
| "loss": 1.2431528568267822, |
| "step": 1456 |
| }, |
| { |
| "epoch": 2.229357798165138, |
| "grad_norm": 0.1959560513496399, |
| "learning_rate": 5.0708813742703666e-06, |
| "loss": 1.4930410385131836, |
| "step": 1458 |
| }, |
| { |
| "epoch": 2.232415902140673, |
| "grad_norm": 0.1700781285762787, |
| "learning_rate": 5.0480793702558085e-06, |
| "loss": 1.5038816928863525, |
| "step": 1460 |
| }, |
| { |
| "epoch": 2.235474006116208, |
| "grad_norm": 0.1439688354730606, |
| "learning_rate": 5.025345066565135e-06, |
| "loss": 1.5418845415115356, |
| "step": 1462 |
| }, |
| { |
| "epoch": 2.238532110091743, |
| "grad_norm": 0.24274107813835144, |
| "learning_rate": 5.002678721790462e-06, |
| "loss": 1.473767876625061, |
| "step": 1464 |
| }, |
| { |
| "epoch": 2.241590214067278, |
| "grad_norm": 0.16784918308258057, |
| "learning_rate": 4.980080593750901e-06, |
| "loss": 1.5189673900604248, |
| "step": 1466 |
| }, |
| { |
| "epoch": 2.2446483180428136, |
| "grad_norm": 0.16405650973320007, |
| "learning_rate": 4.9575509394896306e-06, |
| "loss": 1.4091551303863525, |
| "step": 1468 |
| }, |
| { |
| "epoch": 2.2477064220183487, |
| "grad_norm": 0.17201636731624603, |
| "learning_rate": 4.9350900152709644e-06, |
| "loss": 1.5018997192382812, |
| "step": 1470 |
| }, |
| { |
| "epoch": 2.2507645259938838, |
| "grad_norm": 0.19293750822544098, |
| "learning_rate": 4.9126980765774535e-06, |
| "loss": 1.2914767265319824, |
| "step": 1472 |
| }, |
| { |
| "epoch": 2.253822629969419, |
| "grad_norm": 0.40470463037490845, |
| "learning_rate": 4.890375378106969e-06, |
| "loss": 1.3884903192520142, |
| "step": 1474 |
| }, |
| { |
| "epoch": 2.2568807339449544, |
| "grad_norm": 0.22830891609191895, |
| "learning_rate": 4.8681221737698e-06, |
| "loss": 1.1885076761245728, |
| "step": 1476 |
| }, |
| { |
| "epoch": 2.2599388379204894, |
| "grad_norm": 0.29555222392082214, |
| "learning_rate": 4.845938716685783e-06, |
| "loss": 1.3951637744903564, |
| "step": 1478 |
| }, |
| { |
| "epoch": 2.2629969418960245, |
| "grad_norm": 0.19929106533527374, |
| "learning_rate": 4.8238252591813994e-06, |
| "loss": 0.9905915260314941, |
| "step": 1480 |
| }, |
| { |
| "epoch": 2.2660550458715596, |
| "grad_norm": 0.3056342899799347, |
| "learning_rate": 4.801782052786928e-06, |
| "loss": 0.9598507881164551, |
| "step": 1482 |
| }, |
| { |
| "epoch": 2.2691131498470947, |
| "grad_norm": 0.3265058100223541, |
| "learning_rate": 4.7798093482335736e-06, |
| "loss": 1.1725411415100098, |
| "step": 1484 |
| }, |
| { |
| "epoch": 2.2721712538226297, |
| "grad_norm": 0.19392286241054535, |
| "learning_rate": 4.757907395450607e-06, |
| "loss": 1.2541828155517578, |
| "step": 1486 |
| }, |
| { |
| "epoch": 2.2752293577981653, |
| "grad_norm": 1.1085748672485352, |
| "learning_rate": 4.736076443562537e-06, |
| "loss": 1.3106220960617065, |
| "step": 1488 |
| }, |
| { |
| "epoch": 2.2782874617737003, |
| "grad_norm": 0.2238902747631073, |
| "learning_rate": 4.714316740886271e-06, |
| "loss": 1.2723891735076904, |
| "step": 1490 |
| }, |
| { |
| "epoch": 2.2813455657492354, |
| "grad_norm": 0.1651279628276825, |
| "learning_rate": 4.69262853492829e-06, |
| "loss": 1.0989571809768677, |
| "step": 1492 |
| }, |
| { |
| "epoch": 2.2844036697247705, |
| "grad_norm": 0.14337404072284698, |
| "learning_rate": 4.671012072381827e-06, |
| "loss": 1.0573805570602417, |
| "step": 1494 |
| }, |
| { |
| "epoch": 2.287461773700306, |
| "grad_norm": 0.15021967887878418, |
| "learning_rate": 4.6494675991240725e-06, |
| "loss": 1.303949236869812, |
| "step": 1496 |
| }, |
| { |
| "epoch": 2.290519877675841, |
| "grad_norm": 0.31146693229675293, |
| "learning_rate": 4.627995360213372e-06, |
| "loss": 1.18751859664917, |
| "step": 1498 |
| }, |
| { |
| "epoch": 2.293577981651376, |
| "grad_norm": 0.23499751091003418, |
| "learning_rate": 4.606595599886441e-06, |
| "loss": 1.140386700630188, |
| "step": 1500 |
| }, |
| { |
| "epoch": 2.2966360856269112, |
| "grad_norm": 0.16011349856853485, |
| "learning_rate": 4.585268561555577e-06, |
| "loss": 1.3484982252120972, |
| "step": 1502 |
| }, |
| { |
| "epoch": 2.2996941896024463, |
| "grad_norm": 0.1556277722120285, |
| "learning_rate": 4.564014487805905e-06, |
| "loss": 1.292175531387329, |
| "step": 1504 |
| }, |
| { |
| "epoch": 2.302752293577982, |
| "grad_norm": 0.16667304933071136, |
| "learning_rate": 4.542833620392616e-06, |
| "loss": 1.0477646589279175, |
| "step": 1506 |
| }, |
| { |
| "epoch": 2.305810397553517, |
| "grad_norm": 0.18525543808937073, |
| "learning_rate": 4.521726200238199e-06, |
| "loss": 1.295340895652771, |
| "step": 1508 |
| }, |
| { |
| "epoch": 2.308868501529052, |
| "grad_norm": 0.20533132553100586, |
| "learning_rate": 4.5006924674297285e-06, |
| "loss": 1.4325908422470093, |
| "step": 1510 |
| }, |
| { |
| "epoch": 2.311926605504587, |
| "grad_norm": 0.1476341038942337, |
| "learning_rate": 4.479732661216114e-06, |
| "loss": 1.569886326789856, |
| "step": 1512 |
| }, |
| { |
| "epoch": 2.314984709480122, |
| "grad_norm": 0.19520513713359833, |
| "learning_rate": 4.458847020005387e-06, |
| "loss": 1.5973249673843384, |
| "step": 1514 |
| }, |
| { |
| "epoch": 2.3180428134556577, |
| "grad_norm": 0.1807132065296173, |
| "learning_rate": 4.43803578136198e-06, |
| "loss": 1.716496467590332, |
| "step": 1516 |
| }, |
| { |
| "epoch": 2.3211009174311927, |
| "grad_norm": 0.16778655350208282, |
| "learning_rate": 4.4172991820040385e-06, |
| "loss": 1.2482978105545044, |
| "step": 1518 |
| }, |
| { |
| "epoch": 2.324159021406728, |
| "grad_norm": 0.19130687415599823, |
| "learning_rate": 4.396637457800717e-06, |
| "loss": 1.3302521705627441, |
| "step": 1520 |
| }, |
| { |
| "epoch": 2.327217125382263, |
| "grad_norm": 0.11455649137496948, |
| "learning_rate": 4.376050843769508e-06, |
| "loss": 1.1768373250961304, |
| "step": 1522 |
| }, |
| { |
| "epoch": 2.330275229357798, |
| "grad_norm": 0.12459295988082886, |
| "learning_rate": 4.355539574073543e-06, |
| "loss": 1.104164958000183, |
| "step": 1524 |
| }, |
| { |
| "epoch": 2.3333333333333335, |
| "grad_norm": 0.2198324352502823, |
| "learning_rate": 4.3351038820189605e-06, |
| "loss": 1.4232064485549927, |
| "step": 1526 |
| }, |
| { |
| "epoch": 2.3363914373088686, |
| "grad_norm": 0.21143583953380585, |
| "learning_rate": 4.314744000052238e-06, |
| "loss": 1.3790403604507446, |
| "step": 1528 |
| }, |
| { |
| "epoch": 2.3394495412844036, |
| "grad_norm": 0.1558067500591278, |
| "learning_rate": 4.294460159757549e-06, |
| "loss": 1.568415880203247, |
| "step": 1530 |
| }, |
| { |
| "epoch": 2.3425076452599387, |
| "grad_norm": 0.224117711186409, |
| "learning_rate": 4.274252591854119e-06, |
| "loss": 1.0414113998413086, |
| "step": 1532 |
| }, |
| { |
| "epoch": 2.3455657492354742, |
| "grad_norm": 0.24501486122608185, |
| "learning_rate": 4.254121526193621e-06, |
| "loss": 1.2666254043579102, |
| "step": 1534 |
| }, |
| { |
| "epoch": 2.3486238532110093, |
| "grad_norm": 0.16644375026226044, |
| "learning_rate": 4.234067191757547e-06, |
| "loss": 1.5166178941726685, |
| "step": 1536 |
| }, |
| { |
| "epoch": 2.3516819571865444, |
| "grad_norm": 0.17968301475048065, |
| "learning_rate": 4.2140898166546094e-06, |
| "loss": 1.282509207725525, |
| "step": 1538 |
| }, |
| { |
| "epoch": 2.3547400611620795, |
| "grad_norm": 0.1535612940788269, |
| "learning_rate": 4.1941896281181345e-06, |
| "loss": 1.4368783235549927, |
| "step": 1540 |
| }, |
| { |
| "epoch": 2.3577981651376145, |
| "grad_norm": 0.148167684674263, |
| "learning_rate": 4.1743668525035e-06, |
| "loss": 1.3030726909637451, |
| "step": 1542 |
| }, |
| { |
| "epoch": 2.3608562691131496, |
| "grad_norm": 0.14207233488559723, |
| "learning_rate": 4.154621715285544e-06, |
| "loss": 1.247427225112915, |
| "step": 1544 |
| }, |
| { |
| "epoch": 2.363914373088685, |
| "grad_norm": 0.13422517478466034, |
| "learning_rate": 4.134954441055996e-06, |
| "loss": 1.2868202924728394, |
| "step": 1546 |
| }, |
| { |
| "epoch": 2.36697247706422, |
| "grad_norm": 0.19672590494155884, |
| "learning_rate": 4.11536525352094e-06, |
| "loss": 1.3480786085128784, |
| "step": 1548 |
| }, |
| { |
| "epoch": 2.3700305810397553, |
| "grad_norm": 0.13905340433120728, |
| "learning_rate": 4.0958543754982555e-06, |
| "loss": 1.2070438861846924, |
| "step": 1550 |
| }, |
| { |
| "epoch": 2.3730886850152904, |
| "grad_norm": 0.13256540894508362, |
| "learning_rate": 4.076422028915092e-06, |
| "loss": 1.3284435272216797, |
| "step": 1552 |
| }, |
| { |
| "epoch": 2.376146788990826, |
| "grad_norm": 0.40406838059425354, |
| "learning_rate": 4.057068434805334e-06, |
| "loss": 1.6244902610778809, |
| "step": 1554 |
| }, |
| { |
| "epoch": 2.379204892966361, |
| "grad_norm": 0.4620969593524933, |
| "learning_rate": 4.037793813307097e-06, |
| "loss": 1.8415812253952026, |
| "step": 1556 |
| }, |
| { |
| "epoch": 2.382262996941896, |
| "grad_norm": 0.1653379201889038, |
| "learning_rate": 4.018598383660221e-06, |
| "loss": 1.2424896955490112, |
| "step": 1558 |
| }, |
| { |
| "epoch": 2.385321100917431, |
| "grad_norm": 0.19328530132770538, |
| "learning_rate": 3.999482364203777e-06, |
| "loss": 1.2497849464416504, |
| "step": 1560 |
| }, |
| { |
| "epoch": 2.388379204892966, |
| "grad_norm": 0.429834246635437, |
| "learning_rate": 3.980445972373572e-06, |
| "loss": 1.3060815334320068, |
| "step": 1562 |
| }, |
| { |
| "epoch": 2.3914373088685017, |
| "grad_norm": 0.45071524381637573, |
| "learning_rate": 3.961489424699698e-06, |
| "loss": 1.3558465242385864, |
| "step": 1564 |
| }, |
| { |
| "epoch": 2.3944954128440368, |
| "grad_norm": 0.2264939397573471, |
| "learning_rate": 3.9426129368040525e-06, |
| "loss": 1.3144913911819458, |
| "step": 1566 |
| }, |
| { |
| "epoch": 2.397553516819572, |
| "grad_norm": 0.16805239021778107, |
| "learning_rate": 3.923816723397891e-06, |
| "loss": 1.19287109375, |
| "step": 1568 |
| }, |
| { |
| "epoch": 2.400611620795107, |
| "grad_norm": 0.21667709946632385, |
| "learning_rate": 3.905100998279378e-06, |
| "loss": 1.140608310699463, |
| "step": 1570 |
| }, |
| { |
| "epoch": 2.4036697247706424, |
| "grad_norm": 0.10687562078237534, |
| "learning_rate": 3.8864659743311674e-06, |
| "loss": 1.0686283111572266, |
| "step": 1572 |
| }, |
| { |
| "epoch": 2.4067278287461775, |
| "grad_norm": 0.1965143382549286, |
| "learning_rate": 3.867911863517976e-06, |
| "loss": 1.191169261932373, |
| "step": 1574 |
| }, |
| { |
| "epoch": 2.4097859327217126, |
| "grad_norm": 0.2572914659976959, |
| "learning_rate": 3.849438876884171e-06, |
| "loss": 1.4364819526672363, |
| "step": 1576 |
| }, |
| { |
| "epoch": 2.4128440366972477, |
| "grad_norm": 0.260490357875824, |
| "learning_rate": 3.831047224551362e-06, |
| "loss": 1.4710748195648193, |
| "step": 1578 |
| }, |
| { |
| "epoch": 2.4159021406727827, |
| "grad_norm": 0.2510780692100525, |
| "learning_rate": 3.8127371157160274e-06, |
| "loss": 1.445107340812683, |
| "step": 1580 |
| }, |
| { |
| "epoch": 2.418960244648318, |
| "grad_norm": 0.22010444104671478, |
| "learning_rate": 3.794508758647125e-06, |
| "loss": 1.4945043325424194, |
| "step": 1582 |
| }, |
| { |
| "epoch": 2.4220183486238533, |
| "grad_norm": 0.3746698498725891, |
| "learning_rate": 3.776362360683725e-06, |
| "loss": 1.3151277303695679, |
| "step": 1584 |
| }, |
| { |
| "epoch": 2.4250764525993884, |
| "grad_norm": 0.18635894358158112, |
| "learning_rate": 3.7582981282326436e-06, |
| "loss": 1.4105780124664307, |
| "step": 1586 |
| }, |
| { |
| "epoch": 2.4281345565749235, |
| "grad_norm": 0.18661010265350342, |
| "learning_rate": 3.74031626676611e-06, |
| "loss": 1.2960549592971802, |
| "step": 1588 |
| }, |
| { |
| "epoch": 2.4311926605504586, |
| "grad_norm": 0.1374787539243698, |
| "learning_rate": 3.7224169808194234e-06, |
| "loss": 1.2444533109664917, |
| "step": 1590 |
| }, |
| { |
| "epoch": 2.434250764525994, |
| "grad_norm": 0.26428133249282837, |
| "learning_rate": 3.704600473988616e-06, |
| "loss": 1.2722461223602295, |
| "step": 1592 |
| }, |
| { |
| "epoch": 2.437308868501529, |
| "grad_norm": 0.16100512444972992, |
| "learning_rate": 3.6868669489281526e-06, |
| "loss": 1.4575788974761963, |
| "step": 1594 |
| }, |
| { |
| "epoch": 2.4403669724770642, |
| "grad_norm": 0.2964245080947876, |
| "learning_rate": 3.6692166073486207e-06, |
| "loss": 1.2261364459991455, |
| "step": 1596 |
| }, |
| { |
| "epoch": 2.4434250764525993, |
| "grad_norm": 0.34498047828674316, |
| "learning_rate": 3.6516496500144315e-06, |
| "loss": 1.3639787435531616, |
| "step": 1598 |
| }, |
| { |
| "epoch": 2.4464831804281344, |
| "grad_norm": 0.13297414779663086, |
| "learning_rate": 3.6341662767415366e-06, |
| "loss": 1.283323884010315, |
| "step": 1600 |
| }, |
| { |
| "epoch": 2.44954128440367, |
| "grad_norm": 0.2322530299425125, |
| "learning_rate": 3.616766686395161e-06, |
| "loss": 1.2881537675857544, |
| "step": 1602 |
| }, |
| { |
| "epoch": 2.452599388379205, |
| "grad_norm": 0.2094813585281372, |
| "learning_rate": 3.599451076887539e-06, |
| "loss": 1.4600404500961304, |
| "step": 1604 |
| }, |
| { |
| "epoch": 2.45565749235474, |
| "grad_norm": 0.24529632925987244, |
| "learning_rate": 3.5822196451756617e-06, |
| "loss": 1.4501386880874634, |
| "step": 1606 |
| }, |
| { |
| "epoch": 2.458715596330275, |
| "grad_norm": 0.25701767206192017, |
| "learning_rate": 3.5650725872590343e-06, |
| "loss": 1.2705899477005005, |
| "step": 1608 |
| }, |
| { |
| "epoch": 2.46177370030581, |
| "grad_norm": 0.13494141399860382, |
| "learning_rate": 3.54801009817745e-06, |
| "loss": 1.2268667221069336, |
| "step": 1610 |
| }, |
| { |
| "epoch": 2.4648318042813457, |
| "grad_norm": 0.13910284638404846, |
| "learning_rate": 3.5310323720087747e-06, |
| "loss": 1.3551948070526123, |
| "step": 1612 |
| }, |
| { |
| "epoch": 2.467889908256881, |
| "grad_norm": 0.1411304473876953, |
| "learning_rate": 3.5141396018667327e-06, |
| "loss": 1.3125407695770264, |
| "step": 1614 |
| }, |
| { |
| "epoch": 2.470948012232416, |
| "grad_norm": 0.2372998297214508, |
| "learning_rate": 3.4973319798987075e-06, |
| "loss": 1.4539326429367065, |
| "step": 1616 |
| }, |
| { |
| "epoch": 2.474006116207951, |
| "grad_norm": 0.1646145135164261, |
| "learning_rate": 3.480609697283574e-06, |
| "loss": 1.3223282098770142, |
| "step": 1618 |
| }, |
| { |
| "epoch": 2.477064220183486, |
| "grad_norm": 0.18089331686496735, |
| "learning_rate": 3.463972944229502e-06, |
| "loss": 1.2549902200698853, |
| "step": 1620 |
| }, |
| { |
| "epoch": 2.4801223241590216, |
| "grad_norm": 0.15575502812862396, |
| "learning_rate": 3.4474219099718085e-06, |
| "loss": 1.215592622756958, |
| "step": 1622 |
| }, |
| { |
| "epoch": 2.4831804281345566, |
| "grad_norm": 0.1584579348564148, |
| "learning_rate": 3.4309567827707936e-06, |
| "loss": 1.3570244312286377, |
| "step": 1624 |
| }, |
| { |
| "epoch": 2.4862385321100917, |
| "grad_norm": 0.1323862224817276, |
| "learning_rate": 3.4145777499096066e-06, |
| "loss": 1.3482739925384521, |
| "step": 1626 |
| }, |
| { |
| "epoch": 2.489296636085627, |
| "grad_norm": 0.2227022349834442, |
| "learning_rate": 3.3982849976921185e-06, |
| "loss": 1.2847216129302979, |
| "step": 1628 |
| }, |
| { |
| "epoch": 2.4923547400611623, |
| "grad_norm": 0.14622831344604492, |
| "learning_rate": 3.3820787114407927e-06, |
| "loss": 1.5019530057907104, |
| "step": 1630 |
| }, |
| { |
| "epoch": 2.4954128440366974, |
| "grad_norm": 0.16076745092868805, |
| "learning_rate": 3.3659590754945816e-06, |
| "loss": 1.6965469121932983, |
| "step": 1632 |
| }, |
| { |
| "epoch": 2.4984709480122325, |
| "grad_norm": 0.18824124336242676, |
| "learning_rate": 3.349926273206834e-06, |
| "loss": 1.5644712448120117, |
| "step": 1634 |
| }, |
| { |
| "epoch": 2.5015290519877675, |
| "grad_norm": 0.1523565948009491, |
| "learning_rate": 3.3339804869432092e-06, |
| "loss": 1.5674009323120117, |
| "step": 1636 |
| }, |
| { |
| "epoch": 2.5045871559633026, |
| "grad_norm": 0.17791804671287537, |
| "learning_rate": 3.3181218980795915e-06, |
| "loss": 1.4739274978637695, |
| "step": 1638 |
| }, |
| { |
| "epoch": 2.5076452599388377, |
| "grad_norm": 0.2756288945674896, |
| "learning_rate": 3.302350687000041e-06, |
| "loss": 1.1127119064331055, |
| "step": 1640 |
| }, |
| { |
| "epoch": 2.510703363914373, |
| "grad_norm": 0.2104320228099823, |
| "learning_rate": 3.2866670330947372e-06, |
| "loss": 1.1336884498596191, |
| "step": 1642 |
| }, |
| { |
| "epoch": 2.5137614678899083, |
| "grad_norm": 0.26251697540283203, |
| "learning_rate": 3.271071114757936e-06, |
| "loss": 1.5312784910202026, |
| "step": 1644 |
| }, |
| { |
| "epoch": 2.5168195718654434, |
| "grad_norm": 0.45193246006965637, |
| "learning_rate": 3.2555631093859376e-06, |
| "loss": 1.5268921852111816, |
| "step": 1646 |
| }, |
| { |
| "epoch": 2.5198776758409784, |
| "grad_norm": 0.19322098791599274, |
| "learning_rate": 3.240143193375079e-06, |
| "loss": 1.6491858959197998, |
| "step": 1648 |
| }, |
| { |
| "epoch": 2.522935779816514, |
| "grad_norm": 0.25457441806793213, |
| "learning_rate": 3.2248115421197207e-06, |
| "loss": 1.5816277265548706, |
| "step": 1650 |
| }, |
| { |
| "epoch": 2.525993883792049, |
| "grad_norm": 0.1778043955564499, |
| "learning_rate": 3.2095683300102544e-06, |
| "loss": 1.6332181692123413, |
| "step": 1652 |
| }, |
| { |
| "epoch": 2.529051987767584, |
| "grad_norm": 0.35289818048477173, |
| "learning_rate": 3.194413730431111e-06, |
| "loss": 1.4238784313201904, |
| "step": 1654 |
| }, |
| { |
| "epoch": 2.532110091743119, |
| "grad_norm": 0.1957436054944992, |
| "learning_rate": 3.1793479157588e-06, |
| "loss": 1.449766755104065, |
| "step": 1656 |
| }, |
| { |
| "epoch": 2.5351681957186543, |
| "grad_norm": 0.5064666271209717, |
| "learning_rate": 3.1643710573599484e-06, |
| "loss": 1.3231000900268555, |
| "step": 1658 |
| }, |
| { |
| "epoch": 2.5382262996941893, |
| "grad_norm": 0.2884846329689026, |
| "learning_rate": 3.1494833255893347e-06, |
| "loss": 1.5847378969192505, |
| "step": 1660 |
| }, |
| { |
| "epoch": 2.541284403669725, |
| "grad_norm": 0.29620814323425293, |
| "learning_rate": 3.1346848897879773e-06, |
| "loss": 1.357380986213684, |
| "step": 1662 |
| }, |
| { |
| "epoch": 2.54434250764526, |
| "grad_norm": 0.23009905219078064, |
| "learning_rate": 3.1199759182811835e-06, |
| "loss": 1.1059685945510864, |
| "step": 1664 |
| }, |
| { |
| "epoch": 2.547400611620795, |
| "grad_norm": 1.7077906131744385, |
| "learning_rate": 3.105356578376652e-06, |
| "loss": 1.583744764328003, |
| "step": 1666 |
| }, |
| { |
| "epoch": 2.5504587155963305, |
| "grad_norm": 0.15876507759094238, |
| "learning_rate": 3.090827036362566e-06, |
| "loss": 1.707784652709961, |
| "step": 1668 |
| }, |
| { |
| "epoch": 2.5535168195718656, |
| "grad_norm": 0.2697174549102783, |
| "learning_rate": 3.0763874575056897e-06, |
| "loss": 1.7691806554794312, |
| "step": 1670 |
| }, |
| { |
| "epoch": 2.5565749235474007, |
| "grad_norm": 0.43980327248573303, |
| "learning_rate": 3.062038006049509e-06, |
| "loss": 1.7003355026245117, |
| "step": 1672 |
| }, |
| { |
| "epoch": 2.5596330275229358, |
| "grad_norm": 0.2680771052837372, |
| "learning_rate": 3.0477788452123474e-06, |
| "loss": 1.6606260538101196, |
| "step": 1674 |
| }, |
| { |
| "epoch": 2.562691131498471, |
| "grad_norm": 0.24174532294273376, |
| "learning_rate": 3.0336101371855132e-06, |
| "loss": 1.532172441482544, |
| "step": 1676 |
| }, |
| { |
| "epoch": 2.565749235474006, |
| "grad_norm": 0.23746666312217712, |
| "learning_rate": 3.019532043131461e-06, |
| "loss": 1.3807486295700073, |
| "step": 1678 |
| }, |
| { |
| "epoch": 2.5688073394495414, |
| "grad_norm": 0.5547491312026978, |
| "learning_rate": 3.005544723181949e-06, |
| "loss": 0.9383170008659363, |
| "step": 1680 |
| }, |
| { |
| "epoch": 2.5718654434250765, |
| "grad_norm": 0.19193193316459656, |
| "learning_rate": 2.9916483364362273e-06, |
| "loss": 0.9406712055206299, |
| "step": 1682 |
| }, |
| { |
| "epoch": 2.5749235474006116, |
| "grad_norm": 0.1363154798746109, |
| "learning_rate": 2.9778430409592165e-06, |
| "loss": 1.1644362211227417, |
| "step": 1684 |
| }, |
| { |
| "epoch": 2.5779816513761467, |
| "grad_norm": 0.16005928814411163, |
| "learning_rate": 2.964128993779721e-06, |
| "loss": 1.0990843772888184, |
| "step": 1686 |
| }, |
| { |
| "epoch": 2.581039755351682, |
| "grad_norm": 0.20601928234100342, |
| "learning_rate": 2.95050635088864e-06, |
| "loss": 1.0353059768676758, |
| "step": 1688 |
| }, |
| { |
| "epoch": 2.5840978593272173, |
| "grad_norm": 0.11444630473852158, |
| "learning_rate": 2.936975267237188e-06, |
| "loss": 1.115382194519043, |
| "step": 1690 |
| }, |
| { |
| "epoch": 2.5871559633027523, |
| "grad_norm": 0.28663313388824463, |
| "learning_rate": 2.9235358967351346e-06, |
| "loss": 1.3251932859420776, |
| "step": 1692 |
| }, |
| { |
| "epoch": 2.5902140672782874, |
| "grad_norm": 0.478197306394577, |
| "learning_rate": 2.9101883922490577e-06, |
| "loss": 1.30450439453125, |
| "step": 1694 |
| }, |
| { |
| "epoch": 2.5932721712538225, |
| "grad_norm": 0.22590430080890656, |
| "learning_rate": 2.8969329056006052e-06, |
| "loss": 1.1485813856124878, |
| "step": 1696 |
| }, |
| { |
| "epoch": 2.5963302752293576, |
| "grad_norm": 0.2817031741142273, |
| "learning_rate": 2.883769587564757e-06, |
| "loss": 1.3016425371170044, |
| "step": 1698 |
| }, |
| { |
| "epoch": 2.599388379204893, |
| "grad_norm": 0.09478060901165009, |
| "learning_rate": 2.8706985878681236e-06, |
| "loss": 1.4139856100082397, |
| "step": 1700 |
| }, |
| { |
| "epoch": 2.602446483180428, |
| "grad_norm": 0.20007891952991486, |
| "learning_rate": 2.857720055187237e-06, |
| "loss": 1.1640866994857788, |
| "step": 1702 |
| }, |
| { |
| "epoch": 2.6055045871559632, |
| "grad_norm": 0.2094452977180481, |
| "learning_rate": 2.8448341371468606e-06, |
| "loss": 1.3702013492584229, |
| "step": 1704 |
| }, |
| { |
| "epoch": 2.6085626911314987, |
| "grad_norm": 0.2330324500799179, |
| "learning_rate": 2.832040980318304e-06, |
| "loss": 1.590658187866211, |
| "step": 1706 |
| }, |
| { |
| "epoch": 2.611620795107034, |
| "grad_norm": 0.18907496333122253, |
| "learning_rate": 2.8193407302177696e-06, |
| "loss": 1.088836431503296, |
| "step": 1708 |
| }, |
| { |
| "epoch": 2.614678899082569, |
| "grad_norm": 0.21448901295661926, |
| "learning_rate": 2.806733531304681e-06, |
| "loss": 1.3330715894699097, |
| "step": 1710 |
| }, |
| { |
| "epoch": 2.617737003058104, |
| "grad_norm": 0.149040088057518, |
| "learning_rate": 2.7942195269800524e-06, |
| "loss": 1.438978672027588, |
| "step": 1712 |
| }, |
| { |
| "epoch": 2.620795107033639, |
| "grad_norm": 0.32018783688545227, |
| "learning_rate": 2.781798859584855e-06, |
| "loss": 1.2572848796844482, |
| "step": 1714 |
| }, |
| { |
| "epoch": 2.623853211009174, |
| "grad_norm": 0.10462283343076706, |
| "learning_rate": 2.769471670398389e-06, |
| "loss": 1.1367379426956177, |
| "step": 1716 |
| }, |
| { |
| "epoch": 2.6269113149847096, |
| "grad_norm": 0.29408755898475647, |
| "learning_rate": 2.757238099636689e-06, |
| "loss": 0.7103652954101562, |
| "step": 1718 |
| }, |
| { |
| "epoch": 2.6299694189602447, |
| "grad_norm": 0.1653836965560913, |
| "learning_rate": 2.7450982864509253e-06, |
| "loss": 1.487276554107666, |
| "step": 1720 |
| }, |
| { |
| "epoch": 2.63302752293578, |
| "grad_norm": 0.31154564023017883, |
| "learning_rate": 2.7330523689258106e-06, |
| "loss": 1.3814054727554321, |
| "step": 1722 |
| }, |
| { |
| "epoch": 2.636085626911315, |
| "grad_norm": 0.27761033177375793, |
| "learning_rate": 2.721100484078048e-06, |
| "loss": 1.280024528503418, |
| "step": 1724 |
| }, |
| { |
| "epoch": 2.6391437308868504, |
| "grad_norm": 0.34517747163772583, |
| "learning_rate": 2.709242767854758e-06, |
| "loss": 1.4065845012664795, |
| "step": 1726 |
| }, |
| { |
| "epoch": 2.6422018348623855, |
| "grad_norm": 0.30397874116897583, |
| "learning_rate": 2.6974793551319383e-06, |
| "loss": 1.3509793281555176, |
| "step": 1728 |
| }, |
| { |
| "epoch": 2.6452599388379205, |
| "grad_norm": 0.16377079486846924, |
| "learning_rate": 2.6858103797129246e-06, |
| "loss": 1.3154374361038208, |
| "step": 1730 |
| }, |
| { |
| "epoch": 2.6483180428134556, |
| "grad_norm": 0.236458882689476, |
| "learning_rate": 2.674235974326878e-06, |
| "loss": 1.4936411380767822, |
| "step": 1732 |
| }, |
| { |
| "epoch": 2.6513761467889907, |
| "grad_norm": 0.18089789152145386, |
| "learning_rate": 2.6627562706272657e-06, |
| "loss": 1.0965957641601562, |
| "step": 1734 |
| }, |
| { |
| "epoch": 2.6544342507645258, |
| "grad_norm": 0.2745553255081177, |
| "learning_rate": 2.6513713991903705e-06, |
| "loss": 0.9196306467056274, |
| "step": 1736 |
| }, |
| { |
| "epoch": 2.6574923547400613, |
| "grad_norm": 0.4575045108795166, |
| "learning_rate": 2.640081489513797e-06, |
| "loss": 1.5425406694412231, |
| "step": 1738 |
| }, |
| { |
| "epoch": 2.6605504587155964, |
| "grad_norm": 0.1725241094827652, |
| "learning_rate": 2.628886670015009e-06, |
| "loss": 1.34834885597229, |
| "step": 1740 |
| }, |
| { |
| "epoch": 2.6636085626911314, |
| "grad_norm": 0.4089125096797943, |
| "learning_rate": 2.6177870680298624e-06, |
| "loss": 1.4617201089859009, |
| "step": 1742 |
| }, |
| { |
| "epoch": 2.6666666666666665, |
| "grad_norm": 0.13572430610656738, |
| "learning_rate": 2.606782809811155e-06, |
| "loss": 1.29056978225708, |
| "step": 1744 |
| }, |
| { |
| "epoch": 2.669724770642202, |
| "grad_norm": 0.23469695448875427, |
| "learning_rate": 2.5958740205272003e-06, |
| "loss": 1.5568315982818604, |
| "step": 1746 |
| }, |
| { |
| "epoch": 2.672782874617737, |
| "grad_norm": 0.2925702929496765, |
| "learning_rate": 2.5850608242603913e-06, |
| "loss": 1.4032976627349854, |
| "step": 1748 |
| }, |
| { |
| "epoch": 2.675840978593272, |
| "grad_norm": 0.2862710654735565, |
| "learning_rate": 2.5743433440058002e-06, |
| "loss": 1.4086406230926514, |
| "step": 1750 |
| }, |
| { |
| "epoch": 2.6788990825688073, |
| "grad_norm": 0.4533703029155731, |
| "learning_rate": 2.5637217016697663e-06, |
| "loss": 1.266021966934204, |
| "step": 1752 |
| }, |
| { |
| "epoch": 2.6819571865443423, |
| "grad_norm": 0.20051054656505585, |
| "learning_rate": 2.5531960180685276e-06, |
| "loss": 0.9320064783096313, |
| "step": 1754 |
| }, |
| { |
| "epoch": 2.6850152905198774, |
| "grad_norm": 0.2008654624223709, |
| "learning_rate": 2.5427664129268253e-06, |
| "loss": 1.4211325645446777, |
| "step": 1756 |
| }, |
| { |
| "epoch": 2.688073394495413, |
| "grad_norm": 0.1971636712551117, |
| "learning_rate": 2.5324330048765626e-06, |
| "loss": 1.3933095932006836, |
| "step": 1758 |
| }, |
| { |
| "epoch": 2.691131498470948, |
| "grad_norm": 0.17986322939395905, |
| "learning_rate": 2.522195911455437e-06, |
| "loss": 1.4166622161865234, |
| "step": 1760 |
| }, |
| { |
| "epoch": 2.694189602446483, |
| "grad_norm": 0.24720223248004913, |
| "learning_rate": 2.5120552491056197e-06, |
| "loss": 1.403609275817871, |
| "step": 1762 |
| }, |
| { |
| "epoch": 2.6972477064220186, |
| "grad_norm": 0.16844205558300018, |
| "learning_rate": 2.502011133172418e-06, |
| "loss": 1.2095983028411865, |
| "step": 1764 |
| }, |
| { |
| "epoch": 2.7003058103975537, |
| "grad_norm": 0.18861918151378632, |
| "learning_rate": 2.4920636779029736e-06, |
| "loss": 1.4543178081512451, |
| "step": 1766 |
| }, |
| { |
| "epoch": 2.7033639143730888, |
| "grad_norm": 0.2471121847629547, |
| "learning_rate": 2.482212996444952e-06, |
| "loss": 1.518424391746521, |
| "step": 1768 |
| }, |
| { |
| "epoch": 2.706422018348624, |
| "grad_norm": 0.2335500419139862, |
| "learning_rate": 2.4724592008452655e-06, |
| "loss": 1.2269190549850464, |
| "step": 1770 |
| }, |
| { |
| "epoch": 2.709480122324159, |
| "grad_norm": 0.1741182655096054, |
| "learning_rate": 2.4628024020487946e-06, |
| "loss": 1.4943727254867554, |
| "step": 1772 |
| }, |
| { |
| "epoch": 2.712538226299694, |
| "grad_norm": 0.261243611574173, |
| "learning_rate": 2.4532427098971276e-06, |
| "loss": 1.2752156257629395, |
| "step": 1774 |
| }, |
| { |
| "epoch": 2.7155963302752295, |
| "grad_norm": 0.25120115280151367, |
| "learning_rate": 2.4437802331273052e-06, |
| "loss": 1.4258947372436523, |
| "step": 1776 |
| }, |
| { |
| "epoch": 2.7186544342507646, |
| "grad_norm": 0.16213715076446533, |
| "learning_rate": 2.4344150793705944e-06, |
| "loss": 1.3501546382904053, |
| "step": 1778 |
| }, |
| { |
| "epoch": 2.7217125382262997, |
| "grad_norm": 0.22600753605365753, |
| "learning_rate": 2.425147355151254e-06, |
| "loss": 1.4926719665527344, |
| "step": 1780 |
| }, |
| { |
| "epoch": 2.7247706422018347, |
| "grad_norm": 0.19003809988498688, |
| "learning_rate": 2.4159771658853306e-06, |
| "loss": 1.5851101875305176, |
| "step": 1782 |
| }, |
| { |
| "epoch": 2.7278287461773703, |
| "grad_norm": 0.46118253469467163, |
| "learning_rate": 2.406904615879453e-06, |
| "loss": 1.4793192148208618, |
| "step": 1784 |
| }, |
| { |
| "epoch": 2.7308868501529053, |
| "grad_norm": 0.14569902420043945, |
| "learning_rate": 2.3979298083296488e-06, |
| "loss": 1.3102328777313232, |
| "step": 1786 |
| }, |
| { |
| "epoch": 2.7339449541284404, |
| "grad_norm": 0.23725034296512604, |
| "learning_rate": 2.3890528453201756e-06, |
| "loss": 1.4172600507736206, |
| "step": 1788 |
| }, |
| { |
| "epoch": 2.7370030581039755, |
| "grad_norm": 0.21555176377296448, |
| "learning_rate": 2.3802738278223474e-06, |
| "loss": 1.4081342220306396, |
| "step": 1790 |
| }, |
| { |
| "epoch": 2.7400611620795106, |
| "grad_norm": 0.20975619554519653, |
| "learning_rate": 2.3715928556934005e-06, |
| "loss": 1.409588098526001, |
| "step": 1792 |
| }, |
| { |
| "epoch": 2.7431192660550456, |
| "grad_norm": 0.4077710509300232, |
| "learning_rate": 2.3630100276753463e-06, |
| "loss": 1.4408369064331055, |
| "step": 1794 |
| }, |
| { |
| "epoch": 2.746177370030581, |
| "grad_norm": 0.16624042391777039, |
| "learning_rate": 2.354525441393857e-06, |
| "loss": 1.4369324445724487, |
| "step": 1796 |
| }, |
| { |
| "epoch": 2.7492354740061162, |
| "grad_norm": 0.14807072281837463, |
| "learning_rate": 2.346139193357145e-06, |
| "loss": 1.2198858261108398, |
| "step": 1798 |
| }, |
| { |
| "epoch": 2.7522935779816513, |
| "grad_norm": 0.22369414567947388, |
| "learning_rate": 2.337851378954877e-06, |
| "loss": 1.2793935537338257, |
| "step": 1800 |
| }, |
| { |
| "epoch": 2.7553516819571864, |
| "grad_norm": 0.21964912116527557, |
| "learning_rate": 2.3296620924570772e-06, |
| "loss": 1.1872437000274658, |
| "step": 1802 |
| }, |
| { |
| "epoch": 2.758409785932722, |
| "grad_norm": 0.2314019501209259, |
| "learning_rate": 2.3215714270130673e-06, |
| "loss": 1.2786332368850708, |
| "step": 1804 |
| }, |
| { |
| "epoch": 2.761467889908257, |
| "grad_norm": 0.17015686631202698, |
| "learning_rate": 2.3135794746503934e-06, |
| "loss": 1.2361197471618652, |
| "step": 1806 |
| }, |
| { |
| "epoch": 2.764525993883792, |
| "grad_norm": 0.20887653529644012, |
| "learning_rate": 2.3056863262737915e-06, |
| "loss": 1.1266238689422607, |
| "step": 1808 |
| }, |
| { |
| "epoch": 2.767584097859327, |
| "grad_norm": 0.12092727422714233, |
| "learning_rate": 2.2978920716641456e-06, |
| "loss": 0.8243193626403809, |
| "step": 1810 |
| }, |
| { |
| "epoch": 2.770642201834862, |
| "grad_norm": 0.12099810689687729, |
| "learning_rate": 2.290196799477473e-06, |
| "loss": 1.1438877582550049, |
| "step": 1812 |
| }, |
| { |
| "epoch": 2.7737003058103973, |
| "grad_norm": 0.1507033258676529, |
| "learning_rate": 2.2826005972439056e-06, |
| "loss": 1.2042597532272339, |
| "step": 1814 |
| }, |
| { |
| "epoch": 2.776758409785933, |
| "grad_norm": 0.19429726898670197, |
| "learning_rate": 2.2751035513667067e-06, |
| "loss": 1.269202470779419, |
| "step": 1816 |
| }, |
| { |
| "epoch": 2.779816513761468, |
| "grad_norm": 0.17468275129795074, |
| "learning_rate": 2.2677057471212783e-06, |
| "loss": 1.280784010887146, |
| "step": 1818 |
| }, |
| { |
| "epoch": 2.782874617737003, |
| "grad_norm": 0.12032614648342133, |
| "learning_rate": 2.2604072686541992e-06, |
| "loss": 1.337203025817871, |
| "step": 1820 |
| }, |
| { |
| "epoch": 2.7859327217125385, |
| "grad_norm": 0.10318220406770706, |
| "learning_rate": 2.25320819898226e-06, |
| "loss": 1.2575452327728271, |
| "step": 1822 |
| }, |
| { |
| "epoch": 2.7889908256880735, |
| "grad_norm": 0.40229856967926025, |
| "learning_rate": 2.2461086199915215e-06, |
| "loss": 1.2804014682769775, |
| "step": 1824 |
| }, |
| { |
| "epoch": 2.7920489296636086, |
| "grad_norm": 0.13444264233112335, |
| "learning_rate": 2.2391086124363907e-06, |
| "loss": 1.2483794689178467, |
| "step": 1826 |
| }, |
| { |
| "epoch": 2.7951070336391437, |
| "grad_norm": 0.11740458756685257, |
| "learning_rate": 2.232208255938689e-06, |
| "loss": 1.265547513961792, |
| "step": 1828 |
| }, |
| { |
| "epoch": 2.7981651376146788, |
| "grad_norm": 0.22319306433200836, |
| "learning_rate": 2.2254076289867574e-06, |
| "loss": 1.228807806968689, |
| "step": 1830 |
| }, |
| { |
| "epoch": 2.801223241590214, |
| "grad_norm": 0.14261578023433685, |
| "learning_rate": 2.218706808934559e-06, |
| "loss": 1.2727794647216797, |
| "step": 1832 |
| }, |
| { |
| "epoch": 2.8042813455657494, |
| "grad_norm": 0.1558150202035904, |
| "learning_rate": 2.2121058720008005e-06, |
| "loss": 1.2246638536453247, |
| "step": 1834 |
| }, |
| { |
| "epoch": 2.8073394495412844, |
| "grad_norm": 0.20606814324855804, |
| "learning_rate": 2.205604893268061e-06, |
| "loss": 1.1137512922286987, |
| "step": 1836 |
| }, |
| { |
| "epoch": 2.8103975535168195, |
| "grad_norm": 0.7590409517288208, |
| "learning_rate": 2.1992039466819464e-06, |
| "loss": 1.1792261600494385, |
| "step": 1838 |
| }, |
| { |
| "epoch": 2.8134556574923546, |
| "grad_norm": 0.2555246353149414, |
| "learning_rate": 2.192903105050242e-06, |
| "loss": 0.9643247127532959, |
| "step": 1840 |
| }, |
| { |
| "epoch": 2.81651376146789, |
| "grad_norm": 0.16072718799114227, |
| "learning_rate": 2.186702440042086e-06, |
| "loss": 1.1716961860656738, |
| "step": 1842 |
| }, |
| { |
| "epoch": 2.819571865443425, |
| "grad_norm": 0.1394997090101242, |
| "learning_rate": 2.18060202218715e-06, |
| "loss": 1.2029292583465576, |
| "step": 1844 |
| }, |
| { |
| "epoch": 2.8226299694189603, |
| "grad_norm": 0.10753278434276581, |
| "learning_rate": 2.174601920874849e-06, |
| "loss": 1.221397876739502, |
| "step": 1846 |
| }, |
| { |
| "epoch": 2.8256880733944953, |
| "grad_norm": 0.11972783505916595, |
| "learning_rate": 2.168702204353538e-06, |
| "loss": 1.186174988746643, |
| "step": 1848 |
| }, |
| { |
| "epoch": 2.8287461773700304, |
| "grad_norm": 0.12710753083229065, |
| "learning_rate": 2.162902939729744e-06, |
| "loss": 1.2035380601882935, |
| "step": 1850 |
| }, |
| { |
| "epoch": 2.8318042813455655, |
| "grad_norm": 0.12419600784778595, |
| "learning_rate": 2.1572041929673983e-06, |
| "loss": 1.181868553161621, |
| "step": 1852 |
| }, |
| { |
| "epoch": 2.834862385321101, |
| "grad_norm": 0.1348334550857544, |
| "learning_rate": 2.151606028887092e-06, |
| "loss": 1.1523938179016113, |
| "step": 1854 |
| }, |
| { |
| "epoch": 2.837920489296636, |
| "grad_norm": 0.1725814938545227, |
| "learning_rate": 2.146108511165331e-06, |
| "loss": 1.210580825805664, |
| "step": 1856 |
| }, |
| { |
| "epoch": 2.840978593272171, |
| "grad_norm": 0.13193202018737793, |
| "learning_rate": 2.14071170233382e-06, |
| "loss": 1.1815242767333984, |
| "step": 1858 |
| }, |
| { |
| "epoch": 2.8440366972477067, |
| "grad_norm": 0.11987490952014923, |
| "learning_rate": 2.135415663778743e-06, |
| "loss": 1.1560810804367065, |
| "step": 1860 |
| }, |
| { |
| "epoch": 2.8470948012232418, |
| "grad_norm": 0.13950860500335693, |
| "learning_rate": 2.1302204557400727e-06, |
| "loss": 1.1448893547058105, |
| "step": 1862 |
| }, |
| { |
| "epoch": 2.850152905198777, |
| "grad_norm": 0.10012101382017136, |
| "learning_rate": 2.125126137310878e-06, |
| "loss": 1.173750638961792, |
| "step": 1864 |
| }, |
| { |
| "epoch": 2.853211009174312, |
| "grad_norm": 0.14672072231769562, |
| "learning_rate": 2.1201327664366585e-06, |
| "loss": 1.1636675596237183, |
| "step": 1866 |
| }, |
| { |
| "epoch": 2.856269113149847, |
| "grad_norm": 1.3487753868103027, |
| "learning_rate": 2.115240399914681e-06, |
| "loss": 1.1240174770355225, |
| "step": 1868 |
| }, |
| { |
| "epoch": 2.859327217125382, |
| "grad_norm": 0.11260063946247101, |
| "learning_rate": 2.1104490933933357e-06, |
| "loss": 1.1871709823608398, |
| "step": 1870 |
| }, |
| { |
| "epoch": 2.8623853211009176, |
| "grad_norm": 0.331380695104599, |
| "learning_rate": 2.1057589013715016e-06, |
| "loss": 1.212131381034851, |
| "step": 1872 |
| }, |
| { |
| "epoch": 2.8654434250764527, |
| "grad_norm": 0.13593994081020355, |
| "learning_rate": 2.101169877197926e-06, |
| "loss": 1.2223803997039795, |
| "step": 1874 |
| }, |
| { |
| "epoch": 2.8685015290519877, |
| "grad_norm": 0.11861570924520493, |
| "learning_rate": 2.096682073070622e-06, |
| "loss": 1.2096714973449707, |
| "step": 1876 |
| }, |
| { |
| "epoch": 2.871559633027523, |
| "grad_norm": 0.1468341201543808, |
| "learning_rate": 2.092295540036271e-06, |
| "loss": 1.228167176246643, |
| "step": 1878 |
| }, |
| { |
| "epoch": 2.8746177370030583, |
| "grad_norm": 0.13528312742710114, |
| "learning_rate": 2.088010327989642e-06, |
| "loss": 1.2062925100326538, |
| "step": 1880 |
| }, |
| { |
| "epoch": 2.8776758409785934, |
| "grad_norm": 0.16309846937656403, |
| "learning_rate": 2.0838264856730233e-06, |
| "loss": 1.2277228832244873, |
| "step": 1882 |
| }, |
| { |
| "epoch": 2.8807339449541285, |
| "grad_norm": 0.1445493847131729, |
| "learning_rate": 2.0797440606756747e-06, |
| "loss": 1.1675866842269897, |
| "step": 1884 |
| }, |
| { |
| "epoch": 2.8837920489296636, |
| "grad_norm": 0.20202840864658356, |
| "learning_rate": 2.075763099433277e-06, |
| "loss": 1.2077354192733765, |
| "step": 1886 |
| }, |
| { |
| "epoch": 2.8868501529051986, |
| "grad_norm": 0.23036618530750275, |
| "learning_rate": 2.0718836472274094e-06, |
| "loss": 1.1735302209854126, |
| "step": 1888 |
| }, |
| { |
| "epoch": 2.8899082568807337, |
| "grad_norm": 0.10639970749616623, |
| "learning_rate": 2.0681057481850338e-06, |
| "loss": 1.1969777345657349, |
| "step": 1890 |
| }, |
| { |
| "epoch": 2.8929663608562692, |
| "grad_norm": 0.21437813341617584, |
| "learning_rate": 2.0644294452779904e-06, |
| "loss": 1.2072774171829224, |
| "step": 1892 |
| }, |
| { |
| "epoch": 2.8960244648318043, |
| "grad_norm": 0.10676970332860947, |
| "learning_rate": 2.060854780322513e-06, |
| "loss": 1.225974678993225, |
| "step": 1894 |
| }, |
| { |
| "epoch": 2.8990825688073394, |
| "grad_norm": 0.11626426875591278, |
| "learning_rate": 2.05738179397875e-06, |
| "loss": 1.2107893228530884, |
| "step": 1896 |
| }, |
| { |
| "epoch": 2.9021406727828745, |
| "grad_norm": 0.09534060955047607, |
| "learning_rate": 2.054010525750302e-06, |
| "loss": 1.1430492401123047, |
| "step": 1898 |
| }, |
| { |
| "epoch": 2.90519877675841, |
| "grad_norm": 0.11424271017313004, |
| "learning_rate": 2.050741013983773e-06, |
| "loss": 1.1798888444900513, |
| "step": 1900 |
| }, |
| { |
| "epoch": 2.908256880733945, |
| "grad_norm": 0.12284081429243088, |
| "learning_rate": 2.0475732958683374e-06, |
| "loss": 1.2137783765792847, |
| "step": 1902 |
| }, |
| { |
| "epoch": 2.91131498470948, |
| "grad_norm": 0.12876354157924652, |
| "learning_rate": 2.0445074074353143e-06, |
| "loss": 1.2381041049957275, |
| "step": 1904 |
| }, |
| { |
| "epoch": 2.914373088685015, |
| "grad_norm": 0.14884409308433533, |
| "learning_rate": 2.0415433835577536e-06, |
| "loss": 1.2129112482070923, |
| "step": 1906 |
| }, |
| { |
| "epoch": 2.9174311926605503, |
| "grad_norm": 0.13569043576717377, |
| "learning_rate": 2.038681257950046e-06, |
| "loss": 1.187499761581421, |
| "step": 1908 |
| }, |
| { |
| "epoch": 2.9204892966360854, |
| "grad_norm": 0.11992838978767395, |
| "learning_rate": 2.035921063167539e-06, |
| "loss": 1.1636050939559937, |
| "step": 1910 |
| }, |
| { |
| "epoch": 2.923547400611621, |
| "grad_norm": 0.13300110399723053, |
| "learning_rate": 2.0332628306061598e-06, |
| "loss": 1.1939361095428467, |
| "step": 1912 |
| }, |
| { |
| "epoch": 2.926605504587156, |
| "grad_norm": 0.12980246543884277, |
| "learning_rate": 2.0307065905020655e-06, |
| "loss": 1.2195342779159546, |
| "step": 1914 |
| }, |
| { |
| "epoch": 2.929663608562691, |
| "grad_norm": 0.17000386118888855, |
| "learning_rate": 2.028252371931297e-06, |
| "loss": 1.1668133735656738, |
| "step": 1916 |
| }, |
| { |
| "epoch": 2.9327217125382266, |
| "grad_norm": 0.13674472272396088, |
| "learning_rate": 2.025900202809447e-06, |
| "loss": 1.1926172971725464, |
| "step": 1918 |
| }, |
| { |
| "epoch": 2.9357798165137616, |
| "grad_norm": 0.10880409181118011, |
| "learning_rate": 2.0236501098913433e-06, |
| "loss": 1.1915616989135742, |
| "step": 1920 |
| }, |
| { |
| "epoch": 2.9388379204892967, |
| "grad_norm": 0.18399053812026978, |
| "learning_rate": 2.021502118770743e-06, |
| "loss": 1.2413643598556519, |
| "step": 1922 |
| }, |
| { |
| "epoch": 2.941896024464832, |
| "grad_norm": 0.11673276871442795, |
| "learning_rate": 2.019456253880047e-06, |
| "loss": 1.2483376264572144, |
| "step": 1924 |
| }, |
| { |
| "epoch": 2.944954128440367, |
| "grad_norm": 0.14663535356521606, |
| "learning_rate": 2.0175125384900125e-06, |
| "loss": 1.2075457572937012, |
| "step": 1926 |
| }, |
| { |
| "epoch": 2.948012232415902, |
| "grad_norm": 0.13420651853084564, |
| "learning_rate": 2.015670994709497e-06, |
| "loss": 1.23482346534729, |
| "step": 1928 |
| }, |
| { |
| "epoch": 2.9510703363914375, |
| "grad_norm": 0.48742786049842834, |
| "learning_rate": 2.0139316434852034e-06, |
| "loss": 1.2056578397750854, |
| "step": 1930 |
| }, |
| { |
| "epoch": 2.9541284403669725, |
| "grad_norm": 0.12787304818630219, |
| "learning_rate": 2.0122945046014427e-06, |
| "loss": 1.1541380882263184, |
| "step": 1932 |
| }, |
| { |
| "epoch": 2.9571865443425076, |
| "grad_norm": 0.11604852229356766, |
| "learning_rate": 2.0107595966799047e-06, |
| "loss": 1.1836968660354614, |
| "step": 1934 |
| }, |
| { |
| "epoch": 2.9602446483180427, |
| "grad_norm": 0.11788922548294067, |
| "learning_rate": 2.009326937179452e-06, |
| "loss": 1.206952691078186, |
| "step": 1936 |
| }, |
| { |
| "epoch": 2.963302752293578, |
| "grad_norm": 0.1157788410782814, |
| "learning_rate": 2.0079965423959206e-06, |
| "loss": 1.1847132444381714, |
| "step": 1938 |
| }, |
| { |
| "epoch": 2.9663608562691133, |
| "grad_norm": 0.16670945286750793, |
| "learning_rate": 2.0067684274619298e-06, |
| "loss": 1.193540096282959, |
| "step": 1940 |
| }, |
| { |
| "epoch": 2.9694189602446484, |
| "grad_norm": 0.126400887966156, |
| "learning_rate": 2.0056426063467157e-06, |
| "loss": 1.2413787841796875, |
| "step": 1942 |
| }, |
| { |
| "epoch": 2.9724770642201834, |
| "grad_norm": 0.1293567568063736, |
| "learning_rate": 2.0046190918559676e-06, |
| "loss": 1.2285208702087402, |
| "step": 1944 |
| }, |
| { |
| "epoch": 2.9755351681957185, |
| "grad_norm": 0.19044171273708344, |
| "learning_rate": 2.0036978956316867e-06, |
| "loss": 1.2406749725341797, |
| "step": 1946 |
| }, |
| { |
| "epoch": 2.9785932721712536, |
| "grad_norm": 0.15015482902526855, |
| "learning_rate": 2.002879028152051e-06, |
| "loss": 1.194046974182129, |
| "step": 1948 |
| }, |
| { |
| "epoch": 2.981651376146789, |
| "grad_norm": 0.17410290241241455, |
| "learning_rate": 2.0021624987312975e-06, |
| "loss": 1.2008163928985596, |
| "step": 1950 |
| }, |
| { |
| "epoch": 2.984709480122324, |
| "grad_norm": 0.14580507576465607, |
| "learning_rate": 2.001548315519612e-06, |
| "loss": 1.2215114831924438, |
| "step": 1952 |
| }, |
| { |
| "epoch": 2.9877675840978593, |
| "grad_norm": 0.1949174851179123, |
| "learning_rate": 2.0010364855030445e-06, |
| "loss": 1.2802963256835938, |
| "step": 1954 |
| }, |
| { |
| "epoch": 2.9908256880733948, |
| "grad_norm": 0.16277354955673218, |
| "learning_rate": 2.0006270145034217e-06, |
| "loss": 1.3111497163772583, |
| "step": 1956 |
| }, |
| { |
| "epoch": 2.99388379204893, |
| "grad_norm": 0.19236025214195251, |
| "learning_rate": 2.000319907178286e-06, |
| "loss": 1.267144799232483, |
| "step": 1958 |
| }, |
| { |
| "epoch": 2.996941896024465, |
| "grad_norm": 0.2983456552028656, |
| "learning_rate": 2.00011516702084e-06, |
| "loss": 1.196736454963684, |
| "step": 1960 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.5205248594284058, |
| "learning_rate": 2.0000127963599083e-06, |
| "loss": 1.273805856704712, |
| "step": 1962 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 1962, |
| "total_flos": 2.4882019145802056e+18, |
| "train_loss": 1.403386620814161, |
| "train_runtime": 17106.4454, |
| "train_samples_per_second": 1.835, |
| "train_steps_per_second": 0.115 |
| } |
| ], |
| "logging_steps": 2, |
| "max_steps": 1962, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 9999999, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.4882019145802056e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|