| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 1182, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0008460236886632825, |
| "grad_norm": 4.550535678863525, |
| "learning_rate": 0.0, |
| "loss": 1.2052, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.001692047377326565, |
| "grad_norm": 4.09000301361084, |
| "learning_rate": 1.3888888888888888e-07, |
| "loss": 1.0467, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0025380710659898475, |
| "grad_norm": 4.39274787902832, |
| "learning_rate": 2.7777777777777776e-07, |
| "loss": 1.1306, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.00338409475465313, |
| "grad_norm": 4.3457722663879395, |
| "learning_rate": 4.1666666666666667e-07, |
| "loss": 1.0388, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.004230118443316413, |
| "grad_norm": 4.247500419616699, |
| "learning_rate": 5.555555555555555e-07, |
| "loss": 1.1666, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.005076142131979695, |
| "grad_norm": 4.16987943649292, |
| "learning_rate": 6.944444444444446e-07, |
| "loss": 1.1084, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.005922165820642978, |
| "grad_norm": 3.504650592803955, |
| "learning_rate": 8.333333333333333e-07, |
| "loss": 0.8725, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.00676818950930626, |
| "grad_norm": 4.1873297691345215, |
| "learning_rate": 9.722222222222224e-07, |
| "loss": 1.129, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.007614213197969543, |
| "grad_norm": 4.281223773956299, |
| "learning_rate": 1.111111111111111e-06, |
| "loss": 1.0329, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.008460236886632826, |
| "grad_norm": 3.8109793663024902, |
| "learning_rate": 1.25e-06, |
| "loss": 1.0024, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.009306260575296108, |
| "grad_norm": 4.020341873168945, |
| "learning_rate": 1.3888888888888892e-06, |
| "loss": 1.054, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.01015228426395939, |
| "grad_norm": 3.5737178325653076, |
| "learning_rate": 1.527777777777778e-06, |
| "loss": 1.0726, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.010998307952622674, |
| "grad_norm": 3.3822622299194336, |
| "learning_rate": 1.6666666666666667e-06, |
| "loss": 0.9914, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.011844331641285956, |
| "grad_norm": 3.4175751209259033, |
| "learning_rate": 1.8055555555555557e-06, |
| "loss": 1.1581, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.012690355329949238, |
| "grad_norm": 3.0857129096984863, |
| "learning_rate": 1.944444444444445e-06, |
| "loss": 0.9684, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.01353637901861252, |
| "grad_norm": 3.017608165740967, |
| "learning_rate": 2.0833333333333334e-06, |
| "loss": 1.0433, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.014382402707275803, |
| "grad_norm": 3.069457530975342, |
| "learning_rate": 2.222222222222222e-06, |
| "loss": 1.0798, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.015228426395939087, |
| "grad_norm": 2.6652209758758545, |
| "learning_rate": 2.361111111111111e-06, |
| "loss": 0.9204, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.016074450084602367, |
| "grad_norm": 2.924373149871826, |
| "learning_rate": 2.5e-06, |
| "loss": 1.0251, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.01692047377326565, |
| "grad_norm": 2.1823043823242188, |
| "learning_rate": 2.6388888888888893e-06, |
| "loss": 0.8016, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.017766497461928935, |
| "grad_norm": 2.238309860229492, |
| "learning_rate": 2.7777777777777783e-06, |
| "loss": 0.8186, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.018612521150592216, |
| "grad_norm": 2.507589817047119, |
| "learning_rate": 2.916666666666667e-06, |
| "loss": 0.9278, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.0194585448392555, |
| "grad_norm": 2.268179416656494, |
| "learning_rate": 3.055555555555556e-06, |
| "loss": 0.8594, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.02030456852791878, |
| "grad_norm": 2.1308953762054443, |
| "learning_rate": 3.1944444444444443e-06, |
| "loss": 0.9286, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.021150592216582064, |
| "grad_norm": 2.1231722831726074, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 0.7292, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.021996615905245348, |
| "grad_norm": 2.203334331512451, |
| "learning_rate": 3.4722222222222224e-06, |
| "loss": 0.806, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.02284263959390863, |
| "grad_norm": 2.161112070083618, |
| "learning_rate": 3.6111111111111115e-06, |
| "loss": 0.8608, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.023688663282571912, |
| "grad_norm": 2.1429355144500732, |
| "learning_rate": 3.7500000000000005e-06, |
| "loss": 0.7729, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.024534686971235193, |
| "grad_norm": 2.123563051223755, |
| "learning_rate": 3.88888888888889e-06, |
| "loss": 0.7233, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.025380710659898477, |
| "grad_norm": 2.211416482925415, |
| "learning_rate": 4.027777777777779e-06, |
| "loss": 0.796, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.02622673434856176, |
| "grad_norm": 2.374946355819702, |
| "learning_rate": 4.166666666666667e-06, |
| "loss": 0.7406, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.02707275803722504, |
| "grad_norm": 1.9759607315063477, |
| "learning_rate": 4.305555555555556e-06, |
| "loss": 0.7946, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.027918781725888325, |
| "grad_norm": 2.052825689315796, |
| "learning_rate": 4.444444444444444e-06, |
| "loss": 0.8375, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.028764805414551606, |
| "grad_norm": 1.8392261266708374, |
| "learning_rate": 4.583333333333333e-06, |
| "loss": 0.7907, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.02961082910321489, |
| "grad_norm": 1.8615978956222534, |
| "learning_rate": 4.722222222222222e-06, |
| "loss": 0.7694, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.030456852791878174, |
| "grad_norm": 1.7482495307922363, |
| "learning_rate": 4.861111111111111e-06, |
| "loss": 0.7296, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.031302876480541454, |
| "grad_norm": 1.808103322982788, |
| "learning_rate": 5e-06, |
| "loss": 0.7169, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.032148900169204735, |
| "grad_norm": 1.6232045888900757, |
| "learning_rate": 4.999990606222893e-06, |
| "loss": 0.6382, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.03299492385786802, |
| "grad_norm": 1.79764986038208, |
| "learning_rate": 4.9999624249621655e-06, |
| "loss": 0.7091, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.0338409475465313, |
| "grad_norm": 1.9869894981384277, |
| "learning_rate": 4.999915456429602e-06, |
| "loss": 0.7723, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.03468697123519458, |
| "grad_norm": 1.8739275932312012, |
| "learning_rate": 4.99984970097817e-06, |
| "loss": 0.7096, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.03553299492385787, |
| "grad_norm": 1.8806772232055664, |
| "learning_rate": 4.999765159102025e-06, |
| "loss": 0.7704, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.03637901861252115, |
| "grad_norm": 1.8095641136169434, |
| "learning_rate": 4.999661831436499e-06, |
| "loss": 0.6302, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.03722504230118443, |
| "grad_norm": 1.986670970916748, |
| "learning_rate": 4.9995397187581026e-06, |
| "loss": 0.6971, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.03807106598984772, |
| "grad_norm": 1.7087007761001587, |
| "learning_rate": 4.9993988219845155e-06, |
| "loss": 0.7039, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.038917089678511, |
| "grad_norm": 1.7987544536590576, |
| "learning_rate": 4.999239142174581e-06, |
| "loss": 0.7259, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.03976311336717428, |
| "grad_norm": 2.194416046142578, |
| "learning_rate": 4.999060680528294e-06, |
| "loss": 0.7221, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.04060913705583756, |
| "grad_norm": 1.895753026008606, |
| "learning_rate": 4.9988634383867995e-06, |
| "loss": 0.712, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.04145516074450085, |
| "grad_norm": 1.4732792377471924, |
| "learning_rate": 4.998647417232375e-06, |
| "loss": 0.636, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.04230118443316413, |
| "grad_norm": 1.7982600927352905, |
| "learning_rate": 4.998412618688426e-06, |
| "loss": 0.6754, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.04314720812182741, |
| "grad_norm": 1.6426688432693481, |
| "learning_rate": 4.9981590445194675e-06, |
| "loss": 0.6804, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.043993231810490696, |
| "grad_norm": 1.800573706626892, |
| "learning_rate": 4.997886696631115e-06, |
| "loss": 0.6273, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.04483925549915398, |
| "grad_norm": 1.5397448539733887, |
| "learning_rate": 4.997595577070068e-06, |
| "loss": 0.6667, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.04568527918781726, |
| "grad_norm": 1.6163142919540405, |
| "learning_rate": 4.997285688024097e-06, |
| "loss": 0.6126, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.046531302876480544, |
| "grad_norm": 1.696107268333435, |
| "learning_rate": 4.996957031822026e-06, |
| "loss": 0.6148, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.047377326565143825, |
| "grad_norm": 1.809167742729187, |
| "learning_rate": 4.996609610933713e-06, |
| "loss": 0.6086, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.048223350253807105, |
| "grad_norm": 1.7550158500671387, |
| "learning_rate": 4.996243427970032e-06, |
| "loss": 0.661, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.049069373942470386, |
| "grad_norm": 1.5835374593734741, |
| "learning_rate": 4.995858485682857e-06, |
| "loss": 0.6386, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.04991539763113367, |
| "grad_norm": 1.7450281381607056, |
| "learning_rate": 4.995454786965037e-06, |
| "loss": 0.6046, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.050761421319796954, |
| "grad_norm": 1.586624026298523, |
| "learning_rate": 4.995032334850378e-06, |
| "loss": 0.6807, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.051607445008460234, |
| "grad_norm": 1.6673095226287842, |
| "learning_rate": 4.994591132513616e-06, |
| "loss": 0.6778, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.05245346869712352, |
| "grad_norm": 1.4863988161087036, |
| "learning_rate": 4.994131183270396e-06, |
| "loss": 0.5943, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.0532994923857868, |
| "grad_norm": 1.789526343345642, |
| "learning_rate": 4.9936524905772466e-06, |
| "loss": 0.6049, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.05414551607445008, |
| "grad_norm": 1.6632400751113892, |
| "learning_rate": 4.993155058031554e-06, |
| "loss": 0.7222, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.05499153976311337, |
| "grad_norm": 1.4629849195480347, |
| "learning_rate": 4.992638889371534e-06, |
| "loss": 0.5864, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.05583756345177665, |
| "grad_norm": 1.3855196237564087, |
| "learning_rate": 4.992103988476206e-06, |
| "loss": 0.6227, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.05668358714043993, |
| "grad_norm": 1.7248979806900024, |
| "learning_rate": 4.99155035936536e-06, |
| "loss": 0.6841, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.05752961082910321, |
| "grad_norm": 1.5749865770339966, |
| "learning_rate": 4.990978006199534e-06, |
| "loss": 0.6157, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.0583756345177665, |
| "grad_norm": 1.5531669855117798, |
| "learning_rate": 4.990386933279973e-06, |
| "loss": 0.5916, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.05922165820642978, |
| "grad_norm": 1.590692400932312, |
| "learning_rate": 4.989777145048601e-06, |
| "loss": 0.612, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.06006768189509306, |
| "grad_norm": 1.673030138015747, |
| "learning_rate": 4.989148646087992e-06, |
| "loss": 0.6037, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.06091370558375635, |
| "grad_norm": 1.7553675174713135, |
| "learning_rate": 4.988501441121328e-06, |
| "loss": 0.6356, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.06175972927241963, |
| "grad_norm": 1.5859638452529907, |
| "learning_rate": 4.987835535012371e-06, |
| "loss": 0.5881, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.06260575296108291, |
| "grad_norm": 1.5672540664672852, |
| "learning_rate": 4.987150932765415e-06, |
| "loss": 0.6047, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.06345177664974619, |
| "grad_norm": 1.6025596857070923, |
| "learning_rate": 4.986447639525266e-06, |
| "loss": 0.6815, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.06429780033840947, |
| "grad_norm": 1.5830960273742676, |
| "learning_rate": 4.985725660577184e-06, |
| "loss": 0.6036, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.06514382402707276, |
| "grad_norm": 1.7896863222122192, |
| "learning_rate": 4.984985001346859e-06, |
| "loss": 0.6463, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.06598984771573604, |
| "grad_norm": 1.6958873271942139, |
| "learning_rate": 4.984225667400359e-06, |
| "loss": 0.6724, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.06683587140439932, |
| "grad_norm": 1.622676968574524, |
| "learning_rate": 4.983447664444097e-06, |
| "loss": 0.6548, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.0676818950930626, |
| "grad_norm": 1.4418054819107056, |
| "learning_rate": 4.982650998324781e-06, |
| "loss": 0.5953, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.06852791878172589, |
| "grad_norm": 1.5336499214172363, |
| "learning_rate": 4.981835675029375e-06, |
| "loss": 0.6232, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.06937394247038917, |
| "grad_norm": 1.6581315994262695, |
| "learning_rate": 4.981001700685051e-06, |
| "loss": 0.6987, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.07021996615905245, |
| "grad_norm": 1.5908305644989014, |
| "learning_rate": 4.980149081559142e-06, |
| "loss": 0.5882, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.07106598984771574, |
| "grad_norm": 1.8210628032684326, |
| "learning_rate": 4.979277824059103e-06, |
| "loss": 0.6913, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.07191201353637902, |
| "grad_norm": 1.5042943954467773, |
| "learning_rate": 4.978387934732451e-06, |
| "loss": 0.693, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.0727580372250423, |
| "grad_norm": 1.7726975679397583, |
| "learning_rate": 4.9774794202667236e-06, |
| "loss": 0.7089, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.07360406091370558, |
| "grad_norm": 1.7052823305130005, |
| "learning_rate": 4.976552287489427e-06, |
| "loss": 0.6448, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.07445008460236886, |
| "grad_norm": 1.5747593641281128, |
| "learning_rate": 4.975606543367983e-06, |
| "loss": 0.5367, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.07529610829103214, |
| "grad_norm": 1.4632954597473145, |
| "learning_rate": 4.974642195009681e-06, |
| "loss": 0.5494, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.07614213197969544, |
| "grad_norm": 1.7577087879180908, |
| "learning_rate": 4.97365924966162e-06, |
| "loss": 0.6178, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.07698815566835872, |
| "grad_norm": 1.7583465576171875, |
| "learning_rate": 4.972657714710653e-06, |
| "loss": 0.622, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.077834179357022, |
| "grad_norm": 1.6962776184082031, |
| "learning_rate": 4.9716375976833395e-06, |
| "loss": 0.5397, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.07868020304568528, |
| "grad_norm": 1.7553181648254395, |
| "learning_rate": 4.9705989062458805e-06, |
| "loss": 0.5369, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.07952622673434856, |
| "grad_norm": 1.4741019010543823, |
| "learning_rate": 4.969541648204064e-06, |
| "loss": 0.5877, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.08037225042301184, |
| "grad_norm": 1.5855069160461426, |
| "learning_rate": 4.968465831503207e-06, |
| "loss": 0.7098, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.08121827411167512, |
| "grad_norm": 1.7892258167266846, |
| "learning_rate": 4.967371464228096e-06, |
| "loss": 0.6401, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.08206429780033841, |
| "grad_norm": 1.4679720401763916, |
| "learning_rate": 4.966258554602924e-06, |
| "loss": 0.5463, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.0829103214890017, |
| "grad_norm": 1.7447293996810913, |
| "learning_rate": 4.965127110991232e-06, |
| "loss": 0.6407, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.08375634517766498, |
| "grad_norm": 1.4224369525909424, |
| "learning_rate": 4.9639771418958434e-06, |
| "loss": 0.568, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.08460236886632826, |
| "grad_norm": 1.539178729057312, |
| "learning_rate": 4.9628086559588e-06, |
| "loss": 0.6125, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.08544839255499154, |
| "grad_norm": 1.6318973302841187, |
| "learning_rate": 4.961621661961299e-06, |
| "loss": 0.5793, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.08629441624365482, |
| "grad_norm": 1.7556897401809692, |
| "learning_rate": 4.960416168823626e-06, |
| "loss": 0.5352, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.08714043993231811, |
| "grad_norm": 1.5747413635253906, |
| "learning_rate": 4.959192185605089e-06, |
| "loss": 0.6511, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.08798646362098139, |
| "grad_norm": 1.4685758352279663, |
| "learning_rate": 4.957949721503947e-06, |
| "loss": 0.5377, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.08883248730964467, |
| "grad_norm": 1.4151921272277832, |
| "learning_rate": 4.956688785857345e-06, |
| "loss": 0.5788, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.08967851099830795, |
| "grad_norm": 1.6794856786727905, |
| "learning_rate": 4.955409388141243e-06, |
| "loss": 0.6054, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.09052453468697123, |
| "grad_norm": 1.4817862510681152, |
| "learning_rate": 4.954111537970342e-06, |
| "loss": 0.6027, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.09137055837563451, |
| "grad_norm": 1.728560209274292, |
| "learning_rate": 4.952795245098013e-06, |
| "loss": 0.552, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.0922165820642978, |
| "grad_norm": 1.6162961721420288, |
| "learning_rate": 4.951460519416228e-06, |
| "loss": 0.6239, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.09306260575296109, |
| "grad_norm": 1.5593509674072266, |
| "learning_rate": 4.950107370955477e-06, |
| "loss": 0.6413, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.09390862944162437, |
| "grad_norm": 1.680822491645813, |
| "learning_rate": 4.948735809884701e-06, |
| "loss": 0.5699, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.09475465313028765, |
| "grad_norm": 1.745162010192871, |
| "learning_rate": 4.94734584651121e-06, |
| "loss": 0.5752, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.09560067681895093, |
| "grad_norm": 1.4649637937545776, |
| "learning_rate": 4.945937491280611e-06, |
| "loss": 0.5046, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.09644670050761421, |
| "grad_norm": 1.5797159671783447, |
| "learning_rate": 4.944510754776724e-06, |
| "loss": 0.6037, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.09729272419627749, |
| "grad_norm": 1.8420116901397705, |
| "learning_rate": 4.9430656477215016e-06, |
| "loss": 0.647, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.09813874788494077, |
| "grad_norm": 1.5039803981781006, |
| "learning_rate": 4.941602180974958e-06, |
| "loss": 0.6207, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.09898477157360407, |
| "grad_norm": 1.636516809463501, |
| "learning_rate": 4.940120365535076e-06, |
| "loss": 0.5839, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.09983079526226735, |
| "grad_norm": 1.7785464525222778, |
| "learning_rate": 4.938620212537733e-06, |
| "loss": 0.5822, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.10067681895093063, |
| "grad_norm": 1.5129812955856323, |
| "learning_rate": 4.937101733256608e-06, |
| "loss": 0.6025, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.10152284263959391, |
| "grad_norm": 1.603428602218628, |
| "learning_rate": 4.9355649391031066e-06, |
| "loss": 0.6247, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.10236886632825719, |
| "grad_norm": 1.5037899017333984, |
| "learning_rate": 4.934009841626272e-06, |
| "loss": 0.5521, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.10321489001692047, |
| "grad_norm": 1.5160306692123413, |
| "learning_rate": 4.932436452512693e-06, |
| "loss": 0.6395, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.10406091370558376, |
| "grad_norm": 1.4215550422668457, |
| "learning_rate": 4.930844783586424e-06, |
| "loss": 0.4997, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.10490693739424704, |
| "grad_norm": 1.7557592391967773, |
| "learning_rate": 4.929234846808893e-06, |
| "loss": 0.6924, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.10575296108291032, |
| "grad_norm": 1.5873064994812012, |
| "learning_rate": 4.927606654278809e-06, |
| "loss": 0.5761, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.1065989847715736, |
| "grad_norm": 1.5326778888702393, |
| "learning_rate": 4.925960218232073e-06, |
| "loss": 0.5501, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.10744500846023688, |
| "grad_norm": 1.5582760572433472, |
| "learning_rate": 4.924295551041688e-06, |
| "loss": 0.5711, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.10829103214890017, |
| "grad_norm": 1.5439339876174927, |
| "learning_rate": 4.922612665217664e-06, |
| "loss": 0.5736, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.10913705583756345, |
| "grad_norm": 1.3591135740280151, |
| "learning_rate": 4.920911573406925e-06, |
| "loss": 0.5472, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.10998307952622674, |
| "grad_norm": 1.655671238899231, |
| "learning_rate": 4.919192288393213e-06, |
| "loss": 0.5782, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.11082910321489002, |
| "grad_norm": 1.5855729579925537, |
| "learning_rate": 4.917454823096991e-06, |
| "loss": 0.6764, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.1116751269035533, |
| "grad_norm": 1.4577858448028564, |
| "learning_rate": 4.915699190575349e-06, |
| "loss": 0.4923, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.11252115059221658, |
| "grad_norm": 1.5771598815917969, |
| "learning_rate": 4.913925404021905e-06, |
| "loss": 0.548, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.11336717428087986, |
| "grad_norm": 1.7482346296310425, |
| "learning_rate": 4.912133476766701e-06, |
| "loss": 0.629, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.11421319796954314, |
| "grad_norm": 1.6378353834152222, |
| "learning_rate": 4.91032342227611e-06, |
| "loss": 0.6119, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.11505922165820642, |
| "grad_norm": 1.4870620965957642, |
| "learning_rate": 4.9084952541527315e-06, |
| "loss": 0.5078, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.11590524534686972, |
| "grad_norm": 1.6145250797271729, |
| "learning_rate": 4.9066489861352875e-06, |
| "loss": 0.608, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.116751269035533, |
| "grad_norm": 1.616117000579834, |
| "learning_rate": 4.904784632098523e-06, |
| "loss": 0.5443, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.11759729272419628, |
| "grad_norm": 1.6198755502700806, |
| "learning_rate": 4.902902206053099e-06, |
| "loss": 0.6141, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.11844331641285956, |
| "grad_norm": 1.4478083848953247, |
| "learning_rate": 4.9010017221454875e-06, |
| "loss": 0.5369, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.11928934010152284, |
| "grad_norm": 1.6716082096099854, |
| "learning_rate": 4.899083194657867e-06, |
| "loss": 0.5421, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.12013536379018612, |
| "grad_norm": 1.6240705251693726, |
| "learning_rate": 4.897146638008012e-06, |
| "loss": 0.5594, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.12098138747884941, |
| "grad_norm": 1.4059849977493286, |
| "learning_rate": 4.89519206674919e-06, |
| "loss": 0.5416, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.1218274111675127, |
| "grad_norm": 1.4964284896850586, |
| "learning_rate": 4.893219495570043e-06, |
| "loss": 0.5634, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.12267343485617598, |
| "grad_norm": 1.5191673040390015, |
| "learning_rate": 4.891228939294489e-06, |
| "loss": 0.5912, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.12351945854483926, |
| "grad_norm": 1.652429223060608, |
| "learning_rate": 4.8892204128816e-06, |
| "loss": 0.6106, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.12436548223350254, |
| "grad_norm": 1.706398606300354, |
| "learning_rate": 4.8871939314254965e-06, |
| "loss": 0.6298, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.12521150592216582, |
| "grad_norm": 1.6904054880142212, |
| "learning_rate": 4.88514951015523e-06, |
| "loss": 0.5076, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.1260575296108291, |
| "grad_norm": 1.4144283533096313, |
| "learning_rate": 4.883087164434672e-06, |
| "loss": 0.5625, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.12690355329949238, |
| "grad_norm": 1.6963647603988647, |
| "learning_rate": 4.881006909762394e-06, |
| "loss": 0.7107, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.12774957698815567, |
| "grad_norm": 1.7292715311050415, |
| "learning_rate": 4.878908761771555e-06, |
| "loss": 0.5773, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.12859560067681894, |
| "grad_norm": 1.321929693222046, |
| "learning_rate": 4.876792736229782e-06, |
| "loss": 0.5283, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.12944162436548223, |
| "grad_norm": 1.5928312540054321, |
| "learning_rate": 4.874658849039054e-06, |
| "loss": 0.5278, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.13028764805414553, |
| "grad_norm": 1.5850692987442017, |
| "learning_rate": 4.8725071162355805e-06, |
| "loss": 0.6298, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.1311336717428088, |
| "grad_norm": 1.534232497215271, |
| "learning_rate": 4.870337553989678e-06, |
| "loss": 0.5157, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.1319796954314721, |
| "grad_norm": 1.4662110805511475, |
| "learning_rate": 4.8681501786056545e-06, |
| "loss": 0.4884, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.13282571912013535, |
| "grad_norm": 1.4880340099334717, |
| "learning_rate": 4.865945006521684e-06, |
| "loss": 0.6217, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.13367174280879865, |
| "grad_norm": 1.578230857849121, |
| "learning_rate": 4.863722054309682e-06, |
| "loss": 0.6814, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.13451776649746192, |
| "grad_norm": 1.6861345767974854, |
| "learning_rate": 4.861481338675183e-06, |
| "loss": 0.5715, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.1353637901861252, |
| "grad_norm": 1.561371922492981, |
| "learning_rate": 4.8592228764572135e-06, |
| "loss": 0.5708, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.1362098138747885, |
| "grad_norm": 1.7995845079421997, |
| "learning_rate": 4.856946684628167e-06, |
| "loss": 0.6436, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.13705583756345177, |
| "grad_norm": 1.5640236139297485, |
| "learning_rate": 4.854652780293672e-06, |
| "loss": 0.6295, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.13790186125211507, |
| "grad_norm": 1.3940402269363403, |
| "learning_rate": 4.852341180692471e-06, |
| "loss": 0.5547, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.13874788494077833, |
| "grad_norm": 1.512726068496704, |
| "learning_rate": 4.8500119031962845e-06, |
| "loss": 0.5077, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.13959390862944163, |
| "grad_norm": 1.684700608253479, |
| "learning_rate": 4.847664965309684e-06, |
| "loss": 0.5076, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.1404399323181049, |
| "grad_norm": 1.5767724514007568, |
| "learning_rate": 4.845300384669958e-06, |
| "loss": 0.6229, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.1412859560067682, |
| "grad_norm": 1.6619733572006226, |
| "learning_rate": 4.842918179046982e-06, |
| "loss": 0.555, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.14213197969543148, |
| "grad_norm": 1.633802056312561, |
| "learning_rate": 4.840518366343083e-06, |
| "loss": 0.592, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.14297800338409475, |
| "grad_norm": 1.6347830295562744, |
| "learning_rate": 4.8381009645929044e-06, |
| "loss": 0.6016, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.14382402707275804, |
| "grad_norm": 1.4932504892349243, |
| "learning_rate": 4.835665991963274e-06, |
| "loss": 0.5356, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.1446700507614213, |
| "grad_norm": 1.5529918670654297, |
| "learning_rate": 4.833213466753063e-06, |
| "loss": 0.503, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.1455160744500846, |
| "grad_norm": 1.5487970113754272, |
| "learning_rate": 4.830743407393052e-06, |
| "loss": 0.5763, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.1463620981387479, |
| "grad_norm": 1.521419644355774, |
| "learning_rate": 4.82825583244579e-06, |
| "loss": 0.5377, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.14720812182741116, |
| "grad_norm": 1.516845703125, |
| "learning_rate": 4.825750760605458e-06, |
| "loss": 0.6147, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.14805414551607446, |
| "grad_norm": 1.7243348360061646, |
| "learning_rate": 4.823228210697723e-06, |
| "loss": 0.5545, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.14890016920473773, |
| "grad_norm": 1.5753135681152344, |
| "learning_rate": 4.820688201679605e-06, |
| "loss": 0.5235, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.14974619289340102, |
| "grad_norm": 1.7663754224777222, |
| "learning_rate": 4.818130752639326e-06, |
| "loss": 0.6196, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.1505922165820643, |
| "grad_norm": 1.7618986368179321, |
| "learning_rate": 4.815555882796169e-06, |
| "loss": 0.6838, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.15143824027072758, |
| "grad_norm": 1.4118471145629883, |
| "learning_rate": 4.8129636115003396e-06, |
| "loss": 0.5275, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.15228426395939088, |
| "grad_norm": 1.6629770994186401, |
| "learning_rate": 4.810353958232811e-06, |
| "loss": 0.5783, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.15313028764805414, |
| "grad_norm": 1.4159945249557495, |
| "learning_rate": 4.807726942605184e-06, |
| "loss": 0.508, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.15397631133671744, |
| "grad_norm": 1.6594223976135254, |
| "learning_rate": 4.8050825843595395e-06, |
| "loss": 0.5711, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.1548223350253807, |
| "grad_norm": 1.604934811592102, |
| "learning_rate": 4.802420903368286e-06, |
| "loss": 0.5777, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.155668358714044, |
| "grad_norm": 1.6225334405899048, |
| "learning_rate": 4.7997419196340136e-06, |
| "loss": 0.5079, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.15651438240270726, |
| "grad_norm": 1.7522289752960205, |
| "learning_rate": 4.797045653289343e-06, |
| "loss": 0.587, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.15736040609137056, |
| "grad_norm": 1.5537394285202026, |
| "learning_rate": 4.794332124596775e-06, |
| "loss": 0.5068, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.15820642978003385, |
| "grad_norm": 1.5667427778244019, |
| "learning_rate": 4.791601353948537e-06, |
| "loss": 0.6201, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.15905245346869712, |
| "grad_norm": 1.667194128036499, |
| "learning_rate": 4.788853361866429e-06, |
| "loss": 0.5411, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.1598984771573604, |
| "grad_norm": 1.434585452079773, |
| "learning_rate": 4.786088169001671e-06, |
| "loss": 0.5377, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.16074450084602368, |
| "grad_norm": 1.4391204118728638, |
| "learning_rate": 4.7833057961347476e-06, |
| "loss": 0.5865, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.16159052453468697, |
| "grad_norm": 1.5104649066925049, |
| "learning_rate": 4.78050626417525e-06, |
| "loss": 0.4837, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.16243654822335024, |
| "grad_norm": 1.483412742614746, |
| "learning_rate": 4.777689594161724e-06, |
| "loss": 0.5627, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.16328257191201354, |
| "grad_norm": 1.9065080881118774, |
| "learning_rate": 4.774855807261504e-06, |
| "loss": 0.611, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.16412859560067683, |
| "grad_norm": 1.5537798404693604, |
| "learning_rate": 4.77200492477056e-06, |
| "loss": 0.5078, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.1649746192893401, |
| "grad_norm": 1.5670864582061768, |
| "learning_rate": 4.769136968113337e-06, |
| "loss": 0.5509, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.1658206429780034, |
| "grad_norm": 1.5053881406784058, |
| "learning_rate": 4.766251958842589e-06, |
| "loss": 0.5504, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.16666666666666666, |
| "grad_norm": 1.4908168315887451, |
| "learning_rate": 4.763349918639228e-06, |
| "loss": 0.5645, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.16751269035532995, |
| "grad_norm": 1.4086905717849731, |
| "learning_rate": 4.760430869312144e-06, |
| "loss": 0.4633, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.16835871404399322, |
| "grad_norm": 1.4943495988845825, |
| "learning_rate": 4.757494832798057e-06, |
| "loss": 0.5893, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.1692047377326565, |
| "grad_norm": 1.524116039276123, |
| "learning_rate": 4.7545418311613485e-06, |
| "loss": 0.5761, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.1700507614213198, |
| "grad_norm": 1.6232064962387085, |
| "learning_rate": 4.751571886593886e-06, |
| "loss": 0.5514, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.17089678510998307, |
| "grad_norm": 1.592065453529358, |
| "learning_rate": 4.748585021414869e-06, |
| "loss": 0.586, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.17174280879864637, |
| "grad_norm": 1.39573335647583, |
| "learning_rate": 4.745581258070654e-06, |
| "loss": 0.538, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.17258883248730963, |
| "grad_norm": 1.5501444339752197, |
| "learning_rate": 4.742560619134587e-06, |
| "loss": 0.6041, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.17343485617597293, |
| "grad_norm": 1.645410418510437, |
| "learning_rate": 4.739523127306837e-06, |
| "loss": 0.5364, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.17428087986463622, |
| "grad_norm": 1.623639464378357, |
| "learning_rate": 4.736468805414218e-06, |
| "loss": 0.6014, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.1751269035532995, |
| "grad_norm": 1.521011471748352, |
| "learning_rate": 4.733397676410027e-06, |
| "loss": 0.5821, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.17597292724196278, |
| "grad_norm": 1.590009331703186, |
| "learning_rate": 4.730309763373866e-06, |
| "loss": 0.5419, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.17681895093062605, |
| "grad_norm": 1.5016567707061768, |
| "learning_rate": 4.727205089511466e-06, |
| "loss": 0.5539, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.17766497461928935, |
| "grad_norm": 1.3730658292770386, |
| "learning_rate": 4.7240836781545205e-06, |
| "loss": 0.5356, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.1785109983079526, |
| "grad_norm": 1.5500950813293457, |
| "learning_rate": 4.720945552760503e-06, |
| "loss": 0.5907, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.1793570219966159, |
| "grad_norm": 1.5380445718765259, |
| "learning_rate": 4.717790736912493e-06, |
| "loss": 0.5068, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.1802030456852792, |
| "grad_norm": 1.4724454879760742, |
| "learning_rate": 4.7146192543190005e-06, |
| "loss": 0.5491, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.18104906937394247, |
| "grad_norm": 1.558326244354248, |
| "learning_rate": 4.711431128813787e-06, |
| "loss": 0.5843, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.18189509306260576, |
| "grad_norm": 1.6515816450119019, |
| "learning_rate": 4.708226384355684e-06, |
| "loss": 0.5486, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.18274111675126903, |
| "grad_norm": 1.4587641954421997, |
| "learning_rate": 4.705005045028415e-06, |
| "loss": 0.6305, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.18358714043993232, |
| "grad_norm": 1.5102735757827759, |
| "learning_rate": 4.701767135040415e-06, |
| "loss": 0.4159, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.1844331641285956, |
| "grad_norm": 1.6922459602355957, |
| "learning_rate": 4.698512678724649e-06, |
| "loss": 0.5456, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.18527918781725888, |
| "grad_norm": 1.7045525312423706, |
| "learning_rate": 4.695241700538425e-06, |
| "loss": 0.5584, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.18612521150592218, |
| "grad_norm": 1.6846798658370972, |
| "learning_rate": 4.691954225063218e-06, |
| "loss": 0.5568, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.18697123519458544, |
| "grad_norm": 1.4572404623031616, |
| "learning_rate": 4.688650277004474e-06, |
| "loss": 0.5408, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.18781725888324874, |
| "grad_norm": 1.7677491903305054, |
| "learning_rate": 4.685329881191436e-06, |
| "loss": 0.6165, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.188663282571912, |
| "grad_norm": 1.613861322402954, |
| "learning_rate": 4.68199306257695e-06, |
| "loss": 0.5826, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.1895093062605753, |
| "grad_norm": 1.4331867694854736, |
| "learning_rate": 4.678639846237281e-06, |
| "loss": 0.54, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.19035532994923857, |
| "grad_norm": 1.5444533824920654, |
| "learning_rate": 4.675270257371922e-06, |
| "loss": 0.5222, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.19120135363790186, |
| "grad_norm": 1.3472926616668701, |
| "learning_rate": 4.671884321303407e-06, |
| "loss": 0.5211, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.19204737732656516, |
| "grad_norm": 1.4535293579101562, |
| "learning_rate": 4.668482063477118e-06, |
| "loss": 0.5718, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.19289340101522842, |
| "grad_norm": 1.6504758596420288, |
| "learning_rate": 4.665063509461098e-06, |
| "loss": 0.5433, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.19373942470389172, |
| "grad_norm": 1.2919827699661255, |
| "learning_rate": 4.661628684945851e-06, |
| "loss": 0.4668, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.19458544839255498, |
| "grad_norm": 1.582503318786621, |
| "learning_rate": 4.658177615744162e-06, |
| "loss": 0.5492, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.19543147208121828, |
| "grad_norm": 1.4641120433807373, |
| "learning_rate": 4.654710327790889e-06, |
| "loss": 0.556, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.19627749576988154, |
| "grad_norm": 1.5751663446426392, |
| "learning_rate": 4.651226847142774e-06, |
| "loss": 0.5209, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.19712351945854484, |
| "grad_norm": 1.361720085144043, |
| "learning_rate": 4.647727199978255e-06, |
| "loss": 0.5849, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.19796954314720813, |
| "grad_norm": 1.4193429946899414, |
| "learning_rate": 4.644211412597251e-06, |
| "loss": 0.4808, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.1988155668358714, |
| "grad_norm": 1.641695261001587, |
| "learning_rate": 4.640679511420983e-06, |
| "loss": 0.5782, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.1996615905245347, |
| "grad_norm": 1.509385347366333, |
| "learning_rate": 4.6371315229917644e-06, |
| "loss": 0.5397, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.20050761421319796, |
| "grad_norm": 1.431472897529602, |
| "learning_rate": 4.6335674739728055e-06, |
| "loss": 0.5817, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.20135363790186125, |
| "grad_norm": 1.4169070720672607, |
| "learning_rate": 4.629987391148012e-06, |
| "loss": 0.536, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.20219966159052452, |
| "grad_norm": 1.3376789093017578, |
| "learning_rate": 4.6263913014217826e-06, |
| "loss": 0.4489, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.20304568527918782, |
| "grad_norm": 1.4299821853637695, |
| "learning_rate": 4.622779231818811e-06, |
| "loss": 0.555, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.2038917089678511, |
| "grad_norm": 1.5063016414642334, |
| "learning_rate": 4.619151209483879e-06, |
| "loss": 0.5898, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.20473773265651438, |
| "grad_norm": 1.5440738201141357, |
| "learning_rate": 4.6155072616816515e-06, |
| "loss": 0.5395, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.20558375634517767, |
| "grad_norm": 1.6442499160766602, |
| "learning_rate": 4.611847415796476e-06, |
| "loss": 0.5488, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.20642978003384094, |
| "grad_norm": 1.5802503824234009, |
| "learning_rate": 4.608171699332174e-06, |
| "loss": 0.5221, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.20727580372250423, |
| "grad_norm": 1.5823266506195068, |
| "learning_rate": 4.604480139911836e-06, |
| "loss": 0.5678, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.20812182741116753, |
| "grad_norm": 1.3660752773284912, |
| "learning_rate": 4.600772765277607e-06, |
| "loss": 0.5177, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.2089678510998308, |
| "grad_norm": 1.495895266532898, |
| "learning_rate": 4.597049603290491e-06, |
| "loss": 0.5982, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.2098138747884941, |
| "grad_norm": 1.5751233100891113, |
| "learning_rate": 4.59331068193013e-06, |
| "loss": 0.5461, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.21065989847715735, |
| "grad_norm": 1.4056577682495117, |
| "learning_rate": 4.5895560292946e-06, |
| "loss": 0.5657, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.21150592216582065, |
| "grad_norm": 1.6429760456085205, |
| "learning_rate": 4.585785673600196e-06, |
| "loss": 0.6208, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.21235194585448391, |
| "grad_norm": 1.6389528512954712, |
| "learning_rate": 4.581999643181223e-06, |
| "loss": 0.5263, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.2131979695431472, |
| "grad_norm": 1.4250948429107666, |
| "learning_rate": 4.578197966489782e-06, |
| "loss": 0.514, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.2140439932318105, |
| "grad_norm": 1.6212941408157349, |
| "learning_rate": 4.574380672095555e-06, |
| "loss": 0.4906, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.21489001692047377, |
| "grad_norm": 1.450196385383606, |
| "learning_rate": 4.5705477886855925e-06, |
| "loss": 0.4833, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.21573604060913706, |
| "grad_norm": 1.454309344291687, |
| "learning_rate": 4.566699345064097e-06, |
| "loss": 0.5631, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.21658206429780033, |
| "grad_norm": 1.706693410873413, |
| "learning_rate": 4.562835370152206e-06, |
| "loss": 0.548, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.21742808798646363, |
| "grad_norm": 1.4211560487747192, |
| "learning_rate": 4.558955892987774e-06, |
| "loss": 0.5947, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.2182741116751269, |
| "grad_norm": 1.4968616962432861, |
| "learning_rate": 4.555060942725156e-06, |
| "loss": 0.5693, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.21912013536379019, |
| "grad_norm": 1.4211528301239014, |
| "learning_rate": 4.551150548634987e-06, |
| "loss": 0.5976, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.21996615905245348, |
| "grad_norm": 1.4713115692138672, |
| "learning_rate": 4.547224740103966e-06, |
| "loss": 0.4759, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.22081218274111675, |
| "grad_norm": 1.6762306690216064, |
| "learning_rate": 4.543283546634626e-06, |
| "loss": 0.5732, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.22165820642978004, |
| "grad_norm": 1.266420841217041, |
| "learning_rate": 4.539326997845124e-06, |
| "loss": 0.4456, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.2225042301184433, |
| "grad_norm": 1.568297266960144, |
| "learning_rate": 4.535355123469009e-06, |
| "loss": 0.5853, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.2233502538071066, |
| "grad_norm": 1.5746086835861206, |
| "learning_rate": 4.531367953355002e-06, |
| "loss": 0.5569, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.22419627749576987, |
| "grad_norm": 1.4679572582244873, |
| "learning_rate": 4.527365517466775e-06, |
| "loss": 0.4425, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.22504230118443316, |
| "grad_norm": 1.5745289325714111, |
| "learning_rate": 4.523347845882718e-06, |
| "loss": 0.5316, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.22588832487309646, |
| "grad_norm": 1.5450767278671265, |
| "learning_rate": 4.519314968795722e-06, |
| "loss": 0.5353, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.22673434856175972, |
| "grad_norm": 1.850501537322998, |
| "learning_rate": 4.515266916512945e-06, |
| "loss": 0.6068, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.22758037225042302, |
| "grad_norm": 1.3289093971252441, |
| "learning_rate": 4.511203719455588e-06, |
| "loss": 0.529, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.22842639593908629, |
| "grad_norm": 1.6736395359039307, |
| "learning_rate": 4.507125408158665e-06, |
| "loss": 0.6073, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.22927241962774958, |
| "grad_norm": 1.3271763324737549, |
| "learning_rate": 4.503032013270774e-06, |
| "loss": 0.5165, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.23011844331641285, |
| "grad_norm": 1.4673720598220825, |
| "learning_rate": 4.498923565553866e-06, |
| "loss": 0.495, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.23096446700507614, |
| "grad_norm": 1.5206209421157837, |
| "learning_rate": 4.494800095883014e-06, |
| "loss": 0.5659, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.23181049069373943, |
| "grad_norm": 1.5659880638122559, |
| "learning_rate": 4.490661635246183e-06, |
| "loss": 0.5876, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.2326565143824027, |
| "grad_norm": 1.4979114532470703, |
| "learning_rate": 4.4865082147439945e-06, |
| "loss": 0.5988, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.233502538071066, |
| "grad_norm": 1.5441852807998657, |
| "learning_rate": 4.482339865589492e-06, |
| "loss": 0.5215, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.23434856175972926, |
| "grad_norm": 1.476711630821228, |
| "learning_rate": 4.478156619107912e-06, |
| "loss": 0.5019, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.23519458544839256, |
| "grad_norm": 1.4779571294784546, |
| "learning_rate": 4.4739585067364425e-06, |
| "loss": 0.6033, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.23604060913705585, |
| "grad_norm": 1.5338369607925415, |
| "learning_rate": 4.469745560023987e-06, |
| "loss": 0.5089, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.23688663282571912, |
| "grad_norm": 1.536832332611084, |
| "learning_rate": 4.465517810630933e-06, |
| "loss": 0.4967, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.2377326565143824, |
| "grad_norm": 1.5541664361953735, |
| "learning_rate": 4.461275290328908e-06, |
| "loss": 0.5869, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.23857868020304568, |
| "grad_norm": 1.7338745594024658, |
| "learning_rate": 4.457018031000544e-06, |
| "loss": 0.5288, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.23942470389170897, |
| "grad_norm": 1.345275640487671, |
| "learning_rate": 4.452746064639239e-06, |
| "loss": 0.4971, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.24027072758037224, |
| "grad_norm": 1.4979954957962036, |
| "learning_rate": 4.448459423348911e-06, |
| "loss": 0.5437, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.24111675126903553, |
| "grad_norm": 1.5409454107284546, |
| "learning_rate": 4.444158139343763e-06, |
| "loss": 0.5521, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.24196277495769883, |
| "grad_norm": 1.4069455862045288, |
| "learning_rate": 4.439842244948036e-06, |
| "loss": 0.5609, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.2428087986463621, |
| "grad_norm": 1.5055841207504272, |
| "learning_rate": 4.435511772595773e-06, |
| "loss": 0.5308, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.2436548223350254, |
| "grad_norm": 1.7902872562408447, |
| "learning_rate": 4.4311667548305644e-06, |
| "loss": 0.613, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.24450084602368866, |
| "grad_norm": 1.5679998397827148, |
| "learning_rate": 4.426807224305315e-06, |
| "loss": 0.5521, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.24534686971235195, |
| "grad_norm": 1.5276457071304321, |
| "learning_rate": 4.422433213781991e-06, |
| "loss": 0.5454, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.24619289340101522, |
| "grad_norm": 1.563459873199463, |
| "learning_rate": 4.4180447561313765e-06, |
| "loss": 0.508, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.2470389170896785, |
| "grad_norm": 1.575172781944275, |
| "learning_rate": 4.413641884332825e-06, |
| "loss": 0.5726, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.2478849407783418, |
| "grad_norm": 1.6327418088912964, |
| "learning_rate": 4.409224631474014e-06, |
| "loss": 0.6188, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.24873096446700507, |
| "grad_norm": 1.4867552518844604, |
| "learning_rate": 4.404793030750695e-06, |
| "loss": 0.5364, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.24957698815566837, |
| "grad_norm": 1.512021541595459, |
| "learning_rate": 4.400347115466442e-06, |
| "loss": 0.5083, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.25042301184433163, |
| "grad_norm": 1.3881378173828125, |
| "learning_rate": 4.395886919032406e-06, |
| "loss": 0.5789, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.2512690355329949, |
| "grad_norm": 1.4656717777252197, |
| "learning_rate": 4.39141247496706e-06, |
| "loss": 0.5213, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.2521150592216582, |
| "grad_norm": 1.5013030767440796, |
| "learning_rate": 4.3869238168959485e-06, |
| "loss": 0.5245, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.2529610829103215, |
| "grad_norm": 1.535556435585022, |
| "learning_rate": 4.382420978551433e-06, |
| "loss": 0.5541, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.25380710659898476, |
| "grad_norm": 1.4609181880950928, |
| "learning_rate": 4.377903993772442e-06, |
| "loss": 0.5155, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.2546531302876481, |
| "grad_norm": 1.4020256996154785, |
| "learning_rate": 4.373372896504215e-06, |
| "loss": 0.5553, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.25549915397631134, |
| "grad_norm": 1.5317964553833008, |
| "learning_rate": 4.368827720798044e-06, |
| "loss": 0.5547, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.2563451776649746, |
| "grad_norm": 1.6519298553466797, |
| "learning_rate": 4.364268500811025e-06, |
| "loss": 0.5305, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.2571912013536379, |
| "grad_norm": 1.511772632598877, |
| "learning_rate": 4.359695270805795e-06, |
| "loss": 0.449, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.2580372250423012, |
| "grad_norm": 1.3912087678909302, |
| "learning_rate": 4.3551080651502755e-06, |
| "loss": 0.5184, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.25888324873096447, |
| "grad_norm": 1.5851740837097168, |
| "learning_rate": 4.350506918317416e-06, |
| "loss": 0.5489, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.25972927241962773, |
| "grad_norm": 1.4331227540969849, |
| "learning_rate": 4.345891864884937e-06, |
| "loss": 0.5338, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.26057529610829105, |
| "grad_norm": 1.487821102142334, |
| "learning_rate": 4.341262939535063e-06, |
| "loss": 0.5155, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.2614213197969543, |
| "grad_norm": 1.5234811305999756, |
| "learning_rate": 4.336620177054269e-06, |
| "loss": 0.5026, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.2622673434856176, |
| "grad_norm": 1.3057184219360352, |
| "learning_rate": 4.331963612333017e-06, |
| "loss": 0.5378, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.26311336717428085, |
| "grad_norm": 1.5772360563278198, |
| "learning_rate": 4.327293280365491e-06, |
| "loss": 0.6281, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.2639593908629442, |
| "grad_norm": 1.5020463466644287, |
| "learning_rate": 4.322609216249336e-06, |
| "loss": 0.6181, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.26480541455160744, |
| "grad_norm": 1.4958893060684204, |
| "learning_rate": 4.317911455185396e-06, |
| "loss": 0.5468, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.2656514382402707, |
| "grad_norm": 1.5898452997207642, |
| "learning_rate": 4.3132000324774485e-06, |
| "loss": 0.5702, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.26649746192893403, |
| "grad_norm": 1.8296725749969482, |
| "learning_rate": 4.308474983531936e-06, |
| "loss": 0.7021, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.2673434856175973, |
| "grad_norm": 1.6258792877197266, |
| "learning_rate": 4.303736343857704e-06, |
| "loss": 0.557, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.26818950930626057, |
| "grad_norm": 1.581331729888916, |
| "learning_rate": 4.298984149065732e-06, |
| "loss": 0.5027, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.26903553299492383, |
| "grad_norm": 1.3485808372497559, |
| "learning_rate": 4.294218434868869e-06, |
| "loss": 0.4756, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.26988155668358715, |
| "grad_norm": 1.4658807516098022, |
| "learning_rate": 4.289439237081557e-06, |
| "loss": 0.5321, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.2707275803722504, |
| "grad_norm": 1.616141438484192, |
| "learning_rate": 4.284646591619575e-06, |
| "loss": 0.5233, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.2715736040609137, |
| "grad_norm": 1.5891221761703491, |
| "learning_rate": 4.2798405344997545e-06, |
| "loss": 0.5821, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.272419627749577, |
| "grad_norm": 1.4427226781845093, |
| "learning_rate": 4.2750211018397204e-06, |
| "loss": 0.4998, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.2732656514382403, |
| "grad_norm": 1.6634961366653442, |
| "learning_rate": 4.270188329857613e-06, |
| "loss": 0.6044, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.27411167512690354, |
| "grad_norm": 1.424747109413147, |
| "learning_rate": 4.2653422548718195e-06, |
| "loss": 0.5953, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.2749576988155668, |
| "grad_norm": 1.5241210460662842, |
| "learning_rate": 4.260482913300697e-06, |
| "loss": 0.576, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.27580372250423013, |
| "grad_norm": 1.4235581159591675, |
| "learning_rate": 4.255610341662304e-06, |
| "loss": 0.5074, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.2766497461928934, |
| "grad_norm": 1.5771161317825317, |
| "learning_rate": 4.2507245765741215e-06, |
| "loss": 0.5325, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.27749576988155666, |
| "grad_norm": 1.3609672784805298, |
| "learning_rate": 4.245825654752781e-06, |
| "loss": 0.5146, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.27834179357022, |
| "grad_norm": 1.4064686298370361, |
| "learning_rate": 4.240913613013785e-06, |
| "loss": 0.5279, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.27918781725888325, |
| "grad_norm": 1.3830342292785645, |
| "learning_rate": 4.235988488271235e-06, |
| "loss": 0.492, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.2800338409475465, |
| "grad_norm": 1.4581482410430908, |
| "learning_rate": 4.231050317537548e-06, |
| "loss": 0.5313, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.2808798646362098, |
| "grad_norm": 1.6080012321472168, |
| "learning_rate": 4.226099137923186e-06, |
| "loss": 0.5134, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.2817258883248731, |
| "grad_norm": 1.5522176027297974, |
| "learning_rate": 4.221134986636371e-06, |
| "loss": 0.562, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.2825719120135364, |
| "grad_norm": 1.5215039253234863, |
| "learning_rate": 4.216157900982808e-06, |
| "loss": 0.6292, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.28341793570219964, |
| "grad_norm": 1.5160623788833618, |
| "learning_rate": 4.211167918365402e-06, |
| "loss": 0.5636, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.28426395939086296, |
| "grad_norm": 1.3579323291778564, |
| "learning_rate": 4.206165076283983e-06, |
| "loss": 0.5459, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.28510998307952623, |
| "grad_norm": 1.6921268701553345, |
| "learning_rate": 4.201149412335015e-06, |
| "loss": 0.5202, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.2859560067681895, |
| "grad_norm": 1.5123941898345947, |
| "learning_rate": 4.196120964211322e-06, |
| "loss": 0.6074, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.2868020304568528, |
| "grad_norm": 1.4805959463119507, |
| "learning_rate": 4.1910797697018026e-06, |
| "loss": 0.5621, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.2876480541455161, |
| "grad_norm": 1.5556724071502686, |
| "learning_rate": 4.1860258666911415e-06, |
| "loss": 0.5228, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.28849407783417935, |
| "grad_norm": 1.430757999420166, |
| "learning_rate": 4.180959293159529e-06, |
| "loss": 0.5698, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.2893401015228426, |
| "grad_norm": 1.419783115386963, |
| "learning_rate": 4.175880087182376e-06, |
| "loss": 0.5296, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.29018612521150594, |
| "grad_norm": 1.7747116088867188, |
| "learning_rate": 4.170788286930024e-06, |
| "loss": 0.5129, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.2910321489001692, |
| "grad_norm": 1.5256282091140747, |
| "learning_rate": 4.165683930667464e-06, |
| "loss": 0.6247, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.2918781725888325, |
| "grad_norm": 1.5945425033569336, |
| "learning_rate": 4.160567056754044e-06, |
| "loss": 0.555, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.2927241962774958, |
| "grad_norm": 1.4873846769332886, |
| "learning_rate": 4.155437703643182e-06, |
| "loss": 0.576, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.29357021996615906, |
| "grad_norm": 1.5128968954086304, |
| "learning_rate": 4.1502959098820774e-06, |
| "loss": 0.5328, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.29441624365482233, |
| "grad_norm": 1.6180955171585083, |
| "learning_rate": 4.145141714111421e-06, |
| "loss": 0.568, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.2952622673434856, |
| "grad_norm": 1.605104923248291, |
| "learning_rate": 4.139975155065109e-06, |
| "loss": 0.5578, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.2961082910321489, |
| "grad_norm": 1.6930770874023438, |
| "learning_rate": 4.134796271569942e-06, |
| "loss": 0.4953, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.2969543147208122, |
| "grad_norm": 1.4728012084960938, |
| "learning_rate": 4.129605102545341e-06, |
| "loss": 0.4959, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.29780033840947545, |
| "grad_norm": 1.610328197479248, |
| "learning_rate": 4.124401687003057e-06, |
| "loss": 0.5775, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.2986463620981388, |
| "grad_norm": 1.7682827711105347, |
| "learning_rate": 4.119186064046868e-06, |
| "loss": 0.548, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.29949238578680204, |
| "grad_norm": 1.525848150253296, |
| "learning_rate": 4.113958272872294e-06, |
| "loss": 0.5324, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.3003384094754653, |
| "grad_norm": 1.4608542919158936, |
| "learning_rate": 4.1087183527663e-06, |
| "loss": 0.434, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.3011844331641286, |
| "grad_norm": 1.5706309080123901, |
| "learning_rate": 4.103466343106999e-06, |
| "loss": 0.4819, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.3020304568527919, |
| "grad_norm": 1.5604802370071411, |
| "learning_rate": 4.098202283363356e-06, |
| "loss": 0.6517, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.30287648054145516, |
| "grad_norm": 1.406082272529602, |
| "learning_rate": 4.092926213094897e-06, |
| "loss": 0.4477, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.30372250423011843, |
| "grad_norm": 1.4490677118301392, |
| "learning_rate": 4.087638171951401e-06, |
| "loss": 0.5426, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.30456852791878175, |
| "grad_norm": 1.472643256187439, |
| "learning_rate": 4.082338199672615e-06, |
| "loss": 0.5259, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.305414551607445, |
| "grad_norm": 1.5344111919403076, |
| "learning_rate": 4.077026336087944e-06, |
| "loss": 0.5191, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.3062605752961083, |
| "grad_norm": 1.4391659498214722, |
| "learning_rate": 4.071702621116158e-06, |
| "loss": 0.5136, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.30710659898477155, |
| "grad_norm": 1.6155903339385986, |
| "learning_rate": 4.066367094765091e-06, |
| "loss": 0.5033, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.3079526226734349, |
| "grad_norm": 1.7278281450271606, |
| "learning_rate": 4.0610197971313395e-06, |
| "loss": 0.5509, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.30879864636209814, |
| "grad_norm": 1.4298288822174072, |
| "learning_rate": 4.0556607683999605e-06, |
| "loss": 0.5406, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.3096446700507614, |
| "grad_norm": 1.405069351196289, |
| "learning_rate": 4.050290048844171e-06, |
| "loss": 0.5179, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.31049069373942473, |
| "grad_norm": 1.6136541366577148, |
| "learning_rate": 4.044907678825045e-06, |
| "loss": 0.5519, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.311336717428088, |
| "grad_norm": 1.4015752077102661, |
| "learning_rate": 4.03951369879121e-06, |
| "loss": 0.5596, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.31218274111675126, |
| "grad_norm": 1.5532114505767822, |
| "learning_rate": 4.034108149278544e-06, |
| "loss": 0.5302, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.3130287648054145, |
| "grad_norm": 1.4932410717010498, |
| "learning_rate": 4.028691070909867e-06, |
| "loss": 0.5539, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.31387478849407785, |
| "grad_norm": 1.5183504819869995, |
| "learning_rate": 4.0232625043946416e-06, |
| "loss": 0.5717, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.3147208121827411, |
| "grad_norm": 1.4620509147644043, |
| "learning_rate": 4.017822490528664e-06, |
| "loss": 0.5102, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.3155668358714044, |
| "grad_norm": 1.389592170715332, |
| "learning_rate": 4.012371070193753e-06, |
| "loss": 0.5388, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.3164128595600677, |
| "grad_norm": 1.6649476289749146, |
| "learning_rate": 4.006908284357453e-06, |
| "loss": 0.6184, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.31725888324873097, |
| "grad_norm": 1.5024865865707397, |
| "learning_rate": 4.001434174072718e-06, |
| "loss": 0.5858, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.31810490693739424, |
| "grad_norm": 1.4840333461761475, |
| "learning_rate": 3.995948780477605e-06, |
| "loss": 0.528, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.3189509306260575, |
| "grad_norm": 1.407777190208435, |
| "learning_rate": 3.990452144794966e-06, |
| "loss": 0.5988, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.3197969543147208, |
| "grad_norm": 1.409677505493164, |
| "learning_rate": 3.984944308332138e-06, |
| "loss": 0.5369, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.3206429780033841, |
| "grad_norm": 1.607074499130249, |
| "learning_rate": 3.97942531248063e-06, |
| "loss": 0.5795, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.32148900169204736, |
| "grad_norm": 1.291720986366272, |
| "learning_rate": 3.973895198715816e-06, |
| "loss": 0.5359, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.3223350253807107, |
| "grad_norm": 1.5747040510177612, |
| "learning_rate": 3.968354008596621e-06, |
| "loss": 0.5902, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.32318104906937395, |
| "grad_norm": 1.5991111993789673, |
| "learning_rate": 3.96280178376521e-06, |
| "loss": 0.5414, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.3240270727580372, |
| "grad_norm": 1.3984931707382202, |
| "learning_rate": 3.957238565946672e-06, |
| "loss": 0.4185, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.3248730964467005, |
| "grad_norm": 1.4653754234313965, |
| "learning_rate": 3.951664396948709e-06, |
| "loss": 0.6029, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.3257191201353638, |
| "grad_norm": 1.4449567794799805, |
| "learning_rate": 3.9460793186613235e-06, |
| "loss": 0.4723, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.32656514382402707, |
| "grad_norm": 1.3438575267791748, |
| "learning_rate": 3.9404833730564975e-06, |
| "loss": 0.4138, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.32741116751269034, |
| "grad_norm": 1.423072099685669, |
| "learning_rate": 3.934876602187886e-06, |
| "loss": 0.5623, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.32825719120135366, |
| "grad_norm": 1.52696692943573, |
| "learning_rate": 3.929259048190492e-06, |
| "loss": 0.4729, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.3291032148900169, |
| "grad_norm": 1.3745728731155396, |
| "learning_rate": 3.923630753280358e-06, |
| "loss": 0.5163, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.3299492385786802, |
| "grad_norm": 1.48550546169281, |
| "learning_rate": 3.917991759754239e-06, |
| "loss": 0.5379, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.33079526226734346, |
| "grad_norm": 1.4326492547988892, |
| "learning_rate": 3.9123421099892955e-06, |
| "loss": 0.5736, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.3316412859560068, |
| "grad_norm": 1.6504223346710205, |
| "learning_rate": 3.906681846442768e-06, |
| "loss": 0.5303, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.33248730964467005, |
| "grad_norm": 1.511051893234253, |
| "learning_rate": 3.9010110116516595e-06, |
| "loss": 0.5524, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 1.604653000831604, |
| "learning_rate": 3.895329648232416e-06, |
| "loss": 0.5777, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.33417935702199664, |
| "grad_norm": 1.5326584577560425, |
| "learning_rate": 3.889637798880608e-06, |
| "loss": 0.5853, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.3350253807106599, |
| "grad_norm": 1.4475630521774292, |
| "learning_rate": 3.883935506370605e-06, |
| "loss": 0.4671, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.33587140439932317, |
| "grad_norm": 1.4971731901168823, |
| "learning_rate": 3.8782228135552615e-06, |
| "loss": 0.6189, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.33671742808798644, |
| "grad_norm": 1.8308625221252441, |
| "learning_rate": 3.872499763365585e-06, |
| "loss": 0.518, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.33756345177664976, |
| "grad_norm": 1.6570651531219482, |
| "learning_rate": 3.8667663988104245e-06, |
| "loss": 0.5731, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.338409475465313, |
| "grad_norm": 1.411230444908142, |
| "learning_rate": 3.861022762976136e-06, |
| "loss": 0.5218, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.3392554991539763, |
| "grad_norm": 1.4464528560638428, |
| "learning_rate": 3.85526889902627e-06, |
| "loss": 0.5282, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.3401015228426396, |
| "grad_norm": 1.4024648666381836, |
| "learning_rate": 3.849504850201238e-06, |
| "loss": 0.529, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.3409475465313029, |
| "grad_norm": 1.5347040891647339, |
| "learning_rate": 3.84373065981799e-06, |
| "loss": 0.5267, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.34179357021996615, |
| "grad_norm": 1.5913515090942383, |
| "learning_rate": 3.837946371269696e-06, |
| "loss": 0.5788, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.3426395939086294, |
| "grad_norm": 1.4288476705551147, |
| "learning_rate": 3.832152028025406e-06, |
| "loss": 0.5769, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.34348561759729274, |
| "grad_norm": 1.577329397201538, |
| "learning_rate": 3.826347673629738e-06, |
| "loss": 0.6016, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.344331641285956, |
| "grad_norm": 1.6311994791030884, |
| "learning_rate": 3.820533351702538e-06, |
| "loss": 0.5265, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.34517766497461927, |
| "grad_norm": 1.445512294769287, |
| "learning_rate": 3.8147091059385646e-06, |
| "loss": 0.4741, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.3460236886632826, |
| "grad_norm": 1.338087558746338, |
| "learning_rate": 3.80887498010715e-06, |
| "loss": 0.4751, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.34686971235194586, |
| "grad_norm": 1.413231611251831, |
| "learning_rate": 3.8030310180518748e-06, |
| "loss": 0.4685, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.3477157360406091, |
| "grad_norm": 1.547485113143921, |
| "learning_rate": 3.7971772636902425e-06, |
| "loss": 0.5329, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.34856175972927245, |
| "grad_norm": 1.6648271083831787, |
| "learning_rate": 3.791313761013343e-06, |
| "loss": 0.4467, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.3494077834179357, |
| "grad_norm": 1.4436287879943848, |
| "learning_rate": 3.7854405540855268e-06, |
| "loss": 0.4634, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.350253807106599, |
| "grad_norm": 1.4640777111053467, |
| "learning_rate": 3.77955768704407e-06, |
| "loss": 0.574, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.35109983079526225, |
| "grad_norm": 1.6034146547317505, |
| "learning_rate": 3.7736652040988474e-06, |
| "loss": 0.5958, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.35194585448392557, |
| "grad_norm": 1.3981318473815918, |
| "learning_rate": 3.7677631495319953e-06, |
| "loss": 0.5729, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.35279187817258884, |
| "grad_norm": 1.4371929168701172, |
| "learning_rate": 3.761851567697583e-06, |
| "loss": 0.4701, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.3536379018612521, |
| "grad_norm": 1.4483686685562134, |
| "learning_rate": 3.7559305030212746e-06, |
| "loss": 0.6109, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.3544839255499154, |
| "grad_norm": 1.5899584293365479, |
| "learning_rate": 3.7500000000000005e-06, |
| "loss": 0.5399, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.3553299492385787, |
| "grad_norm": 1.4690099954605103, |
| "learning_rate": 3.744060103201619e-06, |
| "loss": 0.5434, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.35617597292724196, |
| "grad_norm": 1.5741721391677856, |
| "learning_rate": 3.7381108572645836e-06, |
| "loss": 0.5081, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.3570219966159052, |
| "grad_norm": 1.4619410037994385, |
| "learning_rate": 3.7321523068976068e-06, |
| "loss": 0.4984, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.35786802030456855, |
| "grad_norm": 1.5194240808486938, |
| "learning_rate": 3.726184496879323e-06, |
| "loss": 0.5694, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.3587140439932318, |
| "grad_norm": 1.4686346054077148, |
| "learning_rate": 3.7202074720579544e-06, |
| "loss": 0.4949, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.3595600676818951, |
| "grad_norm": 1.7008684873580933, |
| "learning_rate": 3.7142212773509727e-06, |
| "loss": 0.577, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.3604060913705584, |
| "grad_norm": 1.681458592414856, |
| "learning_rate": 3.7082259577447604e-06, |
| "loss": 0.5924, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.36125211505922167, |
| "grad_norm": 1.490136981010437, |
| "learning_rate": 3.702221558294274e-06, |
| "loss": 0.4885, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.36209813874788493, |
| "grad_norm": 1.41923189163208, |
| "learning_rate": 3.696208124122706e-06, |
| "loss": 0.4789, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.3629441624365482, |
| "grad_norm": 1.781841516494751, |
| "learning_rate": 3.690185700421145e-06, |
| "loss": 0.5364, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.3637901861252115, |
| "grad_norm": 1.4928046464920044, |
| "learning_rate": 3.6841543324482356e-06, |
| "loss": 0.4776, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.3646362098138748, |
| "grad_norm": 1.4005343914031982, |
| "learning_rate": 3.6781140655298374e-06, |
| "loss": 0.4997, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.36548223350253806, |
| "grad_norm": 1.7181766033172607, |
| "learning_rate": 3.6720649450586885e-06, |
| "loss": 0.5811, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.3663282571912014, |
| "grad_norm": 1.6100702285766602, |
| "learning_rate": 3.6660070164940614e-06, |
| "loss": 0.571, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.36717428087986465, |
| "grad_norm": 1.5206496715545654, |
| "learning_rate": 3.659940325361419e-06, |
| "loss": 0.4898, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.3680203045685279, |
| "grad_norm": 1.4749397039413452, |
| "learning_rate": 3.6538649172520774e-06, |
| "loss": 0.5013, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.3688663282571912, |
| "grad_norm": 1.568749189376831, |
| "learning_rate": 3.64778083782286e-06, |
| "loss": 0.5309, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.3697123519458545, |
| "grad_norm": 1.6498618125915527, |
| "learning_rate": 3.641688132795757e-06, |
| "loss": 0.6235, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.37055837563451777, |
| "grad_norm": 1.494895577430725, |
| "learning_rate": 3.635586847957577e-06, |
| "loss": 0.6193, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.37140439932318103, |
| "grad_norm": 1.5606284141540527, |
| "learning_rate": 3.6294770291596083e-06, |
| "loss": 0.5419, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.37225042301184436, |
| "grad_norm": 1.5283830165863037, |
| "learning_rate": 3.6233587223172717e-06, |
| "loss": 0.5235, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.3730964467005076, |
| "grad_norm": 1.5230801105499268, |
| "learning_rate": 3.6172319734097764e-06, |
| "loss": 0.6246, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.3739424703891709, |
| "grad_norm": 1.4073126316070557, |
| "learning_rate": 3.611096828479773e-06, |
| "loss": 0.5065, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.37478849407783416, |
| "grad_norm": 1.568429946899414, |
| "learning_rate": 3.604953333633009e-06, |
| "loss": 0.5097, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.3756345177664975, |
| "grad_norm": 1.5261017084121704, |
| "learning_rate": 3.59880153503798e-06, |
| "loss": 0.5877, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.37648054145516074, |
| "grad_norm": 1.5816318988800049, |
| "learning_rate": 3.5926414789255877e-06, |
| "loss": 0.5142, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.377326565143824, |
| "grad_norm": 1.4384475946426392, |
| "learning_rate": 3.586473211588787e-06, |
| "loss": 0.482, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.37817258883248733, |
| "grad_norm": 1.500186800956726, |
| "learning_rate": 3.5802967793822386e-06, |
| "loss": 0.5422, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.3790186125211506, |
| "grad_norm": 1.3965848684310913, |
| "learning_rate": 3.5741122287219665e-06, |
| "loss": 0.4749, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.37986463620981387, |
| "grad_norm": 1.4717751741409302, |
| "learning_rate": 3.567919606085004e-06, |
| "loss": 0.5679, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.38071065989847713, |
| "grad_norm": 1.4586070775985718, |
| "learning_rate": 3.561718958009042e-06, |
| "loss": 0.4985, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.38155668358714045, |
| "grad_norm": 1.437875747680664, |
| "learning_rate": 3.555510331092087e-06, |
| "loss": 0.4932, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.3824027072758037, |
| "grad_norm": 1.5008926391601562, |
| "learning_rate": 3.549293771992104e-06, |
| "loss": 0.5441, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.383248730964467, |
| "grad_norm": 1.5105735063552856, |
| "learning_rate": 3.5430693274266694e-06, |
| "loss": 0.5371, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.3840947546531303, |
| "grad_norm": 1.4934806823730469, |
| "learning_rate": 3.5368370441726197e-06, |
| "loss": 0.5097, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.3849407783417936, |
| "grad_norm": 1.5622752904891968, |
| "learning_rate": 3.5305969690656985e-06, |
| "loss": 0.5208, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.38578680203045684, |
| "grad_norm": 1.5283855199813843, |
| "learning_rate": 3.5243491490002056e-06, |
| "loss": 0.5266, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.3866328257191201, |
| "grad_norm": 1.4231075048446655, |
| "learning_rate": 3.5180936309286444e-06, |
| "loss": 0.5582, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.38747884940778343, |
| "grad_norm": 1.4727346897125244, |
| "learning_rate": 3.5118304618613684e-06, |
| "loss": 0.5029, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.3883248730964467, |
| "grad_norm": 1.4854283332824707, |
| "learning_rate": 3.5055596888662295e-06, |
| "loss": 0.5081, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.38917089678510997, |
| "grad_norm": 1.3666778802871704, |
| "learning_rate": 3.4992813590682225e-06, |
| "loss": 0.4756, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.3900169204737733, |
| "grad_norm": 1.7599936723709106, |
| "learning_rate": 3.4929955196491315e-06, |
| "loss": 0.498, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.39086294416243655, |
| "grad_norm": 1.5538440942764282, |
| "learning_rate": 3.4867022178471764e-06, |
| "loss": 0.6403, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.3917089678510998, |
| "grad_norm": 1.4417282342910767, |
| "learning_rate": 3.4804015009566573e-06, |
| "loss": 0.5727, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.3925549915397631, |
| "grad_norm": 1.512620449066162, |
| "learning_rate": 3.4740934163275974e-06, |
| "loss": 0.6629, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.3934010152284264, |
| "grad_norm": 1.446155071258545, |
| "learning_rate": 3.46777801136539e-06, |
| "loss": 0.5477, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.3942470389170897, |
| "grad_norm": 1.4650105237960815, |
| "learning_rate": 3.4614553335304407e-06, |
| "loss": 0.5874, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.39509306260575294, |
| "grad_norm": 1.7039233446121216, |
| "learning_rate": 3.455125430337809e-06, |
| "loss": 0.5101, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.39593908629441626, |
| "grad_norm": 1.5379666090011597, |
| "learning_rate": 3.4487883493568566e-06, |
| "loss": 0.5835, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.39678510998307953, |
| "grad_norm": 1.4904314279556274, |
| "learning_rate": 3.442444138210883e-06, |
| "loss": 0.5217, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.3976311336717428, |
| "grad_norm": 1.5643348693847656, |
| "learning_rate": 3.436092844576774e-06, |
| "loss": 0.5183, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.39847715736040606, |
| "grad_norm": 1.4762258529663086, |
| "learning_rate": 3.4297345161846373e-06, |
| "loss": 0.5368, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.3993231810490694, |
| "grad_norm": 1.496219277381897, |
| "learning_rate": 3.4233692008174497e-06, |
| "loss": 0.4962, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.40016920473773265, |
| "grad_norm": 1.5690780878067017, |
| "learning_rate": 3.416996946310694e-06, |
| "loss": 0.4984, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.4010152284263959, |
| "grad_norm": 1.4688186645507812, |
| "learning_rate": 3.4106178005520006e-06, |
| "loss": 0.5794, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.40186125211505924, |
| "grad_norm": 1.5053479671478271, |
| "learning_rate": 3.4042318114807893e-06, |
| "loss": 0.4901, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.4027072758037225, |
| "grad_norm": 1.3780537843704224, |
| "learning_rate": 3.3978390270879056e-06, |
| "loss": 0.558, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.4035532994923858, |
| "grad_norm": 1.382036566734314, |
| "learning_rate": 3.3914394954152635e-06, |
| "loss": 0.5202, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.40439932318104904, |
| "grad_norm": 1.469938039779663, |
| "learning_rate": 3.385033264555482e-06, |
| "loss": 0.5, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.40524534686971236, |
| "grad_norm": 1.4906823635101318, |
| "learning_rate": 3.3786203826515235e-06, |
| "loss": 0.4972, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.40609137055837563, |
| "grad_norm": 1.448851466178894, |
| "learning_rate": 3.3722008978963365e-06, |
| "loss": 0.5561, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.4069373942470389, |
| "grad_norm": 1.4323585033416748, |
| "learning_rate": 3.3657748585324874e-06, |
| "loss": 0.5233, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.4077834179357022, |
| "grad_norm": 1.6246329545974731, |
| "learning_rate": 3.3593423128518017e-06, |
| "loss": 0.6112, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.4086294416243655, |
| "grad_norm": 1.393144965171814, |
| "learning_rate": 3.352903309194999e-06, |
| "loss": 0.4907, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.40947546531302875, |
| "grad_norm": 1.4356099367141724, |
| "learning_rate": 3.3464578959513322e-06, |
| "loss": 0.5602, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.4103214890016921, |
| "grad_norm": 1.3799232244491577, |
| "learning_rate": 3.3400061215582213e-06, |
| "loss": 0.4997, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.41116751269035534, |
| "grad_norm": 1.570279836654663, |
| "learning_rate": 3.3335480345008907e-06, |
| "loss": 0.4928, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.4120135363790186, |
| "grad_norm": 1.4619395732879639, |
| "learning_rate": 3.3270836833120047e-06, |
| "loss": 0.4955, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.4128595600676819, |
| "grad_norm": 1.5027363300323486, |
| "learning_rate": 3.3206131165713023e-06, |
| "loss": 0.4928, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.4137055837563452, |
| "grad_norm": 1.5609545707702637, |
| "learning_rate": 3.314136382905234e-06, |
| "loss": 0.5996, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.41455160744500846, |
| "grad_norm": 1.6889272928237915, |
| "learning_rate": 3.3076535309865925e-06, |
| "loss": 0.604, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.41539763113367173, |
| "grad_norm": 1.5524094104766846, |
| "learning_rate": 3.301164609534151e-06, |
| "loss": 0.5188, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.41624365482233505, |
| "grad_norm": 1.5471911430358887, |
| "learning_rate": 3.2946696673122953e-06, |
| "loss": 0.56, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.4170896785109983, |
| "grad_norm": 1.515896201133728, |
| "learning_rate": 3.288168753130657e-06, |
| "loss": 0.5333, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.4179357021996616, |
| "grad_norm": 1.5967165231704712, |
| "learning_rate": 3.2816619158437463e-06, |
| "loss": 0.5538, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.41878172588832485, |
| "grad_norm": 1.4772549867630005, |
| "learning_rate": 3.2751492043505873e-06, |
| "loss": 0.5723, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.4196277495769882, |
| "grad_norm": 1.6248629093170166, |
| "learning_rate": 3.268630667594348e-06, |
| "loss": 0.5796, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.42047377326565144, |
| "grad_norm": 1.639542579650879, |
| "learning_rate": 3.2621063545619734e-06, |
| "loss": 0.5001, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.4213197969543147, |
| "grad_norm": 1.406894564628601, |
| "learning_rate": 3.2555763142838175e-06, |
| "loss": 0.414, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.42216582064297803, |
| "grad_norm": 1.5696587562561035, |
| "learning_rate": 3.2490405958332743e-06, |
| "loss": 0.4809, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.4230118443316413, |
| "grad_norm": 1.6583582162857056, |
| "learning_rate": 3.24249924832641e-06, |
| "loss": 0.4467, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.42385786802030456, |
| "grad_norm": 1.4358595609664917, |
| "learning_rate": 3.2359523209215933e-06, |
| "loss": 0.5417, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.42470389170896783, |
| "grad_norm": 1.497576355934143, |
| "learning_rate": 3.2293998628191246e-06, |
| "loss": 0.5172, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.42554991539763115, |
| "grad_norm": 1.3067649602890015, |
| "learning_rate": 3.2228419232608692e-06, |
| "loss": 0.488, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.4263959390862944, |
| "grad_norm": 1.7320537567138672, |
| "learning_rate": 3.2162785515298854e-06, |
| "loss": 0.5953, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.4272419627749577, |
| "grad_norm": 1.4349291324615479, |
| "learning_rate": 3.2097097969500545e-06, |
| "loss": 0.4994, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.428087986463621, |
| "grad_norm": 1.5768568515777588, |
| "learning_rate": 3.2031357088857083e-06, |
| "loss": 0.6069, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.4289340101522843, |
| "grad_norm": 1.8132436275482178, |
| "learning_rate": 3.196556336741261e-06, |
| "loss": 0.5251, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.42978003384094754, |
| "grad_norm": 1.570563793182373, |
| "learning_rate": 3.1899717299608384e-06, |
| "loss": 0.5625, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.4306260575296108, |
| "grad_norm": 1.51616632938385, |
| "learning_rate": 3.1833819380279028e-06, |
| "loss": 0.5653, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.43147208121827413, |
| "grad_norm": 1.4574007987976074, |
| "learning_rate": 3.1767870104648834e-06, |
| "loss": 0.4707, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.4323181049069374, |
| "grad_norm": 1.5099055767059326, |
| "learning_rate": 3.1701869968328036e-06, |
| "loss": 0.4206, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.43316412859560066, |
| "grad_norm": 1.533036708831787, |
| "learning_rate": 3.1635819467309094e-06, |
| "loss": 0.5782, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.434010152284264, |
| "grad_norm": 1.3681944608688354, |
| "learning_rate": 3.156971909796295e-06, |
| "loss": 0.5213, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.43485617597292725, |
| "grad_norm": 1.522709846496582, |
| "learning_rate": 3.150356935703531e-06, |
| "loss": 0.496, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.4357021996615905, |
| "grad_norm": 1.6741329431533813, |
| "learning_rate": 3.143737074164292e-06, |
| "loss": 0.5972, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.4365482233502538, |
| "grad_norm": 1.6000012159347534, |
| "learning_rate": 3.1371123749269804e-06, |
| "loss": 0.5715, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.4373942470389171, |
| "grad_norm": 1.5590283870697021, |
| "learning_rate": 3.1304828877763567e-06, |
| "loss": 0.5033, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.43824027072758037, |
| "grad_norm": 1.471935749053955, |
| "learning_rate": 3.123848662533157e-06, |
| "loss": 0.4956, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.43908629441624364, |
| "grad_norm": 1.4592459201812744, |
| "learning_rate": 3.1172097490537308e-06, |
| "loss": 0.5009, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.43993231810490696, |
| "grad_norm": 1.4220460653305054, |
| "learning_rate": 3.110566197229655e-06, |
| "loss": 0.5181, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.4407783417935702, |
| "grad_norm": 1.4611165523529053, |
| "learning_rate": 3.1039180569873667e-06, |
| "loss": 0.5199, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.4416243654822335, |
| "grad_norm": 1.3216363191604614, |
| "learning_rate": 3.0972653782877836e-06, |
| "loss": 0.4938, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.44247038917089676, |
| "grad_norm": 1.5299708843231201, |
| "learning_rate": 3.0906082111259313e-06, |
| "loss": 0.6092, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.4433164128595601, |
| "grad_norm": 1.4188978672027588, |
| "learning_rate": 3.083946605530564e-06, |
| "loss": 0.4906, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.44416243654822335, |
| "grad_norm": 1.493012547492981, |
| "learning_rate": 3.0772806115637934e-06, |
| "loss": 0.6105, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.4450084602368866, |
| "grad_norm": 1.3560352325439453, |
| "learning_rate": 3.070610279320708e-06, |
| "loss": 0.465, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.44585448392554994, |
| "grad_norm": 1.5919266939163208, |
| "learning_rate": 3.063935658928998e-06, |
| "loss": 0.4673, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.4467005076142132, |
| "grad_norm": 1.4098035097122192, |
| "learning_rate": 3.0572568005485825e-06, |
| "loss": 0.5447, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.44754653130287647, |
| "grad_norm": 1.4572504758834839, |
| "learning_rate": 3.050573754371228e-06, |
| "loss": 0.5234, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.44839255499153974, |
| "grad_norm": 1.505883812904358, |
| "learning_rate": 3.0438865706201683e-06, |
| "loss": 0.5126, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.44923857868020306, |
| "grad_norm": 1.484840750694275, |
| "learning_rate": 3.0371952995497357e-06, |
| "loss": 0.5136, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.4500846023688663, |
| "grad_norm": 1.516889214515686, |
| "learning_rate": 3.0304999914449774e-06, |
| "loss": 0.5783, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.4509306260575296, |
| "grad_norm": 1.399756669998169, |
| "learning_rate": 3.02380069662128e-06, |
| "loss": 0.501, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.4517766497461929, |
| "grad_norm": 1.6473559141159058, |
| "learning_rate": 3.0170974654239877e-06, |
| "loss": 0.5147, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.4526226734348562, |
| "grad_norm": 1.333022117614746, |
| "learning_rate": 3.0103903482280295e-06, |
| "loss": 0.4848, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.45346869712351945, |
| "grad_norm": 1.3775697946548462, |
| "learning_rate": 3.0036793954375358e-06, |
| "loss": 0.4997, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.4543147208121827, |
| "grad_norm": 1.4264084100723267, |
| "learning_rate": 2.9969646574854632e-06, |
| "loss": 0.4977, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.45516074450084604, |
| "grad_norm": 1.5796583890914917, |
| "learning_rate": 2.9902461848332128e-06, |
| "loss": 0.6589, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.4560067681895093, |
| "grad_norm": 1.5686849355697632, |
| "learning_rate": 2.9835240279702516e-06, |
| "loss": 0.4683, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.45685279187817257, |
| "grad_norm": 1.3858373165130615, |
| "learning_rate": 2.9767982374137344e-06, |
| "loss": 0.5051, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.4576988155668359, |
| "grad_norm": 1.6889417171478271, |
| "learning_rate": 2.9700688637081233e-06, |
| "loss": 0.5072, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.45854483925549916, |
| "grad_norm": 1.6027723550796509, |
| "learning_rate": 2.9633359574248077e-06, |
| "loss": 0.5958, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.4593908629441624, |
| "grad_norm": 1.4570367336273193, |
| "learning_rate": 2.9565995691617242e-06, |
| "loss": 0.5182, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.4602368866328257, |
| "grad_norm": 1.4474014043807983, |
| "learning_rate": 2.9498597495429773e-06, |
| "loss": 0.523, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.461082910321489, |
| "grad_norm": 1.5590665340423584, |
| "learning_rate": 2.943116549218457e-06, |
| "loss": 0.5413, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.4619289340101523, |
| "grad_norm": 1.539737582206726, |
| "learning_rate": 2.9363700188634597e-06, |
| "loss": 0.6038, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.46277495769881555, |
| "grad_norm": 1.543686032295227, |
| "learning_rate": 2.929620209178307e-06, |
| "loss": 0.4771, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.46362098138747887, |
| "grad_norm": 1.4283702373504639, |
| "learning_rate": 2.9228671708879664e-06, |
| "loss": 0.5311, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.46446700507614214, |
| "grad_norm": 1.365286946296692, |
| "learning_rate": 2.916110954741667e-06, |
| "loss": 0.485, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.4653130287648054, |
| "grad_norm": 1.4654829502105713, |
| "learning_rate": 2.909351611512518e-06, |
| "loss": 0.4788, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.46615905245346867, |
| "grad_norm": 1.5056861639022827, |
| "learning_rate": 2.902589191997132e-06, |
| "loss": 0.5171, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.467005076142132, |
| "grad_norm": 1.3876997232437134, |
| "learning_rate": 2.8958237470152374e-06, |
| "loss": 0.5373, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.46785109983079526, |
| "grad_norm": 1.370692253112793, |
| "learning_rate": 2.889055327409301e-06, |
| "loss": 0.4746, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.4686971235194585, |
| "grad_norm": 1.5535578727722168, |
| "learning_rate": 2.882283984044141e-06, |
| "loss": 0.4739, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.46954314720812185, |
| "grad_norm": 1.6460717916488647, |
| "learning_rate": 2.8755097678065513e-06, |
| "loss": 0.5865, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.4703891708967851, |
| "grad_norm": 1.4789174795150757, |
| "learning_rate": 2.8687327296049126e-06, |
| "loss": 0.5395, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.4712351945854484, |
| "grad_norm": 1.4689819812774658, |
| "learning_rate": 2.861952920368816e-06, |
| "loss": 0.592, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.4720812182741117, |
| "grad_norm": 1.719758152961731, |
| "learning_rate": 2.8551703910486735e-06, |
| "loss": 0.5949, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.47292724196277497, |
| "grad_norm": 1.5856834650039673, |
| "learning_rate": 2.8483851926153396e-06, |
| "loss": 0.4885, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.47377326565143824, |
| "grad_norm": 1.5992249250411987, |
| "learning_rate": 2.8415973760597284e-06, |
| "loss": 0.5733, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.4746192893401015, |
| "grad_norm": 1.512696385383606, |
| "learning_rate": 2.8348069923924277e-06, |
| "loss": 0.5093, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.4754653130287648, |
| "grad_norm": 1.4138331413269043, |
| "learning_rate": 2.828014092643319e-06, |
| "loss": 0.4628, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.4763113367174281, |
| "grad_norm": 1.6273956298828125, |
| "learning_rate": 2.8212187278611907e-06, |
| "loss": 0.6473, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.47715736040609136, |
| "grad_norm": 1.4268630743026733, |
| "learning_rate": 2.8144209491133573e-06, |
| "loss": 0.4941, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.4780033840947547, |
| "grad_norm": 1.5218658447265625, |
| "learning_rate": 2.807620807485273e-06, |
| "loss": 0.5629, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.47884940778341795, |
| "grad_norm": 1.5016785860061646, |
| "learning_rate": 2.8008183540801486e-06, |
| "loss": 0.5488, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.4796954314720812, |
| "grad_norm": 1.6292673349380493, |
| "learning_rate": 2.7940136400185697e-06, |
| "loss": 0.4968, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.4805414551607445, |
| "grad_norm": 1.7516484260559082, |
| "learning_rate": 2.7872067164381113e-06, |
| "loss": 0.6068, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.4813874788494078, |
| "grad_norm": 1.6018465757369995, |
| "learning_rate": 2.7803976344929497e-06, |
| "loss": 0.515, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.48223350253807107, |
| "grad_norm": 1.4703809022903442, |
| "learning_rate": 2.7735864453534845e-06, |
| "loss": 0.4804, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.48307952622673433, |
| "grad_norm": 1.6393179893493652, |
| "learning_rate": 2.7667732002059494e-06, |
| "loss": 0.5815, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.48392554991539766, |
| "grad_norm": 1.3294684886932373, |
| "learning_rate": 2.7599579502520295e-06, |
| "loss": 0.4847, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.4847715736040609, |
| "grad_norm": 1.5507575273513794, |
| "learning_rate": 2.753140746708477e-06, |
| "loss": 0.6029, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.4856175972927242, |
| "grad_norm": 1.458669662475586, |
| "learning_rate": 2.746321640806722e-06, |
| "loss": 0.5659, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.48646362098138746, |
| "grad_norm": 1.5378319025039673, |
| "learning_rate": 2.7395006837924953e-06, |
| "loss": 0.5321, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.4873096446700508, |
| "grad_norm": 1.6430675983428955, |
| "learning_rate": 2.7326779269254363e-06, |
| "loss": 0.4837, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.48815566835871405, |
| "grad_norm": 1.6236116886138916, |
| "learning_rate": 2.7258534214787108e-06, |
| "loss": 0.4962, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.4890016920473773, |
| "grad_norm": 1.4811713695526123, |
| "learning_rate": 2.7190272187386246e-06, |
| "loss": 0.4433, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.48984771573604063, |
| "grad_norm": 1.4098522663116455, |
| "learning_rate": 2.7121993700042403e-06, |
| "loss": 0.5793, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.4906937394247039, |
| "grad_norm": 1.4402129650115967, |
| "learning_rate": 2.7053699265869883e-06, |
| "loss": 0.4585, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.49153976311336717, |
| "grad_norm": 1.5286259651184082, |
| "learning_rate": 2.6985389398102844e-06, |
| "loss": 0.502, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.49238578680203043, |
| "grad_norm": 1.4347509145736694, |
| "learning_rate": 2.6917064610091425e-06, |
| "loss": 0.4995, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.49323181049069376, |
| "grad_norm": 1.3990005254745483, |
| "learning_rate": 2.6848725415297888e-06, |
| "loss": 0.4727, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.494077834179357, |
| "grad_norm": 1.4724209308624268, |
| "learning_rate": 2.6780372327292763e-06, |
| "loss": 0.5381, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.4949238578680203, |
| "grad_norm": 1.631493330001831, |
| "learning_rate": 2.6712005859751e-06, |
| "loss": 0.5569, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.4957698815566836, |
| "grad_norm": 1.513969898223877, |
| "learning_rate": 2.6643626526448063e-06, |
| "loss": 0.4989, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.4966159052453469, |
| "grad_norm": 1.542619228363037, |
| "learning_rate": 2.6575234841256137e-06, |
| "loss": 0.5313, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.49746192893401014, |
| "grad_norm": 1.4393746852874756, |
| "learning_rate": 2.6506831318140226e-06, |
| "loss": 0.5492, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.4983079526226734, |
| "grad_norm": 1.3358962535858154, |
| "learning_rate": 2.6438416471154277e-06, |
| "loss": 0.478, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.49915397631133673, |
| "grad_norm": 1.6701815128326416, |
| "learning_rate": 2.636999081443736e-06, |
| "loss": 0.5219, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.4150053262710571, |
| "learning_rate": 2.6301554862209756e-06, |
| "loss": 0.4718, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.5008460236886633, |
| "grad_norm": 1.3618898391723633, |
| "learning_rate": 2.6233109128769134e-06, |
| "loss": 0.5055, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.5016920473773265, |
| "grad_norm": 1.6331762075424194, |
| "learning_rate": 2.6164654128486683e-06, |
| "loss": 0.5177, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.5025380710659898, |
| "grad_norm": 1.5203489065170288, |
| "learning_rate": 2.6096190375803183e-06, |
| "loss": 0.5066, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.5033840947546532, |
| "grad_norm": 1.6266874074935913, |
| "learning_rate": 2.602771838522525e-06, |
| "loss": 0.4494, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.5042301184433164, |
| "grad_norm": 1.3882555961608887, |
| "learning_rate": 2.595923867132136e-06, |
| "loss": 0.5231, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.5050761421319797, |
| "grad_norm": 1.4567385911941528, |
| "learning_rate": 2.5890751748718055e-06, |
| "loss": 0.5295, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.505922165820643, |
| "grad_norm": 1.4776872396469116, |
| "learning_rate": 2.5822258132096038e-06, |
| "loss": 0.505, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.5067681895093062, |
| "grad_norm": 1.404428243637085, |
| "learning_rate": 2.575375833618633e-06, |
| "loss": 0.4914, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.5076142131979695, |
| "grad_norm": 1.4569783210754395, |
| "learning_rate": 2.568525287576638e-06, |
| "loss": 0.4488, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.5084602368866328, |
| "grad_norm": 1.4980030059814453, |
| "learning_rate": 2.561674226565621e-06, |
| "loss": 0.5389, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.5093062605752962, |
| "grad_norm": 1.4138386249542236, |
| "learning_rate": 2.5548227020714532e-06, |
| "loss": 0.5175, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.5101522842639594, |
| "grad_norm": 1.7734383344650269, |
| "learning_rate": 2.547970765583491e-06, |
| "loss": 0.5559, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.5109983079526227, |
| "grad_norm": 1.5217783451080322, |
| "learning_rate": 2.541118468594185e-06, |
| "loss": 0.4747, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.511844331641286, |
| "grad_norm": 1.696345329284668, |
| "learning_rate": 2.5342658625986965e-06, |
| "loss": 0.5078, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.5126903553299492, |
| "grad_norm": 1.5817019939422607, |
| "learning_rate": 2.527412999094507e-06, |
| "loss": 0.5418, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.5135363790186125, |
| "grad_norm": 1.3751314878463745, |
| "learning_rate": 2.520559929581034e-06, |
| "loss": 0.4278, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.5143824027072758, |
| "grad_norm": 1.4618191719055176, |
| "learning_rate": 2.5137067055592457e-06, |
| "loss": 0.5491, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.5152284263959391, |
| "grad_norm": 1.4312200546264648, |
| "learning_rate": 2.5068533785312673e-06, |
| "loss": 0.5052, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.5160744500846024, |
| "grad_norm": 1.6040703058242798, |
| "learning_rate": 2.5e-06, |
| "loss": 0.4421, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.5169204737732657, |
| "grad_norm": 1.7869144678115845, |
| "learning_rate": 2.4931466214687336e-06, |
| "loss": 0.4662, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.5177664974619289, |
| "grad_norm": 1.4954723119735718, |
| "learning_rate": 2.486293294440755e-06, |
| "loss": 0.482, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.5186125211505922, |
| "grad_norm": 1.504907488822937, |
| "learning_rate": 2.479440070418967e-06, |
| "loss": 0.4846, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.5194585448392555, |
| "grad_norm": 1.5628070831298828, |
| "learning_rate": 2.4725870009054944e-06, |
| "loss": 0.5379, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.5203045685279187, |
| "grad_norm": 1.3009322881698608, |
| "learning_rate": 2.4657341374013047e-06, |
| "loss": 0.4173, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.5211505922165821, |
| "grad_norm": 1.4733883142471313, |
| "learning_rate": 2.4588815314058155e-06, |
| "loss": 0.4481, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.5219966159052454, |
| "grad_norm": 1.511818528175354, |
| "learning_rate": 2.4520292344165093e-06, |
| "loss": 0.4529, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.5228426395939086, |
| "grad_norm": 1.5413731336593628, |
| "learning_rate": 2.4451772979285468e-06, |
| "loss": 0.5215, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.5236886632825719, |
| "grad_norm": 1.4342833757400513, |
| "learning_rate": 2.4383257734343795e-06, |
| "loss": 0.4864, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.5245346869712352, |
| "grad_norm": 1.339322566986084, |
| "learning_rate": 2.431474712423363e-06, |
| "loss": 0.4954, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.5253807106598984, |
| "grad_norm": 1.6234588623046875, |
| "learning_rate": 2.4246241663813675e-06, |
| "loss": 0.4753, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.5262267343485617, |
| "grad_norm": 1.3801982402801514, |
| "learning_rate": 2.4177741867903966e-06, |
| "loss": 0.4836, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.5270727580372251, |
| "grad_norm": 1.5722270011901855, |
| "learning_rate": 2.4109248251281953e-06, |
| "loss": 0.5472, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.5279187817258884, |
| "grad_norm": 1.5523285865783691, |
| "learning_rate": 2.4040761328678647e-06, |
| "loss": 0.5056, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.5287648054145516, |
| "grad_norm": 1.481810212135315, |
| "learning_rate": 2.3972281614774764e-06, |
| "loss": 0.5493, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.5296108291032149, |
| "grad_norm": 1.4845212697982788, |
| "learning_rate": 2.3903809624196826e-06, |
| "loss": 0.5125, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.5304568527918782, |
| "grad_norm": 1.3311069011688232, |
| "learning_rate": 2.3835345871513334e-06, |
| "loss": 0.4476, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.5313028764805414, |
| "grad_norm": 1.6216577291488647, |
| "learning_rate": 2.376689087123087e-06, |
| "loss": 0.5174, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.5321489001692047, |
| "grad_norm": 1.6146211624145508, |
| "learning_rate": 2.369844513779026e-06, |
| "loss": 0.5302, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.5329949238578681, |
| "grad_norm": 1.3406670093536377, |
| "learning_rate": 2.3630009185562646e-06, |
| "loss": 0.571, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.5338409475465313, |
| "grad_norm": 1.4292736053466797, |
| "learning_rate": 2.3561583528845723e-06, |
| "loss": 0.5067, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.5346869712351946, |
| "grad_norm": 1.4825644493103027, |
| "learning_rate": 2.3493168681859782e-06, |
| "loss": 0.5024, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.5355329949238579, |
| "grad_norm": 1.6885347366333008, |
| "learning_rate": 2.3424765158743867e-06, |
| "loss": 0.5526, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.5363790186125211, |
| "grad_norm": 1.4368715286254883, |
| "learning_rate": 2.335637347355194e-06, |
| "loss": 0.5817, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.5372250423011844, |
| "grad_norm": 1.5050628185272217, |
| "learning_rate": 2.3287994140249005e-06, |
| "loss": 0.5771, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.5380710659898477, |
| "grad_norm": 1.43318510055542, |
| "learning_rate": 2.321962767270724e-06, |
| "loss": 0.4677, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.538917089678511, |
| "grad_norm": 1.4823578596115112, |
| "learning_rate": 2.315127458470212e-06, |
| "loss": 0.496, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.5397631133671743, |
| "grad_norm": 1.4760611057281494, |
| "learning_rate": 2.308293538990858e-06, |
| "loss": 0.5526, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.5406091370558376, |
| "grad_norm": 1.4542453289031982, |
| "learning_rate": 2.301461060189716e-06, |
| "loss": 0.4325, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.5414551607445008, |
| "grad_norm": 1.4895241260528564, |
| "learning_rate": 2.2946300734130126e-06, |
| "loss": 0.5135, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.5423011844331641, |
| "grad_norm": 1.3324871063232422, |
| "learning_rate": 2.2878006299957613e-06, |
| "loss": 0.4435, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.5431472081218274, |
| "grad_norm": 1.5044846534729004, |
| "learning_rate": 2.2809727812613767e-06, |
| "loss": 0.5371, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.5439932318104906, |
| "grad_norm": 1.3756799697875977, |
| "learning_rate": 2.2741465785212905e-06, |
| "loss": 0.4914, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.544839255499154, |
| "grad_norm": 1.5922890901565552, |
| "learning_rate": 2.267322073074564e-06, |
| "loss": 0.4904, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.5456852791878173, |
| "grad_norm": 1.377279281616211, |
| "learning_rate": 2.260499316207505e-06, |
| "loss": 0.4736, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.5465313028764806, |
| "grad_norm": 1.5589686632156372, |
| "learning_rate": 2.2536783591932786e-06, |
| "loss": 0.5133, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.5473773265651438, |
| "grad_norm": 1.617056965827942, |
| "learning_rate": 2.246859253291524e-06, |
| "loss": 0.4293, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.5482233502538071, |
| "grad_norm": 1.2803261280059814, |
| "learning_rate": 2.2400420497479713e-06, |
| "loss": 0.4212, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.5490693739424704, |
| "grad_norm": 1.705077886581421, |
| "learning_rate": 2.2332267997940514e-06, |
| "loss": 0.5949, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.5499153976311336, |
| "grad_norm": 1.2756019830703735, |
| "learning_rate": 2.2264135546465163e-06, |
| "loss": 0.4538, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.550761421319797, |
| "grad_norm": 1.4529634714126587, |
| "learning_rate": 2.219602365507051e-06, |
| "loss": 0.5181, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.5516074450084603, |
| "grad_norm": 1.4561599493026733, |
| "learning_rate": 2.21279328356189e-06, |
| "loss": 0.5149, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.5524534686971235, |
| "grad_norm": 1.526835560798645, |
| "learning_rate": 2.205986359981431e-06, |
| "loss": 0.4777, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.5532994923857868, |
| "grad_norm": 1.572192907333374, |
| "learning_rate": 2.1991816459198526e-06, |
| "loss": 0.4954, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.5541455160744501, |
| "grad_norm": 1.3265060186386108, |
| "learning_rate": 2.1923791925147287e-06, |
| "loss": 0.5218, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.5549915397631133, |
| "grad_norm": 1.4780526161193848, |
| "learning_rate": 2.1855790508866435e-06, |
| "loss": 0.5365, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.5558375634517766, |
| "grad_norm": 1.5056841373443604, |
| "learning_rate": 2.1787812721388093e-06, |
| "loss": 0.5579, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.55668358714044, |
| "grad_norm": 1.5058797597885132, |
| "learning_rate": 2.1719859073566813e-06, |
| "loss": 0.5154, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.5575296108291032, |
| "grad_norm": 1.3792710304260254, |
| "learning_rate": 2.1651930076075727e-06, |
| "loss": 0.4752, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.5583756345177665, |
| "grad_norm": 1.4559630155563354, |
| "learning_rate": 2.158402623940273e-06, |
| "loss": 0.5478, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.5592216582064298, |
| "grad_norm": 1.4395264387130737, |
| "learning_rate": 2.1516148073846613e-06, |
| "loss": 0.4919, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.560067681895093, |
| "grad_norm": 1.4851564168930054, |
| "learning_rate": 2.1448296089513273e-06, |
| "loss": 0.458, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.5609137055837563, |
| "grad_norm": 1.4913303852081299, |
| "learning_rate": 2.1380470796311843e-06, |
| "loss": 0.539, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.5617597292724196, |
| "grad_norm": 1.4618514776229858, |
| "learning_rate": 2.131267270395088e-06, |
| "loss": 0.4823, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.562605752961083, |
| "grad_norm": 1.4132062196731567, |
| "learning_rate": 2.1244902321934495e-06, |
| "loss": 0.4256, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.5634517766497462, |
| "grad_norm": 1.6368167400360107, |
| "learning_rate": 2.11771601595586e-06, |
| "loss": 0.5763, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.5642978003384095, |
| "grad_norm": 1.5602619647979736, |
| "learning_rate": 2.1109446725907003e-06, |
| "loss": 0.5281, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.5651438240270727, |
| "grad_norm": 1.4556282758712769, |
| "learning_rate": 2.104176252984763e-06, |
| "loss": 0.542, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.565989847715736, |
| "grad_norm": 1.4794425964355469, |
| "learning_rate": 2.097410808002869e-06, |
| "loss": 0.5617, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.5668358714043993, |
| "grad_norm": 1.653266429901123, |
| "learning_rate": 2.0906483884874816e-06, |
| "loss": 0.58, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.5676818950930627, |
| "grad_norm": 1.421467661857605, |
| "learning_rate": 2.0838890452583337e-06, |
| "loss": 0.5255, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.5685279187817259, |
| "grad_norm": 1.327575922012329, |
| "learning_rate": 2.0771328291120336e-06, |
| "loss": 0.4885, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.5693739424703892, |
| "grad_norm": 1.7012816667556763, |
| "learning_rate": 2.070379790821693e-06, |
| "loss": 0.5295, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.5702199661590525, |
| "grad_norm": 1.7111144065856934, |
| "learning_rate": 2.063629981136541e-06, |
| "loss": 0.4796, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.5710659898477157, |
| "grad_norm": 1.4859437942504883, |
| "learning_rate": 2.0568834507815434e-06, |
| "loss": 0.4998, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.571912013536379, |
| "grad_norm": 1.4416285753250122, |
| "learning_rate": 2.050140250457023e-06, |
| "loss": 0.5211, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.5727580372250423, |
| "grad_norm": 1.4626954793930054, |
| "learning_rate": 2.043400430838276e-06, |
| "loss": 0.5127, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.5736040609137056, |
| "grad_norm": 1.4203089475631714, |
| "learning_rate": 2.036664042575193e-06, |
| "loss": 0.4599, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.5744500846023689, |
| "grad_norm": 1.3037195205688477, |
| "learning_rate": 2.0299311362918775e-06, |
| "loss": 0.4848, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.5752961082910322, |
| "grad_norm": 1.6444575786590576, |
| "learning_rate": 2.0232017625862664e-06, |
| "loss": 0.5882, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.5761421319796954, |
| "grad_norm": 1.4801158905029297, |
| "learning_rate": 2.01647597202975e-06, |
| "loss": 0.5177, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.5769881556683587, |
| "grad_norm": 1.3590668439865112, |
| "learning_rate": 2.0097538151667885e-06, |
| "loss": 0.5236, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.577834179357022, |
| "grad_norm": 1.7626949548721313, |
| "learning_rate": 2.0030353425145376e-06, |
| "loss": 0.5392, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.5786802030456852, |
| "grad_norm": 1.493628740310669, |
| "learning_rate": 1.9963206045624647e-06, |
| "loss": 0.5182, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.5795262267343486, |
| "grad_norm": 1.515743374824524, |
| "learning_rate": 1.989609651771971e-06, |
| "loss": 0.5648, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.5803722504230119, |
| "grad_norm": 1.511932134628296, |
| "learning_rate": 1.9829025345760127e-06, |
| "loss": 0.4885, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.5812182741116751, |
| "grad_norm": 1.457202434539795, |
| "learning_rate": 1.9761993033787206e-06, |
| "loss": 0.5903, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.5820642978003384, |
| "grad_norm": 1.4985764026641846, |
| "learning_rate": 1.969500008555023e-06, |
| "loss": 0.5411, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.5829103214890017, |
| "grad_norm": 1.451522707939148, |
| "learning_rate": 1.962804700450265e-06, |
| "loss": 0.5094, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.583756345177665, |
| "grad_norm": 1.3009876012802124, |
| "learning_rate": 1.956113429379833e-06, |
| "loss": 0.4272, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.5846023688663282, |
| "grad_norm": 1.2623531818389893, |
| "learning_rate": 1.9494262456287735e-06, |
| "loss": 0.4011, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.5854483925549916, |
| "grad_norm": 1.3815149068832397, |
| "learning_rate": 1.942743199451418e-06, |
| "loss": 0.5189, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.5862944162436549, |
| "grad_norm": 1.6201854944229126, |
| "learning_rate": 1.9360643410710027e-06, |
| "loss": 0.6014, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.5871404399323181, |
| "grad_norm": 1.3804255723953247, |
| "learning_rate": 1.929389720679294e-06, |
| "loss": 0.4803, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.5879864636209814, |
| "grad_norm": 1.566095232963562, |
| "learning_rate": 1.922719388436208e-06, |
| "loss": 0.5526, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.5888324873096447, |
| "grad_norm": 1.5268288850784302, |
| "learning_rate": 1.916053394469437e-06, |
| "loss": 0.4897, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.5896785109983079, |
| "grad_norm": 1.3224598169326782, |
| "learning_rate": 1.909391788874069e-06, |
| "loss": 0.4971, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.5905245346869712, |
| "grad_norm": 1.6039224863052368, |
| "learning_rate": 1.9027346217122161e-06, |
| "loss": 0.6325, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.5913705583756346, |
| "grad_norm": 1.4165875911712646, |
| "learning_rate": 1.8960819430126337e-06, |
| "loss": 0.49, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.5922165820642978, |
| "grad_norm": 1.480425238609314, |
| "learning_rate": 1.8894338027703456e-06, |
| "loss": 0.5507, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.5930626057529611, |
| "grad_norm": 1.3495324850082397, |
| "learning_rate": 1.88279025094627e-06, |
| "loss": 0.4775, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.5939086294416244, |
| "grad_norm": 1.507717251777649, |
| "learning_rate": 1.8761513374668434e-06, |
| "loss": 0.5575, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.5947546531302876, |
| "grad_norm": 1.415955662727356, |
| "learning_rate": 1.8695171122236443e-06, |
| "loss": 0.4595, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.5956006768189509, |
| "grad_norm": 1.4151737689971924, |
| "learning_rate": 1.8628876250730198e-06, |
| "loss": 0.484, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.5964467005076142, |
| "grad_norm": 1.379846215248108, |
| "learning_rate": 1.8562629258357087e-06, |
| "loss": 0.4786, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.5972927241962775, |
| "grad_norm": 1.6034132242202759, |
| "learning_rate": 1.8496430642964698e-06, |
| "loss": 0.5037, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.5981387478849408, |
| "grad_norm": 1.4743245840072632, |
| "learning_rate": 1.8430280902037061e-06, |
| "loss": 0.4941, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.5989847715736041, |
| "grad_norm": 1.4026728868484497, |
| "learning_rate": 1.8364180532690916e-06, |
| "loss": 0.4978, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.5998307952622673, |
| "grad_norm": 1.5930570363998413, |
| "learning_rate": 1.8298130031671974e-06, |
| "loss": 0.5155, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.6006768189509306, |
| "grad_norm": 1.4963091611862183, |
| "learning_rate": 1.8232129895351164e-06, |
| "loss": 0.4775, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.6015228426395939, |
| "grad_norm": 1.3382537364959717, |
| "learning_rate": 1.8166180619720974e-06, |
| "loss": 0.4759, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.6023688663282571, |
| "grad_norm": 1.5729764699935913, |
| "learning_rate": 1.8100282700391616e-06, |
| "loss": 0.4431, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.6032148900169205, |
| "grad_norm": 1.6764527559280396, |
| "learning_rate": 1.8034436632587394e-06, |
| "loss": 0.4979, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.6040609137055838, |
| "grad_norm": 1.3650676012039185, |
| "learning_rate": 1.7968642911142926e-06, |
| "loss": 0.5107, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.6049069373942471, |
| "grad_norm": 1.4936357736587524, |
| "learning_rate": 1.7902902030499463e-06, |
| "loss": 0.5537, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.6057529610829103, |
| "grad_norm": 1.4027994871139526, |
| "learning_rate": 1.7837214484701154e-06, |
| "loss": 0.487, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.6065989847715736, |
| "grad_norm": 1.433212161064148, |
| "learning_rate": 1.7771580767391314e-06, |
| "loss": 0.5041, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.6074450084602369, |
| "grad_norm": 1.3107362985610962, |
| "learning_rate": 1.7706001371808763e-06, |
| "loss": 0.4696, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.6082910321489001, |
| "grad_norm": 1.5976382493972778, |
| "learning_rate": 1.7640476790784077e-06, |
| "loss": 0.5224, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.6091370558375635, |
| "grad_norm": 1.513655185699463, |
| "learning_rate": 1.7575007516735909e-06, |
| "loss": 0.5048, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.6099830795262268, |
| "grad_norm": 1.4939072132110596, |
| "learning_rate": 1.7509594041667265e-06, |
| "loss": 0.4744, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.61082910321489, |
| "grad_norm": 1.4942210912704468, |
| "learning_rate": 1.7444236857161837e-06, |
| "loss": 0.548, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.6116751269035533, |
| "grad_norm": 1.6308627128601074, |
| "learning_rate": 1.7378936454380277e-06, |
| "loss": 0.555, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.6125211505922166, |
| "grad_norm": 1.5539953708648682, |
| "learning_rate": 1.7313693324056523e-06, |
| "loss": 0.4423, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.6133671742808798, |
| "grad_norm": 1.5067429542541504, |
| "learning_rate": 1.724850795649413e-06, |
| "loss": 0.5053, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.6142131979695431, |
| "grad_norm": 1.4803341627120972, |
| "learning_rate": 1.718338084156254e-06, |
| "loss": 0.5284, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.6150592216582065, |
| "grad_norm": 1.3545798063278198, |
| "learning_rate": 1.7118312468693437e-06, |
| "loss": 0.4296, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.6159052453468697, |
| "grad_norm": 1.3966692686080933, |
| "learning_rate": 1.7053303326877051e-06, |
| "loss": 0.5169, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.616751269035533, |
| "grad_norm": 1.5924454927444458, |
| "learning_rate": 1.6988353904658495e-06, |
| "loss": 0.5, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.6175972927241963, |
| "grad_norm": 1.3930429220199585, |
| "learning_rate": 1.692346469013408e-06, |
| "loss": 0.5098, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.6184433164128595, |
| "grad_norm": 1.2582634687423706, |
| "learning_rate": 1.6858636170947668e-06, |
| "loss": 0.4755, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.6192893401015228, |
| "grad_norm": 1.777503252029419, |
| "learning_rate": 1.6793868834286985e-06, |
| "loss": 0.578, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.6201353637901861, |
| "grad_norm": 1.353458285331726, |
| "learning_rate": 1.6729163166879964e-06, |
| "loss": 0.4851, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.6209813874788495, |
| "grad_norm": 1.5160582065582275, |
| "learning_rate": 1.6664519654991101e-06, |
| "loss": 0.6046, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.6218274111675127, |
| "grad_norm": 1.39556086063385, |
| "learning_rate": 1.6599938784417796e-06, |
| "loss": 0.5351, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.622673434856176, |
| "grad_norm": 1.260372281074524, |
| "learning_rate": 1.6535421040486686e-06, |
| "loss": 0.4827, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.6235194585448393, |
| "grad_norm": 1.6492784023284912, |
| "learning_rate": 1.6470966908050012e-06, |
| "loss": 0.5938, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.6243654822335025, |
| "grad_norm": 1.360202670097351, |
| "learning_rate": 1.6406576871481985e-06, |
| "loss": 0.484, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.6252115059221658, |
| "grad_norm": 1.4238383769989014, |
| "learning_rate": 1.634225141467513e-06, |
| "loss": 0.4169, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.626057529610829, |
| "grad_norm": 1.4238007068634033, |
| "learning_rate": 1.6277991021036644e-06, |
| "loss": 0.5064, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.6269035532994924, |
| "grad_norm": 1.5756981372833252, |
| "learning_rate": 1.6213796173484769e-06, |
| "loss": 0.5694, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.6277495769881557, |
| "grad_norm": 1.5684349536895752, |
| "learning_rate": 1.6149667354445192e-06, |
| "loss": 0.5578, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.628595600676819, |
| "grad_norm": 1.3545986413955688, |
| "learning_rate": 1.608560504584737e-06, |
| "loss": 0.5363, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.6294416243654822, |
| "grad_norm": 1.3728703260421753, |
| "learning_rate": 1.6021609729120948e-06, |
| "loss": 0.4479, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.6302876480541455, |
| "grad_norm": 1.4639620780944824, |
| "learning_rate": 1.5957681885192111e-06, |
| "loss": 0.5478, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.6311336717428088, |
| "grad_norm": 1.3161569833755493, |
| "learning_rate": 1.5893821994479996e-06, |
| "loss": 0.4568, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.631979695431472, |
| "grad_norm": 1.5565990209579468, |
| "learning_rate": 1.5830030536893066e-06, |
| "loss": 0.5132, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.6328257191201354, |
| "grad_norm": 1.4323241710662842, |
| "learning_rate": 1.5766307991825514e-06, |
| "loss": 0.4374, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.6336717428087987, |
| "grad_norm": 1.372560739517212, |
| "learning_rate": 1.5702654838153641e-06, |
| "loss": 0.5318, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.6345177664974619, |
| "grad_norm": 1.3776485919952393, |
| "learning_rate": 1.5639071554232266e-06, |
| "loss": 0.4903, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.6353637901861252, |
| "grad_norm": 1.421043872833252, |
| "learning_rate": 1.5575558617891173e-06, |
| "loss": 0.4828, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.6362098138747885, |
| "grad_norm": 1.4715155363082886, |
| "learning_rate": 1.551211650643144e-06, |
| "loss": 0.5535, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.6370558375634517, |
| "grad_norm": 1.4590644836425781, |
| "learning_rate": 1.5448745696621915e-06, |
| "loss": 0.4879, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.637901861252115, |
| "grad_norm": 1.3986414670944214, |
| "learning_rate": 1.5385446664695603e-06, |
| "loss": 0.4828, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.6387478849407784, |
| "grad_norm": 1.2732402086257935, |
| "learning_rate": 1.53222198863461e-06, |
| "loss": 0.4103, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.6395939086294417, |
| "grad_norm": 1.4938690662384033, |
| "learning_rate": 1.5259065836724035e-06, |
| "loss": 0.4481, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.6404399323181049, |
| "grad_norm": 1.4133765697479248, |
| "learning_rate": 1.5195984990433437e-06, |
| "loss": 0.4322, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.6412859560067682, |
| "grad_norm": 1.4759124517440796, |
| "learning_rate": 1.5132977821528244e-06, |
| "loss": 0.5065, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.6421319796954315, |
| "grad_norm": 1.4921671152114868, |
| "learning_rate": 1.5070044803508693e-06, |
| "loss": 0.5302, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.6429780033840947, |
| "grad_norm": 1.5433109998703003, |
| "learning_rate": 1.500718640931779e-06, |
| "loss": 0.5281, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.643824027072758, |
| "grad_norm": 1.4201653003692627, |
| "learning_rate": 1.494440311133772e-06, |
| "loss": 0.4778, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.6446700507614214, |
| "grad_norm": 1.4562307596206665, |
| "learning_rate": 1.4881695381386324e-06, |
| "loss": 0.5654, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.6455160744500846, |
| "grad_norm": 1.3270257711410522, |
| "learning_rate": 1.4819063690713565e-06, |
| "loss": 0.4588, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.6463620981387479, |
| "grad_norm": 1.3628114461898804, |
| "learning_rate": 1.4756508509997946e-06, |
| "loss": 0.5339, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.6472081218274112, |
| "grad_norm": 1.5659477710723877, |
| "learning_rate": 1.4694030309343015e-06, |
| "loss": 0.4217, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.6480541455160744, |
| "grad_norm": 1.6154391765594482, |
| "learning_rate": 1.4631629558273803e-06, |
| "loss": 0.5273, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.6489001692047377, |
| "grad_norm": 1.5286039113998413, |
| "learning_rate": 1.4569306725733313e-06, |
| "loss": 0.5359, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.649746192893401, |
| "grad_norm": 1.4298804998397827, |
| "learning_rate": 1.450706228007897e-06, |
| "loss": 0.526, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.6505922165820643, |
| "grad_norm": 1.3179171085357666, |
| "learning_rate": 1.4444896689079142e-06, |
| "loss": 0.4854, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.6514382402707276, |
| "grad_norm": 1.4320234060287476, |
| "learning_rate": 1.4382810419909587e-06, |
| "loss": 0.5674, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.6522842639593909, |
| "grad_norm": 1.5675216913223267, |
| "learning_rate": 1.432080393914997e-06, |
| "loss": 0.6243, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.6531302876480541, |
| "grad_norm": 1.4815562963485718, |
| "learning_rate": 1.4258877712780333e-06, |
| "loss": 0.5564, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.6539763113367174, |
| "grad_norm": 1.3841257095336914, |
| "learning_rate": 1.4197032206177618e-06, |
| "loss": 0.481, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.6548223350253807, |
| "grad_norm": 1.3601456880569458, |
| "learning_rate": 1.4135267884112153e-06, |
| "loss": 0.4912, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.6556683587140439, |
| "grad_norm": 1.3832186460494995, |
| "learning_rate": 1.4073585210744136e-06, |
| "loss": 0.5253, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.6565143824027073, |
| "grad_norm": 1.516714096069336, |
| "learning_rate": 1.401198464962021e-06, |
| "loss": 0.4864, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.6573604060913706, |
| "grad_norm": 1.6682307720184326, |
| "learning_rate": 1.3950466663669915e-06, |
| "loss": 0.5815, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.6582064297800339, |
| "grad_norm": 1.3226845264434814, |
| "learning_rate": 1.3889031715202272e-06, |
| "loss": 0.4574, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.6590524534686971, |
| "grad_norm": 1.5030988454818726, |
| "learning_rate": 1.3827680265902235e-06, |
| "loss": 0.5515, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.6598984771573604, |
| "grad_norm": 1.5196452140808105, |
| "learning_rate": 1.3766412776827282e-06, |
| "loss": 0.5655, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.6607445008460237, |
| "grad_norm": 1.392120361328125, |
| "learning_rate": 1.3705229708403928e-06, |
| "loss": 0.5012, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.6615905245346869, |
| "grad_norm": 1.4476877450942993, |
| "learning_rate": 1.3644131520424241e-06, |
| "loss": 0.5739, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.6624365482233503, |
| "grad_norm": 1.4679392576217651, |
| "learning_rate": 1.3583118672042441e-06, |
| "loss": 0.5118, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.6632825719120136, |
| "grad_norm": 1.5056501626968384, |
| "learning_rate": 1.3522191621771402e-06, |
| "loss": 0.5661, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.6641285956006768, |
| "grad_norm": 1.583238124847412, |
| "learning_rate": 1.346135082747923e-06, |
| "loss": 0.6109, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.6649746192893401, |
| "grad_norm": 1.6272413730621338, |
| "learning_rate": 1.3400596746385817e-06, |
| "loss": 0.5755, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.6658206429780034, |
| "grad_norm": 1.630968451499939, |
| "learning_rate": 1.3339929835059393e-06, |
| "loss": 0.5263, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 1.260642647743225, |
| "learning_rate": 1.3279350549413117e-06, |
| "loss": 0.4703, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.6675126903553299, |
| "grad_norm": 1.5122746229171753, |
| "learning_rate": 1.3218859344701634e-06, |
| "loss": 0.5735, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.6683587140439933, |
| "grad_norm": 1.5645116567611694, |
| "learning_rate": 1.3158456675517657e-06, |
| "loss": 0.5894, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.6692047377326565, |
| "grad_norm": 1.6442245244979858, |
| "learning_rate": 1.3098142995788554e-06, |
| "loss": 0.5377, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.6700507614213198, |
| "grad_norm": 1.5986851453781128, |
| "learning_rate": 1.3037918758772944e-06, |
| "loss": 0.5731, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.6708967851099831, |
| "grad_norm": 1.582816243171692, |
| "learning_rate": 1.2977784417057262e-06, |
| "loss": 0.4785, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.6717428087986463, |
| "grad_norm": 1.3253484964370728, |
| "learning_rate": 1.29177404225524e-06, |
| "loss": 0.4108, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.6725888324873096, |
| "grad_norm": 1.4294928312301636, |
| "learning_rate": 1.2857787226490275e-06, |
| "loss": 0.5675, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.6734348561759729, |
| "grad_norm": 1.5926408767700195, |
| "learning_rate": 1.2797925279420454e-06, |
| "loss": 0.5764, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.6742808798646363, |
| "grad_norm": 1.352016568183899, |
| "learning_rate": 1.2738155031206772e-06, |
| "loss": 0.5051, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.6751269035532995, |
| "grad_norm": 1.4352556467056274, |
| "learning_rate": 1.2678476931023947e-06, |
| "loss": 0.4998, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.6759729272419628, |
| "grad_norm": 1.5205786228179932, |
| "learning_rate": 1.2618891427354174e-06, |
| "loss": 0.4852, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.676818950930626, |
| "grad_norm": 1.4403825998306274, |
| "learning_rate": 1.2559398967983821e-06, |
| "loss": 0.5406, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.6776649746192893, |
| "grad_norm": 1.380646824836731, |
| "learning_rate": 1.2500000000000007e-06, |
| "loss": 0.4836, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.6785109983079526, |
| "grad_norm": 1.469948410987854, |
| "learning_rate": 1.2440694969787262e-06, |
| "loss": 0.5521, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.6793570219966159, |
| "grad_norm": 1.4081382751464844, |
| "learning_rate": 1.2381484323024178e-06, |
| "loss": 0.4733, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.6802030456852792, |
| "grad_norm": 1.3682475090026855, |
| "learning_rate": 1.232236850468004e-06, |
| "loss": 0.5307, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.6810490693739425, |
| "grad_norm": 1.5731854438781738, |
| "learning_rate": 1.2263347959011534e-06, |
| "loss": 0.4799, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.6818950930626058, |
| "grad_norm": 1.4115339517593384, |
| "learning_rate": 1.2204423129559306e-06, |
| "loss": 0.4808, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.682741116751269, |
| "grad_norm": 1.5260988473892212, |
| "learning_rate": 1.2145594459144745e-06, |
| "loss": 0.5247, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.6835871404399323, |
| "grad_norm": 1.402204155921936, |
| "learning_rate": 1.2086862389866577e-06, |
| "loss": 0.5507, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.6844331641285956, |
| "grad_norm": 1.574589729309082, |
| "learning_rate": 1.2028227363097583e-06, |
| "loss": 0.4803, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.6852791878172588, |
| "grad_norm": 1.4717187881469727, |
| "learning_rate": 1.1969689819481257e-06, |
| "loss": 0.5736, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.6861252115059222, |
| "grad_norm": 1.4452178478240967, |
| "learning_rate": 1.1911250198928508e-06, |
| "loss": 0.5348, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.6869712351945855, |
| "grad_norm": 1.5130823850631714, |
| "learning_rate": 1.1852908940614354e-06, |
| "loss": 0.4494, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.6878172588832487, |
| "grad_norm": 1.5016757249832153, |
| "learning_rate": 1.1794666482974617e-06, |
| "loss": 0.4589, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.688663282571912, |
| "grad_norm": 1.604846715927124, |
| "learning_rate": 1.1736523263702637e-06, |
| "loss": 0.5153, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.6895093062605753, |
| "grad_norm": 1.5163328647613525, |
| "learning_rate": 1.167847971974595e-06, |
| "loss": 0.5082, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.6903553299492385, |
| "grad_norm": 1.3733899593353271, |
| "learning_rate": 1.1620536287303052e-06, |
| "loss": 0.478, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.6912013536379019, |
| "grad_norm": 1.4143409729003906, |
| "learning_rate": 1.1562693401820094e-06, |
| "loss": 0.497, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.6920473773265652, |
| "grad_norm": 1.385507345199585, |
| "learning_rate": 1.1504951497987626e-06, |
| "loss": 0.5322, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.6928934010152284, |
| "grad_norm": 1.6582002639770508, |
| "learning_rate": 1.14473110097373e-06, |
| "loss": 0.5422, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.6937394247038917, |
| "grad_norm": 1.633652925491333, |
| "learning_rate": 1.1389772370238638e-06, |
| "loss": 0.5311, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.694585448392555, |
| "grad_norm": 1.4567234516143799, |
| "learning_rate": 1.133233601189577e-06, |
| "loss": 0.5541, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.6954314720812182, |
| "grad_norm": 1.4335215091705322, |
| "learning_rate": 1.1275002366344156e-06, |
| "loss": 0.4871, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.6962774957698815, |
| "grad_norm": 1.401573896408081, |
| "learning_rate": 1.1217771864447396e-06, |
| "loss": 0.4619, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.6971235194585449, |
| "grad_norm": 1.5211458206176758, |
| "learning_rate": 1.1160644936293955e-06, |
| "loss": 0.5443, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.6979695431472082, |
| "grad_norm": 1.3945550918579102, |
| "learning_rate": 1.110362201119393e-06, |
| "loss": 0.5781, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.6988155668358714, |
| "grad_norm": 1.577100396156311, |
| "learning_rate": 1.1046703517675848e-06, |
| "loss": 0.5209, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.6996615905245347, |
| "grad_norm": 1.50028657913208, |
| "learning_rate": 1.0989889883483415e-06, |
| "loss": 0.4327, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.700507614213198, |
| "grad_norm": 1.5429868698120117, |
| "learning_rate": 1.093318153557233e-06, |
| "loss": 0.4869, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.7013536379018612, |
| "grad_norm": 1.4478799104690552, |
| "learning_rate": 1.0876578900107053e-06, |
| "loss": 0.5054, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.7021996615905245, |
| "grad_norm": 1.5432162284851074, |
| "learning_rate": 1.0820082402457617e-06, |
| "loss": 0.5047, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.7030456852791879, |
| "grad_norm": 1.5668376684188843, |
| "learning_rate": 1.0763692467196432e-06, |
| "loss": 0.5276, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.7038917089678511, |
| "grad_norm": 1.5857386589050293, |
| "learning_rate": 1.070740951809508e-06, |
| "loss": 0.5437, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.7047377326565144, |
| "grad_norm": 1.518097996711731, |
| "learning_rate": 1.0651233978121145e-06, |
| "loss": 0.5266, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.7055837563451777, |
| "grad_norm": 1.5712751150131226, |
| "learning_rate": 1.0595166269435027e-06, |
| "loss": 0.5185, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.7064297800338409, |
| "grad_norm": 1.4731966257095337, |
| "learning_rate": 1.0539206813386774e-06, |
| "loss": 0.5471, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.7072758037225042, |
| "grad_norm": 1.4393733739852905, |
| "learning_rate": 1.048335603051291e-06, |
| "loss": 0.4901, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.7081218274111675, |
| "grad_norm": 1.6055103540420532, |
| "learning_rate": 1.0427614340533293e-06, |
| "loss": 0.5252, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.7089678510998308, |
| "grad_norm": 1.5624628067016602, |
| "learning_rate": 1.037198216234791e-06, |
| "loss": 0.5123, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.7098138747884941, |
| "grad_norm": 1.5251977443695068, |
| "learning_rate": 1.0316459914033794e-06, |
| "loss": 0.4864, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.7106598984771574, |
| "grad_norm": 1.4280132055282593, |
| "learning_rate": 1.0261048012841848e-06, |
| "loss": 0.4176, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.7115059221658206, |
| "grad_norm": 1.5174310207366943, |
| "learning_rate": 1.0205746875193712e-06, |
| "loss": 0.468, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.7123519458544839, |
| "grad_norm": 1.4498469829559326, |
| "learning_rate": 1.0150556916678634e-06, |
| "loss": 0.4887, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.7131979695431472, |
| "grad_norm": 1.3770853281021118, |
| "learning_rate": 1.0095478552050348e-06, |
| "loss": 0.4825, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.7140439932318104, |
| "grad_norm": 1.3348174095153809, |
| "learning_rate": 1.0040512195223947e-06, |
| "loss": 0.4465, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.7148900169204738, |
| "grad_norm": 1.5871872901916504, |
| "learning_rate": 9.985658259272826e-07, |
| "loss": 0.504, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.7157360406091371, |
| "grad_norm": 1.5275450944900513, |
| "learning_rate": 9.930917156425477e-07, |
| "loss": 0.5283, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.7165820642978004, |
| "grad_norm": 1.7547075748443604, |
| "learning_rate": 9.876289298062478e-07, |
| "loss": 0.5201, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.7174280879864636, |
| "grad_norm": 1.480360507965088, |
| "learning_rate": 9.821775094713376e-07, |
| "loss": 0.5058, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.7182741116751269, |
| "grad_norm": 1.6711392402648926, |
| "learning_rate": 9.767374956053584e-07, |
| "loss": 0.4928, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.7191201353637902, |
| "grad_norm": 1.2926833629608154, |
| "learning_rate": 9.713089290901334e-07, |
| "loss": 0.4889, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.7199661590524534, |
| "grad_norm": 1.6195472478866577, |
| "learning_rate": 9.658918507214567e-07, |
| "loss": 0.5089, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.7208121827411168, |
| "grad_norm": 1.544764757156372, |
| "learning_rate": 9.604863012087904e-07, |
| "loss": 0.5558, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.7216582064297801, |
| "grad_norm": 1.4564974308013916, |
| "learning_rate": 9.550923211749557e-07, |
| "loss": 0.5031, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.7225042301184433, |
| "grad_norm": 1.4476693868637085, |
| "learning_rate": 9.497099511558309e-07, |
| "loss": 0.486, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.7233502538071066, |
| "grad_norm": 1.3963452577590942, |
| "learning_rate": 9.443392316000413e-07, |
| "loss": 0.4551, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.7241962774957699, |
| "grad_norm": 1.6682579517364502, |
| "learning_rate": 9.389802028686617e-07, |
| "loss": 0.5026, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.7250423011844331, |
| "grad_norm": 1.4662981033325195, |
| "learning_rate": 9.336329052349089e-07, |
| "loss": 0.5005, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.7258883248730964, |
| "grad_norm": 1.414525032043457, |
| "learning_rate": 9.28297378883842e-07, |
| "loss": 0.4489, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.7267343485617598, |
| "grad_norm": 1.6276682615280151, |
| "learning_rate": 9.229736639120562e-07, |
| "loss": 0.6037, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.727580372250423, |
| "grad_norm": 1.5041996240615845, |
| "learning_rate": 9.176618003273848e-07, |
| "loss": 0.5154, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.7284263959390863, |
| "grad_norm": 1.3910162448883057, |
| "learning_rate": 9.123618280485993e-07, |
| "loss": 0.3958, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.7292724196277496, |
| "grad_norm": 1.3514389991760254, |
| "learning_rate": 9.070737869051044e-07, |
| "loss": 0.456, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.7301184433164128, |
| "grad_norm": 1.4312387704849243, |
| "learning_rate": 9.017977166366445e-07, |
| "loss": 0.5148, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.7309644670050761, |
| "grad_norm": 1.4258145093917847, |
| "learning_rate": 8.965336568930022e-07, |
| "loss": 0.5091, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.7318104906937394, |
| "grad_norm": 1.468034029006958, |
| "learning_rate": 8.912816472337008e-07, |
| "loss": 0.4846, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.7326565143824028, |
| "grad_norm": 1.375032663345337, |
| "learning_rate": 8.860417271277067e-07, |
| "loss": 0.4945, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.733502538071066, |
| "grad_norm": 1.2760509252548218, |
| "learning_rate": 8.808139359531332e-07, |
| "loss": 0.4549, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.7343485617597293, |
| "grad_norm": 1.5727797746658325, |
| "learning_rate": 8.75598312996944e-07, |
| "loss": 0.4966, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.7351945854483926, |
| "grad_norm": 1.5262360572814941, |
| "learning_rate": 8.703948974546592e-07, |
| "loss": 0.5525, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.7360406091370558, |
| "grad_norm": 1.482978105545044, |
| "learning_rate": 8.65203728430059e-07, |
| "loss": 0.463, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.7368866328257191, |
| "grad_norm": 1.515729546546936, |
| "learning_rate": 8.600248449348916e-07, |
| "loss": 0.5125, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.7377326565143824, |
| "grad_norm": 1.6180707216262817, |
| "learning_rate": 8.548582858885787e-07, |
| "loss": 0.5446, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.7385786802030457, |
| "grad_norm": 1.7022454738616943, |
| "learning_rate": 8.497040901179232e-07, |
| "loss": 0.439, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.739424703891709, |
| "grad_norm": 1.35090172290802, |
| "learning_rate": 8.445622963568184e-07, |
| "loss": 0.4509, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.7402707275803723, |
| "grad_norm": 1.2935441732406616, |
| "learning_rate": 8.394329432459561e-07, |
| "loss": 0.4708, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.7411167512690355, |
| "grad_norm": 1.4397931098937988, |
| "learning_rate": 8.343160693325356e-07, |
| "loss": 0.4763, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.7419627749576988, |
| "grad_norm": 1.4349732398986816, |
| "learning_rate": 8.292117130699767e-07, |
| "loss": 0.4536, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.7428087986463621, |
| "grad_norm": 1.4029067754745483, |
| "learning_rate": 8.241199128176255e-07, |
| "loss": 0.5145, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.7436548223350253, |
| "grad_norm": 1.4133881330490112, |
| "learning_rate": 8.190407068404721e-07, |
| "loss": 0.5282, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.7445008460236887, |
| "grad_norm": 1.4240069389343262, |
| "learning_rate": 8.139741333088597e-07, |
| "loss": 0.4763, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.745346869712352, |
| "grad_norm": 1.5198265314102173, |
| "learning_rate": 8.089202302981983e-07, |
| "loss": 0.5663, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.7461928934010152, |
| "grad_norm": 1.3956340551376343, |
| "learning_rate": 8.038790357886783e-07, |
| "loss": 0.4796, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.7470389170896785, |
| "grad_norm": 1.4687814712524414, |
| "learning_rate": 7.988505876649863e-07, |
| "loss": 0.5539, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.7478849407783418, |
| "grad_norm": 1.3739749193191528, |
| "learning_rate": 7.938349237160184e-07, |
| "loss": 0.4545, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.748730964467005, |
| "grad_norm": 1.3965612649917603, |
| "learning_rate": 7.888320816345984e-07, |
| "loss": 0.5094, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.7495769881556683, |
| "grad_norm": 1.383991003036499, |
| "learning_rate": 7.838420990171927e-07, |
| "loss": 0.3785, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.7504230118443317, |
| "grad_norm": 1.4848946332931519, |
| "learning_rate": 7.788650133636291e-07, |
| "loss": 0.4969, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.751269035532995, |
| "grad_norm": 1.5263686180114746, |
| "learning_rate": 7.739008620768143e-07, |
| "loss": 0.4673, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.7521150592216582, |
| "grad_norm": 1.5392656326293945, |
| "learning_rate": 7.689496824624526e-07, |
| "loss": 0.5333, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.7529610829103215, |
| "grad_norm": 1.4336823225021362, |
| "learning_rate": 7.640115117287661e-07, |
| "loss": 0.5547, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.7538071065989848, |
| "grad_norm": 1.606310486793518, |
| "learning_rate": 7.590863869862155e-07, |
| "loss": 0.5233, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.754653130287648, |
| "grad_norm": 1.4799706935882568, |
| "learning_rate": 7.541743452472194e-07, |
| "loss": 0.5235, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.7554991539763113, |
| "grad_norm": 1.4974623918533325, |
| "learning_rate": 7.492754234258794e-07, |
| "loss": 0.4494, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.7563451776649747, |
| "grad_norm": 1.7052468061447144, |
| "learning_rate": 7.443896583376972e-07, |
| "loss": 0.5512, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.7571912013536379, |
| "grad_norm": 1.6682631969451904, |
| "learning_rate": 7.395170866993043e-07, |
| "loss": 0.5588, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.7580372250423012, |
| "grad_norm": 1.5600894689559937, |
| "learning_rate": 7.346577451281822e-07, |
| "loss": 0.5154, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.7588832487309645, |
| "grad_norm": 1.35463547706604, |
| "learning_rate": 7.298116701423874e-07, |
| "loss": 0.5002, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.7597292724196277, |
| "grad_norm": 1.4675949811935425, |
| "learning_rate": 7.249788981602801e-07, |
| "loss": 0.5376, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.760575296108291, |
| "grad_norm": 1.5439352989196777, |
| "learning_rate": 7.201594655002458e-07, |
| "loss": 0.4828, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.7614213197969543, |
| "grad_norm": 1.5555047988891602, |
| "learning_rate": 7.153534083804253e-07, |
| "loss": 0.5266, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.7622673434856176, |
| "grad_norm": 1.6355152130126953, |
| "learning_rate": 7.105607629184433e-07, |
| "loss": 0.5003, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.7631133671742809, |
| "grad_norm": 1.4930115938186646, |
| "learning_rate": 7.057815651311323e-07, |
| "loss": 0.5193, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.7639593908629442, |
| "grad_norm": 1.5737615823745728, |
| "learning_rate": 7.010158509342682e-07, |
| "loss": 0.5353, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.7648054145516074, |
| "grad_norm": 1.5561256408691406, |
| "learning_rate": 6.962636561422967e-07, |
| "loss": 0.5157, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.7656514382402707, |
| "grad_norm": 1.5765726566314697, |
| "learning_rate": 6.915250164680648e-07, |
| "loss": 0.5358, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.766497461928934, |
| "grad_norm": 1.4768812656402588, |
| "learning_rate": 6.867999675225523e-07, |
| "loss": 0.5021, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.7673434856175972, |
| "grad_norm": 1.4443702697753906, |
| "learning_rate": 6.820885448146041e-07, |
| "loss": 0.5442, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.7681895093062606, |
| "grad_norm": 1.5117536783218384, |
| "learning_rate": 6.773907837506646e-07, |
| "loss": 0.6001, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.7690355329949239, |
| "grad_norm": 1.4011093378067017, |
| "learning_rate": 6.7270671963451e-07, |
| "loss": 0.5374, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.7698815566835872, |
| "grad_norm": 1.523114562034607, |
| "learning_rate": 6.680363876669832e-07, |
| "loss": 0.4964, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.7707275803722504, |
| "grad_norm": 1.3653088808059692, |
| "learning_rate": 6.633798229457309e-07, |
| "loss": 0.4675, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.7715736040609137, |
| "grad_norm": 1.532370686531067, |
| "learning_rate": 6.587370604649373e-07, |
| "loss": 0.5292, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.772419627749577, |
| "grad_norm": 1.5474774837493896, |
| "learning_rate": 6.541081351150638e-07, |
| "loss": 0.5468, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.7732656514382402, |
| "grad_norm": 1.6492289304733276, |
| "learning_rate": 6.494930816825842e-07, |
| "loss": 0.5242, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.7741116751269036, |
| "grad_norm": 1.6536519527435303, |
| "learning_rate": 6.448919348497254e-07, |
| "loss": 0.5071, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.7749576988155669, |
| "grad_norm": 1.7425479888916016, |
| "learning_rate": 6.403047291942057e-07, |
| "loss": 0.5401, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.7758037225042301, |
| "grad_norm": 1.5332328081130981, |
| "learning_rate": 6.357314991889757e-07, |
| "loss": 0.5349, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.7766497461928934, |
| "grad_norm": 1.671473503112793, |
| "learning_rate": 6.311722792019565e-07, |
| "loss": 0.5401, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.7774957698815567, |
| "grad_norm": 1.3117287158966064, |
| "learning_rate": 6.266271034957861e-07, |
| "loss": 0.4603, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.7783417935702199, |
| "grad_norm": 1.5059982538223267, |
| "learning_rate": 6.220960062275583e-07, |
| "loss": 0.5476, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.7791878172588832, |
| "grad_norm": 1.5411993265151978, |
| "learning_rate": 6.175790214485674e-07, |
| "loss": 0.5483, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.7800338409475466, |
| "grad_norm": 1.4125125408172607, |
| "learning_rate": 6.130761831040522e-07, |
| "loss": 0.4734, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.7808798646362098, |
| "grad_norm": 1.3989757299423218, |
| "learning_rate": 6.085875250329401e-07, |
| "loss": 0.4282, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.7817258883248731, |
| "grad_norm": 1.408751368522644, |
| "learning_rate": 6.041130809675944e-07, |
| "loss": 0.4072, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.7825719120135364, |
| "grad_norm": 1.6468225717544556, |
| "learning_rate": 5.996528845335587e-07, |
| "loss": 0.5275, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.7834179357021996, |
| "grad_norm": 1.299722671508789, |
| "learning_rate": 5.952069692493062e-07, |
| "loss": 0.4825, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.7842639593908629, |
| "grad_norm": 1.3989574909210205, |
| "learning_rate": 5.907753685259865e-07, |
| "loss": 0.5353, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.7851099830795262, |
| "grad_norm": 1.632617473602295, |
| "learning_rate": 5.863581156671755e-07, |
| "loss": 0.5604, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.7859560067681896, |
| "grad_norm": 1.3802127838134766, |
| "learning_rate": 5.819552438686238e-07, |
| "loss": 0.5555, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.7868020304568528, |
| "grad_norm": 1.2966598272323608, |
| "learning_rate": 5.775667862180087e-07, |
| "loss": 0.5135, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.7876480541455161, |
| "grad_norm": 1.4955836534500122, |
| "learning_rate": 5.731927756946848e-07, |
| "loss": 0.5242, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.7884940778341794, |
| "grad_norm": 1.486580729484558, |
| "learning_rate": 5.688332451694356e-07, |
| "loss": 0.5137, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.7893401015228426, |
| "grad_norm": 1.5617793798446655, |
| "learning_rate": 5.644882274042285e-07, |
| "loss": 0.5552, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.7901861252115059, |
| "grad_norm": 1.4690701961517334, |
| "learning_rate": 5.601577550519646e-07, |
| "loss": 0.5497, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.7910321489001692, |
| "grad_norm": 1.4751214981079102, |
| "learning_rate": 5.558418606562385e-07, |
| "loss": 0.4816, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.7918781725888325, |
| "grad_norm": 1.4868813753128052, |
| "learning_rate": 5.5154057665109e-07, |
| "loss": 0.511, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.7927241962774958, |
| "grad_norm": 1.3979462385177612, |
| "learning_rate": 5.472539353607612e-07, |
| "loss": 0.4614, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.7935702199661591, |
| "grad_norm": 1.40373694896698, |
| "learning_rate": 5.429819689994556e-07, |
| "loss": 0.5177, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.7944162436548223, |
| "grad_norm": 1.5623489618301392, |
| "learning_rate": 5.387247096710921e-07, |
| "loss": 0.5668, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.7952622673434856, |
| "grad_norm": 1.258279800415039, |
| "learning_rate": 5.344821893690679e-07, |
| "loss": 0.4178, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.7961082910321489, |
| "grad_norm": 1.3184384107589722, |
| "learning_rate": 5.30254439976014e-07, |
| "loss": 0.4387, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.7969543147208121, |
| "grad_norm": 1.380998969078064, |
| "learning_rate": 5.260414932635588e-07, |
| "loss": 0.5116, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.7978003384094755, |
| "grad_norm": 1.447558045387268, |
| "learning_rate": 5.218433808920884e-07, |
| "loss": 0.5289, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.7986463620981388, |
| "grad_norm": 1.6539430618286133, |
| "learning_rate": 5.176601344105084e-07, |
| "loss": 0.5358, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.799492385786802, |
| "grad_norm": 1.6026103496551514, |
| "learning_rate": 5.134917852560067e-07, |
| "loss": 0.5461, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.8003384094754653, |
| "grad_norm": 1.8216614723205566, |
| "learning_rate": 5.09338364753818e-07, |
| "loss": 0.5195, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.8011844331641286, |
| "grad_norm": 1.376305103302002, |
| "learning_rate": 5.051999041169869e-07, |
| "loss": 0.5051, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.8020304568527918, |
| "grad_norm": 1.5061923265457153, |
| "learning_rate": 5.010764344461352e-07, |
| "loss": 0.4656, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.8028764805414551, |
| "grad_norm": 1.6302450895309448, |
| "learning_rate": 4.969679867292276e-07, |
| "loss": 0.5381, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.8037225042301185, |
| "grad_norm": 1.4138633012771606, |
| "learning_rate": 4.928745918413352e-07, |
| "loss": 0.5356, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.8045685279187818, |
| "grad_norm": 1.4501382112503052, |
| "learning_rate": 4.887962805444122e-07, |
| "loss": 0.506, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.805414551607445, |
| "grad_norm": 1.3429620265960693, |
| "learning_rate": 4.847330834870551e-07, |
| "loss": 0.4975, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.8062605752961083, |
| "grad_norm": 1.3932536840438843, |
| "learning_rate": 4.806850312042782e-07, |
| "loss": 0.4677, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.8071065989847716, |
| "grad_norm": 1.4443645477294922, |
| "learning_rate": 4.766521541172822e-07, |
| "loss": 0.5573, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.8079526226734348, |
| "grad_norm": 1.536169409751892, |
| "learning_rate": 4.7263448253322574e-07, |
| "loss": 0.5413, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.8087986463620981, |
| "grad_norm": 1.3791049718856812, |
| "learning_rate": 4.686320466449981e-07, |
| "loss": 0.4739, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.8096446700507615, |
| "grad_norm": 1.523890733718872, |
| "learning_rate": 4.6464487653099216e-07, |
| "loss": 0.5314, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.8104906937394247, |
| "grad_norm": 1.4911043643951416, |
| "learning_rate": 4.6067300215487663e-07, |
| "loss": 0.5903, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.811336717428088, |
| "grad_norm": 1.383557915687561, |
| "learning_rate": 4.5671645336537425e-07, |
| "loss": 0.5124, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.8121827411167513, |
| "grad_norm": 1.5496838092803955, |
| "learning_rate": 4.52775259896035e-07, |
| "loss": 0.5472, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.8130287648054145, |
| "grad_norm": 1.4877400398254395, |
| "learning_rate": 4.4884945136501325e-07, |
| "loss": 0.5108, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.8138747884940778, |
| "grad_norm": 1.4807846546173096, |
| "learning_rate": 4.449390572748449e-07, |
| "loss": 0.4671, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.8147208121827412, |
| "grad_norm": 1.3834859132766724, |
| "learning_rate": 4.4104410701222703e-07, |
| "loss": 0.4498, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.8155668358714044, |
| "grad_norm": 1.4340556859970093, |
| "learning_rate": 4.371646298477947e-07, |
| "loss": 0.4934, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.8164128595600677, |
| "grad_norm": 1.6941485404968262, |
| "learning_rate": 4.333006549359034e-07, |
| "loss": 0.5905, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.817258883248731, |
| "grad_norm": 1.3260046243667603, |
| "learning_rate": 4.2945221131440783e-07, |
| "loss": 0.4841, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.8181049069373942, |
| "grad_norm": 1.5405261516571045, |
| "learning_rate": 4.2561932790444597e-07, |
| "loss": 0.5125, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.8189509306260575, |
| "grad_norm": 1.5137594938278198, |
| "learning_rate": 4.218020335102191e-07, |
| "loss": 0.469, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.8197969543147208, |
| "grad_norm": 1.3630168437957764, |
| "learning_rate": 4.1800035681877765e-07, |
| "loss": 0.5501, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.8206429780033841, |
| "grad_norm": 1.3924651145935059, |
| "learning_rate": 4.142143263998047e-07, |
| "loss": 0.4994, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.8214890016920474, |
| "grad_norm": 1.426595687866211, |
| "learning_rate": 4.104439707054003e-07, |
| "loss": 0.4795, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.8223350253807107, |
| "grad_norm": 1.356465220451355, |
| "learning_rate": 4.0668931806987e-07, |
| "loss": 0.4743, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.823181049069374, |
| "grad_norm": 1.474966049194336, |
| "learning_rate": 4.029503967095097e-07, |
| "loss": 0.5118, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.8240270727580372, |
| "grad_norm": 1.4417613744735718, |
| "learning_rate": 3.9922723472239356e-07, |
| "loss": 0.5565, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.8248730964467005, |
| "grad_norm": 1.5358595848083496, |
| "learning_rate": 3.9551986008816544e-07, |
| "loss": 0.5128, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.8257191201353637, |
| "grad_norm": 1.390822410583496, |
| "learning_rate": 3.9182830066782614e-07, |
| "loss": 0.5552, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.8265651438240271, |
| "grad_norm": 1.3930827379226685, |
| "learning_rate": 3.8815258420352385e-07, |
| "loss": 0.4682, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.8274111675126904, |
| "grad_norm": 1.3697726726531982, |
| "learning_rate": 3.844927383183486e-07, |
| "loss": 0.4713, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.8282571912013537, |
| "grad_norm": 1.5058186054229736, |
| "learning_rate": 3.808487905161215e-07, |
| "loss": 0.5004, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.8291032148900169, |
| "grad_norm": 1.4689873456954956, |
| "learning_rate": 3.772207681811896e-07, |
| "loss": 0.5002, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.8299492385786802, |
| "grad_norm": 1.4826774597167969, |
| "learning_rate": 3.7360869857821805e-07, |
| "loss": 0.4269, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.8307952622673435, |
| "grad_norm": 1.3101059198379517, |
| "learning_rate": 3.7001260885198925e-07, |
| "loss": 0.53, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.8316412859560067, |
| "grad_norm": 1.5160847902297974, |
| "learning_rate": 3.664325260271953e-07, |
| "loss": 0.5218, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.8324873096446701, |
| "grad_norm": 1.3929725885391235, |
| "learning_rate": 3.6286847700823634e-07, |
| "loss": 0.55, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.8333333333333334, |
| "grad_norm": 1.4347172975540161, |
| "learning_rate": 3.5932048857901773e-07, |
| "loss": 0.455, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.8341793570219966, |
| "grad_norm": 1.373490333557129, |
| "learning_rate": 3.5578858740274976e-07, |
| "loss": 0.4914, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.8350253807106599, |
| "grad_norm": 1.4317591190338135, |
| "learning_rate": 3.5227280002174626e-07, |
| "loss": 0.5637, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.8358714043993232, |
| "grad_norm": 1.6726568937301636, |
| "learning_rate": 3.487731528572255e-07, |
| "loss": 0.5096, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.8367174280879864, |
| "grad_norm": 1.3493298292160034, |
| "learning_rate": 3.4528967220911287e-07, |
| "loss": 0.4357, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.8375634517766497, |
| "grad_norm": 1.4160189628601074, |
| "learning_rate": 3.418223842558385e-07, |
| "loss": 0.4529, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.8384094754653131, |
| "grad_norm": 1.5254844427108765, |
| "learning_rate": 3.38371315054149e-07, |
| "loss": 0.4857, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.8392554991539763, |
| "grad_norm": 1.4467828273773193, |
| "learning_rate": 3.3493649053890325e-07, |
| "loss": 0.5402, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.8401015228426396, |
| "grad_norm": 1.5890865325927734, |
| "learning_rate": 3.315179365228824e-07, |
| "loss": 0.4991, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.8409475465313029, |
| "grad_norm": 1.4599320888519287, |
| "learning_rate": 3.281156786965933e-07, |
| "loss": 0.399, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.8417935702199661, |
| "grad_norm": 1.501905918121338, |
| "learning_rate": 3.2472974262807794e-07, |
| "loss": 0.4408, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.8426395939086294, |
| "grad_norm": 1.4133471250534058, |
| "learning_rate": 3.213601537627195e-07, |
| "loss": 0.4854, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.8434856175972927, |
| "grad_norm": 1.474295973777771, |
| "learning_rate": 3.1800693742305074e-07, |
| "loss": 0.4868, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.8443316412859561, |
| "grad_norm": 1.340802788734436, |
| "learning_rate": 3.146701188085649e-07, |
| "loss": 0.5024, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.8451776649746193, |
| "grad_norm": 1.5965441465377808, |
| "learning_rate": 3.11349722995527e-07, |
| "loss": 0.5567, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.8460236886632826, |
| "grad_norm": 1.4308526515960693, |
| "learning_rate": 3.080457749367832e-07, |
| "loss": 0.4896, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.8468697123519459, |
| "grad_norm": 1.475598931312561, |
| "learning_rate": 3.04758299461575e-07, |
| "loss": 0.4918, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.8477157360406091, |
| "grad_norm": 1.4925700426101685, |
| "learning_rate": 3.014873212753516e-07, |
| "loss": 0.4652, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.8485617597292724, |
| "grad_norm": 1.5554040670394897, |
| "learning_rate": 2.982328649595856e-07, |
| "loss": 0.5302, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.8494077834179357, |
| "grad_norm": 1.666174054145813, |
| "learning_rate": 2.949949549715858e-07, |
| "loss": 0.4715, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.850253807106599, |
| "grad_norm": 1.4457473754882812, |
| "learning_rate": 2.917736156443171e-07, |
| "loss": 0.4546, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.8510998307952623, |
| "grad_norm": 1.4290974140167236, |
| "learning_rate": 2.885688711862136e-07, |
| "loss": 0.5035, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.8519458544839256, |
| "grad_norm": 1.4836621284484863, |
| "learning_rate": 2.8538074568099954e-07, |
| "loss": 0.5912, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.8527918781725888, |
| "grad_norm": 1.309338092803955, |
| "learning_rate": 2.8220926308750757e-07, |
| "loss": 0.4693, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.8536379018612521, |
| "grad_norm": 1.3714721202850342, |
| "learning_rate": 2.7905444723949765e-07, |
| "loss": 0.4841, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.8544839255499154, |
| "grad_norm": 1.3649089336395264, |
| "learning_rate": 2.7591632184547996e-07, |
| "loss": 0.4965, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.8553299492385786, |
| "grad_norm": 1.4042978286743164, |
| "learning_rate": 2.727949104885341e-07, |
| "loss": 0.4913, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.856175972927242, |
| "grad_norm": 1.6518502235412598, |
| "learning_rate": 2.6969023662613473e-07, |
| "loss": 0.5123, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.8570219966159053, |
| "grad_norm": 1.5500681400299072, |
| "learning_rate": 2.666023235899734e-07, |
| "loss": 0.464, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.8578680203045685, |
| "grad_norm": 1.5040642023086548, |
| "learning_rate": 2.6353119458578297e-07, |
| "loss": 0.4687, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.8587140439932318, |
| "grad_norm": 1.548913836479187, |
| "learning_rate": 2.604768726931645e-07, |
| "loss": 0.565, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.8595600676818951, |
| "grad_norm": 1.342336893081665, |
| "learning_rate": 2.5743938086541354e-07, |
| "loss": 0.4772, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.8604060913705583, |
| "grad_norm": 1.419202446937561, |
| "learning_rate": 2.544187419293462e-07, |
| "loss": 0.5304, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.8612521150592216, |
| "grad_norm": 1.3937733173370361, |
| "learning_rate": 2.514149785851311e-07, |
| "loss": 0.5183, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.862098138747885, |
| "grad_norm": 1.44895601272583, |
| "learning_rate": 2.4842811340611423e-07, |
| "loss": 0.497, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.8629441624365483, |
| "grad_norm": 1.648419976234436, |
| "learning_rate": 2.454581688386523e-07, |
| "loss": 0.4864, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.8637901861252115, |
| "grad_norm": 1.4835489988327026, |
| "learning_rate": 2.4250516720194267e-07, |
| "loss": 0.5337, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.8646362098138748, |
| "grad_norm": 1.247205376625061, |
| "learning_rate": 2.3956913068785697e-07, |
| "loss": 0.4464, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.8654822335025381, |
| "grad_norm": 1.5768427848815918, |
| "learning_rate": 2.3665008136077332e-07, |
| "loss": 0.5662, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.8663282571912013, |
| "grad_norm": 1.430821180343628, |
| "learning_rate": 2.3374804115741056e-07, |
| "loss": 0.4908, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.8671742808798646, |
| "grad_norm": 1.5622667074203491, |
| "learning_rate": 2.3086303188666393e-07, |
| "loss": 0.5398, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.868020304568528, |
| "grad_norm": 1.375335931777954, |
| "learning_rate": 2.2799507522944048e-07, |
| "loss": 0.4289, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.8688663282571912, |
| "grad_norm": 1.2710477113723755, |
| "learning_rate": 2.2514419273849674e-07, |
| "loss": 0.4489, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.8697123519458545, |
| "grad_norm": 1.3400541543960571, |
| "learning_rate": 2.223104058382766e-07, |
| "loss": 0.4705, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.8705583756345178, |
| "grad_norm": 1.3298795223236084, |
| "learning_rate": 2.1949373582475065e-07, |
| "loss": 0.5148, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.871404399323181, |
| "grad_norm": 1.3770630359649658, |
| "learning_rate": 2.166942038652531e-07, |
| "loss": 0.4446, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.8722504230118443, |
| "grad_norm": 1.5070509910583496, |
| "learning_rate": 2.1391183099832958e-07, |
| "loss": 0.4768, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.8730964467005076, |
| "grad_norm": 1.6530050039291382, |
| "learning_rate": 2.111466381335714e-07, |
| "loss": 0.5459, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.873942470389171, |
| "grad_norm": 1.3462867736816406, |
| "learning_rate": 2.083986460514631e-07, |
| "loss": 0.4644, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.8747884940778342, |
| "grad_norm": 1.366542100906372, |
| "learning_rate": 2.056678754032246e-07, |
| "loss": 0.4752, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.8756345177664975, |
| "grad_norm": 1.3380286693572998, |
| "learning_rate": 2.0295434671065706e-07, |
| "loss": 0.4484, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.8764805414551607, |
| "grad_norm": 1.5602799654006958, |
| "learning_rate": 2.002580803659873e-07, |
| "loss": 0.5123, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.877326565143824, |
| "grad_norm": 1.4561575651168823, |
| "learning_rate": 1.9757909663171508e-07, |
| "loss": 0.5151, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.8781725888324873, |
| "grad_norm": 1.4674752950668335, |
| "learning_rate": 1.9491741564046125e-07, |
| "loss": 0.4728, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.8790186125211505, |
| "grad_norm": 1.5501822233200073, |
| "learning_rate": 1.9227305739481618e-07, |
| "loss": 0.4961, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.8798646362098139, |
| "grad_norm": 1.3679369688034058, |
| "learning_rate": 1.896460417671897e-07, |
| "loss": 0.5051, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.8807106598984772, |
| "grad_norm": 1.462009072303772, |
| "learning_rate": 1.8703638849966094e-07, |
| "loss": 0.4909, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.8815566835871405, |
| "grad_norm": 1.4271647930145264, |
| "learning_rate": 1.844441172038311e-07, |
| "loss": 0.4598, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.8824027072758037, |
| "grad_norm": 1.3603568077087402, |
| "learning_rate": 1.818692473606748e-07, |
| "loss": 0.446, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.883248730964467, |
| "grad_norm": 1.4679503440856934, |
| "learning_rate": 1.7931179832039513e-07, |
| "loss": 0.4422, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.8840947546531303, |
| "grad_norm": 1.4945645332336426, |
| "learning_rate": 1.767717893022769e-07, |
| "loss": 0.5402, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.8849407783417935, |
| "grad_norm": 1.5339360237121582, |
| "learning_rate": 1.7424923939454274e-07, |
| "loss": 0.5342, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.8857868020304569, |
| "grad_norm": 1.4887621402740479, |
| "learning_rate": 1.717441675542106e-07, |
| "loss": 0.5502, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.8866328257191202, |
| "grad_norm": 1.5359052419662476, |
| "learning_rate": 1.6925659260694894e-07, |
| "loss": 0.5523, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.8874788494077834, |
| "grad_norm": 1.4636895656585693, |
| "learning_rate": 1.667865332469379e-07, |
| "loss": 0.4753, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.8883248730964467, |
| "grad_norm": 1.4733718633651733, |
| "learning_rate": 1.643340080367267e-07, |
| "loss": 0.4974, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.88917089678511, |
| "grad_norm": 1.4999078512191772, |
| "learning_rate": 1.6189903540709595e-07, |
| "loss": 0.4722, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.8900169204737732, |
| "grad_norm": 1.534686803817749, |
| "learning_rate": 1.5948163365691798e-07, |
| "loss": 0.5379, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.8908629441624365, |
| "grad_norm": 1.3469022512435913, |
| "learning_rate": 1.5708182095301867e-07, |
| "loss": 0.5219, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.8917089678510999, |
| "grad_norm": 1.2992980480194092, |
| "learning_rate": 1.5469961533004258e-07, |
| "loss": 0.4464, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.8925549915397631, |
| "grad_norm": 1.1914132833480835, |
| "learning_rate": 1.5233503469031686e-07, |
| "loss": 0.3623, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.8934010152284264, |
| "grad_norm": 1.5193549394607544, |
| "learning_rate": 1.499880968037165e-07, |
| "loss": 0.4728, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.8942470389170897, |
| "grad_norm": 1.4608553647994995, |
| "learning_rate": 1.4765881930752983e-07, |
| "loss": 0.4644, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.8950930626057529, |
| "grad_norm": 1.6205723285675049, |
| "learning_rate": 1.4534721970632882e-07, |
| "loss": 0.5703, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.8959390862944162, |
| "grad_norm": 1.5539836883544922, |
| "learning_rate": 1.4305331537183387e-07, |
| "loss": 0.6296, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.8967851099830795, |
| "grad_norm": 1.4455832242965698, |
| "learning_rate": 1.4077712354278683e-07, |
| "loss": 0.5241, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.8976311336717429, |
| "grad_norm": 1.4322365522384644, |
| "learning_rate": 1.385186613248171e-07, |
| "loss": 0.4872, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.8984771573604061, |
| "grad_norm": 1.5108438730239868, |
| "learning_rate": 1.362779456903182e-07, |
| "loss": 0.526, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.8993231810490694, |
| "grad_norm": 1.3449329137802124, |
| "learning_rate": 1.340549934783164e-07, |
| "loss": 0.4537, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.9001692047377327, |
| "grad_norm": 1.4694099426269531, |
| "learning_rate": 1.3184982139434587e-07, |
| "loss": 0.5095, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.9010152284263959, |
| "grad_norm": 1.5920803546905518, |
| "learning_rate": 1.2966244601032267e-07, |
| "loss": 0.5452, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.9018612521150592, |
| "grad_norm": 1.3615165948867798, |
| "learning_rate": 1.2749288376442044e-07, |
| "loss": 0.4741, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.9027072758037225, |
| "grad_norm": 1.6377193927764893, |
| "learning_rate": 1.253411509609459e-07, |
| "loss": 0.555, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.9035532994923858, |
| "grad_norm": 1.604526400566101, |
| "learning_rate": 1.2320726377021836e-07, |
| "loss": 0.5634, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.9043993231810491, |
| "grad_norm": 1.342687964439392, |
| "learning_rate": 1.2109123822844653e-07, |
| "loss": 0.5433, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.9052453468697124, |
| "grad_norm": 1.5053247213363647, |
| "learning_rate": 1.1899309023760686e-07, |
| "loss": 0.5671, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.9060913705583756, |
| "grad_norm": 1.361431360244751, |
| "learning_rate": 1.1691283556532846e-07, |
| "loss": 0.5314, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.9069373942470389, |
| "grad_norm": 1.2656420469284058, |
| "learning_rate": 1.1485048984476998e-07, |
| "loss": 0.4949, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.9077834179357022, |
| "grad_norm": 1.4055432081222534, |
| "learning_rate": 1.1280606857450387e-07, |
| "loss": 0.4909, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.9086294416243654, |
| "grad_norm": 1.4172112941741943, |
| "learning_rate": 1.1077958711840032e-07, |
| "loss": 0.4917, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.9094754653130288, |
| "grad_norm": 1.3536546230316162, |
| "learning_rate": 1.0877106070551175e-07, |
| "loss": 0.4862, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.9103214890016921, |
| "grad_norm": 1.4950735569000244, |
| "learning_rate": 1.0678050442995802e-07, |
| "loss": 0.479, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.9111675126903553, |
| "grad_norm": 1.420637607574463, |
| "learning_rate": 1.0480793325081174e-07, |
| "loss": 0.5085, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.9120135363790186, |
| "grad_norm": 1.3945380449295044, |
| "learning_rate": 1.0285336199198858e-07, |
| "loss": 0.4308, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.9128595600676819, |
| "grad_norm": 1.4180177450180054, |
| "learning_rate": 1.0091680534213389e-07, |
| "loss": 0.4753, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.9137055837563451, |
| "grad_norm": 1.3723344802856445, |
| "learning_rate": 9.899827785451288e-08, |
| "loss": 0.4353, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.9145516074450084, |
| "grad_norm": 1.4225527048110962, |
| "learning_rate": 9.709779394690144e-08, |
| "loss": 0.5055, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.9153976311336718, |
| "grad_norm": 1.4633773565292358, |
| "learning_rate": 9.521536790147722e-08, |
| "loss": 0.4279, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.916243654822335, |
| "grad_norm": 1.5522949695587158, |
| "learning_rate": 9.335101386471285e-08, |
| "loss": 0.5099, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.9170896785109983, |
| "grad_norm": 1.4376224279403687, |
| "learning_rate": 9.150474584726926e-08, |
| "loss": 0.5157, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.9179357021996616, |
| "grad_norm": 1.551571249961853, |
| "learning_rate": 8.967657772389032e-08, |
| "loss": 0.5229, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.9187817258883249, |
| "grad_norm": 1.5067143440246582, |
| "learning_rate": 8.78665232332998e-08, |
| "loss": 0.5731, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.9196277495769881, |
| "grad_norm": 1.6685928106307983, |
| "learning_rate": 8.607459597809565e-08, |
| "loss": 0.5185, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.9204737732656514, |
| "grad_norm": 1.6518747806549072, |
| "learning_rate": 8.430080942465085e-08, |
| "loss": 0.4492, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.9213197969543148, |
| "grad_norm": 1.4557331800460815, |
| "learning_rate": 8.254517690300946e-08, |
| "loss": 0.4527, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.922165820642978, |
| "grad_norm": 1.3865046501159668, |
| "learning_rate": 8.080771160678763e-08, |
| "loss": 0.4718, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.9230118443316413, |
| "grad_norm": 1.6267703771591187, |
| "learning_rate": 7.908842659307525e-08, |
| "loss": 0.5811, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.9238578680203046, |
| "grad_norm": 1.4735057353973389, |
| "learning_rate": 7.738733478233673e-08, |
| "loss": 0.5173, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.9247038917089678, |
| "grad_norm": 1.2759696245193481, |
| "learning_rate": 7.57044489583128e-08, |
| "loss": 0.447, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.9255499153976311, |
| "grad_norm": 1.3496994972229004, |
| "learning_rate": 7.40397817679278e-08, |
| "loss": 0.5328, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.9263959390862944, |
| "grad_norm": 1.4382102489471436, |
| "learning_rate": 7.239334572119172e-08, |
| "loss": 0.4431, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.9272419627749577, |
| "grad_norm": 1.497261643409729, |
| "learning_rate": 7.076515319110688e-08, |
| "loss": 0.4202, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.928087986463621, |
| "grad_norm": 1.4099481105804443, |
| "learning_rate": 6.915521641357504e-08, |
| "loss": 0.4269, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.9289340101522843, |
| "grad_norm": 1.5935684442520142, |
| "learning_rate": 6.756354748730709e-08, |
| "loss": 0.4807, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.9297800338409475, |
| "grad_norm": 1.3946175575256348, |
| "learning_rate": 6.599015837372907e-08, |
| "loss": 0.4243, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.9306260575296108, |
| "grad_norm": 1.605188012123108, |
| "learning_rate": 6.443506089689411e-08, |
| "loss": 0.4828, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.9314720812182741, |
| "grad_norm": 1.4577580690383911, |
| "learning_rate": 6.289826674339333e-08, |
| "loss": 0.492, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.9323181049069373, |
| "grad_norm": 1.3271147012710571, |
| "learning_rate": 6.137978746226848e-08, |
| "loss": 0.4126, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.9331641285956007, |
| "grad_norm": 1.4398398399353027, |
| "learning_rate": 5.987963446492384e-08, |
| "loss": 0.5249, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.934010152284264, |
| "grad_norm": 1.4152181148529053, |
| "learning_rate": 5.839781902504227e-08, |
| "loss": 0.4867, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.9348561759729273, |
| "grad_norm": 1.6749119758605957, |
| "learning_rate": 5.693435227849875e-08, |
| "loss": 0.5718, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.9357021996615905, |
| "grad_norm": 1.4649239778518677, |
| "learning_rate": 5.548924522327748e-08, |
| "loss": 0.4816, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.9365482233502538, |
| "grad_norm": 1.4536762237548828, |
| "learning_rate": 5.406250871938912e-08, |
| "loss": 0.4235, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.937394247038917, |
| "grad_norm": 1.7883013486862183, |
| "learning_rate": 5.265415348879005e-08, |
| "loss": 0.4966, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.9382402707275804, |
| "grad_norm": 1.671594500541687, |
| "learning_rate": 5.126419011529993e-08, |
| "loss": 0.5989, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.9390862944162437, |
| "grad_norm": 1.4351028203964233, |
| "learning_rate": 4.989262904452369e-08, |
| "loss": 0.4604, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.939932318104907, |
| "grad_norm": 1.4522861242294312, |
| "learning_rate": 4.853948058377245e-08, |
| "loss": 0.5147, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.9407783417935702, |
| "grad_norm": 1.4232293367385864, |
| "learning_rate": 4.720475490198634e-08, |
| "loss": 0.4657, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.9416243654822335, |
| "grad_norm": 1.4143102169036865, |
| "learning_rate": 4.5888462029658186e-08, |
| "loss": 0.5611, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.9424703891708968, |
| "grad_norm": 1.4739620685577393, |
| "learning_rate": 4.4590611858756906e-08, |
| "loss": 0.5188, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.94331641285956, |
| "grad_norm": 1.3989241123199463, |
| "learning_rate": 4.3311214142654766e-08, |
| "loss": 0.5242, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.9441624365482234, |
| "grad_norm": 1.3451173305511475, |
| "learning_rate": 4.205027849605359e-08, |
| "loss": 0.4451, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.9450084602368867, |
| "grad_norm": 1.3867361545562744, |
| "learning_rate": 4.0807814394911996e-08, |
| "loss": 0.5151, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.9458544839255499, |
| "grad_norm": 1.344876766204834, |
| "learning_rate": 3.9583831176374654e-08, |
| "loss": 0.4618, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.9467005076142132, |
| "grad_norm": 1.4002844095230103, |
| "learning_rate": 3.837833803870178e-08, |
| "loss": 0.4737, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.9475465313028765, |
| "grad_norm": 1.4744623899459839, |
| "learning_rate": 3.7191344041200836e-08, |
| "loss": 0.5001, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.9483925549915397, |
| "grad_norm": 1.4199622869491577, |
| "learning_rate": 3.602285810415718e-08, |
| "loss": 0.4648, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.949238578680203, |
| "grad_norm": 1.4660803079605103, |
| "learning_rate": 3.4872889008767954e-08, |
| "loss": 0.5241, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.9500846023688664, |
| "grad_norm": 1.4228583574295044, |
| "learning_rate": 3.37414453970758e-08, |
| "loss": 0.4474, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.9509306260575296, |
| "grad_norm": 1.6068693399429321, |
| "learning_rate": 3.262853577190445e-08, |
| "loss": 0.5052, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.9517766497461929, |
| "grad_norm": 1.5408366918563843, |
| "learning_rate": 3.153416849679347e-08, |
| "loss": 0.5581, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.9526226734348562, |
| "grad_norm": 1.6030367612838745, |
| "learning_rate": 3.04583517959367e-08, |
| "loss": 0.5493, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.9534686971235194, |
| "grad_norm": 1.3581690788269043, |
| "learning_rate": 2.940109375411976e-08, |
| "loss": 0.4723, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.9543147208121827, |
| "grad_norm": 1.3531447649002075, |
| "learning_rate": 2.8362402316660374e-08, |
| "loss": 0.4892, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.955160744500846, |
| "grad_norm": 1.4876835346221924, |
| "learning_rate": 2.734228528934679e-08, |
| "loss": 0.5043, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.9560067681895094, |
| "grad_norm": 1.703354835510254, |
| "learning_rate": 2.634075033838057e-08, |
| "loss": 0.4791, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.9568527918781726, |
| "grad_norm": 1.5040783882141113, |
| "learning_rate": 2.535780499031887e-08, |
| "loss": 0.4949, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.9576988155668359, |
| "grad_norm": 1.524038314819336, |
| "learning_rate": 2.4393456632016977e-08, |
| "loss": 0.5314, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.9585448392554992, |
| "grad_norm": 1.4656578302383423, |
| "learning_rate": 2.3447712510573928e-08, |
| "loss": 0.5389, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.9593908629441624, |
| "grad_norm": 1.492907166481018, |
| "learning_rate": 2.2520579733277258e-08, |
| "loss": 0.4928, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.9602368866328257, |
| "grad_norm": 1.4177685976028442, |
| "learning_rate": 2.161206526754972e-08, |
| "loss": 0.4866, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.961082910321489, |
| "grad_norm": 1.377503514289856, |
| "learning_rate": 2.072217594089765e-08, |
| "loss": 0.5325, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.9619289340101523, |
| "grad_norm": 1.5548027753829956, |
| "learning_rate": 1.985091844085796e-08, |
| "loss": 0.5236, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.9627749576988156, |
| "grad_norm": 1.4531538486480713, |
| "learning_rate": 1.899829931495012e-08, |
| "loss": 0.5555, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.9636209813874789, |
| "grad_norm": 1.5614752769470215, |
| "learning_rate": 1.8164324970625646e-08, |
| "loss": 0.4734, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.9644670050761421, |
| "grad_norm": 1.4508945941925049, |
| "learning_rate": 1.7349001675219245e-08, |
| "loss": 0.4839, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.9653130287648054, |
| "grad_norm": 1.348341464996338, |
| "learning_rate": 1.65523355559033e-08, |
| "loss": 0.4769, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.9661590524534687, |
| "grad_norm": 1.5168187618255615, |
| "learning_rate": 1.5774332599641228e-08, |
| "loss": 0.4738, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.9670050761421319, |
| "grad_norm": 1.6730620861053467, |
| "learning_rate": 1.501499865314171e-08, |
| "loss": 0.6032, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.9678510998307953, |
| "grad_norm": 1.559067726135254, |
| "learning_rate": 1.4274339422816197e-08, |
| "loss": 0.4942, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.9686971235194586, |
| "grad_norm": 1.3421351909637451, |
| "learning_rate": 1.3552360474734794e-08, |
| "loss": 0.4029, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.9695431472081218, |
| "grad_norm": 1.4648717641830444, |
| "learning_rate": 1.2849067234584623e-08, |
| "loss": 0.49, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.9703891708967851, |
| "grad_norm": 1.444698691368103, |
| "learning_rate": 1.2164464987630131e-08, |
| "loss": 0.4617, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.9712351945854484, |
| "grad_norm": 1.3629822731018066, |
| "learning_rate": 1.1498558878672017e-08, |
| "loss": 0.5096, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.9720812182741116, |
| "grad_norm": 1.6496318578720093, |
| "learning_rate": 1.0851353912008644e-08, |
| "loss": 0.5134, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.9729272419627749, |
| "grad_norm": 1.529559850692749, |
| "learning_rate": 1.0222854951399408e-08, |
| "loss": 0.5383, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.9737732656514383, |
| "grad_norm": 1.5004898309707642, |
| "learning_rate": 9.613066720028097e-09, |
| "loss": 0.5168, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.9746192893401016, |
| "grad_norm": 1.316214919090271, |
| "learning_rate": 9.021993800466256e-09, |
| "loss": 0.439, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.9754653130287648, |
| "grad_norm": 1.5756371021270752, |
| "learning_rate": 8.449640634639878e-09, |
| "loss": 0.5111, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.9763113367174281, |
| "grad_norm": 1.5366209745407104, |
| "learning_rate": 7.896011523794989e-09, |
| "loss": 0.5319, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.9771573604060914, |
| "grad_norm": 1.3801406621932983, |
| "learning_rate": 7.361110628466839e-09, |
| "loss": 0.5529, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.9780033840947546, |
| "grad_norm": 1.5490000247955322, |
| "learning_rate": 6.84494196844715e-09, |
| "loss": 0.5038, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.9788494077834179, |
| "grad_norm": 1.5412358045578003, |
| "learning_rate": 6.347509422754139e-09, |
| "loss": 0.4155, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.9796954314720813, |
| "grad_norm": 1.4816324710845947, |
| "learning_rate": 5.868816729604765e-09, |
| "loss": 0.4967, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.9805414551607445, |
| "grad_norm": 1.3229267597198486, |
| "learning_rate": 5.408867486384473e-09, |
| "loss": 0.4326, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.9813874788494078, |
| "grad_norm": 1.5264832973480225, |
| "learning_rate": 4.9676651496222136e-09, |
| "loss": 0.4914, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.9822335025380711, |
| "grad_norm": 1.4101085662841797, |
| "learning_rate": 4.5452130349629694e-09, |
| "loss": 0.5168, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.9830795262267343, |
| "grad_norm": 1.574628472328186, |
| "learning_rate": 4.1415143171436026e-09, |
| "loss": 0.504, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.9839255499153976, |
| "grad_norm": 1.586148738861084, |
| "learning_rate": 3.756572029968708e-09, |
| "loss": 0.4851, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.9847715736040609, |
| "grad_norm": 1.5436687469482422, |
| "learning_rate": 3.390389066287858e-09, |
| "loss": 0.4857, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.9856175972927242, |
| "grad_norm": 1.4142553806304932, |
| "learning_rate": 3.0429681779739485e-09, |
| "loss": 0.4534, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.9864636209813875, |
| "grad_norm": 1.3092448711395264, |
| "learning_rate": 2.7143119759026614e-09, |
| "loss": 0.4619, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.9873096446700508, |
| "grad_norm": 1.4233113527297974, |
| "learning_rate": 2.404422929932204e-09, |
| "loss": 0.5151, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.988155668358714, |
| "grad_norm": 1.3810383081436157, |
| "learning_rate": 2.1133033688858217e-09, |
| "loss": 0.4998, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.9890016920473773, |
| "grad_norm": 1.4071393013000488, |
| "learning_rate": 1.8409554805329243e-09, |
| "loss": 0.4839, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.9898477157360406, |
| "grad_norm": 1.4692143201828003, |
| "learning_rate": 1.5873813115740989e-09, |
| "loss": 0.4573, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.9906937394247038, |
| "grad_norm": 1.4754483699798584, |
| "learning_rate": 1.3525827676247327e-09, |
| "loss": 0.5774, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.9915397631133672, |
| "grad_norm": 1.3921698331832886, |
| "learning_rate": 1.1365616132008595e-09, |
| "loss": 0.488, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.9923857868020305, |
| "grad_norm": 1.3214542865753174, |
| "learning_rate": 9.393194717061127e-10, |
| "loss": 0.5449, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.9932318104906938, |
| "grad_norm": 1.474719524383545, |
| "learning_rate": 7.608578254195143e-10, |
| "loss": 0.4718, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.994077834179357, |
| "grad_norm": 1.3366878032684326, |
| "learning_rate": 6.011780154843716e-10, |
| "loss": 0.5207, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.9949238578680203, |
| "grad_norm": 1.5625263452529907, |
| "learning_rate": 4.602812418974534e-10, |
| "loss": 0.5025, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.9957698815566836, |
| "grad_norm": 1.4968297481536865, |
| "learning_rate": 3.3816856350177284e-10, |
| "loss": 0.5409, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.9966159052453468, |
| "grad_norm": 1.4987884759902954, |
| "learning_rate": 2.348408979760408e-10, |
| "loss": 0.4497, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.9974619289340102, |
| "grad_norm": 1.578500509262085, |
| "learning_rate": 1.502990218302247e-10, |
| "loss": 0.5731, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.9983079526226735, |
| "grad_norm": 1.4984197616577148, |
| "learning_rate": 8.454357039860972e-11, |
| "loss": 0.4767, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.9991539763113367, |
| "grad_norm": 1.5081285238265991, |
| "learning_rate": 3.7575037834247655e-11, |
| "loss": 0.5204, |
| "step": 1181 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 1.4636836051940918, |
| "learning_rate": 9.393777107291614e-12, |
| "loss": 0.5414, |
| "step": 1182 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 1182, |
| "total_flos": 8.436898797652541e+17, |
| "train_loss": 0.5417148913627147, |
| "train_runtime": 7080.4391, |
| "train_samples_per_second": 4.674, |
| "train_steps_per_second": 0.167 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 1182, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8.436898797652541e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|