| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9958417169684776, |
| "eval_steps": 500, |
| "global_step": 232, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.004292421193829644, |
| "grad_norm": 3.342827936598552, |
| "learning_rate": 1.4285714285714286e-06, |
| "loss": 0.679, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.008584842387659289, |
| "grad_norm": 2.896501931914046, |
| "learning_rate": 2.8571428571428573e-06, |
| "loss": 0.6709, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.012877263581488933, |
| "grad_norm": 3.3718581451101177, |
| "learning_rate": 4.2857142857142855e-06, |
| "loss": 0.6619, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.017169684775318578, |
| "grad_norm": 2.529609856453942, |
| "learning_rate": 5.7142857142857145e-06, |
| "loss": 0.7753, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.021462105969148222, |
| "grad_norm": 2.4044812444703103, |
| "learning_rate": 7.1428571428571436e-06, |
| "loss": 0.7833, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.025754527162977867, |
| "grad_norm": 2.152377713157304, |
| "learning_rate": 8.571428571428571e-06, |
| "loss": 0.7411, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.03004694835680751, |
| "grad_norm": 7.163921239673759, |
| "learning_rate": 1e-05, |
| "loss": 0.6751, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.034339369550637155, |
| "grad_norm": 1.9811371567636475, |
| "learning_rate": 9.999512620046523e-06, |
| "loss": 0.6019, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0386317907444668, |
| "grad_norm": 2.2991427332066663, |
| "learning_rate": 9.998050575201772e-06, |
| "loss": 0.6195, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.042924211938296444, |
| "grad_norm": 2.633277122254043, |
| "learning_rate": 9.995614150494293e-06, |
| "loss": 0.6773, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.04721663313212609, |
| "grad_norm": 0.8137858553380385, |
| "learning_rate": 9.992203820909906e-06, |
| "loss": 0.44, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.05150905432595573, |
| "grad_norm": 2.5789476047477495, |
| "learning_rate": 9.987820251299121e-06, |
| "loss": 0.6856, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.05580147551978538, |
| "grad_norm": 2.325022123665024, |
| "learning_rate": 9.982464296247523e-06, |
| "loss": 0.6573, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.06009389671361502, |
| "grad_norm": 2.1016133484431814, |
| "learning_rate": 9.976136999909156e-06, |
| "loss": 0.5795, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.06438631790744467, |
| "grad_norm": 2.203575792232459, |
| "learning_rate": 9.968839595802982e-06, |
| "loss": 0.5512, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.06867873910127431, |
| "grad_norm": 1.7649770138174061, |
| "learning_rate": 9.960573506572391e-06, |
| "loss": 0.5991, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.07297116029510396, |
| "grad_norm": 2.125017710581645, |
| "learning_rate": 9.951340343707852e-06, |
| "loss": 0.6961, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.0772635814889336, |
| "grad_norm": 1.9648109979705357, |
| "learning_rate": 9.941141907232766e-06, |
| "loss": 0.6274, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.08155600268276325, |
| "grad_norm": 2.1430042253940997, |
| "learning_rate": 9.929980185352525e-06, |
| "loss": 0.5933, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.08584842387659289, |
| "grad_norm": 2.187065629596637, |
| "learning_rate": 9.91785735406693e-06, |
| "loss": 0.6537, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.09014084507042254, |
| "grad_norm": 2.0411377847693366, |
| "learning_rate": 9.904775776745959e-06, |
| "loss": 0.5277, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.09443326626425218, |
| "grad_norm": 1.9222476925913479, |
| "learning_rate": 9.890738003669029e-06, |
| "loss": 0.6009, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.09872568745808183, |
| "grad_norm": 2.202338617043183, |
| "learning_rate": 9.875746771527817e-06, |
| "loss": 0.6808, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.10301810865191147, |
| "grad_norm": 1.8704298128204246, |
| "learning_rate": 9.859805002892733e-06, |
| "loss": 0.5834, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.10731052984574112, |
| "grad_norm": 2.26606517803316, |
| "learning_rate": 9.842915805643156e-06, |
| "loss": 0.6416, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.11160295103957076, |
| "grad_norm": 1.957724168370316, |
| "learning_rate": 9.825082472361558e-06, |
| "loss": 0.5565, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.1158953722334004, |
| "grad_norm": 2.7304041309488367, |
| "learning_rate": 9.806308479691595e-06, |
| "loss": 0.6941, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.12018779342723004, |
| "grad_norm": 2.252924472572446, |
| "learning_rate": 9.786597487660336e-06, |
| "loss": 0.6806, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.1244802146210597, |
| "grad_norm": 2.208805819097276, |
| "learning_rate": 9.765953338964736e-06, |
| "loss": 0.6123, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.12877263581488935, |
| "grad_norm": 2.0330562196938238, |
| "learning_rate": 9.744380058222483e-06, |
| "loss": 0.5966, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.13306505700871898, |
| "grad_norm": 2.4286898255231533, |
| "learning_rate": 9.721881851187406e-06, |
| "loss": 0.6148, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.13735747820254862, |
| "grad_norm": 1.911339918779555, |
| "learning_rate": 9.698463103929542e-06, |
| "loss": 0.5662, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.14164989939637826, |
| "grad_norm": 2.0543803346952583, |
| "learning_rate": 9.674128381980073e-06, |
| "loss": 0.6602, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.14594232059020792, |
| "grad_norm": 2.086982956085146, |
| "learning_rate": 9.648882429441258e-06, |
| "loss": 0.5432, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.15023474178403756, |
| "grad_norm": 2.0901966575723474, |
| "learning_rate": 9.622730168061568e-06, |
| "loss": 0.5274, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.1545271629778672, |
| "grad_norm": 4.381472469442544, |
| "learning_rate": 9.595676696276173e-06, |
| "loss": 0.583, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.15881958417169684, |
| "grad_norm": 2.3265871885088503, |
| "learning_rate": 9.567727288213005e-06, |
| "loss": 0.5623, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.1631120053655265, |
| "grad_norm": 2.0021378156582963, |
| "learning_rate": 9.538887392664544e-06, |
| "loss": 0.6318, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.16740442655935614, |
| "grad_norm": 1.0984958441170714, |
| "learning_rate": 9.50916263202557e-06, |
| "loss": 0.4548, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.17169684775318578, |
| "grad_norm": 1.8318936065536409, |
| "learning_rate": 9.478558801197065e-06, |
| "loss": 0.5985, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.17598926894701541, |
| "grad_norm": 2.7994275799684076, |
| "learning_rate": 9.44708186645649e-06, |
| "loss": 0.5926, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.18028169014084508, |
| "grad_norm": 2.072220455177539, |
| "learning_rate": 9.414737964294636e-06, |
| "loss": 0.5318, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.18457411133467472, |
| "grad_norm": 2.2812531791006645, |
| "learning_rate": 9.381533400219319e-06, |
| "loss": 0.5948, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.18886653252850436, |
| "grad_norm": 1.8766396474219533, |
| "learning_rate": 9.347474647526095e-06, |
| "loss": 0.5514, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.193158953722334, |
| "grad_norm": 1.9908306217286644, |
| "learning_rate": 9.312568346036288e-06, |
| "loss": 0.5282, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.19745137491616366, |
| "grad_norm": 2.2942225048556284, |
| "learning_rate": 9.276821300802535e-06, |
| "loss": 0.5875, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.2017437961099933, |
| "grad_norm": 3.1824151916415087, |
| "learning_rate": 9.24024048078213e-06, |
| "loss": 0.6144, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.20603621730382293, |
| "grad_norm": 0.992721768550404, |
| "learning_rate": 9.202833017478421e-06, |
| "loss": 0.4847, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.21032863849765257, |
| "grad_norm": 1.9310743502843328, |
| "learning_rate": 9.164606203550498e-06, |
| "loss": 0.5974, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.21462105969148224, |
| "grad_norm": 2.1730863843947392, |
| "learning_rate": 9.125567491391476e-06, |
| "loss": 0.6293, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.21891348088531187, |
| "grad_norm": 0.8680338310564839, |
| "learning_rate": 9.085724491675642e-06, |
| "loss": 0.4757, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.2232059020791415, |
| "grad_norm": 2.1915863963781916, |
| "learning_rate": 9.045084971874738e-06, |
| "loss": 0.5925, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.22749832327297115, |
| "grad_norm": 2.3492931220336195, |
| "learning_rate": 9.003656854743667e-06, |
| "loss": 0.6402, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.2317907444668008, |
| "grad_norm": 0.8164522682092396, |
| "learning_rate": 8.961448216775955e-06, |
| "loss": 0.4382, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.23608316566063045, |
| "grad_norm": 2.2933091603858418, |
| "learning_rate": 8.9184672866292e-06, |
| "loss": 0.5367, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.2403755868544601, |
| "grad_norm": 2.051540581160434, |
| "learning_rate": 8.874722443520898e-06, |
| "loss": 0.5736, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.24466800804828973, |
| "grad_norm": 1.9508118237454382, |
| "learning_rate": 8.83022221559489e-06, |
| "loss": 0.6196, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.2489604292421194, |
| "grad_norm": 2.4166528722303036, |
| "learning_rate": 8.784975278258783e-06, |
| "loss": 0.5279, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.25325285043594903, |
| "grad_norm": 1.9719959484618856, |
| "learning_rate": 8.73899045249266e-06, |
| "loss": 0.5248, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.2575452716297787, |
| "grad_norm": 2.088094168914141, |
| "learning_rate": 8.692276703129421e-06, |
| "loss": 0.5436, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.2618376928236083, |
| "grad_norm": 2.347294342285649, |
| "learning_rate": 8.644843137107058e-06, |
| "loss": 0.7, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.26613011401743797, |
| "grad_norm": 2.152662276659629, |
| "learning_rate": 8.596699001693257e-06, |
| "loss": 0.5052, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.2704225352112676, |
| "grad_norm": 2.1260026022790552, |
| "learning_rate": 8.547853682682605e-06, |
| "loss": 0.6121, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.27471495640509724, |
| "grad_norm": 1.9148541801577235, |
| "learning_rate": 8.498316702566828e-06, |
| "loss": 0.5516, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.2790073775989269, |
| "grad_norm": 2.134479085245887, |
| "learning_rate": 8.44809771867835e-06, |
| "loss": 0.6297, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.2832997987927565, |
| "grad_norm": 1.776446730314099, |
| "learning_rate": 8.397206521307584e-06, |
| "loss": 0.5337, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.2875922199865862, |
| "grad_norm": 1.955780895559264, |
| "learning_rate": 8.345653031794292e-06, |
| "loss": 0.6187, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.29188464118041585, |
| "grad_norm": 2.065677438153802, |
| "learning_rate": 8.293447300593402e-06, |
| "loss": 0.4712, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.29617706237424546, |
| "grad_norm": 1.929637625719691, |
| "learning_rate": 8.240599505315656e-06, |
| "loss": 0.5638, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.3004694835680751, |
| "grad_norm": 2.1234613246255294, |
| "learning_rate": 8.18711994874345e-06, |
| "loss": 0.5622, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.3047619047619048, |
| "grad_norm": 2.4974658951008935, |
| "learning_rate": 8.133019056822303e-06, |
| "loss": 0.5656, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.3090543259557344, |
| "grad_norm": 2.1148121496211028, |
| "learning_rate": 8.078307376628292e-06, |
| "loss": 0.5706, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.31334674714956406, |
| "grad_norm": 1.9637387645317304, |
| "learning_rate": 8.022995574311876e-06, |
| "loss": 0.6431, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.3176391683433937, |
| "grad_norm": 2.2321260500467996, |
| "learning_rate": 7.967094433018508e-06, |
| "loss": 0.6038, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.32193158953722334, |
| "grad_norm": 2.1333068986028136, |
| "learning_rate": 7.910614850786448e-06, |
| "loss": 0.5666, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.326224010731053, |
| "grad_norm": 2.195983244813881, |
| "learning_rate": 7.85356783842216e-06, |
| "loss": 0.5877, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.3305164319248826, |
| "grad_norm": 2.9185529975089644, |
| "learning_rate": 7.795964517353734e-06, |
| "loss": 0.5221, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.3348088531187123, |
| "grad_norm": 2.1356064453519363, |
| "learning_rate": 7.737816117462752e-06, |
| "loss": 0.5536, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.33910127431254194, |
| "grad_norm": 0.9888358843543892, |
| "learning_rate": 7.679133974894984e-06, |
| "loss": 0.427, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.34339369550637155, |
| "grad_norm": 0.9532274184167417, |
| "learning_rate": 7.619929529850397e-06, |
| "loss": 0.4607, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.3476861167002012, |
| "grad_norm": 1.969695168470127, |
| "learning_rate": 7.560214324352858e-06, |
| "loss": 0.525, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.35197853789403083, |
| "grad_norm": 2.2331654792734272, |
| "learning_rate": 7.500000000000001e-06, |
| "loss": 0.6041, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.3562709590878605, |
| "grad_norm": 2.4884565474083606, |
| "learning_rate": 7.4392982956936644e-06, |
| "loss": 0.5886, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.36056338028169016, |
| "grad_norm": 1.8314120774170615, |
| "learning_rate": 7.378121045351378e-06, |
| "loss": 0.5349, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.36485580147551977, |
| "grad_norm": 2.1762807073421127, |
| "learning_rate": 7.31648017559931e-06, |
| "loss": 0.5606, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.36914822266934944, |
| "grad_norm": 2.422109142275802, |
| "learning_rate": 7.254387703447154e-06, |
| "loss": 0.5431, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.3734406438631791, |
| "grad_norm": 1.9079552738678454, |
| "learning_rate": 7.191855733945388e-06, |
| "loss": 0.5553, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.3777330650570087, |
| "grad_norm": 1.760125442274873, |
| "learning_rate": 7.128896457825364e-06, |
| "loss": 0.5808, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.3820254862508384, |
| "grad_norm": 0.9979331285164651, |
| "learning_rate": 7.06552214912271e-06, |
| "loss": 0.4579, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.386317907444668, |
| "grad_norm": 1.8496753131089991, |
| "learning_rate": 7.0017451627844765e-06, |
| "loss": 0.591, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.39061032863849765, |
| "grad_norm": 2.3343608471053265, |
| "learning_rate": 6.9375779322605154e-06, |
| "loss": 0.6091, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.3949027498323273, |
| "grad_norm": 1.9668213618430554, |
| "learning_rate": 6.873032967079562e-06, |
| "loss": 0.6944, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.3991951710261569, |
| "grad_norm": 1.8725242772507493, |
| "learning_rate": 6.808122850410461e-06, |
| "loss": 0.6055, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.4034875922199866, |
| "grad_norm": 1.9966003945224369, |
| "learning_rate": 6.7428602366090764e-06, |
| "loss": 0.5595, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.40778001341381626, |
| "grad_norm": 3.053074851635477, |
| "learning_rate": 6.677257848751276e-06, |
| "loss": 0.5857, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.41207243460764587, |
| "grad_norm": 2.010221226340498, |
| "learning_rate": 6.611328476152557e-06, |
| "loss": 0.5995, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.41636485580147553, |
| "grad_norm": 1.742641730594434, |
| "learning_rate": 6.545084971874738e-06, |
| "loss": 0.5389, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.42065727699530514, |
| "grad_norm": 0.7858607450939203, |
| "learning_rate": 6.4785402502202345e-06, |
| "loss": 0.4598, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.4249496981891348, |
| "grad_norm": 1.9026874984032676, |
| "learning_rate": 6.411707284214384e-06, |
| "loss": 0.5824, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.42924211938296447, |
| "grad_norm": 1.7972924537621116, |
| "learning_rate": 6.344599103076329e-06, |
| "loss": 0.5605, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.4335345405767941, |
| "grad_norm": 1.6566505050926905, |
| "learning_rate": 6.277228789678953e-06, |
| "loss": 0.55, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.43782696177062375, |
| "grad_norm": 0.7871200662869098, |
| "learning_rate": 6.209609477998339e-06, |
| "loss": 0.4487, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.4421193829644534, |
| "grad_norm": 2.118924742242862, |
| "learning_rate": 6.141754350553279e-06, |
| "loss": 0.5791, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.446411804158283, |
| "grad_norm": 1.9417216221368725, |
| "learning_rate": 6.073676635835317e-06, |
| "loss": 0.5321, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.4507042253521127, |
| "grad_norm": 0.8301362013675532, |
| "learning_rate": 6.005389605729824e-06, |
| "loss": 0.471, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.4549966465459423, |
| "grad_norm": 1.9216842351031653, |
| "learning_rate": 5.936906572928625e-06, |
| "loss": 0.4981, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.45928906773977196, |
| "grad_norm": 2.1309977101502913, |
| "learning_rate": 5.8682408883346535e-06, |
| "loss": 0.523, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.4635814889336016, |
| "grad_norm": 6.307531137311205, |
| "learning_rate": 5.799405938459175e-06, |
| "loss": 0.5631, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.46787391012743124, |
| "grad_norm": 0.8161016261331794, |
| "learning_rate": 5.730415142812059e-06, |
| "loss": 0.4739, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.4721663313212609, |
| "grad_norm": 1.9009887448608773, |
| "learning_rate": 5.661281951285613e-06, |
| "loss": 0.6272, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.47645875251509057, |
| "grad_norm": 2.2577462172723233, |
| "learning_rate": 5.592019841532507e-06, |
| "loss": 0.5796, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.4807511737089202, |
| "grad_norm": 2.1012817228049814, |
| "learning_rate": 5.522642316338268e-06, |
| "loss": 0.6074, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.48504359490274984, |
| "grad_norm": 3.169512060210574, |
| "learning_rate": 5.453162900988902e-06, |
| "loss": 0.5817, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.48933601609657945, |
| "grad_norm": 0.7965146219233805, |
| "learning_rate": 5.383595140634093e-06, |
| "loss": 0.4628, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.4936284372904091, |
| "grad_norm": 2.528449046345642, |
| "learning_rate": 5.3139525976465675e-06, |
| "loss": 0.6113, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.4979208584842388, |
| "grad_norm": 2.169410644155486, |
| "learning_rate": 5.244248848978067e-06, |
| "loss": 0.6655, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.5022132796780684, |
| "grad_norm": 2.114218695353453, |
| "learning_rate": 5.174497483512506e-06, |
| "loss": 0.5438, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.5065057008718981, |
| "grad_norm": 2.322736697640736, |
| "learning_rate": 5.1047120994167855e-06, |
| "loss": 0.619, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.5107981220657277, |
| "grad_norm": 1.9355642603619068, |
| "learning_rate": 5.034906301489808e-06, |
| "loss": 0.568, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.5150905432595574, |
| "grad_norm": 0.8275929454164218, |
| "learning_rate": 4.965093698510192e-06, |
| "loss": 0.4642, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.5193829644533869, |
| "grad_norm": 0.7826606670717038, |
| "learning_rate": 4.895287900583216e-06, |
| "loss": 0.428, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.5236753856472166, |
| "grad_norm": 2.227565392356624, |
| "learning_rate": 4.825502516487497e-06, |
| "loss": 0.616, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.5279678068410463, |
| "grad_norm": 1.9049065560319947, |
| "learning_rate": 4.755751151021934e-06, |
| "loss": 0.6396, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.5322602280348759, |
| "grad_norm": 2.360278324447153, |
| "learning_rate": 4.686047402353433e-06, |
| "loss": 0.581, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.5365526492287056, |
| "grad_norm": 2.1436744795832463, |
| "learning_rate": 4.6164048593659076e-06, |
| "loss": 0.6027, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.5408450704225352, |
| "grad_norm": 2.0687980774005856, |
| "learning_rate": 4.546837099011101e-06, |
| "loss": 0.5455, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.5451374916163648, |
| "grad_norm": 2.094975644434325, |
| "learning_rate": 4.477357683661734e-06, |
| "loss": 0.5659, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.5494299128101945, |
| "grad_norm": 2.183849966879906, |
| "learning_rate": 4.4079801584674955e-06, |
| "loss": 0.578, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.5537223340040242, |
| "grad_norm": 1.8848396606946802, |
| "learning_rate": 4.3387180487143875e-06, |
| "loss": 0.5052, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.5580147551978538, |
| "grad_norm": 0.843007824971089, |
| "learning_rate": 4.269584857187942e-06, |
| "loss": 0.4997, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.5623071763916835, |
| "grad_norm": 2.763950937739874, |
| "learning_rate": 4.200594061540827e-06, |
| "loss": 0.5819, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.566599597585513, |
| "grad_norm": 1.7630340741332657, |
| "learning_rate": 4.131759111665349e-06, |
| "loss": 0.604, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.5708920187793427, |
| "grad_norm": 3.2094078977714897, |
| "learning_rate": 4.063093427071376e-06, |
| "loss": 0.6265, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.5751844399731724, |
| "grad_norm": 1.8312046959677455, |
| "learning_rate": 3.994610394270178e-06, |
| "loss": 0.5885, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.579476861167002, |
| "grad_norm": 1.8491871843778356, |
| "learning_rate": 3.926323364164684e-06, |
| "loss": 0.634, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.5837692823608317, |
| "grad_norm": 2.6473671182169167, |
| "learning_rate": 3.8582456494467214e-06, |
| "loss": 0.6355, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.5880617035546613, |
| "grad_norm": 2.999849822112049, |
| "learning_rate": 3.790390522001662e-06, |
| "loss": 0.529, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.5923541247484909, |
| "grad_norm": 1.9445772581815945, |
| "learning_rate": 3.7227712103210485e-06, |
| "loss": 0.5575, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.5966465459423206, |
| "grad_norm": 1.8104560751827103, |
| "learning_rate": 3.655400896923672e-06, |
| "loss": 0.5254, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.6009389671361502, |
| "grad_norm": 2.3671152557639346, |
| "learning_rate": 3.5882927157856175e-06, |
| "loss": 0.5583, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.6052313883299799, |
| "grad_norm": 2.074346620633345, |
| "learning_rate": 3.521459749779769e-06, |
| "loss": 0.6084, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.6095238095238096, |
| "grad_norm": 2.035892537869217, |
| "learning_rate": 3.4549150281252635e-06, |
| "loss": 0.5832, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.6138162307176391, |
| "grad_norm": 2.1099675037548966, |
| "learning_rate": 3.3886715238474454e-06, |
| "loss": 0.5579, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.6181086519114688, |
| "grad_norm": 2.0468759829486443, |
| "learning_rate": 3.322742151248726e-06, |
| "loss": 0.5848, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.6224010731052985, |
| "grad_norm": 1.9674918076912449, |
| "learning_rate": 3.2571397633909252e-06, |
| "loss": 0.5398, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.6266934942991281, |
| "grad_norm": 1.7459454556549392, |
| "learning_rate": 3.1918771495895395e-06, |
| "loss": 0.6756, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.6309859154929578, |
| "grad_norm": 0.7735085423697842, |
| "learning_rate": 3.12696703292044e-06, |
| "loss": 0.4194, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.6352783366867873, |
| "grad_norm": 1.820298479603609, |
| "learning_rate": 3.0624220677394854e-06, |
| "loss": 0.5858, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.639570757880617, |
| "grad_norm": 3.1630846211682, |
| "learning_rate": 2.9982548372155264e-06, |
| "loss": 0.5303, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.6438631790744467, |
| "grad_norm": 2.413654673008136, |
| "learning_rate": 2.934477850877292e-06, |
| "loss": 0.5315, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.6481556002682763, |
| "grad_norm": 2.508188149902217, |
| "learning_rate": 2.871103542174637e-06, |
| "loss": 0.5468, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.652448021462106, |
| "grad_norm": 1.801696332460669, |
| "learning_rate": 2.8081442660546126e-06, |
| "loss": 0.5817, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.6567404426559356, |
| "grad_norm": 2.26133822829944, |
| "learning_rate": 2.7456122965528475e-06, |
| "loss": 0.522, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.6610328638497652, |
| "grad_norm": 2.4471397871687834, |
| "learning_rate": 2.683519824400693e-06, |
| "loss": 0.5892, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.6653252850435949, |
| "grad_norm": 1.9131453194028882, |
| "learning_rate": 2.6218789546486235e-06, |
| "loss": 0.5554, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.6696177062374246, |
| "grad_norm": 2.4034600764131606, |
| "learning_rate": 2.560701704306336e-06, |
| "loss": 0.5969, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.6739101274312542, |
| "grad_norm": 2.1929545250468423, |
| "learning_rate": 2.5000000000000015e-06, |
| "loss": 0.5703, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.6782025486250839, |
| "grad_norm": 2.2198350864402348, |
| "learning_rate": 2.4397856756471435e-06, |
| "loss": 0.5812, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.6824949698189134, |
| "grad_norm": 2.0128371852734332, |
| "learning_rate": 2.380070470149605e-06, |
| "loss": 0.6307, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.6867873910127431, |
| "grad_norm": 2.217606056909539, |
| "learning_rate": 2.320866025105016e-06, |
| "loss": 0.5601, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.6910798122065728, |
| "grad_norm": 2.0541472609321065, |
| "learning_rate": 2.2621838825372496e-06, |
| "loss": 0.6326, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.6953722334004024, |
| "grad_norm": 1.7762192766929534, |
| "learning_rate": 2.204035482646267e-06, |
| "loss": 0.501, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.6996646545942321, |
| "grad_norm": 2.4025706210449322, |
| "learning_rate": 2.146432161577842e-06, |
| "loss": 0.5587, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.7039570757880617, |
| "grad_norm": 3.686601475649348, |
| "learning_rate": 2.0893851492135536e-06, |
| "loss": 0.5732, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.7082494969818913, |
| "grad_norm": 1.8617954191026609, |
| "learning_rate": 2.0329055669814936e-06, |
| "loss": 0.5018, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.712541918175721, |
| "grad_norm": 3.1015155924797804, |
| "learning_rate": 1.977004425688126e-06, |
| "loss": 0.6083, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.7168343393695507, |
| "grad_norm": 11.133598367985865, |
| "learning_rate": 1.9216926233717087e-06, |
| "loss": 0.5543, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.7211267605633803, |
| "grad_norm": 1.8192465630367278, |
| "learning_rate": 1.8669809431776991e-06, |
| "loss": 0.5863, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.7254191817572099, |
| "grad_norm": 2.0788969216539757, |
| "learning_rate": 1.8128800512565514e-06, |
| "loss": 0.5291, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.7297116029510395, |
| "grad_norm": 1.8368930044145042, |
| "learning_rate": 1.7594004946843458e-06, |
| "loss": 0.5378, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.7340040241448692, |
| "grad_norm": 2.0502782524895315, |
| "learning_rate": 1.7065526994065973e-06, |
| "loss": 0.5478, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.7382964453386989, |
| "grad_norm": 1.7763584764334766, |
| "learning_rate": 1.6543469682057105e-06, |
| "loss": 0.5448, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.7425888665325285, |
| "grad_norm": 1.7918310960966537, |
| "learning_rate": 1.6027934786924187e-06, |
| "loss": 0.6076, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.7468812877263582, |
| "grad_norm": 2.3563140946836523, |
| "learning_rate": 1.551902281321651e-06, |
| "loss": 0.565, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.7511737089201878, |
| "grad_norm": 1.8987524052104983, |
| "learning_rate": 1.5016832974331725e-06, |
| "loss": 0.5367, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.7554661301140174, |
| "grad_norm": 1.9189932438009185, |
| "learning_rate": 1.4521463173173966e-06, |
| "loss": 0.5435, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.7597585513078471, |
| "grad_norm": 2.42896938145852, |
| "learning_rate": 1.4033009983067454e-06, |
| "loss": 0.5141, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.7640509725016768, |
| "grad_norm": 2.5249548327367766, |
| "learning_rate": 1.3551568628929434e-06, |
| "loss": 0.674, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.7683433936955064, |
| "grad_norm": 2.806361559097513, |
| "learning_rate": 1.3077232968705805e-06, |
| "loss": 0.5517, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.772635814889336, |
| "grad_norm": 2.2153752757917555, |
| "learning_rate": 1.2610095475073415e-06, |
| "loss": 0.6408, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.7769282360831656, |
| "grad_norm": 2.5628201933255124, |
| "learning_rate": 1.2150247217412186e-06, |
| "loss": 0.5957, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.7812206572769953, |
| "grad_norm": 2.079321530887709, |
| "learning_rate": 1.1697777844051105e-06, |
| "loss": 0.6155, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.785513078470825, |
| "grad_norm": 3.2999932447363416, |
| "learning_rate": 1.1252775564791023e-06, |
| "loss": 0.5515, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.7898054996646546, |
| "grad_norm": 0.7905984900012323, |
| "learning_rate": 1.0815327133708015e-06, |
| "loss": 0.4313, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.7940979208584842, |
| "grad_norm": 1.9368152867312256, |
| "learning_rate": 1.0385517832240472e-06, |
| "loss": 0.6071, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.7983903420523139, |
| "grad_norm": 2.366580720258878, |
| "learning_rate": 9.963431452563331e-07, |
| "loss": 0.5578, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.8026827632461435, |
| "grad_norm": 2.3822261396548576, |
| "learning_rate": 9.549150281252633e-07, |
| "loss": 0.499, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.8069751844399732, |
| "grad_norm": 1.7417985644400509, |
| "learning_rate": 9.142755083243577e-07, |
| "loss": 0.5696, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.8112676056338028, |
| "grad_norm": 3.9103317258184376, |
| "learning_rate": 8.744325086085248e-07, |
| "loss": 0.5079, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.8155600268276325, |
| "grad_norm": 3.0678763221012586, |
| "learning_rate": 8.353937964495029e-07, |
| "loss": 0.5418, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.8198524480214621, |
| "grad_norm": 2.042653243276808, |
| "learning_rate": 7.971669825215789e-07, |
| "loss": 0.5748, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.8241448692152917, |
| "grad_norm": 2.39821396059648, |
| "learning_rate": 7.597595192178702e-07, |
| "loss": 0.5428, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.8284372904091214, |
| "grad_norm": 2.762719441820346, |
| "learning_rate": 7.23178699197467e-07, |
| "loss": 0.5673, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.8327297116029511, |
| "grad_norm": 1.9044906325399893, |
| "learning_rate": 6.874316539637127e-07, |
| "loss": 0.5932, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.8370221327967807, |
| "grad_norm": 1.856435008918489, |
| "learning_rate": 6.52525352473905e-07, |
| "loss": 0.5787, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.8413145539906103, |
| "grad_norm": 2.3652417447261995, |
| "learning_rate": 6.184665997806832e-07, |
| "loss": 0.5167, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.84560697518444, |
| "grad_norm": 2.723770526598716, |
| "learning_rate": 5.852620357053651e-07, |
| "loss": 0.571, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.8498993963782696, |
| "grad_norm": 2.4817466195220437, |
| "learning_rate": 5.529181335435124e-07, |
| "loss": 0.58, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.8541918175720993, |
| "grad_norm": 2.0455723374907397, |
| "learning_rate": 5.214411988029355e-07, |
| "loss": 0.5313, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.8584842387659289, |
| "grad_norm": 1.9318379229380933, |
| "learning_rate": 4.908373679744316e-07, |
| "loss": 0.5439, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.8627766599597585, |
| "grad_norm": 0.7959253630064103, |
| "learning_rate": 4.6111260733545714e-07, |
| "loss": 0.4454, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.8670690811535882, |
| "grad_norm": 1.9913907969524394, |
| "learning_rate": 4.322727117869951e-07, |
| "loss": 0.5008, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.8713615023474178, |
| "grad_norm": 2.1458695326057082, |
| "learning_rate": 4.043233037238281e-07, |
| "loss": 0.5459, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.8756539235412475, |
| "grad_norm": 1.9619666249914482, |
| "learning_rate": 3.772698319384349e-07, |
| "loss": 0.4999, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.8799463447350772, |
| "grad_norm": 4.3401484961367744, |
| "learning_rate": 3.511175705587433e-07, |
| "loss": 0.5758, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.8842387659289068, |
| "grad_norm": 2.5430076829964174, |
| "learning_rate": 3.258716180199278e-07, |
| "loss": 0.5761, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.8885311871227364, |
| "grad_norm": 0.767231711493243, |
| "learning_rate": 3.015368960704584e-07, |
| "loss": 0.4614, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.892823608316566, |
| "grad_norm": 2.193097249503329, |
| "learning_rate": 2.7811814881259503e-07, |
| "loss": 0.6247, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.8971160295103957, |
| "grad_norm": 1.9331493033717462, |
| "learning_rate": 2.556199417775174e-07, |
| "loss": 0.56, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.9014084507042254, |
| "grad_norm": 2.057966480432949, |
| "learning_rate": 2.3404666103526542e-07, |
| "loss": 0.5618, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.905700871898055, |
| "grad_norm": 2.351596162489054, |
| "learning_rate": 2.134025123396638e-07, |
| "loss": 0.4834, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.9099932930918846, |
| "grad_norm": 1.9120169610557864, |
| "learning_rate": 1.9369152030840553e-07, |
| "loss": 0.5541, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.9142857142857143, |
| "grad_norm": 2.2049446195768763, |
| "learning_rate": 1.7491752763844294e-07, |
| "loss": 0.5889, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.9185781354795439, |
| "grad_norm": 2.0434712402893145, |
| "learning_rate": 1.5708419435684463e-07, |
| "loss": 0.5638, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.9228705566733736, |
| "grad_norm": 2.1757829918674916, |
| "learning_rate": 1.4019499710726913e-07, |
| "loss": 0.642, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.9271629778672033, |
| "grad_norm": 1.9838844707633234, |
| "learning_rate": 1.2425322847218368e-07, |
| "loss": 0.6747, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.9314553990610329, |
| "grad_norm": 2.3027124358123836, |
| "learning_rate": 1.0926199633097156e-07, |
| "loss": 0.5337, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.9357478202548625, |
| "grad_norm": 1.8834569228162623, |
| "learning_rate": 9.522422325404234e-08, |
| "loss": 0.4917, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.9400402414486921, |
| "grad_norm": 1.8818328820882426, |
| "learning_rate": 8.214264593307097e-08, |
| "loss": 0.5806, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.9443326626425218, |
| "grad_norm": 2.121602640833575, |
| "learning_rate": 7.001981464747565e-08, |
| "loss": 0.6032, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.9486250838363515, |
| "grad_norm": 1.7145222003788387, |
| "learning_rate": 5.8858092767236084e-08, |
| "loss": 0.5455, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.9529175050301811, |
| "grad_norm": 1.9516038348847442, |
| "learning_rate": 4.865965629214819e-08, |
| "loss": 0.4984, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.9572099262240107, |
| "grad_norm": 2.156767335001454, |
| "learning_rate": 3.9426493427611177e-08, |
| "loss": 0.5284, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.9615023474178404, |
| "grad_norm": 1.8089215543018735, |
| "learning_rate": 3.1160404197018155e-08, |
| "loss": 0.5852, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.96579476861167, |
| "grad_norm": 3.2819358158442062, |
| "learning_rate": 2.386300009084408e-08, |
| "loss": 0.535, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.9700871898054997, |
| "grad_norm": 1.7304415336384433, |
| "learning_rate": 1.753570375247815e-08, |
| "loss": 0.5152, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.9743796109993293, |
| "grad_norm": 2.7057456233183785, |
| "learning_rate": 1.2179748700879013e-08, |
| "loss": 0.5592, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.9786720321931589, |
| "grad_norm": 2.0005265863891872, |
| "learning_rate": 7.796179090094891e-09, |
| "loss": 0.548, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.9829644533869886, |
| "grad_norm": 3.129587457460373, |
| "learning_rate": 4.385849505708084e-09, |
| "loss": 0.5154, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.9872568745808182, |
| "grad_norm": 2.082407038336782, |
| "learning_rate": 1.9494247982282386e-09, |
| "loss": 0.5263, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.9915492957746479, |
| "grad_norm": 2.035928140228953, |
| "learning_rate": 4.87379953478806e-10, |
| "loss": 0.5001, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.9958417169684776, |
| "grad_norm": 1.9493320834510575, |
| "learning_rate": 0.0, |
| "loss": 0.5539, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.9958417169684776, |
| "step": 232, |
| "total_flos": 57740051464192.0, |
| "train_loss": 0.5700135146235598, |
| "train_runtime": 22473.305, |
| "train_samples_per_second": 1.327, |
| "train_steps_per_second": 0.01 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 232, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 5000.0, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": false, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 57740051464192.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|