{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9990539262062441,
  "eval_steps": 500,
  "global_step": 198,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.005045726900031536,
      "grad_norm": 0.16816571847556824,
      "learning_rate": 2.9999839160139495e-06,
      "loss": 0.7782,
      "step": 1
    },
    {
      "epoch": 0.010091453800063072,
      "grad_norm": 0.1469143977253523,
      "learning_rate": 2.9999356645057024e-06,
      "loss": 0.6817,
      "step": 2
    },
    {
      "epoch": 0.015137180700094607,
      "grad_norm": 0.07996774677933757,
      "learning_rate": 2.9998552468249567e-06,
      "loss": 0.6735,
      "step": 3
    },
    {
      "epoch": 0.020182907600126143,
      "grad_norm": 0.0800127664777818,
      "learning_rate": 2.999742665221167e-06,
      "loss": 0.6569,
      "step": 4
    },
    {
      "epoch": 0.02522863450015768,
      "grad_norm": 0.08070188267575489,
      "learning_rate": 2.999597922843484e-06,
      "loss": 0.6283,
      "step": 5
    },
    {
      "epoch": 0.030274361400189215,
      "grad_norm": 0.06839180655145351,
      "learning_rate": 2.999421023740663e-06,
      "loss": 0.6446,
      "step": 6
    },
    {
      "epoch": 0.03532008830022075,
      "grad_norm": 0.05534188923028301,
      "learning_rate": 2.9992119728609516e-06,
      "loss": 0.6371,
      "step": 7
    },
    {
      "epoch": 0.040365815200252286,
      "grad_norm": 0.07094943987370793,
      "learning_rate": 2.9989707760519526e-06,
      "loss": 0.6111,
      "step": 8
    },
    {
      "epoch": 0.04541154210028382,
      "grad_norm": 0.06436005389698786,
      "learning_rate": 2.9986974400604593e-06,
      "loss": 0.588,
      "step": 9
    },
    {
      "epoch": 0.05045726900031536,
      "grad_norm": 0.05699223365274786,
      "learning_rate": 2.9983919725322667e-06,
      "loss": 0.6101,
      "step": 10
    },
    {
      "epoch": 0.055502995900346894,
      "grad_norm": 0.058843386030182285,
      "learning_rate": 2.9980543820119585e-06,
      "loss": 0.6047,
      "step": 11
    },
    {
      "epoch": 0.06054872280037843,
      "grad_norm": 0.047228554008764044,
      "learning_rate": 2.997684677942667e-06,
      "loss": 0.5937,
      "step": 12
    },
    {
      "epoch": 0.06559444970040997,
      "grad_norm": 0.04830399085917525,
      "learning_rate": 2.9972828706658102e-06,
      "loss": 0.6448,
      "step": 13
    },
    {
      "epoch": 0.0706401766004415,
      "grad_norm": 0.04469640349332499,
      "learning_rate": 2.996848971420801e-06,
      "loss": 0.6145,
      "step": 14
    },
    {
      "epoch": 0.07568590350047304,
      "grad_norm": 0.048907003957727534,
      "learning_rate": 2.996382992344734e-06,
      "loss": 0.5755,
      "step": 15
    },
    {
      "epoch": 0.08073163040050457,
      "grad_norm": 0.04502223888969105,
      "learning_rate": 2.9958849464720457e-06,
      "loss": 0.5765,
      "step": 16
    },
    {
      "epoch": 0.08577735730053611,
      "grad_norm": 0.04485565875842678,
      "learning_rate": 2.9953548477341497e-06,
      "loss": 0.6364,
      "step": 17
    },
    {
      "epoch": 0.09082308420056764,
      "grad_norm": 0.04319237430058616,
      "learning_rate": 2.9947927109590477e-06,
      "loss": 0.568,
      "step": 18
    },
    {
      "epoch": 0.09586881110059918,
      "grad_norm": 0.042093297202993624,
      "learning_rate": 2.994198551870913e-06,
      "loss": 0.6184,
      "step": 19
    },
    {
      "epoch": 0.10091453800063072,
      "grad_norm": 0.04087623899598573,
      "learning_rate": 2.993572387089653e-06,
      "loss": 0.5822,
      "step": 20
    },
    {
      "epoch": 0.10596026490066225,
      "grad_norm": 0.042619877493329586,
      "learning_rate": 2.992914234130442e-06,
      "loss": 0.5983,
      "step": 21
    },
    {
      "epoch": 0.11100599180069379,
      "grad_norm": 0.04314774114784986,
      "learning_rate": 2.9922241114032345e-06,
      "loss": 0.6058,
      "step": 22
    },
    {
      "epoch": 0.11605171870072532,
      "grad_norm": 0.04125496902035363,
      "learning_rate": 2.9915020382122458e-06,
      "loss": 0.5741,
      "step": 23
    },
    {
      "epoch": 0.12109744560075686,
      "grad_norm": 0.03985368427853683,
      "learning_rate": 2.990748034755415e-06,
      "loss": 0.6002,
      "step": 24
    },
    {
      "epoch": 0.1261431725007884,
      "grad_norm": 0.04603566805698703,
      "learning_rate": 2.9899621221238394e-06,
      "loss": 0.5616,
      "step": 25
    },
    {
      "epoch": 0.13118889940081993,
      "grad_norm": 0.033944581121186666,
      "learning_rate": 2.989144322301186e-06,
      "loss": 0.591,
      "step": 26
    },
    {
      "epoch": 0.13623462630085148,
      "grad_norm": 0.0352127486146018,
      "learning_rate": 2.988294658163073e-06,
      "loss": 0.575,
      "step": 27
    },
    {
      "epoch": 0.141280353200883,
      "grad_norm": 0.04026082684896548,
      "learning_rate": 2.9874131534764325e-06,
      "loss": 0.5783,
      "step": 28
    },
    {
      "epoch": 0.14632608010091455,
      "grad_norm": 0.038584952671910096,
      "learning_rate": 2.9864998328988463e-06,
      "loss": 0.5814,
      "step": 29
    },
    {
      "epoch": 0.15137180700094607,
      "grad_norm": 0.03294755370363045,
      "learning_rate": 2.985554721977853e-06,
      "loss": 0.5688,
      "step": 30
    },
    {
      "epoch": 0.15641753390097762,
      "grad_norm": 0.035774614388450525,
      "learning_rate": 2.984577847150239e-06,
      "loss": 0.5914,
      "step": 31
    },
    {
      "epoch": 0.16146326080100915,
      "grad_norm": 0.04512017281393784,
      "learning_rate": 2.983569235741291e-06,
      "loss": 0.557,
      "step": 32
    },
    {
      "epoch": 0.1665089877010407,
      "grad_norm": 0.03447545680264101,
      "learning_rate": 2.9825289159640397e-06,
      "loss": 0.568,
      "step": 33
    },
    {
      "epoch": 0.17155471460107222,
      "grad_norm": 0.033658505681229516,
      "learning_rate": 2.9814569169184642e-06,
      "loss": 0.5868,
      "step": 34
    },
    {
      "epoch": 0.17660044150110377,
      "grad_norm": 0.03071546221735757,
      "learning_rate": 2.980353268590683e-06,
      "loss": 0.5487,
      "step": 35
    },
    {
      "epoch": 0.1816461684011353,
      "grad_norm": 0.07417860742940319,
      "learning_rate": 2.9792180018521128e-06,
      "loss": 0.6099,
      "step": 36
    },
    {
      "epoch": 0.18669189530116684,
      "grad_norm": 0.032180325133544276,
      "learning_rate": 2.978051148458604e-06,
      "loss": 0.5939,
      "step": 37
    },
    {
      "epoch": 0.19173762220119836,
      "grad_norm": 0.031347752245340116,
      "learning_rate": 2.976852741049554e-06,
      "loss": 0.5764,
      "step": 38
    },
    {
      "epoch": 0.1967833491012299,
      "grad_norm": 0.035873383222778825,
      "learning_rate": 2.975622813146996e-06,
      "loss": 0.57,
      "step": 39
    },
    {
      "epoch": 0.20182907600126143,
      "grad_norm": 0.03130302787258777,
      "learning_rate": 2.9743613991546548e-06,
      "loss": 0.5503,
      "step": 40
    },
    {
      "epoch": 0.20687480290129298,
      "grad_norm": 0.04111552220221803,
      "learning_rate": 2.9730685343569934e-06,
      "loss": 0.6028,
      "step": 41
    },
    {
      "epoch": 0.2119205298013245,
      "grad_norm": 0.031561335436647305,
      "learning_rate": 2.971744254918218e-06,
      "loss": 0.5682,
      "step": 42
    },
    {
      "epoch": 0.21696625670135605,
      "grad_norm": 0.03466870924962832,
      "learning_rate": 2.9703885978812726e-06,
      "loss": 0.55,
      "step": 43
    },
    {
      "epoch": 0.22201198360138757,
      "grad_norm": 0.03396258277418921,
      "learning_rate": 2.9690016011667974e-06,
      "loss": 0.5953,
      "step": 44
    },
    {
      "epoch": 0.22705771050141912,
      "grad_norm": 0.033463552224919146,
      "learning_rate": 2.967583303572073e-06,
      "loss": 0.6231,
      "step": 45
    },
    {
      "epoch": 0.23210343740145065,
      "grad_norm": 0.03747113039368738,
      "learning_rate": 2.9661337447699316e-06,
      "loss": 0.5742,
      "step": 46
    },
    {
      "epoch": 0.2371491643014822,
      "grad_norm": 0.04159182229285405,
      "learning_rate": 2.9646529653076493e-06,
      "loss": 0.5681,
      "step": 47
    },
    {
      "epoch": 0.24219489120151372,
      "grad_norm": 0.032311171301265075,
      "learning_rate": 2.9631410066058098e-06,
      "loss": 0.5464,
      "step": 48
    },
    {
      "epoch": 0.24724061810154527,
      "grad_norm": 0.035494911254562625,
      "learning_rate": 2.9615979109571493e-06,
      "loss": 0.5377,
      "step": 49
    },
    {
      "epoch": 0.2522863450015768,
      "grad_norm": 0.032401750473671755,
      "learning_rate": 2.9600237215253696e-06,
      "loss": 0.6043,
      "step": 50
    },
    {
      "epoch": 0.25733207190160834,
      "grad_norm": 0.03477400444401883,
      "learning_rate": 2.9584184823439337e-06,
      "loss": 0.6078,
      "step": 51
    },
    {
      "epoch": 0.26237779880163986,
      "grad_norm": 0.03586539534553979,
      "learning_rate": 2.9567822383148315e-06,
      "loss": 0.5857,
      "step": 52
    },
    {
      "epoch": 0.2674235257016714,
      "grad_norm": 0.034776366845092124,
      "learning_rate": 2.955115035207326e-06,
      "loss": 0.5652,
      "step": 53
    },
    {
      "epoch": 0.27246925260170296,
      "grad_norm": 0.047916672806890825,
      "learning_rate": 2.953416919656672e-06,
      "loss": 0.529,
      "step": 54
    },
    {
      "epoch": 0.2775149795017345,
      "grad_norm": 0.035512253032401846,
      "learning_rate": 2.9516879391628125e-06,
      "loss": 0.6018,
      "step": 55
    },
    {
      "epoch": 0.282560706401766,
      "grad_norm": 0.0669654595534551,
      "learning_rate": 2.9499281420890474e-06,
      "loss": 0.5832,
      "step": 56
    },
    {
      "epoch": 0.2876064333017975,
      "grad_norm": 0.04009377576904152,
      "learning_rate": 2.948137577660685e-06,
      "loss": 0.5376,
      "step": 57
    },
    {
      "epoch": 0.2926521602018291,
      "grad_norm": 0.05517678453435375,
      "learning_rate": 2.946316295963661e-06,
      "loss": 0.5725,
      "step": 58
    },
    {
      "epoch": 0.2976978871018606,
      "grad_norm": 0.040682905696082294,
      "learning_rate": 2.9444643479431393e-06,
      "loss": 0.5887,
      "step": 59
    },
    {
      "epoch": 0.30274361400189215,
      "grad_norm": 0.044774454426992336,
      "learning_rate": 2.9425817854020873e-06,
      "loss": 0.5756,
      "step": 60
    },
    {
      "epoch": 0.30778934090192367,
      "grad_norm": 0.03263011910584542,
      "learning_rate": 2.940668660999826e-06,
      "loss": 0.5693,
      "step": 61
    },
    {
      "epoch": 0.31283506780195525,
      "grad_norm": 0.032496230797448664,
      "learning_rate": 2.9387250282505583e-06,
      "loss": 0.586,
      "step": 62
    },
    {
      "epoch": 0.31788079470198677,
      "grad_norm": 0.03310861754959189,
      "learning_rate": 2.9367509415218687e-06,
      "loss": 0.5548,
      "step": 63
    },
    {
      "epoch": 0.3229265216020183,
      "grad_norm": 0.031816229512850104,
      "learning_rate": 2.9347464560332084e-06,
      "loss": 0.6,
      "step": 64
    },
    {
      "epoch": 0.3279722485020498,
      "grad_norm": 0.036465675122600016,
      "learning_rate": 2.932711627854344e-06,
      "loss": 0.5613,
      "step": 65
    },
    {
      "epoch": 0.3330179754020814,
      "grad_norm": 0.03107217546123426,
      "learning_rate": 2.9306465139037947e-06,
      "loss": 0.5421,
      "step": 66
    },
    {
      "epoch": 0.3380637023021129,
      "grad_norm": 0.031633841290591734,
      "learning_rate": 2.9285511719472367e-06,
      "loss": 0.58,
      "step": 67
    },
    {
      "epoch": 0.34310942920214443,
      "grad_norm": 0.030853855883844275,
      "learning_rate": 2.9264256605958885e-06,
      "loss": 0.5496,
      "step": 68
    },
    {
      "epoch": 0.34815515610217596,
      "grad_norm": 0.036265638918391636,
      "learning_rate": 2.924270039304873e-06,
      "loss": 0.5939,
      "step": 69
    },
    {
      "epoch": 0.35320088300220753,
      "grad_norm": 0.03703293289195566,
      "learning_rate": 2.9220843683715497e-06,
      "loss": 0.5311,
      "step": 70
    },
    {
      "epoch": 0.35824660990223905,
      "grad_norm": 0.07682301940151573,
      "learning_rate": 2.9198687089338345e-06,
      "loss": 0.5655,
      "step": 71
    },
    {
      "epoch": 0.3632923368022706,
      "grad_norm": 0.03240731857642153,
      "learning_rate": 2.9176231229684835e-06,
      "loss": 0.5436,
      "step": 72
    },
    {
      "epoch": 0.3683380637023021,
      "grad_norm": 0.03550209155305971,
      "learning_rate": 2.9153476732893646e-06,
      "loss": 0.5529,
      "step": 73
    },
    {
      "epoch": 0.3733837906023337,
      "grad_norm": 0.03572110732287988,
      "learning_rate": 2.913042423545696e-06,
      "loss": 0.5601,
      "step": 74
    },
    {
      "epoch": 0.3784295175023652,
      "grad_norm": 0.030318267187340705,
      "learning_rate": 2.910707438220269e-06,
      "loss": 0.5827,
      "step": 75
    },
    {
      "epoch": 0.3834752444023967,
      "grad_norm": 0.030779462298936085,
      "learning_rate": 2.9083427826276414e-06,
      "loss": 0.5366,
      "step": 76
    },
    {
      "epoch": 0.38852097130242824,
      "grad_norm": 0.033078267956613755,
      "learning_rate": 2.905948522912315e-06,
      "loss": 0.5769,
      "step": 77
    },
    {
      "epoch": 0.3935666982024598,
      "grad_norm": 0.032022182515529865,
      "learning_rate": 2.90352472604688e-06,
      "loss": 0.6059,
      "step": 78
    },
    {
      "epoch": 0.39861242510249134,
      "grad_norm": 0.032572741826790486,
      "learning_rate": 2.901071459830145e-06,
      "loss": 0.5325,
      "step": 79
    },
    {
      "epoch": 0.40365815200252286,
      "grad_norm": 0.03322477222052267,
      "learning_rate": 2.89858879288524e-06,
      "loss": 0.6102,
      "step": 80
    },
    {
      "epoch": 0.4087038789025544,
      "grad_norm": 0.03290381540817977,
      "learning_rate": 2.896076794657696e-06,
      "loss": 0.5297,
      "step": 81
    },
    {
      "epoch": 0.41374960580258596,
      "grad_norm": 0.02986308712893659,
      "learning_rate": 2.893535535413504e-06,
      "loss": 0.6016,
      "step": 82
    },
    {
      "epoch": 0.4187953327026175,
      "grad_norm": 0.03708195442407903,
      "learning_rate": 2.8909650862371465e-06,
      "loss": 0.5644,
      "step": 83
    },
    {
      "epoch": 0.423841059602649,
      "grad_norm": 0.05585756602200335,
      "learning_rate": 2.888365519029615e-06,
      "loss": 0.5645,
      "step": 84
    },
    {
      "epoch": 0.42888678650268053,
      "grad_norm": 0.03232081336561299,
      "learning_rate": 2.8857369065063893e-06,
      "loss": 0.5492,
      "step": 85
    },
    {
      "epoch": 0.4339325134027121,
      "grad_norm": 0.03807439954613977,
      "learning_rate": 2.883079322195415e-06,
      "loss": 0.5694,
      "step": 86
    },
    {
      "epoch": 0.4389782403027436,
      "grad_norm": 0.03718669059491054,
      "learning_rate": 2.880392840435036e-06,
      "loss": 0.5603,
      "step": 87
    },
    {
      "epoch": 0.44402396720277515,
      "grad_norm": 0.02993361885707706,
      "learning_rate": 2.8776775363719244e-06,
      "loss": 0.5193,
      "step": 88
    },
    {
      "epoch": 0.44906969410280667,
      "grad_norm": 0.03471859873605538,
      "learning_rate": 2.8749334859589696e-06,
      "loss": 0.5195,
      "step": 89
    },
    {
      "epoch": 0.45411542100283825,
      "grad_norm": 0.03468783264087511,
      "learning_rate": 2.872160765953162e-06,
      "loss": 0.5685,
      "step": 90
    },
    {
      "epoch": 0.45916114790286977,
      "grad_norm": 0.06028818099354049,
      "learning_rate": 2.86935945391344e-06,
      "loss": 0.5875,
      "step": 91
    },
    {
      "epoch": 0.4642068748029013,
      "grad_norm": 0.03189592063110031,
      "learning_rate": 2.8665296281985232e-06,
      "loss": 0.5627,
      "step": 92
    },
    {
      "epoch": 0.4692526017029328,
      "grad_norm": 0.032489964455779306,
      "learning_rate": 2.8636713679647195e-06,
      "loss": 0.5398,
      "step": 93
    },
    {
      "epoch": 0.4742983286029644,
      "grad_norm": 0.03300509062363307,
      "learning_rate": 2.8607847531637127e-06,
      "loss": 0.5675,
      "step": 94
    },
    {
      "epoch": 0.4793440555029959,
      "grad_norm": 0.034805575232998785,
      "learning_rate": 2.857869864540323e-06,
      "loss": 0.5526,
      "step": 95
    },
    {
      "epoch": 0.48438978240302744,
      "grad_norm": 0.03304213276713402,
      "learning_rate": 2.854926783630253e-06,
      "loss": 0.5475,
      "step": 96
    },
    {
      "epoch": 0.48943550930305896,
      "grad_norm": 0.03753659611183512,
      "learning_rate": 2.851955592757801e-06,
      "loss": 0.5511,
      "step": 97
    },
    {
      "epoch": 0.49448123620309054,
      "grad_norm": 0.033892234979303396,
      "learning_rate": 2.848956375033562e-06,
      "loss": 0.5232,
      "step": 98
    },
    {
      "epoch": 0.49952696310312206,
      "grad_norm": 0.037074509268233,
      "learning_rate": 2.845929214352105e-06,
      "loss": 0.5655,
      "step": 99
    },
    {
      "epoch": 0.5045726900031536,
      "grad_norm": 0.03224402404614455,
      "learning_rate": 2.8428741953896195e-06,
      "loss": 0.5556,
      "step": 100
    },
    {
      "epoch": 0.5096184169031851,
      "grad_norm": 0.03072821069928307,
      "learning_rate": 2.839791403601555e-06,
      "loss": 0.5472,
      "step": 101
    },
    {
      "epoch": 0.5146641438032167,
      "grad_norm": 0.03380249813156003,
      "learning_rate": 2.8366809252202235e-06,
      "loss": 0.5413,
      "step": 102
    },
    {
      "epoch": 0.5197098707032481,
      "grad_norm": 0.0335216929092493,
      "learning_rate": 2.8335428472523927e-06,
      "loss": 0.5479,
      "step": 103
    },
    {
      "epoch": 0.5247555976032797,
      "grad_norm": 0.030808915999862914,
      "learning_rate": 2.8303772574768482e-06,
      "loss": 0.548,
      "step": 104
    },
    {
      "epoch": 0.5298013245033113,
      "grad_norm": 0.02912454306651085,
      "learning_rate": 2.8271842444419414e-06,
      "loss": 0.548,
      "step": 105
    },
    {
      "epoch": 0.5348470514033428,
      "grad_norm": 0.05012632142858796,
      "learning_rate": 2.8239638974631112e-06,
      "loss": 0.5152,
      "step": 106
    },
    {
      "epoch": 0.5398927783033743,
      "grad_norm": 0.0344361048789296,
      "learning_rate": 2.8207163066203843e-06,
      "loss": 0.5698,
      "step": 107
    },
    {
      "epoch": 0.5449385052034059,
      "grad_norm": 0.10550189124699653,
      "learning_rate": 2.8174415627558584e-06,
      "loss": 0.522,
      "step": 108
    },
    {
      "epoch": 0.5499842321034374,
      "grad_norm": 0.0323910546595259,
      "learning_rate": 2.8141397574711587e-06,
      "loss": 0.5518,
      "step": 109
    },
    {
      "epoch": 0.555029959003469,
      "grad_norm": 0.032448012717327133,
      "learning_rate": 2.810810983124877e-06,
      "loss": 0.5839,
      "step": 110
    },
    {
      "epoch": 0.5600756859035004,
      "grad_norm": 0.03112632332891334,
      "learning_rate": 2.807455332829987e-06,
      "loss": 0.5635,
      "step": 111
    },
    {
      "epoch": 0.565121412803532,
      "grad_norm": 0.035282498495490644,
      "learning_rate": 2.8040729004512415e-06,
      "loss": 0.535,
      "step": 112
    },
    {
      "epoch": 0.5701671397035636,
      "grad_norm": 0.02966255382156268,
      "learning_rate": 2.800663780602545e-06,
      "loss": 0.5492,
      "step": 113
    },
    {
      "epoch": 0.575212866603595,
      "grad_norm": 0.03707668859813438,
      "learning_rate": 2.7972280686443077e-06,
      "loss": 0.5663,
      "step": 114
    },
    {
      "epoch": 0.5802585935036266,
      "grad_norm": 0.03659428904439035,
      "learning_rate": 2.793765860680779e-06,
      "loss": 0.542,
      "step": 115
    },
    {
      "epoch": 0.5853043204036582,
      "grad_norm": 0.035159210590374,
      "learning_rate": 2.790277253557359e-06,
      "loss": 0.5738,
      "step": 116
    },
    {
      "epoch": 0.5903500473036897,
      "grad_norm": 0.030990014790480254,
      "learning_rate": 2.7867623448578863e-06,
      "loss": 0.5892,
      "step": 117
    },
    {
      "epoch": 0.5953957742037213,
      "grad_norm": 0.03399071129755366,
      "learning_rate": 2.783221232901914e-06,
      "loss": 0.5677,
      "step": 118
    },
    {
      "epoch": 0.6004415011037527,
      "grad_norm": 0.03247591991435437,
      "learning_rate": 2.7796540167419567e-06,
      "loss": 0.5412,
      "step": 119
    },
    {
      "epoch": 0.6054872280037843,
      "grad_norm": 0.032370031907677656,
      "learning_rate": 2.7760607961607174e-06,
      "loss": 0.5556,
      "step": 120
    },
    {
      "epoch": 0.6105329549038159,
      "grad_norm": 0.033174635710333106,
      "learning_rate": 2.7724416716683005e-06,
      "loss": 0.5668,
      "step": 121
    },
    {
      "epoch": 0.6155786818038473,
      "grad_norm": 0.03148435780507138,
      "learning_rate": 2.7687967444993976e-06,
      "loss": 0.5205,
      "step": 122
    },
    {
      "epoch": 0.6206244087038789,
      "grad_norm": 0.03187378459330813,
      "learning_rate": 2.7651261166104574e-06,
      "loss": 0.5563,
      "step": 123
    },
    {
      "epoch": 0.6256701356039105,
      "grad_norm": 0.03429711811644175,
      "learning_rate": 2.7614298906768316e-06,
      "loss": 0.5167,
      "step": 124
    },
    {
      "epoch": 0.630715862503942,
      "grad_norm": 0.03859718827910278,
      "learning_rate": 2.757708170089906e-06,
      "loss": 0.559,
      "step": 125
    },
    {
      "epoch": 0.6357615894039735,
      "grad_norm": 0.03623858619383074,
      "learning_rate": 2.7539610589542057e-06,
      "loss": 0.5795,
      "step": 126
    },
    {
      "epoch": 0.640807316304005,
      "grad_norm": 0.03263585179382894,
      "learning_rate": 2.750188662084484e-06,
      "loss": 0.5566,
      "step": 127
    },
    {
      "epoch": 0.6458530432040366,
      "grad_norm": 0.03877113662246629,
      "learning_rate": 2.746391085002791e-06,
      "loss": 0.6018,
      "step": 128
    },
    {
      "epoch": 0.6508987701040682,
      "grad_norm": 0.03400954730094289,
      "learning_rate": 2.7425684339355203e-06,
      "loss": 0.5438,
      "step": 129
    },
    {
      "epoch": 0.6559444970040996,
      "grad_norm": 0.03140528838198611,
      "learning_rate": 2.7387208158104406e-06,
      "loss": 0.5554,
      "step": 130
    },
    {
      "epoch": 0.6609902239041312,
      "grad_norm": 0.037206585802644396,
      "learning_rate": 2.7348483382537015e-06,
      "loss": 0.5634,
      "step": 131
    },
    {
      "epoch": 0.6660359508041628,
      "grad_norm": 0.03211586967766076,
      "learning_rate": 2.7309511095868246e-06,
      "loss": 0.5391,
      "step": 132
    },
    {
      "epoch": 0.6710816777041942,
      "grad_norm": 0.03396187095971342,
      "learning_rate": 2.727029238823674e-06,
      "loss": 0.5406,
      "step": 133
    },
    {
      "epoch": 0.6761274046042258,
      "grad_norm": 0.03502286862929664,
      "learning_rate": 2.7230828356674047e-06,
      "loss": 0.5753,
      "step": 134
    },
    {
      "epoch": 0.6811731315042573,
      "grad_norm": 0.03214739270222773,
      "learning_rate": 2.7191120105073974e-06,
      "loss": 0.5245,
      "step": 135
    },
    {
      "epoch": 0.6862188584042889,
      "grad_norm": 0.03387635714681519,
      "learning_rate": 2.7151168744161664e-06,
      "loss": 0.54,
      "step": 136
    },
    {
      "epoch": 0.6912645853043204,
      "grad_norm": 0.032951263305626276,
      "learning_rate": 2.7110975391462574e-06,
      "loss": 0.5259,
      "step": 137
    },
    {
      "epoch": 0.6963103122043519,
      "grad_norm": 0.05015537211126262,
      "learning_rate": 2.707054117127118e-06,
      "loss": 0.5267,
      "step": 138
    },
    {
      "epoch": 0.7013560391043835,
      "grad_norm": 0.035135429069184716,
      "learning_rate": 2.7029867214619533e-06,
      "loss": 0.5518,
      "step": 139
    },
    {
      "epoch": 0.7064017660044151,
      "grad_norm": 0.0320337788657044,
      "learning_rate": 2.698895465924565e-06,
      "loss": 0.5555,
      "step": 140
    },
    {
      "epoch": 0.7114474929044465,
      "grad_norm": 0.03645990761972981,
      "learning_rate": 2.6947804649561633e-06,
      "loss": 0.572,
      "step": 141
    },
    {
      "epoch": 0.7164932198044781,
      "grad_norm": 0.03470519450277183,
      "learning_rate": 2.6906418336621724e-06,
      "loss": 0.5505,
      "step": 142
    },
    {
      "epoch": 0.7215389467045096,
      "grad_norm": 0.031235480044832488,
      "learning_rate": 2.686479687809006e-06,
      "loss": 0.5377,
      "step": 143
    },
    {
      "epoch": 0.7265846736045412,
      "grad_norm": 0.029941451042590675,
      "learning_rate": 2.6822941438208306e-06,
      "loss": 0.5381,
      "step": 144
    },
    {
      "epoch": 0.7316304005045727,
      "grad_norm": 0.035617952049989555,
      "learning_rate": 2.6780853187763096e-06,
      "loss": 0.5546,
      "step": 145
    },
    {
      "epoch": 0.7366761274046042,
      "grad_norm": 0.03343536591046196,
      "learning_rate": 2.673853330405326e-06,
      "loss": 0.5519,
      "step": 146
    },
    {
      "epoch": 0.7417218543046358,
      "grad_norm": 0.03110711029893078,
      "learning_rate": 2.6695982970856925e-06,
      "loss": 0.5744,
      "step": 147
    },
    {
      "epoch": 0.7467675812046674,
      "grad_norm": 0.030535258085941947,
      "learning_rate": 2.6653203378398375e-06,
      "loss": 0.5239,
      "step": 148
    },
    {
      "epoch": 0.7518133081046988,
      "grad_norm": 0.06889481171001853,
      "learning_rate": 2.661019572331478e-06,
      "loss": 0.5445,
      "step": 149
    },
    {
      "epoch": 0.7568590350047304,
      "grad_norm": 0.034136168134648794,
      "learning_rate": 2.6566961208622696e-06,
      "loss": 0.5403,
      "step": 150
    },
    {
      "epoch": 0.7619047619047619,
      "grad_norm": 0.03361146386829785,
      "learning_rate": 2.652350104368444e-06,
      "loss": 0.5252,
      "step": 151
    },
    {
      "epoch": 0.7669504888047934,
      "grad_norm": 0.03232558697340943,
      "learning_rate": 2.6479816444174253e-06,
      "loss": 0.5537,
      "step": 152
    },
    {
      "epoch": 0.771996215704825,
      "grad_norm": 0.031246440682898686,
      "learning_rate": 2.643590863204429e-06,
      "loss": 0.5358,
      "step": 153
    },
    {
      "epoch": 0.7770419426048565,
      "grad_norm": 0.03123193076665316,
      "learning_rate": 2.6391778835490438e-06,
      "loss": 0.5162,
      "step": 154
    },
    {
      "epoch": 0.7820876695048881,
      "grad_norm": 0.042414916882850415,
      "learning_rate": 2.6347428288917972e-06,
      "loss": 0.5522,
      "step": 155
    },
    {
      "epoch": 0.7871333964049196,
      "grad_norm": 0.03663820317771632,
      "learning_rate": 2.630285823290702e-06,
      "loss": 0.5395,
      "step": 156
    },
    {
      "epoch": 0.7921791233049511,
      "grad_norm": 0.0323447950510978,
      "learning_rate": 2.625806991417786e-06,
      "loss": 0.5471,
      "step": 157
    },
    {
      "epoch": 0.7972248502049827,
      "grad_norm": 0.03301071769705723,
      "learning_rate": 2.621306458555604e-06,
      "loss": 0.5529,
      "step": 158
    },
    {
      "epoch": 0.8022705771050141,
      "grad_norm": 0.03261309640826233,
      "learning_rate": 2.6167843505937356e-06,
      "loss": 0.5507,
      "step": 159
    },
    {
      "epoch": 0.8073163040050457,
      "grad_norm": 0.03240576468524181,
      "learning_rate": 2.6122407940252608e-06,
      "loss": 0.5468,
      "step": 160
    },
    {
      "epoch": 0.8123620309050773,
      "grad_norm": 0.03018210963467149,
      "learning_rate": 2.6076759159432237e-06,
      "loss": 0.5583,
      "step": 161
    },
    {
      "epoch": 0.8174077578051088,
      "grad_norm": 0.031036132663069035,
      "learning_rate": 2.603089844037078e-06,
      "loss": 0.5226,
      "step": 162
    },
    {
      "epoch": 0.8224534847051403,
      "grad_norm": 0.0347386534073021,
      "learning_rate": 2.5984827065891126e-06,
      "loss": 0.5529,
      "step": 163
    },
    {
      "epoch": 0.8274992116051719,
      "grad_norm": 0.044827581860260125,
      "learning_rate": 2.593854632470866e-06,
      "loss": 0.6117,
      "step": 164
    },
    {
      "epoch": 0.8325449385052034,
      "grad_norm": 0.030210519399600917,
      "learning_rate": 2.5892057511395202e-06,
      "loss": 0.5436,
      "step": 165
    },
    {
      "epoch": 0.837590665405235,
      "grad_norm": 0.031447473869825514,
      "learning_rate": 2.5845361926342794e-06,
      "loss": 0.5228,
      "step": 166
    },
    {
      "epoch": 0.8426363923052664,
      "grad_norm": 0.035332567006398724,
      "learning_rate": 2.5798460875727326e-06,
      "loss": 0.5478,
      "step": 167
    },
    {
      "epoch": 0.847682119205298,
      "grad_norm": 0.02886367248116525,
      "learning_rate": 2.575135567147201e-06,
      "loss": 0.5114,
      "step": 168
    },
    {
      "epoch": 0.8527278461053296,
      "grad_norm": 0.031532384271644245,
      "learning_rate": 2.5704047631210664e-06,
      "loss": 0.5623,
      "step": 169
    },
    {
      "epoch": 0.8577735730053611,
      "grad_norm": 0.034567108800704585,
      "learning_rate": 2.5656538078250873e-06,
      "loss": 0.5657,
      "step": 170
    },
    {
      "epoch": 0.8628192999053926,
      "grad_norm": 0.03160145253530057,
      "learning_rate": 2.560882834153696e-06,
      "loss": 0.5136,
      "step": 171
    },
    {
      "epoch": 0.8678650268054242,
      "grad_norm": 0.03135869657202659,
      "learning_rate": 2.5560919755612823e-06,
      "loss": 0.544,
      "step": 172
    },
    {
      "epoch": 0.8729107537054557,
      "grad_norm": 0.03597121576219098,
      "learning_rate": 2.5512813660584597e-06,
      "loss": 0.5152,
      "step": 173
    },
    {
      "epoch": 0.8779564806054873,
      "grad_norm": 0.030221173382051915,
      "learning_rate": 2.5464511402083166e-06,
      "loss": 0.5251,
      "step": 174
    },
    {
      "epoch": 0.8830022075055187,
      "grad_norm": 0.030000072996604673,
      "learning_rate": 2.541601433122654e-06,
      "loss": 0.5186,
      "step": 175
    },
    {
      "epoch": 0.8880479344055503,
      "grad_norm": 0.03376922343601561,
      "learning_rate": 2.536732380458204e-06,
      "loss": 0.5164,
      "step": 176
    },
    {
      "epoch": 0.8930936613055819,
      "grad_norm": 0.03156034083736404,
      "learning_rate": 2.531844118412837e-06,
      "loss": 0.5316,
      "step": 177
    },
    {
      "epoch": 0.8981393882056133,
      "grad_norm": 0.03377666641180589,
      "learning_rate": 2.5269367837217488e-06,
      "loss": 0.5054,
      "step": 178
    },
    {
      "epoch": 0.9031851151056449,
      "grad_norm": 0.030954166615192916,
      "learning_rate": 2.522010513653642e-06,
      "loss": 0.5256,
      "step": 179
    },
    {
      "epoch": 0.9082308420056765,
      "grad_norm": 0.03484714660203487,
      "learning_rate": 2.517065446006878e-06,
      "loss": 0.5225,
      "step": 180
    },
    {
      "epoch": 0.913276568905708,
      "grad_norm": 0.06223673456920668,
      "learning_rate": 2.5121017191056306e-06,
      "loss": 0.5207,
      "step": 181
    },
    {
      "epoch": 0.9183222958057395,
      "grad_norm": 0.04079770792332633,
      "learning_rate": 2.507119471796011e-06,
      "loss": 0.555,
      "step": 182
    },
    {
      "epoch": 0.923368022705771,
      "grad_norm": 0.030029925631475055,
      "learning_rate": 2.5021188434421863e-06,
      "loss": 0.5435,
      "step": 183
    },
    {
      "epoch": 0.9284137496058026,
      "grad_norm": 0.03519161994895625,
      "learning_rate": 2.4970999739224816e-06,
      "loss": 0.5817,
      "step": 184
    },
    {
      "epoch": 0.9334594765058342,
      "grad_norm": 0.03194895322649527,
      "learning_rate": 2.492063003625466e-06,
      "loss": 0.5288,
      "step": 185
    },
    {
      "epoch": 0.9385052034058656,
      "grad_norm": 0.03862824281648028,
      "learning_rate": 2.487008073446027e-06,
      "loss": 0.5428,
      "step": 186
    },
    {
      "epoch": 0.9435509303058972,
      "grad_norm": 0.040703435887500077,
      "learning_rate": 2.481935324781427e-06,
      "loss": 0.5407,
      "step": 187
    },
    {
      "epoch": 0.9485966572059288,
      "grad_norm": 0.03251789567309417,
      "learning_rate": 2.4768448995273514e-06,
      "loss": 0.5305,
      "step": 188
    },
    {
      "epoch": 0.9536423841059603,
      "grad_norm": 0.031182531135357086,
      "learning_rate": 2.4717369400739372e-06,
      "loss": 0.5436,
      "step": 189
    },
    {
      "epoch": 0.9586881110059918,
      "grad_norm": 0.033544076361382125,
      "learning_rate": 2.466611589301791e-06,
      "loss": 0.5849,
      "step": 190
    },
    {
      "epoch": 0.9637338379060233,
      "grad_norm": 0.03146239237618079,
      "learning_rate": 2.4614689905779907e-06,
      "loss": 0.5424,
      "step": 191
    },
    {
      "epoch": 0.9687795648060549,
      "grad_norm": 0.031938896005507596,
      "learning_rate": 2.4563092877520776e-06,
      "loss": 0.5541,
      "step": 192
    },
    {
      "epoch": 0.9738252917060864,
      "grad_norm": 0.031136299616893074,
      "learning_rate": 2.4511326251520325e-06,
      "loss": 0.58,
      "step": 193
    },
    {
      "epoch": 0.9788710186061179,
      "grad_norm": 0.03323472651029714,
      "learning_rate": 2.445939147580235e-06,
      "loss": 0.5073,
      "step": 194
    },
    {
      "epoch": 0.9839167455061495,
      "grad_norm": 0.03213726601057571,
      "learning_rate": 2.4407290003094177e-06,
      "loss": 0.5758,
      "step": 195
    },
    {
      "epoch": 0.9889624724061811,
      "grad_norm": 0.032768188978415214,
      "learning_rate": 2.4355023290785993e-06,
      "loss": 0.5354,
      "step": 196
    },
    {
      "epoch": 0.9940081993062125,
      "grad_norm": 0.10156434597935675,
      "learning_rate": 2.4302592800890095e-06,
      "loss": 0.5784,
      "step": 197
    },
    {
      "epoch": 0.9990539262062441,
      "grad_norm": 0.03993430739998164,
      "learning_rate": 2.425e-06,
      "loss": 0.532,
      "step": 198
    }
  ],
  "logging_steps": 1,
  "max_steps": 594,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 99,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 821230496120832.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}
|
|