| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.9870598856455013, |
| "eval_steps": 500, |
| "global_step": 621, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.004814926271441469, |
| "grad_norm": 0.3190668225288391, |
| "learning_rate": 9.98389694041868e-06, |
| "loss": 14.3722, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.009629852542882938, |
| "grad_norm": 0.704807698726654, |
| "learning_rate": 9.96779388083736e-06, |
| "loss": 17.3468, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.014444778814324405, |
| "grad_norm": 0.5227249264717102, |
| "learning_rate": 9.95169082125604e-06, |
| "loss": 18.7806, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.019259705085765876, |
| "grad_norm": 0.34830373525619507, |
| "learning_rate": 9.93558776167472e-06, |
| "loss": 18.8868, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.024074631357207343, |
| "grad_norm": 0.36558371782302856, |
| "learning_rate": 9.919484702093398e-06, |
| "loss": 17.0113, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.02888955762864881, |
| "grad_norm": 0.464693546295166, |
| "learning_rate": 9.903381642512077e-06, |
| "loss": 16.2247, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.03370448390009028, |
| "grad_norm": 0.45501771569252014, |
| "learning_rate": 9.887278582930757e-06, |
| "loss": 15.7179, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.03851941017153175, |
| "grad_norm": 0.6688278317451477, |
| "learning_rate": 9.871175523349438e-06, |
| "loss": 18.7794, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.043334336442973215, |
| "grad_norm": 0.40696507692337036, |
| "learning_rate": 9.855072463768118e-06, |
| "loss": 16.3889, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.048149262714414685, |
| "grad_norm": 0.38113319873809814, |
| "learning_rate": 9.838969404186796e-06, |
| "loss": 16.7515, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.052964188985856156, |
| "grad_norm": 0.35052913427352905, |
| "learning_rate": 9.822866344605476e-06, |
| "loss": 15.7069, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.05777911525729762, |
| "grad_norm": 0.47708237171173096, |
| "learning_rate": 9.806763285024155e-06, |
| "loss": 16.9549, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.0625940415287391, |
| "grad_norm": 0.4960598945617676, |
| "learning_rate": 9.790660225442835e-06, |
| "loss": 16.8876, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.06740896780018056, |
| "grad_norm": 0.39951273798942566, |
| "learning_rate": 9.774557165861515e-06, |
| "loss": 15.1888, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.07222389407162202, |
| "grad_norm": 0.21550379693508148, |
| "learning_rate": 9.758454106280194e-06, |
| "loss": 14.6054, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.0770388203430635, |
| "grad_norm": 0.30602937936782837, |
| "learning_rate": 9.742351046698874e-06, |
| "loss": 16.4524, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.08185374661450497, |
| "grad_norm": 0.30777233839035034, |
| "learning_rate": 9.726247987117554e-06, |
| "loss": 14.3263, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.08666867288594643, |
| "grad_norm": 0.35533130168914795, |
| "learning_rate": 9.710144927536233e-06, |
| "loss": 16.2459, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.09148359915738791, |
| "grad_norm": 0.23820991814136505, |
| "learning_rate": 9.694041867954911e-06, |
| "loss": 14.2147, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.09629852542882937, |
| "grad_norm": 0.2193877398967743, |
| "learning_rate": 9.677938808373591e-06, |
| "loss": 14.4833, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.10111345170027083, |
| "grad_norm": 0.24645549058914185, |
| "learning_rate": 9.66183574879227e-06, |
| "loss": 14.6868, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.10592837797171231, |
| "grad_norm": 0.2614218592643738, |
| "learning_rate": 9.64573268921095e-06, |
| "loss": 15.4011, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.11074330424315378, |
| "grad_norm": 0.3114742040634155, |
| "learning_rate": 9.62962962962963e-06, |
| "loss": 14.9497, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.11555823051459524, |
| "grad_norm": 0.20465250313282013, |
| "learning_rate": 9.61352657004831e-06, |
| "loss": 13.4598, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.12037315678603672, |
| "grad_norm": 0.3349449336528778, |
| "learning_rate": 9.59742351046699e-06, |
| "loss": 14.3437, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.1251880830574782, |
| "grad_norm": 0.4164576828479767, |
| "learning_rate": 9.581320450885669e-06, |
| "loss": 14.5754, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.13000300932891964, |
| "grad_norm": 0.3533851206302643, |
| "learning_rate": 9.565217391304349e-06, |
| "loss": 14.7727, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.13481793560036112, |
| "grad_norm": 0.3998354375362396, |
| "learning_rate": 9.549114331723028e-06, |
| "loss": 13.1538, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.1396328618718026, |
| "grad_norm": 0.3069708049297333, |
| "learning_rate": 9.533011272141708e-06, |
| "loss": 13.2611, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.14444778814324405, |
| "grad_norm": 0.20584744215011597, |
| "learning_rate": 9.516908212560388e-06, |
| "loss": 14.7522, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.14926271441468553, |
| "grad_norm": 0.2097318172454834, |
| "learning_rate": 9.500805152979067e-06, |
| "loss": 15.0184, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.154077640686127, |
| "grad_norm": 0.3266746401786804, |
| "learning_rate": 9.484702093397747e-06, |
| "loss": 13.2641, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.15889256695756845, |
| "grad_norm": 0.2459367960691452, |
| "learning_rate": 9.468599033816425e-06, |
| "loss": 15.1449, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.16370749322900993, |
| "grad_norm": 0.4554983973503113, |
| "learning_rate": 9.452495974235105e-06, |
| "loss": 14.6538, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.1685224195004514, |
| "grad_norm": 0.3142286241054535, |
| "learning_rate": 9.436392914653784e-06, |
| "loss": 14.0927, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.17333734577189286, |
| "grad_norm": 0.2828330993652344, |
| "learning_rate": 9.420289855072464e-06, |
| "loss": 14.5898, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.17815227204333434, |
| "grad_norm": 0.25663697719573975, |
| "learning_rate": 9.404186795491144e-06, |
| "loss": 12.7267, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.18296719831477581, |
| "grad_norm": 0.4929574728012085, |
| "learning_rate": 9.388083735909823e-06, |
| "loss": 14.2236, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.18778212458621726, |
| "grad_norm": 0.414725661277771, |
| "learning_rate": 9.371980676328503e-06, |
| "loss": 13.9036, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.19259705085765874, |
| "grad_norm": 0.2808246910572052, |
| "learning_rate": 9.355877616747183e-06, |
| "loss": 14.7754, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.19741197712910022, |
| "grad_norm": 0.2846072316169739, |
| "learning_rate": 9.339774557165862e-06, |
| "loss": 13.519, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.20222690340054167, |
| "grad_norm": 0.2638435661792755, |
| "learning_rate": 9.323671497584542e-06, |
| "loss": 14.5145, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.20704182967198315, |
| "grad_norm": 0.22342148423194885, |
| "learning_rate": 9.307568438003222e-06, |
| "loss": 12.7617, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.21185675594342462, |
| "grad_norm": 0.2732909619808197, |
| "learning_rate": 9.291465378421901e-06, |
| "loss": 13.6826, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.21667168221486607, |
| "grad_norm": 0.23550738394260406, |
| "learning_rate": 9.275362318840581e-06, |
| "loss": 13.1958, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.22148660848630755, |
| "grad_norm": 0.2673870027065277, |
| "learning_rate": 9.25925925925926e-06, |
| "loss": 14.1934, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.22630153475774903, |
| "grad_norm": 0.303568571805954, |
| "learning_rate": 9.243156199677939e-06, |
| "loss": 11.8819, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.23111646102919048, |
| "grad_norm": 0.27822041511535645, |
| "learning_rate": 9.227053140096618e-06, |
| "loss": 11.8434, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.23593138730063196, |
| "grad_norm": 0.21598580479621887, |
| "learning_rate": 9.210950080515298e-06, |
| "loss": 13.0898, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.24074631357207343, |
| "grad_norm": 0.22329603135585785, |
| "learning_rate": 9.194847020933978e-06, |
| "loss": 12.1004, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.24556123984351488, |
| "grad_norm": 0.22152170538902283, |
| "learning_rate": 9.178743961352658e-06, |
| "loss": 13.8467, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.2503761661149564, |
| "grad_norm": 0.2549304664134979, |
| "learning_rate": 9.162640901771337e-06, |
| "loss": 13.3853, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.2551910923863978, |
| "grad_norm": 0.2308962047100067, |
| "learning_rate": 9.146537842190017e-06, |
| "loss": 12.9115, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.2600060186578393, |
| "grad_norm": 0.19011437892913818, |
| "learning_rate": 9.130434782608697e-06, |
| "loss": 12.7455, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.26482094492928077, |
| "grad_norm": 0.21280792355537415, |
| "learning_rate": 9.114331723027376e-06, |
| "loss": 13.4157, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.26963587120072224, |
| "grad_norm": 0.35571521520614624, |
| "learning_rate": 9.098228663446056e-06, |
| "loss": 13.961, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.2744507974721637, |
| "grad_norm": 0.25055205821990967, |
| "learning_rate": 9.082125603864736e-06, |
| "loss": 13.5945, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.2792657237436052, |
| "grad_norm": 0.22618041932582855, |
| "learning_rate": 9.066022544283415e-06, |
| "loss": 13.6034, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.2840806500150466, |
| "grad_norm": 0.2419959455728531, |
| "learning_rate": 9.049919484702095e-06, |
| "loss": 13.5142, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.2888955762864881, |
| "grad_norm": 0.3027523458003998, |
| "learning_rate": 9.033816425120775e-06, |
| "loss": 12.9825, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.2937105025579296, |
| "grad_norm": 0.1812627613544464, |
| "learning_rate": 9.017713365539453e-06, |
| "loss": 12.4197, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.29852542882937105, |
| "grad_norm": 0.2510731518268585, |
| "learning_rate": 9.001610305958132e-06, |
| "loss": 13.2717, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.30334035510081253, |
| "grad_norm": 0.2064312994480133, |
| "learning_rate": 8.985507246376812e-06, |
| "loss": 13.3493, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.308155281372254, |
| "grad_norm": 0.2627861797809601, |
| "learning_rate": 8.969404186795492e-06, |
| "loss": 13.3364, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.31297020764369543, |
| "grad_norm": 0.22463975846767426, |
| "learning_rate": 8.953301127214171e-06, |
| "loss": 12.0759, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.3177851339151369, |
| "grad_norm": 0.3166675865650177, |
| "learning_rate": 8.937198067632851e-06, |
| "loss": 12.8393, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.3226000601865784, |
| "grad_norm": 0.16428841650485992, |
| "learning_rate": 8.92109500805153e-06, |
| "loss": 12.6723, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.32741498645801986, |
| "grad_norm": 0.1815037578344345, |
| "learning_rate": 8.90499194847021e-06, |
| "loss": 12.6212, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.33222991272946134, |
| "grad_norm": 0.2504093050956726, |
| "learning_rate": 8.888888888888888e-06, |
| "loss": 12.6547, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.3370448390009028, |
| "grad_norm": 0.17379416525363922, |
| "learning_rate": 8.87278582930757e-06, |
| "loss": 10.5245, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.34185976527234424, |
| "grad_norm": 0.20780153572559357, |
| "learning_rate": 8.85668276972625e-06, |
| "loss": 11.1868, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.3466746915437857, |
| "grad_norm": 0.2680881917476654, |
| "learning_rate": 8.840579710144929e-06, |
| "loss": 11.9582, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.3514896178152272, |
| "grad_norm": 0.1777425855398178, |
| "learning_rate": 8.824476650563609e-06, |
| "loss": 11.3178, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.3563045440866687, |
| "grad_norm": 0.20199166238307953, |
| "learning_rate": 8.808373590982288e-06, |
| "loss": 12.5066, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.36111947035811015, |
| "grad_norm": 0.23542606830596924, |
| "learning_rate": 8.792270531400966e-06, |
| "loss": 11.885, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.36593439662955163, |
| "grad_norm": 0.23038695752620697, |
| "learning_rate": 8.776167471819646e-06, |
| "loss": 11.1026, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.3707493229009931, |
| "grad_norm": 0.2536081075668335, |
| "learning_rate": 8.760064412238326e-06, |
| "loss": 13.065, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.37556424917243453, |
| "grad_norm": 0.2599170207977295, |
| "learning_rate": 8.743961352657005e-06, |
| "loss": 12.4683, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.380379175443876, |
| "grad_norm": 0.23882345855236053, |
| "learning_rate": 8.727858293075685e-06, |
| "loss": 11.6778, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.3851941017153175, |
| "grad_norm": 0.23855774104595184, |
| "learning_rate": 8.711755233494365e-06, |
| "loss": 13.026, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.39000902798675896, |
| "grad_norm": 0.26537057757377625, |
| "learning_rate": 8.695652173913044e-06, |
| "loss": 12.4535, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.39482395425820044, |
| "grad_norm": 0.21693478524684906, |
| "learning_rate": 8.679549114331724e-06, |
| "loss": 12.9436, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.3996388805296419, |
| "grad_norm": 0.162302166223526, |
| "learning_rate": 8.663446054750402e-06, |
| "loss": 11.3558, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.40445380680108334, |
| "grad_norm": 0.271846741437912, |
| "learning_rate": 8.647342995169082e-06, |
| "loss": 11.0237, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.4092687330725248, |
| "grad_norm": 0.16958190500736237, |
| "learning_rate": 8.631239935587761e-06, |
| "loss": 11.3822, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.4140836593439663, |
| "grad_norm": 0.19066102802753448, |
| "learning_rate": 8.615136876006443e-06, |
| "loss": 11.6137, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.41889858561540777, |
| "grad_norm": 0.21410760283470154, |
| "learning_rate": 8.599033816425122e-06, |
| "loss": 11.1353, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.42371351188684925, |
| "grad_norm": 0.17947272956371307, |
| "learning_rate": 8.582930756843802e-06, |
| "loss": 11.0955, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.4285284381582907, |
| "grad_norm": 0.2798727750778198, |
| "learning_rate": 8.56682769726248e-06, |
| "loss": 11.5026, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.43334336442973215, |
| "grad_norm": 0.19547878205776215, |
| "learning_rate": 8.55072463768116e-06, |
| "loss": 11.2341, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.4381582907011736, |
| "grad_norm": 0.20346851646900177, |
| "learning_rate": 8.53462157809984e-06, |
| "loss": 11.7612, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.4429732169726151, |
| "grad_norm": 0.22177843749523163, |
| "learning_rate": 8.518518518518519e-06, |
| "loss": 12.027, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.4477881432440566, |
| "grad_norm": 0.14566639065742493, |
| "learning_rate": 8.502415458937199e-06, |
| "loss": 12.1414, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.45260306951549806, |
| "grad_norm": 0.19193682074546814, |
| "learning_rate": 8.486312399355879e-06, |
| "loss": 11.2928, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.45741799578693954, |
| "grad_norm": 0.18830566108226776, |
| "learning_rate": 8.470209339774558e-06, |
| "loss": 12.3402, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.46223292205838096, |
| "grad_norm": 0.19319747388362885, |
| "learning_rate": 8.454106280193238e-06, |
| "loss": 11.4159, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.46704784832982243, |
| "grad_norm": 0.2581634521484375, |
| "learning_rate": 8.438003220611916e-06, |
| "loss": 12.5042, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.4718627746012639, |
| "grad_norm": 0.2127319574356079, |
| "learning_rate": 8.421900161030596e-06, |
| "loss": 11.8059, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.4766777008727054, |
| "grad_norm": 0.18906573951244354, |
| "learning_rate": 8.405797101449275e-06, |
| "loss": 12.773, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.48149262714414687, |
| "grad_norm": 0.2039322406053543, |
| "learning_rate": 8.389694041867955e-06, |
| "loss": 11.2793, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.48630755341558835, |
| "grad_norm": 0.17869459092617035, |
| "learning_rate": 8.373590982286636e-06, |
| "loss": 12.1488, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.49112247968702977, |
| "grad_norm": 0.24505895376205444, |
| "learning_rate": 8.357487922705316e-06, |
| "loss": 12.6911, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.49593740595847124, |
| "grad_norm": 0.24129539728164673, |
| "learning_rate": 8.341384863123994e-06, |
| "loss": 11.1825, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.5007523322299128, |
| "grad_norm": 0.20321142673492432, |
| "learning_rate": 8.325281803542674e-06, |
| "loss": 11.3817, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.5055672585013542, |
| "grad_norm": 0.2557075321674347, |
| "learning_rate": 8.309178743961353e-06, |
| "loss": 13.0008, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.5103821847727956, |
| "grad_norm": 0.27801477909088135, |
| "learning_rate": 8.293075684380033e-06, |
| "loss": 10.5208, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.5151971110442372, |
| "grad_norm": 0.18863140046596527, |
| "learning_rate": 8.276972624798713e-06, |
| "loss": 11.17, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.5200120373156786, |
| "grad_norm": 0.1997506022453308, |
| "learning_rate": 8.260869565217392e-06, |
| "loss": 11.094, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.5248269635871201, |
| "grad_norm": 0.17764043807983398, |
| "learning_rate": 8.244766505636072e-06, |
| "loss": 10.9546, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.5296418898585615, |
| "grad_norm": 0.22004744410514832, |
| "learning_rate": 8.228663446054752e-06, |
| "loss": 10.8977, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.534456816130003, |
| "grad_norm": 0.20619215071201324, |
| "learning_rate": 8.212560386473431e-06, |
| "loss": 12.0217, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.5392717424014445, |
| "grad_norm": 0.1944962590932846, |
| "learning_rate": 8.19645732689211e-06, |
| "loss": 11.5528, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.5440866686728859, |
| "grad_norm": 0.13986949622631073, |
| "learning_rate": 8.180354267310789e-06, |
| "loss": 10.8501, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.5489015949443274, |
| "grad_norm": 0.18104106187820435, |
| "learning_rate": 8.164251207729469e-06, |
| "loss": 12.0401, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.5537165212157689, |
| "grad_norm": 0.22354455292224884, |
| "learning_rate": 8.148148148148148e-06, |
| "loss": 12.3038, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.5585314474872104, |
| "grad_norm": 0.21359990537166595, |
| "learning_rate": 8.132045088566828e-06, |
| "loss": 10.9812, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.5633463737586518, |
| "grad_norm": 0.25966572761535645, |
| "learning_rate": 8.115942028985508e-06, |
| "loss": 11.0717, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.5681613000300932, |
| "grad_norm": 0.18161477148532867, |
| "learning_rate": 8.099838969404187e-06, |
| "loss": 10.8503, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.5729762263015348, |
| "grad_norm": 0.30178895592689514, |
| "learning_rate": 8.083735909822867e-06, |
| "loss": 12.6225, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.5777911525729762, |
| "grad_norm": 0.13033385574817657, |
| "learning_rate": 8.067632850241547e-06, |
| "loss": 11.2823, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.5826060788444177, |
| "grad_norm": 0.2345341593027115, |
| "learning_rate": 8.051529790660226e-06, |
| "loss": 10.6418, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.5874210051158592, |
| "grad_norm": 0.23290252685546875, |
| "learning_rate": 8.035426731078906e-06, |
| "loss": 10.7231, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.5922359313873007, |
| "grad_norm": 0.19367018342018127, |
| "learning_rate": 8.019323671497586e-06, |
| "loss": 10.2351, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.5970508576587421, |
| "grad_norm": 0.22510769963264465, |
| "learning_rate": 8.003220611916265e-06, |
| "loss": 10.3216, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.6018657839301835, |
| "grad_norm": 0.21876239776611328, |
| "learning_rate": 7.987117552334945e-06, |
| "loss": 11.0453, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.6066807102016251, |
| "grad_norm": 0.23988570272922516, |
| "learning_rate": 7.971014492753623e-06, |
| "loss": 10.9186, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.6114956364730665, |
| "grad_norm": 0.1909828633069992, |
| "learning_rate": 7.954911433172303e-06, |
| "loss": 10.7444, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.616310562744508, |
| "grad_norm": 0.2268180102109909, |
| "learning_rate": 7.938808373590982e-06, |
| "loss": 12.1826, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.6211254890159494, |
| "grad_norm": 0.18531453609466553, |
| "learning_rate": 7.922705314009662e-06, |
| "loss": 11.0919, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.6259404152873909, |
| "grad_norm": 0.24563215672969818, |
| "learning_rate": 7.906602254428342e-06, |
| "loss": 10.825, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.6307553415588324, |
| "grad_norm": 0.26069939136505127, |
| "learning_rate": 7.890499194847021e-06, |
| "loss": 10.9237, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.6355702678302738, |
| "grad_norm": 0.18118217587471008, |
| "learning_rate": 7.874396135265701e-06, |
| "loss": 11.2994, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.6403851941017153, |
| "grad_norm": 0.2178242951631546, |
| "learning_rate": 7.85829307568438e-06, |
| "loss": 10.6764, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.6452001203731568, |
| "grad_norm": 0.18861421942710876, |
| "learning_rate": 7.84219001610306e-06, |
| "loss": 11.7684, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.6500150466445983, |
| "grad_norm": 0.2540731430053711, |
| "learning_rate": 7.82608695652174e-06, |
| "loss": 10.4613, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.6548299729160397, |
| "grad_norm": 0.22468675673007965, |
| "learning_rate": 7.80998389694042e-06, |
| "loss": 11.0479, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.6596448991874811, |
| "grad_norm": 0.18307951092720032, |
| "learning_rate": 7.7938808373591e-06, |
| "loss": 11.7074, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.6644598254589227, |
| "grad_norm": 0.2777751088142395, |
| "learning_rate": 7.77777777777778e-06, |
| "loss": 11.5034, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.6692747517303641, |
| "grad_norm": 0.20376338064670563, |
| "learning_rate": 7.761674718196459e-06, |
| "loss": 11.178, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.6740896780018056, |
| "grad_norm": 0.19434967637062073, |
| "learning_rate": 7.745571658615137e-06, |
| "loss": 10.198, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.6789046042732471, |
| "grad_norm": 0.28449344635009766, |
| "learning_rate": 7.729468599033817e-06, |
| "loss": 11.1956, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.6837195305446885, |
| "grad_norm": 0.18125340342521667, |
| "learning_rate": 7.713365539452496e-06, |
| "loss": 12.0773, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.68853445681613, |
| "grad_norm": 0.2260919064283371, |
| "learning_rate": 7.697262479871176e-06, |
| "loss": 11.2763, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.6933493830875714, |
| "grad_norm": 0.23274123668670654, |
| "learning_rate": 7.681159420289856e-06, |
| "loss": 11.9908, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.698164309359013, |
| "grad_norm": 0.16333813965320587, |
| "learning_rate": 7.665056360708535e-06, |
| "loss": 12.1754, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.7029792356304544, |
| "grad_norm": 0.19147440791130066, |
| "learning_rate": 7.648953301127215e-06, |
| "loss": 10.8012, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.7077941619018959, |
| "grad_norm": 0.24757863581180573, |
| "learning_rate": 7.632850241545895e-06, |
| "loss": 11.222, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.7126090881733373, |
| "grad_norm": 0.2936674952507019, |
| "learning_rate": 7.616747181964574e-06, |
| "loss": 10.7104, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.7174240144447788, |
| "grad_norm": 0.25289615988731384, |
| "learning_rate": 7.600644122383254e-06, |
| "loss": 11.2394, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.7222389407162203, |
| "grad_norm": 0.16242274641990662, |
| "learning_rate": 7.584541062801934e-06, |
| "loss": 10.9335, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.7270538669876617, |
| "grad_norm": 0.16051234304904938, |
| "learning_rate": 7.568438003220613e-06, |
| "loss": 10.5078, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.7318687932591033, |
| "grad_norm": 0.19001922011375427, |
| "learning_rate": 7.552334943639292e-06, |
| "loss": 10.2024, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.7366837195305447, |
| "grad_norm": 0.1944311112165451, |
| "learning_rate": 7.536231884057972e-06, |
| "loss": 10.3439, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.7414986458019862, |
| "grad_norm": 0.22597943246364594, |
| "learning_rate": 7.5201288244766514e-06, |
| "loss": 9.8315, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.7463135720734276, |
| "grad_norm": 0.16061653196811676, |
| "learning_rate": 7.504025764895331e-06, |
| "loss": 10.1577, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.7511284983448691, |
| "grad_norm": 0.18217833340168, |
| "learning_rate": 7.48792270531401e-06, |
| "loss": 11.1101, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.7559434246163106, |
| "grad_norm": 0.24722352623939514, |
| "learning_rate": 7.47181964573269e-06, |
| "loss": 9.7077, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.760758350887752, |
| "grad_norm": 0.19641828536987305, |
| "learning_rate": 7.455716586151369e-06, |
| "loss": 10.4689, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.7655732771591935, |
| "grad_norm": 0.2800208330154419, |
| "learning_rate": 7.439613526570049e-06, |
| "loss": 11.402, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.770388203430635, |
| "grad_norm": 0.19170229136943817, |
| "learning_rate": 7.423510466988728e-06, |
| "loss": 10.1995, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.7752031297020764, |
| "grad_norm": 0.1706549972295761, |
| "learning_rate": 7.4074074074074075e-06, |
| "loss": 10.3468, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.7800180559735179, |
| "grad_norm": 0.21024712920188904, |
| "learning_rate": 7.391304347826087e-06, |
| "loss": 11.7541, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.7848329822449593, |
| "grad_norm": 0.22287265956401825, |
| "learning_rate": 7.375201288244767e-06, |
| "loss": 11.3616, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.7896479085164009, |
| "grad_norm": 0.195387065410614, |
| "learning_rate": 7.359098228663447e-06, |
| "loss": 9.8747, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.7944628347878423, |
| "grad_norm": 0.2072424590587616, |
| "learning_rate": 7.342995169082127e-06, |
| "loss": 11.2013, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.7992777610592838, |
| "grad_norm": 0.17055857181549072, |
| "learning_rate": 7.326892109500806e-06, |
| "loss": 9.3551, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.8040926873307253, |
| "grad_norm": 0.2913988530635834, |
| "learning_rate": 7.3107890499194855e-06, |
| "loss": 9.5701, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.8089076136021667, |
| "grad_norm": 0.27838587760925293, |
| "learning_rate": 7.294685990338165e-06, |
| "loss": 9.1273, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.8137225398736082, |
| "grad_norm": 0.16759181022644043, |
| "learning_rate": 7.278582930756845e-06, |
| "loss": 12.0986, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.8185374661450496, |
| "grad_norm": 0.2335626184940338, |
| "learning_rate": 7.262479871175524e-06, |
| "loss": 9.4595, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.8233523924164912, |
| "grad_norm": 0.22770944237709045, |
| "learning_rate": 7.246376811594203e-06, |
| "loss": 11.482, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.8281673186879326, |
| "grad_norm": 0.16300161182880402, |
| "learning_rate": 7.230273752012883e-06, |
| "loss": 10.084, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.832982244959374, |
| "grad_norm": 0.1577334851026535, |
| "learning_rate": 7.214170692431563e-06, |
| "loss": 11.7738, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.8377971712308155, |
| "grad_norm": 0.26999086141586304, |
| "learning_rate": 7.1980676328502416e-06, |
| "loss": 11.7664, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.842612097502257, |
| "grad_norm": 0.17184922099113464, |
| "learning_rate": 7.181964573268921e-06, |
| "loss": 10.0153, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.8474270237736985, |
| "grad_norm": 0.19260835647583008, |
| "learning_rate": 7.165861513687601e-06, |
| "loss": 11.0206, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.8522419500451399, |
| "grad_norm": 0.13800834119319916, |
| "learning_rate": 7.149758454106281e-06, |
| "loss": 11.2491, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.8570568763165815, |
| "grad_norm": 0.18511894345283508, |
| "learning_rate": 7.1336553945249594e-06, |
| "loss": 10.0002, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.8618718025880229, |
| "grad_norm": 0.19319257140159607, |
| "learning_rate": 7.117552334943641e-06, |
| "loss": 10.3143, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.8666867288594643, |
| "grad_norm": 0.23793131113052368, |
| "learning_rate": 7.10144927536232e-06, |
| "loss": 9.9405, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.8715016551309058, |
| "grad_norm": 0.22520898282527924, |
| "learning_rate": 7.085346215780999e-06, |
| "loss": 11.7933, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.8763165814023472, |
| "grad_norm": 0.1998303085565567, |
| "learning_rate": 7.069243156199679e-06, |
| "loss": 10.8949, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.8811315076737888, |
| "grad_norm": 0.2205827236175537, |
| "learning_rate": 7.053140096618359e-06, |
| "loss": 11.2439, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.8859464339452302, |
| "grad_norm": 0.18895015120506287, |
| "learning_rate": 7.0370370370370375e-06, |
| "loss": 11.1144, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.8907613602166716, |
| "grad_norm": 0.17686723172664642, |
| "learning_rate": 7.020933977455717e-06, |
| "loss": 10.2206, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.8955762864881132, |
| "grad_norm": 0.2033979296684265, |
| "learning_rate": 7.004830917874397e-06, |
| "loss": 11.4571, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.9003912127595546, |
| "grad_norm": 0.19752806425094604, |
| "learning_rate": 6.9887278582930765e-06, |
| "loss": 10.0299, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.9052061390309961, |
| "grad_norm": 0.26918548345565796, |
| "learning_rate": 6.972624798711755e-06, |
| "loss": 10.0987, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.9100210653024375, |
| "grad_norm": 0.14812156558036804, |
| "learning_rate": 6.956521739130435e-06, |
| "loss": 10.1505, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.9148359915738791, |
| "grad_norm": 0.21255257725715637, |
| "learning_rate": 6.940418679549115e-06, |
| "loss": 10.48, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.9196509178453205, |
| "grad_norm": 0.20056240260601044, |
| "learning_rate": 6.924315619967794e-06, |
| "loss": 10.615, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.9244658441167619, |
| "grad_norm": 0.2510916590690613, |
| "learning_rate": 6.908212560386473e-06, |
| "loss": 9.2769, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.9292807703882034, |
| "grad_norm": 0.19624245166778564, |
| "learning_rate": 6.892109500805153e-06, |
| "loss": 10.3494, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.9340956966596449, |
| "grad_norm": 0.19198696315288544, |
| "learning_rate": 6.8760064412238326e-06, |
| "loss": 11.5993, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.9389106229310864, |
| "grad_norm": 0.18541178107261658, |
| "learning_rate": 6.859903381642513e-06, |
| "loss": 11.1401, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.9437255492025278, |
| "grad_norm": 0.2111266553401947, |
| "learning_rate": 6.843800322061193e-06, |
| "loss": 10.4827, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.9485404754739694, |
| "grad_norm": 0.19431617856025696, |
| "learning_rate": 6.8276972624798724e-06, |
| "loss": 9.9673, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.9533554017454108, |
| "grad_norm": 0.20121034979820251, |
| "learning_rate": 6.811594202898551e-06, |
| "loss": 10.0902, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.9581703280168522, |
| "grad_norm": 0.24719102680683136, |
| "learning_rate": 6.795491143317231e-06, |
| "loss": 9.7116, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.9629852542882937, |
| "grad_norm": 0.14550495147705078, |
| "learning_rate": 6.779388083735911e-06, |
| "loss": 10.3404, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.9678001805597352, |
| "grad_norm": 0.19170908629894257, |
| "learning_rate": 6.76328502415459e-06, |
| "loss": 10.6444, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.9726151068311767, |
| "grad_norm": 0.23954305052757263, |
| "learning_rate": 6.747181964573269e-06, |
| "loss": 10.3116, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.9774300331026181, |
| "grad_norm": 0.15414614975452423, |
| "learning_rate": 6.731078904991949e-06, |
| "loss": 10.4329, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.9822449593740595, |
| "grad_norm": 0.19790370762348175, |
| "learning_rate": 6.7149758454106285e-06, |
| "loss": 10.6693, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.9870598856455011, |
| "grad_norm": 0.23332847654819489, |
| "learning_rate": 6.698872785829308e-06, |
| "loss": 11.7078, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.9918748119169425, |
| "grad_norm": 0.1728251725435257, |
| "learning_rate": 6.682769726247987e-06, |
| "loss": 10.1582, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.996689738188384, |
| "grad_norm": 0.18887676298618317, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 10.0072, |
| "step": 207 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.18887676298618317, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 7.3169, |
| "step": 208 |
| }, |
| { |
| "epoch": 1.0048149262714414, |
| "grad_norm": 0.22321873903274536, |
| "learning_rate": 6.650563607085346e-06, |
| "loss": 9.9937, |
| "step": 209 |
| }, |
| { |
| "epoch": 1.0096298525428828, |
| "grad_norm": 0.23789285123348236, |
| "learning_rate": 6.634460547504026e-06, |
| "loss": 10.1548, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.0144447788143245, |
| "grad_norm": 0.2545947730541229, |
| "learning_rate": 6.6183574879227065e-06, |
| "loss": 10.1225, |
| "step": 211 |
| }, |
| { |
| "epoch": 1.019259705085766, |
| "grad_norm": 0.19479095935821533, |
| "learning_rate": 6.602254428341386e-06, |
| "loss": 9.4662, |
| "step": 212 |
| }, |
| { |
| "epoch": 1.0240746313572073, |
| "grad_norm": 0.1563379466533661, |
| "learning_rate": 6.586151368760065e-06, |
| "loss": 10.5629, |
| "step": 213 |
| }, |
| { |
| "epoch": 1.0288895576286488, |
| "grad_norm": 0.25045880675315857, |
| "learning_rate": 6.570048309178745e-06, |
| "loss": 9.0693, |
| "step": 214 |
| }, |
| { |
| "epoch": 1.0337044839000902, |
| "grad_norm": 0.20094619691371918, |
| "learning_rate": 6.553945249597424e-06, |
| "loss": 10.7736, |
| "step": 215 |
| }, |
| { |
| "epoch": 1.0385194101715318, |
| "grad_norm": 0.2038065642118454, |
| "learning_rate": 6.537842190016104e-06, |
| "loss": 11.2619, |
| "step": 216 |
| }, |
| { |
| "epoch": 1.0433343364429732, |
| "grad_norm": 0.1970120072364807, |
| "learning_rate": 6.521739130434783e-06, |
| "loss": 10.3251, |
| "step": 217 |
| }, |
| { |
| "epoch": 1.0481492627144147, |
| "grad_norm": 0.19979062676429749, |
| "learning_rate": 6.5056360708534626e-06, |
| "loss": 10.6034, |
| "step": 218 |
| }, |
| { |
| "epoch": 1.052964188985856, |
| "grad_norm": 0.16085349023342133, |
| "learning_rate": 6.489533011272142e-06, |
| "loss": 9.5934, |
| "step": 219 |
| }, |
| { |
| "epoch": 1.0577791152572975, |
| "grad_norm": 0.20374242961406708, |
| "learning_rate": 6.473429951690822e-06, |
| "loss": 8.9761, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.0625940415287392, |
| "grad_norm": 0.19417604804039001, |
| "learning_rate": 6.457326892109501e-06, |
| "loss": 9.8925, |
| "step": 221 |
| }, |
| { |
| "epoch": 1.0674089678001806, |
| "grad_norm": 0.1641014963388443, |
| "learning_rate": 6.44122383252818e-06, |
| "loss": 10.019, |
| "step": 222 |
| }, |
| { |
| "epoch": 1.072223894071622, |
| "grad_norm": 0.15444359183311462, |
| "learning_rate": 6.42512077294686e-06, |
| "loss": 9.8515, |
| "step": 223 |
| }, |
| { |
| "epoch": 1.0770388203430634, |
| "grad_norm": 0.31960421800613403, |
| "learning_rate": 6.40901771336554e-06, |
| "loss": 9.6703, |
| "step": 224 |
| }, |
| { |
| "epoch": 1.081853746614505, |
| "grad_norm": 0.18809086084365845, |
| "learning_rate": 6.3929146537842194e-06, |
| "loss": 10.5771, |
| "step": 225 |
| }, |
| { |
| "epoch": 1.0866686728859465, |
| "grad_norm": 0.2899991571903229, |
| "learning_rate": 6.376811594202898e-06, |
| "loss": 9.9195, |
| "step": 226 |
| }, |
| { |
| "epoch": 1.091483599157388, |
| "grad_norm": 0.20936541259288788, |
| "learning_rate": 6.360708534621579e-06, |
| "loss": 9.9802, |
| "step": 227 |
| }, |
| { |
| "epoch": 1.0962985254288293, |
| "grad_norm": 0.20921356976032257, |
| "learning_rate": 6.3446054750402585e-06, |
| "loss": 10.7929, |
| "step": 228 |
| }, |
| { |
| "epoch": 1.1011134517002708, |
| "grad_norm": 0.16953137516975403, |
| "learning_rate": 6.328502415458938e-06, |
| "loss": 9.2881, |
| "step": 229 |
| }, |
| { |
| "epoch": 1.1059283779717124, |
| "grad_norm": 0.16596080362796783, |
| "learning_rate": 6.312399355877618e-06, |
| "loss": 9.9087, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.1107433042431538, |
| "grad_norm": 0.17415396869182587, |
| "learning_rate": 6.296296296296297e-06, |
| "loss": 10.5827, |
| "step": 231 |
| }, |
| { |
| "epoch": 1.1155582305145952, |
| "grad_norm": 0.1941956877708435, |
| "learning_rate": 6.280193236714976e-06, |
| "loss": 10.6707, |
| "step": 232 |
| }, |
| { |
| "epoch": 1.1203731567860367, |
| "grad_norm": 0.2597000300884247, |
| "learning_rate": 6.264090177133656e-06, |
| "loss": 12.2734, |
| "step": 233 |
| }, |
| { |
| "epoch": 1.1251880830574783, |
| "grad_norm": 0.1953185349702835, |
| "learning_rate": 6.247987117552336e-06, |
| "loss": 9.6042, |
| "step": 234 |
| }, |
| { |
| "epoch": 1.1300030093289197, |
| "grad_norm": 0.19797232747077942, |
| "learning_rate": 6.2318840579710145e-06, |
| "loss": 10.6643, |
| "step": 235 |
| }, |
| { |
| "epoch": 1.1348179356003611, |
| "grad_norm": 0.18180033564567566, |
| "learning_rate": 6.215780998389694e-06, |
| "loss": 10.2799, |
| "step": 236 |
| }, |
| { |
| "epoch": 1.1396328618718026, |
| "grad_norm": 0.17393337190151215, |
| "learning_rate": 6.199677938808374e-06, |
| "loss": 9.4656, |
| "step": 237 |
| }, |
| { |
| "epoch": 1.144447788143244, |
| "grad_norm": 0.1834544539451599, |
| "learning_rate": 6.1835748792270535e-06, |
| "loss": 8.9495, |
| "step": 238 |
| }, |
| { |
| "epoch": 1.1492627144146854, |
| "grad_norm": 0.14842462539672852, |
| "learning_rate": 6.167471819645733e-06, |
| "loss": 9.6697, |
| "step": 239 |
| }, |
| { |
| "epoch": 1.154077640686127, |
| "grad_norm": 0.2158040702342987, |
| "learning_rate": 6.151368760064412e-06, |
| "loss": 9.1501, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.1588925669575685, |
| "grad_norm": 0.18131056427955627, |
| "learning_rate": 6.135265700483092e-06, |
| "loss": 10.3998, |
| "step": 241 |
| }, |
| { |
| "epoch": 1.16370749322901, |
| "grad_norm": 0.22484710812568665, |
| "learning_rate": 6.119162640901772e-06, |
| "loss": 10.0693, |
| "step": 242 |
| }, |
| { |
| "epoch": 1.1685224195004513, |
| "grad_norm": 0.18370361626148224, |
| "learning_rate": 6.103059581320452e-06, |
| "loss": 10.4377, |
| "step": 243 |
| }, |
| { |
| "epoch": 1.173337345771893, |
| "grad_norm": 0.2081800103187561, |
| "learning_rate": 6.086956521739132e-06, |
| "loss": 9.8576, |
| "step": 244 |
| }, |
| { |
| "epoch": 1.1781522720433344, |
| "grad_norm": 0.1726984828710556, |
| "learning_rate": 6.0708534621578104e-06, |
| "loss": 10.4158, |
| "step": 245 |
| }, |
| { |
| "epoch": 1.1829671983147758, |
| "grad_norm": 0.22167733311653137, |
| "learning_rate": 6.05475040257649e-06, |
| "loss": 9.2583, |
| "step": 246 |
| }, |
| { |
| "epoch": 1.1877821245862172, |
| "grad_norm": 0.24326634407043457, |
| "learning_rate": 6.03864734299517e-06, |
| "loss": 10.1546, |
| "step": 247 |
| }, |
| { |
| "epoch": 1.1925970508576587, |
| "grad_norm": 0.20624417066574097, |
| "learning_rate": 6.0225442834138495e-06, |
| "loss": 10.3518, |
| "step": 248 |
| }, |
| { |
| "epoch": 1.1974119771291003, |
| "grad_norm": 0.22262895107269287, |
| "learning_rate": 6.006441223832528e-06, |
| "loss": 10.2671, |
| "step": 249 |
| }, |
| { |
| "epoch": 1.2022269034005417, |
| "grad_norm": 0.18244238197803497, |
| "learning_rate": 5.990338164251208e-06, |
| "loss": 10.3412, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.2070418296719831, |
| "grad_norm": 0.12642191350460052, |
| "learning_rate": 5.974235104669888e-06, |
| "loss": 8.3136, |
| "step": 251 |
| }, |
| { |
| "epoch": 1.2118567559434246, |
| "grad_norm": 0.22949941456317902, |
| "learning_rate": 5.958132045088567e-06, |
| "loss": 9.8669, |
| "step": 252 |
| }, |
| { |
| "epoch": 1.216671682214866, |
| "grad_norm": 0.17824606597423553, |
| "learning_rate": 5.942028985507247e-06, |
| "loss": 8.9438, |
| "step": 253 |
| }, |
| { |
| "epoch": 1.2214866084863076, |
| "grad_norm": 0.21706126630306244, |
| "learning_rate": 5.925925925925926e-06, |
| "loss": 10.5671, |
| "step": 254 |
| }, |
| { |
| "epoch": 1.226301534757749, |
| "grad_norm": 0.1777815967798233, |
| "learning_rate": 5.9098228663446055e-06, |
| "loss": 9.0566, |
| "step": 255 |
| }, |
| { |
| "epoch": 1.2311164610291905, |
| "grad_norm": 0.16943249106407166, |
| "learning_rate": 5.893719806763285e-06, |
| "loss": 10.5991, |
| "step": 256 |
| }, |
| { |
| "epoch": 1.235931387300632, |
| "grad_norm": 0.19475321471691132, |
| "learning_rate": 5.877616747181965e-06, |
| "loss": 9.7751, |
| "step": 257 |
| }, |
| { |
| "epoch": 1.2407463135720733, |
| "grad_norm": 0.15499532222747803, |
| "learning_rate": 5.861513687600645e-06, |
| "loss": 9.2155, |
| "step": 258 |
| }, |
| { |
| "epoch": 1.245561239843515, |
| "grad_norm": 0.21997332572937012, |
| "learning_rate": 5.845410628019324e-06, |
| "loss": 9.9653, |
| "step": 259 |
| }, |
| { |
| "epoch": 1.2503761661149564, |
| "grad_norm": 0.2071482390165329, |
| "learning_rate": 5.829307568438004e-06, |
| "loss": 9.9446, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.2551910923863978, |
| "grad_norm": 0.18931487202644348, |
| "learning_rate": 5.8132045088566835e-06, |
| "loss": 10.322, |
| "step": 261 |
| }, |
| { |
| "epoch": 1.2600060186578392, |
| "grad_norm": 0.14098307490348816, |
| "learning_rate": 5.797101449275363e-06, |
| "loss": 9.7707, |
| "step": 262 |
| }, |
| { |
| "epoch": 1.2648209449292809, |
| "grad_norm": 0.22090758383274078, |
| "learning_rate": 5.780998389694043e-06, |
| "loss": 9.702, |
| "step": 263 |
| }, |
| { |
| "epoch": 1.2696358712007223, |
| "grad_norm": 0.181729257106781, |
| "learning_rate": 5.764895330112722e-06, |
| "loss": 9.5123, |
| "step": 264 |
| }, |
| { |
| "epoch": 1.2744507974721637, |
| "grad_norm": 0.1258496791124344, |
| "learning_rate": 5.748792270531401e-06, |
| "loss": 9.0927, |
| "step": 265 |
| }, |
| { |
| "epoch": 1.2792657237436051, |
| "grad_norm": 0.21762683987617493, |
| "learning_rate": 5.732689210950081e-06, |
| "loss": 8.5398, |
| "step": 266 |
| }, |
| { |
| "epoch": 1.2840806500150466, |
| "grad_norm": 0.14968731999397278, |
| "learning_rate": 5.716586151368761e-06, |
| "loss": 9.486, |
| "step": 267 |
| }, |
| { |
| "epoch": 1.288895576286488, |
| "grad_norm": 0.17779159545898438, |
| "learning_rate": 5.70048309178744e-06, |
| "loss": 10.4383, |
| "step": 268 |
| }, |
| { |
| "epoch": 1.2937105025579296, |
| "grad_norm": 0.19466915726661682, |
| "learning_rate": 5.684380032206119e-06, |
| "loss": 10.2354, |
| "step": 269 |
| }, |
| { |
| "epoch": 1.298525428829371, |
| "grad_norm": 0.22139185667037964, |
| "learning_rate": 5.668276972624799e-06, |
| "loss": 9.7784, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.3033403551008125, |
| "grad_norm": 0.21013078093528748, |
| "learning_rate": 5.652173913043479e-06, |
| "loss": 11.4605, |
| "step": 271 |
| }, |
| { |
| "epoch": 1.3081552813722541, |
| "grad_norm": 0.17095215618610382, |
| "learning_rate": 5.6360708534621574e-06, |
| "loss": 9.6968, |
| "step": 272 |
| }, |
| { |
| "epoch": 1.3129702076436955, |
| "grad_norm": 0.15703898668289185, |
| "learning_rate": 5.619967793880838e-06, |
| "loss": 8.9945, |
| "step": 273 |
| }, |
| { |
| "epoch": 1.317785133915137, |
| "grad_norm": 0.16166311502456665, |
| "learning_rate": 5.603864734299518e-06, |
| "loss": 8.9587, |
| "step": 274 |
| }, |
| { |
| "epoch": 1.3226000601865784, |
| "grad_norm": 0.18226633965969086, |
| "learning_rate": 5.587761674718197e-06, |
| "loss": 9.5976, |
| "step": 275 |
| }, |
| { |
| "epoch": 1.3274149864580198, |
| "grad_norm": 0.16516032814979553, |
| "learning_rate": 5.571658615136877e-06, |
| "loss": 8.9366, |
| "step": 276 |
| }, |
| { |
| "epoch": 1.3322299127294612, |
| "grad_norm": 0.18485237658023834, |
| "learning_rate": 5.555555555555557e-06, |
| "loss": 8.6867, |
| "step": 277 |
| }, |
| { |
| "epoch": 1.3370448390009029, |
| "grad_norm": 0.16183756291866302, |
| "learning_rate": 5.5394524959742355e-06, |
| "loss": 10.0091, |
| "step": 278 |
| }, |
| { |
| "epoch": 1.3418597652723443, |
| "grad_norm": 0.18236857652664185, |
| "learning_rate": 5.523349436392915e-06, |
| "loss": 11.0512, |
| "step": 279 |
| }, |
| { |
| "epoch": 1.3466746915437857, |
| "grad_norm": 0.16111883521080017, |
| "learning_rate": 5.507246376811595e-06, |
| "loss": 9.9661, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.3514896178152271, |
| "grad_norm": 0.17416836321353912, |
| "learning_rate": 5.4911433172302745e-06, |
| "loss": 8.5023, |
| "step": 281 |
| }, |
| { |
| "epoch": 1.3563045440866688, |
| "grad_norm": 0.1845031976699829, |
| "learning_rate": 5.475040257648953e-06, |
| "loss": 8.1757, |
| "step": 282 |
| }, |
| { |
| "epoch": 1.3611194703581102, |
| "grad_norm": 0.14829057455062866, |
| "learning_rate": 5.458937198067633e-06, |
| "loss": 10.4167, |
| "step": 283 |
| }, |
| { |
| "epoch": 1.3659343966295516, |
| "grad_norm": 0.18102510273456573, |
| "learning_rate": 5.442834138486313e-06, |
| "loss": 10.1862, |
| "step": 284 |
| }, |
| { |
| "epoch": 1.370749322900993, |
| "grad_norm": 0.1877845823764801, |
| "learning_rate": 5.426731078904992e-06, |
| "loss": 10.4398, |
| "step": 285 |
| }, |
| { |
| "epoch": 1.3755642491724345, |
| "grad_norm": 0.19289150834083557, |
| "learning_rate": 5.410628019323671e-06, |
| "loss": 10.1863, |
| "step": 286 |
| }, |
| { |
| "epoch": 1.380379175443876, |
| "grad_norm": 0.14551950991153717, |
| "learning_rate": 5.394524959742351e-06, |
| "loss": 8.6225, |
| "step": 287 |
| }, |
| { |
| "epoch": 1.3851941017153175, |
| "grad_norm": 0.15998440980911255, |
| "learning_rate": 5.3784219001610306e-06, |
| "loss": 9.6626, |
| "step": 288 |
| }, |
| { |
| "epoch": 1.390009027986759, |
| "grad_norm": 0.15218336880207062, |
| "learning_rate": 5.362318840579711e-06, |
| "loss": 9.0365, |
| "step": 289 |
| }, |
| { |
| "epoch": 1.3948239542582004, |
| "grad_norm": 0.19268082082271576, |
| "learning_rate": 5.346215780998391e-06, |
| "loss": 9.069, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.399638880529642, |
| "grad_norm": 0.15415695309638977, |
| "learning_rate": 5.3301127214170704e-06, |
| "loss": 10.0018, |
| "step": 291 |
| }, |
| { |
| "epoch": 1.4044538068010834, |
| "grad_norm": 0.1783796101808548, |
| "learning_rate": 5.314009661835749e-06, |
| "loss": 8.5794, |
| "step": 292 |
| }, |
| { |
| "epoch": 1.4092687330725249, |
| "grad_norm": 0.23539525270462036, |
| "learning_rate": 5.297906602254429e-06, |
| "loss": 9.2077, |
| "step": 293 |
| }, |
| { |
| "epoch": 1.4140836593439663, |
| "grad_norm": 0.19150039553642273, |
| "learning_rate": 5.281803542673109e-06, |
| "loss": 8.6828, |
| "step": 294 |
| }, |
| { |
| "epoch": 1.4188985856154077, |
| "grad_norm": 0.18820087611675262, |
| "learning_rate": 5.265700483091788e-06, |
| "loss": 9.4677, |
| "step": 295 |
| }, |
| { |
| "epoch": 1.4237135118868491, |
| "grad_norm": 0.5018635988235474, |
| "learning_rate": 5.249597423510467e-06, |
| "loss": 9.6455, |
| "step": 296 |
| }, |
| { |
| "epoch": 1.4285284381582908, |
| "grad_norm": 0.17721492052078247, |
| "learning_rate": 5.233494363929147e-06, |
| "loss": 8.3182, |
| "step": 297 |
| }, |
| { |
| "epoch": 1.4333433644297322, |
| "grad_norm": 0.20144477486610413, |
| "learning_rate": 5.2173913043478265e-06, |
| "loss": 9.1568, |
| "step": 298 |
| }, |
| { |
| "epoch": 1.4381582907011736, |
| "grad_norm": 0.18805253505706787, |
| "learning_rate": 5.201288244766506e-06, |
| "loss": 9.7496, |
| "step": 299 |
| }, |
| { |
| "epoch": 1.442973216972615, |
| "grad_norm": 0.1500595659017563, |
| "learning_rate": 5.185185185185185e-06, |
| "loss": 9.9708, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.4477881432440567, |
| "grad_norm": 0.19444873929023743, |
| "learning_rate": 5.169082125603865e-06, |
| "loss": 9.5143, |
| "step": 301 |
| }, |
| { |
| "epoch": 1.4526030695154981, |
| "grad_norm": 0.18682818114757538, |
| "learning_rate": 5.152979066022544e-06, |
| "loss": 10.2596, |
| "step": 302 |
| }, |
| { |
| "epoch": 1.4574179957869395, |
| "grad_norm": 0.17984358966350555, |
| "learning_rate": 5.136876006441224e-06, |
| "loss": 10.2697, |
| "step": 303 |
| }, |
| { |
| "epoch": 1.462232922058381, |
| "grad_norm": 0.17564424872398376, |
| "learning_rate": 5.1207729468599045e-06, |
| "loss": 9.9508, |
| "step": 304 |
| }, |
| { |
| "epoch": 1.4670478483298224, |
| "grad_norm": 0.1954619437456131, |
| "learning_rate": 5.104669887278584e-06, |
| "loss": 10.603, |
| "step": 305 |
| }, |
| { |
| "epoch": 1.4718627746012638, |
| "grad_norm": 0.16032911837100983, |
| "learning_rate": 5.088566827697263e-06, |
| "loss": 10.1388, |
| "step": 306 |
| }, |
| { |
| "epoch": 1.4766777008727054, |
| "grad_norm": 0.18712233006954193, |
| "learning_rate": 5.072463768115943e-06, |
| "loss": 10.603, |
| "step": 307 |
| }, |
| { |
| "epoch": 1.4814926271441469, |
| "grad_norm": 0.18479761481285095, |
| "learning_rate": 5.056360708534622e-06, |
| "loss": 9.8074, |
| "step": 308 |
| }, |
| { |
| "epoch": 1.4863075534155883, |
| "grad_norm": 0.14700675010681152, |
| "learning_rate": 5.040257648953302e-06, |
| "loss": 10.4248, |
| "step": 309 |
| }, |
| { |
| "epoch": 1.4911224796870297, |
| "grad_norm": 0.13533058762550354, |
| "learning_rate": 5.024154589371981e-06, |
| "loss": 9.1913, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.4959374059584714, |
| "grad_norm": 0.1617136150598526, |
| "learning_rate": 5.0080515297906606e-06, |
| "loss": 8.7997, |
| "step": 311 |
| }, |
| { |
| "epoch": 1.5007523322299128, |
| "grad_norm": 0.14999867975711823, |
| "learning_rate": 4.99194847020934e-06, |
| "loss": 10.7806, |
| "step": 312 |
| }, |
| { |
| "epoch": 1.5055672585013542, |
| "grad_norm": 0.1483631134033203, |
| "learning_rate": 4.97584541062802e-06, |
| "loss": 8.9508, |
| "step": 313 |
| }, |
| { |
| "epoch": 1.5103821847727956, |
| "grad_norm": 0.1401262730360031, |
| "learning_rate": 4.959742351046699e-06, |
| "loss": 9.3086, |
| "step": 314 |
| }, |
| { |
| "epoch": 1.515197111044237, |
| "grad_norm": 0.20340582728385925, |
| "learning_rate": 4.9436392914653784e-06, |
| "loss": 9.6934, |
| "step": 315 |
| }, |
| { |
| "epoch": 1.5200120373156785, |
| "grad_norm": 0.10809484124183655, |
| "learning_rate": 4.927536231884059e-06, |
| "loss": 8.5021, |
| "step": 316 |
| }, |
| { |
| "epoch": 1.52482696358712, |
| "grad_norm": 0.18179920315742493, |
| "learning_rate": 4.911433172302738e-06, |
| "loss": 8.7153, |
| "step": 317 |
| }, |
| { |
| "epoch": 1.5296418898585615, |
| "grad_norm": 0.1383148580789566, |
| "learning_rate": 4.8953301127214175e-06, |
| "loss": 10.6062, |
| "step": 318 |
| }, |
| { |
| "epoch": 1.534456816130003, |
| "grad_norm": 0.21121209859848022, |
| "learning_rate": 4.879227053140097e-06, |
| "loss": 8.7374, |
| "step": 319 |
| }, |
| { |
| "epoch": 1.5392717424014446, |
| "grad_norm": 0.19276529550552368, |
| "learning_rate": 4.863123993558777e-06, |
| "loss": 8.7942, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.544086668672886, |
| "grad_norm": 0.18534629046916962, |
| "learning_rate": 4.847020933977456e-06, |
| "loss": 7.3924, |
| "step": 321 |
| }, |
| { |
| "epoch": 1.5489015949443274, |
| "grad_norm": 0.11499077826738358, |
| "learning_rate": 4.830917874396135e-06, |
| "loss": 9.042, |
| "step": 322 |
| }, |
| { |
| "epoch": 1.5537165212157689, |
| "grad_norm": 0.19323264062404633, |
| "learning_rate": 4.814814814814815e-06, |
| "loss": 9.5308, |
| "step": 323 |
| }, |
| { |
| "epoch": 1.5585314474872103, |
| "grad_norm": 0.163632333278656, |
| "learning_rate": 4.798711755233495e-06, |
| "loss": 9.5654, |
| "step": 324 |
| }, |
| { |
| "epoch": 1.5633463737586517, |
| "grad_norm": 0.24960660934448242, |
| "learning_rate": 4.782608695652174e-06, |
| "loss": 8.9639, |
| "step": 325 |
| }, |
| { |
| "epoch": 1.5681613000300931, |
| "grad_norm": 0.13659049570560455, |
| "learning_rate": 4.766505636070854e-06, |
| "loss": 9.2311, |
| "step": 326 |
| }, |
| { |
| "epoch": 1.5729762263015348, |
| "grad_norm": 0.19566506147384644, |
| "learning_rate": 4.750402576489534e-06, |
| "loss": 9.9125, |
| "step": 327 |
| }, |
| { |
| "epoch": 1.5777911525729762, |
| "grad_norm": 0.13559715449810028, |
| "learning_rate": 4.7342995169082125e-06, |
| "loss": 10.2432, |
| "step": 328 |
| }, |
| { |
| "epoch": 1.5826060788444178, |
| "grad_norm": 0.20595477521419525, |
| "learning_rate": 4.718196457326892e-06, |
| "loss": 9.3677, |
| "step": 329 |
| }, |
| { |
| "epoch": 1.5874210051158593, |
| "grad_norm": 0.1580948680639267, |
| "learning_rate": 4.702093397745572e-06, |
| "loss": 10.0316, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.5922359313873007, |
| "grad_norm": 0.1536228209733963, |
| "learning_rate": 4.6859903381642516e-06, |
| "loss": 10.8169, |
| "step": 331 |
| }, |
| { |
| "epoch": 1.597050857658742, |
| "grad_norm": 0.17159651219844818, |
| "learning_rate": 4.669887278582931e-06, |
| "loss": 9.8849, |
| "step": 332 |
| }, |
| { |
| "epoch": 1.6018657839301835, |
| "grad_norm": 0.14754590392112732, |
| "learning_rate": 4.653784219001611e-06, |
| "loss": 8.2419, |
| "step": 333 |
| }, |
| { |
| "epoch": 1.606680710201625, |
| "grad_norm": 0.15272633731365204, |
| "learning_rate": 4.637681159420291e-06, |
| "loss": 10.358, |
| "step": 334 |
| }, |
| { |
| "epoch": 1.6114956364730664, |
| "grad_norm": 0.23571325838565826, |
| "learning_rate": 4.621578099838969e-06, |
| "loss": 8.9846, |
| "step": 335 |
| }, |
| { |
| "epoch": 1.616310562744508, |
| "grad_norm": 0.1520383059978485, |
| "learning_rate": 4.605475040257649e-06, |
| "loss": 9.627, |
| "step": 336 |
| }, |
| { |
| "epoch": 1.6211254890159494, |
| "grad_norm": 0.16789157688617706, |
| "learning_rate": 4.589371980676329e-06, |
| "loss": 8.9584, |
| "step": 337 |
| }, |
| { |
| "epoch": 1.6259404152873909, |
| "grad_norm": 0.23156379163265228, |
| "learning_rate": 4.5732689210950084e-06, |
| "loss": 9.4165, |
| "step": 338 |
| }, |
| { |
| "epoch": 1.6307553415588325, |
| "grad_norm": 0.2569849491119385, |
| "learning_rate": 4.557165861513688e-06, |
| "loss": 8.6545, |
| "step": 339 |
| }, |
| { |
| "epoch": 1.635570267830274, |
| "grad_norm": 0.1471448540687561, |
| "learning_rate": 4.541062801932368e-06, |
| "loss": 9.7279, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.6403851941017153, |
| "grad_norm": 0.19168996810913086, |
| "learning_rate": 4.5249597423510475e-06, |
| "loss": 8.6823, |
| "step": 341 |
| }, |
| { |
| "epoch": 1.6452001203731568, |
| "grad_norm": 0.16900351643562317, |
| "learning_rate": 4.508856682769726e-06, |
| "loss": 9.3926, |
| "step": 342 |
| }, |
| { |
| "epoch": 1.6500150466445982, |
| "grad_norm": 0.1279803216457367, |
| "learning_rate": 4.492753623188406e-06, |
| "loss": 8.8222, |
| "step": 343 |
| }, |
| { |
| "epoch": 1.6548299729160396, |
| "grad_norm": 0.16592150926589966, |
| "learning_rate": 4.476650563607086e-06, |
| "loss": 9.3739, |
| "step": 344 |
| }, |
| { |
| "epoch": 1.659644899187481, |
| "grad_norm": 0.18117226660251617, |
| "learning_rate": 4.460547504025765e-06, |
| "loss": 8.6028, |
| "step": 345 |
| }, |
| { |
| "epoch": 1.6644598254589227, |
| "grad_norm": 0.15843939781188965, |
| "learning_rate": 4.444444444444444e-06, |
| "loss": 7.3552, |
| "step": 346 |
| }, |
| { |
| "epoch": 1.669274751730364, |
| "grad_norm": 0.1672333925962448, |
| "learning_rate": 4.428341384863125e-06, |
| "loss": 8.1781, |
| "step": 347 |
| }, |
| { |
| "epoch": 1.6740896780018057, |
| "grad_norm": 0.1798122376203537, |
| "learning_rate": 4.412238325281804e-06, |
| "loss": 9.5976, |
| "step": 348 |
| }, |
| { |
| "epoch": 1.6789046042732472, |
| "grad_norm": 0.15125727653503418, |
| "learning_rate": 4.396135265700483e-06, |
| "loss": 9.2915, |
| "step": 349 |
| }, |
| { |
| "epoch": 1.6837195305446886, |
| "grad_norm": 0.15909244120121002, |
| "learning_rate": 4.380032206119163e-06, |
| "loss": 8.5171, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.68853445681613, |
| "grad_norm": 0.19835767149925232, |
| "learning_rate": 4.3639291465378425e-06, |
| "loss": 9.3835, |
| "step": 351 |
| }, |
| { |
| "epoch": 1.6933493830875714, |
| "grad_norm": 0.30680009722709656, |
| "learning_rate": 4.347826086956522e-06, |
| "loss": 9.3733, |
| "step": 352 |
| }, |
| { |
| "epoch": 1.6981643093590129, |
| "grad_norm": 0.13429707288742065, |
| "learning_rate": 4.331723027375201e-06, |
| "loss": 9.6266, |
| "step": 353 |
| }, |
| { |
| "epoch": 1.7029792356304543, |
| "grad_norm": 0.16423039138317108, |
| "learning_rate": 4.315619967793881e-06, |
| "loss": 9.4546, |
| "step": 354 |
| }, |
| { |
| "epoch": 1.707794161901896, |
| "grad_norm": 0.14078310132026672, |
| "learning_rate": 4.299516908212561e-06, |
| "loss": 9.1663, |
| "step": 355 |
| }, |
| { |
| "epoch": 1.7126090881733373, |
| "grad_norm": 0.2016141414642334, |
| "learning_rate": 4.28341384863124e-06, |
| "loss": 8.6698, |
| "step": 356 |
| }, |
| { |
| "epoch": 1.7174240144447788, |
| "grad_norm": 0.13229703903198242, |
| "learning_rate": 4.26731078904992e-06, |
| "loss": 8.5468, |
| "step": 357 |
| }, |
| { |
| "epoch": 1.7222389407162204, |
| "grad_norm": 0.22356487810611725, |
| "learning_rate": 4.251207729468599e-06, |
| "loss": 9.311, |
| "step": 358 |
| }, |
| { |
| "epoch": 1.7270538669876618, |
| "grad_norm": 0.19844292104244232, |
| "learning_rate": 4.235104669887279e-06, |
| "loss": 9.054, |
| "step": 359 |
| }, |
| { |
| "epoch": 1.7318687932591033, |
| "grad_norm": 0.18081983923912048, |
| "learning_rate": 4.219001610305958e-06, |
| "loss": 8.9678, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.7366837195305447, |
| "grad_norm": 0.2216968685388565, |
| "learning_rate": 4.202898550724638e-06, |
| "loss": 8.6121, |
| "step": 361 |
| }, |
| { |
| "epoch": 1.741498645801986, |
| "grad_norm": 0.14121295511722565, |
| "learning_rate": 4.186795491143318e-06, |
| "loss": 9.2074, |
| "step": 362 |
| }, |
| { |
| "epoch": 1.7463135720734275, |
| "grad_norm": 0.148764505982399, |
| "learning_rate": 4.170692431561997e-06, |
| "loss": 9.1965, |
| "step": 363 |
| }, |
| { |
| "epoch": 1.751128498344869, |
| "grad_norm": 0.20818910002708435, |
| "learning_rate": 4.154589371980677e-06, |
| "loss": 8.5382, |
| "step": 364 |
| }, |
| { |
| "epoch": 1.7559434246163106, |
| "grad_norm": 0.1755458116531372, |
| "learning_rate": 4.138486312399356e-06, |
| "loss": 9.0389, |
| "step": 365 |
| }, |
| { |
| "epoch": 1.760758350887752, |
| "grad_norm": 0.15656408667564392, |
| "learning_rate": 4.122383252818036e-06, |
| "loss": 9.226, |
| "step": 366 |
| }, |
| { |
| "epoch": 1.7655732771591937, |
| "grad_norm": 0.14213398098945618, |
| "learning_rate": 4.106280193236716e-06, |
| "loss": 8.2302, |
| "step": 367 |
| }, |
| { |
| "epoch": 1.770388203430635, |
| "grad_norm": 0.1693073809146881, |
| "learning_rate": 4.0901771336553945e-06, |
| "loss": 9.6989, |
| "step": 368 |
| }, |
| { |
| "epoch": 1.7752031297020765, |
| "grad_norm": 0.15878278017044067, |
| "learning_rate": 4.074074074074074e-06, |
| "loss": 9.4632, |
| "step": 369 |
| }, |
| { |
| "epoch": 1.780018055973518, |
| "grad_norm": 0.22463774681091309, |
| "learning_rate": 4.057971014492754e-06, |
| "loss": 9.7328, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.7848329822449593, |
| "grad_norm": 0.4724883437156677, |
| "learning_rate": 4.0418679549114335e-06, |
| "loss": 10.2544, |
| "step": 371 |
| }, |
| { |
| "epoch": 1.7896479085164008, |
| "grad_norm": 0.17619994282722473, |
| "learning_rate": 4.025764895330113e-06, |
| "loss": 9.6433, |
| "step": 372 |
| }, |
| { |
| "epoch": 1.7944628347878422, |
| "grad_norm": 0.16114237904548645, |
| "learning_rate": 4.009661835748793e-06, |
| "loss": 10.651, |
| "step": 373 |
| }, |
| { |
| "epoch": 1.7992777610592838, |
| "grad_norm": 0.2053680568933487, |
| "learning_rate": 3.9935587761674725e-06, |
| "loss": 8.8208, |
| "step": 374 |
| }, |
| { |
| "epoch": 1.8040926873307253, |
| "grad_norm": 0.17200101912021637, |
| "learning_rate": 3.977455716586151e-06, |
| "loss": 8.9841, |
| "step": 375 |
| }, |
| { |
| "epoch": 1.8089076136021667, |
| "grad_norm": 0.12033673375844955, |
| "learning_rate": 3.961352657004831e-06, |
| "loss": 8.552, |
| "step": 376 |
| }, |
| { |
| "epoch": 1.8137225398736083, |
| "grad_norm": 0.17469695210456848, |
| "learning_rate": 3.945249597423511e-06, |
| "loss": 8.6438, |
| "step": 377 |
| }, |
| { |
| "epoch": 1.8185374661450497, |
| "grad_norm": 0.19993340969085693, |
| "learning_rate": 3.92914653784219e-06, |
| "loss": 9.326, |
| "step": 378 |
| }, |
| { |
| "epoch": 1.8233523924164912, |
| "grad_norm": 0.18282270431518555, |
| "learning_rate": 3.91304347826087e-06, |
| "loss": 8.6382, |
| "step": 379 |
| }, |
| { |
| "epoch": 1.8281673186879326, |
| "grad_norm": 0.21918214857578278, |
| "learning_rate": 3.89694041867955e-06, |
| "loss": 9.7637, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.832982244959374, |
| "grad_norm": 0.19311483204364777, |
| "learning_rate": 3.880837359098229e-06, |
| "loss": 9.7215, |
| "step": 381 |
| }, |
| { |
| "epoch": 1.8377971712308154, |
| "grad_norm": 0.2024223506450653, |
| "learning_rate": 3.864734299516908e-06, |
| "loss": 9.1813, |
| "step": 382 |
| }, |
| { |
| "epoch": 1.8426120975022569, |
| "grad_norm": 0.15196166932582855, |
| "learning_rate": 3.848631239935588e-06, |
| "loss": 9.4212, |
| "step": 383 |
| }, |
| { |
| "epoch": 1.8474270237736985, |
| "grad_norm": 0.20014698803424835, |
| "learning_rate": 3.832528180354268e-06, |
| "loss": 9.0854, |
| "step": 384 |
| }, |
| { |
| "epoch": 1.85224195004514, |
| "grad_norm": 0.2045230120420456, |
| "learning_rate": 3.816425120772947e-06, |
| "loss": 9.3168, |
| "step": 385 |
| }, |
| { |
| "epoch": 1.8570568763165816, |
| "grad_norm": 0.13044817745685577, |
| "learning_rate": 3.800322061191627e-06, |
| "loss": 8.4085, |
| "step": 386 |
| }, |
| { |
| "epoch": 1.861871802588023, |
| "grad_norm": 0.19362546503543854, |
| "learning_rate": 3.7842190016103066e-06, |
| "loss": 8.5539, |
| "step": 387 |
| }, |
| { |
| "epoch": 1.8666867288594644, |
| "grad_norm": 0.19143155217170715, |
| "learning_rate": 3.768115942028986e-06, |
| "loss": 9.0545, |
| "step": 388 |
| }, |
| { |
| "epoch": 1.8715016551309058, |
| "grad_norm": 0.18278856575489044, |
| "learning_rate": 3.7520128824476656e-06, |
| "loss": 8.6361, |
| "step": 389 |
| }, |
| { |
| "epoch": 1.8763165814023472, |
| "grad_norm": 0.20836183428764343, |
| "learning_rate": 3.735909822866345e-06, |
| "loss": 9.0898, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.8811315076737887, |
| "grad_norm": 0.18853327631950378, |
| "learning_rate": 3.7198067632850245e-06, |
| "loss": 9.8428, |
| "step": 391 |
| }, |
| { |
| "epoch": 1.88594643394523, |
| "grad_norm": 0.13650333881378174, |
| "learning_rate": 3.7037037037037037e-06, |
| "loss": 8.8437, |
| "step": 392 |
| }, |
| { |
| "epoch": 1.8907613602166715, |
| "grad_norm": 0.20635420083999634, |
| "learning_rate": 3.6876006441223834e-06, |
| "loss": 9.0691, |
| "step": 393 |
| }, |
| { |
| "epoch": 1.8955762864881132, |
| "grad_norm": 0.16736768186092377, |
| "learning_rate": 3.6714975845410635e-06, |
| "loss": 9.318, |
| "step": 394 |
| }, |
| { |
| "epoch": 1.9003912127595546, |
| "grad_norm": 0.21544639766216278, |
| "learning_rate": 3.6553945249597428e-06, |
| "loss": 8.9387, |
| "step": 395 |
| }, |
| { |
| "epoch": 1.9052061390309962, |
| "grad_norm": 0.17389844357967377, |
| "learning_rate": 3.6392914653784224e-06, |
| "loss": 9.549, |
| "step": 396 |
| }, |
| { |
| "epoch": 1.9100210653024376, |
| "grad_norm": 0.21728019416332245, |
| "learning_rate": 3.6231884057971017e-06, |
| "loss": 8.0753, |
| "step": 397 |
| }, |
| { |
| "epoch": 1.914835991573879, |
| "grad_norm": 0.199959859251976, |
| "learning_rate": 3.6070853462157814e-06, |
| "loss": 9.9683, |
| "step": 398 |
| }, |
| { |
| "epoch": 1.9196509178453205, |
| "grad_norm": 0.16808640956878662, |
| "learning_rate": 3.5909822866344606e-06, |
| "loss": 9.2667, |
| "step": 399 |
| }, |
| { |
| "epoch": 1.924465844116762, |
| "grad_norm": 0.15371474623680115, |
| "learning_rate": 3.5748792270531403e-06, |
| "loss": 8.9782, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.9292807703882033, |
| "grad_norm": 0.22420039772987366, |
| "learning_rate": 3.5587761674718204e-06, |
| "loss": 9.5041, |
| "step": 401 |
| }, |
| { |
| "epoch": 1.9340956966596448, |
| "grad_norm": 0.19234929978847504, |
| "learning_rate": 3.5426731078904997e-06, |
| "loss": 8.8785, |
| "step": 402 |
| }, |
| { |
| "epoch": 1.9389106229310864, |
| "grad_norm": 0.13435740768909454, |
| "learning_rate": 3.5265700483091793e-06, |
| "loss": 9.3544, |
| "step": 403 |
| }, |
| { |
| "epoch": 1.9437255492025278, |
| "grad_norm": 0.21900928020477295, |
| "learning_rate": 3.5104669887278586e-06, |
| "loss": 8.1942, |
| "step": 404 |
| }, |
| { |
| "epoch": 1.9485404754739695, |
| "grad_norm": 0.16180120408535004, |
| "learning_rate": 3.4943639291465383e-06, |
| "loss": 9.4132, |
| "step": 405 |
| }, |
| { |
| "epoch": 1.953355401745411, |
| "grad_norm": 0.2743014991283417, |
| "learning_rate": 3.4782608695652175e-06, |
| "loss": 10.1588, |
| "step": 406 |
| }, |
| { |
| "epoch": 1.9581703280168523, |
| "grad_norm": 0.14160144329071045, |
| "learning_rate": 3.462157809983897e-06, |
| "loss": 9.0612, |
| "step": 407 |
| }, |
| { |
| "epoch": 1.9629852542882937, |
| "grad_norm": 0.1383216828107834, |
| "learning_rate": 3.4460547504025764e-06, |
| "loss": 7.8391, |
| "step": 408 |
| }, |
| { |
| "epoch": 1.9678001805597352, |
| "grad_norm": 0.16990961134433746, |
| "learning_rate": 3.4299516908212565e-06, |
| "loss": 9.6392, |
| "step": 409 |
| }, |
| { |
| "epoch": 1.9726151068311766, |
| "grad_norm": 0.17103661596775055, |
| "learning_rate": 3.4138486312399362e-06, |
| "loss": 9.8119, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.977430033102618, |
| "grad_norm": 0.13866282999515533, |
| "learning_rate": 3.3977455716586155e-06, |
| "loss": 8.2033, |
| "step": 411 |
| }, |
| { |
| "epoch": 1.9822449593740594, |
| "grad_norm": 0.21080395579338074, |
| "learning_rate": 3.381642512077295e-06, |
| "loss": 10.3113, |
| "step": 412 |
| }, |
| { |
| "epoch": 1.987059885645501, |
| "grad_norm": 0.19845469295978546, |
| "learning_rate": 3.3655394524959744e-06, |
| "loss": 8.2103, |
| "step": 413 |
| }, |
| { |
| "epoch": 1.9918748119169425, |
| "grad_norm": 0.1903708279132843, |
| "learning_rate": 3.349436392914654e-06, |
| "loss": 9.1371, |
| "step": 414 |
| }, |
| { |
| "epoch": 1.9966897381883841, |
| "grad_norm": 0.16223041713237762, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 7.368, |
| "step": 415 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.14595092833042145, |
| "learning_rate": 3.317230273752013e-06, |
| "loss": 5.8598, |
| "step": 416 |
| }, |
| { |
| "epoch": 2.0048149262714414, |
| "grad_norm": 0.15010525286197662, |
| "learning_rate": 3.301127214170693e-06, |
| "loss": 8.1315, |
| "step": 417 |
| }, |
| { |
| "epoch": 2.009629852542883, |
| "grad_norm": 0.23141905665397644, |
| "learning_rate": 3.2850241545893724e-06, |
| "loss": 9.3878, |
| "step": 418 |
| }, |
| { |
| "epoch": 2.0144447788143243, |
| "grad_norm": 0.11268898099660873, |
| "learning_rate": 3.268921095008052e-06, |
| "loss": 7.8098, |
| "step": 419 |
| }, |
| { |
| "epoch": 2.0192597050857657, |
| "grad_norm": 0.16212859749794006, |
| "learning_rate": 3.2528180354267313e-06, |
| "loss": 8.5319, |
| "step": 420 |
| }, |
| { |
| "epoch": 2.0240746313572076, |
| "grad_norm": 0.1565706580877304, |
| "learning_rate": 3.236714975845411e-06, |
| "loss": 7.8942, |
| "step": 421 |
| }, |
| { |
| "epoch": 2.028889557628649, |
| "grad_norm": 0.1680455058813095, |
| "learning_rate": 3.22061191626409e-06, |
| "loss": 8.2839, |
| "step": 422 |
| }, |
| { |
| "epoch": 2.0337044839000904, |
| "grad_norm": 0.2539815306663513, |
| "learning_rate": 3.20450885668277e-06, |
| "loss": 8.9937, |
| "step": 423 |
| }, |
| { |
| "epoch": 2.038519410171532, |
| "grad_norm": 0.238030806183815, |
| "learning_rate": 3.188405797101449e-06, |
| "loss": 8.4321, |
| "step": 424 |
| }, |
| { |
| "epoch": 2.0433343364429732, |
| "grad_norm": 0.19473034143447876, |
| "learning_rate": 3.1723027375201292e-06, |
| "loss": 8.0307, |
| "step": 425 |
| }, |
| { |
| "epoch": 2.0481492627144147, |
| "grad_norm": 0.16554652154445648, |
| "learning_rate": 3.156199677938809e-06, |
| "loss": 9.5945, |
| "step": 426 |
| }, |
| { |
| "epoch": 2.052964188985856, |
| "grad_norm": 0.19130951166152954, |
| "learning_rate": 3.140096618357488e-06, |
| "loss": 8.0234, |
| "step": 427 |
| }, |
| { |
| "epoch": 2.0577791152572975, |
| "grad_norm": 0.14681276679039001, |
| "learning_rate": 3.123993558776168e-06, |
| "loss": 8.6784, |
| "step": 428 |
| }, |
| { |
| "epoch": 2.062594041528739, |
| "grad_norm": 0.10328257828950882, |
| "learning_rate": 3.107890499194847e-06, |
| "loss": 8.0287, |
| "step": 429 |
| }, |
| { |
| "epoch": 2.0674089678001804, |
| "grad_norm": 0.19125495851039886, |
| "learning_rate": 3.0917874396135268e-06, |
| "loss": 8.0046, |
| "step": 430 |
| }, |
| { |
| "epoch": 2.072223894071622, |
| "grad_norm": 0.1793103963136673, |
| "learning_rate": 3.075684380032206e-06, |
| "loss": 7.0518, |
| "step": 431 |
| }, |
| { |
| "epoch": 2.0770388203430636, |
| "grad_norm": 0.2568497657775879, |
| "learning_rate": 3.059581320450886e-06, |
| "loss": 8.2794, |
| "step": 432 |
| }, |
| { |
| "epoch": 2.081853746614505, |
| "grad_norm": 0.18120069801807404, |
| "learning_rate": 3.043478260869566e-06, |
| "loss": 10.1022, |
| "step": 433 |
| }, |
| { |
| "epoch": 2.0866686728859465, |
| "grad_norm": 0.27532005310058594, |
| "learning_rate": 3.027375201288245e-06, |
| "loss": 9.3059, |
| "step": 434 |
| }, |
| { |
| "epoch": 2.091483599157388, |
| "grad_norm": 0.15648192167282104, |
| "learning_rate": 3.0112721417069247e-06, |
| "loss": 7.8617, |
| "step": 435 |
| }, |
| { |
| "epoch": 2.0962985254288293, |
| "grad_norm": 0.17381350696086884, |
| "learning_rate": 2.995169082125604e-06, |
| "loss": 9.0082, |
| "step": 436 |
| }, |
| { |
| "epoch": 2.1011134517002708, |
| "grad_norm": 0.13711951673030853, |
| "learning_rate": 2.9790660225442837e-06, |
| "loss": 7.764, |
| "step": 437 |
| }, |
| { |
| "epoch": 2.105928377971712, |
| "grad_norm": 0.23948128521442413, |
| "learning_rate": 2.962962962962963e-06, |
| "loss": 8.4041, |
| "step": 438 |
| }, |
| { |
| "epoch": 2.1107433042431536, |
| "grad_norm": 0.15631070733070374, |
| "learning_rate": 2.9468599033816426e-06, |
| "loss": 8.1708, |
| "step": 439 |
| }, |
| { |
| "epoch": 2.115558230514595, |
| "grad_norm": 0.1608411967754364, |
| "learning_rate": 2.9307568438003227e-06, |
| "loss": 8.301, |
| "step": 440 |
| }, |
| { |
| "epoch": 2.120373156786037, |
| "grad_norm": 0.16660411655902863, |
| "learning_rate": 2.914653784219002e-06, |
| "loss": 9.5365, |
| "step": 441 |
| }, |
| { |
| "epoch": 2.1251880830574783, |
| "grad_norm": 0.17191386222839355, |
| "learning_rate": 2.8985507246376816e-06, |
| "loss": 8.9256, |
| "step": 442 |
| }, |
| { |
| "epoch": 2.1300030093289197, |
| "grad_norm": 0.18492081761360168, |
| "learning_rate": 2.882447665056361e-06, |
| "loss": 9.0577, |
| "step": 443 |
| }, |
| { |
| "epoch": 2.134817935600361, |
| "grad_norm": 0.2561168670654297, |
| "learning_rate": 2.8663446054750405e-06, |
| "loss": 8.8758, |
| "step": 444 |
| }, |
| { |
| "epoch": 2.1396328618718026, |
| "grad_norm": 0.1588340848684311, |
| "learning_rate": 2.85024154589372e-06, |
| "loss": 8.1364, |
| "step": 445 |
| }, |
| { |
| "epoch": 2.144447788143244, |
| "grad_norm": 0.1650805026292801, |
| "learning_rate": 2.8341384863123995e-06, |
| "loss": 8.337, |
| "step": 446 |
| }, |
| { |
| "epoch": 2.1492627144146854, |
| "grad_norm": 0.2011885941028595, |
| "learning_rate": 2.8180354267310787e-06, |
| "loss": 8.6296, |
| "step": 447 |
| }, |
| { |
| "epoch": 2.154077640686127, |
| "grad_norm": 0.18557001650333405, |
| "learning_rate": 2.801932367149759e-06, |
| "loss": 8.6218, |
| "step": 448 |
| }, |
| { |
| "epoch": 2.1588925669575683, |
| "grad_norm": 0.1598547399044037, |
| "learning_rate": 2.7858293075684385e-06, |
| "loss": 8.4741, |
| "step": 449 |
| }, |
| { |
| "epoch": 2.16370749322901, |
| "grad_norm": 0.17089636623859406, |
| "learning_rate": 2.7697262479871177e-06, |
| "loss": 9.0788, |
| "step": 450 |
| }, |
| { |
| "epoch": 2.1685224195004515, |
| "grad_norm": 0.1817985475063324, |
| "learning_rate": 2.7536231884057974e-06, |
| "loss": 9.9717, |
| "step": 451 |
| }, |
| { |
| "epoch": 2.173337345771893, |
| "grad_norm": 0.23914600908756256, |
| "learning_rate": 2.7375201288244767e-06, |
| "loss": 8.7548, |
| "step": 452 |
| }, |
| { |
| "epoch": 2.1781522720433344, |
| "grad_norm": 0.17113572359085083, |
| "learning_rate": 2.7214170692431564e-06, |
| "loss": 7.7566, |
| "step": 453 |
| }, |
| { |
| "epoch": 2.182967198314776, |
| "grad_norm": 0.14485716819763184, |
| "learning_rate": 2.7053140096618356e-06, |
| "loss": 8.9532, |
| "step": 454 |
| }, |
| { |
| "epoch": 2.1877821245862172, |
| "grad_norm": 0.14129236340522766, |
| "learning_rate": 2.6892109500805153e-06, |
| "loss": 9.2833, |
| "step": 455 |
| }, |
| { |
| "epoch": 2.1925970508576587, |
| "grad_norm": 0.23692472279071808, |
| "learning_rate": 2.6731078904991954e-06, |
| "loss": 8.3895, |
| "step": 456 |
| }, |
| { |
| "epoch": 2.1974119771291, |
| "grad_norm": 0.16027197241783142, |
| "learning_rate": 2.6570048309178746e-06, |
| "loss": 7.7012, |
| "step": 457 |
| }, |
| { |
| "epoch": 2.2022269034005415, |
| "grad_norm": 0.1416737139225006, |
| "learning_rate": 2.6409017713365543e-06, |
| "loss": 9.0799, |
| "step": 458 |
| }, |
| { |
| "epoch": 2.207041829671983, |
| "grad_norm": 0.20678099989891052, |
| "learning_rate": 2.6247987117552336e-06, |
| "loss": 9.3679, |
| "step": 459 |
| }, |
| { |
| "epoch": 2.211856755943425, |
| "grad_norm": 0.1649148017168045, |
| "learning_rate": 2.6086956521739132e-06, |
| "loss": 8.0503, |
| "step": 460 |
| }, |
| { |
| "epoch": 2.216671682214866, |
| "grad_norm": 0.21159884333610535, |
| "learning_rate": 2.5925925925925925e-06, |
| "loss": 9.0968, |
| "step": 461 |
| }, |
| { |
| "epoch": 2.2214866084863076, |
| "grad_norm": 0.13705681264400482, |
| "learning_rate": 2.576489533011272e-06, |
| "loss": 8.9948, |
| "step": 462 |
| }, |
| { |
| "epoch": 2.226301534757749, |
| "grad_norm": 0.16624397039413452, |
| "learning_rate": 2.5603864734299523e-06, |
| "loss": 8.6079, |
| "step": 463 |
| }, |
| { |
| "epoch": 2.2311164610291905, |
| "grad_norm": 0.1475958675146103, |
| "learning_rate": 2.5442834138486315e-06, |
| "loss": 8.0187, |
| "step": 464 |
| }, |
| { |
| "epoch": 2.235931387300632, |
| "grad_norm": 0.13494673371315002, |
| "learning_rate": 2.528180354267311e-06, |
| "loss": 8.6545, |
| "step": 465 |
| }, |
| { |
| "epoch": 2.2407463135720733, |
| "grad_norm": 0.17623811960220337, |
| "learning_rate": 2.5120772946859904e-06, |
| "loss": 9.4341, |
| "step": 466 |
| }, |
| { |
| "epoch": 2.2455612398435147, |
| "grad_norm": 0.1706833392381668, |
| "learning_rate": 2.49597423510467e-06, |
| "loss": 8.7199, |
| "step": 467 |
| }, |
| { |
| "epoch": 2.2503761661149566, |
| "grad_norm": 0.1953025609254837, |
| "learning_rate": 2.4798711755233494e-06, |
| "loss": 8.9361, |
| "step": 468 |
| }, |
| { |
| "epoch": 2.255191092386398, |
| "grad_norm": 0.20142245292663574, |
| "learning_rate": 2.4637681159420295e-06, |
| "loss": 8.0552, |
| "step": 469 |
| }, |
| { |
| "epoch": 2.2600060186578395, |
| "grad_norm": 0.20138177275657654, |
| "learning_rate": 2.4476650563607087e-06, |
| "loss": 8.5942, |
| "step": 470 |
| }, |
| { |
| "epoch": 2.264820944929281, |
| "grad_norm": 0.16559800505638123, |
| "learning_rate": 2.4315619967793884e-06, |
| "loss": 8.8228, |
| "step": 471 |
| }, |
| { |
| "epoch": 2.2696358712007223, |
| "grad_norm": 0.19990870356559753, |
| "learning_rate": 2.4154589371980677e-06, |
| "loss": 8.8207, |
| "step": 472 |
| }, |
| { |
| "epoch": 2.2744507974721637, |
| "grad_norm": 0.21723681688308716, |
| "learning_rate": 2.3993558776167473e-06, |
| "loss": 8.4973, |
| "step": 473 |
| }, |
| { |
| "epoch": 2.279265723743605, |
| "grad_norm": 0.17915472388267517, |
| "learning_rate": 2.383252818035427e-06, |
| "loss": 9.6049, |
| "step": 474 |
| }, |
| { |
| "epoch": 2.2840806500150466, |
| "grad_norm": 0.16757084429264069, |
| "learning_rate": 2.3671497584541063e-06, |
| "loss": 9.7332, |
| "step": 475 |
| }, |
| { |
| "epoch": 2.288895576286488, |
| "grad_norm": 0.16891081631183624, |
| "learning_rate": 2.351046698872786e-06, |
| "loss": 8.8673, |
| "step": 476 |
| }, |
| { |
| "epoch": 2.2937105025579294, |
| "grad_norm": 0.20567509531974792, |
| "learning_rate": 2.3349436392914656e-06, |
| "loss": 7.8363, |
| "step": 477 |
| }, |
| { |
| "epoch": 2.298525428829371, |
| "grad_norm": 0.1999160349369049, |
| "learning_rate": 2.3188405797101453e-06, |
| "loss": 8.7728, |
| "step": 478 |
| }, |
| { |
| "epoch": 2.3033403551008127, |
| "grad_norm": 0.2348831444978714, |
| "learning_rate": 2.3027375201288245e-06, |
| "loss": 9.1277, |
| "step": 479 |
| }, |
| { |
| "epoch": 2.308155281372254, |
| "grad_norm": 0.1700768917798996, |
| "learning_rate": 2.2866344605475042e-06, |
| "loss": 8.6687, |
| "step": 480 |
| }, |
| { |
| "epoch": 2.3129702076436955, |
| "grad_norm": 0.16349351406097412, |
| "learning_rate": 2.270531400966184e-06, |
| "loss": 8.0606, |
| "step": 481 |
| }, |
| { |
| "epoch": 2.317785133915137, |
| "grad_norm": 0.1540592461824417, |
| "learning_rate": 2.254428341384863e-06, |
| "loss": 7.3249, |
| "step": 482 |
| }, |
| { |
| "epoch": 2.3226000601865784, |
| "grad_norm": 0.1774080991744995, |
| "learning_rate": 2.238325281803543e-06, |
| "loss": 8.3134, |
| "step": 483 |
| }, |
| { |
| "epoch": 2.32741498645802, |
| "grad_norm": 0.14969424903392792, |
| "learning_rate": 2.222222222222222e-06, |
| "loss": 7.8938, |
| "step": 484 |
| }, |
| { |
| "epoch": 2.3322299127294612, |
| "grad_norm": 0.20331765711307526, |
| "learning_rate": 2.206119162640902e-06, |
| "loss": 8.3293, |
| "step": 485 |
| }, |
| { |
| "epoch": 2.3370448390009027, |
| "grad_norm": 0.1849997490644455, |
| "learning_rate": 2.1900161030595814e-06, |
| "loss": 7.811, |
| "step": 486 |
| }, |
| { |
| "epoch": 2.341859765272344, |
| "grad_norm": 0.1732867807149887, |
| "learning_rate": 2.173913043478261e-06, |
| "loss": 10.516, |
| "step": 487 |
| }, |
| { |
| "epoch": 2.346674691543786, |
| "grad_norm": 0.21279215812683105, |
| "learning_rate": 2.1578099838969404e-06, |
| "loss": 9.1675, |
| "step": 488 |
| }, |
| { |
| "epoch": 2.3514896178152274, |
| "grad_norm": 0.1616515964269638, |
| "learning_rate": 2.14170692431562e-06, |
| "loss": 7.8694, |
| "step": 489 |
| }, |
| { |
| "epoch": 2.356304544086669, |
| "grad_norm": 0.1548496037721634, |
| "learning_rate": 2.1256038647342997e-06, |
| "loss": 9.4236, |
| "step": 490 |
| }, |
| { |
| "epoch": 2.36111947035811, |
| "grad_norm": 0.19034922122955322, |
| "learning_rate": 2.109500805152979e-06, |
| "loss": 8.6999, |
| "step": 491 |
| }, |
| { |
| "epoch": 2.3659343966295516, |
| "grad_norm": 0.15850062668323517, |
| "learning_rate": 2.093397745571659e-06, |
| "loss": 9.3389, |
| "step": 492 |
| }, |
| { |
| "epoch": 2.370749322900993, |
| "grad_norm": 0.17764140665531158, |
| "learning_rate": 2.0772946859903383e-06, |
| "loss": 8.3777, |
| "step": 493 |
| }, |
| { |
| "epoch": 2.3755642491724345, |
| "grad_norm": 0.1516241729259491, |
| "learning_rate": 2.061191626409018e-06, |
| "loss": 8.215, |
| "step": 494 |
| }, |
| { |
| "epoch": 2.380379175443876, |
| "grad_norm": 0.19306409358978271, |
| "learning_rate": 2.0450885668276972e-06, |
| "loss": 8.5159, |
| "step": 495 |
| }, |
| { |
| "epoch": 2.3851941017153173, |
| "grad_norm": 0.18563927710056305, |
| "learning_rate": 2.028985507246377e-06, |
| "loss": 9.1431, |
| "step": 496 |
| }, |
| { |
| "epoch": 2.3900090279867587, |
| "grad_norm": 0.2177901268005371, |
| "learning_rate": 2.0128824476650566e-06, |
| "loss": 8.7708, |
| "step": 497 |
| }, |
| { |
| "epoch": 2.3948239542582006, |
| "grad_norm": 0.18854300677776337, |
| "learning_rate": 1.9967793880837363e-06, |
| "loss": 7.7517, |
| "step": 498 |
| }, |
| { |
| "epoch": 2.399638880529642, |
| "grad_norm": 0.19311924278736115, |
| "learning_rate": 1.9806763285024155e-06, |
| "loss": 8.5485, |
| "step": 499 |
| }, |
| { |
| "epoch": 2.4044538068010834, |
| "grad_norm": 0.1653197556734085, |
| "learning_rate": 1.964573268921095e-06, |
| "loss": 8.2121, |
| "step": 500 |
| }, |
| { |
| "epoch": 2.409268733072525, |
| "grad_norm": 0.14467386901378632, |
| "learning_rate": 1.948470209339775e-06, |
| "loss": 7.212, |
| "step": 501 |
| }, |
| { |
| "epoch": 2.4140836593439663, |
| "grad_norm": 0.127033531665802, |
| "learning_rate": 1.932367149758454e-06, |
| "loss": 7.9942, |
| "step": 502 |
| }, |
| { |
| "epoch": 2.4188985856154077, |
| "grad_norm": 0.22416523098945618, |
| "learning_rate": 1.916264090177134e-06, |
| "loss": 9.2825, |
| "step": 503 |
| }, |
| { |
| "epoch": 2.423713511886849, |
| "grad_norm": 0.15797053277492523, |
| "learning_rate": 1.9001610305958135e-06, |
| "loss": 9.0045, |
| "step": 504 |
| }, |
| { |
| "epoch": 2.4285284381582906, |
| "grad_norm": 0.16567374765872955, |
| "learning_rate": 1.884057971014493e-06, |
| "loss": 7.8467, |
| "step": 505 |
| }, |
| { |
| "epoch": 2.433343364429732, |
| "grad_norm": 0.2187729775905609, |
| "learning_rate": 1.8679549114331724e-06, |
| "loss": 6.8691, |
| "step": 506 |
| }, |
| { |
| "epoch": 2.438158290701174, |
| "grad_norm": 0.1330510675907135, |
| "learning_rate": 1.8518518518518519e-06, |
| "loss": 8.6586, |
| "step": 507 |
| }, |
| { |
| "epoch": 2.4429732169726153, |
| "grad_norm": 0.18938250839710236, |
| "learning_rate": 1.8357487922705318e-06, |
| "loss": 8.7543, |
| "step": 508 |
| }, |
| { |
| "epoch": 2.4477881432440567, |
| "grad_norm": 0.16788271069526672, |
| "learning_rate": 1.8196457326892112e-06, |
| "loss": 7.234, |
| "step": 509 |
| }, |
| { |
| "epoch": 2.452603069515498, |
| "grad_norm": 0.13278517127037048, |
| "learning_rate": 1.8035426731078907e-06, |
| "loss": 8.4826, |
| "step": 510 |
| }, |
| { |
| "epoch": 2.4574179957869395, |
| "grad_norm": 0.12632611393928528, |
| "learning_rate": 1.7874396135265702e-06, |
| "loss": 8.6997, |
| "step": 511 |
| }, |
| { |
| "epoch": 2.462232922058381, |
| "grad_norm": 0.21339954435825348, |
| "learning_rate": 1.7713365539452498e-06, |
| "loss": 8.8112, |
| "step": 512 |
| }, |
| { |
| "epoch": 2.4670478483298224, |
| "grad_norm": 0.17126010358333588, |
| "learning_rate": 1.7552334943639293e-06, |
| "loss": 7.5743, |
| "step": 513 |
| }, |
| { |
| "epoch": 2.471862774601264, |
| "grad_norm": 0.13244563341140747, |
| "learning_rate": 1.7391304347826088e-06, |
| "loss": 7.7622, |
| "step": 514 |
| }, |
| { |
| "epoch": 2.4766777008727052, |
| "grad_norm": 0.21267832815647125, |
| "learning_rate": 1.7230273752012882e-06, |
| "loss": 6.7007, |
| "step": 515 |
| }, |
| { |
| "epoch": 2.4814926271441466, |
| "grad_norm": 0.12102889269590378, |
| "learning_rate": 1.7069243156199681e-06, |
| "loss": 9.1424, |
| "step": 516 |
| }, |
| { |
| "epoch": 2.4863075534155885, |
| "grad_norm": 0.13392595946788788, |
| "learning_rate": 1.6908212560386476e-06, |
| "loss": 8.5965, |
| "step": 517 |
| }, |
| { |
| "epoch": 2.49112247968703, |
| "grad_norm": 0.1512872725725174, |
| "learning_rate": 1.674718196457327e-06, |
| "loss": 8.3953, |
| "step": 518 |
| }, |
| { |
| "epoch": 2.4959374059584714, |
| "grad_norm": 0.13532410562038422, |
| "learning_rate": 1.6586151368760065e-06, |
| "loss": 8.184, |
| "step": 519 |
| }, |
| { |
| "epoch": 2.5007523322299128, |
| "grad_norm": 0.1816960871219635, |
| "learning_rate": 1.6425120772946862e-06, |
| "loss": 9.9139, |
| "step": 520 |
| }, |
| { |
| "epoch": 2.505567258501354, |
| "grad_norm": 0.11753327399492264, |
| "learning_rate": 1.6264090177133656e-06, |
| "loss": 8.4936, |
| "step": 521 |
| }, |
| { |
| "epoch": 2.5103821847727956, |
| "grad_norm": 0.20234891772270203, |
| "learning_rate": 1.610305958132045e-06, |
| "loss": 8.1004, |
| "step": 522 |
| }, |
| { |
| "epoch": 2.515197111044237, |
| "grad_norm": 0.14017826318740845, |
| "learning_rate": 1.5942028985507246e-06, |
| "loss": 8.4294, |
| "step": 523 |
| }, |
| { |
| "epoch": 2.5200120373156785, |
| "grad_norm": 0.1481131762266159, |
| "learning_rate": 1.5780998389694045e-06, |
| "loss": 8.3886, |
| "step": 524 |
| }, |
| { |
| "epoch": 2.5248269635871203, |
| "grad_norm": 0.2701749801635742, |
| "learning_rate": 1.561996779388084e-06, |
| "loss": 8.1065, |
| "step": 525 |
| }, |
| { |
| "epoch": 2.5296418898585618, |
| "grad_norm": 0.16109466552734375, |
| "learning_rate": 1.5458937198067634e-06, |
| "loss": 8.4212, |
| "step": 526 |
| }, |
| { |
| "epoch": 2.534456816130003, |
| "grad_norm": 0.18063953518867493, |
| "learning_rate": 1.529790660225443e-06, |
| "loss": 7.8353, |
| "step": 527 |
| }, |
| { |
| "epoch": 2.5392717424014446, |
| "grad_norm": 0.16267195343971252, |
| "learning_rate": 1.5136876006441225e-06, |
| "loss": 7.9062, |
| "step": 528 |
| }, |
| { |
| "epoch": 2.544086668672886, |
| "grad_norm": 0.1997467577457428, |
| "learning_rate": 1.497584541062802e-06, |
| "loss": 8.6919, |
| "step": 529 |
| }, |
| { |
| "epoch": 2.5489015949443274, |
| "grad_norm": 0.15415464341640472, |
| "learning_rate": 1.4814814814814815e-06, |
| "loss": 8.2469, |
| "step": 530 |
| }, |
| { |
| "epoch": 2.553716521215769, |
| "grad_norm": 0.1869962513446808, |
| "learning_rate": 1.4653784219001613e-06, |
| "loss": 8.216, |
| "step": 531 |
| }, |
| { |
| "epoch": 2.5585314474872103, |
| "grad_norm": 0.14521171152591705, |
| "learning_rate": 1.4492753623188408e-06, |
| "loss": 8.2956, |
| "step": 532 |
| }, |
| { |
| "epoch": 2.5633463737586517, |
| "grad_norm": 0.1761654019355774, |
| "learning_rate": 1.4331723027375203e-06, |
| "loss": 8.3107, |
| "step": 533 |
| }, |
| { |
| "epoch": 2.568161300030093, |
| "grad_norm": 0.1776813566684723, |
| "learning_rate": 1.4170692431561997e-06, |
| "loss": 7.2249, |
| "step": 534 |
| }, |
| { |
| "epoch": 2.5729762263015346, |
| "grad_norm": 0.19041168689727783, |
| "learning_rate": 1.4009661835748794e-06, |
| "loss": 8.6567, |
| "step": 535 |
| }, |
| { |
| "epoch": 2.577791152572976, |
| "grad_norm": 0.1729832887649536, |
| "learning_rate": 1.3848631239935589e-06, |
| "loss": 9.1775, |
| "step": 536 |
| }, |
| { |
| "epoch": 2.582606078844418, |
| "grad_norm": 0.1917349100112915, |
| "learning_rate": 1.3687600644122383e-06, |
| "loss": 8.1724, |
| "step": 537 |
| }, |
| { |
| "epoch": 2.5874210051158593, |
| "grad_norm": 0.19829866290092468, |
| "learning_rate": 1.3526570048309178e-06, |
| "loss": 9.4741, |
| "step": 538 |
| }, |
| { |
| "epoch": 2.5922359313873007, |
| "grad_norm": 0.17467886209487915, |
| "learning_rate": 1.3365539452495977e-06, |
| "loss": 8.3608, |
| "step": 539 |
| }, |
| { |
| "epoch": 2.597050857658742, |
| "grad_norm": 0.25771814584732056, |
| "learning_rate": 1.3204508856682772e-06, |
| "loss": 8.5837, |
| "step": 540 |
| }, |
| { |
| "epoch": 2.6018657839301835, |
| "grad_norm": 0.13524986803531647, |
| "learning_rate": 1.3043478260869566e-06, |
| "loss": 7.8586, |
| "step": 541 |
| }, |
| { |
| "epoch": 2.606680710201625, |
| "grad_norm": 0.20528331398963928, |
| "learning_rate": 1.288244766505636e-06, |
| "loss": 8.9202, |
| "step": 542 |
| }, |
| { |
| "epoch": 2.6114956364730664, |
| "grad_norm": 0.18491816520690918, |
| "learning_rate": 1.2721417069243158e-06, |
| "loss": 8.1091, |
| "step": 543 |
| }, |
| { |
| "epoch": 2.6163105627445082, |
| "grad_norm": 0.14208512008190155, |
| "learning_rate": 1.2560386473429952e-06, |
| "loss": 8.8829, |
| "step": 544 |
| }, |
| { |
| "epoch": 2.6211254890159497, |
| "grad_norm": 0.22715114057064056, |
| "learning_rate": 1.2399355877616747e-06, |
| "loss": 8.4472, |
| "step": 545 |
| }, |
| { |
| "epoch": 2.625940415287391, |
| "grad_norm": 0.18286040425300598, |
| "learning_rate": 1.2238325281803544e-06, |
| "loss": 8.129, |
| "step": 546 |
| }, |
| { |
| "epoch": 2.6307553415588325, |
| "grad_norm": 0.18549402058124542, |
| "learning_rate": 1.2077294685990338e-06, |
| "loss": 8.3491, |
| "step": 547 |
| }, |
| { |
| "epoch": 2.635570267830274, |
| "grad_norm": 0.16227751970291138, |
| "learning_rate": 1.1916264090177135e-06, |
| "loss": 8.0191, |
| "step": 548 |
| }, |
| { |
| "epoch": 2.6403851941017153, |
| "grad_norm": 0.17795391380786896, |
| "learning_rate": 1.175523349436393e-06, |
| "loss": 7.1223, |
| "step": 549 |
| }, |
| { |
| "epoch": 2.6452001203731568, |
| "grad_norm": 0.17126573622226715, |
| "learning_rate": 1.1594202898550726e-06, |
| "loss": 8.2455, |
| "step": 550 |
| }, |
| { |
| "epoch": 2.650015046644598, |
| "grad_norm": 0.17369426786899567, |
| "learning_rate": 1.1433172302737521e-06, |
| "loss": 9.6182, |
| "step": 551 |
| }, |
| { |
| "epoch": 2.6548299729160396, |
| "grad_norm": 0.14956361055374146, |
| "learning_rate": 1.1272141706924316e-06, |
| "loss": 7.7231, |
| "step": 552 |
| }, |
| { |
| "epoch": 2.659644899187481, |
| "grad_norm": 0.17787741124629974, |
| "learning_rate": 1.111111111111111e-06, |
| "loss": 8.531, |
| "step": 553 |
| }, |
| { |
| "epoch": 2.6644598254589225, |
| "grad_norm": 0.16423143446445465, |
| "learning_rate": 1.0950080515297907e-06, |
| "loss": 9.07, |
| "step": 554 |
| }, |
| { |
| "epoch": 2.669274751730364, |
| "grad_norm": 0.18575292825698853, |
| "learning_rate": 1.0789049919484702e-06, |
| "loss": 9.2116, |
| "step": 555 |
| }, |
| { |
| "epoch": 2.6740896780018057, |
| "grad_norm": 0.1774529069662094, |
| "learning_rate": 1.0628019323671499e-06, |
| "loss": 7.6522, |
| "step": 556 |
| }, |
| { |
| "epoch": 2.678904604273247, |
| "grad_norm": 0.12618403136730194, |
| "learning_rate": 1.0466988727858295e-06, |
| "loss": 8.2429, |
| "step": 557 |
| }, |
| { |
| "epoch": 2.6837195305446886, |
| "grad_norm": 0.1379764825105667, |
| "learning_rate": 1.030595813204509e-06, |
| "loss": 9.0101, |
| "step": 558 |
| }, |
| { |
| "epoch": 2.68853445681613, |
| "grad_norm": 0.1804221123456955, |
| "learning_rate": 1.0144927536231885e-06, |
| "loss": 7.1618, |
| "step": 559 |
| }, |
| { |
| "epoch": 2.6933493830875714, |
| "grad_norm": 0.2020816057920456, |
| "learning_rate": 9.983896940418681e-07, |
| "loss": 8.0899, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.698164309359013, |
| "grad_norm": 0.1975187063217163, |
| "learning_rate": 9.822866344605476e-07, |
| "loss": 6.9638, |
| "step": 561 |
| }, |
| { |
| "epoch": 2.7029792356304543, |
| "grad_norm": 0.21582917869091034, |
| "learning_rate": 9.66183574879227e-07, |
| "loss": 8.3501, |
| "step": 562 |
| }, |
| { |
| "epoch": 2.707794161901896, |
| "grad_norm": 0.1378657966852188, |
| "learning_rate": 9.500805152979067e-07, |
| "loss": 7.8302, |
| "step": 563 |
| }, |
| { |
| "epoch": 2.7126090881733376, |
| "grad_norm": 0.17029066383838654, |
| "learning_rate": 9.339774557165862e-07, |
| "loss": 8.3873, |
| "step": 564 |
| }, |
| { |
| "epoch": 2.717424014444779, |
| "grad_norm": 0.1723220944404602, |
| "learning_rate": 9.178743961352659e-07, |
| "loss": 7.8308, |
| "step": 565 |
| }, |
| { |
| "epoch": 2.7222389407162204, |
| "grad_norm": 0.15351563692092896, |
| "learning_rate": 9.017713365539453e-07, |
| "loss": 7.8458, |
| "step": 566 |
| }, |
| { |
| "epoch": 2.727053866987662, |
| "grad_norm": 0.15215708315372467, |
| "learning_rate": 8.856682769726249e-07, |
| "loss": 8.4561, |
| "step": 567 |
| }, |
| { |
| "epoch": 2.7318687932591033, |
| "grad_norm": 0.18015427887439728, |
| "learning_rate": 8.695652173913044e-07, |
| "loss": 7.986, |
| "step": 568 |
| }, |
| { |
| "epoch": 2.7366837195305447, |
| "grad_norm": 0.18228279054164886, |
| "learning_rate": 8.534621578099841e-07, |
| "loss": 8.3809, |
| "step": 569 |
| }, |
| { |
| "epoch": 2.741498645801986, |
| "grad_norm": 0.17855818569660187, |
| "learning_rate": 8.373590982286635e-07, |
| "loss": 7.2222, |
| "step": 570 |
| }, |
| { |
| "epoch": 2.7463135720734275, |
| "grad_norm": 0.12565724551677704, |
| "learning_rate": 8.212560386473431e-07, |
| "loss": 8.319, |
| "step": 571 |
| }, |
| { |
| "epoch": 2.751128498344869, |
| "grad_norm": 0.1549467146396637, |
| "learning_rate": 8.051529790660226e-07, |
| "loss": 7.4813, |
| "step": 572 |
| }, |
| { |
| "epoch": 2.7559434246163104, |
| "grad_norm": 0.19094257056713104, |
| "learning_rate": 7.890499194847022e-07, |
| "loss": 7.5127, |
| "step": 573 |
| }, |
| { |
| "epoch": 2.760758350887752, |
| "grad_norm": 0.18528102338314056, |
| "learning_rate": 7.729468599033817e-07, |
| "loss": 8.4165, |
| "step": 574 |
| }, |
| { |
| "epoch": 2.7655732771591937, |
| "grad_norm": 0.17467372119426727, |
| "learning_rate": 7.568438003220613e-07, |
| "loss": 8.4328, |
| "step": 575 |
| }, |
| { |
| "epoch": 2.770388203430635, |
| "grad_norm": 0.1786053627729416, |
| "learning_rate": 7.407407407407407e-07, |
| "loss": 7.6748, |
| "step": 576 |
| }, |
| { |
| "epoch": 2.7752031297020765, |
| "grad_norm": 0.2303641140460968, |
| "learning_rate": 7.246376811594204e-07, |
| "loss": 7.6819, |
| "step": 577 |
| }, |
| { |
| "epoch": 2.780018055973518, |
| "grad_norm": 0.20672529935836792, |
| "learning_rate": 7.085346215780999e-07, |
| "loss": 7.6466, |
| "step": 578 |
| }, |
| { |
| "epoch": 2.7848329822449593, |
| "grad_norm": 0.20678630471229553, |
| "learning_rate": 6.924315619967794e-07, |
| "loss": 7.83, |
| "step": 579 |
| }, |
| { |
| "epoch": 2.7896479085164008, |
| "grad_norm": 0.22579342126846313, |
| "learning_rate": 6.763285024154589e-07, |
| "loss": 8.352, |
| "step": 580 |
| }, |
| { |
| "epoch": 2.794462834787842, |
| "grad_norm": 0.21970775723457336, |
| "learning_rate": 6.602254428341386e-07, |
| "loss": 9.0786, |
| "step": 581 |
| }, |
| { |
| "epoch": 2.799277761059284, |
| "grad_norm": 0.15649309754371643, |
| "learning_rate": 6.44122383252818e-07, |
| "loss": 8.0365, |
| "step": 582 |
| }, |
| { |
| "epoch": 2.8040926873307255, |
| "grad_norm": 0.15020275115966797, |
| "learning_rate": 6.280193236714976e-07, |
| "loss": 7.5352, |
| "step": 583 |
| }, |
| { |
| "epoch": 2.808907613602167, |
| "grad_norm": 0.1699695736169815, |
| "learning_rate": 6.119162640901772e-07, |
| "loss": 7.9234, |
| "step": 584 |
| }, |
| { |
| "epoch": 2.8137225398736083, |
| "grad_norm": 0.14597013592720032, |
| "learning_rate": 5.958132045088568e-07, |
| "loss": 8.406, |
| "step": 585 |
| }, |
| { |
| "epoch": 2.8185374661450497, |
| "grad_norm": 0.1936945766210556, |
| "learning_rate": 5.797101449275363e-07, |
| "loss": 7.1909, |
| "step": 586 |
| }, |
| { |
| "epoch": 2.823352392416491, |
| "grad_norm": 0.1677147001028061, |
| "learning_rate": 5.636070853462158e-07, |
| "loss": 8.3866, |
| "step": 587 |
| }, |
| { |
| "epoch": 2.8281673186879326, |
| "grad_norm": 0.1816486269235611, |
| "learning_rate": 5.475040257648954e-07, |
| "loss": 8.0837, |
| "step": 588 |
| }, |
| { |
| "epoch": 2.832982244959374, |
| "grad_norm": 0.18202394247055054, |
| "learning_rate": 5.314009661835749e-07, |
| "loss": 9.1406, |
| "step": 589 |
| }, |
| { |
| "epoch": 2.8377971712308154, |
| "grad_norm": 0.2390686720609665, |
| "learning_rate": 5.152979066022545e-07, |
| "loss": 8.5755, |
| "step": 590 |
| }, |
| { |
| "epoch": 2.842612097502257, |
| "grad_norm": 0.18315307796001434, |
| "learning_rate": 4.991948470209341e-07, |
| "loss": 8.1501, |
| "step": 591 |
| }, |
| { |
| "epoch": 2.8474270237736983, |
| "grad_norm": 0.17412015795707703, |
| "learning_rate": 4.830917874396135e-07, |
| "loss": 8.2024, |
| "step": 592 |
| }, |
| { |
| "epoch": 2.8522419500451397, |
| "grad_norm": 0.18761633336544037, |
| "learning_rate": 4.669887278582931e-07, |
| "loss": 7.597, |
| "step": 593 |
| }, |
| { |
| "epoch": 2.8570568763165816, |
| "grad_norm": 0.1563250869512558, |
| "learning_rate": 4.5088566827697267e-07, |
| "loss": 8.4617, |
| "step": 594 |
| }, |
| { |
| "epoch": 2.861871802588023, |
| "grad_norm": 0.13112574815750122, |
| "learning_rate": 4.347826086956522e-07, |
| "loss": 7.6717, |
| "step": 595 |
| }, |
| { |
| "epoch": 2.8666867288594644, |
| "grad_norm": 0.1944950670003891, |
| "learning_rate": 4.1867954911433176e-07, |
| "loss": 8.5494, |
| "step": 596 |
| }, |
| { |
| "epoch": 2.871501655130906, |
| "grad_norm": 0.18215830624103546, |
| "learning_rate": 4.025764895330113e-07, |
| "loss": 8.4453, |
| "step": 597 |
| }, |
| { |
| "epoch": 2.8763165814023472, |
| "grad_norm": 0.15392394363880157, |
| "learning_rate": 3.8647342995169085e-07, |
| "loss": 7.0528, |
| "step": 598 |
| }, |
| { |
| "epoch": 2.8811315076737887, |
| "grad_norm": 0.17069800198078156, |
| "learning_rate": 3.7037037037037036e-07, |
| "loss": 9.8743, |
| "step": 599 |
| }, |
| { |
| "epoch": 2.88594643394523, |
| "grad_norm": 0.13186608254909515, |
| "learning_rate": 3.5426731078904993e-07, |
| "loss": 7.9741, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.8907613602166715, |
| "grad_norm": 0.15300041437149048, |
| "learning_rate": 3.3816425120772945e-07, |
| "loss": 8.788, |
| "step": 601 |
| }, |
| { |
| "epoch": 2.8955762864881134, |
| "grad_norm": 0.15090717375278473, |
| "learning_rate": 3.22061191626409e-07, |
| "loss": 7.801, |
| "step": 602 |
| }, |
| { |
| "epoch": 2.900391212759555, |
| "grad_norm": 0.1606573611497879, |
| "learning_rate": 3.059581320450886e-07, |
| "loss": 8.5165, |
| "step": 603 |
| }, |
| { |
| "epoch": 2.9052061390309962, |
| "grad_norm": 0.15746456384658813, |
| "learning_rate": 2.8985507246376816e-07, |
| "loss": 7.3699, |
| "step": 604 |
| }, |
| { |
| "epoch": 2.9100210653024376, |
| "grad_norm": 0.1550646871328354, |
| "learning_rate": 2.737520128824477e-07, |
| "loss": 7.6848, |
| "step": 605 |
| }, |
| { |
| "epoch": 2.914835991573879, |
| "grad_norm": 0.14871163666248322, |
| "learning_rate": 2.5764895330112725e-07, |
| "loss": 7.6812, |
| "step": 606 |
| }, |
| { |
| "epoch": 2.9196509178453205, |
| "grad_norm": 0.2426673322916031, |
| "learning_rate": 2.4154589371980677e-07, |
| "loss": 7.7157, |
| "step": 607 |
| }, |
| { |
| "epoch": 2.924465844116762, |
| "grad_norm": 0.19695597887039185, |
| "learning_rate": 2.2544283413848634e-07, |
| "loss": 7.9716, |
| "step": 608 |
| }, |
| { |
| "epoch": 2.9292807703882033, |
| "grad_norm": 0.18192477524280548, |
| "learning_rate": 2.0933977455716588e-07, |
| "loss": 7.4395, |
| "step": 609 |
| }, |
| { |
| "epoch": 2.9340956966596448, |
| "grad_norm": 0.18087869882583618, |
| "learning_rate": 1.9323671497584542e-07, |
| "loss": 7.6177, |
| "step": 610 |
| }, |
| { |
| "epoch": 2.938910622931086, |
| "grad_norm": 0.1489817202091217, |
| "learning_rate": 1.7713365539452497e-07, |
| "loss": 7.4342, |
| "step": 611 |
| }, |
| { |
| "epoch": 2.9437255492025276, |
| "grad_norm": 0.12941974401474, |
| "learning_rate": 1.610305958132045e-07, |
| "loss": 9.0655, |
| "step": 612 |
| }, |
| { |
| "epoch": 2.9485404754739695, |
| "grad_norm": 0.1680421680212021, |
| "learning_rate": 1.4492753623188408e-07, |
| "loss": 7.2567, |
| "step": 613 |
| }, |
| { |
| "epoch": 2.953355401745411, |
| "grad_norm": 0.18065397441387177, |
| "learning_rate": 1.2882447665056362e-07, |
| "loss": 7.9862, |
| "step": 614 |
| }, |
| { |
| "epoch": 2.9581703280168523, |
| "grad_norm": 0.1599837988615036, |
| "learning_rate": 1.1272141706924317e-07, |
| "loss": 8.2424, |
| "step": 615 |
| }, |
| { |
| "epoch": 2.9629852542882937, |
| "grad_norm": 0.1959857940673828, |
| "learning_rate": 9.661835748792271e-08, |
| "loss": 7.4787, |
| "step": 616 |
| }, |
| { |
| "epoch": 2.967800180559735, |
| "grad_norm": 0.15649034082889557, |
| "learning_rate": 8.051529790660226e-08, |
| "loss": 8.1575, |
| "step": 617 |
| }, |
| { |
| "epoch": 2.9726151068311766, |
| "grad_norm": 0.1679297834634781, |
| "learning_rate": 6.441223832528181e-08, |
| "loss": 7.2253, |
| "step": 618 |
| }, |
| { |
| "epoch": 2.977430033102618, |
| "grad_norm": 0.15790539979934692, |
| "learning_rate": 4.8309178743961356e-08, |
| "loss": 7.6764, |
| "step": 619 |
| }, |
| { |
| "epoch": 2.9822449593740594, |
| "grad_norm": 0.1694169044494629, |
| "learning_rate": 3.2206119162640906e-08, |
| "loss": 8.6771, |
| "step": 620 |
| }, |
| { |
| "epoch": 2.9870598856455013, |
| "grad_norm": 0.17527279257774353, |
| "learning_rate": 1.6103059581320453e-08, |
| "loss": 10.7562, |
| "step": 621 |
| }, |
| { |
| "epoch": 2.9870598856455013, |
| "step": 621, |
| "total_flos": 2.802876100504453e+18, |
| "train_loss": 10.002562027622536, |
| "train_runtime": 59343.0737, |
| "train_samples_per_second": 1.344, |
| "train_steps_per_second": 0.01 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 621, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.802876100504453e+18, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|