| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.031181202890604, |
| "eval_steps": 500, |
| "global_step": 63000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0016368085506878688, |
| "grad_norm": 0.5328027606010437, |
| "learning_rate": 3.600654664484452e-07, |
| "loss": 1.6968, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.0032736171013757376, |
| "grad_norm": 0.5594077706336975, |
| "learning_rate": 7.237679578105111e-07, |
| "loss": 1.6883, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.004910425652063607, |
| "grad_norm": 0.6636043787002563, |
| "learning_rate": 1.087470449172577e-06, |
| "loss": 1.6196, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.006547234202751475, |
| "grad_norm": 0.6200364828109741, |
| "learning_rate": 1.4511729405346428e-06, |
| "loss": 1.511, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.008184042753439345, |
| "grad_norm": 0.4777531623840332, |
| "learning_rate": 1.8148754318967086e-06, |
| "loss": 1.342, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.009820851304127213, |
| "grad_norm": 0.3041970133781433, |
| "learning_rate": 2.1785779232587743e-06, |
| "loss": 1.2154, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.011457659854815082, |
| "grad_norm": 0.21760690212249756, |
| "learning_rate": 2.54228041462084e-06, |
| "loss": 1.1427, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.01309446840550295, |
| "grad_norm": 0.22987280786037445, |
| "learning_rate": 2.9059829059829063e-06, |
| "loss": 1.0943, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.014731276956190819, |
| "grad_norm": 0.24943482875823975, |
| "learning_rate": 3.269685397344972e-06, |
| "loss": 1.0696, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.01636808550687869, |
| "grad_norm": 0.2619542181491852, |
| "learning_rate": 3.633387888707038e-06, |
| "loss": 1.0318, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.018004894057566556, |
| "grad_norm": 0.2811136841773987, |
| "learning_rate": 3.997090380069103e-06, |
| "loss": 1.0035, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.019641702608254426, |
| "grad_norm": 0.3045084476470947, |
| "learning_rate": 4.36079287143117e-06, |
| "loss": 0.9726, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.021278511158942293, |
| "grad_norm": 0.3168332278728485, |
| "learning_rate": 4.7244953627932355e-06, |
| "loss": 0.971, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.022915319709630164, |
| "grad_norm": 0.33685848116874695, |
| "learning_rate": 5.088197854155301e-06, |
| "loss": 0.952, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.02455212826031803, |
| "grad_norm": 0.3198516368865967, |
| "learning_rate": 5.451900345517367e-06, |
| "loss": 0.9385, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.0261889368110059, |
| "grad_norm": 0.3457159101963043, |
| "learning_rate": 5.815602836879432e-06, |
| "loss": 0.9291, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.02782574536169377, |
| "grad_norm": 0.3343696594238281, |
| "learning_rate": 6.179305328241499e-06, |
| "loss": 0.9251, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.029462553912381638, |
| "grad_norm": 0.4662475287914276, |
| "learning_rate": 6.543007819603565e-06, |
| "loss": 0.9328, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.03109936246306951, |
| "grad_norm": 0.3559871017932892, |
| "learning_rate": 6.906710310965631e-06, |
| "loss": 0.9126, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.03273617101375738, |
| "grad_norm": 0.3852447271347046, |
| "learning_rate": 7.270412802327696e-06, |
| "loss": 0.9024, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.034372979564445245, |
| "grad_norm": 0.36482807993888855, |
| "learning_rate": 7.634115293689762e-06, |
| "loss": 0.9086, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.03600978811513311, |
| "grad_norm": 0.39493420720100403, |
| "learning_rate": 7.997817785051828e-06, |
| "loss": 0.9144, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.03764659666582098, |
| "grad_norm": 0.4406372010707855, |
| "learning_rate": 8.361520276413894e-06, |
| "loss": 0.9067, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.03928340521650885, |
| "grad_norm": 0.43684300780296326, |
| "learning_rate": 8.72522276777596e-06, |
| "loss": 0.898, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.04092021376719672, |
| "grad_norm": 0.4949699342250824, |
| "learning_rate": 9.088925259138026e-06, |
| "loss": 0.8893, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.04255702231788459, |
| "grad_norm": 0.4759005308151245, |
| "learning_rate": 9.452627750500092e-06, |
| "loss": 0.9036, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.04419383086857246, |
| "grad_norm": 0.4733336567878723, |
| "learning_rate": 9.816330241862157e-06, |
| "loss": 0.9046, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.04583063941926033, |
| "grad_norm": 0.5515408515930176, |
| "learning_rate": 1.0180032733224223e-05, |
| "loss": 0.8899, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.047467447969948194, |
| "grad_norm": 0.5026727318763733, |
| "learning_rate": 1.054373522458629e-05, |
| "loss": 0.8868, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.04910425652063606, |
| "grad_norm": 0.5517929196357727, |
| "learning_rate": 1.0907437715948354e-05, |
| "loss": 0.8905, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.050741065071323935, |
| "grad_norm": 0.5139409899711609, |
| "learning_rate": 1.127114020731042e-05, |
| "loss": 0.8711, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.0523778736220118, |
| "grad_norm": 0.5762068033218384, |
| "learning_rate": 1.1634842698672486e-05, |
| "loss": 0.9, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.05401468217269967, |
| "grad_norm": 0.5540242791175842, |
| "learning_rate": 1.1998545190034552e-05, |
| "loss": 0.8854, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.05565149072338754, |
| "grad_norm": 0.6651942133903503, |
| "learning_rate": 1.236224768139662e-05, |
| "loss": 0.875, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.05728829927407541, |
| "grad_norm": 0.6157256364822388, |
| "learning_rate": 1.2725950172758685e-05, |
| "loss": 0.87, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.058925107824763276, |
| "grad_norm": 0.6638494729995728, |
| "learning_rate": 1.3089652664120751e-05, |
| "loss": 0.8666, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.06056191637545114, |
| "grad_norm": 0.6535647511482239, |
| "learning_rate": 1.3453355155482817e-05, |
| "loss": 0.8675, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.06219872492613902, |
| "grad_norm": 0.7346630692481995, |
| "learning_rate": 1.3817057646844883e-05, |
| "loss": 0.8724, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.06383553347682688, |
| "grad_norm": 0.7002882957458496, |
| "learning_rate": 1.4180760138206948e-05, |
| "loss": 0.8476, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.06547234202751476, |
| "grad_norm": 0.6632655262947083, |
| "learning_rate": 1.4544462629569014e-05, |
| "loss": 0.8641, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.06710915057820262, |
| "grad_norm": 0.7253566384315491, |
| "learning_rate": 1.490816512093108e-05, |
| "loss": 0.8611, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.06874595912889049, |
| "grad_norm": 0.7651970386505127, |
| "learning_rate": 1.5271867612293146e-05, |
| "loss": 0.8597, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.07038276767957836, |
| "grad_norm": 0.6781213879585266, |
| "learning_rate": 1.563557010365521e-05, |
| "loss": 0.844, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.07201957623026622, |
| "grad_norm": 0.7465602159500122, |
| "learning_rate": 1.5999272595017275e-05, |
| "loss": 0.8558, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.0736563847809541, |
| "grad_norm": 0.7796695828437805, |
| "learning_rate": 1.6362975086379343e-05, |
| "loss": 0.8533, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.07529319333164196, |
| "grad_norm": 0.7622010111808777, |
| "learning_rate": 1.6726677577741408e-05, |
| "loss": 0.8414, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.07693000188232983, |
| "grad_norm": 0.7499621510505676, |
| "learning_rate": 1.7090380069103472e-05, |
| "loss": 0.8459, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.0785668104330177, |
| "grad_norm": 0.7822730541229248, |
| "learning_rate": 1.745408256046554e-05, |
| "loss": 0.8468, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.08020361898370557, |
| "grad_norm": 0.7850978970527649, |
| "learning_rate": 1.7817785051827608e-05, |
| "loss": 0.8603, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.08184042753439344, |
| "grad_norm": 0.8370286822319031, |
| "learning_rate": 1.8181487543189672e-05, |
| "loss": 0.837, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.08347723608508131, |
| "grad_norm": 0.821024477481842, |
| "learning_rate": 1.854519003455174e-05, |
| "loss": 0.8464, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.08511404463576917, |
| "grad_norm": 0.8516008257865906, |
| "learning_rate": 1.8908892525913805e-05, |
| "loss": 0.837, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.08675085318645705, |
| "grad_norm": 0.7816336750984192, |
| "learning_rate": 1.927259501727587e-05, |
| "loss": 0.8471, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.08838766173714492, |
| "grad_norm": 0.8347124457359314, |
| "learning_rate": 1.9636297508637937e-05, |
| "loss": 0.8333, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.09002447028783278, |
| "grad_norm": 0.8995541334152222, |
| "learning_rate": 2e-05, |
| "loss": 0.8341, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.09166127883852065, |
| "grad_norm": 0.9787241816520691, |
| "learning_rate": 1.9999984387425675e-05, |
| "loss": 0.8431, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.09329808738920851, |
| "grad_norm": 0.8093689680099487, |
| "learning_rate": 1.999993754975144e-05, |
| "loss": 0.8325, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.09493489593989639, |
| "grad_norm": 0.9042837023735046, |
| "learning_rate": 1.999985948712355e-05, |
| "loss": 0.828, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.09657170449058426, |
| "grad_norm": 0.9188331961631775, |
| "learning_rate": 1.999975019978576e-05, |
| "loss": 0.8291, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.09820851304127212, |
| "grad_norm": 0.8699648380279541, |
| "learning_rate": 1.9999609688079316e-05, |
| "loss": 0.8277, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.09984532159196, |
| "grad_norm": 0.9138243794441223, |
| "learning_rate": 1.999943795244297e-05, |
| "loss": 0.8367, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.10148213014264787, |
| "grad_norm": 0.9293233156204224, |
| "learning_rate": 1.9999234993412973e-05, |
| "loss": 0.8281, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.10311893869333573, |
| "grad_norm": 0.9346773624420166, |
| "learning_rate": 1.999900081162306e-05, |
| "loss": 0.8323, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.1047557472440236, |
| "grad_norm": 0.9332927465438843, |
| "learning_rate": 1.999873540780447e-05, |
| "loss": 0.8259, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.10639255579471148, |
| "grad_norm": 0.8887437582015991, |
| "learning_rate": 1.9998438782785937e-05, |
| "loss": 0.8305, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.10802936434539934, |
| "grad_norm": 0.9184074401855469, |
| "learning_rate": 1.999811093749367e-05, |
| "loss": 0.829, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.10966617289608721, |
| "grad_norm": 0.8532683849334717, |
| "learning_rate": 1.999775187295137e-05, |
| "loss": 0.8275, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.11130298144677508, |
| "grad_norm": 0.9298515915870667, |
| "learning_rate": 1.9997361590280225e-05, |
| "loss": 0.8192, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.11293978999746294, |
| "grad_norm": 0.9617123603820801, |
| "learning_rate": 1.9996940090698896e-05, |
| "loss": 0.8198, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.11457659854815082, |
| "grad_norm": 1.0112113952636719, |
| "learning_rate": 1.9996487375523524e-05, |
| "loss": 0.8239, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.11621340709883868, |
| "grad_norm": 0.9226319193840027, |
| "learning_rate": 1.9996003446167718e-05, |
| "loss": 0.8281, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.11785021564952655, |
| "grad_norm": 1.0199968814849854, |
| "learning_rate": 1.999548830414255e-05, |
| "loss": 0.82, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.11948702420021443, |
| "grad_norm": 0.9594390988349915, |
| "learning_rate": 1.999494195105657e-05, |
| "loss": 0.8139, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.12112383275090229, |
| "grad_norm": 0.9685386419296265, |
| "learning_rate": 1.9994364388615763e-05, |
| "loss": 0.8193, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.12276064130159016, |
| "grad_norm": 0.9797342419624329, |
| "learning_rate": 1.999375561862358e-05, |
| "loss": 0.815, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.12439744985227803, |
| "grad_norm": 1.0541061162948608, |
| "learning_rate": 1.9993115642980912e-05, |
| "loss": 0.8239, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.1260342584029659, |
| "grad_norm": 0.9543519616127014, |
| "learning_rate": 1.99924444636861e-05, |
| "loss": 0.8145, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.12767106695365377, |
| "grad_norm": 0.9379186630249023, |
| "learning_rate": 1.99917420828349e-05, |
| "loss": 0.817, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.12930787550434164, |
| "grad_norm": 0.9919012188911438, |
| "learning_rate": 1.9991008502620515e-05, |
| "loss": 0.8208, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.13094468405502951, |
| "grad_norm": 0.9344952702522278, |
| "learning_rate": 1.999024372533356e-05, |
| "loss": 0.8167, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.13258149260571736, |
| "grad_norm": 0.9583950638771057, |
| "learning_rate": 1.9989447753362058e-05, |
| "loss": 0.8125, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.13421830115640523, |
| "grad_norm": 0.9945580363273621, |
| "learning_rate": 1.998862058919145e-05, |
| "loss": 0.8225, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.1358551097070931, |
| "grad_norm": 0.9583763480186462, |
| "learning_rate": 1.9987762235404566e-05, |
| "loss": 0.8105, |
| "step": 8300 |
| }, |
| { |
| "epoch": 0.13749191825778098, |
| "grad_norm": 1.025468349456787, |
| "learning_rate": 1.998687269468162e-05, |
| "loss": 0.8107, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.13912872680846886, |
| "grad_norm": 1.0057779550552368, |
| "learning_rate": 1.998595196980023e-05, |
| "loss": 0.8138, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.14076553535915673, |
| "grad_norm": 0.9300206899642944, |
| "learning_rate": 1.9985000063635365e-05, |
| "loss": 0.8207, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.14240234390984458, |
| "grad_norm": 1.0241742134094238, |
| "learning_rate": 1.9984016979159368e-05, |
| "loss": 0.8046, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.14403915246053245, |
| "grad_norm": 0.9688097238540649, |
| "learning_rate": 1.9983002719441935e-05, |
| "loss": 0.8193, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.14567596101122032, |
| "grad_norm": 0.9877735376358032, |
| "learning_rate": 1.9981957287650107e-05, |
| "loss": 0.8003, |
| "step": 8900 |
| }, |
| { |
| "epoch": 0.1473127695619082, |
| "grad_norm": 0.9533541202545166, |
| "learning_rate": 1.9980880687048257e-05, |
| "loss": 0.8089, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.14894957811259607, |
| "grad_norm": 1.0934607982635498, |
| "learning_rate": 1.997977292099809e-05, |
| "loss": 0.7971, |
| "step": 9100 |
| }, |
| { |
| "epoch": 0.15058638666328392, |
| "grad_norm": 0.9715205430984497, |
| "learning_rate": 1.9978633992958624e-05, |
| "loss": 0.8194, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.1522231952139718, |
| "grad_norm": 0.9527362585067749, |
| "learning_rate": 1.9977463906486175e-05, |
| "loss": 0.8095, |
| "step": 9300 |
| }, |
| { |
| "epoch": 0.15386000376465966, |
| "grad_norm": 1.0439358949661255, |
| "learning_rate": 1.9976262665234357e-05, |
| "loss": 0.7997, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.15549681231534754, |
| "grad_norm": 1.1087926626205444, |
| "learning_rate": 1.9975030272954066e-05, |
| "loss": 0.8012, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.1571336208660354, |
| "grad_norm": 1.0532102584838867, |
| "learning_rate": 1.9973766733493458e-05, |
| "loss": 0.8006, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.15877042941672329, |
| "grad_norm": 0.9958882331848145, |
| "learning_rate": 1.997247205079796e-05, |
| "loss": 0.8138, |
| "step": 9700 |
| }, |
| { |
| "epoch": 0.16040723796741113, |
| "grad_norm": 1.0133436918258667, |
| "learning_rate": 1.9971146228910236e-05, |
| "loss": 0.7942, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.162044046518099, |
| "grad_norm": 0.9266718029975891, |
| "learning_rate": 1.9969789271970187e-05, |
| "loss": 0.7917, |
| "step": 9900 |
| }, |
| { |
| "epoch": 0.16368085506878688, |
| "grad_norm": 1.0468189716339111, |
| "learning_rate": 1.9968401184214924e-05, |
| "loss": 0.8012, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.16531766361947475, |
| "grad_norm": 1.0444200038909912, |
| "learning_rate": 1.9966981969978782e-05, |
| "loss": 0.7979, |
| "step": 10100 |
| }, |
| { |
| "epoch": 0.16695447217016263, |
| "grad_norm": 1.0317082405090332, |
| "learning_rate": 1.9965531633693268e-05, |
| "loss": 0.8209, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.16859128072085047, |
| "grad_norm": 1.0699563026428223, |
| "learning_rate": 1.9964050179887088e-05, |
| "loss": 0.8035, |
| "step": 10300 |
| }, |
| { |
| "epoch": 0.17022808927153835, |
| "grad_norm": 0.9806187748908997, |
| "learning_rate": 1.9962537613186096e-05, |
| "loss": 0.7957, |
| "step": 10400 |
| }, |
| { |
| "epoch": 0.17186489782222622, |
| "grad_norm": 1.0728228092193604, |
| "learning_rate": 1.996099393831331e-05, |
| "loss": 0.791, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.1735017063729141, |
| "grad_norm": 1.028189778327942, |
| "learning_rate": 1.9959419160088874e-05, |
| "loss": 0.7964, |
| "step": 10600 |
| }, |
| { |
| "epoch": 0.17513851492360197, |
| "grad_norm": 1.0126999616622925, |
| "learning_rate": 1.9957813283430054e-05, |
| "loss": 0.799, |
| "step": 10700 |
| }, |
| { |
| "epoch": 0.17677532347428984, |
| "grad_norm": 0.96955406665802, |
| "learning_rate": 1.995617631335123e-05, |
| "loss": 0.8118, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.1784121320249777, |
| "grad_norm": 1.0654776096343994, |
| "learning_rate": 1.9954508254963865e-05, |
| "loss": 0.8084, |
| "step": 10900 |
| }, |
| { |
| "epoch": 0.18004894057566556, |
| "grad_norm": 0.9537600874900818, |
| "learning_rate": 1.9952809113476493e-05, |
| "loss": 0.8011, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.18168574912635344, |
| "grad_norm": 0.9695281982421875, |
| "learning_rate": 1.9951078894194708e-05, |
| "loss": 0.8054, |
| "step": 11100 |
| }, |
| { |
| "epoch": 0.1833225576770413, |
| "grad_norm": 1.0722426176071167, |
| "learning_rate": 1.9949317602521144e-05, |
| "loss": 0.7917, |
| "step": 11200 |
| }, |
| { |
| "epoch": 0.18495936622772918, |
| "grad_norm": 0.9706518054008484, |
| "learning_rate": 1.9947525243955467e-05, |
| "loss": 0.8055, |
| "step": 11300 |
| }, |
| { |
| "epoch": 0.18659617477841703, |
| "grad_norm": 0.9769388437271118, |
| "learning_rate": 1.994570182409434e-05, |
| "loss": 0.7981, |
| "step": 11400 |
| }, |
| { |
| "epoch": 0.1882329833291049, |
| "grad_norm": 0.9185972809791565, |
| "learning_rate": 1.9943847348631415e-05, |
| "loss": 0.7907, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.18986979187979278, |
| "grad_norm": 1.0683258771896362, |
| "learning_rate": 1.9941961823357322e-05, |
| "loss": 0.8021, |
| "step": 11600 |
| }, |
| { |
| "epoch": 0.19150660043048065, |
| "grad_norm": 0.9599470496177673, |
| "learning_rate": 1.9940045254159644e-05, |
| "loss": 0.7923, |
| "step": 11700 |
| }, |
| { |
| "epoch": 0.19314340898116852, |
| "grad_norm": 0.9822320938110352, |
| "learning_rate": 1.9938097647022895e-05, |
| "loss": 0.7864, |
| "step": 11800 |
| }, |
| { |
| "epoch": 0.1947802175318564, |
| "grad_norm": 1.180939793586731, |
| "learning_rate": 1.9936119008028503e-05, |
| "loss": 0.7841, |
| "step": 11900 |
| }, |
| { |
| "epoch": 0.19641702608254424, |
| "grad_norm": 1.1611251831054688, |
| "learning_rate": 1.9934109343354808e-05, |
| "loss": 0.7855, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.19805383463323212, |
| "grad_norm": 1.0176281929016113, |
| "learning_rate": 1.9932068659277006e-05, |
| "loss": 0.7936, |
| "step": 12100 |
| }, |
| { |
| "epoch": 0.19969064318392, |
| "grad_norm": 1.05084228515625, |
| "learning_rate": 1.992999696216717e-05, |
| "loss": 0.7856, |
| "step": 12200 |
| }, |
| { |
| "epoch": 0.20132745173460787, |
| "grad_norm": 1.1582859754562378, |
| "learning_rate": 1.9927894258494204e-05, |
| "loss": 0.8064, |
| "step": 12300 |
| }, |
| { |
| "epoch": 0.20296426028529574, |
| "grad_norm": 0.9974379539489746, |
| "learning_rate": 1.992576055482383e-05, |
| "loss": 0.7923, |
| "step": 12400 |
| }, |
| { |
| "epoch": 0.2046010688359836, |
| "grad_norm": 1.0076924562454224, |
| "learning_rate": 1.9923595857818573e-05, |
| "loss": 0.801, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.20623787738667146, |
| "grad_norm": 1.104923129081726, |
| "learning_rate": 1.9921400174237732e-05, |
| "loss": 0.8053, |
| "step": 12600 |
| }, |
| { |
| "epoch": 0.20787468593735933, |
| "grad_norm": 1.0884004831314087, |
| "learning_rate": 1.9919173510937355e-05, |
| "loss": 0.7948, |
| "step": 12700 |
| }, |
| { |
| "epoch": 0.2095114944880472, |
| "grad_norm": 0.9803980588912964, |
| "learning_rate": 1.9916915874870234e-05, |
| "loss": 0.791, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.21114830303873508, |
| "grad_norm": 1.0630168914794922, |
| "learning_rate": 1.9914627273085876e-05, |
| "loss": 0.7813, |
| "step": 12900 |
| }, |
| { |
| "epoch": 0.21278511158942295, |
| "grad_norm": 1.0575711727142334, |
| "learning_rate": 1.9912307712730468e-05, |
| "loss": 0.7862, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.2144219201401108, |
| "grad_norm": 1.0258235931396484, |
| "learning_rate": 1.9909957201046875e-05, |
| "loss": 0.7855, |
| "step": 13100 |
| }, |
| { |
| "epoch": 0.21605872869079867, |
| "grad_norm": 0.970610499382019, |
| "learning_rate": 1.9907575745374605e-05, |
| "loss": 0.7845, |
| "step": 13200 |
| }, |
| { |
| "epoch": 0.21769553724148655, |
| "grad_norm": 1.0707366466522217, |
| "learning_rate": 1.9905163353149787e-05, |
| "loss": 0.7986, |
| "step": 13300 |
| }, |
| { |
| "epoch": 0.21933234579217442, |
| "grad_norm": 0.9396125674247742, |
| "learning_rate": 1.9902720031905153e-05, |
| "loss": 0.7798, |
| "step": 13400 |
| }, |
| { |
| "epoch": 0.2209691543428623, |
| "grad_norm": 1.0123385190963745, |
| "learning_rate": 1.9900245789270006e-05, |
| "loss": 0.7866, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.22260596289355017, |
| "grad_norm": 0.9208526015281677, |
| "learning_rate": 1.989774063297021e-05, |
| "loss": 0.79, |
| "step": 13600 |
| }, |
| { |
| "epoch": 0.22424277144423801, |
| "grad_norm": 1.0145132541656494, |
| "learning_rate": 1.989520457082815e-05, |
| "loss": 0.7826, |
| "step": 13700 |
| }, |
| { |
| "epoch": 0.2258795799949259, |
| "grad_norm": 0.9474859237670898, |
| "learning_rate": 1.9892637610762723e-05, |
| "loss": 0.7904, |
| "step": 13800 |
| }, |
| { |
| "epoch": 0.22751638854561376, |
| "grad_norm": 0.997414767742157, |
| "learning_rate": 1.9890039760789294e-05, |
| "loss": 0.7863, |
| "step": 13900 |
| }, |
| { |
| "epoch": 0.22915319709630164, |
| "grad_norm": 1.0312907695770264, |
| "learning_rate": 1.9887411029019686e-05, |
| "loss": 0.7825, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.2307900056469895, |
| "grad_norm": 1.019665002822876, |
| "learning_rate": 1.9884751423662162e-05, |
| "loss": 0.7746, |
| "step": 14100 |
| }, |
| { |
| "epoch": 0.23242681419767736, |
| "grad_norm": 0.9788889288902283, |
| "learning_rate": 1.9882060953021375e-05, |
| "loss": 0.7805, |
| "step": 14200 |
| }, |
| { |
| "epoch": 0.23406362274836523, |
| "grad_norm": 1.1468379497528076, |
| "learning_rate": 1.9879339625498356e-05, |
| "loss": 0.7783, |
| "step": 14300 |
| }, |
| { |
| "epoch": 0.2357004312990531, |
| "grad_norm": 0.9630206823348999, |
| "learning_rate": 1.9876587449590496e-05, |
| "loss": 0.7785, |
| "step": 14400 |
| }, |
| { |
| "epoch": 0.23733723984974098, |
| "grad_norm": 1.0484507083892822, |
| "learning_rate": 1.98738044338915e-05, |
| "loss": 0.7577, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.23897404840042885, |
| "grad_norm": 0.9262145161628723, |
| "learning_rate": 1.987099058709138e-05, |
| "loss": 0.7847, |
| "step": 14600 |
| }, |
| { |
| "epoch": 0.24061085695111672, |
| "grad_norm": 1.0156426429748535, |
| "learning_rate": 1.9868145917976412e-05, |
| "loss": 0.7754, |
| "step": 14700 |
| }, |
| { |
| "epoch": 0.24224766550180457, |
| "grad_norm": 1.0557153224945068, |
| "learning_rate": 1.986527043542912e-05, |
| "loss": 0.783, |
| "step": 14800 |
| }, |
| { |
| "epoch": 0.24388447405249244, |
| "grad_norm": 0.9480391144752502, |
| "learning_rate": 1.9862364148428243e-05, |
| "loss": 0.7795, |
| "step": 14900 |
| }, |
| { |
| "epoch": 0.24552128260318032, |
| "grad_norm": 1.1189950704574585, |
| "learning_rate": 1.9859427066048694e-05, |
| "loss": 0.773, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.2471580911538682, |
| "grad_norm": 1.0406650304794312, |
| "learning_rate": 1.985645919746157e-05, |
| "loss": 0.7815, |
| "step": 15100 |
| }, |
| { |
| "epoch": 0.24879489970455607, |
| "grad_norm": 1.0539467334747314, |
| "learning_rate": 1.985346055193408e-05, |
| "loss": 0.7832, |
| "step": 15200 |
| }, |
| { |
| "epoch": 0.2504317082552439, |
| "grad_norm": 1.0707350969314575, |
| "learning_rate": 1.9850431138829537e-05, |
| "loss": 0.7775, |
| "step": 15300 |
| }, |
| { |
| "epoch": 0.2520685168059318, |
| "grad_norm": 1.0518571138381958, |
| "learning_rate": 1.9847370967607332e-05, |
| "loss": 0.7692, |
| "step": 15400 |
| }, |
| { |
| "epoch": 0.25370532535661966, |
| "grad_norm": 1.038328766822815, |
| "learning_rate": 1.9844280047822892e-05, |
| "loss": 0.7812, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.25534213390730753, |
| "grad_norm": 1.0571229457855225, |
| "learning_rate": 1.984115838912766e-05, |
| "loss": 0.7773, |
| "step": 15600 |
| }, |
| { |
| "epoch": 0.2569789424579954, |
| "grad_norm": 1.0450866222381592, |
| "learning_rate": 1.9838006001269064e-05, |
| "loss": 0.7789, |
| "step": 15700 |
| }, |
| { |
| "epoch": 0.2586157510086833, |
| "grad_norm": 1.107710838317871, |
| "learning_rate": 1.9834822894090478e-05, |
| "loss": 0.7628, |
| "step": 15800 |
| }, |
| { |
| "epoch": 0.26025255955937115, |
| "grad_norm": 1.0595227479934692, |
| "learning_rate": 1.9831609077531205e-05, |
| "loss": 0.7805, |
| "step": 15900 |
| }, |
| { |
| "epoch": 0.26188936811005903, |
| "grad_norm": 1.0978327989578247, |
| "learning_rate": 1.982836456162644e-05, |
| "loss": 0.7779, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.2635261766607469, |
| "grad_norm": 1.0871798992156982, |
| "learning_rate": 1.982508935650722e-05, |
| "loss": 0.7696, |
| "step": 16100 |
| }, |
| { |
| "epoch": 0.2651629852114347, |
| "grad_norm": 1.0791369676589966, |
| "learning_rate": 1.982178347240043e-05, |
| "loss": 0.7701, |
| "step": 16200 |
| }, |
| { |
| "epoch": 0.2667997937621226, |
| "grad_norm": 1.095301866531372, |
| "learning_rate": 1.981844691962874e-05, |
| "loss": 0.783, |
| "step": 16300 |
| }, |
| { |
| "epoch": 0.26843660231281047, |
| "grad_norm": 1.1223257780075073, |
| "learning_rate": 1.9815079708610588e-05, |
| "loss": 0.7785, |
| "step": 16400 |
| }, |
| { |
| "epoch": 0.27007341086349834, |
| "grad_norm": 1.0025781393051147, |
| "learning_rate": 1.9811681849860137e-05, |
| "loss": 0.7787, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.2717102194141862, |
| "grad_norm": 1.1232304573059082, |
| "learning_rate": 1.9808253353987252e-05, |
| "loss": 0.7655, |
| "step": 16600 |
| }, |
| { |
| "epoch": 0.2733470279648741, |
| "grad_norm": 0.9625865817070007, |
| "learning_rate": 1.9804794231697464e-05, |
| "loss": 0.785, |
| "step": 16700 |
| }, |
| { |
| "epoch": 0.27498383651556196, |
| "grad_norm": 1.1022255420684814, |
| "learning_rate": 1.980130449379193e-05, |
| "loss": 0.7681, |
| "step": 16800 |
| }, |
| { |
| "epoch": 0.27662064506624984, |
| "grad_norm": 1.0605260133743286, |
| "learning_rate": 1.9797784151167417e-05, |
| "loss": 0.7686, |
| "step": 16900 |
| }, |
| { |
| "epoch": 0.2782574536169377, |
| "grad_norm": 1.0693503618240356, |
| "learning_rate": 1.9794233214816237e-05, |
| "loss": 0.7653, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.2798942621676256, |
| "grad_norm": 1.0027199983596802, |
| "learning_rate": 1.979065169582625e-05, |
| "loss": 0.7802, |
| "step": 17100 |
| }, |
| { |
| "epoch": 0.28153107071831346, |
| "grad_norm": 1.002388834953308, |
| "learning_rate": 1.9787039605380792e-05, |
| "loss": 0.7668, |
| "step": 17200 |
| }, |
| { |
| "epoch": 0.2831678792690013, |
| "grad_norm": 1.0847641229629517, |
| "learning_rate": 1.9783396954758682e-05, |
| "loss": 0.7685, |
| "step": 17300 |
| }, |
| { |
| "epoch": 0.28480468781968915, |
| "grad_norm": 1.1153062582015991, |
| "learning_rate": 1.9779723755334142e-05, |
| "loss": 0.7761, |
| "step": 17400 |
| }, |
| { |
| "epoch": 0.286441496370377, |
| "grad_norm": 1.0675033330917358, |
| "learning_rate": 1.9776020018576794e-05, |
| "loss": 0.7637, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.2880783049210649, |
| "grad_norm": 1.0875293016433716, |
| "learning_rate": 1.9772285756051613e-05, |
| "loss": 0.7689, |
| "step": 17600 |
| }, |
| { |
| "epoch": 0.28971511347175277, |
| "grad_norm": 1.135380744934082, |
| "learning_rate": 1.9768520979418885e-05, |
| "loss": 0.7763, |
| "step": 17700 |
| }, |
| { |
| "epoch": 0.29135192202244065, |
| "grad_norm": 1.0305795669555664, |
| "learning_rate": 1.9764725700434183e-05, |
| "loss": 0.7688, |
| "step": 17800 |
| }, |
| { |
| "epoch": 0.2929887305731285, |
| "grad_norm": 1.0471090078353882, |
| "learning_rate": 1.976089993094832e-05, |
| "loss": 0.7573, |
| "step": 17900 |
| }, |
| { |
| "epoch": 0.2946255391238164, |
| "grad_norm": 1.0096269845962524, |
| "learning_rate": 1.9757043682907325e-05, |
| "loss": 0.7622, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.29626234767450427, |
| "grad_norm": 1.103242039680481, |
| "learning_rate": 1.9753156968352388e-05, |
| "loss": 0.7573, |
| "step": 18100 |
| }, |
| { |
| "epoch": 0.29789915622519214, |
| "grad_norm": 1.1128453016281128, |
| "learning_rate": 1.9749239799419827e-05, |
| "loss": 0.7692, |
| "step": 18200 |
| }, |
| { |
| "epoch": 0.29953596477588, |
| "grad_norm": 1.0762085914611816, |
| "learning_rate": 1.974529218834106e-05, |
| "loss": 0.7838, |
| "step": 18300 |
| }, |
| { |
| "epoch": 0.30117277332656783, |
| "grad_norm": 1.0150110721588135, |
| "learning_rate": 1.9741314147442573e-05, |
| "loss": 0.773, |
| "step": 18400 |
| }, |
| { |
| "epoch": 0.3028095818772557, |
| "grad_norm": 1.0824315547943115, |
| "learning_rate": 1.9737305689145842e-05, |
| "loss": 0.7636, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.3044463904279436, |
| "grad_norm": 1.2597285509109497, |
| "learning_rate": 1.973326682596735e-05, |
| "loss": 0.7688, |
| "step": 18600 |
| }, |
| { |
| "epoch": 0.30608319897863145, |
| "grad_norm": 1.112971544265747, |
| "learning_rate": 1.97291975705185e-05, |
| "loss": 0.762, |
| "step": 18700 |
| }, |
| { |
| "epoch": 0.30772000752931933, |
| "grad_norm": 1.11709725856781, |
| "learning_rate": 1.9725097935505607e-05, |
| "loss": 0.7674, |
| "step": 18800 |
| }, |
| { |
| "epoch": 0.3093568160800072, |
| "grad_norm": 1.0609350204467773, |
| "learning_rate": 1.972096793372984e-05, |
| "loss": 0.7603, |
| "step": 18900 |
| }, |
| { |
| "epoch": 0.3109936246306951, |
| "grad_norm": 1.111243486404419, |
| "learning_rate": 1.9716807578087193e-05, |
| "loss": 0.7572, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.31263043318138295, |
| "grad_norm": 0.9914565086364746, |
| "learning_rate": 1.971261688156843e-05, |
| "loss": 0.7558, |
| "step": 19100 |
| }, |
| { |
| "epoch": 0.3142672417320708, |
| "grad_norm": 1.030030369758606, |
| "learning_rate": 1.9708395857259077e-05, |
| "loss": 0.7558, |
| "step": 19200 |
| }, |
| { |
| "epoch": 0.3159040502827587, |
| "grad_norm": 1.1039714813232422, |
| "learning_rate": 1.9704144518339336e-05, |
| "loss": 0.7507, |
| "step": 19300 |
| }, |
| { |
| "epoch": 0.31754085883344657, |
| "grad_norm": 1.0048165321350098, |
| "learning_rate": 1.969986287808408e-05, |
| "loss": 0.7806, |
| "step": 19400 |
| }, |
| { |
| "epoch": 0.3191776673841344, |
| "grad_norm": 1.2964001893997192, |
| "learning_rate": 1.969555094986279e-05, |
| "loss": 0.7504, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.32081447593482226, |
| "grad_norm": 1.198273777961731, |
| "learning_rate": 1.9691208747139527e-05, |
| "loss": 0.7597, |
| "step": 19600 |
| }, |
| { |
| "epoch": 0.32245128448551014, |
| "grad_norm": 1.0260130167007446, |
| "learning_rate": 1.968683628347289e-05, |
| "loss": 0.7571, |
| "step": 19700 |
| }, |
| { |
| "epoch": 0.324088093036198, |
| "grad_norm": 1.1643099784851074, |
| "learning_rate": 1.9682433572515952e-05, |
| "loss": 0.7712, |
| "step": 19800 |
| }, |
| { |
| "epoch": 0.3257249015868859, |
| "grad_norm": 1.1653162240982056, |
| "learning_rate": 1.9678000628016248e-05, |
| "loss": 0.7599, |
| "step": 19900 |
| }, |
| { |
| "epoch": 0.32736171013757376, |
| "grad_norm": 1.5513461828231812, |
| "learning_rate": 1.9673537463815718e-05, |
| "loss": 0.7673, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.32899851868826163, |
| "grad_norm": 1.138498306274414, |
| "learning_rate": 1.9669044093850652e-05, |
| "loss": 0.7521, |
| "step": 20100 |
| }, |
| { |
| "epoch": 0.3306353272389495, |
| "grad_norm": 1.0548768043518066, |
| "learning_rate": 1.9664520532151664e-05, |
| "loss": 0.7596, |
| "step": 20200 |
| }, |
| { |
| "epoch": 0.3322721357896374, |
| "grad_norm": 1.0597394704818726, |
| "learning_rate": 1.965996679284365e-05, |
| "loss": 0.7586, |
| "step": 20300 |
| }, |
| { |
| "epoch": 0.33390894434032525, |
| "grad_norm": 1.1359139680862427, |
| "learning_rate": 1.965538289014572e-05, |
| "loss": 0.7618, |
| "step": 20400 |
| }, |
| { |
| "epoch": 0.3355457528910131, |
| "grad_norm": 1.1026830673217773, |
| "learning_rate": 1.9650768838371182e-05, |
| "loss": 0.7613, |
| "step": 20500 |
| }, |
| { |
| "epoch": 0.33718256144170095, |
| "grad_norm": 1.0065330266952515, |
| "learning_rate": 1.9646124651927484e-05, |
| "loss": 0.7394, |
| "step": 20600 |
| }, |
| { |
| "epoch": 0.3388193699923888, |
| "grad_norm": 0.9368694424629211, |
| "learning_rate": 1.964145034531616e-05, |
| "loss": 0.761, |
| "step": 20700 |
| }, |
| { |
| "epoch": 0.3404561785430767, |
| "grad_norm": 0.9686558246612549, |
| "learning_rate": 1.9636745933132807e-05, |
| "loss": 0.7597, |
| "step": 20800 |
| }, |
| { |
| "epoch": 0.34209298709376457, |
| "grad_norm": 1.114066243171692, |
| "learning_rate": 1.9632011430067024e-05, |
| "loss": 0.7675, |
| "step": 20900 |
| }, |
| { |
| "epoch": 0.34372979564445244, |
| "grad_norm": 1.1572498083114624, |
| "learning_rate": 1.9627246850902363e-05, |
| "loss": 0.7576, |
| "step": 21000 |
| }, |
| { |
| "epoch": 0.3453666041951403, |
| "grad_norm": 1.0342215299606323, |
| "learning_rate": 1.9622452210516296e-05, |
| "loss": 0.7629, |
| "step": 21100 |
| }, |
| { |
| "epoch": 0.3470034127458282, |
| "grad_norm": 1.0652525424957275, |
| "learning_rate": 1.9617627523880158e-05, |
| "loss": 0.7636, |
| "step": 21200 |
| }, |
| { |
| "epoch": 0.34864022129651606, |
| "grad_norm": 1.048869013786316, |
| "learning_rate": 1.9612772806059104e-05, |
| "loss": 0.7625, |
| "step": 21300 |
| }, |
| { |
| "epoch": 0.35027702984720394, |
| "grad_norm": 1.1751947402954102, |
| "learning_rate": 1.9607888072212062e-05, |
| "loss": 0.7475, |
| "step": 21400 |
| }, |
| { |
| "epoch": 0.3519138383978918, |
| "grad_norm": 1.2830709218978882, |
| "learning_rate": 1.9602973337591688e-05, |
| "loss": 0.7558, |
| "step": 21500 |
| }, |
| { |
| "epoch": 0.3535506469485797, |
| "grad_norm": 1.1591740846633911, |
| "learning_rate": 1.9598028617544313e-05, |
| "loss": 0.7435, |
| "step": 21600 |
| }, |
| { |
| "epoch": 0.3551874554992675, |
| "grad_norm": 0.9801552295684814, |
| "learning_rate": 1.95930539275099e-05, |
| "loss": 0.7621, |
| "step": 21700 |
| }, |
| { |
| "epoch": 0.3568242640499554, |
| "grad_norm": 1.126760721206665, |
| "learning_rate": 1.958804928302199e-05, |
| "loss": 0.7672, |
| "step": 21800 |
| }, |
| { |
| "epoch": 0.35846107260064325, |
| "grad_norm": 1.0655152797698975, |
| "learning_rate": 1.958301469970766e-05, |
| "loss": 0.7491, |
| "step": 21900 |
| }, |
| { |
| "epoch": 0.3600978811513311, |
| "grad_norm": 1.1613372564315796, |
| "learning_rate": 1.9577950193287475e-05, |
| "loss": 0.7733, |
| "step": 22000 |
| }, |
| { |
| "epoch": 0.361734689702019, |
| "grad_norm": 0.9363147020339966, |
| "learning_rate": 1.9572855779575427e-05, |
| "loss": 0.7522, |
| "step": 22100 |
| }, |
| { |
| "epoch": 0.36337149825270687, |
| "grad_norm": 1.1021246910095215, |
| "learning_rate": 1.9567731474478903e-05, |
| "loss": 0.7539, |
| "step": 22200 |
| }, |
| { |
| "epoch": 0.36500830680339474, |
| "grad_norm": 1.084695816040039, |
| "learning_rate": 1.9562577293998616e-05, |
| "loss": 0.7514, |
| "step": 22300 |
| }, |
| { |
| "epoch": 0.3666451153540826, |
| "grad_norm": 1.1221933364868164, |
| "learning_rate": 1.9557393254228575e-05, |
| "loss": 0.7608, |
| "step": 22400 |
| }, |
| { |
| "epoch": 0.3682819239047705, |
| "grad_norm": 1.073371410369873, |
| "learning_rate": 1.9552179371356024e-05, |
| "loss": 0.7509, |
| "step": 22500 |
| }, |
| { |
| "epoch": 0.36991873245545837, |
| "grad_norm": 1.124243140220642, |
| "learning_rate": 1.9546935661661382e-05, |
| "loss": 0.7552, |
| "step": 22600 |
| }, |
| { |
| "epoch": 0.37155554100614624, |
| "grad_norm": 1.0397138595581055, |
| "learning_rate": 1.9541662141518222e-05, |
| "loss": 0.7451, |
| "step": 22700 |
| }, |
| { |
| "epoch": 0.37319234955683406, |
| "grad_norm": 1.0600475072860718, |
| "learning_rate": 1.9536358827393177e-05, |
| "loss": 0.7358, |
| "step": 22800 |
| }, |
| { |
| "epoch": 0.37482915810752193, |
| "grad_norm": 1.1461478471755981, |
| "learning_rate": 1.953102573584593e-05, |
| "loss": 0.7513, |
| "step": 22900 |
| }, |
| { |
| "epoch": 0.3764659666582098, |
| "grad_norm": 1.093103051185608, |
| "learning_rate": 1.952566288352914e-05, |
| "loss": 0.7369, |
| "step": 23000 |
| }, |
| { |
| "epoch": 0.3781027752088977, |
| "grad_norm": 1.2357380390167236, |
| "learning_rate": 1.952027028718839e-05, |
| "loss": 0.7628, |
| "step": 23100 |
| }, |
| { |
| "epoch": 0.37973958375958555, |
| "grad_norm": 0.9737277030944824, |
| "learning_rate": 1.9514847963662144e-05, |
| "loss": 0.7358, |
| "step": 23200 |
| }, |
| { |
| "epoch": 0.3813763923102734, |
| "grad_norm": 1.0810784101486206, |
| "learning_rate": 1.9509395929881683e-05, |
| "loss": 0.7431, |
| "step": 23300 |
| }, |
| { |
| "epoch": 0.3830132008609613, |
| "grad_norm": 1.0600659847259521, |
| "learning_rate": 1.9503914202871072e-05, |
| "loss": 0.7465, |
| "step": 23400 |
| }, |
| { |
| "epoch": 0.3846500094116492, |
| "grad_norm": 1.129676342010498, |
| "learning_rate": 1.9498402799747077e-05, |
| "loss": 0.746, |
| "step": 23500 |
| }, |
| { |
| "epoch": 0.38628681796233705, |
| "grad_norm": 1.0627739429473877, |
| "learning_rate": 1.9492861737719145e-05, |
| "loss": 0.7517, |
| "step": 23600 |
| }, |
| { |
| "epoch": 0.3879236265130249, |
| "grad_norm": 1.0382601022720337, |
| "learning_rate": 1.9487291034089316e-05, |
| "loss": 0.7466, |
| "step": 23700 |
| }, |
| { |
| "epoch": 0.3895604350637128, |
| "grad_norm": 1.0782064199447632, |
| "learning_rate": 1.9481690706252198e-05, |
| "loss": 0.7436, |
| "step": 23800 |
| }, |
| { |
| "epoch": 0.39119724361440067, |
| "grad_norm": 1.052713394165039, |
| "learning_rate": 1.94760607716949e-05, |
| "loss": 0.7363, |
| "step": 23900 |
| }, |
| { |
| "epoch": 0.3928340521650885, |
| "grad_norm": 1.0485634803771973, |
| "learning_rate": 1.947040124799697e-05, |
| "loss": 0.7491, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.39447086071577636, |
| "grad_norm": 1.1206567287445068, |
| "learning_rate": 1.9464712152830368e-05, |
| "loss": 0.7372, |
| "step": 24100 |
| }, |
| { |
| "epoch": 0.39610766926646424, |
| "grad_norm": 1.0319308042526245, |
| "learning_rate": 1.9458993503959368e-05, |
| "loss": 0.7493, |
| "step": 24200 |
| }, |
| { |
| "epoch": 0.3977444778171521, |
| "grad_norm": 1.1401089429855347, |
| "learning_rate": 1.9453245319240533e-05, |
| "loss": 0.7693, |
| "step": 24300 |
| }, |
| { |
| "epoch": 0.39938128636784, |
| "grad_norm": 1.2440853118896484, |
| "learning_rate": 1.944746761662266e-05, |
| "loss": 0.7477, |
| "step": 24400 |
| }, |
| { |
| "epoch": 0.40101809491852786, |
| "grad_norm": 1.1666104793548584, |
| "learning_rate": 1.9441660414146715e-05, |
| "loss": 0.7364, |
| "step": 24500 |
| }, |
| { |
| "epoch": 0.40265490346921573, |
| "grad_norm": 1.0812019109725952, |
| "learning_rate": 1.9435823729945768e-05, |
| "loss": 0.7278, |
| "step": 24600 |
| }, |
| { |
| "epoch": 0.4042917120199036, |
| "grad_norm": 1.1338680982589722, |
| "learning_rate": 1.9429957582244957e-05, |
| "loss": 0.7396, |
| "step": 24700 |
| }, |
| { |
| "epoch": 0.4059285205705915, |
| "grad_norm": 1.0170310735702515, |
| "learning_rate": 1.942406198936141e-05, |
| "loss": 0.7373, |
| "step": 24800 |
| }, |
| { |
| "epoch": 0.40756532912127935, |
| "grad_norm": 1.0910414457321167, |
| "learning_rate": 1.941813696970421e-05, |
| "loss": 0.743, |
| "step": 24900 |
| }, |
| { |
| "epoch": 0.4092021376719672, |
| "grad_norm": 0.9840279221534729, |
| "learning_rate": 1.9412182541774312e-05, |
| "loss": 0.7432, |
| "step": 25000 |
| }, |
| { |
| "epoch": 0.41083894622265504, |
| "grad_norm": 1.1482113599777222, |
| "learning_rate": 1.9406198724164515e-05, |
| "loss": 0.7457, |
| "step": 25100 |
| }, |
| { |
| "epoch": 0.4124757547733429, |
| "grad_norm": 0.9647344946861267, |
| "learning_rate": 1.9400185535559366e-05, |
| "loss": 0.7494, |
| "step": 25200 |
| }, |
| { |
| "epoch": 0.4141125633240308, |
| "grad_norm": 1.1271613836288452, |
| "learning_rate": 1.9394142994735147e-05, |
| "loss": 0.7358, |
| "step": 25300 |
| }, |
| { |
| "epoch": 0.41574937187471867, |
| "grad_norm": 1.1209514141082764, |
| "learning_rate": 1.9388071120559774e-05, |
| "loss": 0.7477, |
| "step": 25400 |
| }, |
| { |
| "epoch": 0.41738618042540654, |
| "grad_norm": 1.1221638917922974, |
| "learning_rate": 1.9381969931992768e-05, |
| "loss": 0.7401, |
| "step": 25500 |
| }, |
| { |
| "epoch": 0.4190229889760944, |
| "grad_norm": 1.1341800689697266, |
| "learning_rate": 1.937583944808518e-05, |
| "loss": 0.7341, |
| "step": 25600 |
| }, |
| { |
| "epoch": 0.4206597975267823, |
| "grad_norm": 1.0561330318450928, |
| "learning_rate": 1.9369679687979538e-05, |
| "loss": 0.7427, |
| "step": 25700 |
| }, |
| { |
| "epoch": 0.42229660607747016, |
| "grad_norm": 1.0445774793624878, |
| "learning_rate": 1.9363490670909788e-05, |
| "loss": 0.7485, |
| "step": 25800 |
| }, |
| { |
| "epoch": 0.42393341462815803, |
| "grad_norm": 1.1463161706924438, |
| "learning_rate": 1.9357272416201214e-05, |
| "loss": 0.7345, |
| "step": 25900 |
| }, |
| { |
| "epoch": 0.4255702231788459, |
| "grad_norm": 1.1426818370819092, |
| "learning_rate": 1.9351024943270426e-05, |
| "loss": 0.7369, |
| "step": 26000 |
| }, |
| { |
| "epoch": 0.4272070317295338, |
| "grad_norm": 1.0911140441894531, |
| "learning_rate": 1.934474827162524e-05, |
| "loss": 0.7472, |
| "step": 26100 |
| }, |
| { |
| "epoch": 0.4288438402802216, |
| "grad_norm": 1.0775692462921143, |
| "learning_rate": 1.9338442420864663e-05, |
| "loss": 0.7401, |
| "step": 26200 |
| }, |
| { |
| "epoch": 0.4304806488309095, |
| "grad_norm": 1.136518955230713, |
| "learning_rate": 1.9332107410678805e-05, |
| "loss": 0.7355, |
| "step": 26300 |
| }, |
| { |
| "epoch": 0.43211745738159735, |
| "grad_norm": 1.085319995880127, |
| "learning_rate": 1.932574326084883e-05, |
| "loss": 0.7485, |
| "step": 26400 |
| }, |
| { |
| "epoch": 0.4337542659322852, |
| "grad_norm": 1.034986972808838, |
| "learning_rate": 1.9319349991246887e-05, |
| "loss": 0.7422, |
| "step": 26500 |
| }, |
| { |
| "epoch": 0.4353910744829731, |
| "grad_norm": 1.1199235916137695, |
| "learning_rate": 1.9312927621836058e-05, |
| "loss": 0.7362, |
| "step": 26600 |
| }, |
| { |
| "epoch": 0.43702788303366097, |
| "grad_norm": 1.1646606922149658, |
| "learning_rate": 1.930647617267029e-05, |
| "loss": 0.7274, |
| "step": 26700 |
| }, |
| { |
| "epoch": 0.43866469158434884, |
| "grad_norm": 1.1620571613311768, |
| "learning_rate": 1.9299995663894325e-05, |
| "loss": 0.7351, |
| "step": 26800 |
| }, |
| { |
| "epoch": 0.4403015001350367, |
| "grad_norm": 1.1194571256637573, |
| "learning_rate": 1.9293486115743646e-05, |
| "loss": 0.7309, |
| "step": 26900 |
| }, |
| { |
| "epoch": 0.4419383086857246, |
| "grad_norm": 1.1805561780929565, |
| "learning_rate": 1.928694754854442e-05, |
| "loss": 0.7378, |
| "step": 27000 |
| }, |
| { |
| "epoch": 0.44357511723641246, |
| "grad_norm": 1.1845600605010986, |
| "learning_rate": 1.9280379982713417e-05, |
| "loss": 0.7319, |
| "step": 27100 |
| }, |
| { |
| "epoch": 0.44521192578710034, |
| "grad_norm": 1.2962830066680908, |
| "learning_rate": 1.927378343875796e-05, |
| "loss": 0.7305, |
| "step": 27200 |
| }, |
| { |
| "epoch": 0.44684873433778816, |
| "grad_norm": 1.0655794143676758, |
| "learning_rate": 1.9267157937275854e-05, |
| "loss": 0.7236, |
| "step": 27300 |
| }, |
| { |
| "epoch": 0.44848554288847603, |
| "grad_norm": 1.0807515382766724, |
| "learning_rate": 1.9260503498955326e-05, |
| "loss": 0.7326, |
| "step": 27400 |
| }, |
| { |
| "epoch": 0.4501223514391639, |
| "grad_norm": 1.0515137910842896, |
| "learning_rate": 1.9253820144574958e-05, |
| "loss": 0.7293, |
| "step": 27500 |
| }, |
| { |
| "epoch": 0.4517591599898518, |
| "grad_norm": 1.103508710861206, |
| "learning_rate": 1.9247107895003628e-05, |
| "loss": 0.7473, |
| "step": 27600 |
| }, |
| { |
| "epoch": 0.45339596854053965, |
| "grad_norm": 1.1016185283660889, |
| "learning_rate": 1.924036677120043e-05, |
| "loss": 0.7264, |
| "step": 27700 |
| }, |
| { |
| "epoch": 0.4550327770912275, |
| "grad_norm": 1.0213091373443604, |
| "learning_rate": 1.9233596794214623e-05, |
| "loss": 0.7325, |
| "step": 27800 |
| }, |
| { |
| "epoch": 0.4566695856419154, |
| "grad_norm": 1.1028705835342407, |
| "learning_rate": 1.9226797985185565e-05, |
| "loss": 0.7381, |
| "step": 27900 |
| }, |
| { |
| "epoch": 0.4583063941926033, |
| "grad_norm": 1.0844396352767944, |
| "learning_rate": 1.9219970365342634e-05, |
| "loss": 0.7279, |
| "step": 28000 |
| }, |
| { |
| "epoch": 0.45994320274329115, |
| "grad_norm": 1.037714958190918, |
| "learning_rate": 1.9213113956005176e-05, |
| "loss": 0.7433, |
| "step": 28100 |
| }, |
| { |
| "epoch": 0.461580011293979, |
| "grad_norm": 1.2123370170593262, |
| "learning_rate": 1.9206228778582435e-05, |
| "loss": 0.7341, |
| "step": 28200 |
| }, |
| { |
| "epoch": 0.4632168198446669, |
| "grad_norm": 1.013845682144165, |
| "learning_rate": 1.9199314854573474e-05, |
| "loss": 0.7369, |
| "step": 28300 |
| }, |
| { |
| "epoch": 0.4648536283953547, |
| "grad_norm": 1.0552864074707031, |
| "learning_rate": 1.9192372205567123e-05, |
| "loss": 0.7202, |
| "step": 28400 |
| }, |
| { |
| "epoch": 0.4664904369460426, |
| "grad_norm": 1.049025058746338, |
| "learning_rate": 1.9185400853241917e-05, |
| "loss": 0.7246, |
| "step": 28500 |
| }, |
| { |
| "epoch": 0.46812724549673046, |
| "grad_norm": 1.0877737998962402, |
| "learning_rate": 1.9178400819365994e-05, |
| "loss": 0.7261, |
| "step": 28600 |
| }, |
| { |
| "epoch": 0.46976405404741833, |
| "grad_norm": 1.099348783493042, |
| "learning_rate": 1.9171372125797072e-05, |
| "loss": 0.7327, |
| "step": 28700 |
| }, |
| { |
| "epoch": 0.4714008625981062, |
| "grad_norm": 1.1000944375991821, |
| "learning_rate": 1.916431479448235e-05, |
| "loss": 0.7305, |
| "step": 28800 |
| }, |
| { |
| "epoch": 0.4730376711487941, |
| "grad_norm": 1.0979351997375488, |
| "learning_rate": 1.9157228847458446e-05, |
| "loss": 0.7279, |
| "step": 28900 |
| }, |
| { |
| "epoch": 0.47467447969948195, |
| "grad_norm": 1.0918766260147095, |
| "learning_rate": 1.9150114306851336e-05, |
| "loss": 0.7215, |
| "step": 29000 |
| }, |
| { |
| "epoch": 0.47631128825016983, |
| "grad_norm": 1.109971046447754, |
| "learning_rate": 1.9142971194876284e-05, |
| "loss": 0.7322, |
| "step": 29100 |
| }, |
| { |
| "epoch": 0.4779480968008577, |
| "grad_norm": 1.1282057762145996, |
| "learning_rate": 1.913579953383776e-05, |
| "loss": 0.7257, |
| "step": 29200 |
| }, |
| { |
| "epoch": 0.4795849053515456, |
| "grad_norm": 1.1076371669769287, |
| "learning_rate": 1.912859934612938e-05, |
| "loss": 0.7516, |
| "step": 29300 |
| }, |
| { |
| "epoch": 0.48122171390223345, |
| "grad_norm": 1.1480896472930908, |
| "learning_rate": 1.9121370654233843e-05, |
| "loss": 0.728, |
| "step": 29400 |
| }, |
| { |
| "epoch": 0.48285852245292127, |
| "grad_norm": 1.1083163022994995, |
| "learning_rate": 1.911411348072284e-05, |
| "loss": 0.7235, |
| "step": 29500 |
| }, |
| { |
| "epoch": 0.48449533100360914, |
| "grad_norm": 1.2141623497009277, |
| "learning_rate": 1.9106827848257007e-05, |
| "loss": 0.7237, |
| "step": 29600 |
| }, |
| { |
| "epoch": 0.486132139554297, |
| "grad_norm": 1.0334457159042358, |
| "learning_rate": 1.9099513779585836e-05, |
| "loss": 0.7306, |
| "step": 29700 |
| }, |
| { |
| "epoch": 0.4877689481049849, |
| "grad_norm": 1.1086657047271729, |
| "learning_rate": 1.909217129754762e-05, |
| "loss": 0.7295, |
| "step": 29800 |
| }, |
| { |
| "epoch": 0.48940575665567276, |
| "grad_norm": 1.0128360986709595, |
| "learning_rate": 1.908480042506937e-05, |
| "loss": 0.733, |
| "step": 29900 |
| }, |
| { |
| "epoch": 0.49104256520636064, |
| "grad_norm": 1.1484946012496948, |
| "learning_rate": 1.907740118516674e-05, |
| "loss": 0.7396, |
| "step": 30000 |
| }, |
| { |
| "epoch": 0.4926793737570485, |
| "grad_norm": 1.031750202178955, |
| "learning_rate": 1.9069973600943962e-05, |
| "loss": 0.7204, |
| "step": 30100 |
| }, |
| { |
| "epoch": 0.4943161823077364, |
| "grad_norm": 1.1274133920669556, |
| "learning_rate": 1.9062517695593792e-05, |
| "loss": 0.7235, |
| "step": 30200 |
| }, |
| { |
| "epoch": 0.49595299085842426, |
| "grad_norm": 1.1863317489624023, |
| "learning_rate": 1.9055033492397396e-05, |
| "loss": 0.7329, |
| "step": 30300 |
| }, |
| { |
| "epoch": 0.49758979940911213, |
| "grad_norm": 1.0985053777694702, |
| "learning_rate": 1.9047521014724303e-05, |
| "loss": 0.7341, |
| "step": 30400 |
| }, |
| { |
| "epoch": 0.4992266079598, |
| "grad_norm": 1.136760950088501, |
| "learning_rate": 1.9039980286032353e-05, |
| "loss": 0.7189, |
| "step": 30500 |
| }, |
| { |
| "epoch": 0.5008634165104878, |
| "grad_norm": 1.0787100791931152, |
| "learning_rate": 1.9032411329867573e-05, |
| "loss": 0.7298, |
| "step": 30600 |
| }, |
| { |
| "epoch": 0.5025002250611758, |
| "grad_norm": 1.3436377048492432, |
| "learning_rate": 1.902481416986414e-05, |
| "loss": 0.719, |
| "step": 30700 |
| }, |
| { |
| "epoch": 0.5041370336118636, |
| "grad_norm": 1.1863504648208618, |
| "learning_rate": 1.9017188829744305e-05, |
| "loss": 0.7125, |
| "step": 30800 |
| }, |
| { |
| "epoch": 0.5057738421625515, |
| "grad_norm": 1.0385360717773438, |
| "learning_rate": 1.90095353333183e-05, |
| "loss": 0.7297, |
| "step": 30900 |
| }, |
| { |
| "epoch": 0.5074106507132393, |
| "grad_norm": 1.1736425161361694, |
| "learning_rate": 1.9001853704484285e-05, |
| "loss": 0.7205, |
| "step": 31000 |
| }, |
| { |
| "epoch": 0.5090474592639272, |
| "grad_norm": 1.0939114093780518, |
| "learning_rate": 1.899414396722826e-05, |
| "loss": 0.741, |
| "step": 31100 |
| }, |
| { |
| "epoch": 0.5106842678146151, |
| "grad_norm": 1.3368091583251953, |
| "learning_rate": 1.8986406145623996e-05, |
| "loss": 0.7277, |
| "step": 31200 |
| }, |
| { |
| "epoch": 0.5123210763653029, |
| "grad_norm": 1.1556004285812378, |
| "learning_rate": 1.897864026383295e-05, |
| "loss": 0.7383, |
| "step": 31300 |
| }, |
| { |
| "epoch": 0.5139578849159908, |
| "grad_norm": 1.2308059930801392, |
| "learning_rate": 1.897084634610421e-05, |
| "loss": 0.7188, |
| "step": 31400 |
| }, |
| { |
| "epoch": 0.5155946934666786, |
| "grad_norm": 1.1211739778518677, |
| "learning_rate": 1.8963024416774393e-05, |
| "loss": 0.7241, |
| "step": 31500 |
| }, |
| { |
| "epoch": 0.5172315020173666, |
| "grad_norm": 1.1302770376205444, |
| "learning_rate": 1.8955174500267596e-05, |
| "loss": 0.7207, |
| "step": 31600 |
| }, |
| { |
| "epoch": 0.5188683105680544, |
| "grad_norm": 1.1893266439437866, |
| "learning_rate": 1.8947296621095297e-05, |
| "loss": 0.7088, |
| "step": 31700 |
| }, |
| { |
| "epoch": 0.5205051191187423, |
| "grad_norm": 1.2034817934036255, |
| "learning_rate": 1.893939080385629e-05, |
| "loss": 0.7225, |
| "step": 31800 |
| }, |
| { |
| "epoch": 0.5221419276694301, |
| "grad_norm": 1.0935208797454834, |
| "learning_rate": 1.8931457073236612e-05, |
| "loss": 0.7219, |
| "step": 31900 |
| }, |
| { |
| "epoch": 0.5237787362201181, |
| "grad_norm": 1.2129491567611694, |
| "learning_rate": 1.892349545400945e-05, |
| "loss": 0.7323, |
| "step": 32000 |
| }, |
| { |
| "epoch": 0.5254155447708059, |
| "grad_norm": 1.0750499963760376, |
| "learning_rate": 1.8915505971035077e-05, |
| "loss": 0.7213, |
| "step": 32100 |
| }, |
| { |
| "epoch": 0.5270523533214938, |
| "grad_norm": 1.1311250925064087, |
| "learning_rate": 1.8907488649260775e-05, |
| "loss": 0.7265, |
| "step": 32200 |
| }, |
| { |
| "epoch": 0.5286891618721816, |
| "grad_norm": 1.1503121852874756, |
| "learning_rate": 1.889944351372075e-05, |
| "loss": 0.7177, |
| "step": 32300 |
| }, |
| { |
| "epoch": 0.5303259704228694, |
| "grad_norm": 1.3034614324569702, |
| "learning_rate": 1.8891370589536058e-05, |
| "loss": 0.7118, |
| "step": 32400 |
| }, |
| { |
| "epoch": 0.5319627789735574, |
| "grad_norm": 1.0626057386398315, |
| "learning_rate": 1.8883269901914524e-05, |
| "loss": 0.7205, |
| "step": 32500 |
| }, |
| { |
| "epoch": 0.5335995875242452, |
| "grad_norm": 1.2290301322937012, |
| "learning_rate": 1.8875141476150664e-05, |
| "loss": 0.73, |
| "step": 32600 |
| }, |
| { |
| "epoch": 0.5352363960749331, |
| "grad_norm": 1.2172757387161255, |
| "learning_rate": 1.8866985337625615e-05, |
| "loss": 0.7234, |
| "step": 32700 |
| }, |
| { |
| "epoch": 0.5368732046256209, |
| "grad_norm": 1.0496524572372437, |
| "learning_rate": 1.885880151180703e-05, |
| "loss": 0.7127, |
| "step": 32800 |
| }, |
| { |
| "epoch": 0.5385100131763089, |
| "grad_norm": 0.9903925061225891, |
| "learning_rate": 1.8850590024249037e-05, |
| "loss": 0.728, |
| "step": 32900 |
| }, |
| { |
| "epoch": 0.5401468217269967, |
| "grad_norm": 1.2562659978866577, |
| "learning_rate": 1.8842350900592122e-05, |
| "loss": 0.7188, |
| "step": 33000 |
| }, |
| { |
| "epoch": 0.5417836302776846, |
| "grad_norm": 1.2212430238723755, |
| "learning_rate": 1.8834084166563072e-05, |
| "loss": 0.7086, |
| "step": 33100 |
| }, |
| { |
| "epoch": 0.5434204388283724, |
| "grad_norm": 1.1504745483398438, |
| "learning_rate": 1.882578984797489e-05, |
| "loss": 0.7198, |
| "step": 33200 |
| }, |
| { |
| "epoch": 0.5450572473790604, |
| "grad_norm": 1.1029900312423706, |
| "learning_rate": 1.8817467970726704e-05, |
| "loss": 0.729, |
| "step": 33300 |
| }, |
| { |
| "epoch": 0.5466940559297482, |
| "grad_norm": 1.1274054050445557, |
| "learning_rate": 1.8809118560803704e-05, |
| "loss": 0.7249, |
| "step": 33400 |
| }, |
| { |
| "epoch": 0.548330864480436, |
| "grad_norm": 1.093854546546936, |
| "learning_rate": 1.880074164427704e-05, |
| "loss": 0.704, |
| "step": 33500 |
| }, |
| { |
| "epoch": 0.5499676730311239, |
| "grad_norm": 1.0846567153930664, |
| "learning_rate": 1.879233724730377e-05, |
| "loss": 0.7194, |
| "step": 33600 |
| }, |
| { |
| "epoch": 0.5516044815818117, |
| "grad_norm": 1.35237455368042, |
| "learning_rate": 1.8783905396126737e-05, |
| "loss": 0.7205, |
| "step": 33700 |
| }, |
| { |
| "epoch": 0.5532412901324997, |
| "grad_norm": 0.9714828133583069, |
| "learning_rate": 1.8775446117074528e-05, |
| "loss": 0.7334, |
| "step": 33800 |
| }, |
| { |
| "epoch": 0.5548780986831875, |
| "grad_norm": 1.2619616985321045, |
| "learning_rate": 1.8766959436561363e-05, |
| "loss": 0.718, |
| "step": 33900 |
| }, |
| { |
| "epoch": 0.5565149072338754, |
| "grad_norm": 1.036129355430603, |
| "learning_rate": 1.8758445381087034e-05, |
| "loss": 0.7191, |
| "step": 34000 |
| }, |
| { |
| "epoch": 0.5581517157845632, |
| "grad_norm": 1.097095012664795, |
| "learning_rate": 1.8749903977236802e-05, |
| "loss": 0.7171, |
| "step": 34100 |
| }, |
| { |
| "epoch": 0.5597885243352512, |
| "grad_norm": 1.1133558750152588, |
| "learning_rate": 1.8741335251681328e-05, |
| "loss": 0.7179, |
| "step": 34200 |
| }, |
| { |
| "epoch": 0.561425332885939, |
| "grad_norm": 1.0562981367111206, |
| "learning_rate": 1.8732739231176587e-05, |
| "loss": 0.7201, |
| "step": 34300 |
| }, |
| { |
| "epoch": 0.5630621414366269, |
| "grad_norm": 1.20978581905365, |
| "learning_rate": 1.8724115942563773e-05, |
| "loss": 0.7129, |
| "step": 34400 |
| }, |
| { |
| "epoch": 0.5646989499873147, |
| "grad_norm": 1.0966860055923462, |
| "learning_rate": 1.8715465412769243e-05, |
| "loss": 0.715, |
| "step": 34500 |
| }, |
| { |
| "epoch": 0.5663357585380026, |
| "grad_norm": 1.2173317670822144, |
| "learning_rate": 1.87067876688044e-05, |
| "loss": 0.7052, |
| "step": 34600 |
| }, |
| { |
| "epoch": 0.5679725670886905, |
| "grad_norm": 1.126670241355896, |
| "learning_rate": 1.869808273776563e-05, |
| "loss": 0.7172, |
| "step": 34700 |
| }, |
| { |
| "epoch": 0.5696093756393783, |
| "grad_norm": 1.0486496686935425, |
| "learning_rate": 1.8689350646834207e-05, |
| "loss": 0.7269, |
| "step": 34800 |
| }, |
| { |
| "epoch": 0.5712461841900662, |
| "grad_norm": 1.1730561256408691, |
| "learning_rate": 1.868059142327622e-05, |
| "loss": 0.7191, |
| "step": 34900 |
| }, |
| { |
| "epoch": 0.572882992740754, |
| "grad_norm": 1.1153805255889893, |
| "learning_rate": 1.867180509444247e-05, |
| "loss": 0.7124, |
| "step": 35000 |
| }, |
| { |
| "epoch": 0.574519801291442, |
| "grad_norm": 1.200767159461975, |
| "learning_rate": 1.8662991687768394e-05, |
| "loss": 0.7342, |
| "step": 35100 |
| }, |
| { |
| "epoch": 0.5761566098421298, |
| "grad_norm": 1.093985676765442, |
| "learning_rate": 1.8654151230774e-05, |
| "loss": 0.7073, |
| "step": 35200 |
| }, |
| { |
| "epoch": 0.5777934183928177, |
| "grad_norm": 1.1902211904525757, |
| "learning_rate": 1.8645283751063734e-05, |
| "loss": 0.7147, |
| "step": 35300 |
| }, |
| { |
| "epoch": 0.5794302269435055, |
| "grad_norm": 1.1363279819488525, |
| "learning_rate": 1.863638927632644e-05, |
| "loss": 0.7162, |
| "step": 35400 |
| }, |
| { |
| "epoch": 0.5810670354941935, |
| "grad_norm": 1.2271382808685303, |
| "learning_rate": 1.8627467834335243e-05, |
| "loss": 0.7042, |
| "step": 35500 |
| }, |
| { |
| "epoch": 0.5827038440448813, |
| "grad_norm": 1.1823738813400269, |
| "learning_rate": 1.8618519452947484e-05, |
| "loss": 0.7197, |
| "step": 35600 |
| }, |
| { |
| "epoch": 0.5843406525955691, |
| "grad_norm": 1.042771577835083, |
| "learning_rate": 1.8609544160104608e-05, |
| "loss": 0.7103, |
| "step": 35700 |
| }, |
| { |
| "epoch": 0.585977461146257, |
| "grad_norm": 1.2053323984146118, |
| "learning_rate": 1.8600541983832114e-05, |
| "loss": 0.7206, |
| "step": 35800 |
| }, |
| { |
| "epoch": 0.5876142696969449, |
| "grad_norm": 1.2077679634094238, |
| "learning_rate": 1.8591512952239416e-05, |
| "loss": 0.7003, |
| "step": 35900 |
| }, |
| { |
| "epoch": 0.5892510782476328, |
| "grad_norm": 1.2675883769989014, |
| "learning_rate": 1.8582457093519806e-05, |
| "loss": 0.7119, |
| "step": 36000 |
| }, |
| { |
| "epoch": 0.5908878867983206, |
| "grad_norm": 1.102798342704773, |
| "learning_rate": 1.857337443595034e-05, |
| "loss": 0.7097, |
| "step": 36100 |
| }, |
| { |
| "epoch": 0.5925246953490085, |
| "grad_norm": 1.0432052612304688, |
| "learning_rate": 1.8564265007891747e-05, |
| "loss": 0.7197, |
| "step": 36200 |
| }, |
| { |
| "epoch": 0.5941615038996964, |
| "grad_norm": 1.1461999416351318, |
| "learning_rate": 1.8555128837788356e-05, |
| "loss": 0.7128, |
| "step": 36300 |
| }, |
| { |
| "epoch": 0.5957983124503843, |
| "grad_norm": 1.1425740718841553, |
| "learning_rate": 1.854596595416799e-05, |
| "loss": 0.7221, |
| "step": 36400 |
| }, |
| { |
| "epoch": 0.5974351210010721, |
| "grad_norm": 1.1499603986740112, |
| "learning_rate": 1.8536776385641896e-05, |
| "loss": 0.7118, |
| "step": 36500 |
| }, |
| { |
| "epoch": 0.59907192955176, |
| "grad_norm": 1.1369038820266724, |
| "learning_rate": 1.8527560160904628e-05, |
| "loss": 0.7101, |
| "step": 36600 |
| }, |
| { |
| "epoch": 0.6007087381024478, |
| "grad_norm": 1.3000248670578003, |
| "learning_rate": 1.8518317308733987e-05, |
| "loss": 0.7042, |
| "step": 36700 |
| }, |
| { |
| "epoch": 0.6023455466531357, |
| "grad_norm": 1.193550944328308, |
| "learning_rate": 1.8509047857990925e-05, |
| "loss": 0.7143, |
| "step": 36800 |
| }, |
| { |
| "epoch": 0.6039823552038236, |
| "grad_norm": 1.1038364171981812, |
| "learning_rate": 1.849975183761943e-05, |
| "loss": 0.6953, |
| "step": 36900 |
| }, |
| { |
| "epoch": 0.6056191637545114, |
| "grad_norm": 1.2535215616226196, |
| "learning_rate": 1.849042927664647e-05, |
| "loss": 0.7021, |
| "step": 37000 |
| }, |
| { |
| "epoch": 0.6072559723051993, |
| "grad_norm": 1.1770461797714233, |
| "learning_rate": 1.848108020418188e-05, |
| "loss": 0.6971, |
| "step": 37100 |
| }, |
| { |
| "epoch": 0.6088927808558872, |
| "grad_norm": 1.3245750665664673, |
| "learning_rate": 1.8471704649418272e-05, |
| "loss": 0.7062, |
| "step": 37200 |
| }, |
| { |
| "epoch": 0.6105295894065751, |
| "grad_norm": 1.064820408821106, |
| "learning_rate": 1.8462302641630957e-05, |
| "loss": 0.7247, |
| "step": 37300 |
| }, |
| { |
| "epoch": 0.6121663979572629, |
| "grad_norm": 1.2426869869232178, |
| "learning_rate": 1.8452874210177853e-05, |
| "loss": 0.697, |
| "step": 37400 |
| }, |
| { |
| "epoch": 0.6138032065079508, |
| "grad_norm": 1.0495688915252686, |
| "learning_rate": 1.8443419384499367e-05, |
| "loss": 0.7066, |
| "step": 37500 |
| }, |
| { |
| "epoch": 0.6154400150586387, |
| "grad_norm": 1.0227185487747192, |
| "learning_rate": 1.8433938194118332e-05, |
| "loss": 0.6975, |
| "step": 37600 |
| }, |
| { |
| "epoch": 0.6170768236093266, |
| "grad_norm": 1.1213784217834473, |
| "learning_rate": 1.8424430668639916e-05, |
| "loss": 0.7101, |
| "step": 37700 |
| }, |
| { |
| "epoch": 0.6187136321600144, |
| "grad_norm": 1.3823000192642212, |
| "learning_rate": 1.8414896837751497e-05, |
| "loss": 0.7143, |
| "step": 37800 |
| }, |
| { |
| "epoch": 0.6203504407107022, |
| "grad_norm": 1.280870795249939, |
| "learning_rate": 1.8405336731222615e-05, |
| "loss": 0.7137, |
| "step": 37900 |
| }, |
| { |
| "epoch": 0.6219872492613902, |
| "grad_norm": 1.1578929424285889, |
| "learning_rate": 1.839575037890483e-05, |
| "loss": 0.7035, |
| "step": 38000 |
| }, |
| { |
| "epoch": 0.623624057812078, |
| "grad_norm": 1.1784029006958008, |
| "learning_rate": 1.838613781073169e-05, |
| "loss": 0.7003, |
| "step": 38100 |
| }, |
| { |
| "epoch": 0.6252608663627659, |
| "grad_norm": 1.5140550136566162, |
| "learning_rate": 1.8376499056718563e-05, |
| "loss": 0.7182, |
| "step": 38200 |
| }, |
| { |
| "epoch": 0.6268976749134537, |
| "grad_norm": 1.1795947551727295, |
| "learning_rate": 1.8366834146962613e-05, |
| "loss": 0.707, |
| "step": 38300 |
| }, |
| { |
| "epoch": 0.6285344834641416, |
| "grad_norm": 1.2156872749328613, |
| "learning_rate": 1.8357143111642658e-05, |
| "loss": 0.7041, |
| "step": 38400 |
| }, |
| { |
| "epoch": 0.6301712920148295, |
| "grad_norm": 1.120609164237976, |
| "learning_rate": 1.8347425981019104e-05, |
| "loss": 0.7087, |
| "step": 38500 |
| }, |
| { |
| "epoch": 0.6318081005655174, |
| "grad_norm": 1.0960373878479004, |
| "learning_rate": 1.8337682785433838e-05, |
| "loss": 0.7136, |
| "step": 38600 |
| }, |
| { |
| "epoch": 0.6334449091162052, |
| "grad_norm": 1.2065433263778687, |
| "learning_rate": 1.8327913555310125e-05, |
| "loss": 0.7077, |
| "step": 38700 |
| }, |
| { |
| "epoch": 0.6350817176668931, |
| "grad_norm": 1.158570647239685, |
| "learning_rate": 1.8318118321152534e-05, |
| "loss": 0.7199, |
| "step": 38800 |
| }, |
| { |
| "epoch": 0.636718526217581, |
| "grad_norm": 1.1315112113952637, |
| "learning_rate": 1.8308297113546834e-05, |
| "loss": 0.7157, |
| "step": 38900 |
| }, |
| { |
| "epoch": 0.6383553347682688, |
| "grad_norm": 1.567763328552246, |
| "learning_rate": 1.829844996315989e-05, |
| "loss": 0.7024, |
| "step": 39000 |
| }, |
| { |
| "epoch": 0.6399921433189567, |
| "grad_norm": 1.3154592514038086, |
| "learning_rate": 1.8288576900739573e-05, |
| "loss": 0.7093, |
| "step": 39100 |
| }, |
| { |
| "epoch": 0.6416289518696445, |
| "grad_norm": 1.2426626682281494, |
| "learning_rate": 1.8278677957114666e-05, |
| "loss": 0.7108, |
| "step": 39200 |
| }, |
| { |
| "epoch": 0.6432657604203325, |
| "grad_norm": 1.2186305522918701, |
| "learning_rate": 1.8268753163194773e-05, |
| "loss": 0.704, |
| "step": 39300 |
| }, |
| { |
| "epoch": 0.6449025689710203, |
| "grad_norm": 1.049307942390442, |
| "learning_rate": 1.8258802549970206e-05, |
| "loss": 0.7057, |
| "step": 39400 |
| }, |
| { |
| "epoch": 0.6465393775217082, |
| "grad_norm": 1.3523504734039307, |
| "learning_rate": 1.8248826148511908e-05, |
| "loss": 0.6965, |
| "step": 39500 |
| }, |
| { |
| "epoch": 0.648176186072396, |
| "grad_norm": 1.2402653694152832, |
| "learning_rate": 1.823882398997133e-05, |
| "loss": 0.704, |
| "step": 39600 |
| }, |
| { |
| "epoch": 0.649812994623084, |
| "grad_norm": 1.3009974956512451, |
| "learning_rate": 1.8228796105580373e-05, |
| "loss": 0.6892, |
| "step": 39700 |
| }, |
| { |
| "epoch": 0.6514498031737718, |
| "grad_norm": 1.161328673362732, |
| "learning_rate": 1.821874252665125e-05, |
| "loss": 0.7099, |
| "step": 39800 |
| }, |
| { |
| "epoch": 0.6530866117244597, |
| "grad_norm": 1.5753206014633179, |
| "learning_rate": 1.820866328457641e-05, |
| "loss": 0.6958, |
| "step": 39900 |
| }, |
| { |
| "epoch": 0.6547234202751475, |
| "grad_norm": 1.1261160373687744, |
| "learning_rate": 1.8198558410828436e-05, |
| "loss": 0.7048, |
| "step": 40000 |
| }, |
| { |
| "epoch": 0.6563602288258353, |
| "grad_norm": 1.2303427457809448, |
| "learning_rate": 1.818842793695995e-05, |
| "loss": 0.7024, |
| "step": 40100 |
| }, |
| { |
| "epoch": 0.6579970373765233, |
| "grad_norm": 1.2187303304672241, |
| "learning_rate": 1.8178271894603502e-05, |
| "loss": 0.696, |
| "step": 40200 |
| }, |
| { |
| "epoch": 0.6596338459272111, |
| "grad_norm": 1.1081221103668213, |
| "learning_rate": 1.8168090315471488e-05, |
| "loss": 0.7082, |
| "step": 40300 |
| }, |
| { |
| "epoch": 0.661270654477899, |
| "grad_norm": 1.1961265802383423, |
| "learning_rate": 1.8157883231356036e-05, |
| "loss": 0.6875, |
| "step": 40400 |
| }, |
| { |
| "epoch": 0.6629074630285868, |
| "grad_norm": 1.1577361822128296, |
| "learning_rate": 1.8147650674128927e-05, |
| "loss": 0.7004, |
| "step": 40500 |
| }, |
| { |
| "epoch": 0.6645442715792748, |
| "grad_norm": 1.1837248802185059, |
| "learning_rate": 1.813739267574147e-05, |
| "loss": 0.7084, |
| "step": 40600 |
| }, |
| { |
| "epoch": 0.6661810801299626, |
| "grad_norm": 1.140136957168579, |
| "learning_rate": 1.8127109268224414e-05, |
| "loss": 0.6897, |
| "step": 40700 |
| }, |
| { |
| "epoch": 0.6678178886806505, |
| "grad_norm": 1.132994532585144, |
| "learning_rate": 1.811680048368785e-05, |
| "loss": 0.6999, |
| "step": 40800 |
| }, |
| { |
| "epoch": 0.6694546972313383, |
| "grad_norm": 1.184187889099121, |
| "learning_rate": 1.8106466354321113e-05, |
| "loss": 0.6994, |
| "step": 40900 |
| }, |
| { |
| "epoch": 0.6710915057820263, |
| "grad_norm": 1.1196414232254028, |
| "learning_rate": 1.809610691239268e-05, |
| "loss": 0.7008, |
| "step": 41000 |
| }, |
| { |
| "epoch": 0.6727283143327141, |
| "grad_norm": 1.1688846349716187, |
| "learning_rate": 1.808572219025006e-05, |
| "loss": 0.6954, |
| "step": 41100 |
| }, |
| { |
| "epoch": 0.6743651228834019, |
| "grad_norm": 1.222205638885498, |
| "learning_rate": 1.80753122203197e-05, |
| "loss": 0.6918, |
| "step": 41200 |
| }, |
| { |
| "epoch": 0.6760019314340898, |
| "grad_norm": 1.1374167203903198, |
| "learning_rate": 1.8064877035106887e-05, |
| "loss": 0.6906, |
| "step": 41300 |
| }, |
| { |
| "epoch": 0.6776387399847776, |
| "grad_norm": 1.0707694292068481, |
| "learning_rate": 1.8054416667195643e-05, |
| "loss": 0.6943, |
| "step": 41400 |
| }, |
| { |
| "epoch": 0.6792755485354656, |
| "grad_norm": 1.1394332647323608, |
| "learning_rate": 1.8043931149248625e-05, |
| "loss": 0.7073, |
| "step": 41500 |
| }, |
| { |
| "epoch": 0.6809123570861534, |
| "grad_norm": 1.118058443069458, |
| "learning_rate": 1.803342051400701e-05, |
| "loss": 0.6983, |
| "step": 41600 |
| }, |
| { |
| "epoch": 0.6825491656368413, |
| "grad_norm": 1.3730331659317017, |
| "learning_rate": 1.8022884794290417e-05, |
| "loss": 0.6924, |
| "step": 41700 |
| }, |
| { |
| "epoch": 0.6841859741875291, |
| "grad_norm": 1.1573492288589478, |
| "learning_rate": 1.801232402299679e-05, |
| "loss": 0.6964, |
| "step": 41800 |
| }, |
| { |
| "epoch": 0.6858227827382171, |
| "grad_norm": 1.1315394639968872, |
| "learning_rate": 1.80017382331023e-05, |
| "loss": 0.693, |
| "step": 41900 |
| }, |
| { |
| "epoch": 0.6874595912889049, |
| "grad_norm": 1.1479718685150146, |
| "learning_rate": 1.799112745766122e-05, |
| "loss": 0.6985, |
| "step": 42000 |
| }, |
| { |
| "epoch": 0.6890963998395928, |
| "grad_norm": 1.1869304180145264, |
| "learning_rate": 1.7980491729805858e-05, |
| "loss": 0.7132, |
| "step": 42100 |
| }, |
| { |
| "epoch": 0.6907332083902806, |
| "grad_norm": 1.322792887687683, |
| "learning_rate": 1.796983108274644e-05, |
| "loss": 0.7085, |
| "step": 42200 |
| }, |
| { |
| "epoch": 0.6923700169409684, |
| "grad_norm": 1.1635984182357788, |
| "learning_rate": 1.7959145549770985e-05, |
| "loss": 0.7117, |
| "step": 42300 |
| }, |
| { |
| "epoch": 0.6940068254916564, |
| "grad_norm": 1.1490191221237183, |
| "learning_rate": 1.7948435164245236e-05, |
| "loss": 0.697, |
| "step": 42400 |
| }, |
| { |
| "epoch": 0.6956436340423442, |
| "grad_norm": 1.2376859188079834, |
| "learning_rate": 1.7937699959612523e-05, |
| "loss": 0.7079, |
| "step": 42500 |
| }, |
| { |
| "epoch": 0.6972804425930321, |
| "grad_norm": 1.2555029392242432, |
| "learning_rate": 1.7926939969393693e-05, |
| "loss": 0.6895, |
| "step": 42600 |
| }, |
| { |
| "epoch": 0.6989172511437199, |
| "grad_norm": 1.1793533563613892, |
| "learning_rate": 1.7916155227186966e-05, |
| "loss": 0.6784, |
| "step": 42700 |
| }, |
| { |
| "epoch": 0.7005540596944079, |
| "grad_norm": 1.0882368087768555, |
| "learning_rate": 1.7905345766667867e-05, |
| "loss": 0.6875, |
| "step": 42800 |
| }, |
| { |
| "epoch": 0.7021908682450957, |
| "grad_norm": 1.2925825119018555, |
| "learning_rate": 1.789451162158909e-05, |
| "loss": 0.7072, |
| "step": 42900 |
| }, |
| { |
| "epoch": 0.7038276767957836, |
| "grad_norm": 1.2188570499420166, |
| "learning_rate": 1.7883652825780418e-05, |
| "loss": 0.7084, |
| "step": 43000 |
| }, |
| { |
| "epoch": 0.7054644853464714, |
| "grad_norm": 1.2425892353057861, |
| "learning_rate": 1.7872769413148602e-05, |
| "loss": 0.7059, |
| "step": 43100 |
| }, |
| { |
| "epoch": 0.7071012938971594, |
| "grad_norm": 1.3490030765533447, |
| "learning_rate": 1.786186141767726e-05, |
| "loss": 0.6861, |
| "step": 43200 |
| }, |
| { |
| "epoch": 0.7087381024478472, |
| "grad_norm": 1.2493983507156372, |
| "learning_rate": 1.785092887342677e-05, |
| "loss": 0.6862, |
| "step": 43300 |
| }, |
| { |
| "epoch": 0.710374910998535, |
| "grad_norm": 1.1606495380401611, |
| "learning_rate": 1.7839971814534163e-05, |
| "loss": 0.6959, |
| "step": 43400 |
| }, |
| { |
| "epoch": 0.7120117195492229, |
| "grad_norm": 1.0867750644683838, |
| "learning_rate": 1.7828990275213023e-05, |
| "loss": 0.6838, |
| "step": 43500 |
| }, |
| { |
| "epoch": 0.7136485280999108, |
| "grad_norm": 1.4481595754623413, |
| "learning_rate": 1.781798428975336e-05, |
| "loss": 0.6877, |
| "step": 43600 |
| }, |
| { |
| "epoch": 0.7152853366505987, |
| "grad_norm": 1.0603893995285034, |
| "learning_rate": 1.7806953892521536e-05, |
| "loss": 0.6922, |
| "step": 43700 |
| }, |
| { |
| "epoch": 0.7169221452012865, |
| "grad_norm": 1.1686676740646362, |
| "learning_rate": 1.7795899117960126e-05, |
| "loss": 0.6933, |
| "step": 43800 |
| }, |
| { |
| "epoch": 0.7185589537519744, |
| "grad_norm": 1.423593282699585, |
| "learning_rate": 1.7784820000587828e-05, |
| "loss": 0.6947, |
| "step": 43900 |
| }, |
| { |
| "epoch": 0.7201957623026622, |
| "grad_norm": 1.2158969640731812, |
| "learning_rate": 1.7773716574999354e-05, |
| "loss": 0.6832, |
| "step": 44000 |
| }, |
| { |
| "epoch": 0.7218325708533502, |
| "grad_norm": 1.3259363174438477, |
| "learning_rate": 1.776258887586531e-05, |
| "loss": 0.6836, |
| "step": 44100 |
| }, |
| { |
| "epoch": 0.723469379404038, |
| "grad_norm": 1.2114306688308716, |
| "learning_rate": 1.775143693793211e-05, |
| "loss": 0.6934, |
| "step": 44200 |
| }, |
| { |
| "epoch": 0.7251061879547259, |
| "grad_norm": 1.0769015550613403, |
| "learning_rate": 1.774026079602184e-05, |
| "loss": 0.692, |
| "step": 44300 |
| }, |
| { |
| "epoch": 0.7267429965054137, |
| "grad_norm": 1.098381519317627, |
| "learning_rate": 1.7729060485032167e-05, |
| "loss": 0.6929, |
| "step": 44400 |
| }, |
| { |
| "epoch": 0.7283798050561016, |
| "grad_norm": 1.1960115432739258, |
| "learning_rate": 1.7717836039936235e-05, |
| "loss": 0.6895, |
| "step": 44500 |
| }, |
| { |
| "epoch": 0.7300166136067895, |
| "grad_norm": 1.2899237871170044, |
| "learning_rate": 1.7706587495782538e-05, |
| "loss": 0.6891, |
| "step": 44600 |
| }, |
| { |
| "epoch": 0.7316534221574773, |
| "grad_norm": 1.1849106550216675, |
| "learning_rate": 1.769531488769482e-05, |
| "loss": 0.6994, |
| "step": 44700 |
| }, |
| { |
| "epoch": 0.7332902307081652, |
| "grad_norm": 1.0840647220611572, |
| "learning_rate": 1.7684018250871967e-05, |
| "loss": 0.6902, |
| "step": 44800 |
| }, |
| { |
| "epoch": 0.734927039258853, |
| "grad_norm": 1.1262308359146118, |
| "learning_rate": 1.7672697620587904e-05, |
| "loss": 0.686, |
| "step": 44900 |
| }, |
| { |
| "epoch": 0.736563847809541, |
| "grad_norm": 1.2281126976013184, |
| "learning_rate": 1.7661353032191458e-05, |
| "loss": 0.6971, |
| "step": 45000 |
| }, |
| { |
| "epoch": 0.7382006563602288, |
| "grad_norm": 1.0803622007369995, |
| "learning_rate": 1.7649984521106282e-05, |
| "loss": 0.694, |
| "step": 45100 |
| }, |
| { |
| "epoch": 0.7398374649109167, |
| "grad_norm": 1.4072610139846802, |
| "learning_rate": 1.763859212283071e-05, |
| "loss": 0.704, |
| "step": 45200 |
| }, |
| { |
| "epoch": 0.7414742734616045, |
| "grad_norm": 1.2351950407028198, |
| "learning_rate": 1.7627175872937686e-05, |
| "loss": 0.6991, |
| "step": 45300 |
| }, |
| { |
| "epoch": 0.7431110820122925, |
| "grad_norm": 1.1985889673233032, |
| "learning_rate": 1.7615735807074616e-05, |
| "loss": 0.6947, |
| "step": 45400 |
| }, |
| { |
| "epoch": 0.7447478905629803, |
| "grad_norm": 1.1948813199996948, |
| "learning_rate": 1.7604271960963274e-05, |
| "loss": 0.6986, |
| "step": 45500 |
| }, |
| { |
| "epoch": 0.7463846991136681, |
| "grad_norm": 1.2745295763015747, |
| "learning_rate": 1.759278437039969e-05, |
| "loss": 0.6989, |
| "step": 45600 |
| }, |
| { |
| "epoch": 0.748021507664356, |
| "grad_norm": 1.1414821147918701, |
| "learning_rate": 1.7581273071254038e-05, |
| "loss": 0.6883, |
| "step": 45700 |
| }, |
| { |
| "epoch": 0.7496583162150439, |
| "grad_norm": 1.1246697902679443, |
| "learning_rate": 1.7569738099470524e-05, |
| "loss": 0.6818, |
| "step": 45800 |
| }, |
| { |
| "epoch": 0.7512951247657318, |
| "grad_norm": 1.1820296049118042, |
| "learning_rate": 1.7558179491067263e-05, |
| "loss": 0.7079, |
| "step": 45900 |
| }, |
| { |
| "epoch": 0.7529319333164196, |
| "grad_norm": 1.1293789148330688, |
| "learning_rate": 1.7546597282136186e-05, |
| "loss": 0.696, |
| "step": 46000 |
| }, |
| { |
| "epoch": 0.7545687418671075, |
| "grad_norm": 1.2405450344085693, |
| "learning_rate": 1.753499150884291e-05, |
| "loss": 0.6912, |
| "step": 46100 |
| }, |
| { |
| "epoch": 0.7562055504177954, |
| "grad_norm": 1.2177417278289795, |
| "learning_rate": 1.7523362207426634e-05, |
| "loss": 0.6824, |
| "step": 46200 |
| }, |
| { |
| "epoch": 0.7578423589684833, |
| "grad_norm": 1.124414086341858, |
| "learning_rate": 1.7511709414200024e-05, |
| "loss": 0.6868, |
| "step": 46300 |
| }, |
| { |
| "epoch": 0.7594791675191711, |
| "grad_norm": 1.1439573764801025, |
| "learning_rate": 1.7500033165549105e-05, |
| "loss": 0.6882, |
| "step": 46400 |
| }, |
| { |
| "epoch": 0.761115976069859, |
| "grad_norm": 1.1549428701400757, |
| "learning_rate": 1.7488333497933133e-05, |
| "loss": 0.681, |
| "step": 46500 |
| }, |
| { |
| "epoch": 0.7627527846205469, |
| "grad_norm": 1.3092726469039917, |
| "learning_rate": 1.7476610447884492e-05, |
| "loss": 0.6973, |
| "step": 46600 |
| }, |
| { |
| "epoch": 0.7643895931712347, |
| "grad_norm": 1.5812910795211792, |
| "learning_rate": 1.7464864052008586e-05, |
| "loss": 0.6855, |
| "step": 46700 |
| }, |
| { |
| "epoch": 0.7660264017219226, |
| "grad_norm": 1.189775824546814, |
| "learning_rate": 1.7453094346983707e-05, |
| "loss": 0.6983, |
| "step": 46800 |
| }, |
| { |
| "epoch": 0.7676632102726104, |
| "grad_norm": 1.3100470304489136, |
| "learning_rate": 1.7441301369560934e-05, |
| "loss": 0.6938, |
| "step": 46900 |
| }, |
| { |
| "epoch": 0.7693000188232983, |
| "grad_norm": 1.227925419807434, |
| "learning_rate": 1.7429485156564014e-05, |
| "loss": 0.6762, |
| "step": 47000 |
| }, |
| { |
| "epoch": 0.7709368273739862, |
| "grad_norm": 1.3295223712921143, |
| "learning_rate": 1.7417645744889248e-05, |
| "loss": 0.6823, |
| "step": 47100 |
| }, |
| { |
| "epoch": 0.7725736359246741, |
| "grad_norm": 1.1091123819351196, |
| "learning_rate": 1.740578317150538e-05, |
| "loss": 0.6978, |
| "step": 47200 |
| }, |
| { |
| "epoch": 0.7742104444753619, |
| "grad_norm": 1.2926867008209229, |
| "learning_rate": 1.7393897473453462e-05, |
| "loss": 0.6853, |
| "step": 47300 |
| }, |
| { |
| "epoch": 0.7758472530260498, |
| "grad_norm": 1.279630422592163, |
| "learning_rate": 1.738198868784677e-05, |
| "loss": 0.6911, |
| "step": 47400 |
| }, |
| { |
| "epoch": 0.7774840615767377, |
| "grad_norm": 1.1175949573516846, |
| "learning_rate": 1.7370056851870665e-05, |
| "loss": 0.687, |
| "step": 47500 |
| }, |
| { |
| "epoch": 0.7791208701274256, |
| "grad_norm": 1.0889476537704468, |
| "learning_rate": 1.7358102002782477e-05, |
| "loss": 0.689, |
| "step": 47600 |
| }, |
| { |
| "epoch": 0.7807576786781134, |
| "grad_norm": 1.1944537162780762, |
| "learning_rate": 1.7346124177911402e-05, |
| "loss": 0.6841, |
| "step": 47700 |
| }, |
| { |
| "epoch": 0.7823944872288013, |
| "grad_norm": 1.208275556564331, |
| "learning_rate": 1.7334123414658376e-05, |
| "loss": 0.6777, |
| "step": 47800 |
| }, |
| { |
| "epoch": 0.7840312957794892, |
| "grad_norm": 1.1608806848526, |
| "learning_rate": 1.7322099750495964e-05, |
| "loss": 0.6841, |
| "step": 47900 |
| }, |
| { |
| "epoch": 0.785668104330177, |
| "grad_norm": 1.0674712657928467, |
| "learning_rate": 1.731005322296823e-05, |
| "loss": 0.6765, |
| "step": 48000 |
| }, |
| { |
| "epoch": 0.7873049128808649, |
| "grad_norm": 1.1852935552597046, |
| "learning_rate": 1.729798386969064e-05, |
| "loss": 0.6968, |
| "step": 48100 |
| }, |
| { |
| "epoch": 0.7889417214315527, |
| "grad_norm": 1.1918047666549683, |
| "learning_rate": 1.728589172834993e-05, |
| "loss": 0.6815, |
| "step": 48200 |
| }, |
| { |
| "epoch": 0.7905785299822407, |
| "grad_norm": 1.3117504119873047, |
| "learning_rate": 1.7273776836703985e-05, |
| "loss": 0.6799, |
| "step": 48300 |
| }, |
| { |
| "epoch": 0.7922153385329285, |
| "grad_norm": 1.2398260831832886, |
| "learning_rate": 1.726163923258174e-05, |
| "loss": 0.6869, |
| "step": 48400 |
| }, |
| { |
| "epoch": 0.7938521470836164, |
| "grad_norm": 1.2091760635375977, |
| "learning_rate": 1.724947895388304e-05, |
| "loss": 0.6679, |
| "step": 48500 |
| }, |
| { |
| "epoch": 0.7954889556343042, |
| "grad_norm": 1.1533339023590088, |
| "learning_rate": 1.723729603857854e-05, |
| "loss": 0.6877, |
| "step": 48600 |
| }, |
| { |
| "epoch": 0.7971257641849921, |
| "grad_norm": 1.2629398107528687, |
| "learning_rate": 1.7225090524709577e-05, |
| "loss": 0.6878, |
| "step": 48700 |
| }, |
| { |
| "epoch": 0.79876257273568, |
| "grad_norm": 1.202531099319458, |
| "learning_rate": 1.7212862450388037e-05, |
| "loss": 0.6911, |
| "step": 48800 |
| }, |
| { |
| "epoch": 0.8003993812863679, |
| "grad_norm": 1.189326286315918, |
| "learning_rate": 1.7200611853796278e-05, |
| "loss": 0.6966, |
| "step": 48900 |
| }, |
| { |
| "epoch": 0.8020361898370557, |
| "grad_norm": 1.2614778280258179, |
| "learning_rate": 1.718833877318696e-05, |
| "loss": 0.6952, |
| "step": 49000 |
| }, |
| { |
| "epoch": 0.8036729983877435, |
| "grad_norm": 1.1864616870880127, |
| "learning_rate": 1.7176043246882966e-05, |
| "loss": 0.6756, |
| "step": 49100 |
| }, |
| { |
| "epoch": 0.8053098069384315, |
| "grad_norm": 1.205569863319397, |
| "learning_rate": 1.7163725313277255e-05, |
| "loss": 0.6748, |
| "step": 49200 |
| }, |
| { |
| "epoch": 0.8069466154891193, |
| "grad_norm": 1.2782241106033325, |
| "learning_rate": 1.715138501083276e-05, |
| "loss": 0.6903, |
| "step": 49300 |
| }, |
| { |
| "epoch": 0.8085834240398072, |
| "grad_norm": 1.0571094751358032, |
| "learning_rate": 1.7139022378082256e-05, |
| "loss": 0.6871, |
| "step": 49400 |
| }, |
| { |
| "epoch": 0.810220232590495, |
| "grad_norm": 1.3369005918502808, |
| "learning_rate": 1.712663745362826e-05, |
| "loss": 0.6746, |
| "step": 49500 |
| }, |
| { |
| "epoch": 0.811857041141183, |
| "grad_norm": 1.2506871223449707, |
| "learning_rate": 1.7114230276142866e-05, |
| "loss": 0.6935, |
| "step": 49600 |
| }, |
| { |
| "epoch": 0.8134938496918708, |
| "grad_norm": 1.3436931371688843, |
| "learning_rate": 1.7101800884367676e-05, |
| "loss": 0.6859, |
| "step": 49700 |
| }, |
| { |
| "epoch": 0.8151306582425587, |
| "grad_norm": 1.3217076063156128, |
| "learning_rate": 1.708934931711365e-05, |
| "loss": 0.6766, |
| "step": 49800 |
| }, |
| { |
| "epoch": 0.8167674667932465, |
| "grad_norm": 1.3521711826324463, |
| "learning_rate": 1.7076875613261e-05, |
| "loss": 0.6828, |
| "step": 49900 |
| }, |
| { |
| "epoch": 0.8184042753439345, |
| "grad_norm": 1.1544018983840942, |
| "learning_rate": 1.706437981175904e-05, |
| "loss": 0.6866, |
| "step": 50000 |
| }, |
| { |
| "epoch": 0.8200410838946223, |
| "grad_norm": 1.3795074224472046, |
| "learning_rate": 1.7051861951626105e-05, |
| "loss": 0.6893, |
| "step": 50100 |
| }, |
| { |
| "epoch": 0.8216778924453101, |
| "grad_norm": 1.2545524835586548, |
| "learning_rate": 1.7039322071949396e-05, |
| "loss": 0.6865, |
| "step": 50200 |
| }, |
| { |
| "epoch": 0.823314700995998, |
| "grad_norm": 1.3663312196731567, |
| "learning_rate": 1.702676021188487e-05, |
| "loss": 0.6858, |
| "step": 50300 |
| }, |
| { |
| "epoch": 0.8249515095466858, |
| "grad_norm": 1.4371784925460815, |
| "learning_rate": 1.701417641065713e-05, |
| "loss": 0.6827, |
| "step": 50400 |
| }, |
| { |
| "epoch": 0.8265883180973738, |
| "grad_norm": 1.465648889541626, |
| "learning_rate": 1.7001570707559274e-05, |
| "loss": 0.6813, |
| "step": 50500 |
| }, |
| { |
| "epoch": 0.8282251266480616, |
| "grad_norm": 1.1045328378677368, |
| "learning_rate": 1.69889431419528e-05, |
| "loss": 0.6858, |
| "step": 50600 |
| }, |
| { |
| "epoch": 0.8298619351987495, |
| "grad_norm": 1.1676952838897705, |
| "learning_rate": 1.6976293753267467e-05, |
| "loss": 0.662, |
| "step": 50700 |
| }, |
| { |
| "epoch": 0.8314987437494373, |
| "grad_norm": 1.2377560138702393, |
| "learning_rate": 1.6963622581001188e-05, |
| "loss": 0.6853, |
| "step": 50800 |
| }, |
| { |
| "epoch": 0.8331355523001253, |
| "grad_norm": 1.2052476406097412, |
| "learning_rate": 1.6950929664719883e-05, |
| "loss": 0.6898, |
| "step": 50900 |
| }, |
| { |
| "epoch": 0.8347723608508131, |
| "grad_norm": 1.400944709777832, |
| "learning_rate": 1.6938215044057363e-05, |
| "loss": 0.6905, |
| "step": 51000 |
| }, |
| { |
| "epoch": 0.836409169401501, |
| "grad_norm": 1.2622673511505127, |
| "learning_rate": 1.6925478758715226e-05, |
| "loss": 0.6651, |
| "step": 51100 |
| }, |
| { |
| "epoch": 0.8380459779521888, |
| "grad_norm": 1.1664501428604126, |
| "learning_rate": 1.691272084846272e-05, |
| "loss": 0.6851, |
| "step": 51200 |
| }, |
| { |
| "epoch": 0.8396827865028766, |
| "grad_norm": 1.2591482400894165, |
| "learning_rate": 1.68999413531366e-05, |
| "loss": 0.6936, |
| "step": 51300 |
| }, |
| { |
| "epoch": 0.8413195950535646, |
| "grad_norm": 1.163874864578247, |
| "learning_rate": 1.6887140312641036e-05, |
| "loss": 0.6886, |
| "step": 51400 |
| }, |
| { |
| "epoch": 0.8429564036042524, |
| "grad_norm": 1.2441082000732422, |
| "learning_rate": 1.6874317766947458e-05, |
| "loss": 0.6761, |
| "step": 51500 |
| }, |
| { |
| "epoch": 0.8445932121549403, |
| "grad_norm": 1.1966642141342163, |
| "learning_rate": 1.6861473756094464e-05, |
| "loss": 0.6758, |
| "step": 51600 |
| }, |
| { |
| "epoch": 0.8462300207056281, |
| "grad_norm": 1.1858773231506348, |
| "learning_rate": 1.6848608320187668e-05, |
| "loss": 0.6806, |
| "step": 51700 |
| }, |
| { |
| "epoch": 0.8478668292563161, |
| "grad_norm": 1.1656018495559692, |
| "learning_rate": 1.6835721499399583e-05, |
| "loss": 0.6768, |
| "step": 51800 |
| }, |
| { |
| "epoch": 0.8495036378070039, |
| "grad_norm": 1.2097491025924683, |
| "learning_rate": 1.6822813333969495e-05, |
| "loss": 0.6936, |
| "step": 51900 |
| }, |
| { |
| "epoch": 0.8511404463576918, |
| "grad_norm": 1.4976009130477905, |
| "learning_rate": 1.6809883864203352e-05, |
| "loss": 0.6721, |
| "step": 52000 |
| }, |
| { |
| "epoch": 0.8527772549083796, |
| "grad_norm": 1.3640004396438599, |
| "learning_rate": 1.6796933130473606e-05, |
| "loss": 0.6738, |
| "step": 52100 |
| }, |
| { |
| "epoch": 0.8544140634590676, |
| "grad_norm": 1.2159740924835205, |
| "learning_rate": 1.6783961173219116e-05, |
| "loss": 0.6755, |
| "step": 52200 |
| }, |
| { |
| "epoch": 0.8560508720097554, |
| "grad_norm": 1.23357355594635, |
| "learning_rate": 1.677096803294502e-05, |
| "loss": 0.6789, |
| "step": 52300 |
| }, |
| { |
| "epoch": 0.8576876805604432, |
| "grad_norm": 1.2574186325073242, |
| "learning_rate": 1.6757953750222586e-05, |
| "loss": 0.6892, |
| "step": 52400 |
| }, |
| { |
| "epoch": 0.8593244891111311, |
| "grad_norm": 1.2394073009490967, |
| "learning_rate": 1.6744918365689106e-05, |
| "loss": 0.6726, |
| "step": 52500 |
| }, |
| { |
| "epoch": 0.860961297661819, |
| "grad_norm": 1.2098554372787476, |
| "learning_rate": 1.6731861920047758e-05, |
| "loss": 0.6714, |
| "step": 52600 |
| }, |
| { |
| "epoch": 0.8625981062125069, |
| "grad_norm": 1.3548126220703125, |
| "learning_rate": 1.6718784454067495e-05, |
| "loss": 0.6849, |
| "step": 52700 |
| }, |
| { |
| "epoch": 0.8642349147631947, |
| "grad_norm": 1.5218019485473633, |
| "learning_rate": 1.670568600858289e-05, |
| "loss": 0.6744, |
| "step": 52800 |
| }, |
| { |
| "epoch": 0.8658717233138826, |
| "grad_norm": 1.3826264142990112, |
| "learning_rate": 1.669256662449404e-05, |
| "loss": 0.6762, |
| "step": 52900 |
| }, |
| { |
| "epoch": 0.8675085318645704, |
| "grad_norm": 1.2154985666275024, |
| "learning_rate": 1.667942634276642e-05, |
| "loss": 0.6711, |
| "step": 53000 |
| }, |
| { |
| "epoch": 0.8691453404152584, |
| "grad_norm": 1.3120452165603638, |
| "learning_rate": 1.666626520443075e-05, |
| "loss": 0.6788, |
| "step": 53100 |
| }, |
| { |
| "epoch": 0.8707821489659462, |
| "grad_norm": 1.2221883535385132, |
| "learning_rate": 1.665308325058288e-05, |
| "loss": 0.6661, |
| "step": 53200 |
| }, |
| { |
| "epoch": 0.8724189575166341, |
| "grad_norm": 1.385396957397461, |
| "learning_rate": 1.6639880522383655e-05, |
| "loss": 0.6714, |
| "step": 53300 |
| }, |
| { |
| "epoch": 0.8740557660673219, |
| "grad_norm": 1.2685418128967285, |
| "learning_rate": 1.6626657061058797e-05, |
| "loss": 0.668, |
| "step": 53400 |
| }, |
| { |
| "epoch": 0.8756925746180098, |
| "grad_norm": 1.513152837753296, |
| "learning_rate": 1.661341290789875e-05, |
| "loss": 0.6706, |
| "step": 53500 |
| }, |
| { |
| "epoch": 0.8773293831686977, |
| "grad_norm": 1.2810958623886108, |
| "learning_rate": 1.6600148104258594e-05, |
| "loss": 0.6904, |
| "step": 53600 |
| }, |
| { |
| "epoch": 0.8789661917193855, |
| "grad_norm": 1.2695286273956299, |
| "learning_rate": 1.6586862691557863e-05, |
| "loss": 0.6733, |
| "step": 53700 |
| }, |
| { |
| "epoch": 0.8806030002700734, |
| "grad_norm": 1.0760889053344727, |
| "learning_rate": 1.6573556711280457e-05, |
| "loss": 0.6743, |
| "step": 53800 |
| }, |
| { |
| "epoch": 0.8822398088207613, |
| "grad_norm": 1.3402081727981567, |
| "learning_rate": 1.6560230204974502e-05, |
| "loss": 0.6706, |
| "step": 53900 |
| }, |
| { |
| "epoch": 0.8838766173714492, |
| "grad_norm": 1.191873550415039, |
| "learning_rate": 1.654688321425221e-05, |
| "loss": 0.6764, |
| "step": 54000 |
| }, |
| { |
| "epoch": 0.885513425922137, |
| "grad_norm": 1.1215344667434692, |
| "learning_rate": 1.6533515780789758e-05, |
| "loss": 0.6857, |
| "step": 54100 |
| }, |
| { |
| "epoch": 0.8871502344728249, |
| "grad_norm": 1.1322293281555176, |
| "learning_rate": 1.6520127946327155e-05, |
| "loss": 0.6723, |
| "step": 54200 |
| }, |
| { |
| "epoch": 0.8887870430235127, |
| "grad_norm": 1.7162648439407349, |
| "learning_rate": 1.6506719752668115e-05, |
| "loss": 0.679, |
| "step": 54300 |
| }, |
| { |
| "epoch": 0.8904238515742007, |
| "grad_norm": 1.5632336139678955, |
| "learning_rate": 1.6493291241679922e-05, |
| "loss": 0.6807, |
| "step": 54400 |
| }, |
| { |
| "epoch": 0.8920606601248885, |
| "grad_norm": 1.0530614852905273, |
| "learning_rate": 1.6479842455293297e-05, |
| "loss": 0.6681, |
| "step": 54500 |
| }, |
| { |
| "epoch": 0.8936974686755763, |
| "grad_norm": 1.2179269790649414, |
| "learning_rate": 1.6466373435502276e-05, |
| "loss": 0.6614, |
| "step": 54600 |
| }, |
| { |
| "epoch": 0.8953342772262642, |
| "grad_norm": 1.3225027322769165, |
| "learning_rate": 1.6452884224364082e-05, |
| "loss": 0.671, |
| "step": 54700 |
| }, |
| { |
| "epoch": 0.8969710857769521, |
| "grad_norm": 1.3610303401947021, |
| "learning_rate": 1.6439374863998966e-05, |
| "loss": 0.6801, |
| "step": 54800 |
| }, |
| { |
| "epoch": 0.89860789432764, |
| "grad_norm": 1.3277727365493774, |
| "learning_rate": 1.6425845396590114e-05, |
| "loss": 0.6746, |
| "step": 54900 |
| }, |
| { |
| "epoch": 0.9002447028783278, |
| "grad_norm": 1.2963169813156128, |
| "learning_rate": 1.6412295864383487e-05, |
| "loss": 0.6817, |
| "step": 55000 |
| }, |
| { |
| "epoch": 0.9018815114290157, |
| "grad_norm": 1.475885033607483, |
| "learning_rate": 1.6398726309687704e-05, |
| "loss": 0.6891, |
| "step": 55100 |
| }, |
| { |
| "epoch": 0.9035183199797036, |
| "grad_norm": 1.2722758054733276, |
| "learning_rate": 1.638513677487389e-05, |
| "loss": 0.6709, |
| "step": 55200 |
| }, |
| { |
| "epoch": 0.9051551285303915, |
| "grad_norm": 1.3521857261657715, |
| "learning_rate": 1.637152730237558e-05, |
| "loss": 0.6812, |
| "step": 55300 |
| }, |
| { |
| "epoch": 0.9067919370810793, |
| "grad_norm": 1.2276744842529297, |
| "learning_rate": 1.6357897934688555e-05, |
| "loss": 0.6644, |
| "step": 55400 |
| }, |
| { |
| "epoch": 0.9084287456317672, |
| "grad_norm": 1.5432332754135132, |
| "learning_rate": 1.634424871437071e-05, |
| "loss": 0.6817, |
| "step": 55500 |
| }, |
| { |
| "epoch": 0.910065554182455, |
| "grad_norm": 1.2314627170562744, |
| "learning_rate": 1.6330579684041946e-05, |
| "loss": 0.6761, |
| "step": 55600 |
| }, |
| { |
| "epoch": 0.9117023627331429, |
| "grad_norm": 1.473347544670105, |
| "learning_rate": 1.631689088638401e-05, |
| "loss": 0.6587, |
| "step": 55700 |
| }, |
| { |
| "epoch": 0.9133391712838308, |
| "grad_norm": 1.4029542207717896, |
| "learning_rate": 1.6303182364140376e-05, |
| "loss": 0.6863, |
| "step": 55800 |
| }, |
| { |
| "epoch": 0.9149759798345186, |
| "grad_norm": 1.1235482692718506, |
| "learning_rate": 1.628945416011611e-05, |
| "loss": 0.6717, |
| "step": 55900 |
| }, |
| { |
| "epoch": 0.9166127883852065, |
| "grad_norm": 1.1514254808425903, |
| "learning_rate": 1.6275706317177732e-05, |
| "loss": 0.6815, |
| "step": 56000 |
| }, |
| { |
| "epoch": 0.9182495969358944, |
| "grad_norm": 1.388074517250061, |
| "learning_rate": 1.6261938878253086e-05, |
| "loss": 0.6849, |
| "step": 56100 |
| }, |
| { |
| "epoch": 0.9198864054865823, |
| "grad_norm": 1.1814851760864258, |
| "learning_rate": 1.6248151886331208e-05, |
| "loss": 0.6641, |
| "step": 56200 |
| }, |
| { |
| "epoch": 0.9215232140372701, |
| "grad_norm": 1.4052802324295044, |
| "learning_rate": 1.6234345384462174e-05, |
| "loss": 0.6787, |
| "step": 56300 |
| }, |
| { |
| "epoch": 0.923160022587958, |
| "grad_norm": 1.5508378744125366, |
| "learning_rate": 1.6220519415757005e-05, |
| "loss": 0.6808, |
| "step": 56400 |
| }, |
| { |
| "epoch": 0.9247968311386459, |
| "grad_norm": 1.3127562999725342, |
| "learning_rate": 1.620667402338749e-05, |
| "loss": 0.6663, |
| "step": 56500 |
| }, |
| { |
| "epoch": 0.9264336396893338, |
| "grad_norm": 1.2677356004714966, |
| "learning_rate": 1.619280925058607e-05, |
| "loss": 0.6723, |
| "step": 56600 |
| }, |
| { |
| "epoch": 0.9280704482400216, |
| "grad_norm": 1.2480475902557373, |
| "learning_rate": 1.61789251406457e-05, |
| "loss": 0.6583, |
| "step": 56700 |
| }, |
| { |
| "epoch": 0.9297072567907094, |
| "grad_norm": 1.1523864269256592, |
| "learning_rate": 1.616502173691973e-05, |
| "loss": 0.6858, |
| "step": 56800 |
| }, |
| { |
| "epoch": 0.9313440653413974, |
| "grad_norm": 1.2443100214004517, |
| "learning_rate": 1.615109908282174e-05, |
| "loss": 0.6842, |
| "step": 56900 |
| }, |
| { |
| "epoch": 0.9329808738920852, |
| "grad_norm": 1.172663927078247, |
| "learning_rate": 1.6137157221825418e-05, |
| "loss": 0.6708, |
| "step": 57000 |
| }, |
| { |
| "epoch": 0.9346176824427731, |
| "grad_norm": 1.2049202919006348, |
| "learning_rate": 1.6123196197464445e-05, |
| "loss": 0.6665, |
| "step": 57100 |
| }, |
| { |
| "epoch": 0.9362544909934609, |
| "grad_norm": 1.3395051956176758, |
| "learning_rate": 1.6109216053332313e-05, |
| "loss": 0.6593, |
| "step": 57200 |
| }, |
| { |
| "epoch": 0.9378912995441488, |
| "grad_norm": 1.4670510292053223, |
| "learning_rate": 1.6095216833082242e-05, |
| "loss": 0.6715, |
| "step": 57300 |
| }, |
| { |
| "epoch": 0.9395281080948367, |
| "grad_norm": 1.349523663520813, |
| "learning_rate": 1.6081198580427e-05, |
| "loss": 0.6724, |
| "step": 57400 |
| }, |
| { |
| "epoch": 0.9411649166455246, |
| "grad_norm": 1.5846613645553589, |
| "learning_rate": 1.606716133913879e-05, |
| "loss": 0.6716, |
| "step": 57500 |
| }, |
| { |
| "epoch": 0.9428017251962124, |
| "grad_norm": 1.1905144453048706, |
| "learning_rate": 1.6053105153049103e-05, |
| "loss": 0.6702, |
| "step": 57600 |
| }, |
| { |
| "epoch": 0.9444385337469003, |
| "grad_norm": 1.4006574153900146, |
| "learning_rate": 1.6039030066048592e-05, |
| "loss": 0.6665, |
| "step": 57700 |
| }, |
| { |
| "epoch": 0.9460753422975882, |
| "grad_norm": 1.3038159608840942, |
| "learning_rate": 1.602493612208693e-05, |
| "loss": 0.665, |
| "step": 57800 |
| }, |
| { |
| "epoch": 0.947712150848276, |
| "grad_norm": 1.336591124534607, |
| "learning_rate": 1.601082336517266e-05, |
| "loss": 0.6572, |
| "step": 57900 |
| }, |
| { |
| "epoch": 0.9493489593989639, |
| "grad_norm": 1.3096286058425903, |
| "learning_rate": 1.5996691839373077e-05, |
| "loss": 0.6651, |
| "step": 58000 |
| }, |
| { |
| "epoch": 0.9509857679496517, |
| "grad_norm": 1.3385711908340454, |
| "learning_rate": 1.5982541588814083e-05, |
| "loss": 0.6708, |
| "step": 58100 |
| }, |
| { |
| "epoch": 0.9526225765003397, |
| "grad_norm": 1.2425600290298462, |
| "learning_rate": 1.596837265768004e-05, |
| "loss": 0.6629, |
| "step": 58200 |
| }, |
| { |
| "epoch": 0.9542593850510275, |
| "grad_norm": 1.1755977869033813, |
| "learning_rate": 1.5954185090213653e-05, |
| "loss": 0.6618, |
| "step": 58300 |
| }, |
| { |
| "epoch": 0.9558961936017154, |
| "grad_norm": 1.5241588354110718, |
| "learning_rate": 1.5939978930715808e-05, |
| "loss": 0.6747, |
| "step": 58400 |
| }, |
| { |
| "epoch": 0.9575330021524032, |
| "grad_norm": 1.113451361656189, |
| "learning_rate": 1.5925754223545452e-05, |
| "loss": 0.6779, |
| "step": 58500 |
| }, |
| { |
| "epoch": 0.9591698107030912, |
| "grad_norm": 1.2721067667007446, |
| "learning_rate": 1.5911511013119438e-05, |
| "loss": 0.6586, |
| "step": 58600 |
| }, |
| { |
| "epoch": 0.960806619253779, |
| "grad_norm": 1.5037124156951904, |
| "learning_rate": 1.589724934391241e-05, |
| "loss": 0.6646, |
| "step": 58700 |
| }, |
| { |
| "epoch": 0.9624434278044669, |
| "grad_norm": 1.2813490629196167, |
| "learning_rate": 1.588296926045664e-05, |
| "loss": 0.6644, |
| "step": 58800 |
| }, |
| { |
| "epoch": 0.9640802363551547, |
| "grad_norm": 1.2610142230987549, |
| "learning_rate": 1.58686708073419e-05, |
| "loss": 0.6717, |
| "step": 58900 |
| }, |
| { |
| "epoch": 0.9657170449058425, |
| "grad_norm": 1.2408130168914795, |
| "learning_rate": 1.585435402921532e-05, |
| "loss": 0.6695, |
| "step": 59000 |
| }, |
| { |
| "epoch": 0.9673538534565305, |
| "grad_norm": 1.4657983779907227, |
| "learning_rate": 1.584001897078126e-05, |
| "loss": 0.6777, |
| "step": 59100 |
| }, |
| { |
| "epoch": 0.9689906620072183, |
| "grad_norm": 1.370548129081726, |
| "learning_rate": 1.5825665676801145e-05, |
| "loss": 0.6881, |
| "step": 59200 |
| }, |
| { |
| "epoch": 0.9706274705579062, |
| "grad_norm": 1.3695186376571655, |
| "learning_rate": 1.5811294192093353e-05, |
| "loss": 0.6594, |
| "step": 59300 |
| }, |
| { |
| "epoch": 0.972264279108594, |
| "grad_norm": 1.2767751216888428, |
| "learning_rate": 1.5796904561533054e-05, |
| "loss": 0.6661, |
| "step": 59400 |
| }, |
| { |
| "epoch": 0.973901087659282, |
| "grad_norm": 1.293419361114502, |
| "learning_rate": 1.578249683005209e-05, |
| "loss": 0.6781, |
| "step": 59500 |
| }, |
| { |
| "epoch": 0.9755378962099698, |
| "grad_norm": 1.5075045824050903, |
| "learning_rate": 1.576807104263881e-05, |
| "loss": 0.6706, |
| "step": 59600 |
| }, |
| { |
| "epoch": 0.9771747047606577, |
| "grad_norm": 1.1597870588302612, |
| "learning_rate": 1.5753627244337958e-05, |
| "loss": 0.6709, |
| "step": 59700 |
| }, |
| { |
| "epoch": 0.9788115133113455, |
| "grad_norm": 1.5488371849060059, |
| "learning_rate": 1.5739165480250504e-05, |
| "loss": 0.6611, |
| "step": 59800 |
| }, |
| { |
| "epoch": 0.9804483218620335, |
| "grad_norm": 1.3339688777923584, |
| "learning_rate": 1.5724685795533518e-05, |
| "loss": 0.679, |
| "step": 59900 |
| }, |
| { |
| "epoch": 0.9820851304127213, |
| "grad_norm": 1.3151462078094482, |
| "learning_rate": 1.571018823540004e-05, |
| "loss": 0.6636, |
| "step": 60000 |
| }, |
| { |
| "epoch": 0.9837219389634091, |
| "grad_norm": 1.3205444812774658, |
| "learning_rate": 1.5695672845118903e-05, |
| "loss": 0.6623, |
| "step": 60100 |
| }, |
| { |
| "epoch": 0.985358747514097, |
| "grad_norm": 1.294420599937439, |
| "learning_rate": 1.5681139670014643e-05, |
| "loss": 0.6666, |
| "step": 60200 |
| }, |
| { |
| "epoch": 0.9869955560647848, |
| "grad_norm": 1.3142366409301758, |
| "learning_rate": 1.566658875546731e-05, |
| "loss": 0.6629, |
| "step": 60300 |
| }, |
| { |
| "epoch": 0.9886323646154728, |
| "grad_norm": 1.3516416549682617, |
| "learning_rate": 1.565202014691235e-05, |
| "loss": 0.6664, |
| "step": 60400 |
| }, |
| { |
| "epoch": 0.9902691731661606, |
| "grad_norm": 1.2360502481460571, |
| "learning_rate": 1.5637433889840455e-05, |
| "loss": 0.6608, |
| "step": 60500 |
| }, |
| { |
| "epoch": 0.9919059817168485, |
| "grad_norm": 1.155104398727417, |
| "learning_rate": 1.562283002979744e-05, |
| "loss": 0.6676, |
| "step": 60600 |
| }, |
| { |
| "epoch": 0.9935427902675363, |
| "grad_norm": 1.2880823612213135, |
| "learning_rate": 1.560820861238407e-05, |
| "loss": 0.6632, |
| "step": 60700 |
| }, |
| { |
| "epoch": 0.9951795988182243, |
| "grad_norm": 1.2748744487762451, |
| "learning_rate": 1.5593569683255936e-05, |
| "loss": 0.6723, |
| "step": 60800 |
| }, |
| { |
| "epoch": 0.9968164073689121, |
| "grad_norm": 1.2065379619598389, |
| "learning_rate": 1.557891328812332e-05, |
| "loss": 0.6831, |
| "step": 60900 |
| }, |
| { |
| "epoch": 0.9984532159196, |
| "grad_norm": 1.143071174621582, |
| "learning_rate": 1.5564239472751022e-05, |
| "loss": 0.6656, |
| "step": 61000 |
| }, |
| { |
| "epoch": 1.0000818404275345, |
| "grad_norm": 1.1476441621780396, |
| "learning_rate": 1.5549548282958253e-05, |
| "loss": 0.6591, |
| "step": 61100 |
| }, |
| { |
| "epoch": 1.0017186489782222, |
| "grad_norm": 1.210295557975769, |
| "learning_rate": 1.5534839764618477e-05, |
| "loss": 0.6559, |
| "step": 61200 |
| }, |
| { |
| "epoch": 1.00335545752891, |
| "grad_norm": 1.5003302097320557, |
| "learning_rate": 1.5520113963659257e-05, |
| "loss": 0.6615, |
| "step": 61300 |
| }, |
| { |
| "epoch": 1.004992266079598, |
| "grad_norm": 1.235449194908142, |
| "learning_rate": 1.550537092606212e-05, |
| "loss": 0.6709, |
| "step": 61400 |
| }, |
| { |
| "epoch": 1.006629074630286, |
| "grad_norm": 1.1739157438278198, |
| "learning_rate": 1.549061069786243e-05, |
| "loss": 0.668, |
| "step": 61500 |
| }, |
| { |
| "epoch": 1.0082658831809737, |
| "grad_norm": 1.2646570205688477, |
| "learning_rate": 1.5475833325149215e-05, |
| "loss": 0.6553, |
| "step": 61600 |
| }, |
| { |
| "epoch": 1.0099026917316616, |
| "grad_norm": 1.2951397895812988, |
| "learning_rate": 1.546103885406504e-05, |
| "loss": 0.6584, |
| "step": 61700 |
| }, |
| { |
| "epoch": 1.0115395002823495, |
| "grad_norm": 1.2838189601898193, |
| "learning_rate": 1.544622733080586e-05, |
| "loss": 0.6518, |
| "step": 61800 |
| }, |
| { |
| "epoch": 1.0131763088330374, |
| "grad_norm": 1.3708552122116089, |
| "learning_rate": 1.543139880162088e-05, |
| "loss": 0.6628, |
| "step": 61900 |
| }, |
| { |
| "epoch": 1.0148131173837251, |
| "grad_norm": 1.301353931427002, |
| "learning_rate": 1.54165533128124e-05, |
| "loss": 0.6478, |
| "step": 62000 |
| }, |
| { |
| "epoch": 1.016449925934413, |
| "grad_norm": 1.3044975996017456, |
| "learning_rate": 1.5401690910735677e-05, |
| "loss": 0.6439, |
| "step": 62100 |
| }, |
| { |
| "epoch": 1.018086734485101, |
| "grad_norm": 1.4568370580673218, |
| "learning_rate": 1.5386811641798785e-05, |
| "loss": 0.6482, |
| "step": 62200 |
| }, |
| { |
| "epoch": 1.0197235430357887, |
| "grad_norm": 1.3758224248886108, |
| "learning_rate": 1.5371915552462466e-05, |
| "loss": 0.663, |
| "step": 62300 |
| }, |
| { |
| "epoch": 1.0213603515864766, |
| "grad_norm": 1.6428395509719849, |
| "learning_rate": 1.535700268923998e-05, |
| "loss": 0.6533, |
| "step": 62400 |
| }, |
| { |
| "epoch": 1.0229971601371646, |
| "grad_norm": 1.3830885887145996, |
| "learning_rate": 1.5342073098696956e-05, |
| "loss": 0.6632, |
| "step": 62500 |
| }, |
| { |
| "epoch": 1.0246339686878525, |
| "grad_norm": 1.426006555557251, |
| "learning_rate": 1.5327126827451272e-05, |
| "loss": 0.6491, |
| "step": 62600 |
| }, |
| { |
| "epoch": 1.0262707772385402, |
| "grad_norm": 1.4166696071624756, |
| "learning_rate": 1.531216392217288e-05, |
| "loss": 0.6465, |
| "step": 62700 |
| }, |
| { |
| "epoch": 1.0279075857892281, |
| "grad_norm": 1.224443793296814, |
| "learning_rate": 1.529718442958367e-05, |
| "loss": 0.6642, |
| "step": 62800 |
| }, |
| { |
| "epoch": 1.029544394339916, |
| "grad_norm": 1.250406265258789, |
| "learning_rate": 1.528218839645733e-05, |
| "loss": 0.6516, |
| "step": 62900 |
| }, |
| { |
| "epoch": 1.031181202890604, |
| "grad_norm": 1.2630037069320679, |
| "learning_rate": 1.52671758696192e-05, |
| "loss": 0.6649, |
| "step": 63000 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 183285, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.34907099427588e+19, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|