| { |
| "best_global_step": 9356, |
| "best_metric": 0.9798825256975033, |
| "best_model_checkpoint": "runs/de_sapbert/checkpoint-9356", |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 9356, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0010688328345446773, |
| "grad_norm": 683.587890625, |
| "learning_rate": 8.547008547008549e-08, |
| "loss": 50.3236, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0021376656690893546, |
| "grad_norm": 546.2178955078125, |
| "learning_rate": 1.9230769230769234e-07, |
| "loss": 59.3515, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0032064985036340315, |
| "grad_norm": 602.040771484375, |
| "learning_rate": 2.991452991452992e-07, |
| "loss": 62.6096, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.004275331338178709, |
| "grad_norm": 697.5074462890625, |
| "learning_rate": 4.05982905982906e-07, |
| "loss": 71.224, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.005344164172723386, |
| "grad_norm": 629.8714599609375, |
| "learning_rate": 5.128205128205128e-07, |
| "loss": 58.249, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.006412997007268063, |
| "grad_norm": 850.3056640625, |
| "learning_rate": 6.196581196581197e-07, |
| "loss": 53.1647, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.007481829841812741, |
| "grad_norm": 535.2101440429688, |
| "learning_rate": 7.264957264957266e-07, |
| "loss": 74.0864, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.008550662676357419, |
| "grad_norm": 877.2177734375, |
| "learning_rate": 8.333333333333333e-07, |
| "loss": 68.1396, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.009619495510902095, |
| "grad_norm": 827.8037109375, |
| "learning_rate": 9.401709401709402e-07, |
| "loss": 67.5764, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.010688328345446772, |
| "grad_norm": 738.37158203125, |
| "learning_rate": 1.047008547008547e-06, |
| "loss": 59.9784, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.01175716117999145, |
| "grad_norm": 698.6246337890625, |
| "learning_rate": 1.153846153846154e-06, |
| "loss": 51.3131, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.012825994014536126, |
| "grad_norm": 683.5339965820312, |
| "learning_rate": 1.2606837606837608e-06, |
| "loss": 68.0457, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.013894826849080803, |
| "grad_norm": 766.8721923828125, |
| "learning_rate": 1.3675213675213678e-06, |
| "loss": 58.9018, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.014963659683625482, |
| "grad_norm": 694.8308715820312, |
| "learning_rate": 1.4743589743589745e-06, |
| "loss": 59.4911, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.01603249251817016, |
| "grad_norm": 585.236572265625, |
| "learning_rate": 1.5811965811965813e-06, |
| "loss": 58.0199, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.017101325352714837, |
| "grad_norm": 778.44287109375, |
| "learning_rate": 1.6880341880341883e-06, |
| "loss": 63.7679, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.018170158187259512, |
| "grad_norm": 646.8430786132812, |
| "learning_rate": 1.794871794871795e-06, |
| "loss": 60.7314, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.01923899102180419, |
| "grad_norm": 989.9755249023438, |
| "learning_rate": 1.9017094017094018e-06, |
| "loss": 77.8096, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.020307823856348866, |
| "grad_norm": 834.5411987304688, |
| "learning_rate": 2.008547008547009e-06, |
| "loss": 66.2088, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.021376656690893545, |
| "grad_norm": 643.298095703125, |
| "learning_rate": 2.1153846153846155e-06, |
| "loss": 65.6054, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.02244548952543822, |
| "grad_norm": 1178.525146484375, |
| "learning_rate": 2.222222222222222e-06, |
| "loss": 80.6449, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.0235143223599829, |
| "grad_norm": 697.0106201171875, |
| "learning_rate": 2.3290598290598295e-06, |
| "loss": 69.9855, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.024583155194527577, |
| "grad_norm": 783.412841796875, |
| "learning_rate": 2.435897435897436e-06, |
| "loss": 56.6307, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.025651988029072252, |
| "grad_norm": 692.8482666015625, |
| "learning_rate": 2.542735042735043e-06, |
| "loss": 46.5356, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.02672082086361693, |
| "grad_norm": 731.8605346679688, |
| "learning_rate": 2.64957264957265e-06, |
| "loss": 57.7131, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.027789653698161606, |
| "grad_norm": 710.8729858398438, |
| "learning_rate": 2.756410256410257e-06, |
| "loss": 49.0737, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.028858486532706284, |
| "grad_norm": 608.5044555664062, |
| "learning_rate": 2.8632478632478635e-06, |
| "loss": 50.7495, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.029927319367250963, |
| "grad_norm": 723.5994873046875, |
| "learning_rate": 2.9700854700854705e-06, |
| "loss": 57.1029, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.030996152201795638, |
| "grad_norm": 697.4583740234375, |
| "learning_rate": 3.0769230769230774e-06, |
| "loss": 45.4298, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.03206498503634032, |
| "grad_norm": 567.723388671875, |
| "learning_rate": 3.183760683760684e-06, |
| "loss": 57.8409, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.03313381787088499, |
| "grad_norm": 688.6309814453125, |
| "learning_rate": 3.290598290598291e-06, |
| "loss": 68.8388, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.034202650705429674, |
| "grad_norm": 628.3114624023438, |
| "learning_rate": 3.397435897435898e-06, |
| "loss": 64.5809, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.03527148353997435, |
| "grad_norm": 556.7693481445312, |
| "learning_rate": 3.5042735042735045e-06, |
| "loss": 54.5407, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.036340316374519024, |
| "grad_norm": 494.6075744628906, |
| "learning_rate": 3.6111111111111115e-06, |
| "loss": 48.341, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.0374091492090637, |
| "grad_norm": 554.0121459960938, |
| "learning_rate": 3.7179487179487184e-06, |
| "loss": 44.3806, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.03847798204360838, |
| "grad_norm": 758.5612182617188, |
| "learning_rate": 3.8247863247863246e-06, |
| "loss": 59.123, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.03954681487815306, |
| "grad_norm": 716.5413208007812, |
| "learning_rate": 3.9316239316239315e-06, |
| "loss": 59.4863, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.04061564771269773, |
| "grad_norm": 503.7677307128906, |
| "learning_rate": 4.0384615384615385e-06, |
| "loss": 65.4498, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.041684480547242414, |
| "grad_norm": 613.170654296875, |
| "learning_rate": 4.145299145299146e-06, |
| "loss": 52.9526, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.04275331338178709, |
| "grad_norm": 725.29833984375, |
| "learning_rate": 4.2521367521367524e-06, |
| "loss": 46.6744, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.043822146216331764, |
| "grad_norm": 525.0426635742188, |
| "learning_rate": 4.358974358974359e-06, |
| "loss": 37.1728, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.04489097905087644, |
| "grad_norm": 627.0368041992188, |
| "learning_rate": 4.465811965811966e-06, |
| "loss": 63.3973, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.04595981188542112, |
| "grad_norm": 501.9342956542969, |
| "learning_rate": 4.5726495726495725e-06, |
| "loss": 51.1136, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.0470286447199658, |
| "grad_norm": 535.6387329101562, |
| "learning_rate": 4.6794871794871795e-06, |
| "loss": 40.9712, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.04809747755451047, |
| "grad_norm": 492.3857421875, |
| "learning_rate": 4.786324786324787e-06, |
| "loss": 46.4765, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.049166310389055154, |
| "grad_norm": 579.775390625, |
| "learning_rate": 4.8931623931623934e-06, |
| "loss": 47.3894, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.05023514322359983, |
| "grad_norm": 530.4070434570312, |
| "learning_rate": 5e-06, |
| "loss": 36.866, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.051303976058144504, |
| "grad_norm": 476.2954406738281, |
| "learning_rate": 5.1068376068376065e-06, |
| "loss": 32.3704, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.052372808892689186, |
| "grad_norm": 412.430419921875, |
| "learning_rate": 5.213675213675214e-06, |
| "loss": 36.0298, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.05344164172723386, |
| "grad_norm": 467.79412841796875, |
| "learning_rate": 5.320512820512821e-06, |
| "loss": 43.7503, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.05451047456177854, |
| "grad_norm": 395.0292663574219, |
| "learning_rate": 5.4273504273504275e-06, |
| "loss": 33.9929, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.05557930739632321, |
| "grad_norm": 543.4691162109375, |
| "learning_rate": 5.534188034188035e-06, |
| "loss": 38.4924, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.056648140230867894, |
| "grad_norm": 446.6466369628906, |
| "learning_rate": 5.641025641025641e-06, |
| "loss": 30.8329, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.05771697306541257, |
| "grad_norm": 409.0653381347656, |
| "learning_rate": 5.7478632478632475e-06, |
| "loss": 29.0382, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.058785805899957244, |
| "grad_norm": 397.3152770996094, |
| "learning_rate": 5.854700854700855e-06, |
| "loss": 25.2408, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.059854638734501926, |
| "grad_norm": 324.9696350097656, |
| "learning_rate": 5.961538461538462e-06, |
| "loss": 30.9856, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.0609234715690466, |
| "grad_norm": 439.66241455078125, |
| "learning_rate": 6.0683760683760684e-06, |
| "loss": 25.987, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.061992304403591277, |
| "grad_norm": 299.0304260253906, |
| "learning_rate": 6.175213675213676e-06, |
| "loss": 24.4493, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.06306113723813596, |
| "grad_norm": 324.8219909667969, |
| "learning_rate": 6.282051282051282e-06, |
| "loss": 25.6893, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.06412997007268063, |
| "grad_norm": 419.6073303222656, |
| "learning_rate": 6.3888888888888885e-06, |
| "loss": 29.4356, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.06519880290722531, |
| "grad_norm": 299.56048583984375, |
| "learning_rate": 6.495726495726496e-06, |
| "loss": 22.2, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.06626763574176998, |
| "grad_norm": 303.1939697265625, |
| "learning_rate": 6.602564102564103e-06, |
| "loss": 24.3048, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.06733646857631466, |
| "grad_norm": 203.7785186767578, |
| "learning_rate": 6.7094017094017094e-06, |
| "loss": 18.5714, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.06840530141085935, |
| "grad_norm": 292.95050048828125, |
| "learning_rate": 6.816239316239317e-06, |
| "loss": 17.4822, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.06947413424540402, |
| "grad_norm": 145.5703125, |
| "learning_rate": 6.923076923076923e-06, |
| "loss": 15.8966, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.0705429670799487, |
| "grad_norm": 261.8589782714844, |
| "learning_rate": 7.02991452991453e-06, |
| "loss": 14.8771, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.07161179991449337, |
| "grad_norm": 215.4441375732422, |
| "learning_rate": 7.136752136752137e-06, |
| "loss": 16.0905, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.07268063274903805, |
| "grad_norm": 206.23388671875, |
| "learning_rate": 7.243589743589744e-06, |
| "loss": 11.9737, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.07374946558358272, |
| "grad_norm": 200.727294921875, |
| "learning_rate": 7.350427350427351e-06, |
| "loss": 12.1407, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.0748182984181274, |
| "grad_norm": 183.34083557128906, |
| "learning_rate": 7.457264957264958e-06, |
| "loss": 11.5492, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.07588713125267209, |
| "grad_norm": 155.31253051757812, |
| "learning_rate": 7.564102564102564e-06, |
| "loss": 13.0664, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.07695596408721676, |
| "grad_norm": 141.0535125732422, |
| "learning_rate": 7.670940170940172e-06, |
| "loss": 10.0428, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.07802479692176144, |
| "grad_norm": 164.8147430419922, |
| "learning_rate": 7.77777777777778e-06, |
| "loss": 9.2962, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.07909362975630611, |
| "grad_norm": 113.84266662597656, |
| "learning_rate": 7.884615384615384e-06, |
| "loss": 8.6304, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.08016246259085079, |
| "grad_norm": 90.67302703857422, |
| "learning_rate": 7.991452991452993e-06, |
| "loss": 5.7954, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.08123129542539546, |
| "grad_norm": 77.02782440185547, |
| "learning_rate": 8.098290598290598e-06, |
| "loss": 6.0213, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.08230012825994014, |
| "grad_norm": 77.24604797363281, |
| "learning_rate": 8.205128205128205e-06, |
| "loss": 6.8873, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.08336896109448483, |
| "grad_norm": 78.4577407836914, |
| "learning_rate": 8.311965811965812e-06, |
| "loss": 5.6347, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.0844377939290295, |
| "grad_norm": 70.10798645019531, |
| "learning_rate": 8.41880341880342e-06, |
| "loss": 5.7346, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.08550662676357418, |
| "grad_norm": 53.02711486816406, |
| "learning_rate": 8.525641025641026e-06, |
| "loss": 4.2817, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.08657545959811885, |
| "grad_norm": 58.17922592163086, |
| "learning_rate": 8.632478632478633e-06, |
| "loss": 3.9817, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.08764429243266353, |
| "grad_norm": 47.072662353515625, |
| "learning_rate": 8.73931623931624e-06, |
| "loss": 3.1871, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.0887131252672082, |
| "grad_norm": 35.73280715942383, |
| "learning_rate": 8.846153846153847e-06, |
| "loss": 3.2088, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.08978195810175288, |
| "grad_norm": 40.767581939697266, |
| "learning_rate": 8.952991452991454e-06, |
| "loss": 3.5216, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.09085079093629757, |
| "grad_norm": 32.53750991821289, |
| "learning_rate": 9.059829059829061e-06, |
| "loss": 2.3657, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.09191962377084224, |
| "grad_norm": 31.6849422454834, |
| "learning_rate": 9.166666666666666e-06, |
| "loss": 2.3054, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.09298845660538692, |
| "grad_norm": 29.081796646118164, |
| "learning_rate": 9.273504273504275e-06, |
| "loss": 2.174, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.0940572894399316, |
| "grad_norm": 34.65196990966797, |
| "learning_rate": 9.38034188034188e-06, |
| "loss": 2.4017, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.09512612227447627, |
| "grad_norm": 19.63212776184082, |
| "learning_rate": 9.487179487179487e-06, |
| "loss": 2.1189, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.09619495510902094, |
| "grad_norm": 24.055822372436523, |
| "learning_rate": 9.594017094017094e-06, |
| "loss": 2.3965, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.09726378794356563, |
| "grad_norm": 18.823020935058594, |
| "learning_rate": 9.700854700854701e-06, |
| "loss": 1.7638, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.09833262077811031, |
| "grad_norm": 16.97188949584961, |
| "learning_rate": 9.807692307692308e-06, |
| "loss": 1.4081, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.09940145361265498, |
| "grad_norm": 18.629823684692383, |
| "learning_rate": 9.914529914529915e-06, |
| "loss": 1.5501, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.10047028644719966, |
| "grad_norm": 16.413358688354492, |
| "learning_rate": 1.0021367521367522e-05, |
| "loss": 1.4015, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.10153911928174433, |
| "grad_norm": 14.555413246154785, |
| "learning_rate": 1.012820512820513e-05, |
| "loss": 1.3726, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.10260795211628901, |
| "grad_norm": 17.65382194519043, |
| "learning_rate": 1.0235042735042734e-05, |
| "loss": 1.1044, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.10367678495083368, |
| "grad_norm": 13.994580268859863, |
| "learning_rate": 1.0341880341880343e-05, |
| "loss": 1.1651, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.10474561778537837, |
| "grad_norm": 13.052831649780273, |
| "learning_rate": 1.044871794871795e-05, |
| "loss": 1.1674, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.10581445061992305, |
| "grad_norm": 16.172752380371094, |
| "learning_rate": 1.0555555555555557e-05, |
| "loss": 1.2274, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.10688328345446772, |
| "grad_norm": 12.005922317504883, |
| "learning_rate": 1.0662393162393162e-05, |
| "loss": 1.0606, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.1079521162890124, |
| "grad_norm": 13.400876998901367, |
| "learning_rate": 1.076923076923077e-05, |
| "loss": 1.2207, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.10902094912355707, |
| "grad_norm": 14.658180236816406, |
| "learning_rate": 1.0876068376068376e-05, |
| "loss": 1.12, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.11008978195810175, |
| "grad_norm": 11.943291664123535, |
| "learning_rate": 1.0982905982905985e-05, |
| "loss": 0.9925, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.11115861479264642, |
| "grad_norm": 10.507229804992676, |
| "learning_rate": 1.1089743589743592e-05, |
| "loss": 0.9542, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.11222744762719111, |
| "grad_norm": 12.802043914794922, |
| "learning_rate": 1.1196581196581197e-05, |
| "loss": 1.1911, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.11329628046173579, |
| "grad_norm": 10.767659187316895, |
| "learning_rate": 1.1303418803418804e-05, |
| "loss": 1.2418, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.11436511329628046, |
| "grad_norm": 12.087440490722656, |
| "learning_rate": 1.1410256410256411e-05, |
| "loss": 1.0926, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.11543394613082514, |
| "grad_norm": 9.390274047851562, |
| "learning_rate": 1.1517094017094016e-05, |
| "loss": 0.9052, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.11650277896536981, |
| "grad_norm": 10.022379875183105, |
| "learning_rate": 1.1623931623931625e-05, |
| "loss": 0.9725, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.11757161179991449, |
| "grad_norm": 12.384991645812988, |
| "learning_rate": 1.1730769230769232e-05, |
| "loss": 1.0631, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.11864044463445918, |
| "grad_norm": 10.217049598693848, |
| "learning_rate": 1.1837606837606839e-05, |
| "loss": 1.0069, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.11970927746900385, |
| "grad_norm": 9.567984580993652, |
| "learning_rate": 1.1944444444444444e-05, |
| "loss": 0.9301, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.12077811030354853, |
| "grad_norm": 7.623697280883789, |
| "learning_rate": 1.2051282051282051e-05, |
| "loss": 0.7413, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.1218469431380932, |
| "grad_norm": 12.299339294433594, |
| "learning_rate": 1.2158119658119658e-05, |
| "loss": 1.0359, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.12291577597263788, |
| "grad_norm": 10.420650482177734, |
| "learning_rate": 1.2264957264957267e-05, |
| "loss": 0.9793, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.12398460880718255, |
| "grad_norm": 9.585742950439453, |
| "learning_rate": 1.2371794871794874e-05, |
| "loss": 0.7139, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.12505344164172724, |
| "grad_norm": 8.004850387573242, |
| "learning_rate": 1.247863247863248e-05, |
| "loss": 1.0595, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.12612227447627192, |
| "grad_norm": 9.993664741516113, |
| "learning_rate": 1.2585470085470086e-05, |
| "loss": 0.8869, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.1271911073108166, |
| "grad_norm": 9.177787780761719, |
| "learning_rate": 1.2692307692307693e-05, |
| "loss": 0.8318, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.12825994014536127, |
| "grad_norm": 10.756118774414062, |
| "learning_rate": 1.2799145299145298e-05, |
| "loss": 0.7328, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.12932877297990594, |
| "grad_norm": 9.876399040222168, |
| "learning_rate": 1.2905982905982907e-05, |
| "loss": 0.7785, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.13039760581445062, |
| "grad_norm": 9.022135734558105, |
| "learning_rate": 1.3012820512820514e-05, |
| "loss": 0.8507, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.1314664386489953, |
| "grad_norm": 9.838420867919922, |
| "learning_rate": 1.3119658119658121e-05, |
| "loss": 1.0039, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.13253527148353997, |
| "grad_norm": 11.163064002990723, |
| "learning_rate": 1.3226495726495728e-05, |
| "loss": 0.7514, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.13360410431808464, |
| "grad_norm": 7.968421459197998, |
| "learning_rate": 1.3333333333333333e-05, |
| "loss": 0.6666, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.13467293715262932, |
| "grad_norm": 9.873268127441406, |
| "learning_rate": 1.3440170940170942e-05, |
| "loss": 0.6639, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.135741769987174, |
| "grad_norm": 7.193119049072266, |
| "learning_rate": 1.3547008547008549e-05, |
| "loss": 0.77, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.1368106028217187, |
| "grad_norm": 9.957530975341797, |
| "learning_rate": 1.3653846153846156e-05, |
| "loss": 0.8079, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.13787943565626337, |
| "grad_norm": 9.682191848754883, |
| "learning_rate": 1.3760683760683761e-05, |
| "loss": 0.8061, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.13894826849080805, |
| "grad_norm": 7.611622333526611, |
| "learning_rate": 1.3867521367521368e-05, |
| "loss": 0.7666, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.14001710132535272, |
| "grad_norm": 9.452914237976074, |
| "learning_rate": 1.3974358974358975e-05, |
| "loss": 0.7835, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.1410859341598974, |
| "grad_norm": 7.584820747375488, |
| "learning_rate": 1.4081196581196584e-05, |
| "loss": 0.7319, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.14215476699444207, |
| "grad_norm": 7.296449661254883, |
| "learning_rate": 1.4188034188034189e-05, |
| "loss": 0.5945, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.14322359982898675, |
| "grad_norm": 8.2069730758667, |
| "learning_rate": 1.4294871794871796e-05, |
| "loss": 0.7008, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.14429243266353142, |
| "grad_norm": 8.537630081176758, |
| "learning_rate": 1.4401709401709403e-05, |
| "loss": 0.9728, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.1453612654980761, |
| "grad_norm": 8.83273696899414, |
| "learning_rate": 1.450854700854701e-05, |
| "loss": 0.7346, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.14643009833262077, |
| "grad_norm": 9.676769256591797, |
| "learning_rate": 1.4615384615384615e-05, |
| "loss": 0.8463, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.14749893116716545, |
| "grad_norm": 8.45507526397705, |
| "learning_rate": 1.4722222222222224e-05, |
| "loss": 0.7251, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.14856776400171012, |
| "grad_norm": 9.440534591674805, |
| "learning_rate": 1.4829059829059831e-05, |
| "loss": 0.7444, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.1496365968362548, |
| "grad_norm": 7.491715431213379, |
| "learning_rate": 1.4935897435897438e-05, |
| "loss": 0.6496, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.15070542967079947, |
| "grad_norm": 6.313747406005859, |
| "learning_rate": 1.5042735042735043e-05, |
| "loss": 0.7623, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.15177426250534418, |
| "grad_norm": 7.4156060218811035, |
| "learning_rate": 1.514957264957265e-05, |
| "loss": 0.8285, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.15284309533988885, |
| "grad_norm": 8.984630584716797, |
| "learning_rate": 1.5256410256410257e-05, |
| "loss": 0.886, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.15391192817443353, |
| "grad_norm": 7.339222431182861, |
| "learning_rate": 1.5363247863247866e-05, |
| "loss": 0.7274, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.1549807610089782, |
| "grad_norm": 7.579738140106201, |
| "learning_rate": 1.5470085470085473e-05, |
| "loss": 0.8669, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.15604959384352288, |
| "grad_norm": 8.190738677978516, |
| "learning_rate": 1.557692307692308e-05, |
| "loss": 0.6741, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.15711842667806755, |
| "grad_norm": 7.186857223510742, |
| "learning_rate": 1.5683760683760683e-05, |
| "loss": 0.7396, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.15818725951261223, |
| "grad_norm": 6.874704360961914, |
| "learning_rate": 1.579059829059829e-05, |
| "loss": 0.7197, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.1592560923471569, |
| "grad_norm": 9.56429386138916, |
| "learning_rate": 1.5897435897435897e-05, |
| "loss": 0.9099, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.16032492518170158, |
| "grad_norm": 7.520778179168701, |
| "learning_rate": 1.6004273504273508e-05, |
| "loss": 0.7344, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.16139375801624625, |
| "grad_norm": 5.668506622314453, |
| "learning_rate": 1.6111111111111115e-05, |
| "loss": 0.6107, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.16246259085079093, |
| "grad_norm": 9.869526863098145, |
| "learning_rate": 1.6217948717948718e-05, |
| "loss": 0.6929, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.1635314236853356, |
| "grad_norm": 8.029936790466309, |
| "learning_rate": 1.6324786324786325e-05, |
| "loss": 0.7019, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.16460025651988028, |
| "grad_norm": 8.148101806640625, |
| "learning_rate": 1.6431623931623932e-05, |
| "loss": 0.5759, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.16566908935442498, |
| "grad_norm": 7.96873664855957, |
| "learning_rate": 1.653846153846154e-05, |
| "loss": 0.5736, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.16673792218896966, |
| "grad_norm": 9.016830444335938, |
| "learning_rate": 1.6645299145299146e-05, |
| "loss": 0.5878, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.16780675502351433, |
| "grad_norm": 7.439241886138916, |
| "learning_rate": 1.6752136752136753e-05, |
| "loss": 0.7349, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.168875587858059, |
| "grad_norm": 9.404788970947266, |
| "learning_rate": 1.685897435897436e-05, |
| "loss": 0.7691, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.16994442069260368, |
| "grad_norm": 6.773132801055908, |
| "learning_rate": 1.6965811965811967e-05, |
| "loss": 0.6228, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.17101325352714836, |
| "grad_norm": 6.86265754699707, |
| "learning_rate": 1.7072649572649574e-05, |
| "loss": 0.6394, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.17208208636169303, |
| "grad_norm": 6.647765159606934, |
| "learning_rate": 1.717948717948718e-05, |
| "loss": 0.624, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.1731509191962377, |
| "grad_norm": 6.882334232330322, |
| "learning_rate": 1.7286324786324788e-05, |
| "loss": 0.6487, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.17421975203078238, |
| "grad_norm": 8.620728492736816, |
| "learning_rate": 1.7393162393162395e-05, |
| "loss": 0.6001, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.17528858486532706, |
| "grad_norm": 7.544363021850586, |
| "learning_rate": 1.7500000000000002e-05, |
| "loss": 0.6685, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.17635741769987173, |
| "grad_norm": 8.941640853881836, |
| "learning_rate": 1.760683760683761e-05, |
| "loss": 0.8176, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.1774262505344164, |
| "grad_norm": 6.829235553741455, |
| "learning_rate": 1.7713675213675216e-05, |
| "loss": 0.6417, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.17849508336896108, |
| "grad_norm": 7.0878705978393555, |
| "learning_rate": 1.7820512820512823e-05, |
| "loss": 0.6353, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.17956391620350576, |
| "grad_norm": 9.062278747558594, |
| "learning_rate": 1.792735042735043e-05, |
| "loss": 0.5946, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.18063274903805046, |
| "grad_norm": 7.967255115509033, |
| "learning_rate": 1.8034188034188037e-05, |
| "loss": 0.6122, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.18170158187259514, |
| "grad_norm": 7.076515197753906, |
| "learning_rate": 1.8141025641025644e-05, |
| "loss": 0.6499, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.1827704147071398, |
| "grad_norm": 7.658061981201172, |
| "learning_rate": 1.8247863247863247e-05, |
| "loss": 0.601, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.1838392475416845, |
| "grad_norm": 7.605343341827393, |
| "learning_rate": 1.8354700854700854e-05, |
| "loss": 0.5404, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.18490808037622916, |
| "grad_norm": 6.7278900146484375, |
| "learning_rate": 1.8461538461538465e-05, |
| "loss": 0.6574, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.18597691321077384, |
| "grad_norm": 6.190138339996338, |
| "learning_rate": 1.856837606837607e-05, |
| "loss": 0.5377, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.1870457460453185, |
| "grad_norm": 5.700743198394775, |
| "learning_rate": 1.867521367521368e-05, |
| "loss": 0.5537, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.1881145788798632, |
| "grad_norm": 9.452567100524902, |
| "learning_rate": 1.8782051282051282e-05, |
| "loss": 0.7377, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.18918341171440786, |
| "grad_norm": 8.572616577148438, |
| "learning_rate": 1.888888888888889e-05, |
| "loss": 0.8466, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.19025224454895254, |
| "grad_norm": 6.903915882110596, |
| "learning_rate": 1.8995726495726496e-05, |
| "loss": 0.4808, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.1913210773834972, |
| "grad_norm": 6.828094959259033, |
| "learning_rate": 1.9102564102564106e-05, |
| "loss": 0.5482, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.1923899102180419, |
| "grad_norm": 9.153865814208984, |
| "learning_rate": 1.920940170940171e-05, |
| "loss": 0.6588, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.19345874305258656, |
| "grad_norm": 8.290953636169434, |
| "learning_rate": 1.9316239316239317e-05, |
| "loss": 0.6953, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.19452757588713127, |
| "grad_norm": 6.111261367797852, |
| "learning_rate": 1.9423076923076924e-05, |
| "loss": 0.4803, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.19559640872167594, |
| "grad_norm": 8.402656555175781, |
| "learning_rate": 1.952991452991453e-05, |
| "loss": 0.6869, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.19666524155622062, |
| "grad_norm": 5.929531574249268, |
| "learning_rate": 1.9636752136752138e-05, |
| "loss": 0.5393, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.1977340743907653, |
| "grad_norm": 7.195873260498047, |
| "learning_rate": 1.9743589743589745e-05, |
| "loss": 0.4829, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.19880290722530997, |
| "grad_norm": 8.35781192779541, |
| "learning_rate": 1.9850427350427352e-05, |
| "loss": 0.5071, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.19987174005985464, |
| "grad_norm": 7.967381954193115, |
| "learning_rate": 1.995726495726496e-05, |
| "loss": 0.6828, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.20094057289439932, |
| "grad_norm": 7.339260101318359, |
| "learning_rate": 1.999287410926366e-05, |
| "loss": 0.6468, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.202009405728944, |
| "grad_norm": 6.559432029724121, |
| "learning_rate": 1.9980997624703088e-05, |
| "loss": 0.6169, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.20307823856348867, |
| "grad_norm": 6.627939701080322, |
| "learning_rate": 1.996912114014252e-05, |
| "loss": 0.5389, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.20414707139803334, |
| "grad_norm": 6.5172119140625, |
| "learning_rate": 1.995724465558195e-05, |
| "loss": 0.4527, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.20521590423257802, |
| "grad_norm": 6.31046199798584, |
| "learning_rate": 1.994536817102138e-05, |
| "loss": 0.5405, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.2062847370671227, |
| "grad_norm": 6.0619425773620605, |
| "learning_rate": 1.993349168646081e-05, |
| "loss": 0.6583, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.20735356990166737, |
| "grad_norm": 7.9872941970825195, |
| "learning_rate": 1.992161520190024e-05, |
| "loss": 0.7037, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.20842240273621207, |
| "grad_norm": 6.310743808746338, |
| "learning_rate": 1.9909738717339668e-05, |
| "loss": 0.564, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.20949123557075675, |
| "grad_norm": 6.636473655700684, |
| "learning_rate": 1.9897862232779098e-05, |
| "loss": 0.4531, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.21056006840530142, |
| "grad_norm": 6.087254047393799, |
| "learning_rate": 1.988598574821853e-05, |
| "loss": 0.5454, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.2116289012398461, |
| "grad_norm": 6.705723762512207, |
| "learning_rate": 1.987410926365796e-05, |
| "loss": 0.4836, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.21269773407439077, |
| "grad_norm": 6.238287448883057, |
| "learning_rate": 1.9862232779097387e-05, |
| "loss": 0.5458, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.21376656690893545, |
| "grad_norm": 5.439404010772705, |
| "learning_rate": 1.985035629453682e-05, |
| "loss": 0.5202, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.21483539974348012, |
| "grad_norm": 5.1525654792785645, |
| "learning_rate": 1.9838479809976248e-05, |
| "loss": 0.5532, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.2159042325780248, |
| "grad_norm": 6.952949047088623, |
| "learning_rate": 1.9826603325415678e-05, |
| "loss": 0.63, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.21697306541256947, |
| "grad_norm": 5.5152788162231445, |
| "learning_rate": 1.981472684085511e-05, |
| "loss": 0.6299, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.21804189824711415, |
| "grad_norm": 7.090893745422363, |
| "learning_rate": 1.980285035629454e-05, |
| "loss": 0.5405, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.21911073108165882, |
| "grad_norm": 5.758279323577881, |
| "learning_rate": 1.979097387173397e-05, |
| "loss": 0.4379, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.2201795639162035, |
| "grad_norm": 10.006664276123047, |
| "learning_rate": 1.9779097387173397e-05, |
| "loss": 0.7446, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.22124839675074817, |
| "grad_norm": 6.134273529052734, |
| "learning_rate": 1.9767220902612828e-05, |
| "loss": 0.6652, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.22231722958529285, |
| "grad_norm": 7.395203113555908, |
| "learning_rate": 1.9755344418052258e-05, |
| "loss": 0.6873, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.22338606241983755, |
| "grad_norm": 4.713867664337158, |
| "learning_rate": 1.974346793349169e-05, |
| "loss": 0.4904, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.22445489525438223, |
| "grad_norm": 6.1399359703063965, |
| "learning_rate": 1.973159144893112e-05, |
| "loss": 0.5763, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.2255237280889269, |
| "grad_norm": 7.5219879150390625, |
| "learning_rate": 1.971971496437055e-05, |
| "loss": 0.5801, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.22659256092347158, |
| "grad_norm": 5.3690619468688965, |
| "learning_rate": 1.9707838479809977e-05, |
| "loss": 0.518, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.22766139375801625, |
| "grad_norm": 7.701142311096191, |
| "learning_rate": 1.9695961995249407e-05, |
| "loss": 0.5928, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.22873022659256093, |
| "grad_norm": 5.431284427642822, |
| "learning_rate": 1.9684085510688838e-05, |
| "loss": 0.5575, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.2297990594271056, |
| "grad_norm": 5.841889381408691, |
| "learning_rate": 1.967220902612827e-05, |
| "loss": 0.497, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.23086789226165028, |
| "grad_norm": 6.84688663482666, |
| "learning_rate": 1.9660332541567696e-05, |
| "loss": 0.4607, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.23193672509619495, |
| "grad_norm": 7.2094645500183105, |
| "learning_rate": 1.964845605700713e-05, |
| "loss": 0.5531, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.23300555793073963, |
| "grad_norm": 8.189807891845703, |
| "learning_rate": 1.9636579572446557e-05, |
| "loss": 0.5372, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.2340743907652843, |
| "grad_norm": 6.64928674697876, |
| "learning_rate": 1.9624703087885987e-05, |
| "loss": 0.4641, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.23514322359982898, |
| "grad_norm": 5.907129764556885, |
| "learning_rate": 1.9612826603325418e-05, |
| "loss": 0.4724, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.23621205643437365, |
| "grad_norm": 5.550957202911377, |
| "learning_rate": 1.960095011876485e-05, |
| "loss": 0.4422, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.23728088926891835, |
| "grad_norm": 5.1877899169921875, |
| "learning_rate": 1.9589073634204276e-05, |
| "loss": 0.4571, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.23834972210346303, |
| "grad_norm": 6.098719120025635, |
| "learning_rate": 1.9577197149643706e-05, |
| "loss": 0.5104, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.2394185549380077, |
| "grad_norm": 5.2909770011901855, |
| "learning_rate": 1.9565320665083137e-05, |
| "loss": 0.5217, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.24048738777255238, |
| "grad_norm": 7.134459018707275, |
| "learning_rate": 1.9553444180522567e-05, |
| "loss": 0.4481, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.24155622060709706, |
| "grad_norm": 7.295234203338623, |
| "learning_rate": 1.9541567695961994e-05, |
| "loss": 0.5244, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.24262505344164173, |
| "grad_norm": 6.2853193283081055, |
| "learning_rate": 1.952969121140143e-05, |
| "loss": 0.4593, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.2436938862761864, |
| "grad_norm": 6.563023567199707, |
| "learning_rate": 1.9517814726840856e-05, |
| "loss": 0.5566, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.24476271911073108, |
| "grad_norm": 5.089166164398193, |
| "learning_rate": 1.9505938242280286e-05, |
| "loss": 0.5473, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.24583155194527576, |
| "grad_norm": 4.706131458282471, |
| "learning_rate": 1.9494061757719717e-05, |
| "loss": 0.3649, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.24690038477982043, |
| "grad_norm": 7.967005729675293, |
| "learning_rate": 1.9482185273159147e-05, |
| "loss": 0.5466, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.2479692176143651, |
| "grad_norm": 6.625776767730713, |
| "learning_rate": 1.9470308788598574e-05, |
| "loss": 0.4687, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.24903805044890978, |
| "grad_norm": 6.631053447723389, |
| "learning_rate": 1.9458432304038005e-05, |
| "loss": 0.521, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.2501068832834545, |
| "grad_norm": 5.4099555015563965, |
| "learning_rate": 1.944655581947744e-05, |
| "loss": 0.4539, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.25117571611799916, |
| "grad_norm": 6.302776336669922, |
| "learning_rate": 1.9434679334916866e-05, |
| "loss": 0.5975, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.25224454895254383, |
| "grad_norm": 6.055430889129639, |
| "learning_rate": 1.9422802850356297e-05, |
| "loss": 0.4108, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.2533133817870885, |
| "grad_norm": 6.507791519165039, |
| "learning_rate": 1.9410926365795727e-05, |
| "loss": 0.4052, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.2543822146216332, |
| "grad_norm": 4.61367130279541, |
| "learning_rate": 1.9399049881235158e-05, |
| "loss": 0.3805, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.25545104745617786, |
| "grad_norm": 7.931022644042969, |
| "learning_rate": 1.9387173396674585e-05, |
| "loss": 0.508, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.25651988029072254, |
| "grad_norm": 7.164324760437012, |
| "learning_rate": 1.9375296912114015e-05, |
| "loss": 0.4517, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.2575887131252672, |
| "grad_norm": 5.5631890296936035, |
| "learning_rate": 1.9363420427553446e-05, |
| "loss": 0.5546, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.2586575459598119, |
| "grad_norm": 6.603108882904053, |
| "learning_rate": 1.9351543942992876e-05, |
| "loss": 0.5427, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.25972637879435656, |
| "grad_norm": 6.033792018890381, |
| "learning_rate": 1.9339667458432304e-05, |
| "loss": 0.4869, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.26079521162890124, |
| "grad_norm": 6.105428218841553, |
| "learning_rate": 1.9327790973871738e-05, |
| "loss": 0.5849, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.2618640444634459, |
| "grad_norm": 5.2770161628723145, |
| "learning_rate": 1.9315914489311165e-05, |
| "loss": 0.4608, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.2629328772979906, |
| "grad_norm": 8.29669189453125, |
| "learning_rate": 1.9304038004750595e-05, |
| "loss": 0.6211, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.26400171013253526, |
| "grad_norm": 5.556075572967529, |
| "learning_rate": 1.9292161520190026e-05, |
| "loss": 0.5136, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.26507054296707994, |
| "grad_norm": 6.262655258178711, |
| "learning_rate": 1.9280285035629456e-05, |
| "loss": 0.4788, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.2661393758016246, |
| "grad_norm": 7.117279052734375, |
| "learning_rate": 1.9268408551068884e-05, |
| "loss": 0.4763, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.2672082086361693, |
| "grad_norm": 7.450889587402344, |
| "learning_rate": 1.9256532066508314e-05, |
| "loss": 0.5398, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.26827704147071396, |
| "grad_norm": 6.718365669250488, |
| "learning_rate": 1.9244655581947745e-05, |
| "loss": 0.5782, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.26934587430525864, |
| "grad_norm": 5.374184608459473, |
| "learning_rate": 1.9232779097387175e-05, |
| "loss": 0.4568, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.2704147071398033, |
| "grad_norm": 4.485583305358887, |
| "learning_rate": 1.9220902612826606e-05, |
| "loss": 0.3874, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.271483539974348, |
| "grad_norm": 5.478529930114746, |
| "learning_rate": 1.9209026128266036e-05, |
| "loss": 0.4604, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.27255237280889266, |
| "grad_norm": 5.911350727081299, |
| "learning_rate": 1.9197149643705463e-05, |
| "loss": 0.4202, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.2736212056434374, |
| "grad_norm": 7.953678131103516, |
| "learning_rate": 1.9185273159144894e-05, |
| "loss": 0.4467, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.27469003847798207, |
| "grad_norm": 6.318038463592529, |
| "learning_rate": 1.9173396674584325e-05, |
| "loss": 0.3631, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.27575887131252674, |
| "grad_norm": 7.290485382080078, |
| "learning_rate": 1.9161520190023755e-05, |
| "loss": 0.5917, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.2768277041470714, |
| "grad_norm": 6.057776927947998, |
| "learning_rate": 1.9149643705463182e-05, |
| "loss": 0.4315, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.2778965369816161, |
| "grad_norm": 5.482032775878906, |
| "learning_rate": 1.9137767220902613e-05, |
| "loss": 0.3898, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.27896536981616077, |
| "grad_norm": 7.219336032867432, |
| "learning_rate": 1.9125890736342047e-05, |
| "loss": 0.5546, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.28003420265070544, |
| "grad_norm": 6.556499481201172, |
| "learning_rate": 1.9114014251781474e-05, |
| "loss": 0.4766, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.2811030354852501, |
| "grad_norm": 8.849128723144531, |
| "learning_rate": 1.9102137767220904e-05, |
| "loss": 0.5883, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.2821718683197948, |
| "grad_norm": 6.604886054992676, |
| "learning_rate": 1.9090261282660335e-05, |
| "loss": 0.4837, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.28324070115433947, |
| "grad_norm": 6.3507304191589355, |
| "learning_rate": 1.9078384798099766e-05, |
| "loss": 0.4576, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.28430953398888414, |
| "grad_norm": 7.592872619628906, |
| "learning_rate": 1.9066508313539193e-05, |
| "loss": 0.3894, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.2853783668234288, |
| "grad_norm": 7.806624889373779, |
| "learning_rate": 1.9054631828978623e-05, |
| "loss": 0.5239, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.2864471996579735, |
| "grad_norm": 5.70356559753418, |
| "learning_rate": 1.9042755344418054e-05, |
| "loss": 0.4255, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.28751603249251817, |
| "grad_norm": 6.017592906951904, |
| "learning_rate": 1.9030878859857484e-05, |
| "loss": 0.3944, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.28858486532706284, |
| "grad_norm": 8.678641319274902, |
| "learning_rate": 1.9019002375296915e-05, |
| "loss": 0.6219, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.2896536981616075, |
| "grad_norm": 5.638593673706055, |
| "learning_rate": 1.9007125890736345e-05, |
| "loss": 0.4791, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.2907225309961522, |
| "grad_norm": 6.662184238433838, |
| "learning_rate": 1.8995249406175773e-05, |
| "loss": 0.4101, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.29179136383069687, |
| "grad_norm": 5.850408554077148, |
| "learning_rate": 1.8983372921615203e-05, |
| "loss": 0.4422, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.29286019666524155, |
| "grad_norm": 6.298422813415527, |
| "learning_rate": 1.8971496437054634e-05, |
| "loss": 0.4426, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.2939290294997862, |
| "grad_norm": 6.113378524780273, |
| "learning_rate": 1.8959619952494064e-05, |
| "loss": 0.4108, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.2949978623343309, |
| "grad_norm": 5.136318206787109, |
| "learning_rate": 1.894774346793349e-05, |
| "loss": 0.4734, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.29606669516887557, |
| "grad_norm": 7.1877760887146, |
| "learning_rate": 1.8935866983372922e-05, |
| "loss": 0.4678, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.29713552800342025, |
| "grad_norm": 7.322067737579346, |
| "learning_rate": 1.8923990498812352e-05, |
| "loss": 0.4334, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.2982043608379649, |
| "grad_norm": 4.906497001647949, |
| "learning_rate": 1.8912114014251783e-05, |
| "loss": 0.4211, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.2992731936725096, |
| "grad_norm": 4.929844379425049, |
| "learning_rate": 1.8900237529691214e-05, |
| "loss": 0.5807, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.30034202650705427, |
| "grad_norm": 6.196166515350342, |
| "learning_rate": 1.8888361045130644e-05, |
| "loss": 0.5956, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.30141085934159895, |
| "grad_norm": 5.226170539855957, |
| "learning_rate": 1.887648456057007e-05, |
| "loss": 0.4175, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.3024796921761437, |
| "grad_norm": 4.843142509460449, |
| "learning_rate": 1.8864608076009502e-05, |
| "loss": 0.4233, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.30354852501068835, |
| "grad_norm": 5.112825393676758, |
| "learning_rate": 1.8852731591448932e-05, |
| "loss": 0.4118, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.304617357845233, |
| "grad_norm": 6.756041526794434, |
| "learning_rate": 1.8840855106888363e-05, |
| "loss": 0.3919, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.3056861906797777, |
| "grad_norm": 5.811524868011475, |
| "learning_rate": 1.882897862232779e-05, |
| "loss": 0.5152, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.3067550235143224, |
| "grad_norm": 5.891305446624756, |
| "learning_rate": 1.8817102137767224e-05, |
| "loss": 0.393, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.30782385634886705, |
| "grad_norm": 6.220530986785889, |
| "learning_rate": 1.880522565320665e-05, |
| "loss": 0.3765, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.30889268918341173, |
| "grad_norm": 6.09738826751709, |
| "learning_rate": 1.8793349168646082e-05, |
| "loss": 0.4476, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.3099615220179564, |
| "grad_norm": 4.718704700469971, |
| "learning_rate": 1.8781472684085512e-05, |
| "loss": 0.5094, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.3110303548525011, |
| "grad_norm": 5.264518737792969, |
| "learning_rate": 1.8769596199524943e-05, |
| "loss": 0.4496, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.31209918768704575, |
| "grad_norm": 5.551924705505371, |
| "learning_rate": 1.8757719714964373e-05, |
| "loss": 0.4305, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.31316802052159043, |
| "grad_norm": 4.252546787261963, |
| "learning_rate": 1.87458432304038e-05, |
| "loss": 0.495, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.3142368533561351, |
| "grad_norm": 4.372467517852783, |
| "learning_rate": 1.8733966745843235e-05, |
| "loss": 0.5561, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.3153056861906798, |
| "grad_norm": 6.216442108154297, |
| "learning_rate": 1.872209026128266e-05, |
| "loss": 0.4432, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.31637451902522445, |
| "grad_norm": 3.8125741481781006, |
| "learning_rate": 1.8710213776722092e-05, |
| "loss": 0.3382, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.31744335185976913, |
| "grad_norm": 5.539150714874268, |
| "learning_rate": 1.8698337292161523e-05, |
| "loss": 0.4209, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.3185121846943138, |
| "grad_norm": 6.593637466430664, |
| "learning_rate": 1.8686460807600953e-05, |
| "loss": 0.3776, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.3195810175288585, |
| "grad_norm": 5.109198570251465, |
| "learning_rate": 1.867458432304038e-05, |
| "loss": 0.4271, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.32064985036340315, |
| "grad_norm": 7.083045959472656, |
| "learning_rate": 1.866270783847981e-05, |
| "loss": 0.5628, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.32171868319794783, |
| "grad_norm": 7.068709850311279, |
| "learning_rate": 1.865083135391924e-05, |
| "loss": 0.4438, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.3227875160324925, |
| "grad_norm": 4.62941312789917, |
| "learning_rate": 1.8638954869358672e-05, |
| "loss": 0.3863, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.3238563488670372, |
| "grad_norm": 4.4039788246154785, |
| "learning_rate": 1.86270783847981e-05, |
| "loss": 0.5629, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.32492518170158186, |
| "grad_norm": 6.153443813323975, |
| "learning_rate": 1.8615201900237533e-05, |
| "loss": 0.4851, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.32599401453612653, |
| "grad_norm": 4.207914352416992, |
| "learning_rate": 1.860332541567696e-05, |
| "loss": 0.3386, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.3270628473706712, |
| "grad_norm": 5.669225692749023, |
| "learning_rate": 1.859144893111639e-05, |
| "loss": 0.4546, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.3281316802052159, |
| "grad_norm": 6.5213775634765625, |
| "learning_rate": 1.857957244655582e-05, |
| "loss": 0.4147, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.32920051303976056, |
| "grad_norm": 5.153679370880127, |
| "learning_rate": 1.8567695961995252e-05, |
| "loss": 0.403, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.33026934587430523, |
| "grad_norm": 5.655941009521484, |
| "learning_rate": 1.855581947743468e-05, |
| "loss": 0.4155, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.33133817870884996, |
| "grad_norm": 6.6513895988464355, |
| "learning_rate": 1.854394299287411e-05, |
| "loss": 0.4831, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.33240701154339464, |
| "grad_norm": 5.994706153869629, |
| "learning_rate": 1.853206650831354e-05, |
| "loss": 0.388, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.3334758443779393, |
| "grad_norm": 4.132383346557617, |
| "learning_rate": 1.852019002375297e-05, |
| "loss": 0.4184, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.334544677212484, |
| "grad_norm": 4.975070953369141, |
| "learning_rate": 1.8508313539192398e-05, |
| "loss": 0.3645, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.33561351004702866, |
| "grad_norm": 6.475266933441162, |
| "learning_rate": 1.8496437054631832e-05, |
| "loss": 0.4653, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.33668234288157334, |
| "grad_norm": 5.302603244781494, |
| "learning_rate": 1.848456057007126e-05, |
| "loss": 0.5196, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.337751175716118, |
| "grad_norm": 6.404365539550781, |
| "learning_rate": 1.847268408551069e-05, |
| "loss": 0.5252, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.3388200085506627, |
| "grad_norm": 5.3015923500061035, |
| "learning_rate": 1.846080760095012e-05, |
| "loss": 0.6971, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.33988884138520736, |
| "grad_norm": 6.321039199829102, |
| "learning_rate": 1.844893111638955e-05, |
| "loss": 0.3881, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.34095767421975204, |
| "grad_norm": 4.614476680755615, |
| "learning_rate": 1.843705463182898e-05, |
| "loss": 0.4302, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.3420265070542967, |
| "grad_norm": 5.174408912658691, |
| "learning_rate": 1.842517814726841e-05, |
| "loss": 0.3701, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.3430953398888414, |
| "grad_norm": 4.7469706535339355, |
| "learning_rate": 1.8413301662707842e-05, |
| "loss": 0.3893, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.34416417272338606, |
| "grad_norm": 5.967380046844482, |
| "learning_rate": 1.840142517814727e-05, |
| "loss": 0.4246, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.34523300555793074, |
| "grad_norm": 4.841580867767334, |
| "learning_rate": 1.83895486935867e-05, |
| "loss": 0.3006, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.3463018383924754, |
| "grad_norm": 5.739339351654053, |
| "learning_rate": 1.837767220902613e-05, |
| "loss": 0.7078, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.3473706712270201, |
| "grad_norm": 5.888680458068848, |
| "learning_rate": 1.836579572446556e-05, |
| "loss": 0.375, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.34843950406156476, |
| "grad_norm": 6.077122211456299, |
| "learning_rate": 1.835391923990499e-05, |
| "loss": 0.4425, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.34950833689610944, |
| "grad_norm": 6.087640762329102, |
| "learning_rate": 1.834204275534442e-05, |
| "loss": 0.3841, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.3505771697306541, |
| "grad_norm": 7.3536529541015625, |
| "learning_rate": 1.833016627078385e-05, |
| "loss": 0.4075, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.3516460025651988, |
| "grad_norm": 6.833067893981934, |
| "learning_rate": 1.831828978622328e-05, |
| "loss": 0.5383, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.35271483539974346, |
| "grad_norm": 5.849217414855957, |
| "learning_rate": 1.8306413301662707e-05, |
| "loss": 0.4623, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.35378366823428814, |
| "grad_norm": 5.285182952880859, |
| "learning_rate": 1.829453681710214e-05, |
| "loss": 0.3986, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.3548525010688328, |
| "grad_norm": 5.706902980804443, |
| "learning_rate": 1.8282660332541568e-05, |
| "loss": 0.4038, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.3559213339033775, |
| "grad_norm": 4.221705436706543, |
| "learning_rate": 1.8270783847981e-05, |
| "loss": 0.5615, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.35699016673792217, |
| "grad_norm": 6.5307745933532715, |
| "learning_rate": 1.825890736342043e-05, |
| "loss": 0.5535, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.35805899957246684, |
| "grad_norm": 5.936892509460449, |
| "learning_rate": 1.824703087885986e-05, |
| "loss": 0.3316, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.3591278324070115, |
| "grad_norm": 4.413790702819824, |
| "learning_rate": 1.8235154394299287e-05, |
| "loss": 0.3916, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.36019666524155625, |
| "grad_norm": 5.399665355682373, |
| "learning_rate": 1.8223277909738718e-05, |
| "loss": 0.3723, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.3612654980761009, |
| "grad_norm": 8.413554191589355, |
| "learning_rate": 1.8211401425178148e-05, |
| "loss": 0.5188, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.3623343309106456, |
| "grad_norm": 3.7601664066314697, |
| "learning_rate": 1.819952494061758e-05, |
| "loss": 0.3817, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.36340316374519027, |
| "grad_norm": 5.661569595336914, |
| "learning_rate": 1.818764845605701e-05, |
| "loss": 0.4036, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.36447199657973495, |
| "grad_norm": 6.07588005065918, |
| "learning_rate": 1.817577197149644e-05, |
| "loss": 0.4224, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.3655408294142796, |
| "grad_norm": 5.329127311706543, |
| "learning_rate": 1.8163895486935867e-05, |
| "loss": 0.4171, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.3666096622488243, |
| "grad_norm": 7.156865119934082, |
| "learning_rate": 1.8152019002375298e-05, |
| "loss": 0.4122, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.367678495083369, |
| "grad_norm": 5.72195291519165, |
| "learning_rate": 1.8140142517814728e-05, |
| "loss": 0.4024, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.36874732791791365, |
| "grad_norm": 4.991401672363281, |
| "learning_rate": 1.812826603325416e-05, |
| "loss": 0.3882, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.3698161607524583, |
| "grad_norm": 4.662073612213135, |
| "learning_rate": 1.811638954869359e-05, |
| "loss": 0.3282, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.370884993587003, |
| "grad_norm": 5.966677188873291, |
| "learning_rate": 1.8104513064133016e-05, |
| "loss": 0.2961, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.3719538264215477, |
| "grad_norm": 5.708690166473389, |
| "learning_rate": 1.809263657957245e-05, |
| "loss": 0.4568, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.37302265925609235, |
| "grad_norm": 5.69785213470459, |
| "learning_rate": 1.8080760095011877e-05, |
| "loss": 0.3544, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.374091492090637, |
| "grad_norm": 6.101360321044922, |
| "learning_rate": 1.8068883610451308e-05, |
| "loss": 0.4282, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.3751603249251817, |
| "grad_norm": 6.585791110992432, |
| "learning_rate": 1.805700712589074e-05, |
| "loss": 0.3798, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.3762291577597264, |
| "grad_norm": 5.618402481079102, |
| "learning_rate": 1.804513064133017e-05, |
| "loss": 0.4618, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.37729799059427105, |
| "grad_norm": 6.637610912322998, |
| "learning_rate": 1.8033254156769596e-05, |
| "loss": 0.3876, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.3783668234288157, |
| "grad_norm": 4.9898295402526855, |
| "learning_rate": 1.8021377672209027e-05, |
| "loss": 0.3393, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.3794356562633604, |
| "grad_norm": 6.182595252990723, |
| "learning_rate": 1.8009501187648457e-05, |
| "loss": 0.4276, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.3805044890979051, |
| "grad_norm": 5.460147380828857, |
| "learning_rate": 1.7997624703087888e-05, |
| "loss": 0.4641, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.38157332193244975, |
| "grad_norm": 4.0731940269470215, |
| "learning_rate": 1.798574821852732e-05, |
| "loss": 0.4248, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.3826421547669944, |
| "grad_norm": 3.6468496322631836, |
| "learning_rate": 1.797387173396675e-05, |
| "loss": 0.3601, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.3837109876015391, |
| "grad_norm": 3.701404094696045, |
| "learning_rate": 1.7961995249406176e-05, |
| "loss": 0.3384, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.3847798204360838, |
| "grad_norm": 6.082109451293945, |
| "learning_rate": 1.7950118764845607e-05, |
| "loss": 0.3598, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.38584865327062845, |
| "grad_norm": 4.901666164398193, |
| "learning_rate": 1.7938242280285037e-05, |
| "loss": 0.4363, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.3869174861051731, |
| "grad_norm": 3.848799467086792, |
| "learning_rate": 1.7926365795724468e-05, |
| "loss": 0.335, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.38798631893971786, |
| "grad_norm": 4.457520484924316, |
| "learning_rate": 1.7914489311163895e-05, |
| "loss": 0.3892, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.38905515177426253, |
| "grad_norm": 6.423126697540283, |
| "learning_rate": 1.7902612826603326e-05, |
| "loss": 0.4103, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.3901239846088072, |
| "grad_norm": 5.50001335144043, |
| "learning_rate": 1.7890736342042756e-05, |
| "loss": 0.3959, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.3911928174433519, |
| "grad_norm": 3.85994553565979, |
| "learning_rate": 1.7878859857482187e-05, |
| "loss": 0.3593, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.39226165027789656, |
| "grad_norm": 6.009896278381348, |
| "learning_rate": 1.7866983372921617e-05, |
| "loss": 0.4366, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.39333048311244123, |
| "grad_norm": 4.844223499298096, |
| "learning_rate": 1.7855106888361048e-05, |
| "loss": 0.3633, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.3943993159469859, |
| "grad_norm": 5.032964706420898, |
| "learning_rate": 1.7843230403800475e-05, |
| "loss": 0.4333, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.3954681487815306, |
| "grad_norm": 5.1685872077941895, |
| "learning_rate": 1.7831353919239905e-05, |
| "loss": 0.4825, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.39653698161607526, |
| "grad_norm": 5.741828918457031, |
| "learning_rate": 1.7819477434679336e-05, |
| "loss": 0.3041, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.39760581445061993, |
| "grad_norm": 5.440220832824707, |
| "learning_rate": 1.7807600950118767e-05, |
| "loss": 0.3416, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.3986746472851646, |
| "grad_norm": 4.476759433746338, |
| "learning_rate": 1.7795724465558197e-05, |
| "loss": 0.4578, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.3997434801197093, |
| "grad_norm": 6.7310991287231445, |
| "learning_rate": 1.7783847980997628e-05, |
| "loss": 0.5441, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.40081231295425396, |
| "grad_norm": 5.929594993591309, |
| "learning_rate": 1.7771971496437058e-05, |
| "loss": 0.5124, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.40188114578879863, |
| "grad_norm": 4.516419410705566, |
| "learning_rate": 1.7760095011876485e-05, |
| "loss": 0.4026, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.4029499786233433, |
| "grad_norm": 5.7698798179626465, |
| "learning_rate": 1.7748218527315916e-05, |
| "loss": 0.4537, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.404018811457888, |
| "grad_norm": 4.604269027709961, |
| "learning_rate": 1.7736342042755346e-05, |
| "loss": 0.3983, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.40508764429243266, |
| "grad_norm": 6.4217610359191895, |
| "learning_rate": 1.7724465558194777e-05, |
| "loss": 0.3894, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.40615647712697733, |
| "grad_norm": 5.296751022338867, |
| "learning_rate": 1.7712589073634204e-05, |
| "loss": 0.4747, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.407225309961522, |
| "grad_norm": 4.870068550109863, |
| "learning_rate": 1.7700712589073638e-05, |
| "loss": 0.4042, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.4082941427960667, |
| "grad_norm": 4.312191486358643, |
| "learning_rate": 1.7688836104513065e-05, |
| "loss": 0.3477, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.40936297563061136, |
| "grad_norm": 5.281498432159424, |
| "learning_rate": 1.7676959619952496e-05, |
| "loss": 0.4512, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.41043180846515603, |
| "grad_norm": 4.401067733764648, |
| "learning_rate": 1.7665083135391926e-05, |
| "loss": 0.3451, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.4115006412997007, |
| "grad_norm": 5.28626012802124, |
| "learning_rate": 1.7653206650831357e-05, |
| "loss": 0.3983, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.4125694741342454, |
| "grad_norm": 5.951436519622803, |
| "learning_rate": 1.7641330166270784e-05, |
| "loss": 0.62, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.41363830696879006, |
| "grad_norm": 3.4126088619232178, |
| "learning_rate": 1.7629453681710215e-05, |
| "loss": 0.4632, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.41470713980333473, |
| "grad_norm": 4.540611267089844, |
| "learning_rate": 1.7617577197149645e-05, |
| "loss": 0.3757, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.4157759726378794, |
| "grad_norm": 5.913720607757568, |
| "learning_rate": 1.7605700712589076e-05, |
| "loss": 0.4752, |
| "step": 1945 |
| }, |
| { |
| "epoch": 0.41684480547242414, |
| "grad_norm": 4.386907577514648, |
| "learning_rate": 1.7593824228028503e-05, |
| "loss": 0.453, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.4179136383069688, |
| "grad_norm": 4.836590766906738, |
| "learning_rate": 1.7581947743467937e-05, |
| "loss": 0.4348, |
| "step": 1955 |
| }, |
| { |
| "epoch": 0.4189824711415135, |
| "grad_norm": 4.215417861938477, |
| "learning_rate": 1.7570071258907364e-05, |
| "loss": 0.3944, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.42005130397605817, |
| "grad_norm": 5.9303789138793945, |
| "learning_rate": 1.7558194774346795e-05, |
| "loss": 0.3702, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.42112013681060284, |
| "grad_norm": 5.648311138153076, |
| "learning_rate": 1.7546318289786225e-05, |
| "loss": 0.3888, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.4221889696451475, |
| "grad_norm": 5.413701057434082, |
| "learning_rate": 1.7534441805225656e-05, |
| "loss": 0.3968, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.4232578024796922, |
| "grad_norm": 4.331090450286865, |
| "learning_rate": 1.7522565320665083e-05, |
| "loss": 0.3609, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.42432663531423687, |
| "grad_norm": 4.991115093231201, |
| "learning_rate": 1.7510688836104513e-05, |
| "loss": 0.4328, |
| "step": 1985 |
| }, |
| { |
| "epoch": 0.42539546814878154, |
| "grad_norm": 5.451033115386963, |
| "learning_rate": 1.7498812351543944e-05, |
| "loss": 0.516, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.4264643009833262, |
| "grad_norm": 5.011542320251465, |
| "learning_rate": 1.7486935866983374e-05, |
| "loss": 0.3605, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.4275331338178709, |
| "grad_norm": 5.4983086585998535, |
| "learning_rate": 1.74750593824228e-05, |
| "loss": 0.3094, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.42860196665241557, |
| "grad_norm": 5.928680896759033, |
| "learning_rate": 1.7463182897862236e-05, |
| "loss": 0.3866, |
| "step": 2005 |
| }, |
| { |
| "epoch": 0.42967079948696024, |
| "grad_norm": 4.630986213684082, |
| "learning_rate": 1.7451306413301666e-05, |
| "loss": 0.3943, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.4307396323215049, |
| "grad_norm": 4.091104030609131, |
| "learning_rate": 1.7439429928741093e-05, |
| "loss": 0.3931, |
| "step": 2015 |
| }, |
| { |
| "epoch": 0.4318084651560496, |
| "grad_norm": 6.031238555908203, |
| "learning_rate": 1.7427553444180524e-05, |
| "loss": 0.3728, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.43287729799059427, |
| "grad_norm": 4.81741189956665, |
| "learning_rate": 1.7415676959619954e-05, |
| "loss": 0.2953, |
| "step": 2025 |
| }, |
| { |
| "epoch": 0.43394613082513894, |
| "grad_norm": 5.144311904907227, |
| "learning_rate": 1.7403800475059385e-05, |
| "loss": 0.3547, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.4350149636596836, |
| "grad_norm": 4.806643009185791, |
| "learning_rate": 1.7391923990498812e-05, |
| "loss": 0.4275, |
| "step": 2035 |
| }, |
| { |
| "epoch": 0.4360837964942283, |
| "grad_norm": 4.138782501220703, |
| "learning_rate": 1.7380047505938246e-05, |
| "loss": 0.2959, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.43715262932877297, |
| "grad_norm": 5.7593255043029785, |
| "learning_rate": 1.7368171021377673e-05, |
| "loss": 0.3245, |
| "step": 2045 |
| }, |
| { |
| "epoch": 0.43822146216331764, |
| "grad_norm": 4.043095588684082, |
| "learning_rate": 1.7356294536817104e-05, |
| "loss": 0.4148, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.4392902949978623, |
| "grad_norm": 4.848685264587402, |
| "learning_rate": 1.7344418052256534e-05, |
| "loss": 0.3542, |
| "step": 2055 |
| }, |
| { |
| "epoch": 0.440359127832407, |
| "grad_norm": 5.738672256469727, |
| "learning_rate": 1.7332541567695965e-05, |
| "loss": 0.4493, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.44142796066695167, |
| "grad_norm": 4.470565319061279, |
| "learning_rate": 1.7320665083135392e-05, |
| "loss": 0.376, |
| "step": 2065 |
| }, |
| { |
| "epoch": 0.44249679350149634, |
| "grad_norm": 4.22749137878418, |
| "learning_rate": 1.7308788598574823e-05, |
| "loss": 0.322, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.443565626336041, |
| "grad_norm": 5.158305644989014, |
| "learning_rate": 1.7296912114014253e-05, |
| "loss": 0.3227, |
| "step": 2075 |
| }, |
| { |
| "epoch": 0.4446344591705857, |
| "grad_norm": 6.257720947265625, |
| "learning_rate": 1.7285035629453684e-05, |
| "loss": 0.3046, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.4457032920051304, |
| "grad_norm": 5.981179237365723, |
| "learning_rate": 1.727315914489311e-05, |
| "loss": 0.3064, |
| "step": 2085 |
| }, |
| { |
| "epoch": 0.4467721248396751, |
| "grad_norm": 5.584667682647705, |
| "learning_rate": 1.7261282660332545e-05, |
| "loss": 0.3199, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.4478409576742198, |
| "grad_norm": 5.660790920257568, |
| "learning_rate": 1.7249406175771972e-05, |
| "loss": 0.3774, |
| "step": 2095 |
| }, |
| { |
| "epoch": 0.44890979050876445, |
| "grad_norm": 4.129720687866211, |
| "learning_rate": 1.7237529691211402e-05, |
| "loss": 0.3212, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.4499786233433091, |
| "grad_norm": 3.2054107189178467, |
| "learning_rate": 1.7225653206650833e-05, |
| "loss": 0.3302, |
| "step": 2105 |
| }, |
| { |
| "epoch": 0.4510474561778538, |
| "grad_norm": 3.934522867202759, |
| "learning_rate": 1.7213776722090264e-05, |
| "loss": 0.3205, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.4521162890123985, |
| "grad_norm": 5.592263221740723, |
| "learning_rate": 1.720190023752969e-05, |
| "loss": 0.3673, |
| "step": 2115 |
| }, |
| { |
| "epoch": 0.45318512184694315, |
| "grad_norm": 5.707674026489258, |
| "learning_rate": 1.719002375296912e-05, |
| "loss": 0.3752, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.4542539546814878, |
| "grad_norm": 4.284328937530518, |
| "learning_rate": 1.7178147268408552e-05, |
| "loss": 0.3192, |
| "step": 2125 |
| }, |
| { |
| "epoch": 0.4553227875160325, |
| "grad_norm": 4.87931489944458, |
| "learning_rate": 1.7166270783847982e-05, |
| "loss": 0.3372, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.4563916203505772, |
| "grad_norm": 5.3206048011779785, |
| "learning_rate": 1.7154394299287413e-05, |
| "loss": 0.321, |
| "step": 2135 |
| }, |
| { |
| "epoch": 0.45746045318512185, |
| "grad_norm": 5.118194103240967, |
| "learning_rate": 1.7142517814726843e-05, |
| "loss": 0.4086, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.4585292860196665, |
| "grad_norm": 5.390005111694336, |
| "learning_rate": 1.7130641330166274e-05, |
| "loss": 0.4092, |
| "step": 2145 |
| }, |
| { |
| "epoch": 0.4595981188542112, |
| "grad_norm": 6.221261978149414, |
| "learning_rate": 1.71187648456057e-05, |
| "loss": 0.4591, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.4606669516887559, |
| "grad_norm": 4.9464497566223145, |
| "learning_rate": 1.7106888361045132e-05, |
| "loss": 0.503, |
| "step": 2155 |
| }, |
| { |
| "epoch": 0.46173578452330055, |
| "grad_norm": 6.745388984680176, |
| "learning_rate": 1.7095011876484562e-05, |
| "loss": 0.4767, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.4628046173578452, |
| "grad_norm": 5.506555080413818, |
| "learning_rate": 1.7083135391923993e-05, |
| "loss": 0.3425, |
| "step": 2165 |
| }, |
| { |
| "epoch": 0.4638734501923899, |
| "grad_norm": 5.21577787399292, |
| "learning_rate": 1.707125890736342e-05, |
| "loss": 0.372, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.4649422830269346, |
| "grad_norm": 4.69103479385376, |
| "learning_rate": 1.7059382422802854e-05, |
| "loss": 0.4671, |
| "step": 2175 |
| }, |
| { |
| "epoch": 0.46601111586147925, |
| "grad_norm": 4.060796737670898, |
| "learning_rate": 1.704750593824228e-05, |
| "loss": 0.3767, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.4670799486960239, |
| "grad_norm": 6.448695659637451, |
| "learning_rate": 1.703562945368171e-05, |
| "loss": 0.3096, |
| "step": 2185 |
| }, |
| { |
| "epoch": 0.4681487815305686, |
| "grad_norm": 4.255459308624268, |
| "learning_rate": 1.7023752969121142e-05, |
| "loss": 0.3654, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.4692176143651133, |
| "grad_norm": 5.383869647979736, |
| "learning_rate": 1.7011876484560573e-05, |
| "loss": 0.4243, |
| "step": 2195 |
| }, |
| { |
| "epoch": 0.47028644719965795, |
| "grad_norm": 4.97196102142334, |
| "learning_rate": 1.7e-05, |
| "loss": 0.4411, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.47135528003420263, |
| "grad_norm": 4.9628071784973145, |
| "learning_rate": 1.698812351543943e-05, |
| "loss": 0.5001, |
| "step": 2205 |
| }, |
| { |
| "epoch": 0.4724241128687473, |
| "grad_norm": 5.05242919921875, |
| "learning_rate": 1.697624703087886e-05, |
| "loss": 0.3485, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.473492945703292, |
| "grad_norm": 4.373459339141846, |
| "learning_rate": 1.696437054631829e-05, |
| "loss": 0.3546, |
| "step": 2215 |
| }, |
| { |
| "epoch": 0.4745617785378367, |
| "grad_norm": 5.0651397705078125, |
| "learning_rate": 1.6952494061757722e-05, |
| "loss": 0.4037, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.4756306113723814, |
| "grad_norm": 6.026737213134766, |
| "learning_rate": 1.6940617577197153e-05, |
| "loss": 0.3114, |
| "step": 2225 |
| }, |
| { |
| "epoch": 0.47669944420692606, |
| "grad_norm": 4.404332160949707, |
| "learning_rate": 1.692874109263658e-05, |
| "loss": 0.3422, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.47776827704147073, |
| "grad_norm": 5.780966281890869, |
| "learning_rate": 1.691686460807601e-05, |
| "loss": 0.4241, |
| "step": 2235 |
| }, |
| { |
| "epoch": 0.4788371098760154, |
| "grad_norm": 5.648661136627197, |
| "learning_rate": 1.690498812351544e-05, |
| "loss": 0.3912, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.4799059427105601, |
| "grad_norm": 3.616197109222412, |
| "learning_rate": 1.689311163895487e-05, |
| "loss": 0.3748, |
| "step": 2245 |
| }, |
| { |
| "epoch": 0.48097477554510476, |
| "grad_norm": 4.634115219116211, |
| "learning_rate": 1.68812351543943e-05, |
| "loss": 0.3746, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.48204360837964944, |
| "grad_norm": 4.268435478210449, |
| "learning_rate": 1.686935866983373e-05, |
| "loss": 0.3544, |
| "step": 2255 |
| }, |
| { |
| "epoch": 0.4831124412141941, |
| "grad_norm": 4.208693504333496, |
| "learning_rate": 1.685748218527316e-05, |
| "loss": 0.3246, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.4841812740487388, |
| "grad_norm": 7.521546840667725, |
| "learning_rate": 1.684560570071259e-05, |
| "loss": 0.3739, |
| "step": 2265 |
| }, |
| { |
| "epoch": 0.48525010688328346, |
| "grad_norm": 5.12343692779541, |
| "learning_rate": 1.683372921615202e-05, |
| "loss": 0.3606, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.48631893971782814, |
| "grad_norm": 6.54265022277832, |
| "learning_rate": 1.682185273159145e-05, |
| "loss": 0.3891, |
| "step": 2275 |
| }, |
| { |
| "epoch": 0.4873877725523728, |
| "grad_norm": 4.471118450164795, |
| "learning_rate": 1.680997624703088e-05, |
| "loss": 0.2855, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.4884566053869175, |
| "grad_norm": 7.488130569458008, |
| "learning_rate": 1.679809976247031e-05, |
| "loss": 0.4829, |
| "step": 2285 |
| }, |
| { |
| "epoch": 0.48952543822146216, |
| "grad_norm": 6.3466033935546875, |
| "learning_rate": 1.678622327790974e-05, |
| "loss": 0.4529, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.49059427105600684, |
| "grad_norm": 7.353418350219727, |
| "learning_rate": 1.677434679334917e-05, |
| "loss": 0.4819, |
| "step": 2295 |
| }, |
| { |
| "epoch": 0.4916631038905515, |
| "grad_norm": 4.575865745544434, |
| "learning_rate": 1.67624703087886e-05, |
| "loss": 0.3453, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.4927319367250962, |
| "grad_norm": 4.988368511199951, |
| "learning_rate": 1.675059382422803e-05, |
| "loss": 0.3425, |
| "step": 2305 |
| }, |
| { |
| "epoch": 0.49380076955964086, |
| "grad_norm": 5.05146598815918, |
| "learning_rate": 1.6738717339667462e-05, |
| "loss": 0.2884, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.49486960239418554, |
| "grad_norm": 6.10252571105957, |
| "learning_rate": 1.672684085510689e-05, |
| "loss": 0.3184, |
| "step": 2315 |
| }, |
| { |
| "epoch": 0.4959384352287302, |
| "grad_norm": 5.356700420379639, |
| "learning_rate": 1.671496437054632e-05, |
| "loss": 0.3043, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.4970072680632749, |
| "grad_norm": 4.550732135772705, |
| "learning_rate": 1.670308788598575e-05, |
| "loss": 0.3746, |
| "step": 2325 |
| }, |
| { |
| "epoch": 0.49807610089781956, |
| "grad_norm": 4.781940937042236, |
| "learning_rate": 1.669121140142518e-05, |
| "loss": 0.4023, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.49914493373236424, |
| "grad_norm": 3.1689300537109375, |
| "learning_rate": 1.6679334916864608e-05, |
| "loss": 0.2994, |
| "step": 2335 |
| }, |
| { |
| "epoch": 0.500213766566909, |
| "grad_norm": 5.919034004211426, |
| "learning_rate": 1.6667458432304042e-05, |
| "loss": 0.3858, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.5012825994014536, |
| "grad_norm": 4.044144153594971, |
| "learning_rate": 1.665558194774347e-05, |
| "loss": 0.3488, |
| "step": 2345 |
| }, |
| { |
| "epoch": 0.5023514322359983, |
| "grad_norm": 5.063786506652832, |
| "learning_rate": 1.66437054631829e-05, |
| "loss": 0.4467, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.5034202650705429, |
| "grad_norm": 4.159796237945557, |
| "learning_rate": 1.663182897862233e-05, |
| "loss": 0.3199, |
| "step": 2355 |
| }, |
| { |
| "epoch": 0.5044890979050877, |
| "grad_norm": 4.232370853424072, |
| "learning_rate": 1.661995249406176e-05, |
| "loss": 0.3124, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.5055579307396323, |
| "grad_norm": 3.8301782608032227, |
| "learning_rate": 1.6608076009501188e-05, |
| "loss": 0.3229, |
| "step": 2365 |
| }, |
| { |
| "epoch": 0.506626763574177, |
| "grad_norm": 5.729179382324219, |
| "learning_rate": 1.6596199524940618e-05, |
| "loss": 0.3416, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.5076955964087216, |
| "grad_norm": 4.137636184692383, |
| "learning_rate": 1.658432304038005e-05, |
| "loss": 0.3555, |
| "step": 2375 |
| }, |
| { |
| "epoch": 0.5087644292432664, |
| "grad_norm": 6.014377593994141, |
| "learning_rate": 1.657244655581948e-05, |
| "loss": 0.3078, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.509833262077811, |
| "grad_norm": 5.031920909881592, |
| "learning_rate": 1.6560570071258906e-05, |
| "loss": 0.3105, |
| "step": 2385 |
| }, |
| { |
| "epoch": 0.5109020949123557, |
| "grad_norm": 4.162966728210449, |
| "learning_rate": 1.654869358669834e-05, |
| "loss": 0.3565, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.5119709277469003, |
| "grad_norm": 5.57382345199585, |
| "learning_rate": 1.6536817102137768e-05, |
| "loss": 0.3113, |
| "step": 2395 |
| }, |
| { |
| "epoch": 0.5130397605814451, |
| "grad_norm": 6.443201065063477, |
| "learning_rate": 1.6524940617577198e-05, |
| "loss": 0.4326, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.5141085934159897, |
| "grad_norm": 7.050893306732178, |
| "learning_rate": 1.651306413301663e-05, |
| "loss": 0.4395, |
| "step": 2405 |
| }, |
| { |
| "epoch": 0.5151774262505344, |
| "grad_norm": 4.315305709838867, |
| "learning_rate": 1.650118764845606e-05, |
| "loss": 0.3549, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.516246259085079, |
| "grad_norm": 3.76841402053833, |
| "learning_rate": 1.6489311163895486e-05, |
| "loss": 0.4271, |
| "step": 2415 |
| }, |
| { |
| "epoch": 0.5173150919196238, |
| "grad_norm": 4.878926753997803, |
| "learning_rate": 1.6477434679334917e-05, |
| "loss": 0.3136, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.5183839247541685, |
| "grad_norm": 4.831075668334961, |
| "learning_rate": 1.646555819477435e-05, |
| "loss": 0.3235, |
| "step": 2425 |
| }, |
| { |
| "epoch": 0.5194527575887131, |
| "grad_norm": 4.886428356170654, |
| "learning_rate": 1.6453681710213778e-05, |
| "loss": 0.2909, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.5205215904232579, |
| "grad_norm": 5.281339645385742, |
| "learning_rate": 1.644180522565321e-05, |
| "loss": 0.3296, |
| "step": 2435 |
| }, |
| { |
| "epoch": 0.5215904232578025, |
| "grad_norm": 4.9752516746521, |
| "learning_rate": 1.642992874109264e-05, |
| "loss": 0.4124, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.5226592560923472, |
| "grad_norm": 5.5705952644348145, |
| "learning_rate": 1.641805225653207e-05, |
| "loss": 0.4444, |
| "step": 2445 |
| }, |
| { |
| "epoch": 0.5237280889268918, |
| "grad_norm": 4.4641499519348145, |
| "learning_rate": 1.6406175771971497e-05, |
| "loss": 0.3649, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.5247969217614366, |
| "grad_norm": 4.909672260284424, |
| "learning_rate": 1.6394299287410927e-05, |
| "loss": 0.3897, |
| "step": 2455 |
| }, |
| { |
| "epoch": 0.5258657545959812, |
| "grad_norm": 5.340948581695557, |
| "learning_rate": 1.6382422802850358e-05, |
| "loss": 0.36, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.5269345874305259, |
| "grad_norm": 5.204975128173828, |
| "learning_rate": 1.637054631828979e-05, |
| "loss": 0.3899, |
| "step": 2465 |
| }, |
| { |
| "epoch": 0.5280034202650705, |
| "grad_norm": 5.030284881591797, |
| "learning_rate": 1.6358669833729216e-05, |
| "loss": 0.3303, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.5290722530996153, |
| "grad_norm": 3.7952535152435303, |
| "learning_rate": 1.634679334916865e-05, |
| "loss": 0.3115, |
| "step": 2475 |
| }, |
| { |
| "epoch": 0.5301410859341599, |
| "grad_norm": 5.823569297790527, |
| "learning_rate": 1.6334916864608077e-05, |
| "loss": 0.4637, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.5312099187687046, |
| "grad_norm": 6.1813483238220215, |
| "learning_rate": 1.6323040380047507e-05, |
| "loss": 0.4402, |
| "step": 2485 |
| }, |
| { |
| "epoch": 0.5322787516032492, |
| "grad_norm": 3.668980360031128, |
| "learning_rate": 1.6311163895486938e-05, |
| "loss": 0.2825, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.533347584437794, |
| "grad_norm": 4.954606056213379, |
| "learning_rate": 1.629928741092637e-05, |
| "loss": 0.3389, |
| "step": 2495 |
| }, |
| { |
| "epoch": 0.5344164172723386, |
| "grad_norm": 4.136919021606445, |
| "learning_rate": 1.6287410926365796e-05, |
| "loss": 0.3513, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.5354852501068833, |
| "grad_norm": 5.383963108062744, |
| "learning_rate": 1.6275534441805226e-05, |
| "loss": 0.4301, |
| "step": 2505 |
| }, |
| { |
| "epoch": 0.5365540829414279, |
| "grad_norm": 4.818902015686035, |
| "learning_rate": 1.6263657957244657e-05, |
| "loss": 0.3733, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.5376229157759727, |
| "grad_norm": 4.797301769256592, |
| "learning_rate": 1.6251781472684087e-05, |
| "loss": 0.3241, |
| "step": 2515 |
| }, |
| { |
| "epoch": 0.5386917486105173, |
| "grad_norm": 5.040024757385254, |
| "learning_rate": 1.6239904988123514e-05, |
| "loss": 0.3457, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.539760581445062, |
| "grad_norm": 5.214640140533447, |
| "learning_rate": 1.622802850356295e-05, |
| "loss": 0.4533, |
| "step": 2525 |
| }, |
| { |
| "epoch": 0.5408294142796066, |
| "grad_norm": 3.6819052696228027, |
| "learning_rate": 1.6216152019002375e-05, |
| "loss": 0.3397, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.5418982471141514, |
| "grad_norm": 4.882740020751953, |
| "learning_rate": 1.6204275534441806e-05, |
| "loss": 0.4097, |
| "step": 2535 |
| }, |
| { |
| "epoch": 0.542967079948696, |
| "grad_norm": 4.784149646759033, |
| "learning_rate": 1.6192399049881237e-05, |
| "loss": 0.3083, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.5440359127832407, |
| "grad_norm": 5.621673107147217, |
| "learning_rate": 1.6180522565320667e-05, |
| "loss": 0.3199, |
| "step": 2545 |
| }, |
| { |
| "epoch": 0.5451047456177853, |
| "grad_norm": 5.204516887664795, |
| "learning_rate": 1.6168646080760094e-05, |
| "loss": 0.3971, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.54617357845233, |
| "grad_norm": 4.5771026611328125, |
| "learning_rate": 1.6156769596199525e-05, |
| "loss": 0.5229, |
| "step": 2555 |
| }, |
| { |
| "epoch": 0.5472424112868748, |
| "grad_norm": 5.919792652130127, |
| "learning_rate": 1.6144893111638955e-05, |
| "loss": 0.3765, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.5483112441214194, |
| "grad_norm": 4.573512554168701, |
| "learning_rate": 1.6133016627078386e-05, |
| "loss": 0.3727, |
| "step": 2565 |
| }, |
| { |
| "epoch": 0.5493800769559641, |
| "grad_norm": 3.752349615097046, |
| "learning_rate": 1.6121140142517816e-05, |
| "loss": 0.3252, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.5504489097905088, |
| "grad_norm": 3.7579493522644043, |
| "learning_rate": 1.6109263657957247e-05, |
| "loss": 0.2897, |
| "step": 2575 |
| }, |
| { |
| "epoch": 0.5515177426250535, |
| "grad_norm": 3.408615827560425, |
| "learning_rate": 1.6097387173396678e-05, |
| "loss": 0.2867, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.5525865754595981, |
| "grad_norm": 6.79346227645874, |
| "learning_rate": 1.6085510688836105e-05, |
| "loss": 0.3058, |
| "step": 2585 |
| }, |
| { |
| "epoch": 0.5536554082941428, |
| "grad_norm": 4.814434051513672, |
| "learning_rate": 1.6073634204275535e-05, |
| "loss": 0.461, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.5547242411286875, |
| "grad_norm": 4.379047393798828, |
| "learning_rate": 1.6061757719714966e-05, |
| "loss": 0.2341, |
| "step": 2595 |
| }, |
| { |
| "epoch": 0.5557930739632322, |
| "grad_norm": 7.072385787963867, |
| "learning_rate": 1.6049881235154396e-05, |
| "loss": 0.3446, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.5568619067977768, |
| "grad_norm": 6.0254411697387695, |
| "learning_rate": 1.6038004750593824e-05, |
| "loss": 0.2844, |
| "step": 2605 |
| }, |
| { |
| "epoch": 0.5579307396323215, |
| "grad_norm": 3.961240768432617, |
| "learning_rate": 1.6026128266033257e-05, |
| "loss": 0.3414, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.5589995724668662, |
| "grad_norm": 4.460314750671387, |
| "learning_rate": 1.6014251781472685e-05, |
| "loss": 0.3575, |
| "step": 2615 |
| }, |
| { |
| "epoch": 0.5600684053014109, |
| "grad_norm": 4.68889856338501, |
| "learning_rate": 1.6002375296912115e-05, |
| "loss": 0.3799, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.5611372381359555, |
| "grad_norm": 4.315304756164551, |
| "learning_rate": 1.5990498812351546e-05, |
| "loss": 0.3553, |
| "step": 2625 |
| }, |
| { |
| "epoch": 0.5622060709705002, |
| "grad_norm": 5.276904582977295, |
| "learning_rate": 1.5978622327790976e-05, |
| "loss": 0.3938, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.5632749038050449, |
| "grad_norm": 6.12239408493042, |
| "learning_rate": 1.5966745843230403e-05, |
| "loss": 0.4941, |
| "step": 2635 |
| }, |
| { |
| "epoch": 0.5643437366395896, |
| "grad_norm": 3.896017074584961, |
| "learning_rate": 1.5954869358669834e-05, |
| "loss": 0.3454, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.5654125694741342, |
| "grad_norm": 4.870078086853027, |
| "learning_rate": 1.5942992874109265e-05, |
| "loss": 0.3009, |
| "step": 2645 |
| }, |
| { |
| "epoch": 0.5664814023086789, |
| "grad_norm": 4.661655426025391, |
| "learning_rate": 1.5931116389548695e-05, |
| "loss": 0.4625, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.5675502351432236, |
| "grad_norm": 4.946725368499756, |
| "learning_rate": 1.5919239904988126e-05, |
| "loss": 0.3426, |
| "step": 2655 |
| }, |
| { |
| "epoch": 0.5686190679777683, |
| "grad_norm": 5.536448955535889, |
| "learning_rate": 1.5907363420427556e-05, |
| "loss": 0.3403, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.5696879008123129, |
| "grad_norm": 4.526655673980713, |
| "learning_rate": 1.5895486935866983e-05, |
| "loss": 0.3571, |
| "step": 2665 |
| }, |
| { |
| "epoch": 0.5707567336468576, |
| "grad_norm": 5.318846225738525, |
| "learning_rate": 1.5883610451306414e-05, |
| "loss": 0.235, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.5718255664814023, |
| "grad_norm": 4.3493571281433105, |
| "learning_rate": 1.5871733966745844e-05, |
| "loss": 0.3939, |
| "step": 2675 |
| }, |
| { |
| "epoch": 0.572894399315947, |
| "grad_norm": 4.984584331512451, |
| "learning_rate": 1.5859857482185275e-05, |
| "loss": 0.3041, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.5739632321504916, |
| "grad_norm": 5.118055820465088, |
| "learning_rate": 1.5847980997624702e-05, |
| "loss": 0.398, |
| "step": 2685 |
| }, |
| { |
| "epoch": 0.5750320649850363, |
| "grad_norm": 3.7693285942077637, |
| "learning_rate": 1.5836104513064136e-05, |
| "loss": 0.4327, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.5761008978195811, |
| "grad_norm": 4.113673210144043, |
| "learning_rate": 1.5824228028503563e-05, |
| "loss": 0.3622, |
| "step": 2695 |
| }, |
| { |
| "epoch": 0.5771697306541257, |
| "grad_norm": 4.5102386474609375, |
| "learning_rate": 1.5812351543942994e-05, |
| "loss": 0.3461, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.5782385634886704, |
| "grad_norm": 4.592400074005127, |
| "learning_rate": 1.5800475059382424e-05, |
| "loss": 0.409, |
| "step": 2705 |
| }, |
| { |
| "epoch": 0.579307396323215, |
| "grad_norm": 4.869931697845459, |
| "learning_rate": 1.5788598574821855e-05, |
| "loss": 0.3561, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.5803762291577598, |
| "grad_norm": 4.971279621124268, |
| "learning_rate": 1.5776722090261285e-05, |
| "loss": 0.3608, |
| "step": 2715 |
| }, |
| { |
| "epoch": 0.5814450619923044, |
| "grad_norm": 4.390021324157715, |
| "learning_rate": 1.5764845605700713e-05, |
| "loss": 0.3824, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.5825138948268491, |
| "grad_norm": 4.252533912658691, |
| "learning_rate": 1.5752969121140143e-05, |
| "loss": 0.3403, |
| "step": 2725 |
| }, |
| { |
| "epoch": 0.5835827276613937, |
| "grad_norm": 4.273214817047119, |
| "learning_rate": 1.5741092636579574e-05, |
| "loss": 0.4133, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.5846515604959385, |
| "grad_norm": 6.121555328369141, |
| "learning_rate": 1.5729216152019004e-05, |
| "loss": 0.4104, |
| "step": 2735 |
| }, |
| { |
| "epoch": 0.5857203933304831, |
| "grad_norm": 4.297682762145996, |
| "learning_rate": 1.5717339667458435e-05, |
| "loss": 0.2704, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.5867892261650278, |
| "grad_norm": 3.2164599895477295, |
| "learning_rate": 1.5705463182897865e-05, |
| "loss": 0.275, |
| "step": 2745 |
| }, |
| { |
| "epoch": 0.5878580589995724, |
| "grad_norm": 5.293271541595459, |
| "learning_rate": 1.5693586698337293e-05, |
| "loss": 0.3642, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.5889268918341172, |
| "grad_norm": 7.932840824127197, |
| "learning_rate": 1.5681710213776723e-05, |
| "loss": 0.3882, |
| "step": 2755 |
| }, |
| { |
| "epoch": 0.5899957246686618, |
| "grad_norm": 4.301117897033691, |
| "learning_rate": 1.5669833729216154e-05, |
| "loss": 0.445, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.5910645575032065, |
| "grad_norm": 5.11594820022583, |
| "learning_rate": 1.5657957244655584e-05, |
| "loss": 0.275, |
| "step": 2765 |
| }, |
| { |
| "epoch": 0.5921333903377511, |
| "grad_norm": 6.5174384117126465, |
| "learning_rate": 1.564608076009501e-05, |
| "loss": 0.3727, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.5932022231722959, |
| "grad_norm": 4.847846508026123, |
| "learning_rate": 1.5634204275534445e-05, |
| "loss": 0.3458, |
| "step": 2775 |
| }, |
| { |
| "epoch": 0.5942710560068405, |
| "grad_norm": 4.418210983276367, |
| "learning_rate": 1.5622327790973872e-05, |
| "loss": 0.3775, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.5953398888413852, |
| "grad_norm": 4.810405731201172, |
| "learning_rate": 1.5610451306413303e-05, |
| "loss": 0.3731, |
| "step": 2785 |
| }, |
| { |
| "epoch": 0.5964087216759298, |
| "grad_norm": 3.9812352657318115, |
| "learning_rate": 1.5598574821852734e-05, |
| "loss": 0.3073, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.5974775545104746, |
| "grad_norm": 4.542743682861328, |
| "learning_rate": 1.5586698337292164e-05, |
| "loss": 0.3113, |
| "step": 2795 |
| }, |
| { |
| "epoch": 0.5985463873450192, |
| "grad_norm": 4.736385345458984, |
| "learning_rate": 1.557482185273159e-05, |
| "loss": 0.3666, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.5996152201795639, |
| "grad_norm": 5.22001314163208, |
| "learning_rate": 1.5562945368171022e-05, |
| "loss": 0.3769, |
| "step": 2805 |
| }, |
| { |
| "epoch": 0.6006840530141085, |
| "grad_norm": 5.7952680587768555, |
| "learning_rate": 1.5551068883610452e-05, |
| "loss": 0.3267, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.6017528858486533, |
| "grad_norm": 4.174045085906982, |
| "learning_rate": 1.5539192399049883e-05, |
| "loss": 0.2513, |
| "step": 2815 |
| }, |
| { |
| "epoch": 0.6028217186831979, |
| "grad_norm": 3.869800090789795, |
| "learning_rate": 1.552731591448931e-05, |
| "loss": 0.273, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.6038905515177426, |
| "grad_norm": 4.380319118499756, |
| "learning_rate": 1.5515439429928744e-05, |
| "loss": 0.3712, |
| "step": 2825 |
| }, |
| { |
| "epoch": 0.6049593843522874, |
| "grad_norm": 3.972041368484497, |
| "learning_rate": 1.550356294536817e-05, |
| "loss": 0.4728, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.606028217186832, |
| "grad_norm": 5.212732791900635, |
| "learning_rate": 1.5491686460807602e-05, |
| "loss": 0.3608, |
| "step": 2835 |
| }, |
| { |
| "epoch": 0.6070970500213767, |
| "grad_norm": 5.559129238128662, |
| "learning_rate": 1.5479809976247032e-05, |
| "loss": 0.3765, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.6081658828559213, |
| "grad_norm": 4.520800590515137, |
| "learning_rate": 1.5467933491686463e-05, |
| "loss": 0.3285, |
| "step": 2845 |
| }, |
| { |
| "epoch": 0.609234715690466, |
| "grad_norm": 6.37885856628418, |
| "learning_rate": 1.5456057007125893e-05, |
| "loss": 0.4822, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.6103035485250107, |
| "grad_norm": 3.292531967163086, |
| "learning_rate": 1.544418052256532e-05, |
| "loss": 0.3185, |
| "step": 2855 |
| }, |
| { |
| "epoch": 0.6113723813595554, |
| "grad_norm": 4.683765411376953, |
| "learning_rate": 1.5432304038004754e-05, |
| "loss": 0.3027, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.6124412141941, |
| "grad_norm": 6.004202365875244, |
| "learning_rate": 1.542042755344418e-05, |
| "loss": 0.3819, |
| "step": 2865 |
| }, |
| { |
| "epoch": 0.6135100470286448, |
| "grad_norm": 4.668170928955078, |
| "learning_rate": 1.5408551068883612e-05, |
| "loss": 0.2819, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.6145788798631894, |
| "grad_norm": 6.2482781410217285, |
| "learning_rate": 1.5396674584323043e-05, |
| "loss": 0.3369, |
| "step": 2875 |
| }, |
| { |
| "epoch": 0.6156477126977341, |
| "grad_norm": 4.60993766784668, |
| "learning_rate": 1.5384798099762473e-05, |
| "loss": 0.331, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.6167165455322787, |
| "grad_norm": 5.188110828399658, |
| "learning_rate": 1.53729216152019e-05, |
| "loss": 0.3662, |
| "step": 2885 |
| }, |
| { |
| "epoch": 0.6177853783668235, |
| "grad_norm": 5.201088905334473, |
| "learning_rate": 1.536104513064133e-05, |
| "loss": 0.3472, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.6188542112013681, |
| "grad_norm": 5.363198280334473, |
| "learning_rate": 1.534916864608076e-05, |
| "loss": 0.387, |
| "step": 2895 |
| }, |
| { |
| "epoch": 0.6199230440359128, |
| "grad_norm": 5.238138198852539, |
| "learning_rate": 1.5337292161520192e-05, |
| "loss": 0.3017, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.6209918768704574, |
| "grad_norm": 4.188523769378662, |
| "learning_rate": 1.532541567695962e-05, |
| "loss": 0.2628, |
| "step": 2905 |
| }, |
| { |
| "epoch": 0.6220607097050022, |
| "grad_norm": 4.730754852294922, |
| "learning_rate": 1.5313539192399053e-05, |
| "loss": 0.2683, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.6231295425395468, |
| "grad_norm": 3.7036404609680176, |
| "learning_rate": 1.530166270783848e-05, |
| "loss": 0.3189, |
| "step": 2915 |
| }, |
| { |
| "epoch": 0.6241983753740915, |
| "grad_norm": 4.961543560028076, |
| "learning_rate": 1.528978622327791e-05, |
| "loss": 0.3389, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.6252672082086361, |
| "grad_norm": 4.376546859741211, |
| "learning_rate": 1.527790973871734e-05, |
| "loss": 0.3552, |
| "step": 2925 |
| }, |
| { |
| "epoch": 0.6263360410431809, |
| "grad_norm": 3.2792232036590576, |
| "learning_rate": 1.5266033254156772e-05, |
| "loss": 0.2784, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.6274048738777255, |
| "grad_norm": 4.739627838134766, |
| "learning_rate": 1.5254156769596201e-05, |
| "loss": 0.3416, |
| "step": 2935 |
| }, |
| { |
| "epoch": 0.6284737067122702, |
| "grad_norm": 4.889829635620117, |
| "learning_rate": 1.5242280285035631e-05, |
| "loss": 0.3805, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.6295425395468148, |
| "grad_norm": 5.562602519989014, |
| "learning_rate": 1.523040380047506e-05, |
| "loss": 0.4616, |
| "step": 2945 |
| }, |
| { |
| "epoch": 0.6306113723813596, |
| "grad_norm": 6.154614448547363, |
| "learning_rate": 1.521852731591449e-05, |
| "loss": 0.3584, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.6316802052159042, |
| "grad_norm": 4.117344856262207, |
| "learning_rate": 1.520665083135392e-05, |
| "loss": 0.3439, |
| "step": 2955 |
| }, |
| { |
| "epoch": 0.6327490380504489, |
| "grad_norm": 4.961648941040039, |
| "learning_rate": 1.519477434679335e-05, |
| "loss": 0.3569, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.6338178708849936, |
| "grad_norm": 4.030764579772949, |
| "learning_rate": 1.5182897862232779e-05, |
| "loss": 0.2775, |
| "step": 2965 |
| }, |
| { |
| "epoch": 0.6348867037195383, |
| "grad_norm": 5.615406036376953, |
| "learning_rate": 1.517102137767221e-05, |
| "loss": 0.3758, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.635955536554083, |
| "grad_norm": 5.250066757202148, |
| "learning_rate": 1.5159144893111638e-05, |
| "loss": 0.4178, |
| "step": 2975 |
| }, |
| { |
| "epoch": 0.6370243693886276, |
| "grad_norm": 3.862907648086548, |
| "learning_rate": 1.514726840855107e-05, |
| "loss": 0.2725, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.6380932022231723, |
| "grad_norm": 7.1906023025512695, |
| "learning_rate": 1.51353919239905e-05, |
| "loss": 0.4838, |
| "step": 2985 |
| }, |
| { |
| "epoch": 0.639162035057717, |
| "grad_norm": 4.240938663482666, |
| "learning_rate": 1.512351543942993e-05, |
| "loss": 0.3184, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.6402308678922617, |
| "grad_norm": 5.662024974822998, |
| "learning_rate": 1.511163895486936e-05, |
| "loss": 0.3265, |
| "step": 2995 |
| }, |
| { |
| "epoch": 0.6412997007268063, |
| "grad_norm": 5.721799850463867, |
| "learning_rate": 1.509976247030879e-05, |
| "loss": 0.3344, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.642368533561351, |
| "grad_norm": 4.524104118347168, |
| "learning_rate": 1.508788598574822e-05, |
| "loss": 0.284, |
| "step": 3005 |
| }, |
| { |
| "epoch": 0.6434373663958957, |
| "grad_norm": 4.907393455505371, |
| "learning_rate": 1.5076009501187649e-05, |
| "loss": 0.3337, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.6445061992304404, |
| "grad_norm": 4.567984580993652, |
| "learning_rate": 1.5064133016627081e-05, |
| "loss": 0.3085, |
| "step": 3015 |
| }, |
| { |
| "epoch": 0.645575032064985, |
| "grad_norm": 6.088601589202881, |
| "learning_rate": 1.505225653206651e-05, |
| "loss": 0.3685, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.6466438648995297, |
| "grad_norm": 5.842155456542969, |
| "learning_rate": 1.504038004750594e-05, |
| "loss": 0.4621, |
| "step": 3025 |
| }, |
| { |
| "epoch": 0.6477126977340744, |
| "grad_norm": 4.505978584289551, |
| "learning_rate": 1.502850356294537e-05, |
| "loss": 0.2958, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.6487815305686191, |
| "grad_norm": 3.832209825515747, |
| "learning_rate": 1.50166270783848e-05, |
| "loss": 0.4165, |
| "step": 3035 |
| }, |
| { |
| "epoch": 0.6498503634031637, |
| "grad_norm": 3.149580240249634, |
| "learning_rate": 1.5004750593824229e-05, |
| "loss": 0.336, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.6509191962377084, |
| "grad_norm": 4.5704121589660645, |
| "learning_rate": 1.499287410926366e-05, |
| "loss": 0.328, |
| "step": 3045 |
| }, |
| { |
| "epoch": 0.6519880290722531, |
| "grad_norm": 5.424034595489502, |
| "learning_rate": 1.4980997624703088e-05, |
| "loss": 0.3256, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.6530568619067978, |
| "grad_norm": 4.873384475708008, |
| "learning_rate": 1.4969121140142519e-05, |
| "loss": 0.2877, |
| "step": 3055 |
| }, |
| { |
| "epoch": 0.6541256947413424, |
| "grad_norm": 4.21671199798584, |
| "learning_rate": 1.4957244655581948e-05, |
| "loss": 0.3468, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.6551945275758871, |
| "grad_norm": 4.723153591156006, |
| "learning_rate": 1.494536817102138e-05, |
| "loss": 0.3836, |
| "step": 3065 |
| }, |
| { |
| "epoch": 0.6562633604104318, |
| "grad_norm": 4.3572587966918945, |
| "learning_rate": 1.4933491686460809e-05, |
| "loss": 0.3084, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.6573321932449765, |
| "grad_norm": 4.8245439529418945, |
| "learning_rate": 1.492161520190024e-05, |
| "loss": 0.2879, |
| "step": 3075 |
| }, |
| { |
| "epoch": 0.6584010260795211, |
| "grad_norm": 4.260484218597412, |
| "learning_rate": 1.4909738717339668e-05, |
| "loss": 0.2836, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.6594698589140658, |
| "grad_norm": 3.668529748916626, |
| "learning_rate": 1.4897862232779099e-05, |
| "loss": 0.3049, |
| "step": 3085 |
| }, |
| { |
| "epoch": 0.6605386917486105, |
| "grad_norm": 5.860143661499023, |
| "learning_rate": 1.4885985748218528e-05, |
| "loss": 0.3972, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.6616075245831552, |
| "grad_norm": 3.9581236839294434, |
| "learning_rate": 1.4874109263657958e-05, |
| "loss": 0.3189, |
| "step": 3095 |
| }, |
| { |
| "epoch": 0.6626763574176999, |
| "grad_norm": 2.8415067195892334, |
| "learning_rate": 1.4862232779097387e-05, |
| "loss": 0.207, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.6637451902522445, |
| "grad_norm": 5.096329689025879, |
| "learning_rate": 1.485035629453682e-05, |
| "loss": 0.2844, |
| "step": 3105 |
| }, |
| { |
| "epoch": 0.6648140230867893, |
| "grad_norm": 5.822755813598633, |
| "learning_rate": 1.4838479809976248e-05, |
| "loss": 0.3583, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.6658828559213339, |
| "grad_norm": 5.467360019683838, |
| "learning_rate": 1.4826603325415679e-05, |
| "loss": 0.2681, |
| "step": 3115 |
| }, |
| { |
| "epoch": 0.6669516887558786, |
| "grad_norm": 5.418729305267334, |
| "learning_rate": 1.4814726840855107e-05, |
| "loss": 0.3788, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.6680205215904232, |
| "grad_norm": 5.312787055969238, |
| "learning_rate": 1.4802850356294538e-05, |
| "loss": 0.3335, |
| "step": 3125 |
| }, |
| { |
| "epoch": 0.669089354424968, |
| "grad_norm": 4.632271766662598, |
| "learning_rate": 1.4790973871733969e-05, |
| "loss": 0.2958, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.6701581872595126, |
| "grad_norm": 5.137240886688232, |
| "learning_rate": 1.4779097387173397e-05, |
| "loss": 0.343, |
| "step": 3135 |
| }, |
| { |
| "epoch": 0.6712270200940573, |
| "grad_norm": 4.227065086364746, |
| "learning_rate": 1.4767220902612828e-05, |
| "loss": 0.3026, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.672295852928602, |
| "grad_norm": 4.9906110763549805, |
| "learning_rate": 1.4755344418052257e-05, |
| "loss": 0.3575, |
| "step": 3145 |
| }, |
| { |
| "epoch": 0.6733646857631467, |
| "grad_norm": 6.338077545166016, |
| "learning_rate": 1.4743467933491689e-05, |
| "loss": 0.3962, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.6744335185976913, |
| "grad_norm": 5.018848896026611, |
| "learning_rate": 1.4731591448931118e-05, |
| "loss": 0.292, |
| "step": 3155 |
| }, |
| { |
| "epoch": 0.675502351432236, |
| "grad_norm": 5.4188432693481445, |
| "learning_rate": 1.4719714964370548e-05, |
| "loss": 0.4226, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.6765711842667806, |
| "grad_norm": 5.020565032958984, |
| "learning_rate": 1.4707838479809977e-05, |
| "loss": 0.3999, |
| "step": 3165 |
| }, |
| { |
| "epoch": 0.6776400171013254, |
| "grad_norm": 5.457892894744873, |
| "learning_rate": 1.4695961995249408e-05, |
| "loss": 0.3949, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.67870884993587, |
| "grad_norm": 4.842294216156006, |
| "learning_rate": 1.4684085510688837e-05, |
| "loss": 0.2844, |
| "step": 3175 |
| }, |
| { |
| "epoch": 0.6797776827704147, |
| "grad_norm": 4.515163421630859, |
| "learning_rate": 1.4672209026128267e-05, |
| "loss": 0.3003, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.6808465156049593, |
| "grad_norm": 3.4031429290771484, |
| "learning_rate": 1.4660332541567696e-05, |
| "loss": 0.2636, |
| "step": 3185 |
| }, |
| { |
| "epoch": 0.6819153484395041, |
| "grad_norm": 4.693248748779297, |
| "learning_rate": 1.4648456057007128e-05, |
| "loss": 0.2334, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.6829841812740487, |
| "grad_norm": 4.690431118011475, |
| "learning_rate": 1.4636579572446557e-05, |
| "loss": 0.2574, |
| "step": 3195 |
| }, |
| { |
| "epoch": 0.6840530141085934, |
| "grad_norm": 3.9794492721557617, |
| "learning_rate": 1.4624703087885988e-05, |
| "loss": 0.3476, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.685121846943138, |
| "grad_norm": 4.062690258026123, |
| "learning_rate": 1.4612826603325417e-05, |
| "loss": 0.3763, |
| "step": 3205 |
| }, |
| { |
| "epoch": 0.6861906797776828, |
| "grad_norm": 2.888495683670044, |
| "learning_rate": 1.4600950118764847e-05, |
| "loss": 0.2873, |
| "step": 3210 |
| }, |
| { |
| "epoch": 0.6872595126122274, |
| "grad_norm": 4.061041355133057, |
| "learning_rate": 1.4589073634204276e-05, |
| "loss": 0.2859, |
| "step": 3215 |
| }, |
| { |
| "epoch": 0.6883283454467721, |
| "grad_norm": 5.954913139343262, |
| "learning_rate": 1.4577197149643707e-05, |
| "loss": 0.3335, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.6893971782813167, |
| "grad_norm": 4.9537434577941895, |
| "learning_rate": 1.4565320665083135e-05, |
| "loss": 0.3712, |
| "step": 3225 |
| }, |
| { |
| "epoch": 0.6904660111158615, |
| "grad_norm": 3.5754384994506836, |
| "learning_rate": 1.4553444180522566e-05, |
| "loss": 0.4072, |
| "step": 3230 |
| }, |
| { |
| "epoch": 0.6915348439504062, |
| "grad_norm": 6.583157062530518, |
| "learning_rate": 1.4541567695961995e-05, |
| "loss": 0.3442, |
| "step": 3235 |
| }, |
| { |
| "epoch": 0.6926036767849508, |
| "grad_norm": 4.144803524017334, |
| "learning_rate": 1.4529691211401427e-05, |
| "loss": 0.32, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.6936725096194956, |
| "grad_norm": 3.350670576095581, |
| "learning_rate": 1.4517814726840856e-05, |
| "loss": 0.3076, |
| "step": 3245 |
| }, |
| { |
| "epoch": 0.6947413424540402, |
| "grad_norm": 3.798152208328247, |
| "learning_rate": 1.4505938242280287e-05, |
| "loss": 0.3134, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.6958101752885849, |
| "grad_norm": 4.410452365875244, |
| "learning_rate": 1.4494061757719715e-05, |
| "loss": 0.3155, |
| "step": 3255 |
| }, |
| { |
| "epoch": 0.6968790081231295, |
| "grad_norm": 5.064853191375732, |
| "learning_rate": 1.4482185273159146e-05, |
| "loss": 0.3097, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.6979478409576743, |
| "grad_norm": 5.49769401550293, |
| "learning_rate": 1.4470308788598575e-05, |
| "loss": 0.2727, |
| "step": 3265 |
| }, |
| { |
| "epoch": 0.6990166737922189, |
| "grad_norm": 4.130645751953125, |
| "learning_rate": 1.4458432304038005e-05, |
| "loss": 0.3666, |
| "step": 3270 |
| }, |
| { |
| "epoch": 0.7000855066267636, |
| "grad_norm": 5.358222484588623, |
| "learning_rate": 1.4446555819477438e-05, |
| "loss": 0.3049, |
| "step": 3275 |
| }, |
| { |
| "epoch": 0.7011543394613082, |
| "grad_norm": 3.783137559890747, |
| "learning_rate": 1.4434679334916866e-05, |
| "loss": 0.3506, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.702223172295853, |
| "grad_norm": 4.486612319946289, |
| "learning_rate": 1.4422802850356297e-05, |
| "loss": 0.3027, |
| "step": 3285 |
| }, |
| { |
| "epoch": 0.7032920051303976, |
| "grad_norm": 5.604061126708984, |
| "learning_rate": 1.4410926365795726e-05, |
| "loss": 0.2706, |
| "step": 3290 |
| }, |
| { |
| "epoch": 0.7043608379649423, |
| "grad_norm": 5.663457870483398, |
| "learning_rate": 1.4399049881235156e-05, |
| "loss": 0.3165, |
| "step": 3295 |
| }, |
| { |
| "epoch": 0.7054296707994869, |
| "grad_norm": 4.874339580535889, |
| "learning_rate": 1.4387173396674585e-05, |
| "loss": 0.3567, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.7064985036340317, |
| "grad_norm": 5.478762626647949, |
| "learning_rate": 1.4375296912114016e-05, |
| "loss": 0.2795, |
| "step": 3305 |
| }, |
| { |
| "epoch": 0.7075673364685763, |
| "grad_norm": 4.213021278381348, |
| "learning_rate": 1.4363420427553445e-05, |
| "loss": 0.2905, |
| "step": 3310 |
| }, |
| { |
| "epoch": 0.708636169303121, |
| "grad_norm": 4.549129009246826, |
| "learning_rate": 1.4351543942992875e-05, |
| "loss": 0.2946, |
| "step": 3315 |
| }, |
| { |
| "epoch": 0.7097050021376656, |
| "grad_norm": 4.900253772735596, |
| "learning_rate": 1.4339667458432304e-05, |
| "loss": 0.298, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.7107738349722104, |
| "grad_norm": 5.591811656951904, |
| "learning_rate": 1.4327790973871736e-05, |
| "loss": 0.289, |
| "step": 3325 |
| }, |
| { |
| "epoch": 0.711842667806755, |
| "grad_norm": 3.1972029209136963, |
| "learning_rate": 1.4315914489311165e-05, |
| "loss": 0.3194, |
| "step": 3330 |
| }, |
| { |
| "epoch": 0.7129115006412997, |
| "grad_norm": 3.692401647567749, |
| "learning_rate": 1.4304038004750596e-05, |
| "loss": 0.2719, |
| "step": 3335 |
| }, |
| { |
| "epoch": 0.7139803334758443, |
| "grad_norm": 6.502699851989746, |
| "learning_rate": 1.4292161520190025e-05, |
| "loss": 0.3079, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.7150491663103891, |
| "grad_norm": 4.761363506317139, |
| "learning_rate": 1.4280285035629455e-05, |
| "loss": 0.3373, |
| "step": 3345 |
| }, |
| { |
| "epoch": 0.7161179991449337, |
| "grad_norm": 5.628553867340088, |
| "learning_rate": 1.4268408551068884e-05, |
| "loss": 0.3103, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.7171868319794784, |
| "grad_norm": 5.576054096221924, |
| "learning_rate": 1.4256532066508314e-05, |
| "loss": 0.3384, |
| "step": 3355 |
| }, |
| { |
| "epoch": 0.718255664814023, |
| "grad_norm": 4.364500999450684, |
| "learning_rate": 1.4244655581947743e-05, |
| "loss": 0.3785, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.7193244976485678, |
| "grad_norm": 2.8248353004455566, |
| "learning_rate": 1.4232779097387176e-05, |
| "loss": 0.2583, |
| "step": 3365 |
| }, |
| { |
| "epoch": 0.7203933304831125, |
| "grad_norm": 5.5604987144470215, |
| "learning_rate": 1.4220902612826604e-05, |
| "loss": 0.2992, |
| "step": 3370 |
| }, |
| { |
| "epoch": 0.7214621633176571, |
| "grad_norm": 4.8770527839660645, |
| "learning_rate": 1.4209026128266035e-05, |
| "loss": 0.2196, |
| "step": 3375 |
| }, |
| { |
| "epoch": 0.7225309961522018, |
| "grad_norm": 4.998085021972656, |
| "learning_rate": 1.4197149643705464e-05, |
| "loss": 0.3438, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.7235998289867465, |
| "grad_norm": 4.125364303588867, |
| "learning_rate": 1.4185273159144894e-05, |
| "loss": 0.333, |
| "step": 3385 |
| }, |
| { |
| "epoch": 0.7246686618212912, |
| "grad_norm": 5.174322605133057, |
| "learning_rate": 1.4173396674584323e-05, |
| "loss": 0.4422, |
| "step": 3390 |
| }, |
| { |
| "epoch": 0.7257374946558358, |
| "grad_norm": 4.850910186767578, |
| "learning_rate": 1.4161520190023754e-05, |
| "loss": 0.458, |
| "step": 3395 |
| }, |
| { |
| "epoch": 0.7268063274903805, |
| "grad_norm": 4.238053321838379, |
| "learning_rate": 1.4149643705463183e-05, |
| "loss": 0.2526, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.7278751603249252, |
| "grad_norm": 4.8868842124938965, |
| "learning_rate": 1.4137767220902613e-05, |
| "loss": 0.2443, |
| "step": 3405 |
| }, |
| { |
| "epoch": 0.7289439931594699, |
| "grad_norm": 6.352740287780762, |
| "learning_rate": 1.4125890736342045e-05, |
| "loss": 0.4024, |
| "step": 3410 |
| }, |
| { |
| "epoch": 0.7300128259940145, |
| "grad_norm": 3.7694151401519775, |
| "learning_rate": 1.4114014251781474e-05, |
| "loss": 0.3057, |
| "step": 3415 |
| }, |
| { |
| "epoch": 0.7310816588285592, |
| "grad_norm": 4.326847553253174, |
| "learning_rate": 1.4102137767220905e-05, |
| "loss": 0.3417, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.7321504916631039, |
| "grad_norm": 4.306587219238281, |
| "learning_rate": 1.4090261282660334e-05, |
| "loss": 0.3535, |
| "step": 3425 |
| }, |
| { |
| "epoch": 0.7332193244976486, |
| "grad_norm": 4.4991044998168945, |
| "learning_rate": 1.4078384798099764e-05, |
| "loss": 0.3814, |
| "step": 3430 |
| }, |
| { |
| "epoch": 0.7342881573321932, |
| "grad_norm": 4.0679779052734375, |
| "learning_rate": 1.4066508313539193e-05, |
| "loss": 0.3196, |
| "step": 3435 |
| }, |
| { |
| "epoch": 0.735356990166738, |
| "grad_norm": 4.0540666580200195, |
| "learning_rate": 1.4054631828978624e-05, |
| "loss": 0.2738, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.7364258230012826, |
| "grad_norm": 4.532857894897461, |
| "learning_rate": 1.4042755344418053e-05, |
| "loss": 0.2127, |
| "step": 3445 |
| }, |
| { |
| "epoch": 0.7374946558358273, |
| "grad_norm": 4.681793212890625, |
| "learning_rate": 1.4030878859857485e-05, |
| "loss": 0.2415, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.7385634886703719, |
| "grad_norm": 5.458173751831055, |
| "learning_rate": 1.4019002375296914e-05, |
| "loss": 0.368, |
| "step": 3455 |
| }, |
| { |
| "epoch": 0.7396323215049166, |
| "grad_norm": 4.303793430328369, |
| "learning_rate": 1.4007125890736344e-05, |
| "loss": 0.2965, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.7407011543394613, |
| "grad_norm": 5.24821138381958, |
| "learning_rate": 1.3995249406175773e-05, |
| "loss": 0.3676, |
| "step": 3465 |
| }, |
| { |
| "epoch": 0.741769987174006, |
| "grad_norm": 7.041927337646484, |
| "learning_rate": 1.3983372921615204e-05, |
| "loss": 0.4793, |
| "step": 3470 |
| }, |
| { |
| "epoch": 0.7428388200085506, |
| "grad_norm": 4.38003396987915, |
| "learning_rate": 1.3971496437054632e-05, |
| "loss": 0.2924, |
| "step": 3475 |
| }, |
| { |
| "epoch": 0.7439076528430953, |
| "grad_norm": 4.844277858734131, |
| "learning_rate": 1.3959619952494063e-05, |
| "loss": 0.3051, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.74497648567764, |
| "grad_norm": 4.943488121032715, |
| "learning_rate": 1.3947743467933492e-05, |
| "loss": 0.3206, |
| "step": 3485 |
| }, |
| { |
| "epoch": 0.7460453185121847, |
| "grad_norm": 3.5360701084136963, |
| "learning_rate": 1.3935866983372922e-05, |
| "loss": 0.3062, |
| "step": 3490 |
| }, |
| { |
| "epoch": 0.7471141513467293, |
| "grad_norm": 4.964517116546631, |
| "learning_rate": 1.3923990498812351e-05, |
| "loss": 0.3099, |
| "step": 3495 |
| }, |
| { |
| "epoch": 0.748182984181274, |
| "grad_norm": 4.1770124435424805, |
| "learning_rate": 1.3912114014251783e-05, |
| "loss": 0.3528, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.7492518170158188, |
| "grad_norm": 4.830697059631348, |
| "learning_rate": 1.3900237529691212e-05, |
| "loss": 0.3075, |
| "step": 3505 |
| }, |
| { |
| "epoch": 0.7503206498503634, |
| "grad_norm": 4.7558512687683105, |
| "learning_rate": 1.3888361045130643e-05, |
| "loss": 0.3132, |
| "step": 3510 |
| }, |
| { |
| "epoch": 0.7513894826849081, |
| "grad_norm": 5.082642555236816, |
| "learning_rate": 1.3876484560570072e-05, |
| "loss": 0.3789, |
| "step": 3515 |
| }, |
| { |
| "epoch": 0.7524583155194527, |
| "grad_norm": 5.486532211303711, |
| "learning_rate": 1.3864608076009502e-05, |
| "loss": 0.3316, |
| "step": 3520 |
| }, |
| { |
| "epoch": 0.7535271483539975, |
| "grad_norm": 4.763543605804443, |
| "learning_rate": 1.3852731591448931e-05, |
| "loss": 0.3113, |
| "step": 3525 |
| }, |
| { |
| "epoch": 0.7545959811885421, |
| "grad_norm": 4.146590709686279, |
| "learning_rate": 1.3840855106888362e-05, |
| "loss": 0.2481, |
| "step": 3530 |
| }, |
| { |
| "epoch": 0.7556648140230868, |
| "grad_norm": 4.292271614074707, |
| "learning_rate": 1.382897862232779e-05, |
| "loss": 0.3174, |
| "step": 3535 |
| }, |
| { |
| "epoch": 0.7567336468576314, |
| "grad_norm": 5.971374988555908, |
| "learning_rate": 1.3817102137767223e-05, |
| "loss": 0.3116, |
| "step": 3540 |
| }, |
| { |
| "epoch": 0.7578024796921762, |
| "grad_norm": 4.599390983581543, |
| "learning_rate": 1.3805225653206652e-05, |
| "loss": 0.29, |
| "step": 3545 |
| }, |
| { |
| "epoch": 0.7588713125267208, |
| "grad_norm": 3.7273731231689453, |
| "learning_rate": 1.3793349168646082e-05, |
| "loss": 0.33, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.7599401453612655, |
| "grad_norm": 3.681992530822754, |
| "learning_rate": 1.3781472684085513e-05, |
| "loss": 0.2002, |
| "step": 3555 |
| }, |
| { |
| "epoch": 0.7610089781958101, |
| "grad_norm": 5.324198246002197, |
| "learning_rate": 1.3769596199524942e-05, |
| "loss": 0.3566, |
| "step": 3560 |
| }, |
| { |
| "epoch": 0.7620778110303549, |
| "grad_norm": 4.434847354888916, |
| "learning_rate": 1.3757719714964372e-05, |
| "loss": 0.2618, |
| "step": 3565 |
| }, |
| { |
| "epoch": 0.7631466438648995, |
| "grad_norm": 5.279498100280762, |
| "learning_rate": 1.3745843230403801e-05, |
| "loss": 0.316, |
| "step": 3570 |
| }, |
| { |
| "epoch": 0.7642154766994442, |
| "grad_norm": 3.4741098880767822, |
| "learning_rate": 1.3733966745843233e-05, |
| "loss": 0.2997, |
| "step": 3575 |
| }, |
| { |
| "epoch": 0.7652843095339888, |
| "grad_norm": 4.7899909019470215, |
| "learning_rate": 1.372209026128266e-05, |
| "loss": 0.2809, |
| "step": 3580 |
| }, |
| { |
| "epoch": 0.7663531423685336, |
| "grad_norm": 4.318710803985596, |
| "learning_rate": 1.3710213776722093e-05, |
| "loss": 0.2023, |
| "step": 3585 |
| }, |
| { |
| "epoch": 0.7674219752030782, |
| "grad_norm": 4.148991107940674, |
| "learning_rate": 1.3698337292161522e-05, |
| "loss": 0.2726, |
| "step": 3590 |
| }, |
| { |
| "epoch": 0.7684908080376229, |
| "grad_norm": 5.0960373878479, |
| "learning_rate": 1.3686460807600952e-05, |
| "loss": 0.2878, |
| "step": 3595 |
| }, |
| { |
| "epoch": 0.7695596408721675, |
| "grad_norm": 5.928832530975342, |
| "learning_rate": 1.3674584323040381e-05, |
| "loss": 0.4026, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.7706284737067123, |
| "grad_norm": 4.24060583114624, |
| "learning_rate": 1.3662707838479811e-05, |
| "loss": 0.3205, |
| "step": 3605 |
| }, |
| { |
| "epoch": 0.7716973065412569, |
| "grad_norm": 4.517853736877441, |
| "learning_rate": 1.365083135391924e-05, |
| "loss": 0.3092, |
| "step": 3610 |
| }, |
| { |
| "epoch": 0.7727661393758016, |
| "grad_norm": 5.5383501052856445, |
| "learning_rate": 1.3638954869358671e-05, |
| "loss": 0.3249, |
| "step": 3615 |
| }, |
| { |
| "epoch": 0.7738349722103463, |
| "grad_norm": 3.5598056316375732, |
| "learning_rate": 1.36270783847981e-05, |
| "loss": 0.2898, |
| "step": 3620 |
| }, |
| { |
| "epoch": 0.774903805044891, |
| "grad_norm": 5.0517578125, |
| "learning_rate": 1.3615201900237532e-05, |
| "loss": 0.3464, |
| "step": 3625 |
| }, |
| { |
| "epoch": 0.7759726378794357, |
| "grad_norm": 4.764474868774414, |
| "learning_rate": 1.360332541567696e-05, |
| "loss": 0.3755, |
| "step": 3630 |
| }, |
| { |
| "epoch": 0.7770414707139803, |
| "grad_norm": 4.272229194641113, |
| "learning_rate": 1.3591448931116391e-05, |
| "loss": 0.3236, |
| "step": 3635 |
| }, |
| { |
| "epoch": 0.7781103035485251, |
| "grad_norm": 4.496946811676025, |
| "learning_rate": 1.357957244655582e-05, |
| "loss": 0.3298, |
| "step": 3640 |
| }, |
| { |
| "epoch": 0.7791791363830697, |
| "grad_norm": 3.3338801860809326, |
| "learning_rate": 1.356769596199525e-05, |
| "loss": 0.3301, |
| "step": 3645 |
| }, |
| { |
| "epoch": 0.7802479692176144, |
| "grad_norm": 4.775890350341797, |
| "learning_rate": 1.355581947743468e-05, |
| "loss": 0.2428, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.781316802052159, |
| "grad_norm": 3.7741811275482178, |
| "learning_rate": 1.354394299287411e-05, |
| "loss": 0.2789, |
| "step": 3655 |
| }, |
| { |
| "epoch": 0.7823856348867038, |
| "grad_norm": 5.699966907501221, |
| "learning_rate": 1.3532066508313539e-05, |
| "loss": 0.4398, |
| "step": 3660 |
| }, |
| { |
| "epoch": 0.7834544677212484, |
| "grad_norm": 5.20950174331665, |
| "learning_rate": 1.352019002375297e-05, |
| "loss": 0.3211, |
| "step": 3665 |
| }, |
| { |
| "epoch": 0.7845233005557931, |
| "grad_norm": 4.900545120239258, |
| "learning_rate": 1.3508313539192398e-05, |
| "loss": 0.3079, |
| "step": 3670 |
| }, |
| { |
| "epoch": 0.7855921333903377, |
| "grad_norm": 4.627389907836914, |
| "learning_rate": 1.349643705463183e-05, |
| "loss": 0.2765, |
| "step": 3675 |
| }, |
| { |
| "epoch": 0.7866609662248825, |
| "grad_norm": 3.996687889099121, |
| "learning_rate": 1.348456057007126e-05, |
| "loss": 0.2414, |
| "step": 3680 |
| }, |
| { |
| "epoch": 0.7877297990594271, |
| "grad_norm": 4.968347072601318, |
| "learning_rate": 1.347268408551069e-05, |
| "loss": 0.3142, |
| "step": 3685 |
| }, |
| { |
| "epoch": 0.7887986318939718, |
| "grad_norm": 5.365523815155029, |
| "learning_rate": 1.346080760095012e-05, |
| "loss": 0.4895, |
| "step": 3690 |
| }, |
| { |
| "epoch": 0.7898674647285164, |
| "grad_norm": 3.6716244220733643, |
| "learning_rate": 1.344893111638955e-05, |
| "loss": 0.3058, |
| "step": 3695 |
| }, |
| { |
| "epoch": 0.7909362975630612, |
| "grad_norm": 3.6110551357269287, |
| "learning_rate": 1.343705463182898e-05, |
| "loss": 0.2568, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.7920051303976058, |
| "grad_norm": 3.8466339111328125, |
| "learning_rate": 1.3425178147268409e-05, |
| "loss": 0.2505, |
| "step": 3705 |
| }, |
| { |
| "epoch": 0.7930739632321505, |
| "grad_norm": 6.473718643188477, |
| "learning_rate": 1.3413301662707841e-05, |
| "loss": 0.3416, |
| "step": 3710 |
| }, |
| { |
| "epoch": 0.7941427960666951, |
| "grad_norm": 4.931123733520508, |
| "learning_rate": 1.340142517814727e-05, |
| "loss": 0.2867, |
| "step": 3715 |
| }, |
| { |
| "epoch": 0.7952116289012399, |
| "grad_norm": 4.821789741516113, |
| "learning_rate": 1.33895486935867e-05, |
| "loss": 0.2696, |
| "step": 3720 |
| }, |
| { |
| "epoch": 0.7962804617357845, |
| "grad_norm": 3.5999889373779297, |
| "learning_rate": 1.337767220902613e-05, |
| "loss": 0.293, |
| "step": 3725 |
| }, |
| { |
| "epoch": 0.7973492945703292, |
| "grad_norm": 3.716235637664795, |
| "learning_rate": 1.336579572446556e-05, |
| "loss": 0.2741, |
| "step": 3730 |
| }, |
| { |
| "epoch": 0.7984181274048738, |
| "grad_norm": 3.1744401454925537, |
| "learning_rate": 1.3353919239904989e-05, |
| "loss": 0.3276, |
| "step": 3735 |
| }, |
| { |
| "epoch": 0.7994869602394186, |
| "grad_norm": 4.65699577331543, |
| "learning_rate": 1.334204275534442e-05, |
| "loss": 0.2688, |
| "step": 3740 |
| }, |
| { |
| "epoch": 0.8005557930739632, |
| "grad_norm": 3.338193416595459, |
| "learning_rate": 1.3330166270783848e-05, |
| "loss": 0.2408, |
| "step": 3745 |
| }, |
| { |
| "epoch": 0.8016246259085079, |
| "grad_norm": 4.22088098526001, |
| "learning_rate": 1.3318289786223279e-05, |
| "loss": 0.2926, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.8026934587430525, |
| "grad_norm": 5.624631881713867, |
| "learning_rate": 1.3306413301662708e-05, |
| "loss": 0.3119, |
| "step": 3755 |
| }, |
| { |
| "epoch": 0.8037622915775973, |
| "grad_norm": 3.8507394790649414, |
| "learning_rate": 1.329453681710214e-05, |
| "loss": 0.3018, |
| "step": 3760 |
| }, |
| { |
| "epoch": 0.804831124412142, |
| "grad_norm": 4.6665239334106445, |
| "learning_rate": 1.3282660332541569e-05, |
| "loss": 0.3448, |
| "step": 3765 |
| }, |
| { |
| "epoch": 0.8058999572466866, |
| "grad_norm": 4.100464344024658, |
| "learning_rate": 1.3270783847981e-05, |
| "loss": 0.3539, |
| "step": 3770 |
| }, |
| { |
| "epoch": 0.8069687900812313, |
| "grad_norm": 6.0533623695373535, |
| "learning_rate": 1.3258907363420428e-05, |
| "loss": 0.2776, |
| "step": 3775 |
| }, |
| { |
| "epoch": 0.808037622915776, |
| "grad_norm": 3.781015396118164, |
| "learning_rate": 1.3247030878859859e-05, |
| "loss": 0.2255, |
| "step": 3780 |
| }, |
| { |
| "epoch": 0.8091064557503207, |
| "grad_norm": 5.616995334625244, |
| "learning_rate": 1.3235154394299288e-05, |
| "loss": 0.2507, |
| "step": 3785 |
| }, |
| { |
| "epoch": 0.8101752885848653, |
| "grad_norm": 5.021564960479736, |
| "learning_rate": 1.3223277909738718e-05, |
| "loss": 0.3463, |
| "step": 3790 |
| }, |
| { |
| "epoch": 0.81124412141941, |
| "grad_norm": 4.946634769439697, |
| "learning_rate": 1.3211401425178147e-05, |
| "loss": 0.2849, |
| "step": 3795 |
| }, |
| { |
| "epoch": 0.8123129542539547, |
| "grad_norm": 3.1573128700256348, |
| "learning_rate": 1.319952494061758e-05, |
| "loss": 0.2678, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.8133817870884994, |
| "grad_norm": 5.302856922149658, |
| "learning_rate": 1.3187648456057008e-05, |
| "loss": 0.3446, |
| "step": 3805 |
| }, |
| { |
| "epoch": 0.814450619923044, |
| "grad_norm": 5.2195820808410645, |
| "learning_rate": 1.3175771971496439e-05, |
| "loss": 0.344, |
| "step": 3810 |
| }, |
| { |
| "epoch": 0.8155194527575887, |
| "grad_norm": 5.514340877532959, |
| "learning_rate": 1.3163895486935867e-05, |
| "loss": 0.3305, |
| "step": 3815 |
| }, |
| { |
| "epoch": 0.8165882855921334, |
| "grad_norm": 4.197089195251465, |
| "learning_rate": 1.3152019002375298e-05, |
| "loss": 0.2728, |
| "step": 3820 |
| }, |
| { |
| "epoch": 0.8176571184266781, |
| "grad_norm": 4.766973972320557, |
| "learning_rate": 1.3140142517814727e-05, |
| "loss": 0.4181, |
| "step": 3825 |
| }, |
| { |
| "epoch": 0.8187259512612227, |
| "grad_norm": 5.202324390411377, |
| "learning_rate": 1.3128266033254157e-05, |
| "loss": 0.3351, |
| "step": 3830 |
| }, |
| { |
| "epoch": 0.8197947840957674, |
| "grad_norm": 3.472627878189087, |
| "learning_rate": 1.311638954869359e-05, |
| "loss": 0.2646, |
| "step": 3835 |
| }, |
| { |
| "epoch": 0.8208636169303121, |
| "grad_norm": 4.589137554168701, |
| "learning_rate": 1.3104513064133017e-05, |
| "loss": 0.2628, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.8219324497648568, |
| "grad_norm": 3.9725475311279297, |
| "learning_rate": 1.3092636579572449e-05, |
| "loss": 0.2747, |
| "step": 3845 |
| }, |
| { |
| "epoch": 0.8230012825994014, |
| "grad_norm": 3.832432985305786, |
| "learning_rate": 1.3080760095011878e-05, |
| "loss": 0.2253, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.8240701154339461, |
| "grad_norm": 4.213531494140625, |
| "learning_rate": 1.3068883610451308e-05, |
| "loss": 0.2741, |
| "step": 3855 |
| }, |
| { |
| "epoch": 0.8251389482684908, |
| "grad_norm": 6.430481910705566, |
| "learning_rate": 1.3057007125890737e-05, |
| "loss": 0.3982, |
| "step": 3860 |
| }, |
| { |
| "epoch": 0.8262077811030355, |
| "grad_norm": 2.416151762008667, |
| "learning_rate": 1.3045130641330168e-05, |
| "loss": 0.3014, |
| "step": 3865 |
| }, |
| { |
| "epoch": 0.8272766139375801, |
| "grad_norm": 4.334439754486084, |
| "learning_rate": 1.3033254156769597e-05, |
| "loss": 0.2696, |
| "step": 3870 |
| }, |
| { |
| "epoch": 0.8283454467721248, |
| "grad_norm": 3.599234104156494, |
| "learning_rate": 1.3021377672209027e-05, |
| "loss": 0.2607, |
| "step": 3875 |
| }, |
| { |
| "epoch": 0.8294142796066695, |
| "grad_norm": 4.65981388092041, |
| "learning_rate": 1.3009501187648456e-05, |
| "loss": 0.3154, |
| "step": 3880 |
| }, |
| { |
| "epoch": 0.8304831124412142, |
| "grad_norm": 5.147418975830078, |
| "learning_rate": 1.2997624703087888e-05, |
| "loss": 0.3275, |
| "step": 3885 |
| }, |
| { |
| "epoch": 0.8315519452757588, |
| "grad_norm": 4.910894870758057, |
| "learning_rate": 1.2985748218527317e-05, |
| "loss": 0.274, |
| "step": 3890 |
| }, |
| { |
| "epoch": 0.8326207781103035, |
| "grad_norm": 3.3270483016967773, |
| "learning_rate": 1.2973871733966748e-05, |
| "loss": 0.3042, |
| "step": 3895 |
| }, |
| { |
| "epoch": 0.8336896109448483, |
| "grad_norm": 5.005611419677734, |
| "learning_rate": 1.2961995249406177e-05, |
| "loss": 0.2692, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.8347584437793929, |
| "grad_norm": 3.320770263671875, |
| "learning_rate": 1.2950118764845607e-05, |
| "loss": 0.2505, |
| "step": 3905 |
| }, |
| { |
| "epoch": 0.8358272766139376, |
| "grad_norm": 4.788522720336914, |
| "learning_rate": 1.2938242280285036e-05, |
| "loss": 0.3762, |
| "step": 3910 |
| }, |
| { |
| "epoch": 0.8368961094484823, |
| "grad_norm": 5.107404708862305, |
| "learning_rate": 1.2926365795724467e-05, |
| "loss": 0.2467, |
| "step": 3915 |
| }, |
| { |
| "epoch": 0.837964942283027, |
| "grad_norm": 3.5440781116485596, |
| "learning_rate": 1.2914489311163895e-05, |
| "loss": 0.2227, |
| "step": 3920 |
| }, |
| { |
| "epoch": 0.8390337751175716, |
| "grad_norm": 5.089791774749756, |
| "learning_rate": 1.2902612826603326e-05, |
| "loss": 0.2513, |
| "step": 3925 |
| }, |
| { |
| "epoch": 0.8401026079521163, |
| "grad_norm": 5.978660583496094, |
| "learning_rate": 1.2890736342042755e-05, |
| "loss": 0.313, |
| "step": 3930 |
| }, |
| { |
| "epoch": 0.841171440786661, |
| "grad_norm": 4.347848415374756, |
| "learning_rate": 1.2878859857482187e-05, |
| "loss": 0.265, |
| "step": 3935 |
| }, |
| { |
| "epoch": 0.8422402736212057, |
| "grad_norm": 5.038461208343506, |
| "learning_rate": 1.2866983372921616e-05, |
| "loss": 0.2839, |
| "step": 3940 |
| }, |
| { |
| "epoch": 0.8433091064557503, |
| "grad_norm": 4.367410659790039, |
| "learning_rate": 1.2855106888361046e-05, |
| "loss": 0.3432, |
| "step": 3945 |
| }, |
| { |
| "epoch": 0.844377939290295, |
| "grad_norm": 4.267697334289551, |
| "learning_rate": 1.2843230403800475e-05, |
| "loss": 0.2168, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.8454467721248397, |
| "grad_norm": 4.99351167678833, |
| "learning_rate": 1.2831353919239906e-05, |
| "loss": 0.3083, |
| "step": 3955 |
| }, |
| { |
| "epoch": 0.8465156049593844, |
| "grad_norm": 3.725167751312256, |
| "learning_rate": 1.2819477434679335e-05, |
| "loss": 0.3362, |
| "step": 3960 |
| }, |
| { |
| "epoch": 0.847584437793929, |
| "grad_norm": 4.825465679168701, |
| "learning_rate": 1.2807600950118765e-05, |
| "loss": 0.2897, |
| "step": 3965 |
| }, |
| { |
| "epoch": 0.8486532706284737, |
| "grad_norm": 4.231856822967529, |
| "learning_rate": 1.2795724465558198e-05, |
| "loss": 0.299, |
| "step": 3970 |
| }, |
| { |
| "epoch": 0.8497221034630184, |
| "grad_norm": 3.8439395427703857, |
| "learning_rate": 1.2783847980997626e-05, |
| "loss": 0.3421, |
| "step": 3975 |
| }, |
| { |
| "epoch": 0.8507909362975631, |
| "grad_norm": 4.338144779205322, |
| "learning_rate": 1.2771971496437057e-05, |
| "loss": 0.2886, |
| "step": 3980 |
| }, |
| { |
| "epoch": 0.8518597691321077, |
| "grad_norm": 5.123786449432373, |
| "learning_rate": 1.2760095011876486e-05, |
| "loss": 0.3563, |
| "step": 3985 |
| }, |
| { |
| "epoch": 0.8529286019666524, |
| "grad_norm": 5.506287574768066, |
| "learning_rate": 1.2748218527315916e-05, |
| "loss": 0.3204, |
| "step": 3990 |
| }, |
| { |
| "epoch": 0.853997434801197, |
| "grad_norm": 3.644973039627075, |
| "learning_rate": 1.2736342042755345e-05, |
| "loss": 0.3025, |
| "step": 3995 |
| }, |
| { |
| "epoch": 0.8550662676357418, |
| "grad_norm": 5.109133720397949, |
| "learning_rate": 1.2724465558194776e-05, |
| "loss": 0.2813, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.8561351004702864, |
| "grad_norm": 5.544173717498779, |
| "learning_rate": 1.2712589073634205e-05, |
| "loss": 0.2787, |
| "step": 4005 |
| }, |
| { |
| "epoch": 0.8572039333048311, |
| "grad_norm": 5.382670879364014, |
| "learning_rate": 1.2700712589073637e-05, |
| "loss": 0.2643, |
| "step": 4010 |
| }, |
| { |
| "epoch": 0.8582727661393758, |
| "grad_norm": 5.406363010406494, |
| "learning_rate": 1.2688836104513064e-05, |
| "loss": 0.291, |
| "step": 4015 |
| }, |
| { |
| "epoch": 0.8593415989739205, |
| "grad_norm": 3.5062954425811768, |
| "learning_rate": 1.2676959619952496e-05, |
| "loss": 0.2467, |
| "step": 4020 |
| }, |
| { |
| "epoch": 0.8604104318084651, |
| "grad_norm": 5.817686080932617, |
| "learning_rate": 1.2665083135391925e-05, |
| "loss": 0.3489, |
| "step": 4025 |
| }, |
| { |
| "epoch": 0.8614792646430098, |
| "grad_norm": 3.931792974472046, |
| "learning_rate": 1.2653206650831356e-05, |
| "loss": 0.2613, |
| "step": 4030 |
| }, |
| { |
| "epoch": 0.8625480974775546, |
| "grad_norm": 4.279338359832764, |
| "learning_rate": 1.2641330166270785e-05, |
| "loss": 0.3007, |
| "step": 4035 |
| }, |
| { |
| "epoch": 0.8636169303120992, |
| "grad_norm": 3.9646289348602295, |
| "learning_rate": 1.2629453681710215e-05, |
| "loss": 0.2685, |
| "step": 4040 |
| }, |
| { |
| "epoch": 0.8646857631466439, |
| "grad_norm": 5.029911518096924, |
| "learning_rate": 1.2617577197149644e-05, |
| "loss": 0.2984, |
| "step": 4045 |
| }, |
| { |
| "epoch": 0.8657545959811885, |
| "grad_norm": 4.78744649887085, |
| "learning_rate": 1.2605700712589074e-05, |
| "loss": 0.2321, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.8668234288157333, |
| "grad_norm": 3.825188636779785, |
| "learning_rate": 1.2593824228028503e-05, |
| "loss": 0.2417, |
| "step": 4055 |
| }, |
| { |
| "epoch": 0.8678922616502779, |
| "grad_norm": 4.478353500366211, |
| "learning_rate": 1.2581947743467936e-05, |
| "loss": 0.3164, |
| "step": 4060 |
| }, |
| { |
| "epoch": 0.8689610944848226, |
| "grad_norm": 5.523867607116699, |
| "learning_rate": 1.2570071258907364e-05, |
| "loss": 0.3769, |
| "step": 4065 |
| }, |
| { |
| "epoch": 0.8700299273193672, |
| "grad_norm": 6.190155506134033, |
| "learning_rate": 1.2558194774346795e-05, |
| "loss": 0.3385, |
| "step": 4070 |
| }, |
| { |
| "epoch": 0.871098760153912, |
| "grad_norm": 4.058770179748535, |
| "learning_rate": 1.2546318289786224e-05, |
| "loss": 0.3135, |
| "step": 4075 |
| }, |
| { |
| "epoch": 0.8721675929884566, |
| "grad_norm": 5.607039928436279, |
| "learning_rate": 1.2534441805225654e-05, |
| "loss": 0.3295, |
| "step": 4080 |
| }, |
| { |
| "epoch": 0.8732364258230013, |
| "grad_norm": 4.902414321899414, |
| "learning_rate": 1.2522565320665083e-05, |
| "loss": 0.2992, |
| "step": 4085 |
| }, |
| { |
| "epoch": 0.8743052586575459, |
| "grad_norm": 4.188961505889893, |
| "learning_rate": 1.2510688836104514e-05, |
| "loss": 0.2723, |
| "step": 4090 |
| }, |
| { |
| "epoch": 0.8753740914920907, |
| "grad_norm": 4.536145210266113, |
| "learning_rate": 1.2498812351543943e-05, |
| "loss": 0.2805, |
| "step": 4095 |
| }, |
| { |
| "epoch": 0.8764429243266353, |
| "grad_norm": 3.7727832794189453, |
| "learning_rate": 1.2486935866983373e-05, |
| "loss": 0.2171, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.87751175716118, |
| "grad_norm": 4.528228759765625, |
| "learning_rate": 1.2475059382422802e-05, |
| "loss": 0.2618, |
| "step": 4105 |
| }, |
| { |
| "epoch": 0.8785805899957246, |
| "grad_norm": 4.920950412750244, |
| "learning_rate": 1.2463182897862234e-05, |
| "loss": 0.2994, |
| "step": 4110 |
| }, |
| { |
| "epoch": 0.8796494228302694, |
| "grad_norm": 4.851797580718994, |
| "learning_rate": 1.2451306413301665e-05, |
| "loss": 0.2866, |
| "step": 4115 |
| }, |
| { |
| "epoch": 0.880718255664814, |
| "grad_norm": 3.021509885787964, |
| "learning_rate": 1.2439429928741094e-05, |
| "loss": 0.2091, |
| "step": 4120 |
| }, |
| { |
| "epoch": 0.8817870884993587, |
| "grad_norm": 5.19913911819458, |
| "learning_rate": 1.2427553444180524e-05, |
| "loss": 0.3285, |
| "step": 4125 |
| }, |
| { |
| "epoch": 0.8828559213339033, |
| "grad_norm": 4.311760902404785, |
| "learning_rate": 1.2415676959619953e-05, |
| "loss": 0.2854, |
| "step": 4130 |
| }, |
| { |
| "epoch": 0.8839247541684481, |
| "grad_norm": 5.5093994140625, |
| "learning_rate": 1.2403800475059384e-05, |
| "loss": 0.3004, |
| "step": 4135 |
| }, |
| { |
| "epoch": 0.8849935870029927, |
| "grad_norm": 3.5908706188201904, |
| "learning_rate": 1.2391923990498813e-05, |
| "loss": 0.2335, |
| "step": 4140 |
| }, |
| { |
| "epoch": 0.8860624198375374, |
| "grad_norm": 3.561647653579712, |
| "learning_rate": 1.2380047505938245e-05, |
| "loss": 0.2919, |
| "step": 4145 |
| }, |
| { |
| "epoch": 0.887131252672082, |
| "grad_norm": 3.1781160831451416, |
| "learning_rate": 1.2368171021377674e-05, |
| "loss": 0.2786, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.8882000855066268, |
| "grad_norm": 4.471413612365723, |
| "learning_rate": 1.2356294536817104e-05, |
| "loss": 0.3312, |
| "step": 4155 |
| }, |
| { |
| "epoch": 0.8892689183411714, |
| "grad_norm": 5.232965469360352, |
| "learning_rate": 1.2344418052256533e-05, |
| "loss": 0.2677, |
| "step": 4160 |
| }, |
| { |
| "epoch": 0.8903377511757161, |
| "grad_norm": 4.883133888244629, |
| "learning_rate": 1.2332541567695964e-05, |
| "loss": 0.2774, |
| "step": 4165 |
| }, |
| { |
| "epoch": 0.8914065840102608, |
| "grad_norm": 4.092249393463135, |
| "learning_rate": 1.2320665083135392e-05, |
| "loss": 0.2649, |
| "step": 4170 |
| }, |
| { |
| "epoch": 0.8924754168448055, |
| "grad_norm": 3.5607283115386963, |
| "learning_rate": 1.2308788598574823e-05, |
| "loss": 0.3119, |
| "step": 4175 |
| }, |
| { |
| "epoch": 0.8935442496793502, |
| "grad_norm": 4.573966026306152, |
| "learning_rate": 1.2296912114014252e-05, |
| "loss": 0.243, |
| "step": 4180 |
| }, |
| { |
| "epoch": 0.8946130825138948, |
| "grad_norm": 4.2962775230407715, |
| "learning_rate": 1.2285035629453684e-05, |
| "loss": 0.292, |
| "step": 4185 |
| }, |
| { |
| "epoch": 0.8956819153484396, |
| "grad_norm": 4.585544109344482, |
| "learning_rate": 1.2273159144893111e-05, |
| "loss": 0.352, |
| "step": 4190 |
| }, |
| { |
| "epoch": 0.8967507481829842, |
| "grad_norm": 4.529600143432617, |
| "learning_rate": 1.2261282660332543e-05, |
| "loss": 0.2422, |
| "step": 4195 |
| }, |
| { |
| "epoch": 0.8978195810175289, |
| "grad_norm": 2.9587581157684326, |
| "learning_rate": 1.2249406175771972e-05, |
| "loss": 0.2427, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.8988884138520735, |
| "grad_norm": 4.409660339355469, |
| "learning_rate": 1.2237529691211403e-05, |
| "loss": 0.2246, |
| "step": 4205 |
| }, |
| { |
| "epoch": 0.8999572466866183, |
| "grad_norm": 3.328666925430298, |
| "learning_rate": 1.2225653206650832e-05, |
| "loss": 0.2275, |
| "step": 4210 |
| }, |
| { |
| "epoch": 0.9010260795211629, |
| "grad_norm": 4.411447048187256, |
| "learning_rate": 1.2213776722090262e-05, |
| "loss": 0.3766, |
| "step": 4215 |
| }, |
| { |
| "epoch": 0.9020949123557076, |
| "grad_norm": 3.3779454231262207, |
| "learning_rate": 1.2201900237529691e-05, |
| "loss": 0.2748, |
| "step": 4220 |
| }, |
| { |
| "epoch": 0.9031637451902522, |
| "grad_norm": 5.558443069458008, |
| "learning_rate": 1.2190023752969122e-05, |
| "loss": 0.2941, |
| "step": 4225 |
| }, |
| { |
| "epoch": 0.904232578024797, |
| "grad_norm": 3.7313380241394043, |
| "learning_rate": 1.217814726840855e-05, |
| "loss": 0.2693, |
| "step": 4230 |
| }, |
| { |
| "epoch": 0.9053014108593416, |
| "grad_norm": 3.5401077270507812, |
| "learning_rate": 1.2166270783847983e-05, |
| "loss": 0.3058, |
| "step": 4235 |
| }, |
| { |
| "epoch": 0.9063702436938863, |
| "grad_norm": 3.6305854320526123, |
| "learning_rate": 1.2154394299287412e-05, |
| "loss": 0.2167, |
| "step": 4240 |
| }, |
| { |
| "epoch": 0.9074390765284309, |
| "grad_norm": 4.208883285522461, |
| "learning_rate": 1.2142517814726842e-05, |
| "loss": 0.2371, |
| "step": 4245 |
| }, |
| { |
| "epoch": 0.9085079093629757, |
| "grad_norm": 4.586354732513428, |
| "learning_rate": 1.2130641330166273e-05, |
| "loss": 0.2199, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.9095767421975203, |
| "grad_norm": 3.673724889755249, |
| "learning_rate": 1.2118764845605702e-05, |
| "loss": 0.2803, |
| "step": 4255 |
| }, |
| { |
| "epoch": 0.910645575032065, |
| "grad_norm": 4.0301337242126465, |
| "learning_rate": 1.2106888361045132e-05, |
| "loss": 0.2765, |
| "step": 4260 |
| }, |
| { |
| "epoch": 0.9117144078666096, |
| "grad_norm": 4.114202976226807, |
| "learning_rate": 1.2095011876484561e-05, |
| "loss": 0.2734, |
| "step": 4265 |
| }, |
| { |
| "epoch": 0.9127832407011544, |
| "grad_norm": 6.415131568908691, |
| "learning_rate": 1.2083135391923993e-05, |
| "loss": 0.3345, |
| "step": 4270 |
| }, |
| { |
| "epoch": 0.913852073535699, |
| "grad_norm": 4.800512790679932, |
| "learning_rate": 1.207125890736342e-05, |
| "loss": 0.2873, |
| "step": 4275 |
| }, |
| { |
| "epoch": 0.9149209063702437, |
| "grad_norm": 4.536464214324951, |
| "learning_rate": 1.2059382422802853e-05, |
| "loss": 0.2506, |
| "step": 4280 |
| }, |
| { |
| "epoch": 0.9159897392047883, |
| "grad_norm": 4.594064235687256, |
| "learning_rate": 1.2047505938242281e-05, |
| "loss": 0.2335, |
| "step": 4285 |
| }, |
| { |
| "epoch": 0.917058572039333, |
| "grad_norm": 5.493027687072754, |
| "learning_rate": 1.2035629453681712e-05, |
| "loss": 0.3218, |
| "step": 4290 |
| }, |
| { |
| "epoch": 0.9181274048738777, |
| "grad_norm": 4.560657501220703, |
| "learning_rate": 1.2023752969121141e-05, |
| "loss": 0.2971, |
| "step": 4295 |
| }, |
| { |
| "epoch": 0.9191962377084224, |
| "grad_norm": 3.5777430534362793, |
| "learning_rate": 1.2011876484560571e-05, |
| "loss": 0.2296, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.9202650705429671, |
| "grad_norm": 4.112082481384277, |
| "learning_rate": 1.2e-05, |
| "loss": 0.3087, |
| "step": 4305 |
| }, |
| { |
| "epoch": 0.9213339033775118, |
| "grad_norm": 3.815093994140625, |
| "learning_rate": 1.1988123515439431e-05, |
| "loss": 0.3353, |
| "step": 4310 |
| }, |
| { |
| "epoch": 0.9224027362120565, |
| "grad_norm": 5.078567028045654, |
| "learning_rate": 1.197624703087886e-05, |
| "loss": 0.3046, |
| "step": 4315 |
| }, |
| { |
| "epoch": 0.9234715690466011, |
| "grad_norm": 3.549429178237915, |
| "learning_rate": 1.1964370546318292e-05, |
| "loss": 0.3431, |
| "step": 4320 |
| }, |
| { |
| "epoch": 0.9245404018811458, |
| "grad_norm": 4.466531276702881, |
| "learning_rate": 1.195249406175772e-05, |
| "loss": 0.2707, |
| "step": 4325 |
| }, |
| { |
| "epoch": 0.9256092347156905, |
| "grad_norm": 5.423553943634033, |
| "learning_rate": 1.1940617577197151e-05, |
| "loss": 0.284, |
| "step": 4330 |
| }, |
| { |
| "epoch": 0.9266780675502352, |
| "grad_norm": 4.436051845550537, |
| "learning_rate": 1.192874109263658e-05, |
| "loss": 0.2714, |
| "step": 4335 |
| }, |
| { |
| "epoch": 0.9277469003847798, |
| "grad_norm": 4.404295444488525, |
| "learning_rate": 1.191686460807601e-05, |
| "loss": 0.2751, |
| "step": 4340 |
| }, |
| { |
| "epoch": 0.9288157332193245, |
| "grad_norm": 4.390391826629639, |
| "learning_rate": 1.190498812351544e-05, |
| "loss": 0.3047, |
| "step": 4345 |
| }, |
| { |
| "epoch": 0.9298845660538692, |
| "grad_norm": 4.6937479972839355, |
| "learning_rate": 1.189311163895487e-05, |
| "loss": 0.2867, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.9309533988884139, |
| "grad_norm": 4.352549076080322, |
| "learning_rate": 1.1881235154394299e-05, |
| "loss": 0.2895, |
| "step": 4355 |
| }, |
| { |
| "epoch": 0.9320222317229585, |
| "grad_norm": 4.013473033905029, |
| "learning_rate": 1.186935866983373e-05, |
| "loss": 0.2749, |
| "step": 4360 |
| }, |
| { |
| "epoch": 0.9330910645575032, |
| "grad_norm": 3.603860378265381, |
| "learning_rate": 1.1857482185273158e-05, |
| "loss": 0.2549, |
| "step": 4365 |
| }, |
| { |
| "epoch": 0.9341598973920479, |
| "grad_norm": 5.079062461853027, |
| "learning_rate": 1.184560570071259e-05, |
| "loss": 0.2648, |
| "step": 4370 |
| }, |
| { |
| "epoch": 0.9352287302265926, |
| "grad_norm": 6.029326438903809, |
| "learning_rate": 1.183372921615202e-05, |
| "loss": 0.3001, |
| "step": 4375 |
| }, |
| { |
| "epoch": 0.9362975630611372, |
| "grad_norm": 4.8559041023254395, |
| "learning_rate": 1.182185273159145e-05, |
| "loss": 0.3198, |
| "step": 4380 |
| }, |
| { |
| "epoch": 0.9373663958956819, |
| "grad_norm": 4.295980453491211, |
| "learning_rate": 1.1809976247030879e-05, |
| "loss": 0.2583, |
| "step": 4385 |
| }, |
| { |
| "epoch": 0.9384352287302266, |
| "grad_norm": 6.648914337158203, |
| "learning_rate": 1.179809976247031e-05, |
| "loss": 0.2894, |
| "step": 4390 |
| }, |
| { |
| "epoch": 0.9395040615647713, |
| "grad_norm": 5.454647064208984, |
| "learning_rate": 1.178622327790974e-05, |
| "loss": 0.3017, |
| "step": 4395 |
| }, |
| { |
| "epoch": 0.9405728943993159, |
| "grad_norm": 5.520369529724121, |
| "learning_rate": 1.1774346793349169e-05, |
| "loss": 0.2754, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.9416417272338606, |
| "grad_norm": 3.847935914993286, |
| "learning_rate": 1.1762470308788601e-05, |
| "loss": 0.3289, |
| "step": 4405 |
| }, |
| { |
| "epoch": 0.9427105600684053, |
| "grad_norm": 4.063333988189697, |
| "learning_rate": 1.175059382422803e-05, |
| "loss": 0.2787, |
| "step": 4410 |
| }, |
| { |
| "epoch": 0.94377939290295, |
| "grad_norm": 4.977645397186279, |
| "learning_rate": 1.173871733966746e-05, |
| "loss": 0.2406, |
| "step": 4415 |
| }, |
| { |
| "epoch": 0.9448482257374946, |
| "grad_norm": 4.375988483428955, |
| "learning_rate": 1.172684085510689e-05, |
| "loss": 0.3144, |
| "step": 4420 |
| }, |
| { |
| "epoch": 0.9459170585720393, |
| "grad_norm": 4.656064987182617, |
| "learning_rate": 1.171496437054632e-05, |
| "loss": 0.3237, |
| "step": 4425 |
| }, |
| { |
| "epoch": 0.946985891406584, |
| "grad_norm": 4.027129650115967, |
| "learning_rate": 1.1703087885985749e-05, |
| "loss": 0.2641, |
| "step": 4430 |
| }, |
| { |
| "epoch": 0.9480547242411287, |
| "grad_norm": 4.126834869384766, |
| "learning_rate": 1.169121140142518e-05, |
| "loss": 0.2875, |
| "step": 4435 |
| }, |
| { |
| "epoch": 0.9491235570756734, |
| "grad_norm": 3.4707841873168945, |
| "learning_rate": 1.1679334916864608e-05, |
| "loss": 0.3211, |
| "step": 4440 |
| }, |
| { |
| "epoch": 0.950192389910218, |
| "grad_norm": 2.8617501258850098, |
| "learning_rate": 1.166745843230404e-05, |
| "loss": 0.2403, |
| "step": 4445 |
| }, |
| { |
| "epoch": 0.9512612227447628, |
| "grad_norm": 4.50408935546875, |
| "learning_rate": 1.1655581947743468e-05, |
| "loss": 0.3018, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.9523300555793074, |
| "grad_norm": 3.976015329360962, |
| "learning_rate": 1.16437054631829e-05, |
| "loss": 0.2531, |
| "step": 4455 |
| }, |
| { |
| "epoch": 0.9533988884138521, |
| "grad_norm": 6.214652061462402, |
| "learning_rate": 1.1631828978622329e-05, |
| "loss": 0.349, |
| "step": 4460 |
| }, |
| { |
| "epoch": 0.9544677212483967, |
| "grad_norm": 3.969996929168701, |
| "learning_rate": 1.161995249406176e-05, |
| "loss": 0.238, |
| "step": 4465 |
| }, |
| { |
| "epoch": 0.9555365540829415, |
| "grad_norm": 3.9902470111846924, |
| "learning_rate": 1.1608076009501188e-05, |
| "loss": 0.2768, |
| "step": 4470 |
| }, |
| { |
| "epoch": 0.9566053869174861, |
| "grad_norm": 4.20414924621582, |
| "learning_rate": 1.1596199524940619e-05, |
| "loss": 0.2944, |
| "step": 4475 |
| }, |
| { |
| "epoch": 0.9576742197520308, |
| "grad_norm": 3.5199337005615234, |
| "learning_rate": 1.1584323040380048e-05, |
| "loss": 0.3043, |
| "step": 4480 |
| }, |
| { |
| "epoch": 0.9587430525865754, |
| "grad_norm": 3.7765684127807617, |
| "learning_rate": 1.1572446555819478e-05, |
| "loss": 0.2434, |
| "step": 4485 |
| }, |
| { |
| "epoch": 0.9598118854211202, |
| "grad_norm": 3.9338152408599854, |
| "learning_rate": 1.1560570071258907e-05, |
| "loss": 0.2451, |
| "step": 4490 |
| }, |
| { |
| "epoch": 0.9608807182556648, |
| "grad_norm": 2.86897873878479, |
| "learning_rate": 1.154869358669834e-05, |
| "loss": 0.213, |
| "step": 4495 |
| }, |
| { |
| "epoch": 0.9619495510902095, |
| "grad_norm": 4.536627292633057, |
| "learning_rate": 1.1536817102137768e-05, |
| "loss": 0.26, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.9630183839247541, |
| "grad_norm": 5.863621234893799, |
| "learning_rate": 1.1524940617577199e-05, |
| "loss": 0.3173, |
| "step": 4505 |
| }, |
| { |
| "epoch": 0.9640872167592989, |
| "grad_norm": 5.156888008117676, |
| "learning_rate": 1.1513064133016627e-05, |
| "loss": 0.2745, |
| "step": 4510 |
| }, |
| { |
| "epoch": 0.9651560495938435, |
| "grad_norm": 3.947845220565796, |
| "learning_rate": 1.1501187648456058e-05, |
| "loss": 0.2824, |
| "step": 4515 |
| }, |
| { |
| "epoch": 0.9662248824283882, |
| "grad_norm": 3.6855573654174805, |
| "learning_rate": 1.1489311163895487e-05, |
| "loss": 0.2769, |
| "step": 4520 |
| }, |
| { |
| "epoch": 0.9672937152629328, |
| "grad_norm": 3.929898977279663, |
| "learning_rate": 1.1477434679334917e-05, |
| "loss": 0.2464, |
| "step": 4525 |
| }, |
| { |
| "epoch": 0.9683625480974776, |
| "grad_norm": 3.9288270473480225, |
| "learning_rate": 1.146555819477435e-05, |
| "loss": 0.3213, |
| "step": 4530 |
| }, |
| { |
| "epoch": 0.9694313809320222, |
| "grad_norm": 5.536011219024658, |
| "learning_rate": 1.1453681710213777e-05, |
| "loss": 0.3606, |
| "step": 4535 |
| }, |
| { |
| "epoch": 0.9705002137665669, |
| "grad_norm": 3.3420379161834717, |
| "learning_rate": 1.1441805225653209e-05, |
| "loss": 0.2183, |
| "step": 4540 |
| }, |
| { |
| "epoch": 0.9715690466011115, |
| "grad_norm": 3.492932081222534, |
| "learning_rate": 1.1429928741092638e-05, |
| "loss": 0.2567, |
| "step": 4545 |
| }, |
| { |
| "epoch": 0.9726378794356563, |
| "grad_norm": 5.132521629333496, |
| "learning_rate": 1.1418052256532068e-05, |
| "loss": 0.2521, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.9737067122702009, |
| "grad_norm": 4.512472152709961, |
| "learning_rate": 1.1406175771971497e-05, |
| "loss": 0.2696, |
| "step": 4555 |
| }, |
| { |
| "epoch": 0.9747755451047456, |
| "grad_norm": 5.246362686157227, |
| "learning_rate": 1.1394299287410928e-05, |
| "loss": 0.3409, |
| "step": 4560 |
| }, |
| { |
| "epoch": 0.9758443779392902, |
| "grad_norm": 4.033038139343262, |
| "learning_rate": 1.1382422802850357e-05, |
| "loss": 0.2732, |
| "step": 4565 |
| }, |
| { |
| "epoch": 0.976913210773835, |
| "grad_norm": 4.162726879119873, |
| "learning_rate": 1.1370546318289787e-05, |
| "loss": 0.3003, |
| "step": 4570 |
| }, |
| { |
| "epoch": 0.9779820436083797, |
| "grad_norm": 5.6553730964660645, |
| "learning_rate": 1.1358669833729216e-05, |
| "loss": 0.3426, |
| "step": 4575 |
| }, |
| { |
| "epoch": 0.9790508764429243, |
| "grad_norm": 3.857776403427124, |
| "learning_rate": 1.1346793349168648e-05, |
| "loss": 0.2873, |
| "step": 4580 |
| }, |
| { |
| "epoch": 0.980119709277469, |
| "grad_norm": 4.109443187713623, |
| "learning_rate": 1.1334916864608077e-05, |
| "loss": 0.3, |
| "step": 4585 |
| }, |
| { |
| "epoch": 0.9811885421120137, |
| "grad_norm": 3.3073673248291016, |
| "learning_rate": 1.1323040380047508e-05, |
| "loss": 0.2074, |
| "step": 4590 |
| }, |
| { |
| "epoch": 0.9822573749465584, |
| "grad_norm": 3.0706233978271484, |
| "learning_rate": 1.1311163895486937e-05, |
| "loss": 0.2521, |
| "step": 4595 |
| }, |
| { |
| "epoch": 0.983326207781103, |
| "grad_norm": 5.8296356201171875, |
| "learning_rate": 1.1299287410926367e-05, |
| "loss": 0.3123, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.9843950406156478, |
| "grad_norm": 3.409862995147705, |
| "learning_rate": 1.1287410926365796e-05, |
| "loss": 0.2492, |
| "step": 4605 |
| }, |
| { |
| "epoch": 0.9854638734501924, |
| "grad_norm": 5.090631008148193, |
| "learning_rate": 1.1275534441805227e-05, |
| "loss": 0.3012, |
| "step": 4610 |
| }, |
| { |
| "epoch": 0.9865327062847371, |
| "grad_norm": 6.443350315093994, |
| "learning_rate": 1.1263657957244655e-05, |
| "loss": 0.2516, |
| "step": 4615 |
| }, |
| { |
| "epoch": 0.9876015391192817, |
| "grad_norm": 4.340301513671875, |
| "learning_rate": 1.1251781472684088e-05, |
| "loss": 0.3629, |
| "step": 4620 |
| }, |
| { |
| "epoch": 0.9886703719538265, |
| "grad_norm": 4.117158889770508, |
| "learning_rate": 1.1239904988123515e-05, |
| "loss": 0.2484, |
| "step": 4625 |
| }, |
| { |
| "epoch": 0.9897392047883711, |
| "grad_norm": 4.39588737487793, |
| "learning_rate": 1.1228028503562947e-05, |
| "loss": 0.2749, |
| "step": 4630 |
| }, |
| { |
| "epoch": 0.9908080376229158, |
| "grad_norm": 4.059388637542725, |
| "learning_rate": 1.1216152019002376e-05, |
| "loss": 0.2064, |
| "step": 4635 |
| }, |
| { |
| "epoch": 0.9918768704574604, |
| "grad_norm": 3.4412331581115723, |
| "learning_rate": 1.1204275534441806e-05, |
| "loss": 0.3089, |
| "step": 4640 |
| }, |
| { |
| "epoch": 0.9929457032920052, |
| "grad_norm": 4.691385746002197, |
| "learning_rate": 1.1192399049881235e-05, |
| "loss": 0.3145, |
| "step": 4645 |
| }, |
| { |
| "epoch": 0.9940145361265498, |
| "grad_norm": 3.472172737121582, |
| "learning_rate": 1.1180522565320666e-05, |
| "loss": 0.2357, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.9950833689610945, |
| "grad_norm": 4.1867289543151855, |
| "learning_rate": 1.1168646080760095e-05, |
| "loss": 0.2803, |
| "step": 4655 |
| }, |
| { |
| "epoch": 0.9961522017956391, |
| "grad_norm": 4.0518083572387695, |
| "learning_rate": 1.1156769596199525e-05, |
| "loss": 0.2437, |
| "step": 4660 |
| }, |
| { |
| "epoch": 0.9972210346301839, |
| "grad_norm": 3.507197141647339, |
| "learning_rate": 1.1144893111638954e-05, |
| "loss": 0.2708, |
| "step": 4665 |
| }, |
| { |
| "epoch": 0.9982898674647285, |
| "grad_norm": 5.1572585105896, |
| "learning_rate": 1.1133016627078386e-05, |
| "loss": 0.253, |
| "step": 4670 |
| }, |
| { |
| "epoch": 0.9993587002992732, |
| "grad_norm": 4.823436737060547, |
| "learning_rate": 1.1121140142517817e-05, |
| "loss": 0.2219, |
| "step": 4675 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.1271175593137741, |
| "eval_mrr": 0.9770190895741555, |
| "eval_runtime": 313.9716, |
| "eval_samples_per_second": 7.23, |
| "eval_steps_per_second": 0.905, |
| "step": 4678 |
| }, |
| { |
| "epoch": 1.000427533133818, |
| "grad_norm": 5.583497047424316, |
| "learning_rate": 1.1109263657957246e-05, |
| "loss": 0.2621, |
| "step": 4680 |
| }, |
| { |
| "epoch": 1.0014963659683624, |
| "grad_norm": 4.658013343811035, |
| "learning_rate": 1.1097387173396676e-05, |
| "loss": 0.382, |
| "step": 4685 |
| }, |
| { |
| "epoch": 1.0025651988029072, |
| "grad_norm": 3.0044312477111816, |
| "learning_rate": 1.1085510688836105e-05, |
| "loss": 0.3026, |
| "step": 4690 |
| }, |
| { |
| "epoch": 1.003634031637452, |
| "grad_norm": 4.063423156738281, |
| "learning_rate": 1.1073634204275536e-05, |
| "loss": 0.3643, |
| "step": 4695 |
| }, |
| { |
| "epoch": 1.0047028644719966, |
| "grad_norm": 4.625239372253418, |
| "learning_rate": 1.1061757719714965e-05, |
| "loss": 0.382, |
| "step": 4700 |
| }, |
| { |
| "epoch": 1.0057716973065411, |
| "grad_norm": 3.8251540660858154, |
| "learning_rate": 1.1049881235154397e-05, |
| "loss": 0.3082, |
| "step": 4705 |
| }, |
| { |
| "epoch": 1.0068405301410859, |
| "grad_norm": 4.241628170013428, |
| "learning_rate": 1.1038004750593824e-05, |
| "loss": 0.3411, |
| "step": 4710 |
| }, |
| { |
| "epoch": 1.0079093629756306, |
| "grad_norm": 5.6527276039123535, |
| "learning_rate": 1.1026128266033256e-05, |
| "loss": 0.317, |
| "step": 4715 |
| }, |
| { |
| "epoch": 1.0089781958101753, |
| "grad_norm": 5.0404052734375, |
| "learning_rate": 1.1014251781472685e-05, |
| "loss": 0.4396, |
| "step": 4720 |
| }, |
| { |
| "epoch": 1.01004702864472, |
| "grad_norm": 4.585846900939941, |
| "learning_rate": 1.1002375296912116e-05, |
| "loss": 0.4034, |
| "step": 4725 |
| }, |
| { |
| "epoch": 1.0111158614792646, |
| "grad_norm": 4.704357624053955, |
| "learning_rate": 1.0990498812351544e-05, |
| "loss": 0.2875, |
| "step": 4730 |
| }, |
| { |
| "epoch": 1.0121846943138093, |
| "grad_norm": 5.956788063049316, |
| "learning_rate": 1.0978622327790975e-05, |
| "loss": 0.4919, |
| "step": 4735 |
| }, |
| { |
| "epoch": 1.013253527148354, |
| "grad_norm": 4.240102291107178, |
| "learning_rate": 1.0966745843230404e-05, |
| "loss": 0.3118, |
| "step": 4740 |
| }, |
| { |
| "epoch": 1.0143223599828988, |
| "grad_norm": 4.7897515296936035, |
| "learning_rate": 1.0954869358669834e-05, |
| "loss": 0.3976, |
| "step": 4745 |
| }, |
| { |
| "epoch": 1.0153911928174433, |
| "grad_norm": 3.1631078720092773, |
| "learning_rate": 1.0942992874109263e-05, |
| "loss": 0.2919, |
| "step": 4750 |
| }, |
| { |
| "epoch": 1.016460025651988, |
| "grad_norm": 4.258396148681641, |
| "learning_rate": 1.0931116389548696e-05, |
| "loss": 0.5247, |
| "step": 4755 |
| }, |
| { |
| "epoch": 1.0175288584865327, |
| "grad_norm": 3.010542392730713, |
| "learning_rate": 1.0919239904988124e-05, |
| "loss": 0.2126, |
| "step": 4760 |
| }, |
| { |
| "epoch": 1.0185976913210775, |
| "grad_norm": 3.0874409675598145, |
| "learning_rate": 1.0907363420427555e-05, |
| "loss": 0.3455, |
| "step": 4765 |
| }, |
| { |
| "epoch": 1.019666524155622, |
| "grad_norm": 4.446132183074951, |
| "learning_rate": 1.0895486935866984e-05, |
| "loss": 0.3498, |
| "step": 4770 |
| }, |
| { |
| "epoch": 1.0207353569901667, |
| "grad_norm": 4.1357502937316895, |
| "learning_rate": 1.0883610451306414e-05, |
| "loss": 0.29, |
| "step": 4775 |
| }, |
| { |
| "epoch": 1.0218041898247114, |
| "grad_norm": 6.850640296936035, |
| "learning_rate": 1.0871733966745843e-05, |
| "loss": 0.3684, |
| "step": 4780 |
| }, |
| { |
| "epoch": 1.0228730226592562, |
| "grad_norm": 3.9681396484375, |
| "learning_rate": 1.0859857482185274e-05, |
| "loss": 0.3033, |
| "step": 4785 |
| }, |
| { |
| "epoch": 1.0239418554938007, |
| "grad_norm": 3.521563768386841, |
| "learning_rate": 1.0847980997624703e-05, |
| "loss": 0.2747, |
| "step": 4790 |
| }, |
| { |
| "epoch": 1.0250106883283454, |
| "grad_norm": 4.060203552246094, |
| "learning_rate": 1.0836104513064135e-05, |
| "loss": 0.2813, |
| "step": 4795 |
| }, |
| { |
| "epoch": 1.0260795211628901, |
| "grad_norm": 3.187224864959717, |
| "learning_rate": 1.0824228028503562e-05, |
| "loss": 0.2945, |
| "step": 4800 |
| }, |
| { |
| "epoch": 1.0271483539974349, |
| "grad_norm": 3.6413896083831787, |
| "learning_rate": 1.0812351543942994e-05, |
| "loss": 0.421, |
| "step": 4805 |
| }, |
| { |
| "epoch": 1.0282171868319794, |
| "grad_norm": 4.686298847198486, |
| "learning_rate": 1.0800475059382423e-05, |
| "loss": 0.3365, |
| "step": 4810 |
| }, |
| { |
| "epoch": 1.029286019666524, |
| "grad_norm": 5.890400409698486, |
| "learning_rate": 1.0788598574821854e-05, |
| "loss": 0.3708, |
| "step": 4815 |
| }, |
| { |
| "epoch": 1.0303548525010688, |
| "grad_norm": 3.566652774810791, |
| "learning_rate": 1.0776722090261284e-05, |
| "loss": 0.4345, |
| "step": 4820 |
| }, |
| { |
| "epoch": 1.0314236853356136, |
| "grad_norm": 5.6578826904296875, |
| "learning_rate": 1.0764845605700713e-05, |
| "loss": 0.3728, |
| "step": 4825 |
| }, |
| { |
| "epoch": 1.032492518170158, |
| "grad_norm": 4.193053245544434, |
| "learning_rate": 1.0752969121140144e-05, |
| "loss": 0.3173, |
| "step": 4830 |
| }, |
| { |
| "epoch": 1.0335613510047028, |
| "grad_norm": 4.646356105804443, |
| "learning_rate": 1.0741092636579572e-05, |
| "loss": 0.2574, |
| "step": 4835 |
| }, |
| { |
| "epoch": 1.0346301838392475, |
| "grad_norm": 3.941087245941162, |
| "learning_rate": 1.0729216152019005e-05, |
| "loss": 0.3128, |
| "step": 4840 |
| }, |
| { |
| "epoch": 1.0356990166737923, |
| "grad_norm": 4.5648884773254395, |
| "learning_rate": 1.0717339667458434e-05, |
| "loss": 0.2542, |
| "step": 4845 |
| }, |
| { |
| "epoch": 1.036767849508337, |
| "grad_norm": 3.661923408508301, |
| "learning_rate": 1.0705463182897864e-05, |
| "loss": 0.1649, |
| "step": 4850 |
| }, |
| { |
| "epoch": 1.0378366823428815, |
| "grad_norm": 5.052914619445801, |
| "learning_rate": 1.0693586698337293e-05, |
| "loss": 0.4091, |
| "step": 4855 |
| }, |
| { |
| "epoch": 1.0389055151774262, |
| "grad_norm": 5.769303321838379, |
| "learning_rate": 1.0681710213776724e-05, |
| "loss": 0.3553, |
| "step": 4860 |
| }, |
| { |
| "epoch": 1.039974348011971, |
| "grad_norm": 8.323318481445312, |
| "learning_rate": 1.0669833729216152e-05, |
| "loss": 0.5474, |
| "step": 4865 |
| }, |
| { |
| "epoch": 1.0410431808465157, |
| "grad_norm": 5.351403713226318, |
| "learning_rate": 1.0657957244655583e-05, |
| "loss": 0.3586, |
| "step": 4870 |
| }, |
| { |
| "epoch": 1.0421120136810602, |
| "grad_norm": 3.5083069801330566, |
| "learning_rate": 1.0646080760095012e-05, |
| "loss": 0.2589, |
| "step": 4875 |
| }, |
| { |
| "epoch": 1.043180846515605, |
| "grad_norm": 3.8574445247650146, |
| "learning_rate": 1.0634204275534444e-05, |
| "loss": 0.3143, |
| "step": 4880 |
| }, |
| { |
| "epoch": 1.0442496793501497, |
| "grad_norm": 3.950756311416626, |
| "learning_rate": 1.0622327790973871e-05, |
| "loss": 0.3248, |
| "step": 4885 |
| }, |
| { |
| "epoch": 1.0453185121846944, |
| "grad_norm": 5.606834411621094, |
| "learning_rate": 1.0610451306413303e-05, |
| "loss": 0.4631, |
| "step": 4890 |
| }, |
| { |
| "epoch": 1.046387345019239, |
| "grad_norm": 4.092567443847656, |
| "learning_rate": 1.0598574821852732e-05, |
| "loss": 0.2977, |
| "step": 4895 |
| }, |
| { |
| "epoch": 1.0474561778537836, |
| "grad_norm": 5.365922451019287, |
| "learning_rate": 1.0586698337292163e-05, |
| "loss": 0.2487, |
| "step": 4900 |
| }, |
| { |
| "epoch": 1.0485250106883284, |
| "grad_norm": 5.173450946807861, |
| "learning_rate": 1.0574821852731592e-05, |
| "loss": 0.3554, |
| "step": 4905 |
| }, |
| { |
| "epoch": 1.049593843522873, |
| "grad_norm": 5.21553373336792, |
| "learning_rate": 1.0562945368171022e-05, |
| "loss": 0.3579, |
| "step": 4910 |
| }, |
| { |
| "epoch": 1.0506626763574176, |
| "grad_norm": 4.973548889160156, |
| "learning_rate": 1.0551068883610451e-05, |
| "loss": 0.3562, |
| "step": 4915 |
| }, |
| { |
| "epoch": 1.0517315091919623, |
| "grad_norm": 6.216787815093994, |
| "learning_rate": 1.0539192399049882e-05, |
| "loss": 0.4625, |
| "step": 4920 |
| }, |
| { |
| "epoch": 1.052800342026507, |
| "grad_norm": 4.355904579162598, |
| "learning_rate": 1.052731591448931e-05, |
| "loss": 0.2834, |
| "step": 4925 |
| }, |
| { |
| "epoch": 1.0538691748610518, |
| "grad_norm": 4.44107723236084, |
| "learning_rate": 1.0515439429928743e-05, |
| "loss": 0.4072, |
| "step": 4930 |
| }, |
| { |
| "epoch": 1.0549380076955963, |
| "grad_norm": 5.2141289710998535, |
| "learning_rate": 1.0503562945368172e-05, |
| "loss": 0.2831, |
| "step": 4935 |
| }, |
| { |
| "epoch": 1.056006840530141, |
| "grad_norm": 4.4729228019714355, |
| "learning_rate": 1.0491686460807602e-05, |
| "loss": 0.2642, |
| "step": 4940 |
| }, |
| { |
| "epoch": 1.0570756733646858, |
| "grad_norm": 4.615827560424805, |
| "learning_rate": 1.0479809976247031e-05, |
| "loss": 0.2887, |
| "step": 4945 |
| }, |
| { |
| "epoch": 1.0581445061992305, |
| "grad_norm": 4.060108661651611, |
| "learning_rate": 1.0467933491686462e-05, |
| "loss": 0.4394, |
| "step": 4950 |
| }, |
| { |
| "epoch": 1.059213339033775, |
| "grad_norm": 3.323357582092285, |
| "learning_rate": 1.0456057007125892e-05, |
| "loss": 0.2957, |
| "step": 4955 |
| }, |
| { |
| "epoch": 1.0602821718683197, |
| "grad_norm": 3.9010369777679443, |
| "learning_rate": 1.0444180522565321e-05, |
| "loss": 0.298, |
| "step": 4960 |
| }, |
| { |
| "epoch": 1.0613510047028645, |
| "grad_norm": 4.847980976104736, |
| "learning_rate": 1.0432304038004753e-05, |
| "loss": 0.2643, |
| "step": 4965 |
| }, |
| { |
| "epoch": 1.0624198375374092, |
| "grad_norm": 4.916622638702393, |
| "learning_rate": 1.0420427553444182e-05, |
| "loss": 0.3147, |
| "step": 4970 |
| }, |
| { |
| "epoch": 1.063488670371954, |
| "grad_norm": 6.059121131896973, |
| "learning_rate": 1.0408551068883613e-05, |
| "loss": 0.3433, |
| "step": 4975 |
| }, |
| { |
| "epoch": 1.0645575032064984, |
| "grad_norm": 4.212458610534668, |
| "learning_rate": 1.0396674584323041e-05, |
| "loss": 0.2797, |
| "step": 4980 |
| }, |
| { |
| "epoch": 1.0656263360410432, |
| "grad_norm": 3.9374332427978516, |
| "learning_rate": 1.0384798099762472e-05, |
| "loss": 0.2958, |
| "step": 4985 |
| }, |
| { |
| "epoch": 1.066695168875588, |
| "grad_norm": 7.207469940185547, |
| "learning_rate": 1.0372921615201901e-05, |
| "loss": 0.3972, |
| "step": 4990 |
| }, |
| { |
| "epoch": 1.0677640017101326, |
| "grad_norm": 5.122316360473633, |
| "learning_rate": 1.0361045130641331e-05, |
| "loss": 0.3361, |
| "step": 4995 |
| }, |
| { |
| "epoch": 1.0688328345446771, |
| "grad_norm": 3.4103052616119385, |
| "learning_rate": 1.034916864608076e-05, |
| "loss": 0.3174, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.0699016673792219, |
| "grad_norm": 4.129265308380127, |
| "learning_rate": 1.033729216152019e-05, |
| "loss": 0.3346, |
| "step": 5005 |
| }, |
| { |
| "epoch": 1.0709705002137666, |
| "grad_norm": 4.027009963989258, |
| "learning_rate": 1.032541567695962e-05, |
| "loss": 0.326, |
| "step": 5010 |
| }, |
| { |
| "epoch": 1.0720393330483113, |
| "grad_norm": 3.362579822540283, |
| "learning_rate": 1.0313539192399052e-05, |
| "loss": 0.2975, |
| "step": 5015 |
| }, |
| { |
| "epoch": 1.0731081658828558, |
| "grad_norm": 5.225454330444336, |
| "learning_rate": 1.030166270783848e-05, |
| "loss": 0.3303, |
| "step": 5020 |
| }, |
| { |
| "epoch": 1.0741769987174006, |
| "grad_norm": 3.756742000579834, |
| "learning_rate": 1.0289786223277911e-05, |
| "loss": 0.2285, |
| "step": 5025 |
| }, |
| { |
| "epoch": 1.0752458315519453, |
| "grad_norm": 4.867086887359619, |
| "learning_rate": 1.027790973871734e-05, |
| "loss": 0.3798, |
| "step": 5030 |
| }, |
| { |
| "epoch": 1.07631466438649, |
| "grad_norm": 4.204124927520752, |
| "learning_rate": 1.026603325415677e-05, |
| "loss": 0.2882, |
| "step": 5035 |
| }, |
| { |
| "epoch": 1.0773834972210345, |
| "grad_norm": 4.995541095733643, |
| "learning_rate": 1.02541567695962e-05, |
| "loss": 0.4249, |
| "step": 5040 |
| }, |
| { |
| "epoch": 1.0784523300555793, |
| "grad_norm": 4.921726226806641, |
| "learning_rate": 1.024228028503563e-05, |
| "loss": 0.4139, |
| "step": 5045 |
| }, |
| { |
| "epoch": 1.079521162890124, |
| "grad_norm": 5.5460734367370605, |
| "learning_rate": 1.0230403800475059e-05, |
| "loss": 0.4502, |
| "step": 5050 |
| }, |
| { |
| "epoch": 1.0805899957246687, |
| "grad_norm": 4.828423023223877, |
| "learning_rate": 1.0218527315914491e-05, |
| "loss": 0.3521, |
| "step": 5055 |
| }, |
| { |
| "epoch": 1.0816588285592132, |
| "grad_norm": 3.87648344039917, |
| "learning_rate": 1.0206650831353918e-05, |
| "loss": 0.342, |
| "step": 5060 |
| }, |
| { |
| "epoch": 1.082727661393758, |
| "grad_norm": 4.833287715911865, |
| "learning_rate": 1.019477434679335e-05, |
| "loss": 0.294, |
| "step": 5065 |
| }, |
| { |
| "epoch": 1.0837964942283027, |
| "grad_norm": 4.559665679931641, |
| "learning_rate": 1.018289786223278e-05, |
| "loss": 0.2994, |
| "step": 5070 |
| }, |
| { |
| "epoch": 1.0848653270628474, |
| "grad_norm": 4.908376216888428, |
| "learning_rate": 1.017102137767221e-05, |
| "loss": 0.3467, |
| "step": 5075 |
| }, |
| { |
| "epoch": 1.085934159897392, |
| "grad_norm": 6.717689514160156, |
| "learning_rate": 1.0159144893111639e-05, |
| "loss": 0.443, |
| "step": 5080 |
| }, |
| { |
| "epoch": 1.0870029927319367, |
| "grad_norm": 3.5398693084716797, |
| "learning_rate": 1.014726840855107e-05, |
| "loss": 0.2759, |
| "step": 5085 |
| }, |
| { |
| "epoch": 1.0880718255664814, |
| "grad_norm": 4.501621246337891, |
| "learning_rate": 1.0135391923990498e-05, |
| "loss": 0.2614, |
| "step": 5090 |
| }, |
| { |
| "epoch": 1.0891406584010261, |
| "grad_norm": 5.194151401519775, |
| "learning_rate": 1.0123515439429929e-05, |
| "loss": 0.3649, |
| "step": 5095 |
| }, |
| { |
| "epoch": 1.0902094912355706, |
| "grad_norm": 4.68430757522583, |
| "learning_rate": 1.0111638954869361e-05, |
| "loss": 0.3699, |
| "step": 5100 |
| }, |
| { |
| "epoch": 1.0912783240701154, |
| "grad_norm": 6.266822814941406, |
| "learning_rate": 1.009976247030879e-05, |
| "loss": 0.3785, |
| "step": 5105 |
| }, |
| { |
| "epoch": 1.09234715690466, |
| "grad_norm": 5.040585517883301, |
| "learning_rate": 1.008788598574822e-05, |
| "loss": 0.325, |
| "step": 5110 |
| }, |
| { |
| "epoch": 1.0934159897392048, |
| "grad_norm": 6.65608024597168, |
| "learning_rate": 1.007600950118765e-05, |
| "loss": 0.3954, |
| "step": 5115 |
| }, |
| { |
| "epoch": 1.0944848225737496, |
| "grad_norm": 4.3081488609313965, |
| "learning_rate": 1.006413301662708e-05, |
| "loss": 0.2887, |
| "step": 5120 |
| }, |
| { |
| "epoch": 1.095553655408294, |
| "grad_norm": 3.5742883682250977, |
| "learning_rate": 1.0052256532066509e-05, |
| "loss": 0.2565, |
| "step": 5125 |
| }, |
| { |
| "epoch": 1.0966224882428388, |
| "grad_norm": 4.272683620452881, |
| "learning_rate": 1.004038004750594e-05, |
| "loss": 0.2661, |
| "step": 5130 |
| }, |
| { |
| "epoch": 1.0976913210773835, |
| "grad_norm": 3.7064707279205322, |
| "learning_rate": 1.0028503562945368e-05, |
| "loss": 0.286, |
| "step": 5135 |
| }, |
| { |
| "epoch": 1.0987601539119283, |
| "grad_norm": 6.0004963874816895, |
| "learning_rate": 1.00166270783848e-05, |
| "loss": 0.3969, |
| "step": 5140 |
| }, |
| { |
| "epoch": 1.0998289867464728, |
| "grad_norm": 6.301701068878174, |
| "learning_rate": 1.0004750593824228e-05, |
| "loss": 0.2451, |
| "step": 5145 |
| }, |
| { |
| "epoch": 1.1008978195810175, |
| "grad_norm": 5.471083164215088, |
| "learning_rate": 9.992874109263658e-06, |
| "loss": 0.2472, |
| "step": 5150 |
| }, |
| { |
| "epoch": 1.1019666524155622, |
| "grad_norm": 6.456992149353027, |
| "learning_rate": 9.980997624703089e-06, |
| "loss": 0.286, |
| "step": 5155 |
| }, |
| { |
| "epoch": 1.103035485250107, |
| "grad_norm": 6.616683006286621, |
| "learning_rate": 9.969121140142518e-06, |
| "loss": 0.3109, |
| "step": 5160 |
| }, |
| { |
| "epoch": 1.1041043180846515, |
| "grad_norm": 5.748746871948242, |
| "learning_rate": 9.95724465558195e-06, |
| "loss": 0.441, |
| "step": 5165 |
| }, |
| { |
| "epoch": 1.1051731509191962, |
| "grad_norm": 4.254424571990967, |
| "learning_rate": 9.945368171021379e-06, |
| "loss": 0.3494, |
| "step": 5170 |
| }, |
| { |
| "epoch": 1.106241983753741, |
| "grad_norm": 6.022365093231201, |
| "learning_rate": 9.93349168646081e-06, |
| "loss": 0.345, |
| "step": 5175 |
| }, |
| { |
| "epoch": 1.1073108165882857, |
| "grad_norm": 3.26804518699646, |
| "learning_rate": 9.921615201900238e-06, |
| "loss": 0.2503, |
| "step": 5180 |
| }, |
| { |
| "epoch": 1.1083796494228302, |
| "grad_norm": 3.100945234298706, |
| "learning_rate": 9.909738717339669e-06, |
| "loss": 0.3922, |
| "step": 5185 |
| }, |
| { |
| "epoch": 1.109448482257375, |
| "grad_norm": 4.631006717681885, |
| "learning_rate": 9.897862232779099e-06, |
| "loss": 0.3429, |
| "step": 5190 |
| }, |
| { |
| "epoch": 1.1105173150919196, |
| "grad_norm": 4.623953819274902, |
| "learning_rate": 9.885985748218528e-06, |
| "loss": 0.3437, |
| "step": 5195 |
| }, |
| { |
| "epoch": 1.1115861479264644, |
| "grad_norm": 3.877652406692505, |
| "learning_rate": 9.874109263657959e-06, |
| "loss": 0.2751, |
| "step": 5200 |
| }, |
| { |
| "epoch": 1.1126549807610089, |
| "grad_norm": 4.4313225746154785, |
| "learning_rate": 9.862232779097387e-06, |
| "loss": 0.3634, |
| "step": 5205 |
| }, |
| { |
| "epoch": 1.1137238135955536, |
| "grad_norm": 5.426332473754883, |
| "learning_rate": 9.850356294536818e-06, |
| "loss": 0.2685, |
| "step": 5210 |
| }, |
| { |
| "epoch": 1.1147926464300983, |
| "grad_norm": 3.7707009315490723, |
| "learning_rate": 9.838479809976248e-06, |
| "loss": 0.2121, |
| "step": 5215 |
| }, |
| { |
| "epoch": 1.115861479264643, |
| "grad_norm": 3.5573911666870117, |
| "learning_rate": 9.826603325415677e-06, |
| "loss": 0.4083, |
| "step": 5220 |
| }, |
| { |
| "epoch": 1.1169303120991876, |
| "grad_norm": 3.2365455627441406, |
| "learning_rate": 9.814726840855108e-06, |
| "loss": 0.2497, |
| "step": 5225 |
| }, |
| { |
| "epoch": 1.1179991449337323, |
| "grad_norm": 3.604321241378784, |
| "learning_rate": 9.802850356294538e-06, |
| "loss": 0.3521, |
| "step": 5230 |
| }, |
| { |
| "epoch": 1.119067977768277, |
| "grad_norm": 4.779599666595459, |
| "learning_rate": 9.790973871733967e-06, |
| "loss": 0.3048, |
| "step": 5235 |
| }, |
| { |
| "epoch": 1.1201368106028218, |
| "grad_norm": 3.685837745666504, |
| "learning_rate": 9.779097387173398e-06, |
| "loss": 0.2622, |
| "step": 5240 |
| }, |
| { |
| "epoch": 1.1212056434373663, |
| "grad_norm": 4.687803268432617, |
| "learning_rate": 9.767220902612827e-06, |
| "loss": 0.2651, |
| "step": 5245 |
| }, |
| { |
| "epoch": 1.122274476271911, |
| "grad_norm": 3.861872911453247, |
| "learning_rate": 9.755344418052257e-06, |
| "loss": 0.2971, |
| "step": 5250 |
| }, |
| { |
| "epoch": 1.1233433091064557, |
| "grad_norm": 5.285345077514648, |
| "learning_rate": 9.743467933491688e-06, |
| "loss": 0.3186, |
| "step": 5255 |
| }, |
| { |
| "epoch": 1.1244121419410005, |
| "grad_norm": 4.946507930755615, |
| "learning_rate": 9.731591448931117e-06, |
| "loss": 0.3277, |
| "step": 5260 |
| }, |
| { |
| "epoch": 1.1254809747755452, |
| "grad_norm": 3.527979612350464, |
| "learning_rate": 9.719714964370547e-06, |
| "loss": 0.2521, |
| "step": 5265 |
| }, |
| { |
| "epoch": 1.1265498076100897, |
| "grad_norm": 4.42695426940918, |
| "learning_rate": 9.707838479809976e-06, |
| "loss": 0.3114, |
| "step": 5270 |
| }, |
| { |
| "epoch": 1.1276186404446344, |
| "grad_norm": 2.8614661693573, |
| "learning_rate": 9.695961995249407e-06, |
| "loss": 0.3582, |
| "step": 5275 |
| }, |
| { |
| "epoch": 1.1286874732791792, |
| "grad_norm": 4.813942909240723, |
| "learning_rate": 9.684085510688837e-06, |
| "loss": 0.3617, |
| "step": 5280 |
| }, |
| { |
| "epoch": 1.129756306113724, |
| "grad_norm": 4.115063667297363, |
| "learning_rate": 9.672209026128266e-06, |
| "loss": 0.328, |
| "step": 5285 |
| }, |
| { |
| "epoch": 1.1308251389482684, |
| "grad_norm": 2.9611902236938477, |
| "learning_rate": 9.660332541567697e-06, |
| "loss": 0.2627, |
| "step": 5290 |
| }, |
| { |
| "epoch": 1.1318939717828131, |
| "grad_norm": 4.242338180541992, |
| "learning_rate": 9.648456057007125e-06, |
| "loss": 0.3054, |
| "step": 5295 |
| }, |
| { |
| "epoch": 1.1329628046173579, |
| "grad_norm": 3.4355380535125732, |
| "learning_rate": 9.636579572446556e-06, |
| "loss": 0.5342, |
| "step": 5300 |
| }, |
| { |
| "epoch": 1.1340316374519026, |
| "grad_norm": 3.823155641555786, |
| "learning_rate": 9.624703087885987e-06, |
| "loss": 0.2956, |
| "step": 5305 |
| }, |
| { |
| "epoch": 1.1351004702864471, |
| "grad_norm": 3.815985679626465, |
| "learning_rate": 9.612826603325417e-06, |
| "loss": 0.3008, |
| "step": 5310 |
| }, |
| { |
| "epoch": 1.1361693031209918, |
| "grad_norm": 6.562064170837402, |
| "learning_rate": 9.600950118764848e-06, |
| "loss": 0.4844, |
| "step": 5315 |
| }, |
| { |
| "epoch": 1.1372381359555366, |
| "grad_norm": 4.454050540924072, |
| "learning_rate": 9.589073634204276e-06, |
| "loss": 0.2941, |
| "step": 5320 |
| }, |
| { |
| "epoch": 1.1383069687900813, |
| "grad_norm": 4.194582462310791, |
| "learning_rate": 9.577197149643707e-06, |
| "loss": 0.3928, |
| "step": 5325 |
| }, |
| { |
| "epoch": 1.1393758016246258, |
| "grad_norm": 5.349386215209961, |
| "learning_rate": 9.565320665083136e-06, |
| "loss": 0.2804, |
| "step": 5330 |
| }, |
| { |
| "epoch": 1.1404446344591705, |
| "grad_norm": 4.539825916290283, |
| "learning_rate": 9.553444180522566e-06, |
| "loss": 0.2605, |
| "step": 5335 |
| }, |
| { |
| "epoch": 1.1415134672937153, |
| "grad_norm": 4.817893028259277, |
| "learning_rate": 9.541567695961997e-06, |
| "loss": 0.2871, |
| "step": 5340 |
| }, |
| { |
| "epoch": 1.14258230012826, |
| "grad_norm": 3.5281782150268555, |
| "learning_rate": 9.529691211401426e-06, |
| "loss": 0.2106, |
| "step": 5345 |
| }, |
| { |
| "epoch": 1.1436511329628045, |
| "grad_norm": 5.409219264984131, |
| "learning_rate": 9.517814726840856e-06, |
| "loss": 0.3053, |
| "step": 5350 |
| }, |
| { |
| "epoch": 1.1447199657973492, |
| "grad_norm": 4.795240879058838, |
| "learning_rate": 9.505938242280285e-06, |
| "loss": 0.3719, |
| "step": 5355 |
| }, |
| { |
| "epoch": 1.145788798631894, |
| "grad_norm": 6.551200866699219, |
| "learning_rate": 9.494061757719716e-06, |
| "loss": 0.396, |
| "step": 5360 |
| }, |
| { |
| "epoch": 1.1468576314664387, |
| "grad_norm": 4.7790703773498535, |
| "learning_rate": 9.482185273159146e-06, |
| "loss": 0.4105, |
| "step": 5365 |
| }, |
| { |
| "epoch": 1.1479264643009834, |
| "grad_norm": 5.062493801116943, |
| "learning_rate": 9.470308788598575e-06, |
| "loss": 0.3882, |
| "step": 5370 |
| }, |
| { |
| "epoch": 1.148995297135528, |
| "grad_norm": 4.342947006225586, |
| "learning_rate": 9.458432304038006e-06, |
| "loss": 0.2815, |
| "step": 5375 |
| }, |
| { |
| "epoch": 1.1500641299700727, |
| "grad_norm": 4.391014099121094, |
| "learning_rate": 9.446555819477435e-06, |
| "loss": 0.2477, |
| "step": 5380 |
| }, |
| { |
| "epoch": 1.1511329628046174, |
| "grad_norm": 3.2322447299957275, |
| "learning_rate": 9.434679334916865e-06, |
| "loss": 0.2798, |
| "step": 5385 |
| }, |
| { |
| "epoch": 1.152201795639162, |
| "grad_norm": 3.8520939350128174, |
| "learning_rate": 9.422802850356296e-06, |
| "loss": 0.2758, |
| "step": 5390 |
| }, |
| { |
| "epoch": 1.1532706284737066, |
| "grad_norm": 3.970700740814209, |
| "learning_rate": 9.410926365795725e-06, |
| "loss": 0.2938, |
| "step": 5395 |
| }, |
| { |
| "epoch": 1.1543394613082514, |
| "grad_norm": 4.378193378448486, |
| "learning_rate": 9.399049881235155e-06, |
| "loss": 0.3946, |
| "step": 5400 |
| }, |
| { |
| "epoch": 1.155408294142796, |
| "grad_norm": 3.779149293899536, |
| "learning_rate": 9.387173396674586e-06, |
| "loss": 0.2348, |
| "step": 5405 |
| }, |
| { |
| "epoch": 1.1564771269773408, |
| "grad_norm": 3.6311495304107666, |
| "learning_rate": 9.375296912114015e-06, |
| "loss": 0.2701, |
| "step": 5410 |
| }, |
| { |
| "epoch": 1.1575459598118853, |
| "grad_norm": 4.026009559631348, |
| "learning_rate": 9.363420427553445e-06, |
| "loss": 0.3154, |
| "step": 5415 |
| }, |
| { |
| "epoch": 1.15861479264643, |
| "grad_norm": 3.796111583709717, |
| "learning_rate": 9.351543942992874e-06, |
| "loss": 0.2617, |
| "step": 5420 |
| }, |
| { |
| "epoch": 1.1596836254809748, |
| "grad_norm": 4.301056861877441, |
| "learning_rate": 9.339667458432304e-06, |
| "loss": 0.3529, |
| "step": 5425 |
| }, |
| { |
| "epoch": 1.1607524583155195, |
| "grad_norm": 4.392220973968506, |
| "learning_rate": 9.327790973871735e-06, |
| "loss": 0.2858, |
| "step": 5430 |
| }, |
| { |
| "epoch": 1.161821291150064, |
| "grad_norm": 3.698474168777466, |
| "learning_rate": 9.315914489311164e-06, |
| "loss": 0.42, |
| "step": 5435 |
| }, |
| { |
| "epoch": 1.1628901239846088, |
| "grad_norm": 4.409991264343262, |
| "learning_rate": 9.304038004750594e-06, |
| "loss": 0.3894, |
| "step": 5440 |
| }, |
| { |
| "epoch": 1.1639589568191535, |
| "grad_norm": 2.799488067626953, |
| "learning_rate": 9.292161520190025e-06, |
| "loss": 0.3073, |
| "step": 5445 |
| }, |
| { |
| "epoch": 1.1650277896536982, |
| "grad_norm": 3.6285009384155273, |
| "learning_rate": 9.280285035629456e-06, |
| "loss": 0.2824, |
| "step": 5450 |
| }, |
| { |
| "epoch": 1.1660966224882428, |
| "grad_norm": 4.096553802490234, |
| "learning_rate": 9.268408551068884e-06, |
| "loss": 0.3139, |
| "step": 5455 |
| }, |
| { |
| "epoch": 1.1671654553227875, |
| "grad_norm": 6.436227798461914, |
| "learning_rate": 9.256532066508315e-06, |
| "loss": 0.4651, |
| "step": 5460 |
| }, |
| { |
| "epoch": 1.1682342881573322, |
| "grad_norm": 4.163245677947998, |
| "learning_rate": 9.244655581947744e-06, |
| "loss": 0.2611, |
| "step": 5465 |
| }, |
| { |
| "epoch": 1.169303120991877, |
| "grad_norm": 4.249100208282471, |
| "learning_rate": 9.232779097387174e-06, |
| "loss": 0.2663, |
| "step": 5470 |
| }, |
| { |
| "epoch": 1.1703719538264215, |
| "grad_norm": 4.877579212188721, |
| "learning_rate": 9.220902612826605e-06, |
| "loss": 0.252, |
| "step": 5475 |
| }, |
| { |
| "epoch": 1.1714407866609662, |
| "grad_norm": 4.387494087219238, |
| "learning_rate": 9.209026128266034e-06, |
| "loss": 0.4996, |
| "step": 5480 |
| }, |
| { |
| "epoch": 1.172509619495511, |
| "grad_norm": 3.6732518672943115, |
| "learning_rate": 9.197149643705464e-06, |
| "loss": 0.2786, |
| "step": 5485 |
| }, |
| { |
| "epoch": 1.1735784523300556, |
| "grad_norm": 4.684414386749268, |
| "learning_rate": 9.185273159144895e-06, |
| "loss": 0.2628, |
| "step": 5490 |
| }, |
| { |
| "epoch": 1.1746472851646002, |
| "grad_norm": 5.551144599914551, |
| "learning_rate": 9.173396674584324e-06, |
| "loss": 0.3326, |
| "step": 5495 |
| }, |
| { |
| "epoch": 1.1757161179991449, |
| "grad_norm": 3.942741632461548, |
| "learning_rate": 9.161520190023754e-06, |
| "loss": 0.3294, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.1767849508336896, |
| "grad_norm": 4.97520637512207, |
| "learning_rate": 9.149643705463183e-06, |
| "loss": 0.341, |
| "step": 5505 |
| }, |
| { |
| "epoch": 1.1778537836682343, |
| "grad_norm": 4.264441967010498, |
| "learning_rate": 9.137767220902614e-06, |
| "loss": 0.2878, |
| "step": 5510 |
| }, |
| { |
| "epoch": 1.178922616502779, |
| "grad_norm": 5.5287299156188965, |
| "learning_rate": 9.125890736342044e-06, |
| "loss": 0.354, |
| "step": 5515 |
| }, |
| { |
| "epoch": 1.1799914493373236, |
| "grad_norm": 2.997340679168701, |
| "learning_rate": 9.114014251781473e-06, |
| "loss": 0.2667, |
| "step": 5520 |
| }, |
| { |
| "epoch": 1.1810602821718683, |
| "grad_norm": 4.381051540374756, |
| "learning_rate": 9.102137767220904e-06, |
| "loss": 0.2932, |
| "step": 5525 |
| }, |
| { |
| "epoch": 1.182129115006413, |
| "grad_norm": 3.4648494720458984, |
| "learning_rate": 9.090261282660332e-06, |
| "loss": 0.2608, |
| "step": 5530 |
| }, |
| { |
| "epoch": 1.1831979478409576, |
| "grad_norm": 4.567250728607178, |
| "learning_rate": 9.078384798099763e-06, |
| "loss": 0.3078, |
| "step": 5535 |
| }, |
| { |
| "epoch": 1.1842667806755023, |
| "grad_norm": 4.373274326324463, |
| "learning_rate": 9.066508313539194e-06, |
| "loss": 0.4028, |
| "step": 5540 |
| }, |
| { |
| "epoch": 1.185335613510047, |
| "grad_norm": 4.338989734649658, |
| "learning_rate": 9.054631828978622e-06, |
| "loss": 0.3429, |
| "step": 5545 |
| }, |
| { |
| "epoch": 1.1864044463445917, |
| "grad_norm": 4.9778008460998535, |
| "learning_rate": 9.042755344418053e-06, |
| "loss": 0.3481, |
| "step": 5550 |
| }, |
| { |
| "epoch": 1.1874732791791365, |
| "grad_norm": 4.068686008453369, |
| "learning_rate": 9.030878859857482e-06, |
| "loss": 0.2931, |
| "step": 5555 |
| }, |
| { |
| "epoch": 1.188542112013681, |
| "grad_norm": 3.909130096435547, |
| "learning_rate": 9.019002375296912e-06, |
| "loss": 0.2719, |
| "step": 5560 |
| }, |
| { |
| "epoch": 1.1896109448482257, |
| "grad_norm": 4.785898208618164, |
| "learning_rate": 9.007125890736343e-06, |
| "loss": 0.3968, |
| "step": 5565 |
| }, |
| { |
| "epoch": 1.1906797776827704, |
| "grad_norm": 5.576188087463379, |
| "learning_rate": 8.995249406175772e-06, |
| "loss": 0.4653, |
| "step": 5570 |
| }, |
| { |
| "epoch": 1.1917486105173152, |
| "grad_norm": 3.010072946548462, |
| "learning_rate": 8.983372921615202e-06, |
| "loss": 0.2727, |
| "step": 5575 |
| }, |
| { |
| "epoch": 1.1928174433518597, |
| "grad_norm": 4.709297180175781, |
| "learning_rate": 8.971496437054633e-06, |
| "loss": 0.4521, |
| "step": 5580 |
| }, |
| { |
| "epoch": 1.1938862761864044, |
| "grad_norm": 5.573824405670166, |
| "learning_rate": 8.959619952494063e-06, |
| "loss": 0.3042, |
| "step": 5585 |
| }, |
| { |
| "epoch": 1.1949551090209491, |
| "grad_norm": 4.321738243103027, |
| "learning_rate": 8.947743467933492e-06, |
| "loss": 0.2687, |
| "step": 5590 |
| }, |
| { |
| "epoch": 1.1960239418554939, |
| "grad_norm": 5.602605819702148, |
| "learning_rate": 8.935866983372923e-06, |
| "loss": 0.2892, |
| "step": 5595 |
| }, |
| { |
| "epoch": 1.1970927746900384, |
| "grad_norm": 3.6464884281158447, |
| "learning_rate": 8.923990498812353e-06, |
| "loss": 0.2515, |
| "step": 5600 |
| }, |
| { |
| "epoch": 1.1981616075245831, |
| "grad_norm": 3.9809868335723877, |
| "learning_rate": 8.912114014251782e-06, |
| "loss": 0.3022, |
| "step": 5605 |
| }, |
| { |
| "epoch": 1.1992304403591278, |
| "grad_norm": 4.83494758605957, |
| "learning_rate": 8.900237529691213e-06, |
| "loss": 0.4018, |
| "step": 5610 |
| }, |
| { |
| "epoch": 1.2002992731936726, |
| "grad_norm": 3.961460590362549, |
| "learning_rate": 8.888361045130642e-06, |
| "loss": 0.2532, |
| "step": 5615 |
| }, |
| { |
| "epoch": 1.2013681060282173, |
| "grad_norm": 2.4498984813690186, |
| "learning_rate": 8.876484560570072e-06, |
| "loss": 0.2285, |
| "step": 5620 |
| }, |
| { |
| "epoch": 1.2024369388627618, |
| "grad_norm": 3.654311418533325, |
| "learning_rate": 8.864608076009503e-06, |
| "loss": 0.3307, |
| "step": 5625 |
| }, |
| { |
| "epoch": 1.2035057716973065, |
| "grad_norm": 4.238831996917725, |
| "learning_rate": 8.852731591448932e-06, |
| "loss": 0.2353, |
| "step": 5630 |
| }, |
| { |
| "epoch": 1.2045746045318513, |
| "grad_norm": 3.811962842941284, |
| "learning_rate": 8.840855106888362e-06, |
| "loss": 0.3137, |
| "step": 5635 |
| }, |
| { |
| "epoch": 1.2056434373663958, |
| "grad_norm": 3.8361501693725586, |
| "learning_rate": 8.828978622327791e-06, |
| "loss": 0.2549, |
| "step": 5640 |
| }, |
| { |
| "epoch": 1.2067122702009405, |
| "grad_norm": 4.136886119842529, |
| "learning_rate": 8.817102137767222e-06, |
| "loss": 0.2901, |
| "step": 5645 |
| }, |
| { |
| "epoch": 1.2077811030354852, |
| "grad_norm": 4.573363304138184, |
| "learning_rate": 8.805225653206652e-06, |
| "loss": 0.4423, |
| "step": 5650 |
| }, |
| { |
| "epoch": 1.20884993587003, |
| "grad_norm": 4.777524948120117, |
| "learning_rate": 8.793349168646081e-06, |
| "loss": 0.2959, |
| "step": 5655 |
| }, |
| { |
| "epoch": 1.2099187687045747, |
| "grad_norm": 4.250500679016113, |
| "learning_rate": 8.781472684085511e-06, |
| "loss": 0.2523, |
| "step": 5660 |
| }, |
| { |
| "epoch": 1.2109876015391192, |
| "grad_norm": 4.024094581604004, |
| "learning_rate": 8.769596199524942e-06, |
| "loss": 0.1746, |
| "step": 5665 |
| }, |
| { |
| "epoch": 1.212056434373664, |
| "grad_norm": 4.290604591369629, |
| "learning_rate": 8.757719714964371e-06, |
| "loss": 0.3541, |
| "step": 5670 |
| }, |
| { |
| "epoch": 1.2131252672082087, |
| "grad_norm": 3.597705125808716, |
| "learning_rate": 8.745843230403801e-06, |
| "loss": 0.2801, |
| "step": 5675 |
| }, |
| { |
| "epoch": 1.2141941000427534, |
| "grad_norm": 5.059614181518555, |
| "learning_rate": 8.73396674584323e-06, |
| "loss": 0.2846, |
| "step": 5680 |
| }, |
| { |
| "epoch": 1.215262932877298, |
| "grad_norm": 3.8920083045959473, |
| "learning_rate": 8.722090261282661e-06, |
| "loss": 0.3503, |
| "step": 5685 |
| }, |
| { |
| "epoch": 1.2163317657118426, |
| "grad_norm": 4.512190818786621, |
| "learning_rate": 8.710213776722091e-06, |
| "loss": 0.2831, |
| "step": 5690 |
| }, |
| { |
| "epoch": 1.2174005985463874, |
| "grad_norm": 4.729888916015625, |
| "learning_rate": 8.69833729216152e-06, |
| "loss": 0.2708, |
| "step": 5695 |
| }, |
| { |
| "epoch": 1.218469431380932, |
| "grad_norm": 4.533064365386963, |
| "learning_rate": 8.68646080760095e-06, |
| "loss": 0.2846, |
| "step": 5700 |
| }, |
| { |
| "epoch": 1.2195382642154766, |
| "grad_norm": 3.9406075477600098, |
| "learning_rate": 8.67458432304038e-06, |
| "loss": 0.3136, |
| "step": 5705 |
| }, |
| { |
| "epoch": 1.2206070970500214, |
| "grad_norm": 6.5291924476623535, |
| "learning_rate": 8.66270783847981e-06, |
| "loss": 0.3377, |
| "step": 5710 |
| }, |
| { |
| "epoch": 1.221675929884566, |
| "grad_norm": 4.291172981262207, |
| "learning_rate": 8.65083135391924e-06, |
| "loss": 0.4648, |
| "step": 5715 |
| }, |
| { |
| "epoch": 1.2227447627191108, |
| "grad_norm": 5.999503135681152, |
| "learning_rate": 8.63895486935867e-06, |
| "loss": 0.371, |
| "step": 5720 |
| }, |
| { |
| "epoch": 1.2238135955536553, |
| "grad_norm": 3.673821449279785, |
| "learning_rate": 8.6270783847981e-06, |
| "loss": 0.3419, |
| "step": 5725 |
| }, |
| { |
| "epoch": 1.2248824283882, |
| "grad_norm": 4.6455607414245605, |
| "learning_rate": 8.61520190023753e-06, |
| "loss": 0.3034, |
| "step": 5730 |
| }, |
| { |
| "epoch": 1.2259512612227448, |
| "grad_norm": 4.375533103942871, |
| "learning_rate": 8.603325415676961e-06, |
| "loss": 0.2912, |
| "step": 5735 |
| }, |
| { |
| "epoch": 1.2270200940572895, |
| "grad_norm": 4.1931376457214355, |
| "learning_rate": 8.59144893111639e-06, |
| "loss": 0.3251, |
| "step": 5740 |
| }, |
| { |
| "epoch": 1.228088926891834, |
| "grad_norm": 7.451878547668457, |
| "learning_rate": 8.57957244655582e-06, |
| "loss": 0.3989, |
| "step": 5745 |
| }, |
| { |
| "epoch": 1.2291577597263788, |
| "grad_norm": 5.163100242614746, |
| "learning_rate": 8.567695961995251e-06, |
| "loss": 0.3193, |
| "step": 5750 |
| }, |
| { |
| "epoch": 1.2302265925609235, |
| "grad_norm": 6.099165439605713, |
| "learning_rate": 8.55581947743468e-06, |
| "loss": 0.3586, |
| "step": 5755 |
| }, |
| { |
| "epoch": 1.2312954253954682, |
| "grad_norm": 3.8234498500823975, |
| "learning_rate": 8.54394299287411e-06, |
| "loss": 0.2832, |
| "step": 5760 |
| }, |
| { |
| "epoch": 1.232364258230013, |
| "grad_norm": 4.173794269561768, |
| "learning_rate": 8.53206650831354e-06, |
| "loss": 0.389, |
| "step": 5765 |
| }, |
| { |
| "epoch": 1.2334330910645575, |
| "grad_norm": 4.987196922302246, |
| "learning_rate": 8.52019002375297e-06, |
| "loss": 0.3889, |
| "step": 5770 |
| }, |
| { |
| "epoch": 1.2345019238991022, |
| "grad_norm": 3.354900360107422, |
| "learning_rate": 8.5083135391924e-06, |
| "loss": 0.2243, |
| "step": 5775 |
| }, |
| { |
| "epoch": 1.235570756733647, |
| "grad_norm": 4.882574558258057, |
| "learning_rate": 8.49643705463183e-06, |
| "loss": 0.2416, |
| "step": 5780 |
| }, |
| { |
| "epoch": 1.2366395895681914, |
| "grad_norm": 4.3282790184021, |
| "learning_rate": 8.48456057007126e-06, |
| "loss": 0.3066, |
| "step": 5785 |
| }, |
| { |
| "epoch": 1.2377084224027362, |
| "grad_norm": 5.309357166290283, |
| "learning_rate": 8.472684085510689e-06, |
| "loss": 0.3227, |
| "step": 5790 |
| }, |
| { |
| "epoch": 1.2387772552372809, |
| "grad_norm": 3.708139181137085, |
| "learning_rate": 8.46080760095012e-06, |
| "loss": 0.3194, |
| "step": 5795 |
| }, |
| { |
| "epoch": 1.2398460880718256, |
| "grad_norm": 5.823927879333496, |
| "learning_rate": 8.44893111638955e-06, |
| "loss": 0.3851, |
| "step": 5800 |
| }, |
| { |
| "epoch": 1.2409149209063703, |
| "grad_norm": 5.825521945953369, |
| "learning_rate": 8.437054631828979e-06, |
| "loss": 0.2793, |
| "step": 5805 |
| }, |
| { |
| "epoch": 1.2419837537409149, |
| "grad_norm": 4.350478172302246, |
| "learning_rate": 8.42517814726841e-06, |
| "loss": 0.2482, |
| "step": 5810 |
| }, |
| { |
| "epoch": 1.2430525865754596, |
| "grad_norm": 4.824470043182373, |
| "learning_rate": 8.413301662707838e-06, |
| "loss": 0.3311, |
| "step": 5815 |
| }, |
| { |
| "epoch": 1.2441214194100043, |
| "grad_norm": 4.695113182067871, |
| "learning_rate": 8.401425178147269e-06, |
| "loss": 0.2456, |
| "step": 5820 |
| }, |
| { |
| "epoch": 1.245190252244549, |
| "grad_norm": 5.539307594299316, |
| "learning_rate": 8.3895486935867e-06, |
| "loss": 0.3267, |
| "step": 5825 |
| }, |
| { |
| "epoch": 1.2462590850790936, |
| "grad_norm": 4.055349826812744, |
| "learning_rate": 8.377672209026128e-06, |
| "loss": 0.2496, |
| "step": 5830 |
| }, |
| { |
| "epoch": 1.2473279179136383, |
| "grad_norm": 4.012608051300049, |
| "learning_rate": 8.365795724465559e-06, |
| "loss": 0.2896, |
| "step": 5835 |
| }, |
| { |
| "epoch": 1.248396750748183, |
| "grad_norm": 4.369838714599609, |
| "learning_rate": 8.35391923990499e-06, |
| "loss": 0.2717, |
| "step": 5840 |
| }, |
| { |
| "epoch": 1.2494655835827277, |
| "grad_norm": 7.311318874359131, |
| "learning_rate": 8.342042755344418e-06, |
| "loss": 0.344, |
| "step": 5845 |
| }, |
| { |
| "epoch": 1.2505344164172723, |
| "grad_norm": 3.8691282272338867, |
| "learning_rate": 8.330166270783849e-06, |
| "loss": 0.1997, |
| "step": 5850 |
| }, |
| { |
| "epoch": 1.251603249251817, |
| "grad_norm": 4.140939712524414, |
| "learning_rate": 8.318289786223278e-06, |
| "loss": 0.2454, |
| "step": 5855 |
| }, |
| { |
| "epoch": 1.2526720820863617, |
| "grad_norm": 4.034096717834473, |
| "learning_rate": 8.306413301662708e-06, |
| "loss": 0.2923, |
| "step": 5860 |
| }, |
| { |
| "epoch": 1.2537409149209064, |
| "grad_norm": 4.175270080566406, |
| "learning_rate": 8.294536817102139e-06, |
| "loss": 0.268, |
| "step": 5865 |
| }, |
| { |
| "epoch": 1.2548097477554512, |
| "grad_norm": 5.182862758636475, |
| "learning_rate": 8.28266033254157e-06, |
| "loss": 0.2469, |
| "step": 5870 |
| }, |
| { |
| "epoch": 1.2558785805899957, |
| "grad_norm": 3.4455058574676514, |
| "learning_rate": 8.270783847980998e-06, |
| "loss": 0.2488, |
| "step": 5875 |
| }, |
| { |
| "epoch": 1.2569474134245404, |
| "grad_norm": 3.5229389667510986, |
| "learning_rate": 8.258907363420429e-06, |
| "loss": 0.2809, |
| "step": 5880 |
| }, |
| { |
| "epoch": 1.2580162462590851, |
| "grad_norm": 5.2068071365356445, |
| "learning_rate": 8.247030878859859e-06, |
| "loss": 0.2967, |
| "step": 5885 |
| }, |
| { |
| "epoch": 1.2590850790936297, |
| "grad_norm": 5.500560283660889, |
| "learning_rate": 8.235154394299288e-06, |
| "loss": 0.3366, |
| "step": 5890 |
| }, |
| { |
| "epoch": 1.2601539119281744, |
| "grad_norm": 3.9053938388824463, |
| "learning_rate": 8.223277909738719e-06, |
| "loss": 0.2968, |
| "step": 5895 |
| }, |
| { |
| "epoch": 1.2612227447627191, |
| "grad_norm": 3.7163820266723633, |
| "learning_rate": 8.211401425178147e-06, |
| "loss": 0.2, |
| "step": 5900 |
| }, |
| { |
| "epoch": 1.2622915775972638, |
| "grad_norm": 4.347673416137695, |
| "learning_rate": 8.199524940617578e-06, |
| "loss": 0.2761, |
| "step": 5905 |
| }, |
| { |
| "epoch": 1.2633604104318086, |
| "grad_norm": 3.297481060028076, |
| "learning_rate": 8.187648456057008e-06, |
| "loss": 0.2177, |
| "step": 5910 |
| }, |
| { |
| "epoch": 1.264429243266353, |
| "grad_norm": 5.587257385253906, |
| "learning_rate": 8.175771971496437e-06, |
| "loss": 0.2721, |
| "step": 5915 |
| }, |
| { |
| "epoch": 1.2654980761008978, |
| "grad_norm": 3.562802791595459, |
| "learning_rate": 8.163895486935868e-06, |
| "loss": 0.2592, |
| "step": 5920 |
| }, |
| { |
| "epoch": 1.2665669089354425, |
| "grad_norm": 5.265760898590088, |
| "learning_rate": 8.152019002375298e-06, |
| "loss": 0.453, |
| "step": 5925 |
| }, |
| { |
| "epoch": 1.267635741769987, |
| "grad_norm": 4.091883182525635, |
| "learning_rate": 8.140142517814727e-06, |
| "loss": 0.1952, |
| "step": 5930 |
| }, |
| { |
| "epoch": 1.2687045746045318, |
| "grad_norm": 4.552518844604492, |
| "learning_rate": 8.128266033254158e-06, |
| "loss": 0.3269, |
| "step": 5935 |
| }, |
| { |
| "epoch": 1.2697734074390765, |
| "grad_norm": 4.755618572235107, |
| "learning_rate": 8.116389548693587e-06, |
| "loss": 0.2776, |
| "step": 5940 |
| }, |
| { |
| "epoch": 1.2708422402736212, |
| "grad_norm": 4.392646312713623, |
| "learning_rate": 8.104513064133017e-06, |
| "loss": 0.2627, |
| "step": 5945 |
| }, |
| { |
| "epoch": 1.271911073108166, |
| "grad_norm": 2.6964704990386963, |
| "learning_rate": 8.092636579572448e-06, |
| "loss": 0.2524, |
| "step": 5950 |
| }, |
| { |
| "epoch": 1.2729799059427105, |
| "grad_norm": 3.914213180541992, |
| "learning_rate": 8.080760095011877e-06, |
| "loss": 0.2713, |
| "step": 5955 |
| }, |
| { |
| "epoch": 1.2740487387772552, |
| "grad_norm": 3.009427785873413, |
| "learning_rate": 8.068883610451307e-06, |
| "loss": 0.2618, |
| "step": 5960 |
| }, |
| { |
| "epoch": 1.2751175716118, |
| "grad_norm": 4.362711429595947, |
| "learning_rate": 8.057007125890736e-06, |
| "loss": 0.2735, |
| "step": 5965 |
| }, |
| { |
| "epoch": 1.2761864044463445, |
| "grad_norm": 5.038128852844238, |
| "learning_rate": 8.045130641330167e-06, |
| "loss": 0.327, |
| "step": 5970 |
| }, |
| { |
| "epoch": 1.2772552372808892, |
| "grad_norm": 5.867886543273926, |
| "learning_rate": 8.033254156769597e-06, |
| "loss": 0.3294, |
| "step": 5975 |
| }, |
| { |
| "epoch": 1.278324070115434, |
| "grad_norm": 3.8101038932800293, |
| "learning_rate": 8.021377672209026e-06, |
| "loss": 0.3299, |
| "step": 5980 |
| }, |
| { |
| "epoch": 1.2793929029499786, |
| "grad_norm": 4.082939624786377, |
| "learning_rate": 8.009501187648457e-06, |
| "loss": 0.2328, |
| "step": 5985 |
| }, |
| { |
| "epoch": 1.2804617357845234, |
| "grad_norm": 5.352798938751221, |
| "learning_rate": 7.997624703087885e-06, |
| "loss": 0.2554, |
| "step": 5990 |
| }, |
| { |
| "epoch": 1.281530568619068, |
| "grad_norm": 2.7532148361206055, |
| "learning_rate": 7.985748218527316e-06, |
| "loss": 0.3285, |
| "step": 5995 |
| }, |
| { |
| "epoch": 1.2825994014536126, |
| "grad_norm": 4.2501349449157715, |
| "learning_rate": 7.973871733966747e-06, |
| "loss": 0.2884, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.2836682342881574, |
| "grad_norm": 3.0817322731018066, |
| "learning_rate": 7.961995249406177e-06, |
| "loss": 0.3201, |
| "step": 6005 |
| }, |
| { |
| "epoch": 1.284737067122702, |
| "grad_norm": 4.214169502258301, |
| "learning_rate": 7.950118764845608e-06, |
| "loss": 0.3529, |
| "step": 6010 |
| }, |
| { |
| "epoch": 1.2858058999572468, |
| "grad_norm": 4.896885871887207, |
| "learning_rate": 7.938242280285036e-06, |
| "loss": 0.3113, |
| "step": 6015 |
| }, |
| { |
| "epoch": 1.2868747327917913, |
| "grad_norm": 4.869765758514404, |
| "learning_rate": 7.926365795724467e-06, |
| "loss": 0.386, |
| "step": 6020 |
| }, |
| { |
| "epoch": 1.287943565626336, |
| "grad_norm": 4.720851421356201, |
| "learning_rate": 7.914489311163896e-06, |
| "loss": 0.2453, |
| "step": 6025 |
| }, |
| { |
| "epoch": 1.2890123984608808, |
| "grad_norm": 4.764908790588379, |
| "learning_rate": 7.902612826603326e-06, |
| "loss": 0.3845, |
| "step": 6030 |
| }, |
| { |
| "epoch": 1.2900812312954253, |
| "grad_norm": 4.5335845947265625, |
| "learning_rate": 7.890736342042757e-06, |
| "loss": 0.3311, |
| "step": 6035 |
| }, |
| { |
| "epoch": 1.29115006412997, |
| "grad_norm": 5.650118350982666, |
| "learning_rate": 7.878859857482186e-06, |
| "loss": 0.3556, |
| "step": 6040 |
| }, |
| { |
| "epoch": 1.2922188969645148, |
| "grad_norm": 4.7145209312438965, |
| "learning_rate": 7.866983372921616e-06, |
| "loss": 0.2491, |
| "step": 6045 |
| }, |
| { |
| "epoch": 1.2932877297990595, |
| "grad_norm": 5.045220851898193, |
| "learning_rate": 7.855106888361045e-06, |
| "loss": 0.3478, |
| "step": 6050 |
| }, |
| { |
| "epoch": 1.2943565626336042, |
| "grad_norm": 3.746929407119751, |
| "learning_rate": 7.843230403800476e-06, |
| "loss": 0.175, |
| "step": 6055 |
| }, |
| { |
| "epoch": 1.2954253954681487, |
| "grad_norm": 3.4932451248168945, |
| "learning_rate": 7.831353919239906e-06, |
| "loss": 0.2475, |
| "step": 6060 |
| }, |
| { |
| "epoch": 1.2964942283026935, |
| "grad_norm": 4.507287502288818, |
| "learning_rate": 7.819477434679335e-06, |
| "loss": 0.2793, |
| "step": 6065 |
| }, |
| { |
| "epoch": 1.2975630611372382, |
| "grad_norm": 3.872846841812134, |
| "learning_rate": 7.807600950118766e-06, |
| "loss": 0.3745, |
| "step": 6070 |
| }, |
| { |
| "epoch": 1.2986318939717827, |
| "grad_norm": 3.80639910697937, |
| "learning_rate": 7.795724465558195e-06, |
| "loss": 0.2619, |
| "step": 6075 |
| }, |
| { |
| "epoch": 1.2997007268063274, |
| "grad_norm": 4.278339862823486, |
| "learning_rate": 7.783847980997625e-06, |
| "loss": 0.2882, |
| "step": 6080 |
| }, |
| { |
| "epoch": 1.3007695596408722, |
| "grad_norm": 3.2503674030303955, |
| "learning_rate": 7.771971496437056e-06, |
| "loss": 0.2651, |
| "step": 6085 |
| }, |
| { |
| "epoch": 1.3018383924754169, |
| "grad_norm": 3.709991216659546, |
| "learning_rate": 7.760095011876485e-06, |
| "loss": 0.3257, |
| "step": 6090 |
| }, |
| { |
| "epoch": 1.3029072253099616, |
| "grad_norm": 4.797738075256348, |
| "learning_rate": 7.748218527315915e-06, |
| "loss": 0.2626, |
| "step": 6095 |
| }, |
| { |
| "epoch": 1.3039760581445061, |
| "grad_norm": 3.289095163345337, |
| "learning_rate": 7.736342042755346e-06, |
| "loss": 0.3391, |
| "step": 6100 |
| }, |
| { |
| "epoch": 1.3050448909790509, |
| "grad_norm": 5.237732410430908, |
| "learning_rate": 7.724465558194774e-06, |
| "loss": 0.2868, |
| "step": 6105 |
| }, |
| { |
| "epoch": 1.3061137238135956, |
| "grad_norm": 3.3352086544036865, |
| "learning_rate": 7.712589073634205e-06, |
| "loss": 0.2358, |
| "step": 6110 |
| }, |
| { |
| "epoch": 1.30718255664814, |
| "grad_norm": 4.8291168212890625, |
| "learning_rate": 7.700712589073634e-06, |
| "loss": 0.3505, |
| "step": 6115 |
| }, |
| { |
| "epoch": 1.308251389482685, |
| "grad_norm": 6.421624183654785, |
| "learning_rate": 7.688836104513064e-06, |
| "loss": 0.3643, |
| "step": 6120 |
| }, |
| { |
| "epoch": 1.3093202223172296, |
| "grad_norm": 2.7074790000915527, |
| "learning_rate": 7.676959619952495e-06, |
| "loss": 0.2729, |
| "step": 6125 |
| }, |
| { |
| "epoch": 1.3103890551517743, |
| "grad_norm": 4.26420783996582, |
| "learning_rate": 7.665083135391924e-06, |
| "loss": 0.2363, |
| "step": 6130 |
| }, |
| { |
| "epoch": 1.311457887986319, |
| "grad_norm": 6.1749773025512695, |
| "learning_rate": 7.653206650831354e-06, |
| "loss": 0.3173, |
| "step": 6135 |
| }, |
| { |
| "epoch": 1.3125267208208635, |
| "grad_norm": 3.3525917530059814, |
| "learning_rate": 7.641330166270783e-06, |
| "loss": 0.2797, |
| "step": 6140 |
| }, |
| { |
| "epoch": 1.3135955536554083, |
| "grad_norm": 3.1302783489227295, |
| "learning_rate": 7.629453681710216e-06, |
| "loss": 0.3054, |
| "step": 6145 |
| }, |
| { |
| "epoch": 1.314664386489953, |
| "grad_norm": 3.0552220344543457, |
| "learning_rate": 7.617577197149645e-06, |
| "loss": 0.2596, |
| "step": 6150 |
| }, |
| { |
| "epoch": 1.3157332193244977, |
| "grad_norm": 5.424324035644531, |
| "learning_rate": 7.605700712589075e-06, |
| "loss": 0.3871, |
| "step": 6155 |
| }, |
| { |
| "epoch": 1.3168020521590424, |
| "grad_norm": 4.735466480255127, |
| "learning_rate": 7.593824228028505e-06, |
| "loss": 0.2798, |
| "step": 6160 |
| }, |
| { |
| "epoch": 1.317870884993587, |
| "grad_norm": 5.158178806304932, |
| "learning_rate": 7.581947743467934e-06, |
| "loss": 0.2532, |
| "step": 6165 |
| }, |
| { |
| "epoch": 1.3189397178281317, |
| "grad_norm": 5.720581531524658, |
| "learning_rate": 7.570071258907364e-06, |
| "loss": 0.2649, |
| "step": 6170 |
| }, |
| { |
| "epoch": 1.3200085506626764, |
| "grad_norm": 4.740435600280762, |
| "learning_rate": 7.5581947743467946e-06, |
| "loss": 0.3457, |
| "step": 6175 |
| }, |
| { |
| "epoch": 1.321077383497221, |
| "grad_norm": 4.528372287750244, |
| "learning_rate": 7.546318289786224e-06, |
| "loss": 0.4062, |
| "step": 6180 |
| }, |
| { |
| "epoch": 1.3221462163317657, |
| "grad_norm": 5.7430243492126465, |
| "learning_rate": 7.534441805225654e-06, |
| "loss": 0.4318, |
| "step": 6185 |
| }, |
| { |
| "epoch": 1.3232150491663104, |
| "grad_norm": 3.7349984645843506, |
| "learning_rate": 7.522565320665084e-06, |
| "loss": 0.2305, |
| "step": 6190 |
| }, |
| { |
| "epoch": 1.3242838820008551, |
| "grad_norm": 3.384366273880005, |
| "learning_rate": 7.510688836104514e-06, |
| "loss": 0.218, |
| "step": 6195 |
| }, |
| { |
| "epoch": 1.3253527148353998, |
| "grad_norm": 4.311688423156738, |
| "learning_rate": 7.498812351543944e-06, |
| "loss": 0.2738, |
| "step": 6200 |
| }, |
| { |
| "epoch": 1.3264215476699444, |
| "grad_norm": 3.9737985134124756, |
| "learning_rate": 7.486935866983374e-06, |
| "loss": 0.3043, |
| "step": 6205 |
| }, |
| { |
| "epoch": 1.327490380504489, |
| "grad_norm": 3.2927355766296387, |
| "learning_rate": 7.475059382422803e-06, |
| "loss": 0.2055, |
| "step": 6210 |
| }, |
| { |
| "epoch": 1.3285592133390338, |
| "grad_norm": 4.364592552185059, |
| "learning_rate": 7.463182897862233e-06, |
| "loss": 0.2528, |
| "step": 6215 |
| }, |
| { |
| "epoch": 1.3296280461735783, |
| "grad_norm": 4.896527290344238, |
| "learning_rate": 7.451306413301664e-06, |
| "loss": 0.3514, |
| "step": 6220 |
| }, |
| { |
| "epoch": 1.330696879008123, |
| "grad_norm": 3.7543258666992188, |
| "learning_rate": 7.439429928741093e-06, |
| "loss": 0.3199, |
| "step": 6225 |
| }, |
| { |
| "epoch": 1.3317657118426678, |
| "grad_norm": 4.389688491821289, |
| "learning_rate": 7.427553444180523e-06, |
| "loss": 0.2453, |
| "step": 6230 |
| }, |
| { |
| "epoch": 1.3328345446772125, |
| "grad_norm": 5.297595500946045, |
| "learning_rate": 7.415676959619953e-06, |
| "loss": 0.3237, |
| "step": 6235 |
| }, |
| { |
| "epoch": 1.3339033775117572, |
| "grad_norm": 4.290585041046143, |
| "learning_rate": 7.403800475059383e-06, |
| "loss": 0.3409, |
| "step": 6240 |
| }, |
| { |
| "epoch": 1.3349722103463018, |
| "grad_norm": 3.8684494495391846, |
| "learning_rate": 7.391923990498813e-06, |
| "loss": 0.268, |
| "step": 6245 |
| }, |
| { |
| "epoch": 1.3360410431808465, |
| "grad_norm": 7.344365119934082, |
| "learning_rate": 7.380047505938243e-06, |
| "loss": 0.4072, |
| "step": 6250 |
| }, |
| { |
| "epoch": 1.3371098760153912, |
| "grad_norm": 4.403175354003906, |
| "learning_rate": 7.368171021377672e-06, |
| "loss": 0.3601, |
| "step": 6255 |
| }, |
| { |
| "epoch": 1.338178708849936, |
| "grad_norm": 4.6706414222717285, |
| "learning_rate": 7.356294536817102e-06, |
| "loss": 0.3997, |
| "step": 6260 |
| }, |
| { |
| "epoch": 1.3392475416844807, |
| "grad_norm": 3.4723129272460938, |
| "learning_rate": 7.344418052256533e-06, |
| "loss": 0.2062, |
| "step": 6265 |
| }, |
| { |
| "epoch": 1.3403163745190252, |
| "grad_norm": 3.8669190406799316, |
| "learning_rate": 7.332541567695962e-06, |
| "loss": 0.2703, |
| "step": 6270 |
| }, |
| { |
| "epoch": 1.34138520735357, |
| "grad_norm": 4.620151519775391, |
| "learning_rate": 7.320665083135392e-06, |
| "loss": 0.2498, |
| "step": 6275 |
| }, |
| { |
| "epoch": 1.3424540401881147, |
| "grad_norm": 4.765347480773926, |
| "learning_rate": 7.308788598574822e-06, |
| "loss": 0.3418, |
| "step": 6280 |
| }, |
| { |
| "epoch": 1.3435228730226592, |
| "grad_norm": 3.9806559085845947, |
| "learning_rate": 7.296912114014253e-06, |
| "loss": 0.2046, |
| "step": 6285 |
| }, |
| { |
| "epoch": 1.344591705857204, |
| "grad_norm": 6.489411354064941, |
| "learning_rate": 7.285035629453683e-06, |
| "loss": 0.3267, |
| "step": 6290 |
| }, |
| { |
| "epoch": 1.3456605386917486, |
| "grad_norm": 4.385682582855225, |
| "learning_rate": 7.2731591448931125e-06, |
| "loss": 0.2756, |
| "step": 6295 |
| }, |
| { |
| "epoch": 1.3467293715262934, |
| "grad_norm": 5.30741548538208, |
| "learning_rate": 7.261282660332542e-06, |
| "loss": 0.2808, |
| "step": 6300 |
| }, |
| { |
| "epoch": 1.347798204360838, |
| "grad_norm": 3.52230167388916, |
| "learning_rate": 7.249406175771973e-06, |
| "loss": 0.3037, |
| "step": 6305 |
| }, |
| { |
| "epoch": 1.3488670371953826, |
| "grad_norm": 3.3302509784698486, |
| "learning_rate": 7.2375296912114025e-06, |
| "loss": 0.3837, |
| "step": 6310 |
| }, |
| { |
| "epoch": 1.3499358700299273, |
| "grad_norm": 4.349034309387207, |
| "learning_rate": 7.225653206650832e-06, |
| "loss": 0.2496, |
| "step": 6315 |
| }, |
| { |
| "epoch": 1.351004702864472, |
| "grad_norm": 3.651261329650879, |
| "learning_rate": 7.213776722090262e-06, |
| "loss": 0.3131, |
| "step": 6320 |
| }, |
| { |
| "epoch": 1.3520735356990166, |
| "grad_norm": 4.3042144775390625, |
| "learning_rate": 7.201900237529692e-06, |
| "loss": 0.3038, |
| "step": 6325 |
| }, |
| { |
| "epoch": 1.3531423685335613, |
| "grad_norm": 4.746523380279541, |
| "learning_rate": 7.190023752969122e-06, |
| "loss": 0.2872, |
| "step": 6330 |
| }, |
| { |
| "epoch": 1.354211201368106, |
| "grad_norm": 3.058163642883301, |
| "learning_rate": 7.178147268408552e-06, |
| "loss": 0.3548, |
| "step": 6335 |
| }, |
| { |
| "epoch": 1.3552800342026508, |
| "grad_norm": 4.4561309814453125, |
| "learning_rate": 7.1662707838479815e-06, |
| "loss": 0.1994, |
| "step": 6340 |
| }, |
| { |
| "epoch": 1.3563488670371955, |
| "grad_norm": 3.580275535583496, |
| "learning_rate": 7.154394299287411e-06, |
| "loss": 0.2383, |
| "step": 6345 |
| }, |
| { |
| "epoch": 1.35741769987174, |
| "grad_norm": 4.0294671058654785, |
| "learning_rate": 7.142517814726842e-06, |
| "loss": 0.3412, |
| "step": 6350 |
| }, |
| { |
| "epoch": 1.3584865327062847, |
| "grad_norm": 4.032179355621338, |
| "learning_rate": 7.1306413301662715e-06, |
| "loss": 0.2392, |
| "step": 6355 |
| }, |
| { |
| "epoch": 1.3595553655408295, |
| "grad_norm": 3.6529910564422607, |
| "learning_rate": 7.118764845605701e-06, |
| "loss": 0.2946, |
| "step": 6360 |
| }, |
| { |
| "epoch": 1.360624198375374, |
| "grad_norm": 5.5632781982421875, |
| "learning_rate": 7.106888361045131e-06, |
| "loss": 0.3075, |
| "step": 6365 |
| }, |
| { |
| "epoch": 1.3616930312099187, |
| "grad_norm": 4.6378865242004395, |
| "learning_rate": 7.0950118764845614e-06, |
| "loss": 0.2695, |
| "step": 6370 |
| }, |
| { |
| "epoch": 1.3627618640444634, |
| "grad_norm": 4.01276969909668, |
| "learning_rate": 7.083135391923991e-06, |
| "loss": 0.2289, |
| "step": 6375 |
| }, |
| { |
| "epoch": 1.3638306968790082, |
| "grad_norm": 2.731029748916626, |
| "learning_rate": 7.071258907363421e-06, |
| "loss": 0.339, |
| "step": 6380 |
| }, |
| { |
| "epoch": 1.3648995297135529, |
| "grad_norm": 6.142641544342041, |
| "learning_rate": 7.0593824228028505e-06, |
| "loss": 0.3048, |
| "step": 6385 |
| }, |
| { |
| "epoch": 1.3659683625480974, |
| "grad_norm": 4.8854289054870605, |
| "learning_rate": 7.04750593824228e-06, |
| "loss": 0.3437, |
| "step": 6390 |
| }, |
| { |
| "epoch": 1.3670371953826421, |
| "grad_norm": 4.592909336090088, |
| "learning_rate": 7.035629453681711e-06, |
| "loss": 0.2587, |
| "step": 6395 |
| }, |
| { |
| "epoch": 1.3681060282171869, |
| "grad_norm": 4.572000026702881, |
| "learning_rate": 7.0237529691211405e-06, |
| "loss": 0.3156, |
| "step": 6400 |
| }, |
| { |
| "epoch": 1.3691748610517316, |
| "grad_norm": 6.196121692657471, |
| "learning_rate": 7.01187648456057e-06, |
| "loss": 0.2827, |
| "step": 6405 |
| }, |
| { |
| "epoch": 1.3702436938862763, |
| "grad_norm": 3.967109441757202, |
| "learning_rate": 7e-06, |
| "loss": 0.2337, |
| "step": 6410 |
| }, |
| { |
| "epoch": 1.3713125267208208, |
| "grad_norm": 3.1756539344787598, |
| "learning_rate": 6.98812351543943e-06, |
| "loss": 0.265, |
| "step": 6415 |
| }, |
| { |
| "epoch": 1.3723813595553656, |
| "grad_norm": 3.4292986392974854, |
| "learning_rate": 6.97624703087886e-06, |
| "loss": 0.2822, |
| "step": 6420 |
| }, |
| { |
| "epoch": 1.3734501923899103, |
| "grad_norm": 4.521055698394775, |
| "learning_rate": 6.964370546318291e-06, |
| "loss": 0.2697, |
| "step": 6425 |
| }, |
| { |
| "epoch": 1.3745190252244548, |
| "grad_norm": 3.9092273712158203, |
| "learning_rate": 6.95249406175772e-06, |
| "loss": 0.2136, |
| "step": 6430 |
| }, |
| { |
| "epoch": 1.3755878580589995, |
| "grad_norm": 3.5216240882873535, |
| "learning_rate": 6.940617577197151e-06, |
| "loss": 0.2549, |
| "step": 6435 |
| }, |
| { |
| "epoch": 1.3766566908935443, |
| "grad_norm": 5.987946510314941, |
| "learning_rate": 6.928741092636581e-06, |
| "loss": 0.2631, |
| "step": 6440 |
| }, |
| { |
| "epoch": 1.377725523728089, |
| "grad_norm": 5.098079681396484, |
| "learning_rate": 6.91686460807601e-06, |
| "loss": 0.324, |
| "step": 6445 |
| }, |
| { |
| "epoch": 1.3787943565626337, |
| "grad_norm": 4.314655303955078, |
| "learning_rate": 6.90498812351544e-06, |
| "loss": 0.3722, |
| "step": 6450 |
| }, |
| { |
| "epoch": 1.3798631893971782, |
| "grad_norm": 5.151162147521973, |
| "learning_rate": 6.893111638954871e-06, |
| "loss": 0.453, |
| "step": 6455 |
| }, |
| { |
| "epoch": 1.380932022231723, |
| "grad_norm": 4.187003135681152, |
| "learning_rate": 6.8812351543943e-06, |
| "loss": 0.2798, |
| "step": 6460 |
| }, |
| { |
| "epoch": 1.3820008550662677, |
| "grad_norm": 5.253510475158691, |
| "learning_rate": 6.86935866983373e-06, |
| "loss": 0.2605, |
| "step": 6465 |
| }, |
| { |
| "epoch": 1.3830696879008122, |
| "grad_norm": 2.9405324459075928, |
| "learning_rate": 6.85748218527316e-06, |
| "loss": 0.3834, |
| "step": 6470 |
| }, |
| { |
| "epoch": 1.384138520735357, |
| "grad_norm": 3.8434178829193115, |
| "learning_rate": 6.845605700712589e-06, |
| "loss": 0.2683, |
| "step": 6475 |
| }, |
| { |
| "epoch": 1.3852073535699017, |
| "grad_norm": 4.633339881896973, |
| "learning_rate": 6.83372921615202e-06, |
| "loss": 0.243, |
| "step": 6480 |
| }, |
| { |
| "epoch": 1.3862761864044464, |
| "grad_norm": 4.103108882904053, |
| "learning_rate": 6.82185273159145e-06, |
| "loss": 0.3287, |
| "step": 6485 |
| }, |
| { |
| "epoch": 1.3873450192389911, |
| "grad_norm": 4.187243938446045, |
| "learning_rate": 6.809976247030879e-06, |
| "loss": 0.2754, |
| "step": 6490 |
| }, |
| { |
| "epoch": 1.3884138520735356, |
| "grad_norm": 5.196486949920654, |
| "learning_rate": 6.798099762470309e-06, |
| "loss": 0.3701, |
| "step": 6495 |
| }, |
| { |
| "epoch": 1.3894826849080804, |
| "grad_norm": 4.622681140899658, |
| "learning_rate": 6.78622327790974e-06, |
| "loss": 0.2762, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.390551517742625, |
| "grad_norm": 2.859978675842285, |
| "learning_rate": 6.774346793349169e-06, |
| "loss": 0.266, |
| "step": 6505 |
| }, |
| { |
| "epoch": 1.3916203505771696, |
| "grad_norm": 5.8184332847595215, |
| "learning_rate": 6.762470308788599e-06, |
| "loss": 0.2958, |
| "step": 6510 |
| }, |
| { |
| "epoch": 1.3926891834117143, |
| "grad_norm": 3.787079334259033, |
| "learning_rate": 6.750593824228029e-06, |
| "loss": 0.2754, |
| "step": 6515 |
| }, |
| { |
| "epoch": 1.393758016246259, |
| "grad_norm": 4.132429599761963, |
| "learning_rate": 6.7387173396674584e-06, |
| "loss": 0.3634, |
| "step": 6520 |
| }, |
| { |
| "epoch": 1.3948268490808038, |
| "grad_norm": 5.011837005615234, |
| "learning_rate": 6.726840855106889e-06, |
| "loss": 0.2974, |
| "step": 6525 |
| }, |
| { |
| "epoch": 1.3958956819153485, |
| "grad_norm": 5.0287957191467285, |
| "learning_rate": 6.714964370546319e-06, |
| "loss": 0.3158, |
| "step": 6530 |
| }, |
| { |
| "epoch": 1.396964514749893, |
| "grad_norm": 3.846284866333008, |
| "learning_rate": 6.703087885985748e-06, |
| "loss": 0.2718, |
| "step": 6535 |
| }, |
| { |
| "epoch": 1.3980333475844378, |
| "grad_norm": 5.715949058532715, |
| "learning_rate": 6.691211401425178e-06, |
| "loss": 0.2586, |
| "step": 6540 |
| }, |
| { |
| "epoch": 1.3991021804189825, |
| "grad_norm": 2.0641372203826904, |
| "learning_rate": 6.679334916864609e-06, |
| "loss": 0.2071, |
| "step": 6545 |
| }, |
| { |
| "epoch": 1.4001710132535272, |
| "grad_norm": 3.989108085632324, |
| "learning_rate": 6.667458432304038e-06, |
| "loss": 0.3308, |
| "step": 6550 |
| }, |
| { |
| "epoch": 1.401239846088072, |
| "grad_norm": 5.488873481750488, |
| "learning_rate": 6.655581947743468e-06, |
| "loss": 0.2517, |
| "step": 6555 |
| }, |
| { |
| "epoch": 1.4023086789226165, |
| "grad_norm": 4.91823673248291, |
| "learning_rate": 6.643705463182898e-06, |
| "loss": 0.234, |
| "step": 6560 |
| }, |
| { |
| "epoch": 1.4033775117571612, |
| "grad_norm": 5.4402289390563965, |
| "learning_rate": 6.631828978622329e-06, |
| "loss": 0.3025, |
| "step": 6565 |
| }, |
| { |
| "epoch": 1.404446344591706, |
| "grad_norm": 5.417737007141113, |
| "learning_rate": 6.619952494061759e-06, |
| "loss": 0.2619, |
| "step": 6570 |
| }, |
| { |
| "epoch": 1.4055151774262504, |
| "grad_norm": 3.603675127029419, |
| "learning_rate": 6.6080760095011885e-06, |
| "loss": 0.2521, |
| "step": 6575 |
| }, |
| { |
| "epoch": 1.4065840102607952, |
| "grad_norm": 4.426266670227051, |
| "learning_rate": 6.596199524940618e-06, |
| "loss": 0.3113, |
| "step": 6580 |
| }, |
| { |
| "epoch": 1.40765284309534, |
| "grad_norm": 4.535027980804443, |
| "learning_rate": 6.584323040380049e-06, |
| "loss": 0.3537, |
| "step": 6585 |
| }, |
| { |
| "epoch": 1.4087216759298846, |
| "grad_norm": 3.585488796234131, |
| "learning_rate": 6.5724465558194785e-06, |
| "loss": 0.2386, |
| "step": 6590 |
| }, |
| { |
| "epoch": 1.4097905087644294, |
| "grad_norm": 5.358974456787109, |
| "learning_rate": 6.560570071258908e-06, |
| "loss": 0.3081, |
| "step": 6595 |
| }, |
| { |
| "epoch": 1.4108593415989739, |
| "grad_norm": 3.859417200088501, |
| "learning_rate": 6.548693586698338e-06, |
| "loss": 0.189, |
| "step": 6600 |
| }, |
| { |
| "epoch": 1.4119281744335186, |
| "grad_norm": 3.350184679031372, |
| "learning_rate": 6.536817102137768e-06, |
| "loss": 0.2643, |
| "step": 6605 |
| }, |
| { |
| "epoch": 1.4129970072680633, |
| "grad_norm": 3.4859519004821777, |
| "learning_rate": 6.524940617577198e-06, |
| "loss": 0.2206, |
| "step": 6610 |
| }, |
| { |
| "epoch": 1.4140658401026078, |
| "grad_norm": 6.238532543182373, |
| "learning_rate": 6.513064133016628e-06, |
| "loss": 0.3687, |
| "step": 6615 |
| }, |
| { |
| "epoch": 1.4151346729371526, |
| "grad_norm": 6.955577850341797, |
| "learning_rate": 6.5011876484560576e-06, |
| "loss": 0.3626, |
| "step": 6620 |
| }, |
| { |
| "epoch": 1.4162035057716973, |
| "grad_norm": 4.4574995040893555, |
| "learning_rate": 6.489311163895487e-06, |
| "loss": 0.2711, |
| "step": 6625 |
| }, |
| { |
| "epoch": 1.417272338606242, |
| "grad_norm": 4.533407211303711, |
| "learning_rate": 6.477434679334918e-06, |
| "loss": 0.3304, |
| "step": 6630 |
| }, |
| { |
| "epoch": 1.4183411714407868, |
| "grad_norm": 2.947624921798706, |
| "learning_rate": 6.4655581947743475e-06, |
| "loss": 0.321, |
| "step": 6635 |
| }, |
| { |
| "epoch": 1.4194100042753313, |
| "grad_norm": 4.557621955871582, |
| "learning_rate": 6.453681710213777e-06, |
| "loss": 0.319, |
| "step": 6640 |
| }, |
| { |
| "epoch": 1.420478837109876, |
| "grad_norm": 4.511264324188232, |
| "learning_rate": 6.441805225653207e-06, |
| "loss": 0.2363, |
| "step": 6645 |
| }, |
| { |
| "epoch": 1.4215476699444207, |
| "grad_norm": 4.200313568115234, |
| "learning_rate": 6.429928741092637e-06, |
| "loss": 0.2319, |
| "step": 6650 |
| }, |
| { |
| "epoch": 1.4226165027789655, |
| "grad_norm": 7.376286506652832, |
| "learning_rate": 6.418052256532067e-06, |
| "loss": 0.3526, |
| "step": 6655 |
| }, |
| { |
| "epoch": 1.4236853356135102, |
| "grad_norm": 4.415379047393799, |
| "learning_rate": 6.406175771971497e-06, |
| "loss": 0.4186, |
| "step": 6660 |
| }, |
| { |
| "epoch": 1.4247541684480547, |
| "grad_norm": 4.578277587890625, |
| "learning_rate": 6.394299287410927e-06, |
| "loss": 0.2846, |
| "step": 6665 |
| }, |
| { |
| "epoch": 1.4258230012825994, |
| "grad_norm": 4.811502456665039, |
| "learning_rate": 6.382422802850356e-06, |
| "loss": 0.3077, |
| "step": 6670 |
| }, |
| { |
| "epoch": 1.4268918341171442, |
| "grad_norm": 3.3036532402038574, |
| "learning_rate": 6.370546318289787e-06, |
| "loss": 0.3709, |
| "step": 6675 |
| }, |
| { |
| "epoch": 1.4279606669516887, |
| "grad_norm": 4.229010105133057, |
| "learning_rate": 6.3586698337292165e-06, |
| "loss": 0.3438, |
| "step": 6680 |
| }, |
| { |
| "epoch": 1.4290294997862334, |
| "grad_norm": 7.352675914764404, |
| "learning_rate": 6.346793349168646e-06, |
| "loss": 0.4159, |
| "step": 6685 |
| }, |
| { |
| "epoch": 1.4300983326207781, |
| "grad_norm": 3.935654878616333, |
| "learning_rate": 6.334916864608076e-06, |
| "loss": 0.3511, |
| "step": 6690 |
| }, |
| { |
| "epoch": 1.4311671654553229, |
| "grad_norm": 4.271127700805664, |
| "learning_rate": 6.323040380047506e-06, |
| "loss": 0.3061, |
| "step": 6695 |
| }, |
| { |
| "epoch": 1.4322359982898676, |
| "grad_norm": 4.57000207901001, |
| "learning_rate": 6.311163895486936e-06, |
| "loss": 0.2694, |
| "step": 6700 |
| }, |
| { |
| "epoch": 1.433304831124412, |
| "grad_norm": 4.243838787078857, |
| "learning_rate": 6.299287410926367e-06, |
| "loss": 0.3412, |
| "step": 6705 |
| }, |
| { |
| "epoch": 1.4343736639589568, |
| "grad_norm": 4.534287929534912, |
| "learning_rate": 6.2874109263657964e-06, |
| "loss": 0.2631, |
| "step": 6710 |
| }, |
| { |
| "epoch": 1.4354424967935016, |
| "grad_norm": 4.457566261291504, |
| "learning_rate": 6.275534441805227e-06, |
| "loss": 0.3879, |
| "step": 6715 |
| }, |
| { |
| "epoch": 1.436511329628046, |
| "grad_norm": 3.7211356163024902, |
| "learning_rate": 6.263657957244657e-06, |
| "loss": 0.2942, |
| "step": 6720 |
| }, |
| { |
| "epoch": 1.4375801624625908, |
| "grad_norm": 6.8076300621032715, |
| "learning_rate": 6.251781472684086e-06, |
| "loss": 0.3901, |
| "step": 6725 |
| }, |
| { |
| "epoch": 1.4386489952971355, |
| "grad_norm": 6.238668441772461, |
| "learning_rate": 6.239904988123516e-06, |
| "loss": 0.3192, |
| "step": 6730 |
| }, |
| { |
| "epoch": 1.4397178281316803, |
| "grad_norm": 4.374307155609131, |
| "learning_rate": 6.228028503562946e-06, |
| "loss": 0.2269, |
| "step": 6735 |
| }, |
| { |
| "epoch": 1.440786660966225, |
| "grad_norm": 5.202229976654053, |
| "learning_rate": 6.216152019002376e-06, |
| "loss": 0.372, |
| "step": 6740 |
| }, |
| { |
| "epoch": 1.4418554938007695, |
| "grad_norm": 4.483334064483643, |
| "learning_rate": 6.204275534441806e-06, |
| "loss": 0.2467, |
| "step": 6745 |
| }, |
| { |
| "epoch": 1.4429243266353142, |
| "grad_norm": 3.3366737365722656, |
| "learning_rate": 6.192399049881236e-06, |
| "loss": 0.2313, |
| "step": 6750 |
| }, |
| { |
| "epoch": 1.443993159469859, |
| "grad_norm": 6.443348407745361, |
| "learning_rate": 6.1805225653206655e-06, |
| "loss": 0.3538, |
| "step": 6755 |
| }, |
| { |
| "epoch": 1.4450619923044035, |
| "grad_norm": 3.4701974391937256, |
| "learning_rate": 6.168646080760096e-06, |
| "loss": 0.252, |
| "step": 6760 |
| }, |
| { |
| "epoch": 1.4461308251389482, |
| "grad_norm": 3.572749137878418, |
| "learning_rate": 6.156769596199526e-06, |
| "loss": 0.3049, |
| "step": 6765 |
| }, |
| { |
| "epoch": 1.447199657973493, |
| "grad_norm": 4.363938808441162, |
| "learning_rate": 6.144893111638955e-06, |
| "loss": 0.2796, |
| "step": 6770 |
| }, |
| { |
| "epoch": 1.4482684908080377, |
| "grad_norm": 3.493666172027588, |
| "learning_rate": 6.133016627078385e-06, |
| "loss": 0.2128, |
| "step": 6775 |
| }, |
| { |
| "epoch": 1.4493373236425824, |
| "grad_norm": 4.754271507263184, |
| "learning_rate": 6.121140142517815e-06, |
| "loss": 0.3407, |
| "step": 6780 |
| }, |
| { |
| "epoch": 1.450406156477127, |
| "grad_norm": 4.948278903961182, |
| "learning_rate": 6.109263657957245e-06, |
| "loss": 0.2196, |
| "step": 6785 |
| }, |
| { |
| "epoch": 1.4514749893116716, |
| "grad_norm": 4.344764709472656, |
| "learning_rate": 6.097387173396675e-06, |
| "loss": 0.2472, |
| "step": 6790 |
| }, |
| { |
| "epoch": 1.4525438221462164, |
| "grad_norm": 4.455203056335449, |
| "learning_rate": 6.085510688836105e-06, |
| "loss": 0.2788, |
| "step": 6795 |
| }, |
| { |
| "epoch": 1.453612654980761, |
| "grad_norm": 5.69878625869751, |
| "learning_rate": 6.0736342042755345e-06, |
| "loss": 0.305, |
| "step": 6800 |
| }, |
| { |
| "epoch": 1.4546814878153058, |
| "grad_norm": 4.746001243591309, |
| "learning_rate": 6.061757719714965e-06, |
| "loss": 0.3072, |
| "step": 6805 |
| }, |
| { |
| "epoch": 1.4557503206498503, |
| "grad_norm": 3.463618755340576, |
| "learning_rate": 6.049881235154395e-06, |
| "loss": 0.2879, |
| "step": 6810 |
| }, |
| { |
| "epoch": 1.456819153484395, |
| "grad_norm": 3.4969255924224854, |
| "learning_rate": 6.0380047505938244e-06, |
| "loss": 0.4406, |
| "step": 6815 |
| }, |
| { |
| "epoch": 1.4578879863189398, |
| "grad_norm": 3.6291632652282715, |
| "learning_rate": 6.026128266033254e-06, |
| "loss": 0.3371, |
| "step": 6820 |
| }, |
| { |
| "epoch": 1.4589568191534843, |
| "grad_norm": 4.0304765701293945, |
| "learning_rate": 6.014251781472684e-06, |
| "loss": 0.2775, |
| "step": 6825 |
| }, |
| { |
| "epoch": 1.460025651988029, |
| "grad_norm": 3.6861469745635986, |
| "learning_rate": 6.002375296912114e-06, |
| "loss": 0.2872, |
| "step": 6830 |
| }, |
| { |
| "epoch": 1.4610944848225738, |
| "grad_norm": 4.720432758331299, |
| "learning_rate": 5.990498812351544e-06, |
| "loss": 0.3056, |
| "step": 6835 |
| }, |
| { |
| "epoch": 1.4621633176571185, |
| "grad_norm": 3.8419721126556396, |
| "learning_rate": 5.978622327790974e-06, |
| "loss": 0.3183, |
| "step": 6840 |
| }, |
| { |
| "epoch": 1.4632321504916632, |
| "grad_norm": 4.320315361022949, |
| "learning_rate": 5.9667458432304035e-06, |
| "loss": 0.2801, |
| "step": 6845 |
| }, |
| { |
| "epoch": 1.4643009833262077, |
| "grad_norm": 4.07327127456665, |
| "learning_rate": 5.954869358669835e-06, |
| "loss": 0.2641, |
| "step": 6850 |
| }, |
| { |
| "epoch": 1.4653698161607525, |
| "grad_norm": 5.109342098236084, |
| "learning_rate": 5.942992874109265e-06, |
| "loss": 0.2903, |
| "step": 6855 |
| }, |
| { |
| "epoch": 1.4664386489952972, |
| "grad_norm": 5.147985458374023, |
| "learning_rate": 5.931116389548694e-06, |
| "loss": 0.4097, |
| "step": 6860 |
| }, |
| { |
| "epoch": 1.4675074818298417, |
| "grad_norm": 5.812030792236328, |
| "learning_rate": 5.919239904988124e-06, |
| "loss": 0.2133, |
| "step": 6865 |
| }, |
| { |
| "epoch": 1.4685763146643864, |
| "grad_norm": 4.3751220703125, |
| "learning_rate": 5.9073634204275545e-06, |
| "loss": 0.3064, |
| "step": 6870 |
| }, |
| { |
| "epoch": 1.4696451474989312, |
| "grad_norm": 3.8219094276428223, |
| "learning_rate": 5.895486935866984e-06, |
| "loss": 0.2629, |
| "step": 6875 |
| }, |
| { |
| "epoch": 1.470713980333476, |
| "grad_norm": 3.550219774246216, |
| "learning_rate": 5.883610451306414e-06, |
| "loss": 0.1846, |
| "step": 6880 |
| }, |
| { |
| "epoch": 1.4717828131680206, |
| "grad_norm": 4.344959259033203, |
| "learning_rate": 5.871733966745844e-06, |
| "loss": 0.2552, |
| "step": 6885 |
| }, |
| { |
| "epoch": 1.4728516460025651, |
| "grad_norm": 3.7821099758148193, |
| "learning_rate": 5.859857482185274e-06, |
| "loss": 0.2812, |
| "step": 6890 |
| }, |
| { |
| "epoch": 1.4739204788371099, |
| "grad_norm": 5.074913501739502, |
| "learning_rate": 5.847980997624704e-06, |
| "loss": 0.2796, |
| "step": 6895 |
| }, |
| { |
| "epoch": 1.4749893116716546, |
| "grad_norm": 5.702268600463867, |
| "learning_rate": 5.836104513064134e-06, |
| "loss": 0.3157, |
| "step": 6900 |
| }, |
| { |
| "epoch": 1.476058144506199, |
| "grad_norm": 4.769154071807861, |
| "learning_rate": 5.824228028503563e-06, |
| "loss": 0.271, |
| "step": 6905 |
| }, |
| { |
| "epoch": 1.4771269773407438, |
| "grad_norm": 3.915893077850342, |
| "learning_rate": 5.812351543942993e-06, |
| "loss": 0.2352, |
| "step": 6910 |
| }, |
| { |
| "epoch": 1.4781958101752886, |
| "grad_norm": 5.49572229385376, |
| "learning_rate": 5.8004750593824236e-06, |
| "loss": 0.3752, |
| "step": 6915 |
| }, |
| { |
| "epoch": 1.4792646430098333, |
| "grad_norm": 5.197114944458008, |
| "learning_rate": 5.788598574821853e-06, |
| "loss": 0.2811, |
| "step": 6920 |
| }, |
| { |
| "epoch": 1.480333475844378, |
| "grad_norm": 4.672935485839844, |
| "learning_rate": 5.776722090261283e-06, |
| "loss": 0.3303, |
| "step": 6925 |
| }, |
| { |
| "epoch": 1.4814023086789225, |
| "grad_norm": 3.5662314891815186, |
| "learning_rate": 5.764845605700713e-06, |
| "loss": 0.3382, |
| "step": 6930 |
| }, |
| { |
| "epoch": 1.4824711415134673, |
| "grad_norm": 3.7478342056274414, |
| "learning_rate": 5.752969121140143e-06, |
| "loss": 0.2235, |
| "step": 6935 |
| }, |
| { |
| "epoch": 1.483539974348012, |
| "grad_norm": 5.836414813995361, |
| "learning_rate": 5.741092636579573e-06, |
| "loss": 0.2446, |
| "step": 6940 |
| }, |
| { |
| "epoch": 1.4846088071825567, |
| "grad_norm": 4.945041179656982, |
| "learning_rate": 5.729216152019003e-06, |
| "loss": 0.2745, |
| "step": 6945 |
| }, |
| { |
| "epoch": 1.4856776400171015, |
| "grad_norm": 4.556496620178223, |
| "learning_rate": 5.717339667458432e-06, |
| "loss": 0.3061, |
| "step": 6950 |
| }, |
| { |
| "epoch": 1.486746472851646, |
| "grad_norm": 5.837685585021973, |
| "learning_rate": 5.705463182897862e-06, |
| "loss": 0.3059, |
| "step": 6955 |
| }, |
| { |
| "epoch": 1.4878153056861907, |
| "grad_norm": 3.4342663288116455, |
| "learning_rate": 5.6935866983372926e-06, |
| "loss": 0.2692, |
| "step": 6960 |
| }, |
| { |
| "epoch": 1.4888841385207354, |
| "grad_norm": 4.30683708190918, |
| "learning_rate": 5.681710213776722e-06, |
| "loss": 0.2853, |
| "step": 6965 |
| }, |
| { |
| "epoch": 1.48995297135528, |
| "grad_norm": 3.7401227951049805, |
| "learning_rate": 5.669833729216152e-06, |
| "loss": 0.238, |
| "step": 6970 |
| }, |
| { |
| "epoch": 1.4910218041898247, |
| "grad_norm": 3.991908311843872, |
| "learning_rate": 5.657957244655582e-06, |
| "loss": 0.3086, |
| "step": 6975 |
| }, |
| { |
| "epoch": 1.4920906370243694, |
| "grad_norm": 5.546383857727051, |
| "learning_rate": 5.646080760095012e-06, |
| "loss": 0.3016, |
| "step": 6980 |
| }, |
| { |
| "epoch": 1.4931594698589141, |
| "grad_norm": 4.429809093475342, |
| "learning_rate": 5.634204275534442e-06, |
| "loss": 0.3272, |
| "step": 6985 |
| }, |
| { |
| "epoch": 1.4942283026934589, |
| "grad_norm": 4.91778564453125, |
| "learning_rate": 5.6223277909738725e-06, |
| "loss": 0.3125, |
| "step": 6990 |
| }, |
| { |
| "epoch": 1.4952971355280034, |
| "grad_norm": 5.806905269622803, |
| "learning_rate": 5.610451306413302e-06, |
| "loss": 0.2777, |
| "step": 6995 |
| }, |
| { |
| "epoch": 1.496365968362548, |
| "grad_norm": 5.0485711097717285, |
| "learning_rate": 5.598574821852733e-06, |
| "loss": 0.3026, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.4974348011970928, |
| "grad_norm": 4.642349720001221, |
| "learning_rate": 5.5866983372921624e-06, |
| "loss": 0.2522, |
| "step": 7005 |
| }, |
| { |
| "epoch": 1.4985036340316373, |
| "grad_norm": 3.192457437515259, |
| "learning_rate": 5.574821852731592e-06, |
| "loss": 0.2487, |
| "step": 7010 |
| }, |
| { |
| "epoch": 1.499572466866182, |
| "grad_norm": 4.002120494842529, |
| "learning_rate": 5.562945368171022e-06, |
| "loss": 0.2316, |
| "step": 7015 |
| }, |
| { |
| "epoch": 1.5006412997007268, |
| "grad_norm": 4.840696334838867, |
| "learning_rate": 5.551068883610452e-06, |
| "loss": 0.2484, |
| "step": 7020 |
| }, |
| { |
| "epoch": 1.5017101325352715, |
| "grad_norm": 4.7393927574157715, |
| "learning_rate": 5.539192399049882e-06, |
| "loss": 0.2852, |
| "step": 7025 |
| }, |
| { |
| "epoch": 1.5027789653698163, |
| "grad_norm": 4.964815616607666, |
| "learning_rate": 5.527315914489312e-06, |
| "loss": 0.2944, |
| "step": 7030 |
| }, |
| { |
| "epoch": 1.5038477982043608, |
| "grad_norm": 4.7306342124938965, |
| "learning_rate": 5.5154394299287415e-06, |
| "loss": 0.3133, |
| "step": 7035 |
| }, |
| { |
| "epoch": 1.5049166310389055, |
| "grad_norm": 5.262001991271973, |
| "learning_rate": 5.503562945368171e-06, |
| "loss": 0.2557, |
| "step": 7040 |
| }, |
| { |
| "epoch": 1.5059854638734502, |
| "grad_norm": 4.136565685272217, |
| "learning_rate": 5.491686460807602e-06, |
| "loss": 0.232, |
| "step": 7045 |
| }, |
| { |
| "epoch": 1.5070542967079947, |
| "grad_norm": 3.917520046234131, |
| "learning_rate": 5.4798099762470315e-06, |
| "loss": 0.2635, |
| "step": 7050 |
| }, |
| { |
| "epoch": 1.5081231295425397, |
| "grad_norm": 5.6809210777282715, |
| "learning_rate": 5.467933491686461e-06, |
| "loss": 0.3033, |
| "step": 7055 |
| }, |
| { |
| "epoch": 1.5091919623770842, |
| "grad_norm": 3.7200369834899902, |
| "learning_rate": 5.456057007125891e-06, |
| "loss": 0.2477, |
| "step": 7060 |
| }, |
| { |
| "epoch": 1.510260795211629, |
| "grad_norm": 4.6949543952941895, |
| "learning_rate": 5.444180522565321e-06, |
| "loss": 0.2443, |
| "step": 7065 |
| }, |
| { |
| "epoch": 1.5113296280461737, |
| "grad_norm": 4.025641918182373, |
| "learning_rate": 5.432304038004751e-06, |
| "loss": 0.4329, |
| "step": 7070 |
| }, |
| { |
| "epoch": 1.5123984608807182, |
| "grad_norm": 3.7725117206573486, |
| "learning_rate": 5.420427553444181e-06, |
| "loss": 0.2682, |
| "step": 7075 |
| }, |
| { |
| "epoch": 1.513467293715263, |
| "grad_norm": 4.11836051940918, |
| "learning_rate": 5.4085510688836105e-06, |
| "loss": 0.3149, |
| "step": 7080 |
| }, |
| { |
| "epoch": 1.5145361265498076, |
| "grad_norm": 4.033612251281738, |
| "learning_rate": 5.39667458432304e-06, |
| "loss": 0.353, |
| "step": 7085 |
| }, |
| { |
| "epoch": 1.5156049593843521, |
| "grad_norm": 5.4751482009887695, |
| "learning_rate": 5.384798099762471e-06, |
| "loss": 0.2247, |
| "step": 7090 |
| }, |
| { |
| "epoch": 1.516673792218897, |
| "grad_norm": 4.203334808349609, |
| "learning_rate": 5.3729216152019005e-06, |
| "loss": 0.2862, |
| "step": 7095 |
| }, |
| { |
| "epoch": 1.5177426250534416, |
| "grad_norm": 5.31473970413208, |
| "learning_rate": 5.36104513064133e-06, |
| "loss": 0.287, |
| "step": 7100 |
| }, |
| { |
| "epoch": 1.5188114578879863, |
| "grad_norm": 4.896878719329834, |
| "learning_rate": 5.34916864608076e-06, |
| "loss": 0.3141, |
| "step": 7105 |
| }, |
| { |
| "epoch": 1.519880290722531, |
| "grad_norm": 3.62528133392334, |
| "learning_rate": 5.33729216152019e-06, |
| "loss": 0.4446, |
| "step": 7110 |
| }, |
| { |
| "epoch": 1.5209491235570756, |
| "grad_norm": 5.231464385986328, |
| "learning_rate": 5.32541567695962e-06, |
| "loss": 0.2853, |
| "step": 7115 |
| }, |
| { |
| "epoch": 1.5220179563916203, |
| "grad_norm": 3.0587196350097656, |
| "learning_rate": 5.31353919239905e-06, |
| "loss": 0.2662, |
| "step": 7120 |
| }, |
| { |
| "epoch": 1.523086789226165, |
| "grad_norm": 5.080547332763672, |
| "learning_rate": 5.3016627078384795e-06, |
| "loss": 0.2729, |
| "step": 7125 |
| }, |
| { |
| "epoch": 1.5241556220607098, |
| "grad_norm": 3.547877073287964, |
| "learning_rate": 5.289786223277911e-06, |
| "loss": 0.2376, |
| "step": 7130 |
| }, |
| { |
| "epoch": 1.5252244548952545, |
| "grad_norm": 3.9913973808288574, |
| "learning_rate": 5.277909738717341e-06, |
| "loss": 0.2434, |
| "step": 7135 |
| }, |
| { |
| "epoch": 1.526293287729799, |
| "grad_norm": 3.9852547645568848, |
| "learning_rate": 5.26603325415677e-06, |
| "loss": 0.302, |
| "step": 7140 |
| }, |
| { |
| "epoch": 1.5273621205643437, |
| "grad_norm": 3.660104274749756, |
| "learning_rate": 5.2541567695962e-06, |
| "loss": 0.2346, |
| "step": 7145 |
| }, |
| { |
| "epoch": 1.5284309533988885, |
| "grad_norm": 4.887364387512207, |
| "learning_rate": 5.242280285035631e-06, |
| "loss": 0.4036, |
| "step": 7150 |
| }, |
| { |
| "epoch": 1.529499786233433, |
| "grad_norm": 5.766690254211426, |
| "learning_rate": 5.23040380047506e-06, |
| "loss": 0.2902, |
| "step": 7155 |
| }, |
| { |
| "epoch": 1.530568619067978, |
| "grad_norm": 5.018100738525391, |
| "learning_rate": 5.21852731591449e-06, |
| "loss": 0.4254, |
| "step": 7160 |
| }, |
| { |
| "epoch": 1.5316374519025224, |
| "grad_norm": 2.8769116401672363, |
| "learning_rate": 5.20665083135392e-06, |
| "loss": 0.2863, |
| "step": 7165 |
| }, |
| { |
| "epoch": 1.5327062847370672, |
| "grad_norm": 4.766345024108887, |
| "learning_rate": 5.194774346793349e-06, |
| "loss": 0.2618, |
| "step": 7170 |
| }, |
| { |
| "epoch": 1.533775117571612, |
| "grad_norm": 4.371603012084961, |
| "learning_rate": 5.18289786223278e-06, |
| "loss": 0.3614, |
| "step": 7175 |
| }, |
| { |
| "epoch": 1.5348439504061564, |
| "grad_norm": 3.7386531829833984, |
| "learning_rate": 5.17102137767221e-06, |
| "loss": 0.3084, |
| "step": 7180 |
| }, |
| { |
| "epoch": 1.5359127832407011, |
| "grad_norm": 3.2616264820098877, |
| "learning_rate": 5.159144893111639e-06, |
| "loss": 0.2799, |
| "step": 7185 |
| }, |
| { |
| "epoch": 1.5369816160752459, |
| "grad_norm": 4.840415000915527, |
| "learning_rate": 5.147268408551069e-06, |
| "loss": 0.2843, |
| "step": 7190 |
| }, |
| { |
| "epoch": 1.5380504489097904, |
| "grad_norm": 2.643326997756958, |
| "learning_rate": 5.1353919239905e-06, |
| "loss": 0.255, |
| "step": 7195 |
| }, |
| { |
| "epoch": 1.5391192817443353, |
| "grad_norm": 3.9539496898651123, |
| "learning_rate": 5.123515439429929e-06, |
| "loss": 0.2278, |
| "step": 7200 |
| }, |
| { |
| "epoch": 1.5401881145788798, |
| "grad_norm": 4.173327922821045, |
| "learning_rate": 5.111638954869359e-06, |
| "loss": 0.3137, |
| "step": 7205 |
| }, |
| { |
| "epoch": 1.5412569474134246, |
| "grad_norm": 4.327914237976074, |
| "learning_rate": 5.099762470308789e-06, |
| "loss": 0.3365, |
| "step": 7210 |
| }, |
| { |
| "epoch": 1.5423257802479693, |
| "grad_norm": 2.9048960208892822, |
| "learning_rate": 5.087885985748218e-06, |
| "loss": 0.1981, |
| "step": 7215 |
| }, |
| { |
| "epoch": 1.5433946130825138, |
| "grad_norm": 4.26038932800293, |
| "learning_rate": 5.076009501187649e-06, |
| "loss": 0.2338, |
| "step": 7220 |
| }, |
| { |
| "epoch": 1.5444634459170585, |
| "grad_norm": 5.362328052520752, |
| "learning_rate": 5.064133016627079e-06, |
| "loss": 0.2692, |
| "step": 7225 |
| }, |
| { |
| "epoch": 1.5455322787516033, |
| "grad_norm": 4.408464431762695, |
| "learning_rate": 5.052256532066508e-06, |
| "loss": 0.2129, |
| "step": 7230 |
| }, |
| { |
| "epoch": 1.5466011115861478, |
| "grad_norm": 5.237843990325928, |
| "learning_rate": 5.040380047505938e-06, |
| "loss": 0.2395, |
| "step": 7235 |
| }, |
| { |
| "epoch": 1.5476699444206927, |
| "grad_norm": 6.1017045974731445, |
| "learning_rate": 5.028503562945369e-06, |
| "loss": 0.485, |
| "step": 7240 |
| }, |
| { |
| "epoch": 1.5487387772552372, |
| "grad_norm": 5.8066582679748535, |
| "learning_rate": 5.016627078384798e-06, |
| "loss": 0.2166, |
| "step": 7245 |
| }, |
| { |
| "epoch": 1.549807610089782, |
| "grad_norm": 6.7323899269104, |
| "learning_rate": 5.004750593824228e-06, |
| "loss": 0.2799, |
| "step": 7250 |
| }, |
| { |
| "epoch": 1.5508764429243267, |
| "grad_norm": 4.477848052978516, |
| "learning_rate": 4.9928741092636586e-06, |
| "loss": 0.2856, |
| "step": 7255 |
| }, |
| { |
| "epoch": 1.5519452757588712, |
| "grad_norm": 3.282881498336792, |
| "learning_rate": 4.980997624703088e-06, |
| "loss": 0.272, |
| "step": 7260 |
| }, |
| { |
| "epoch": 1.5530141085934162, |
| "grad_norm": 4.757537364959717, |
| "learning_rate": 4.969121140142518e-06, |
| "loss": 0.299, |
| "step": 7265 |
| }, |
| { |
| "epoch": 1.5540829414279607, |
| "grad_norm": 6.090857028961182, |
| "learning_rate": 4.9572446555819485e-06, |
| "loss": 0.3309, |
| "step": 7270 |
| }, |
| { |
| "epoch": 1.5551517742625054, |
| "grad_norm": 3.326892137527466, |
| "learning_rate": 4.945368171021378e-06, |
| "loss": 0.223, |
| "step": 7275 |
| }, |
| { |
| "epoch": 1.5562206070970501, |
| "grad_norm": 3.5346665382385254, |
| "learning_rate": 4.933491686460808e-06, |
| "loss": 0.2351, |
| "step": 7280 |
| }, |
| { |
| "epoch": 1.5572894399315946, |
| "grad_norm": 3.1125802993774414, |
| "learning_rate": 4.921615201900238e-06, |
| "loss": 0.2177, |
| "step": 7285 |
| }, |
| { |
| "epoch": 1.5583582727661394, |
| "grad_norm": 3.7614200115203857, |
| "learning_rate": 4.909738717339667e-06, |
| "loss": 0.2606, |
| "step": 7290 |
| }, |
| { |
| "epoch": 1.559427105600684, |
| "grad_norm": 3.761014223098755, |
| "learning_rate": 4.897862232779098e-06, |
| "loss": 0.3972, |
| "step": 7295 |
| }, |
| { |
| "epoch": 1.5604959384352286, |
| "grad_norm": 3.6661438941955566, |
| "learning_rate": 4.885985748218528e-06, |
| "loss": 0.2594, |
| "step": 7300 |
| }, |
| { |
| "epoch": 1.5615647712697736, |
| "grad_norm": 4.455360412597656, |
| "learning_rate": 4.874109263657958e-06, |
| "loss": 0.2934, |
| "step": 7305 |
| }, |
| { |
| "epoch": 1.562633604104318, |
| "grad_norm": 4.19691801071167, |
| "learning_rate": 4.862232779097388e-06, |
| "loss": 0.4105, |
| "step": 7310 |
| }, |
| { |
| "epoch": 1.5637024369388628, |
| "grad_norm": 4.041048049926758, |
| "learning_rate": 4.8503562945368175e-06, |
| "loss": 0.1971, |
| "step": 7315 |
| }, |
| { |
| "epoch": 1.5647712697734075, |
| "grad_norm": 3.2611756324768066, |
| "learning_rate": 4.838479809976247e-06, |
| "loss": 0.2107, |
| "step": 7320 |
| }, |
| { |
| "epoch": 1.565840102607952, |
| "grad_norm": 3.419591188430786, |
| "learning_rate": 4.826603325415678e-06, |
| "loss": 0.2441, |
| "step": 7325 |
| }, |
| { |
| "epoch": 1.5669089354424968, |
| "grad_norm": 4.567037105560303, |
| "learning_rate": 4.8147268408551075e-06, |
| "loss": 0.2413, |
| "step": 7330 |
| }, |
| { |
| "epoch": 1.5679777682770415, |
| "grad_norm": 3.887484550476074, |
| "learning_rate": 4.802850356294537e-06, |
| "loss": 0.2619, |
| "step": 7335 |
| }, |
| { |
| "epoch": 1.569046601111586, |
| "grad_norm": 4.95120906829834, |
| "learning_rate": 4.790973871733967e-06, |
| "loss": 0.3098, |
| "step": 7340 |
| }, |
| { |
| "epoch": 1.570115433946131, |
| "grad_norm": 4.205053806304932, |
| "learning_rate": 4.779097387173397e-06, |
| "loss": 0.3024, |
| "step": 7345 |
| }, |
| { |
| "epoch": 1.5711842667806755, |
| "grad_norm": 6.198763847351074, |
| "learning_rate": 4.767220902612827e-06, |
| "loss": 0.2548, |
| "step": 7350 |
| }, |
| { |
| "epoch": 1.5722530996152202, |
| "grad_norm": 4.158599853515625, |
| "learning_rate": 4.755344418052257e-06, |
| "loss": 0.2925, |
| "step": 7355 |
| }, |
| { |
| "epoch": 1.573321932449765, |
| "grad_norm": 3.3105695247650146, |
| "learning_rate": 4.7434679334916866e-06, |
| "loss": 0.2245, |
| "step": 7360 |
| }, |
| { |
| "epoch": 1.5743907652843094, |
| "grad_norm": 2.852360963821411, |
| "learning_rate": 4.731591448931116e-06, |
| "loss": 0.2612, |
| "step": 7365 |
| }, |
| { |
| "epoch": 1.5754595981188542, |
| "grad_norm": 5.082930564880371, |
| "learning_rate": 4.719714964370547e-06, |
| "loss": 0.4075, |
| "step": 7370 |
| }, |
| { |
| "epoch": 1.576528430953399, |
| "grad_norm": 3.626047372817993, |
| "learning_rate": 4.7078384798099765e-06, |
| "loss": 0.2457, |
| "step": 7375 |
| }, |
| { |
| "epoch": 1.5775972637879434, |
| "grad_norm": 3.2513113021850586, |
| "learning_rate": 4.695961995249407e-06, |
| "loss": 0.214, |
| "step": 7380 |
| }, |
| { |
| "epoch": 1.5786660966224884, |
| "grad_norm": 4.396987438201904, |
| "learning_rate": 4.684085510688837e-06, |
| "loss": 0.2761, |
| "step": 7385 |
| }, |
| { |
| "epoch": 1.5797349294570329, |
| "grad_norm": 4.177000045776367, |
| "learning_rate": 4.6722090261282665e-06, |
| "loss": 0.278, |
| "step": 7390 |
| }, |
| { |
| "epoch": 1.5808037622915776, |
| "grad_norm": 6.472886562347412, |
| "learning_rate": 4.660332541567696e-06, |
| "loss": 0.4008, |
| "step": 7395 |
| }, |
| { |
| "epoch": 1.5818725951261223, |
| "grad_norm": 5.244050979614258, |
| "learning_rate": 4.648456057007127e-06, |
| "loss": 0.3222, |
| "step": 7400 |
| }, |
| { |
| "epoch": 1.5829414279606668, |
| "grad_norm": 3.3180673122406006, |
| "learning_rate": 4.636579572446556e-06, |
| "loss": 0.2645, |
| "step": 7405 |
| }, |
| { |
| "epoch": 1.5840102607952118, |
| "grad_norm": 4.317756652832031, |
| "learning_rate": 4.624703087885986e-06, |
| "loss": 0.2121, |
| "step": 7410 |
| }, |
| { |
| "epoch": 1.5850790936297563, |
| "grad_norm": 5.13472843170166, |
| "learning_rate": 4.612826603325416e-06, |
| "loss": 0.2679, |
| "step": 7415 |
| }, |
| { |
| "epoch": 1.586147926464301, |
| "grad_norm": 4.850220680236816, |
| "learning_rate": 4.6009501187648455e-06, |
| "loss": 0.342, |
| "step": 7420 |
| }, |
| { |
| "epoch": 1.5872167592988458, |
| "grad_norm": 3.7907469272613525, |
| "learning_rate": 4.589073634204276e-06, |
| "loss": 0.2312, |
| "step": 7425 |
| }, |
| { |
| "epoch": 1.5882855921333903, |
| "grad_norm": 5.306363582611084, |
| "learning_rate": 4.577197149643706e-06, |
| "loss": 0.3332, |
| "step": 7430 |
| }, |
| { |
| "epoch": 1.589354424967935, |
| "grad_norm": 4.227755069732666, |
| "learning_rate": 4.5653206650831355e-06, |
| "loss": 0.2628, |
| "step": 7435 |
| }, |
| { |
| "epoch": 1.5904232578024797, |
| "grad_norm": 4.175191879272461, |
| "learning_rate": 4.553444180522565e-06, |
| "loss": 0.2824, |
| "step": 7440 |
| }, |
| { |
| "epoch": 1.5914920906370242, |
| "grad_norm": 4.70232629776001, |
| "learning_rate": 4.541567695961996e-06, |
| "loss": 0.3249, |
| "step": 7445 |
| }, |
| { |
| "epoch": 1.5925609234715692, |
| "grad_norm": 5.078143119812012, |
| "learning_rate": 4.5296912114014254e-06, |
| "loss": 0.3738, |
| "step": 7450 |
| }, |
| { |
| "epoch": 1.5936297563061137, |
| "grad_norm": 3.0150363445281982, |
| "learning_rate": 4.517814726840856e-06, |
| "loss": 0.3357, |
| "step": 7455 |
| }, |
| { |
| "epoch": 1.5946985891406584, |
| "grad_norm": 6.010279655456543, |
| "learning_rate": 4.505938242280286e-06, |
| "loss": 0.2563, |
| "step": 7460 |
| }, |
| { |
| "epoch": 1.5957674219752032, |
| "grad_norm": 4.169801712036133, |
| "learning_rate": 4.494061757719715e-06, |
| "loss": 0.2091, |
| "step": 7465 |
| }, |
| { |
| "epoch": 1.5968362548097477, |
| "grad_norm": 5.483653545379639, |
| "learning_rate": 4.482185273159145e-06, |
| "loss": 0.2425, |
| "step": 7470 |
| }, |
| { |
| "epoch": 1.5979050876442924, |
| "grad_norm": 3.874551773071289, |
| "learning_rate": 4.470308788598575e-06, |
| "loss": 0.2699, |
| "step": 7475 |
| }, |
| { |
| "epoch": 1.5989739204788371, |
| "grad_norm": 5.686993598937988, |
| "learning_rate": 4.458432304038005e-06, |
| "loss": 0.3409, |
| "step": 7480 |
| }, |
| { |
| "epoch": 1.6000427533133816, |
| "grad_norm": 4.527751922607422, |
| "learning_rate": 4.446555819477435e-06, |
| "loss": 0.2233, |
| "step": 7485 |
| }, |
| { |
| "epoch": 1.6011115861479266, |
| "grad_norm": 4.663357257843018, |
| "learning_rate": 4.434679334916865e-06, |
| "loss": 0.3269, |
| "step": 7490 |
| }, |
| { |
| "epoch": 1.602180418982471, |
| "grad_norm": 5.009659767150879, |
| "learning_rate": 4.4228028503562945e-06, |
| "loss": 0.3029, |
| "step": 7495 |
| }, |
| { |
| "epoch": 1.6032492518170158, |
| "grad_norm": 3.9787962436676025, |
| "learning_rate": 4.410926365795725e-06, |
| "loss": 0.2547, |
| "step": 7500 |
| }, |
| { |
| "epoch": 1.6043180846515606, |
| "grad_norm": 5.281296253204346, |
| "learning_rate": 4.399049881235155e-06, |
| "loss": 0.2855, |
| "step": 7505 |
| }, |
| { |
| "epoch": 1.605386917486105, |
| "grad_norm": 6.091033935546875, |
| "learning_rate": 4.387173396674584e-06, |
| "loss": 0.3106, |
| "step": 7510 |
| }, |
| { |
| "epoch": 1.6064557503206498, |
| "grad_norm": 5.57248067855835, |
| "learning_rate": 4.375296912114015e-06, |
| "loss": 0.262, |
| "step": 7515 |
| }, |
| { |
| "epoch": 1.6075245831551945, |
| "grad_norm": 4.538100242614746, |
| "learning_rate": 4.363420427553445e-06, |
| "loss": 0.3016, |
| "step": 7520 |
| }, |
| { |
| "epoch": 1.608593415989739, |
| "grad_norm": 2.859865665435791, |
| "learning_rate": 4.351543942992874e-06, |
| "loss": 0.2852, |
| "step": 7525 |
| }, |
| { |
| "epoch": 1.609662248824284, |
| "grad_norm": 4.841543197631836, |
| "learning_rate": 4.339667458432305e-06, |
| "loss": 0.3126, |
| "step": 7530 |
| }, |
| { |
| "epoch": 1.6107310816588285, |
| "grad_norm": 4.134354114532471, |
| "learning_rate": 4.327790973871735e-06, |
| "loss": 0.2779, |
| "step": 7535 |
| }, |
| { |
| "epoch": 1.6117999144933732, |
| "grad_norm": 5.4539875984191895, |
| "learning_rate": 4.315914489311164e-06, |
| "loss": 0.2811, |
| "step": 7540 |
| }, |
| { |
| "epoch": 1.612868747327918, |
| "grad_norm": 4.018299579620361, |
| "learning_rate": 4.304038004750594e-06, |
| "loss": 0.259, |
| "step": 7545 |
| }, |
| { |
| "epoch": 1.6139375801624625, |
| "grad_norm": 3.978214740753174, |
| "learning_rate": 4.292161520190024e-06, |
| "loss": 0.2602, |
| "step": 7550 |
| }, |
| { |
| "epoch": 1.6150064129970074, |
| "grad_norm": 4.782619953155518, |
| "learning_rate": 4.280285035629454e-06, |
| "loss": 0.2481, |
| "step": 7555 |
| }, |
| { |
| "epoch": 1.616075245831552, |
| "grad_norm": 4.34796142578125, |
| "learning_rate": 4.268408551068884e-06, |
| "loss": 0.2022, |
| "step": 7560 |
| }, |
| { |
| "epoch": 1.6171440786660967, |
| "grad_norm": 4.58864688873291, |
| "learning_rate": 4.256532066508314e-06, |
| "loss": 0.2943, |
| "step": 7565 |
| }, |
| { |
| "epoch": 1.6182129115006414, |
| "grad_norm": 3.2588422298431396, |
| "learning_rate": 4.244655581947743e-06, |
| "loss": 0.2144, |
| "step": 7570 |
| }, |
| { |
| "epoch": 1.619281744335186, |
| "grad_norm": 4.609071731567383, |
| "learning_rate": 4.232779097387174e-06, |
| "loss": 0.2589, |
| "step": 7575 |
| }, |
| { |
| "epoch": 1.6203505771697306, |
| "grad_norm": 3.8828067779541016, |
| "learning_rate": 4.220902612826604e-06, |
| "loss": 0.1999, |
| "step": 7580 |
| }, |
| { |
| "epoch": 1.6214194100042754, |
| "grad_norm": 5.068613052368164, |
| "learning_rate": 4.209026128266034e-06, |
| "loss": 0.3035, |
| "step": 7585 |
| }, |
| { |
| "epoch": 1.6224882428388199, |
| "grad_norm": 3.4416937828063965, |
| "learning_rate": 4.197149643705464e-06, |
| "loss": 0.2322, |
| "step": 7590 |
| }, |
| { |
| "epoch": 1.6235570756733648, |
| "grad_norm": 4.246146202087402, |
| "learning_rate": 4.185273159144894e-06, |
| "loss": 0.237, |
| "step": 7595 |
| }, |
| { |
| "epoch": 1.6246259085079093, |
| "grad_norm": 4.175546646118164, |
| "learning_rate": 4.173396674584323e-06, |
| "loss": 0.2815, |
| "step": 7600 |
| }, |
| { |
| "epoch": 1.625694741342454, |
| "grad_norm": 5.142884254455566, |
| "learning_rate": 4.161520190023753e-06, |
| "loss": 0.4136, |
| "step": 7605 |
| }, |
| { |
| "epoch": 1.6267635741769988, |
| "grad_norm": 4.261429309844971, |
| "learning_rate": 4.1496437054631835e-06, |
| "loss": 0.2474, |
| "step": 7610 |
| }, |
| { |
| "epoch": 1.6278324070115433, |
| "grad_norm": 5.0894646644592285, |
| "learning_rate": 4.137767220902613e-06, |
| "loss": 0.3143, |
| "step": 7615 |
| }, |
| { |
| "epoch": 1.628901239846088, |
| "grad_norm": 4.596246242523193, |
| "learning_rate": 4.125890736342043e-06, |
| "loss": 0.244, |
| "step": 7620 |
| }, |
| { |
| "epoch": 1.6299700726806328, |
| "grad_norm": 4.05454158782959, |
| "learning_rate": 4.114014251781473e-06, |
| "loss": 0.3134, |
| "step": 7625 |
| }, |
| { |
| "epoch": 1.6310389055151773, |
| "grad_norm": 5.604685306549072, |
| "learning_rate": 4.102137767220903e-06, |
| "loss": 0.2516, |
| "step": 7630 |
| }, |
| { |
| "epoch": 1.6321077383497222, |
| "grad_norm": 2.5428969860076904, |
| "learning_rate": 4.090261282660333e-06, |
| "loss": 0.3159, |
| "step": 7635 |
| }, |
| { |
| "epoch": 1.6331765711842667, |
| "grad_norm": 3.228505849838257, |
| "learning_rate": 4.078384798099763e-06, |
| "loss": 0.2519, |
| "step": 7640 |
| }, |
| { |
| "epoch": 1.6342454040188115, |
| "grad_norm": 5.0502753257751465, |
| "learning_rate": 4.066508313539192e-06, |
| "loss": 0.2785, |
| "step": 7645 |
| }, |
| { |
| "epoch": 1.6353142368533562, |
| "grad_norm": 3.824427366256714, |
| "learning_rate": 4.054631828978622e-06, |
| "loss": 0.2627, |
| "step": 7650 |
| }, |
| { |
| "epoch": 1.6363830696879007, |
| "grad_norm": 4.460954666137695, |
| "learning_rate": 4.0427553444180526e-06, |
| "loss": 0.2677, |
| "step": 7655 |
| }, |
| { |
| "epoch": 1.6374519025224454, |
| "grad_norm": 3.3890676498413086, |
| "learning_rate": 4.030878859857483e-06, |
| "loss": 0.1962, |
| "step": 7660 |
| }, |
| { |
| "epoch": 1.6385207353569902, |
| "grad_norm": 4.556974411010742, |
| "learning_rate": 4.019002375296913e-06, |
| "loss": 0.3779, |
| "step": 7665 |
| }, |
| { |
| "epoch": 1.639589568191535, |
| "grad_norm": 3.9803950786590576, |
| "learning_rate": 4.0071258907363425e-06, |
| "loss": 0.2731, |
| "step": 7670 |
| }, |
| { |
| "epoch": 1.6406584010260796, |
| "grad_norm": 3.7230427265167236, |
| "learning_rate": 3.995249406175772e-06, |
| "loss": 0.2806, |
| "step": 7675 |
| }, |
| { |
| "epoch": 1.6417272338606241, |
| "grad_norm": 3.6325037479400635, |
| "learning_rate": 3.983372921615202e-06, |
| "loss": 0.2582, |
| "step": 7680 |
| }, |
| { |
| "epoch": 1.6427960666951689, |
| "grad_norm": 4.024942398071289, |
| "learning_rate": 3.9714964370546325e-06, |
| "loss": 0.2064, |
| "step": 7685 |
| }, |
| { |
| "epoch": 1.6438648995297136, |
| "grad_norm": 4.7745819091796875, |
| "learning_rate": 3.959619952494062e-06, |
| "loss": 0.263, |
| "step": 7690 |
| }, |
| { |
| "epoch": 1.644933732364258, |
| "grad_norm": 3.8132996559143066, |
| "learning_rate": 3.947743467933492e-06, |
| "loss": 0.3126, |
| "step": 7695 |
| }, |
| { |
| "epoch": 1.646002565198803, |
| "grad_norm": 3.711763620376587, |
| "learning_rate": 3.9358669833729216e-06, |
| "loss": 0.2889, |
| "step": 7700 |
| }, |
| { |
| "epoch": 1.6470713980333476, |
| "grad_norm": 3.696894645690918, |
| "learning_rate": 3.923990498812352e-06, |
| "loss": 0.2372, |
| "step": 7705 |
| }, |
| { |
| "epoch": 1.6481402308678923, |
| "grad_norm": 5.242607593536377, |
| "learning_rate": 3.912114014251782e-06, |
| "loss": 0.2062, |
| "step": 7710 |
| }, |
| { |
| "epoch": 1.649209063702437, |
| "grad_norm": 3.8635284900665283, |
| "learning_rate": 3.9002375296912115e-06, |
| "loss": 0.3085, |
| "step": 7715 |
| }, |
| { |
| "epoch": 1.6502778965369815, |
| "grad_norm": 4.494617938995361, |
| "learning_rate": 3.888361045130641e-06, |
| "loss": 0.22, |
| "step": 7720 |
| }, |
| { |
| "epoch": 1.6513467293715263, |
| "grad_norm": 5.683468818664551, |
| "learning_rate": 3.876484560570072e-06, |
| "loss": 0.258, |
| "step": 7725 |
| }, |
| { |
| "epoch": 1.652415562206071, |
| "grad_norm": 7.1560845375061035, |
| "learning_rate": 3.8646080760095015e-06, |
| "loss": 0.2496, |
| "step": 7730 |
| }, |
| { |
| "epoch": 1.6534843950406155, |
| "grad_norm": 4.27496337890625, |
| "learning_rate": 3.852731591448932e-06, |
| "loss": 0.2975, |
| "step": 7735 |
| }, |
| { |
| "epoch": 1.6545532278751605, |
| "grad_norm": 5.494519233703613, |
| "learning_rate": 3.840855106888362e-06, |
| "loss": 0.2744, |
| "step": 7740 |
| }, |
| { |
| "epoch": 1.655622060709705, |
| "grad_norm": 4.088238716125488, |
| "learning_rate": 3.8289786223277914e-06, |
| "loss": 0.2255, |
| "step": 7745 |
| }, |
| { |
| "epoch": 1.6566908935442497, |
| "grad_norm": 3.627351760864258, |
| "learning_rate": 3.817102137767221e-06, |
| "loss": 0.2387, |
| "step": 7750 |
| }, |
| { |
| "epoch": 1.6577597263787944, |
| "grad_norm": 4.195761680603027, |
| "learning_rate": 3.8052256532066513e-06, |
| "loss": 0.2724, |
| "step": 7755 |
| }, |
| { |
| "epoch": 1.658828559213339, |
| "grad_norm": 4.758053779602051, |
| "learning_rate": 3.793349168646081e-06, |
| "loss": 0.282, |
| "step": 7760 |
| }, |
| { |
| "epoch": 1.6598973920478837, |
| "grad_norm": 3.427823066711426, |
| "learning_rate": 3.781472684085511e-06, |
| "loss": 0.166, |
| "step": 7765 |
| }, |
| { |
| "epoch": 1.6609662248824284, |
| "grad_norm": 4.784726142883301, |
| "learning_rate": 3.769596199524941e-06, |
| "loss": 0.2653, |
| "step": 7770 |
| }, |
| { |
| "epoch": 1.662035057716973, |
| "grad_norm": 4.018444538116455, |
| "learning_rate": 3.757719714964371e-06, |
| "loss": 0.2368, |
| "step": 7775 |
| }, |
| { |
| "epoch": 1.6631038905515179, |
| "grad_norm": 4.532012462615967, |
| "learning_rate": 3.7458432304038006e-06, |
| "loss": 0.2235, |
| "step": 7780 |
| }, |
| { |
| "epoch": 1.6641727233860624, |
| "grad_norm": 4.576938152313232, |
| "learning_rate": 3.7339667458432303e-06, |
| "loss": 0.309, |
| "step": 7785 |
| }, |
| { |
| "epoch": 1.665241556220607, |
| "grad_norm": 4.126202583312988, |
| "learning_rate": 3.7220902612826604e-06, |
| "loss": 0.3402, |
| "step": 7790 |
| }, |
| { |
| "epoch": 1.6663103890551518, |
| "grad_norm": 5.895056247711182, |
| "learning_rate": 3.710213776722091e-06, |
| "loss": 0.2741, |
| "step": 7795 |
| }, |
| { |
| "epoch": 1.6673792218896963, |
| "grad_norm": 5.252209663391113, |
| "learning_rate": 3.6983372921615207e-06, |
| "loss": 0.2282, |
| "step": 7800 |
| }, |
| { |
| "epoch": 1.6684480547242413, |
| "grad_norm": 5.411665439605713, |
| "learning_rate": 3.6864608076009504e-06, |
| "loss": 0.3233, |
| "step": 7805 |
| }, |
| { |
| "epoch": 1.6695168875587858, |
| "grad_norm": 3.801215887069702, |
| "learning_rate": 3.6745843230403805e-06, |
| "loss": 0.23, |
| "step": 7810 |
| }, |
| { |
| "epoch": 1.6705857203933305, |
| "grad_norm": 5.455605983734131, |
| "learning_rate": 3.6627078384798102e-06, |
| "loss": 0.22, |
| "step": 7815 |
| }, |
| { |
| "epoch": 1.6716545532278753, |
| "grad_norm": 3.8827927112579346, |
| "learning_rate": 3.6508313539192404e-06, |
| "loss": 0.216, |
| "step": 7820 |
| }, |
| { |
| "epoch": 1.6727233860624198, |
| "grad_norm": 3.9195375442504883, |
| "learning_rate": 3.63895486935867e-06, |
| "loss": 0.3479, |
| "step": 7825 |
| }, |
| { |
| "epoch": 1.6737922188969645, |
| "grad_norm": 4.495283603668213, |
| "learning_rate": 3.6270783847981e-06, |
| "loss": 0.2256, |
| "step": 7830 |
| }, |
| { |
| "epoch": 1.6748610517315092, |
| "grad_norm": 5.642339706420898, |
| "learning_rate": 3.61520190023753e-06, |
| "loss": 0.3205, |
| "step": 7835 |
| }, |
| { |
| "epoch": 1.6759298845660537, |
| "grad_norm": 5.5151495933532715, |
| "learning_rate": 3.60332541567696e-06, |
| "loss": 0.28, |
| "step": 7840 |
| }, |
| { |
| "epoch": 1.6769987174005987, |
| "grad_norm": 3.8195252418518066, |
| "learning_rate": 3.5914489311163897e-06, |
| "loss": 0.255, |
| "step": 7845 |
| }, |
| { |
| "epoch": 1.6780675502351432, |
| "grad_norm": 5.310424327850342, |
| "learning_rate": 3.5795724465558194e-06, |
| "loss": 0.2584, |
| "step": 7850 |
| }, |
| { |
| "epoch": 1.679136383069688, |
| "grad_norm": 5.491156101226807, |
| "learning_rate": 3.5676959619952495e-06, |
| "loss": 0.254, |
| "step": 7855 |
| }, |
| { |
| "epoch": 1.6802052159042327, |
| "grad_norm": 4.094849109649658, |
| "learning_rate": 3.5558194774346792e-06, |
| "loss": 0.2051, |
| "step": 7860 |
| }, |
| { |
| "epoch": 1.6812740487387772, |
| "grad_norm": 3.9543018341064453, |
| "learning_rate": 3.54394299287411e-06, |
| "loss": 0.2653, |
| "step": 7865 |
| }, |
| { |
| "epoch": 1.682342881573322, |
| "grad_norm": 4.145587921142578, |
| "learning_rate": 3.5320665083135395e-06, |
| "loss": 0.2882, |
| "step": 7870 |
| }, |
| { |
| "epoch": 1.6834117144078666, |
| "grad_norm": 3.4505057334899902, |
| "learning_rate": 3.5201900237529696e-06, |
| "loss": 0.2685, |
| "step": 7875 |
| }, |
| { |
| "epoch": 1.6844805472424111, |
| "grad_norm": 4.536677837371826, |
| "learning_rate": 3.5083135391923993e-06, |
| "loss": 0.2606, |
| "step": 7880 |
| }, |
| { |
| "epoch": 1.685549380076956, |
| "grad_norm": 5.157629013061523, |
| "learning_rate": 3.4964370546318295e-06, |
| "loss": 0.2266, |
| "step": 7885 |
| }, |
| { |
| "epoch": 1.6866182129115006, |
| "grad_norm": 4.595909595489502, |
| "learning_rate": 3.484560570071259e-06, |
| "loss": 0.2112, |
| "step": 7890 |
| }, |
| { |
| "epoch": 1.6876870457460453, |
| "grad_norm": 4.331202030181885, |
| "learning_rate": 3.4726840855106893e-06, |
| "loss": 0.245, |
| "step": 7895 |
| }, |
| { |
| "epoch": 1.68875587858059, |
| "grad_norm": 5.239740371704102, |
| "learning_rate": 3.460807600950119e-06, |
| "loss": 0.2144, |
| "step": 7900 |
| }, |
| { |
| "epoch": 1.6898247114151346, |
| "grad_norm": 3.1925699710845947, |
| "learning_rate": 3.448931116389549e-06, |
| "loss": 0.3334, |
| "step": 7905 |
| }, |
| { |
| "epoch": 1.6908935442496793, |
| "grad_norm": 3.5667247772216797, |
| "learning_rate": 3.437054631828979e-06, |
| "loss": 0.2754, |
| "step": 7910 |
| }, |
| { |
| "epoch": 1.691962377084224, |
| "grad_norm": 4.145174026489258, |
| "learning_rate": 3.4251781472684085e-06, |
| "loss": 0.3048, |
| "step": 7915 |
| }, |
| { |
| "epoch": 1.6930312099187685, |
| "grad_norm": 3.559020519256592, |
| "learning_rate": 3.4133016627078386e-06, |
| "loss": 0.2319, |
| "step": 7920 |
| }, |
| { |
| "epoch": 1.6941000427533135, |
| "grad_norm": 3.1762850284576416, |
| "learning_rate": 3.4014251781472683e-06, |
| "loss": 0.3505, |
| "step": 7925 |
| }, |
| { |
| "epoch": 1.695168875587858, |
| "grad_norm": 4.600183963775635, |
| "learning_rate": 3.3895486935866985e-06, |
| "loss": 0.3171, |
| "step": 7930 |
| }, |
| { |
| "epoch": 1.6962377084224027, |
| "grad_norm": 4.069181442260742, |
| "learning_rate": 3.3776722090261286e-06, |
| "loss": 0.2359, |
| "step": 7935 |
| }, |
| { |
| "epoch": 1.6973065412569475, |
| "grad_norm": 5.979001998901367, |
| "learning_rate": 3.3657957244655587e-06, |
| "loss": 0.259, |
| "step": 7940 |
| }, |
| { |
| "epoch": 1.698375374091492, |
| "grad_norm": 4.2909040451049805, |
| "learning_rate": 3.3539192399049884e-06, |
| "loss": 0.2345, |
| "step": 7945 |
| }, |
| { |
| "epoch": 1.699444206926037, |
| "grad_norm": 4.572742938995361, |
| "learning_rate": 3.3420427553444185e-06, |
| "loss": 0.2364, |
| "step": 7950 |
| }, |
| { |
| "epoch": 1.7005130397605814, |
| "grad_norm": 4.979130744934082, |
| "learning_rate": 3.3301662707838482e-06, |
| "loss": 0.3125, |
| "step": 7955 |
| }, |
| { |
| "epoch": 1.7015818725951262, |
| "grad_norm": 8.828888893127441, |
| "learning_rate": 3.3182897862232784e-06, |
| "loss": 0.3855, |
| "step": 7960 |
| }, |
| { |
| "epoch": 1.702650705429671, |
| "grad_norm": 3.5113627910614014, |
| "learning_rate": 3.306413301662708e-06, |
| "loss": 0.3085, |
| "step": 7965 |
| }, |
| { |
| "epoch": 1.7037195382642154, |
| "grad_norm": 3.138580322265625, |
| "learning_rate": 3.294536817102138e-06, |
| "loss": 0.2558, |
| "step": 7970 |
| }, |
| { |
| "epoch": 1.7047883710987601, |
| "grad_norm": 3.382124900817871, |
| "learning_rate": 3.282660332541568e-06, |
| "loss": 0.2863, |
| "step": 7975 |
| }, |
| { |
| "epoch": 1.7058572039333049, |
| "grad_norm": 4.64111328125, |
| "learning_rate": 3.2707838479809976e-06, |
| "loss": 0.2763, |
| "step": 7980 |
| }, |
| { |
| "epoch": 1.7069260367678494, |
| "grad_norm": 3.9928252696990967, |
| "learning_rate": 3.2589073634204277e-06, |
| "loss": 0.2307, |
| "step": 7985 |
| }, |
| { |
| "epoch": 1.7079948696023943, |
| "grad_norm": 4.402683258056641, |
| "learning_rate": 3.2470308788598574e-06, |
| "loss": 0.2604, |
| "step": 7990 |
| }, |
| { |
| "epoch": 1.7090637024369388, |
| "grad_norm": 4.634458541870117, |
| "learning_rate": 3.2351543942992876e-06, |
| "loss": 0.3524, |
| "step": 7995 |
| }, |
| { |
| "epoch": 1.7101325352714836, |
| "grad_norm": 3.9876441955566406, |
| "learning_rate": 3.2232779097387173e-06, |
| "loss": 0.2591, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.7112013681060283, |
| "grad_norm": 5.491477012634277, |
| "learning_rate": 3.211401425178148e-06, |
| "loss": 0.2945, |
| "step": 8005 |
| }, |
| { |
| "epoch": 1.7122702009405728, |
| "grad_norm": 3.348909378051758, |
| "learning_rate": 3.1995249406175775e-06, |
| "loss": 0.2698, |
| "step": 8010 |
| }, |
| { |
| "epoch": 1.7133390337751175, |
| "grad_norm": 2.3808627128601074, |
| "learning_rate": 3.1876484560570076e-06, |
| "loss": 0.2091, |
| "step": 8015 |
| }, |
| { |
| "epoch": 1.7144078666096623, |
| "grad_norm": 4.511120319366455, |
| "learning_rate": 3.1757719714964373e-06, |
| "loss": 0.2313, |
| "step": 8020 |
| }, |
| { |
| "epoch": 1.7154766994442068, |
| "grad_norm": 3.1614320278167725, |
| "learning_rate": 3.1638954869358675e-06, |
| "loss": 0.2935, |
| "step": 8025 |
| }, |
| { |
| "epoch": 1.7165455322787517, |
| "grad_norm": 4.708336353302002, |
| "learning_rate": 3.152019002375297e-06, |
| "loss": 0.2358, |
| "step": 8030 |
| }, |
| { |
| "epoch": 1.7176143651132962, |
| "grad_norm": 5.274806499481201, |
| "learning_rate": 3.1401425178147273e-06, |
| "loss": 0.2842, |
| "step": 8035 |
| }, |
| { |
| "epoch": 1.718683197947841, |
| "grad_norm": 4.673067569732666, |
| "learning_rate": 3.128266033254157e-06, |
| "loss": 0.26, |
| "step": 8040 |
| }, |
| { |
| "epoch": 1.7197520307823857, |
| "grad_norm": 7.412868499755859, |
| "learning_rate": 3.1163895486935867e-06, |
| "loss": 0.312, |
| "step": 8045 |
| }, |
| { |
| "epoch": 1.7208208636169302, |
| "grad_norm": 5.098508834838867, |
| "learning_rate": 3.104513064133017e-06, |
| "loss": 0.2776, |
| "step": 8050 |
| }, |
| { |
| "epoch": 1.721889696451475, |
| "grad_norm": 2.9823100566864014, |
| "learning_rate": 3.0926365795724465e-06, |
| "loss": 0.1612, |
| "step": 8055 |
| }, |
| { |
| "epoch": 1.7229585292860197, |
| "grad_norm": 3.906702995300293, |
| "learning_rate": 3.0807600950118767e-06, |
| "loss": 0.1803, |
| "step": 8060 |
| }, |
| { |
| "epoch": 1.7240273621205642, |
| "grad_norm": 4.462987899780273, |
| "learning_rate": 3.0688836104513064e-06, |
| "loss": 0.2677, |
| "step": 8065 |
| }, |
| { |
| "epoch": 1.7250961949551091, |
| "grad_norm": 3.3349108695983887, |
| "learning_rate": 3.0570071258907365e-06, |
| "loss": 0.2315, |
| "step": 8070 |
| }, |
| { |
| "epoch": 1.7261650277896536, |
| "grad_norm": 3.8888843059539795, |
| "learning_rate": 3.0451306413301666e-06, |
| "loss": 0.2583, |
| "step": 8075 |
| }, |
| { |
| "epoch": 1.7272338606241984, |
| "grad_norm": 3.5807013511657715, |
| "learning_rate": 3.0332541567695967e-06, |
| "loss": 0.2488, |
| "step": 8080 |
| }, |
| { |
| "epoch": 1.728302693458743, |
| "grad_norm": 4.443240165710449, |
| "learning_rate": 3.0213776722090264e-06, |
| "loss": 0.2379, |
| "step": 8085 |
| }, |
| { |
| "epoch": 1.7293715262932876, |
| "grad_norm": 4.572385311126709, |
| "learning_rate": 3.0095011876484566e-06, |
| "loss": 0.2637, |
| "step": 8090 |
| }, |
| { |
| "epoch": 1.7304403591278326, |
| "grad_norm": 3.8426921367645264, |
| "learning_rate": 2.9976247030878863e-06, |
| "loss": 0.2101, |
| "step": 8095 |
| }, |
| { |
| "epoch": 1.731509191962377, |
| "grad_norm": 3.6695351600646973, |
| "learning_rate": 2.9857482185273164e-06, |
| "loss": 0.283, |
| "step": 8100 |
| }, |
| { |
| "epoch": 1.7325780247969218, |
| "grad_norm": 4.494965076446533, |
| "learning_rate": 2.973871733966746e-06, |
| "loss": 0.2163, |
| "step": 8105 |
| }, |
| { |
| "epoch": 1.7336468576314665, |
| "grad_norm": 4.575949192047119, |
| "learning_rate": 2.961995249406176e-06, |
| "loss": 0.2474, |
| "step": 8110 |
| }, |
| { |
| "epoch": 1.734715690466011, |
| "grad_norm": 5.060282230377197, |
| "learning_rate": 2.950118764845606e-06, |
| "loss": 0.3346, |
| "step": 8115 |
| }, |
| { |
| "epoch": 1.7357845233005558, |
| "grad_norm": 5.1213274002075195, |
| "learning_rate": 2.9382422802850356e-06, |
| "loss": 0.2031, |
| "step": 8120 |
| }, |
| { |
| "epoch": 1.7368533561351005, |
| "grad_norm": 4.754722595214844, |
| "learning_rate": 2.9263657957244658e-06, |
| "loss": 0.2301, |
| "step": 8125 |
| }, |
| { |
| "epoch": 1.737922188969645, |
| "grad_norm": 3.7561304569244385, |
| "learning_rate": 2.9144893111638955e-06, |
| "loss": 0.3413, |
| "step": 8130 |
| }, |
| { |
| "epoch": 1.73899102180419, |
| "grad_norm": 4.434960842132568, |
| "learning_rate": 2.9026128266033256e-06, |
| "loss": 0.3121, |
| "step": 8135 |
| }, |
| { |
| "epoch": 1.7400598546387345, |
| "grad_norm": 3.5216495990753174, |
| "learning_rate": 2.8907363420427553e-06, |
| "loss": 0.2696, |
| "step": 8140 |
| }, |
| { |
| "epoch": 1.7411286874732792, |
| "grad_norm": 3.2195262908935547, |
| "learning_rate": 2.878859857482186e-06, |
| "loss": 0.1871, |
| "step": 8145 |
| }, |
| { |
| "epoch": 1.742197520307824, |
| "grad_norm": 2.6963675022125244, |
| "learning_rate": 2.8669833729216155e-06, |
| "loss": 0.2427, |
| "step": 8150 |
| }, |
| { |
| "epoch": 1.7432663531423684, |
| "grad_norm": 3.3632442951202393, |
| "learning_rate": 2.8551068883610457e-06, |
| "loss": 0.215, |
| "step": 8155 |
| }, |
| { |
| "epoch": 1.7443351859769132, |
| "grad_norm": 4.627504825592041, |
| "learning_rate": 2.8432304038004754e-06, |
| "loss": 0.2603, |
| "step": 8160 |
| }, |
| { |
| "epoch": 1.745404018811458, |
| "grad_norm": 4.896625995635986, |
| "learning_rate": 2.8313539192399055e-06, |
| "loss": 0.2149, |
| "step": 8165 |
| }, |
| { |
| "epoch": 1.7464728516460024, |
| "grad_norm": 3.6175167560577393, |
| "learning_rate": 2.819477434679335e-06, |
| "loss": 0.2961, |
| "step": 8170 |
| }, |
| { |
| "epoch": 1.7475416844805474, |
| "grad_norm": 2.9704079627990723, |
| "learning_rate": 2.807600950118765e-06, |
| "loss": 0.2363, |
| "step": 8175 |
| }, |
| { |
| "epoch": 1.7486105173150919, |
| "grad_norm": 5.211386203765869, |
| "learning_rate": 2.795724465558195e-06, |
| "loss": 0.2238, |
| "step": 8180 |
| }, |
| { |
| "epoch": 1.7496793501496366, |
| "grad_norm": 4.538329601287842, |
| "learning_rate": 2.7838479809976247e-06, |
| "loss": 0.2522, |
| "step": 8185 |
| }, |
| { |
| "epoch": 1.7507481829841813, |
| "grad_norm": 4.693541049957275, |
| "learning_rate": 2.771971496437055e-06, |
| "loss": 0.2314, |
| "step": 8190 |
| }, |
| { |
| "epoch": 1.7518170158187258, |
| "grad_norm": 6.232285499572754, |
| "learning_rate": 2.7600950118764846e-06, |
| "loss": 0.34, |
| "step": 8195 |
| }, |
| { |
| "epoch": 1.7528858486532708, |
| "grad_norm": 3.3624300956726074, |
| "learning_rate": 2.7482185273159147e-06, |
| "loss": 0.3113, |
| "step": 8200 |
| }, |
| { |
| "epoch": 1.7539546814878153, |
| "grad_norm": 4.9479193687438965, |
| "learning_rate": 2.7363420427553444e-06, |
| "loss": 0.2022, |
| "step": 8205 |
| }, |
| { |
| "epoch": 1.75502351432236, |
| "grad_norm": 2.4150469303131104, |
| "learning_rate": 2.7244655581947745e-06, |
| "loss": 0.2244, |
| "step": 8210 |
| }, |
| { |
| "epoch": 1.7560923471569048, |
| "grad_norm": 2.7240800857543945, |
| "learning_rate": 2.7125890736342046e-06, |
| "loss": 0.1794, |
| "step": 8215 |
| }, |
| { |
| "epoch": 1.7571611799914493, |
| "grad_norm": 5.584763526916504, |
| "learning_rate": 2.7007125890736348e-06, |
| "loss": 0.3058, |
| "step": 8220 |
| }, |
| { |
| "epoch": 1.758230012825994, |
| "grad_norm": 6.3999505043029785, |
| "learning_rate": 2.6888361045130645e-06, |
| "loss": 0.2495, |
| "step": 8225 |
| }, |
| { |
| "epoch": 1.7592988456605387, |
| "grad_norm": 3.8963570594787598, |
| "learning_rate": 2.6769596199524946e-06, |
| "loss": 0.3586, |
| "step": 8230 |
| }, |
| { |
| "epoch": 1.7603676784950832, |
| "grad_norm": 4.307738780975342, |
| "learning_rate": 2.6650831353919243e-06, |
| "loss": 0.2726, |
| "step": 8235 |
| }, |
| { |
| "epoch": 1.7614365113296282, |
| "grad_norm": 4.685522079467773, |
| "learning_rate": 2.653206650831354e-06, |
| "loss": 0.2774, |
| "step": 8240 |
| }, |
| { |
| "epoch": 1.7625053441641727, |
| "grad_norm": 3.685218095779419, |
| "learning_rate": 2.641330166270784e-06, |
| "loss": 0.3009, |
| "step": 8245 |
| }, |
| { |
| "epoch": 1.7635741769987174, |
| "grad_norm": 3.9087140560150146, |
| "learning_rate": 2.629453681710214e-06, |
| "loss": 0.2813, |
| "step": 8250 |
| }, |
| { |
| "epoch": 1.7646430098332622, |
| "grad_norm": 4.455633163452148, |
| "learning_rate": 2.617577197149644e-06, |
| "loss": 0.2942, |
| "step": 8255 |
| }, |
| { |
| "epoch": 1.7657118426678067, |
| "grad_norm": 3.3832907676696777, |
| "learning_rate": 2.6057007125890737e-06, |
| "loss": 0.2463, |
| "step": 8260 |
| }, |
| { |
| "epoch": 1.7667806755023514, |
| "grad_norm": 4.235377788543701, |
| "learning_rate": 2.5938242280285038e-06, |
| "loss": 0.2399, |
| "step": 8265 |
| }, |
| { |
| "epoch": 1.7678495083368961, |
| "grad_norm": 5.997225761413574, |
| "learning_rate": 2.5819477434679335e-06, |
| "loss": 0.2725, |
| "step": 8270 |
| }, |
| { |
| "epoch": 1.7689183411714406, |
| "grad_norm": 3.9668803215026855, |
| "learning_rate": 2.5700712589073636e-06, |
| "loss": 0.2171, |
| "step": 8275 |
| }, |
| { |
| "epoch": 1.7699871740059856, |
| "grad_norm": 6.379711151123047, |
| "learning_rate": 2.5581947743467933e-06, |
| "loss": 0.3037, |
| "step": 8280 |
| }, |
| { |
| "epoch": 1.77105600684053, |
| "grad_norm": 4.1840901374816895, |
| "learning_rate": 2.546318289786224e-06, |
| "loss": 0.1921, |
| "step": 8285 |
| }, |
| { |
| "epoch": 1.7721248396750748, |
| "grad_norm": 3.4607646465301514, |
| "learning_rate": 2.5344418052256536e-06, |
| "loss": 0.2519, |
| "step": 8290 |
| }, |
| { |
| "epoch": 1.7731936725096196, |
| "grad_norm": 4.899019241333008, |
| "learning_rate": 2.5225653206650837e-06, |
| "loss": 0.2644, |
| "step": 8295 |
| }, |
| { |
| "epoch": 1.774262505344164, |
| "grad_norm": 3.769134283065796, |
| "learning_rate": 2.5106888361045134e-06, |
| "loss": 0.3707, |
| "step": 8300 |
| }, |
| { |
| "epoch": 1.7753313381787088, |
| "grad_norm": 3.0456831455230713, |
| "learning_rate": 2.4988123515439435e-06, |
| "loss": 0.1496, |
| "step": 8305 |
| }, |
| { |
| "epoch": 1.7764001710132535, |
| "grad_norm": 4.198024749755859, |
| "learning_rate": 2.4869358669833732e-06, |
| "loss": 0.2251, |
| "step": 8310 |
| }, |
| { |
| "epoch": 1.777469003847798, |
| "grad_norm": 3.964083194732666, |
| "learning_rate": 2.475059382422803e-06, |
| "loss": 0.2426, |
| "step": 8315 |
| }, |
| { |
| "epoch": 1.778537836682343, |
| "grad_norm": 4.519120216369629, |
| "learning_rate": 2.463182897862233e-06, |
| "loss": 0.2853, |
| "step": 8320 |
| }, |
| { |
| "epoch": 1.7796066695168875, |
| "grad_norm": 4.322653293609619, |
| "learning_rate": 2.4513064133016627e-06, |
| "loss": 0.2156, |
| "step": 8325 |
| }, |
| { |
| "epoch": 1.7806755023514322, |
| "grad_norm": 2.6961798667907715, |
| "learning_rate": 2.439429928741093e-06, |
| "loss": 0.2293, |
| "step": 8330 |
| }, |
| { |
| "epoch": 1.781744335185977, |
| "grad_norm": 4.139772415161133, |
| "learning_rate": 2.4275534441805226e-06, |
| "loss": 0.2516, |
| "step": 8335 |
| }, |
| { |
| "epoch": 1.7828131680205215, |
| "grad_norm": 3.3040573596954346, |
| "learning_rate": 2.4156769596199527e-06, |
| "loss": 0.2272, |
| "step": 8340 |
| }, |
| { |
| "epoch": 1.7838820008550664, |
| "grad_norm": 4.51014518737793, |
| "learning_rate": 2.403800475059383e-06, |
| "loss": 0.2995, |
| "step": 8345 |
| }, |
| { |
| "epoch": 1.784950833689611, |
| "grad_norm": 3.647020101547241, |
| "learning_rate": 2.3919239904988125e-06, |
| "loss": 0.2825, |
| "step": 8350 |
| }, |
| { |
| "epoch": 1.7860196665241557, |
| "grad_norm": 3.456620931625366, |
| "learning_rate": 2.3800475059382427e-06, |
| "loss": 0.2604, |
| "step": 8355 |
| }, |
| { |
| "epoch": 1.7870884993587004, |
| "grad_norm": 5.626756191253662, |
| "learning_rate": 2.3681710213776724e-06, |
| "loss": 0.2216, |
| "step": 8360 |
| }, |
| { |
| "epoch": 1.788157332193245, |
| "grad_norm": 4.277560710906982, |
| "learning_rate": 2.356294536817102e-06, |
| "loss": 0.3034, |
| "step": 8365 |
| }, |
| { |
| "epoch": 1.7892261650277896, |
| "grad_norm": 2.8576090335845947, |
| "learning_rate": 2.344418052256532e-06, |
| "loss": 0.229, |
| "step": 8370 |
| }, |
| { |
| "epoch": 1.7902949978623344, |
| "grad_norm": 4.79686975479126, |
| "learning_rate": 2.3325415676959623e-06, |
| "loss": 0.271, |
| "step": 8375 |
| }, |
| { |
| "epoch": 1.7913638306968789, |
| "grad_norm": 5.135036945343018, |
| "learning_rate": 2.320665083135392e-06, |
| "loss": 0.2371, |
| "step": 8380 |
| }, |
| { |
| "epoch": 1.7924326635314238, |
| "grad_norm": 5.7761406898498535, |
| "learning_rate": 2.308788598574822e-06, |
| "loss": 0.2592, |
| "step": 8385 |
| }, |
| { |
| "epoch": 1.7935014963659683, |
| "grad_norm": 2.8430325984954834, |
| "learning_rate": 2.296912114014252e-06, |
| "loss": 0.206, |
| "step": 8390 |
| }, |
| { |
| "epoch": 1.794570329200513, |
| "grad_norm": 4.540223598480225, |
| "learning_rate": 2.285035629453682e-06, |
| "loss": 0.2167, |
| "step": 8395 |
| }, |
| { |
| "epoch": 1.7956391620350578, |
| "grad_norm": 4.889501094818115, |
| "learning_rate": 2.2731591448931117e-06, |
| "loss": 0.2521, |
| "step": 8400 |
| }, |
| { |
| "epoch": 1.7967079948696023, |
| "grad_norm": 3.3274142742156982, |
| "learning_rate": 2.261282660332542e-06, |
| "loss": 0.2283, |
| "step": 8405 |
| }, |
| { |
| "epoch": 1.797776827704147, |
| "grad_norm": 3.501002073287964, |
| "learning_rate": 2.249406175771972e-06, |
| "loss": 0.2161, |
| "step": 8410 |
| }, |
| { |
| "epoch": 1.7988456605386918, |
| "grad_norm": 2.9936413764953613, |
| "learning_rate": 2.2375296912114016e-06, |
| "loss": 0.2233, |
| "step": 8415 |
| }, |
| { |
| "epoch": 1.7999144933732363, |
| "grad_norm": 4.086530685424805, |
| "learning_rate": 2.2256532066508318e-06, |
| "loss": 0.2799, |
| "step": 8420 |
| }, |
| { |
| "epoch": 1.8009833262077812, |
| "grad_norm": 4.791090965270996, |
| "learning_rate": 2.2137767220902615e-06, |
| "loss": 0.2558, |
| "step": 8425 |
| }, |
| { |
| "epoch": 1.8020521590423257, |
| "grad_norm": 4.07485294342041, |
| "learning_rate": 2.201900237529691e-06, |
| "loss": 0.3093, |
| "step": 8430 |
| }, |
| { |
| "epoch": 1.8031209918768705, |
| "grad_norm": 4.454413414001465, |
| "learning_rate": 2.1900237529691213e-06, |
| "loss": 0.2751, |
| "step": 8435 |
| }, |
| { |
| "epoch": 1.8041898247114152, |
| "grad_norm": 4.849613666534424, |
| "learning_rate": 2.178147268408551e-06, |
| "loss": 0.268, |
| "step": 8440 |
| }, |
| { |
| "epoch": 1.8052586575459597, |
| "grad_norm": 4.424874782562256, |
| "learning_rate": 2.166270783847981e-06, |
| "loss": 0.2473, |
| "step": 8445 |
| }, |
| { |
| "epoch": 1.8063274903805044, |
| "grad_norm": 5.070244789123535, |
| "learning_rate": 2.1543942992874112e-06, |
| "loss": 0.3218, |
| "step": 8450 |
| }, |
| { |
| "epoch": 1.8073963232150492, |
| "grad_norm": 4.407561302185059, |
| "learning_rate": 2.142517814726841e-06, |
| "loss": 0.2602, |
| "step": 8455 |
| }, |
| { |
| "epoch": 1.8084651560495937, |
| "grad_norm": 3.2732160091400146, |
| "learning_rate": 2.130641330166271e-06, |
| "loss": 0.2118, |
| "step": 8460 |
| }, |
| { |
| "epoch": 1.8095339888841386, |
| "grad_norm": 6.757079124450684, |
| "learning_rate": 2.1187648456057008e-06, |
| "loss": 0.2526, |
| "step": 8465 |
| }, |
| { |
| "epoch": 1.8106028217186831, |
| "grad_norm": 3.9517734050750732, |
| "learning_rate": 2.106888361045131e-06, |
| "loss": 0.2797, |
| "step": 8470 |
| }, |
| { |
| "epoch": 1.8116716545532279, |
| "grad_norm": 3.6137807369232178, |
| "learning_rate": 2.0950118764845606e-06, |
| "loss": 0.2177, |
| "step": 8475 |
| }, |
| { |
| "epoch": 1.8127404873877726, |
| "grad_norm": 3.5731587409973145, |
| "learning_rate": 2.0831353919239907e-06, |
| "loss": 0.2264, |
| "step": 8480 |
| }, |
| { |
| "epoch": 1.8138093202223171, |
| "grad_norm": 4.859638690948486, |
| "learning_rate": 2.071258907363421e-06, |
| "loss": 0.253, |
| "step": 8485 |
| }, |
| { |
| "epoch": 1.814878153056862, |
| "grad_norm": 4.231696605682373, |
| "learning_rate": 2.0593824228028506e-06, |
| "loss": 0.199, |
| "step": 8490 |
| }, |
| { |
| "epoch": 1.8159469858914066, |
| "grad_norm": 3.7343459129333496, |
| "learning_rate": 2.0475059382422803e-06, |
| "loss": 0.2484, |
| "step": 8495 |
| }, |
| { |
| "epoch": 1.8170158187259513, |
| "grad_norm": 4.6749958992004395, |
| "learning_rate": 2.0356294536817104e-06, |
| "loss": 0.2666, |
| "step": 8500 |
| }, |
| { |
| "epoch": 1.818084651560496, |
| "grad_norm": 3.5160164833068848, |
| "learning_rate": 2.02375296912114e-06, |
| "loss": 0.2423, |
| "step": 8505 |
| }, |
| { |
| "epoch": 1.8191534843950405, |
| "grad_norm": 5.324501037597656, |
| "learning_rate": 2.01187648456057e-06, |
| "loss": 0.3206, |
| "step": 8510 |
| }, |
| { |
| "epoch": 1.8202223172295853, |
| "grad_norm": 4.3562092781066895, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 0.1885, |
| "step": 8515 |
| }, |
| { |
| "epoch": 1.82129115006413, |
| "grad_norm": 3.188398838043213, |
| "learning_rate": 1.98812351543943e-06, |
| "loss": 0.3103, |
| "step": 8520 |
| }, |
| { |
| "epoch": 1.8223599828986745, |
| "grad_norm": 3.9081082344055176, |
| "learning_rate": 1.97624703087886e-06, |
| "loss": 0.2204, |
| "step": 8525 |
| }, |
| { |
| "epoch": 1.8234288157332195, |
| "grad_norm": 4.220818519592285, |
| "learning_rate": 1.96437054631829e-06, |
| "loss": 0.2269, |
| "step": 8530 |
| }, |
| { |
| "epoch": 1.824497648567764, |
| "grad_norm": 4.2256035804748535, |
| "learning_rate": 1.95249406175772e-06, |
| "loss": 0.3602, |
| "step": 8535 |
| }, |
| { |
| "epoch": 1.8255664814023087, |
| "grad_norm": 3.092357635498047, |
| "learning_rate": 1.9406175771971497e-06, |
| "loss": 0.1735, |
| "step": 8540 |
| }, |
| { |
| "epoch": 1.8266353142368534, |
| "grad_norm": 5.8758649826049805, |
| "learning_rate": 1.9287410926365794e-06, |
| "loss": 0.3107, |
| "step": 8545 |
| }, |
| { |
| "epoch": 1.827704147071398, |
| "grad_norm": 4.43316650390625, |
| "learning_rate": 1.91686460807601e-06, |
| "loss": 0.2346, |
| "step": 8550 |
| }, |
| { |
| "epoch": 1.8287729799059427, |
| "grad_norm": 4.877310276031494, |
| "learning_rate": 1.9049881235154396e-06, |
| "loss": 0.2372, |
| "step": 8555 |
| }, |
| { |
| "epoch": 1.8298418127404874, |
| "grad_norm": 5.378355026245117, |
| "learning_rate": 1.8931116389548696e-06, |
| "loss": 0.2665, |
| "step": 8560 |
| }, |
| { |
| "epoch": 1.830910645575032, |
| "grad_norm": 4.576028347015381, |
| "learning_rate": 1.8812351543942995e-06, |
| "loss": 0.2722, |
| "step": 8565 |
| }, |
| { |
| "epoch": 1.8319794784095769, |
| "grad_norm": 2.452864646911621, |
| "learning_rate": 1.8693586698337294e-06, |
| "loss": 0.1879, |
| "step": 8570 |
| }, |
| { |
| "epoch": 1.8330483112441214, |
| "grad_norm": 4.013648509979248, |
| "learning_rate": 1.8574821852731593e-06, |
| "loss": 0.2434, |
| "step": 8575 |
| }, |
| { |
| "epoch": 1.834117144078666, |
| "grad_norm": 4.295891761779785, |
| "learning_rate": 1.845605700712589e-06, |
| "loss": 0.3408, |
| "step": 8580 |
| }, |
| { |
| "epoch": 1.8351859769132108, |
| "grad_norm": 5.399013042449951, |
| "learning_rate": 1.8337292161520193e-06, |
| "loss": 0.2819, |
| "step": 8585 |
| }, |
| { |
| "epoch": 1.8362548097477553, |
| "grad_norm": 5.267197608947754, |
| "learning_rate": 1.8218527315914493e-06, |
| "loss": 0.2828, |
| "step": 8590 |
| }, |
| { |
| "epoch": 1.8373236425823, |
| "grad_norm": 4.1791672706604, |
| "learning_rate": 1.809976247030879e-06, |
| "loss": 0.1821, |
| "step": 8595 |
| }, |
| { |
| "epoch": 1.8383924754168448, |
| "grad_norm": 4.158424377441406, |
| "learning_rate": 1.7980997624703089e-06, |
| "loss": 0.2193, |
| "step": 8600 |
| }, |
| { |
| "epoch": 1.8394613082513895, |
| "grad_norm": 3.101128101348877, |
| "learning_rate": 1.7862232779097388e-06, |
| "loss": 0.3155, |
| "step": 8605 |
| }, |
| { |
| "epoch": 1.8405301410859343, |
| "grad_norm": 3.661057233810425, |
| "learning_rate": 1.7743467933491687e-06, |
| "loss": 0.2129, |
| "step": 8610 |
| }, |
| { |
| "epoch": 1.8415989739204788, |
| "grad_norm": 3.7547378540039062, |
| "learning_rate": 1.7624703087885986e-06, |
| "loss": 0.2321, |
| "step": 8615 |
| }, |
| { |
| "epoch": 1.8426678067550235, |
| "grad_norm": 4.53202486038208, |
| "learning_rate": 1.7505938242280287e-06, |
| "loss": 0.1748, |
| "step": 8620 |
| }, |
| { |
| "epoch": 1.8437366395895682, |
| "grad_norm": 3.7189040184020996, |
| "learning_rate": 1.7387173396674587e-06, |
| "loss": 0.2355, |
| "step": 8625 |
| }, |
| { |
| "epoch": 1.8448054724241127, |
| "grad_norm": 5.827390670776367, |
| "learning_rate": 1.7268408551068886e-06, |
| "loss": 0.2226, |
| "step": 8630 |
| }, |
| { |
| "epoch": 1.8458743052586577, |
| "grad_norm": 4.365615367889404, |
| "learning_rate": 1.7149643705463185e-06, |
| "loss": 0.2812, |
| "step": 8635 |
| }, |
| { |
| "epoch": 1.8469431380932022, |
| "grad_norm": 4.593905925750732, |
| "learning_rate": 1.7030878859857484e-06, |
| "loss": 0.2542, |
| "step": 8640 |
| }, |
| { |
| "epoch": 1.848011970927747, |
| "grad_norm": 4.3599419593811035, |
| "learning_rate": 1.691211401425178e-06, |
| "loss": 0.214, |
| "step": 8645 |
| }, |
| { |
| "epoch": 1.8490808037622917, |
| "grad_norm": 5.342328071594238, |
| "learning_rate": 1.679334916864608e-06, |
| "loss": 0.2157, |
| "step": 8650 |
| }, |
| { |
| "epoch": 1.8501496365968362, |
| "grad_norm": 3.1678943634033203, |
| "learning_rate": 1.6674584323040384e-06, |
| "loss": 0.2336, |
| "step": 8655 |
| }, |
| { |
| "epoch": 1.851218469431381, |
| "grad_norm": 4.464089870452881, |
| "learning_rate": 1.655581947743468e-06, |
| "loss": 0.3409, |
| "step": 8660 |
| }, |
| { |
| "epoch": 1.8522873022659256, |
| "grad_norm": 4.1919755935668945, |
| "learning_rate": 1.643705463182898e-06, |
| "loss": 0.2902, |
| "step": 8665 |
| }, |
| { |
| "epoch": 1.8533561351004701, |
| "grad_norm": 3.814858913421631, |
| "learning_rate": 1.6318289786223279e-06, |
| "loss": 0.2329, |
| "step": 8670 |
| }, |
| { |
| "epoch": 1.854424967935015, |
| "grad_norm": 3.2706382274627686, |
| "learning_rate": 1.6199524940617578e-06, |
| "loss": 0.1849, |
| "step": 8675 |
| }, |
| { |
| "epoch": 1.8554938007695596, |
| "grad_norm": 3.6442952156066895, |
| "learning_rate": 1.6080760095011877e-06, |
| "loss": 0.2919, |
| "step": 8680 |
| }, |
| { |
| "epoch": 1.8565626336041043, |
| "grad_norm": 3.179872512817383, |
| "learning_rate": 1.5961995249406176e-06, |
| "loss": 0.2157, |
| "step": 8685 |
| }, |
| { |
| "epoch": 1.857631466438649, |
| "grad_norm": 3.71156644821167, |
| "learning_rate": 1.5843230403800478e-06, |
| "loss": 0.2286, |
| "step": 8690 |
| }, |
| { |
| "epoch": 1.8587002992731936, |
| "grad_norm": 5.000162124633789, |
| "learning_rate": 1.5724465558194777e-06, |
| "loss": 0.1957, |
| "step": 8695 |
| }, |
| { |
| "epoch": 1.8597691321077383, |
| "grad_norm": 3.7217514514923096, |
| "learning_rate": 1.5605700712589076e-06, |
| "loss": 0.2102, |
| "step": 8700 |
| }, |
| { |
| "epoch": 1.860837964942283, |
| "grad_norm": 5.23848295211792, |
| "learning_rate": 1.5486935866983375e-06, |
| "loss": 0.3172, |
| "step": 8705 |
| }, |
| { |
| "epoch": 1.8619067977768275, |
| "grad_norm": 3.95940899848938, |
| "learning_rate": 1.5368171021377672e-06, |
| "loss": 0.2619, |
| "step": 8710 |
| }, |
| { |
| "epoch": 1.8629756306113725, |
| "grad_norm": 4.389864921569824, |
| "learning_rate": 1.5249406175771971e-06, |
| "loss": 0.2898, |
| "step": 8715 |
| }, |
| { |
| "epoch": 1.864044463445917, |
| "grad_norm": 4.196899890899658, |
| "learning_rate": 1.513064133016627e-06, |
| "loss": 0.2523, |
| "step": 8720 |
| }, |
| { |
| "epoch": 1.8651132962804617, |
| "grad_norm": 4.35107946395874, |
| "learning_rate": 1.5011876484560572e-06, |
| "loss": 0.2534, |
| "step": 8725 |
| }, |
| { |
| "epoch": 1.8661821291150065, |
| "grad_norm": 5.233465194702148, |
| "learning_rate": 1.489311163895487e-06, |
| "loss": 0.2546, |
| "step": 8730 |
| }, |
| { |
| "epoch": 1.867250961949551, |
| "grad_norm": 4.285619735717773, |
| "learning_rate": 1.477434679334917e-06, |
| "loss": 0.2171, |
| "step": 8735 |
| }, |
| { |
| "epoch": 1.868319794784096, |
| "grad_norm": 5.0237579345703125, |
| "learning_rate": 1.465558194774347e-06, |
| "loss": 0.2617, |
| "step": 8740 |
| }, |
| { |
| "epoch": 1.8693886276186404, |
| "grad_norm": 3.848062753677368, |
| "learning_rate": 1.4536817102137768e-06, |
| "loss": 0.1917, |
| "step": 8745 |
| }, |
| { |
| "epoch": 1.8704574604531852, |
| "grad_norm": 3.6329150199890137, |
| "learning_rate": 1.4418052256532067e-06, |
| "loss": 0.2256, |
| "step": 8750 |
| }, |
| { |
| "epoch": 1.87152629328773, |
| "grad_norm": 4.504333019256592, |
| "learning_rate": 1.4299287410926366e-06, |
| "loss": 0.2319, |
| "step": 8755 |
| }, |
| { |
| "epoch": 1.8725951261222744, |
| "grad_norm": 6.011372089385986, |
| "learning_rate": 1.4180522565320668e-06, |
| "loss": 0.2783, |
| "step": 8760 |
| }, |
| { |
| "epoch": 1.8736639589568191, |
| "grad_norm": 4.750868320465088, |
| "learning_rate": 1.4061757719714967e-06, |
| "loss": 0.2885, |
| "step": 8765 |
| }, |
| { |
| "epoch": 1.8747327917913639, |
| "grad_norm": 3.2728309631347656, |
| "learning_rate": 1.3942992874109266e-06, |
| "loss": 0.2586, |
| "step": 8770 |
| }, |
| { |
| "epoch": 1.8758016246259084, |
| "grad_norm": 3.3371262550354004, |
| "learning_rate": 1.3824228028503565e-06, |
| "loss": 0.2009, |
| "step": 8775 |
| }, |
| { |
| "epoch": 1.8768704574604533, |
| "grad_norm": 3.7395825386047363, |
| "learning_rate": 1.3705463182897862e-06, |
| "loss": 0.273, |
| "step": 8780 |
| }, |
| { |
| "epoch": 1.8779392902949978, |
| "grad_norm": 4.672481060028076, |
| "learning_rate": 1.3586698337292161e-06, |
| "loss": 0.2502, |
| "step": 8785 |
| }, |
| { |
| "epoch": 1.8790081231295426, |
| "grad_norm": 2.957099676132202, |
| "learning_rate": 1.346793349168646e-06, |
| "loss": 0.2174, |
| "step": 8790 |
| }, |
| { |
| "epoch": 1.8800769559640873, |
| "grad_norm": 4.8943915367126465, |
| "learning_rate": 1.3349168646080762e-06, |
| "loss": 0.2723, |
| "step": 8795 |
| }, |
| { |
| "epoch": 1.8811457887986318, |
| "grad_norm": 4.067677021026611, |
| "learning_rate": 1.323040380047506e-06, |
| "loss": 0.2633, |
| "step": 8800 |
| }, |
| { |
| "epoch": 1.8822146216331765, |
| "grad_norm": 4.314869403839111, |
| "learning_rate": 1.311163895486936e-06, |
| "loss": 0.2794, |
| "step": 8805 |
| }, |
| { |
| "epoch": 1.8832834544677213, |
| "grad_norm": 4.225076675415039, |
| "learning_rate": 1.299287410926366e-06, |
| "loss": 0.2961, |
| "step": 8810 |
| }, |
| { |
| "epoch": 1.8843522873022658, |
| "grad_norm": 3.992135763168335, |
| "learning_rate": 1.2874109263657958e-06, |
| "loss": 0.2598, |
| "step": 8815 |
| }, |
| { |
| "epoch": 1.8854211201368107, |
| "grad_norm": 4.5158586502075195, |
| "learning_rate": 1.2755344418052257e-06, |
| "loss": 0.2794, |
| "step": 8820 |
| }, |
| { |
| "epoch": 1.8864899529713552, |
| "grad_norm": 4.226551055908203, |
| "learning_rate": 1.2636579572446556e-06, |
| "loss": 0.2447, |
| "step": 8825 |
| }, |
| { |
| "epoch": 1.8875587858059, |
| "grad_norm": 3.2052338123321533, |
| "learning_rate": 1.2517814726840858e-06, |
| "loss": 0.2741, |
| "step": 8830 |
| }, |
| { |
| "epoch": 1.8886276186404447, |
| "grad_norm": 3.315537929534912, |
| "learning_rate": 1.2399049881235155e-06, |
| "loss": 0.2192, |
| "step": 8835 |
| }, |
| { |
| "epoch": 1.8896964514749892, |
| "grad_norm": 4.095473289489746, |
| "learning_rate": 1.2280285035629456e-06, |
| "loss": 0.3188, |
| "step": 8840 |
| }, |
| { |
| "epoch": 1.890765284309534, |
| "grad_norm": 4.654134273529053, |
| "learning_rate": 1.2161520190023753e-06, |
| "loss": 0.294, |
| "step": 8845 |
| }, |
| { |
| "epoch": 1.8918341171440787, |
| "grad_norm": 3.982452154159546, |
| "learning_rate": 1.2042755344418052e-06, |
| "loss": 0.2961, |
| "step": 8850 |
| }, |
| { |
| "epoch": 1.8929029499786232, |
| "grad_norm": 3.594325542449951, |
| "learning_rate": 1.1923990498812353e-06, |
| "loss": 0.2288, |
| "step": 8855 |
| }, |
| { |
| "epoch": 1.8939717828131681, |
| "grad_norm": 4.437509059906006, |
| "learning_rate": 1.1805225653206653e-06, |
| "loss": 0.2796, |
| "step": 8860 |
| }, |
| { |
| "epoch": 1.8950406156477126, |
| "grad_norm": 4.6788716316223145, |
| "learning_rate": 1.1686460807600952e-06, |
| "loss": 0.2464, |
| "step": 8865 |
| }, |
| { |
| "epoch": 1.8961094484822574, |
| "grad_norm": 4.381009578704834, |
| "learning_rate": 1.1567695961995249e-06, |
| "loss": 0.2435, |
| "step": 8870 |
| }, |
| { |
| "epoch": 1.897178281316802, |
| "grad_norm": 4.203982353210449, |
| "learning_rate": 1.144893111638955e-06, |
| "loss": 0.2993, |
| "step": 8875 |
| }, |
| { |
| "epoch": 1.8982471141513466, |
| "grad_norm": 3.9560775756835938, |
| "learning_rate": 1.133016627078385e-06, |
| "loss": 0.2049, |
| "step": 8880 |
| }, |
| { |
| "epoch": 1.8993159469858916, |
| "grad_norm": 4.908998012542725, |
| "learning_rate": 1.1211401425178148e-06, |
| "loss": 0.2588, |
| "step": 8885 |
| }, |
| { |
| "epoch": 1.900384779820436, |
| "grad_norm": 2.399383544921875, |
| "learning_rate": 1.1092636579572447e-06, |
| "loss": 0.2559, |
| "step": 8890 |
| }, |
| { |
| "epoch": 1.9014536126549808, |
| "grad_norm": 5.100274085998535, |
| "learning_rate": 1.0973871733966747e-06, |
| "loss": 0.261, |
| "step": 8895 |
| }, |
| { |
| "epoch": 1.9025224454895255, |
| "grad_norm": 1.9479761123657227, |
| "learning_rate": 1.0855106888361046e-06, |
| "loss": 0.2132, |
| "step": 8900 |
| }, |
| { |
| "epoch": 1.90359127832407, |
| "grad_norm": 4.266331195831299, |
| "learning_rate": 1.0736342042755345e-06, |
| "loss": 0.2184, |
| "step": 8905 |
| }, |
| { |
| "epoch": 1.9046601111586148, |
| "grad_norm": 3.761469841003418, |
| "learning_rate": 1.0617577197149644e-06, |
| "loss": 0.2551, |
| "step": 8910 |
| }, |
| { |
| "epoch": 1.9057289439931595, |
| "grad_norm": 5.301465034484863, |
| "learning_rate": 1.0498812351543943e-06, |
| "loss": 0.2302, |
| "step": 8915 |
| }, |
| { |
| "epoch": 1.906797776827704, |
| "grad_norm": 4.8627095222473145, |
| "learning_rate": 1.0380047505938242e-06, |
| "loss": 0.2441, |
| "step": 8920 |
| }, |
| { |
| "epoch": 1.907866609662249, |
| "grad_norm": 3.7152163982391357, |
| "learning_rate": 1.0261282660332544e-06, |
| "loss": 0.2176, |
| "step": 8925 |
| }, |
| { |
| "epoch": 1.9089354424967935, |
| "grad_norm": 4.612980365753174, |
| "learning_rate": 1.0142517814726843e-06, |
| "loss": 0.3041, |
| "step": 8930 |
| }, |
| { |
| "epoch": 1.9100042753313382, |
| "grad_norm": 3.9601426124572754, |
| "learning_rate": 1.002375296912114e-06, |
| "loss": 0.2325, |
| "step": 8935 |
| }, |
| { |
| "epoch": 1.911073108165883, |
| "grad_norm": 3.773958921432495, |
| "learning_rate": 9.904988123515439e-07, |
| "loss": 0.2463, |
| "step": 8940 |
| }, |
| { |
| "epoch": 1.9121419410004274, |
| "grad_norm": 5.172873020172119, |
| "learning_rate": 9.78622327790974e-07, |
| "loss": 0.2997, |
| "step": 8945 |
| }, |
| { |
| "epoch": 1.9132107738349722, |
| "grad_norm": 3.382683038711548, |
| "learning_rate": 9.66745843230404e-07, |
| "loss": 0.2202, |
| "step": 8950 |
| }, |
| { |
| "epoch": 1.914279606669517, |
| "grad_norm": 5.699649333953857, |
| "learning_rate": 9.548693586698338e-07, |
| "loss": 0.2745, |
| "step": 8955 |
| }, |
| { |
| "epoch": 1.9153484395040614, |
| "grad_norm": 4.574731349945068, |
| "learning_rate": 9.429928741092638e-07, |
| "loss": 0.2642, |
| "step": 8960 |
| }, |
| { |
| "epoch": 1.9164172723386064, |
| "grad_norm": 7.173608303070068, |
| "learning_rate": 9.311163895486937e-07, |
| "loss": 0.2782, |
| "step": 8965 |
| }, |
| { |
| "epoch": 1.9174861051731509, |
| "grad_norm": 3.9324846267700195, |
| "learning_rate": 9.192399049881236e-07, |
| "loss": 0.2435, |
| "step": 8970 |
| }, |
| { |
| "epoch": 1.9185549380076956, |
| "grad_norm": 3.742494583129883, |
| "learning_rate": 9.073634204275535e-07, |
| "loss": 0.2492, |
| "step": 8975 |
| }, |
| { |
| "epoch": 1.9196237708422403, |
| "grad_norm": 5.236582279205322, |
| "learning_rate": 8.954869358669835e-07, |
| "loss": 0.2161, |
| "step": 8980 |
| }, |
| { |
| "epoch": 1.9206926036767848, |
| "grad_norm": 3.473259449005127, |
| "learning_rate": 8.836104513064133e-07, |
| "loss": 0.2549, |
| "step": 8985 |
| }, |
| { |
| "epoch": 1.9217614365113296, |
| "grad_norm": 3.2006514072418213, |
| "learning_rate": 8.717339667458432e-07, |
| "loss": 0.2217, |
| "step": 8990 |
| }, |
| { |
| "epoch": 1.9228302693458743, |
| "grad_norm": 3.0505008697509766, |
| "learning_rate": 8.598574821852733e-07, |
| "loss": 0.266, |
| "step": 8995 |
| }, |
| { |
| "epoch": 1.9238991021804188, |
| "grad_norm": 3.8124094009399414, |
| "learning_rate": 8.479809976247032e-07, |
| "loss": 0.2909, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.9249679350149638, |
| "grad_norm": 3.0390665531158447, |
| "learning_rate": 8.361045130641331e-07, |
| "loss": 0.2149, |
| "step": 9005 |
| }, |
| { |
| "epoch": 1.9260367678495083, |
| "grad_norm": 3.928755521774292, |
| "learning_rate": 8.24228028503563e-07, |
| "loss": 0.3099, |
| "step": 9010 |
| }, |
| { |
| "epoch": 1.927105600684053, |
| "grad_norm": 4.092939376831055, |
| "learning_rate": 8.12351543942993e-07, |
| "loss": 0.1929, |
| "step": 9015 |
| }, |
| { |
| "epoch": 1.9281744335185977, |
| "grad_norm": 4.7592573165893555, |
| "learning_rate": 8.004750593824228e-07, |
| "loss": 0.2854, |
| "step": 9020 |
| }, |
| { |
| "epoch": 1.9292432663531422, |
| "grad_norm": 3.904730796813965, |
| "learning_rate": 7.885985748218527e-07, |
| "loss": 0.1857, |
| "step": 9025 |
| }, |
| { |
| "epoch": 1.9303120991876872, |
| "grad_norm": 4.656405925750732, |
| "learning_rate": 7.767220902612828e-07, |
| "loss": 0.2445, |
| "step": 9030 |
| }, |
| { |
| "epoch": 1.9313809320222317, |
| "grad_norm": 3.890486240386963, |
| "learning_rate": 7.648456057007127e-07, |
| "loss": 0.226, |
| "step": 9035 |
| }, |
| { |
| "epoch": 1.9324497648567764, |
| "grad_norm": 4.5724334716796875, |
| "learning_rate": 7.529691211401426e-07, |
| "loss": 0.2822, |
| "step": 9040 |
| }, |
| { |
| "epoch": 1.9335185976913212, |
| "grad_norm": 4.720613479614258, |
| "learning_rate": 7.410926365795724e-07, |
| "loss": 0.2541, |
| "step": 9045 |
| }, |
| { |
| "epoch": 1.9345874305258657, |
| "grad_norm": 3.9262373447418213, |
| "learning_rate": 7.292161520190025e-07, |
| "loss": 0.2442, |
| "step": 9050 |
| }, |
| { |
| "epoch": 1.9356562633604104, |
| "grad_norm": 3.6456849575042725, |
| "learning_rate": 7.173396674584323e-07, |
| "loss": 0.2504, |
| "step": 9055 |
| }, |
| { |
| "epoch": 1.9367250961949551, |
| "grad_norm": 3.021383762359619, |
| "learning_rate": 7.054631828978623e-07, |
| "loss": 0.2073, |
| "step": 9060 |
| }, |
| { |
| "epoch": 1.9377939290294997, |
| "grad_norm": 4.671846389770508, |
| "learning_rate": 6.935866983372923e-07, |
| "loss": 0.2245, |
| "step": 9065 |
| }, |
| { |
| "epoch": 1.9388627618640446, |
| "grad_norm": 4.805634021759033, |
| "learning_rate": 6.817102137767222e-07, |
| "loss": 0.2442, |
| "step": 9070 |
| }, |
| { |
| "epoch": 1.9399315946985891, |
| "grad_norm": 3.9393720626831055, |
| "learning_rate": 6.698337292161521e-07, |
| "loss": 0.2382, |
| "step": 9075 |
| }, |
| { |
| "epoch": 1.9410004275331338, |
| "grad_norm": 5.1551408767700195, |
| "learning_rate": 6.579572446555819e-07, |
| "loss": 0.2482, |
| "step": 9080 |
| }, |
| { |
| "epoch": 1.9420692603676786, |
| "grad_norm": 5.381765365600586, |
| "learning_rate": 6.460807600950119e-07, |
| "loss": 0.2849, |
| "step": 9085 |
| }, |
| { |
| "epoch": 1.943138093202223, |
| "grad_norm": 3.842059850692749, |
| "learning_rate": 6.342042755344418e-07, |
| "loss": 0.2666, |
| "step": 9090 |
| }, |
| { |
| "epoch": 1.9442069260367678, |
| "grad_norm": 4.254835605621338, |
| "learning_rate": 6.223277909738719e-07, |
| "loss": 0.224, |
| "step": 9095 |
| }, |
| { |
| "epoch": 1.9452757588713125, |
| "grad_norm": 5.467522144317627, |
| "learning_rate": 6.104513064133017e-07, |
| "loss": 0.2961, |
| "step": 9100 |
| }, |
| { |
| "epoch": 1.946344591705857, |
| "grad_norm": 4.110438823699951, |
| "learning_rate": 5.985748218527317e-07, |
| "loss": 0.217, |
| "step": 9105 |
| }, |
| { |
| "epoch": 1.947413424540402, |
| "grad_norm": 4.675514221191406, |
| "learning_rate": 5.866983372921616e-07, |
| "loss": 0.2384, |
| "step": 9110 |
| }, |
| { |
| "epoch": 1.9484822573749465, |
| "grad_norm": 4.90285062789917, |
| "learning_rate": 5.748218527315915e-07, |
| "loss": 0.2205, |
| "step": 9115 |
| }, |
| { |
| "epoch": 1.9495510902094912, |
| "grad_norm": 4.838087558746338, |
| "learning_rate": 5.629453681710214e-07, |
| "loss": 0.2807, |
| "step": 9120 |
| }, |
| { |
| "epoch": 1.950619923044036, |
| "grad_norm": 4.49014949798584, |
| "learning_rate": 5.510688836104513e-07, |
| "loss": 0.2577, |
| "step": 9125 |
| }, |
| { |
| "epoch": 1.9516887558785805, |
| "grad_norm": 6.248046398162842, |
| "learning_rate": 5.391923990498813e-07, |
| "loss": 0.3212, |
| "step": 9130 |
| }, |
| { |
| "epoch": 1.9527575887131252, |
| "grad_norm": 2.6727161407470703, |
| "learning_rate": 5.273159144893112e-07, |
| "loss": 0.239, |
| "step": 9135 |
| }, |
| { |
| "epoch": 1.95382642154767, |
| "grad_norm": 5.567617416381836, |
| "learning_rate": 5.154394299287412e-07, |
| "loss": 0.2283, |
| "step": 9140 |
| }, |
| { |
| "epoch": 1.9548952543822147, |
| "grad_norm": 4.877483367919922, |
| "learning_rate": 5.03562945368171e-07, |
| "loss": 0.2606, |
| "step": 9145 |
| }, |
| { |
| "epoch": 1.9559640872167594, |
| "grad_norm": 4.150485515594482, |
| "learning_rate": 4.91686460807601e-07, |
| "loss": 0.2684, |
| "step": 9150 |
| }, |
| { |
| "epoch": 1.957032920051304, |
| "grad_norm": 4.878507614135742, |
| "learning_rate": 4.798099762470309e-07, |
| "loss": 0.2857, |
| "step": 9155 |
| }, |
| { |
| "epoch": 1.9581017528858486, |
| "grad_norm": 5.343387126922607, |
| "learning_rate": 4.6793349168646085e-07, |
| "loss": 0.2962, |
| "step": 9160 |
| }, |
| { |
| "epoch": 1.9591705857203934, |
| "grad_norm": 4.346437454223633, |
| "learning_rate": 4.560570071258908e-07, |
| "loss": 0.2639, |
| "step": 9165 |
| }, |
| { |
| "epoch": 1.9602394185549379, |
| "grad_norm": 5.331128120422363, |
| "learning_rate": 4.441805225653207e-07, |
| "loss": 0.2549, |
| "step": 9170 |
| }, |
| { |
| "epoch": 1.9613082513894828, |
| "grad_norm": 4.075921535491943, |
| "learning_rate": 4.3230403800475065e-07, |
| "loss": 0.2561, |
| "step": 9175 |
| }, |
| { |
| "epoch": 1.9623770842240273, |
| "grad_norm": 4.879267692565918, |
| "learning_rate": 4.2042755344418056e-07, |
| "loss": 0.2324, |
| "step": 9180 |
| }, |
| { |
| "epoch": 1.963445917058572, |
| "grad_norm": 4.325537204742432, |
| "learning_rate": 4.085510688836105e-07, |
| "loss": 0.3142, |
| "step": 9185 |
| }, |
| { |
| "epoch": 1.9645147498931168, |
| "grad_norm": 3.530134439468384, |
| "learning_rate": 3.966745843230404e-07, |
| "loss": 0.2778, |
| "step": 9190 |
| }, |
| { |
| "epoch": 1.9655835827276613, |
| "grad_norm": 2.6315903663635254, |
| "learning_rate": 3.8479809976247036e-07, |
| "loss": 0.2603, |
| "step": 9195 |
| }, |
| { |
| "epoch": 1.966652415562206, |
| "grad_norm": 4.100142002105713, |
| "learning_rate": 3.729216152019002e-07, |
| "loss": 0.2191, |
| "step": 9200 |
| }, |
| { |
| "epoch": 1.9677212483967508, |
| "grad_norm": 3.4908711910247803, |
| "learning_rate": 3.610451306413302e-07, |
| "loss": 0.2658, |
| "step": 9205 |
| }, |
| { |
| "epoch": 1.9687900812312953, |
| "grad_norm": 4.331186771392822, |
| "learning_rate": 3.4916864608076015e-07, |
| "loss": 0.2701, |
| "step": 9210 |
| }, |
| { |
| "epoch": 1.9698589140658402, |
| "grad_norm": 6.090305805206299, |
| "learning_rate": 3.3729216152019e-07, |
| "loss": 0.3, |
| "step": 9215 |
| }, |
| { |
| "epoch": 1.9709277469003847, |
| "grad_norm": 3.7345423698425293, |
| "learning_rate": 3.2541567695962e-07, |
| "loss": 0.2728, |
| "step": 9220 |
| }, |
| { |
| "epoch": 1.9719965797349295, |
| "grad_norm": 6.370054244995117, |
| "learning_rate": 3.135391923990499e-07, |
| "loss": 0.2724, |
| "step": 9225 |
| }, |
| { |
| "epoch": 1.9730654125694742, |
| "grad_norm": 3.2030200958251953, |
| "learning_rate": 3.0166270783847986e-07, |
| "loss": 0.1735, |
| "step": 9230 |
| }, |
| { |
| "epoch": 1.9741342454040187, |
| "grad_norm": 3.904633045196533, |
| "learning_rate": 2.897862232779098e-07, |
| "loss": 0.25, |
| "step": 9235 |
| }, |
| { |
| "epoch": 1.9752030782385634, |
| "grad_norm": 5.196364402770996, |
| "learning_rate": 2.779097387173397e-07, |
| "loss": 0.276, |
| "step": 9240 |
| }, |
| { |
| "epoch": 1.9762719110731082, |
| "grad_norm": 5.8324785232543945, |
| "learning_rate": 2.660332541567696e-07, |
| "loss": 0.2581, |
| "step": 9245 |
| }, |
| { |
| "epoch": 1.9773407439076527, |
| "grad_norm": 3.4866878986358643, |
| "learning_rate": 2.541567695961995e-07, |
| "loss": 0.2545, |
| "step": 9250 |
| }, |
| { |
| "epoch": 1.9784095767421976, |
| "grad_norm": 5.080046653747559, |
| "learning_rate": 2.422802850356295e-07, |
| "loss": 0.2251, |
| "step": 9255 |
| }, |
| { |
| "epoch": 1.9794784095767421, |
| "grad_norm": 4.654627799987793, |
| "learning_rate": 2.304038004750594e-07, |
| "loss": 0.269, |
| "step": 9260 |
| }, |
| { |
| "epoch": 1.9805472424112869, |
| "grad_norm": 4.3756327629089355, |
| "learning_rate": 2.1852731591448934e-07, |
| "loss": 0.2026, |
| "step": 9265 |
| }, |
| { |
| "epoch": 1.9816160752458316, |
| "grad_norm": 4.612358093261719, |
| "learning_rate": 2.0665083135391925e-07, |
| "loss": 0.2297, |
| "step": 9270 |
| }, |
| { |
| "epoch": 1.9826849080803761, |
| "grad_norm": 4.363190174102783, |
| "learning_rate": 1.9477434679334917e-07, |
| "loss": 0.2414, |
| "step": 9275 |
| }, |
| { |
| "epoch": 1.983753740914921, |
| "grad_norm": 4.239806175231934, |
| "learning_rate": 1.828978622327791e-07, |
| "loss": 0.2724, |
| "step": 9280 |
| }, |
| { |
| "epoch": 1.9848225737494656, |
| "grad_norm": 3.087779998779297, |
| "learning_rate": 1.7102137767220902e-07, |
| "loss": 0.2338, |
| "step": 9285 |
| }, |
| { |
| "epoch": 1.9858914065840103, |
| "grad_norm": 5.1465277671813965, |
| "learning_rate": 1.59144893111639e-07, |
| "loss": 0.2606, |
| "step": 9290 |
| }, |
| { |
| "epoch": 1.986960239418555, |
| "grad_norm": 3.789433240890503, |
| "learning_rate": 1.4726840855106888e-07, |
| "loss": 0.2747, |
| "step": 9295 |
| }, |
| { |
| "epoch": 1.9880290722530995, |
| "grad_norm": 3.880868673324585, |
| "learning_rate": 1.3539192399049882e-07, |
| "loss": 0.1792, |
| "step": 9300 |
| }, |
| { |
| "epoch": 1.9890979050876443, |
| "grad_norm": 4.200949668884277, |
| "learning_rate": 1.2351543942992876e-07, |
| "loss": 0.2479, |
| "step": 9305 |
| }, |
| { |
| "epoch": 1.990166737922189, |
| "grad_norm": 4.372617721557617, |
| "learning_rate": 1.1163895486935867e-07, |
| "loss": 0.2554, |
| "step": 9310 |
| }, |
| { |
| "epoch": 1.9912355707567335, |
| "grad_norm": 3.7008919715881348, |
| "learning_rate": 9.97624703087886e-08, |
| "loss": 0.25, |
| "step": 9315 |
| }, |
| { |
| "epoch": 1.9923044035912785, |
| "grad_norm": 3.9479458332061768, |
| "learning_rate": 8.788598574821854e-08, |
| "loss": 0.2814, |
| "step": 9320 |
| }, |
| { |
| "epoch": 1.993373236425823, |
| "grad_norm": 4.310093402862549, |
| "learning_rate": 7.600950118764846e-08, |
| "loss": 0.2102, |
| "step": 9325 |
| }, |
| { |
| "epoch": 1.9944420692603677, |
| "grad_norm": 3.808363199234009, |
| "learning_rate": 6.41330166270784e-08, |
| "loss": 0.2466, |
| "step": 9330 |
| }, |
| { |
| "epoch": 1.9955109020949124, |
| "grad_norm": 4.076649188995361, |
| "learning_rate": 5.225653206650832e-08, |
| "loss": 0.2547, |
| "step": 9335 |
| }, |
| { |
| "epoch": 1.996579734929457, |
| "grad_norm": 3.773390531539917, |
| "learning_rate": 4.0380047505938245e-08, |
| "loss": 0.2216, |
| "step": 9340 |
| }, |
| { |
| "epoch": 1.9976485677640017, |
| "grad_norm": 3.149965286254883, |
| "learning_rate": 2.8503562945368176e-08, |
| "loss": 0.2521, |
| "step": 9345 |
| }, |
| { |
| "epoch": 1.9987174005985464, |
| "grad_norm": 3.375763177871704, |
| "learning_rate": 1.66270783847981e-08, |
| "loss": 0.2558, |
| "step": 9350 |
| }, |
| { |
| "epoch": 1.999786233433091, |
| "grad_norm": 5.134764194488525, |
| "learning_rate": 4.7505938242280285e-09, |
| "loss": 0.2345, |
| "step": 9355 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 0.12192188948392868, |
| "eval_mrr": 0.9798825256975033, |
| "eval_runtime": 315.6223, |
| "eval_samples_per_second": 7.192, |
| "eval_steps_per_second": 0.9, |
| "step": 9356 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 9356, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 1, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|