| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.9769954476479514, |
| "eval_steps": 500, |
| "global_step": 28500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0006936917407327119, |
| "grad_norm": 12224.0, |
| "learning_rate": 4.998434782608696e-06, |
| "loss": 10.7132, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0013873834814654238, |
| "grad_norm": 19712.0, |
| "learning_rate": 4.9966956521739135e-06, |
| "loss": 1.4256, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0020810752221981357, |
| "grad_norm": 21504.0, |
| "learning_rate": 4.994956521739131e-06, |
| "loss": 0.8714, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0027747669629308476, |
| "grad_norm": 2624.0, |
| "learning_rate": 4.993217391304348e-06, |
| "loss": 0.7117, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0034684587036635595, |
| "grad_norm": 23.875, |
| "learning_rate": 4.991478260869566e-06, |
| "loss": 0.3634, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.004162150444396271, |
| "grad_norm": 31.375, |
| "learning_rate": 4.989739130434783e-06, |
| "loss": 0.7818, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.004855842185128983, |
| "grad_norm": 8096.0, |
| "learning_rate": 4.988e-06, |
| "loss": 0.5493, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.005549533925861695, |
| "grad_norm": 27.875, |
| "learning_rate": 4.986260869565218e-06, |
| "loss": 0.457, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.006243225666594407, |
| "grad_norm": 37632.0, |
| "learning_rate": 4.984521739130435e-06, |
| "loss": 0.4123, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.006936917407327119, |
| "grad_norm": 61.5, |
| "learning_rate": 4.9827826086956525e-06, |
| "loss": 0.3742, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.007630609148059831, |
| "grad_norm": 34.25, |
| "learning_rate": 4.98104347826087e-06, |
| "loss": 0.2794, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.008324300888792543, |
| "grad_norm": 61.5, |
| "learning_rate": 4.979304347826087e-06, |
| "loss": 0.2913, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.009017992629525256, |
| "grad_norm": 8.5, |
| "learning_rate": 4.977565217391305e-06, |
| "loss": 0.3476, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.009711684370257967, |
| "grad_norm": 76800.0, |
| "learning_rate": 4.975826086956522e-06, |
| "loss": 1.5003, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.01040537611099068, |
| "grad_norm": 120.5, |
| "learning_rate": 4.97408695652174e-06, |
| "loss": 0.3051, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.01109906785172339, |
| "grad_norm": 2.28125, |
| "learning_rate": 4.972347826086957e-06, |
| "loss": 0.2632, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.011792759592456103, |
| "grad_norm": 12.6875, |
| "learning_rate": 4.970608695652174e-06, |
| "loss": 0.2847, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.012486451333188814, |
| "grad_norm": 20.75, |
| "learning_rate": 4.9688695652173914e-06, |
| "loss": 0.3436, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.013180143073921527, |
| "grad_norm": 234.0, |
| "learning_rate": 4.96713043478261e-06, |
| "loss": 0.2752, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.013873834814654238, |
| "grad_norm": 10.25, |
| "learning_rate": 4.965391304347826e-06, |
| "loss": 0.3504, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.01456752655538695, |
| "grad_norm": 8.0625, |
| "learning_rate": 4.9636521739130436e-06, |
| "loss": 0.2447, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.015261218296119662, |
| "grad_norm": 3.640625, |
| "learning_rate": 4.961913043478262e-06, |
| "loss": 0.2864, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.015954910036852375, |
| "grad_norm": 7.6875, |
| "learning_rate": 4.960173913043478e-06, |
| "loss": 0.2455, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.016648601777585086, |
| "grad_norm": 10.25, |
| "learning_rate": 4.958434782608696e-06, |
| "loss": 0.2847, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.017342293518317797, |
| "grad_norm": 6.75, |
| "learning_rate": 4.956695652173914e-06, |
| "loss": 0.2797, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.01803598525905051, |
| "grad_norm": 9.4375, |
| "learning_rate": 4.954956521739131e-06, |
| "loss": 0.5388, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.018729676999783222, |
| "grad_norm": 125.0, |
| "learning_rate": 4.953217391304348e-06, |
| "loss": 0.2508, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.019423368740515933, |
| "grad_norm": 2.515625, |
| "learning_rate": 4.951478260869565e-06, |
| "loss": 0.263, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.020117060481248644, |
| "grad_norm": 1.578125, |
| "learning_rate": 4.949739130434783e-06, |
| "loss": 0.2583, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.02081075222198136, |
| "grad_norm": 4.53125, |
| "learning_rate": 4.948000000000001e-06, |
| "loss": 0.2684, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.02150444396271407, |
| "grad_norm": 19.625, |
| "learning_rate": 4.946260869565217e-06, |
| "loss": 0.2318, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.02219813570344678, |
| "grad_norm": 15.8125, |
| "learning_rate": 4.9445217391304355e-06, |
| "loss": 0.2509, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.022891827444179492, |
| "grad_norm": 4.5625, |
| "learning_rate": 4.942782608695653e-06, |
| "loss": 0.2428, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.023585519184912206, |
| "grad_norm": 20.75, |
| "learning_rate": 4.94104347826087e-06, |
| "loss": 0.2789, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.024279210925644917, |
| "grad_norm": 1.59375, |
| "learning_rate": 4.939304347826087e-06, |
| "loss": 0.2624, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.02497290266637763, |
| "grad_norm": 1.6640625, |
| "learning_rate": 4.937565217391305e-06, |
| "loss": 0.2712, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.02566659440711034, |
| "grad_norm": 2.0625, |
| "learning_rate": 4.935826086956522e-06, |
| "loss": 0.2973, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.026360286147843054, |
| "grad_norm": 2.4375, |
| "learning_rate": 4.93408695652174e-06, |
| "loss": 0.2769, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.027053977888575765, |
| "grad_norm": 1.3203125, |
| "learning_rate": 4.932347826086957e-06, |
| "loss": 0.2675, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.027747669629308476, |
| "grad_norm": 4.21875, |
| "learning_rate": 4.9306086956521744e-06, |
| "loss": 0.2663, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.028441361370041187, |
| "grad_norm": 17.75, |
| "learning_rate": 4.928869565217392e-06, |
| "loss": 0.3639, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.0291350531107739, |
| "grad_norm": 5.03125, |
| "learning_rate": 4.927130434782609e-06, |
| "loss": 0.3279, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.029828744851506613, |
| "grad_norm": 2.21875, |
| "learning_rate": 4.9253913043478266e-06, |
| "loss": 0.2773, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.030522436592239324, |
| "grad_norm": 6.625, |
| "learning_rate": 4.923652173913044e-06, |
| "loss": 0.2445, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.031216128332972035, |
| "grad_norm": 3.84375, |
| "learning_rate": 4.921913043478261e-06, |
| "loss": 0.2256, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.03190982007370475, |
| "grad_norm": 3.15625, |
| "learning_rate": 4.920173913043479e-06, |
| "loss": 0.2288, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.03260351181443746, |
| "grad_norm": 52.0, |
| "learning_rate": 4.918434782608696e-06, |
| "loss": 0.2662, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.03329720355517017, |
| "grad_norm": 13.4375, |
| "learning_rate": 4.916695652173913e-06, |
| "loss": 0.2476, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.033990895295902886, |
| "grad_norm": 49.0, |
| "learning_rate": 4.914956521739131e-06, |
| "loss": 0.2328, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.03468458703663559, |
| "grad_norm": 1.5546875, |
| "learning_rate": 4.913217391304348e-06, |
| "loss": 0.2463, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.03537827877736831, |
| "grad_norm": 10.5625, |
| "learning_rate": 4.9114782608695655e-06, |
| "loss": 0.2657, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.03607197051810102, |
| "grad_norm": 2.1875, |
| "learning_rate": 4.909739130434783e-06, |
| "loss": 0.268, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.03676566225883373, |
| "grad_norm": 1.9921875, |
| "learning_rate": 4.908e-06, |
| "loss": 0.3324, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.037459353999566444, |
| "grad_norm": 1.5703125, |
| "learning_rate": 4.906260869565218e-06, |
| "loss": 0.2086, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.03815304574029915, |
| "grad_norm": 2.453125, |
| "learning_rate": 4.904521739130435e-06, |
| "loss": 0.2581, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.038846737481031866, |
| "grad_norm": 1.4609375, |
| "learning_rate": 4.902782608695652e-06, |
| "loss": 0.2908, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.03954042922176458, |
| "grad_norm": 4.28125, |
| "learning_rate": 4.90104347826087e-06, |
| "loss": 0.2192, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.04023412096249729, |
| "grad_norm": 3.859375, |
| "learning_rate": 4.899304347826087e-06, |
| "loss": 0.2916, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.04092781270323, |
| "grad_norm": 1.390625, |
| "learning_rate": 4.8975652173913045e-06, |
| "loss": 0.2388, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.04162150444396272, |
| "grad_norm": 1.5859375, |
| "learning_rate": 4.895826086956522e-06, |
| "loss": 0.2509, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.042315196184695425, |
| "grad_norm": 1.3515625, |
| "learning_rate": 4.89408695652174e-06, |
| "loss": 0.2736, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.04300888792542814, |
| "grad_norm": 5.09375, |
| "learning_rate": 4.892347826086957e-06, |
| "loss": 0.329, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.04370257966616085, |
| "grad_norm": 1.15625, |
| "learning_rate": 4.890608695652174e-06, |
| "loss": 0.2188, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.04439627140689356, |
| "grad_norm": 1.640625, |
| "learning_rate": 4.888869565217391e-06, |
| "loss": 0.2942, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.045089963147626276, |
| "grad_norm": 1.046875, |
| "learning_rate": 4.8871304347826096e-06, |
| "loss": 0.2324, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.045783654888358984, |
| "grad_norm": 6.40625, |
| "learning_rate": 4.885391304347826e-06, |
| "loss": 0.2601, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.0464773466290917, |
| "grad_norm": 1.859375, |
| "learning_rate": 4.8836521739130435e-06, |
| "loss": 0.2292, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.04717103836982441, |
| "grad_norm": 1.84375, |
| "learning_rate": 4.881913043478262e-06, |
| "loss": 0.2719, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.04786473011055712, |
| "grad_norm": 2.125, |
| "learning_rate": 4.880173913043479e-06, |
| "loss": 0.2875, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.048558421851289835, |
| "grad_norm": 2.265625, |
| "learning_rate": 4.878434782608696e-06, |
| "loss": 0.2648, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.04925211359202254, |
| "grad_norm": 1.625, |
| "learning_rate": 4.876695652173914e-06, |
| "loss": 0.2901, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.04994580533275526, |
| "grad_norm": 1.2421875, |
| "learning_rate": 4.874956521739131e-06, |
| "loss": 0.2532, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.05063949707348797, |
| "grad_norm": 1.953125, |
| "learning_rate": 4.8732173913043485e-06, |
| "loss": 0.2736, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.05133318881422068, |
| "grad_norm": 2.390625, |
| "learning_rate": 4.871478260869565e-06, |
| "loss": 0.2531, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.05202688055495339, |
| "grad_norm": 1.875, |
| "learning_rate": 4.869739130434783e-06, |
| "loss": 0.262, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.05272057229568611, |
| "grad_norm": 2.171875, |
| "learning_rate": 4.868000000000001e-06, |
| "loss": 0.2242, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.053414264036418815, |
| "grad_norm": 2.546875, |
| "learning_rate": 4.866260869565218e-06, |
| "loss": 0.2873, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.05410795577715153, |
| "grad_norm": 1.4765625, |
| "learning_rate": 4.864521739130435e-06, |
| "loss": 0.2867, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.05480164751788424, |
| "grad_norm": 1.40625, |
| "learning_rate": 4.862782608695653e-06, |
| "loss": 0.2815, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.05549533925861695, |
| "grad_norm": 1.7265625, |
| "learning_rate": 4.86104347826087e-06, |
| "loss": 0.2943, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.056189030999349666, |
| "grad_norm": 1.4375, |
| "learning_rate": 4.8593043478260875e-06, |
| "loss": 0.2723, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.056882722740082374, |
| "grad_norm": 1.25, |
| "learning_rate": 4.857565217391305e-06, |
| "loss": 0.2441, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.05757641448081509, |
| "grad_norm": 1.234375, |
| "learning_rate": 4.855826086956522e-06, |
| "loss": 0.239, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.0582701062215478, |
| "grad_norm": 1.2421875, |
| "learning_rate": 4.85408695652174e-06, |
| "loss": 0.3108, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.05896379796228051, |
| "grad_norm": 1.3515625, |
| "learning_rate": 4.852347826086957e-06, |
| "loss": 0.2425, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.059657489703013225, |
| "grad_norm": 1.390625, |
| "learning_rate": 4.850608695652174e-06, |
| "loss": 0.3201, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.06035118144374593, |
| "grad_norm": 1.859375, |
| "learning_rate": 4.848869565217392e-06, |
| "loss": 0.2886, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.06104487318447865, |
| "grad_norm": 1.15625, |
| "learning_rate": 4.847130434782609e-06, |
| "loss": 0.2269, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.06173856492521136, |
| "grad_norm": 1.21875, |
| "learning_rate": 4.8453913043478265e-06, |
| "loss": 0.247, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.06243225666594407, |
| "grad_norm": 1.484375, |
| "learning_rate": 4.843652173913044e-06, |
| "loss": 0.2773, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.06312594840667678, |
| "grad_norm": 1.1171875, |
| "learning_rate": 4.841913043478261e-06, |
| "loss": 0.2464, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.0638196401474095, |
| "grad_norm": 1.703125, |
| "learning_rate": 4.840173913043479e-06, |
| "loss": 0.2407, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.0645133318881422, |
| "grad_norm": 1.125, |
| "learning_rate": 4.838434782608696e-06, |
| "loss": 0.2772, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.06520702362887491, |
| "grad_norm": 1.015625, |
| "learning_rate": 4.836695652173913e-06, |
| "loss": 0.252, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.06590071536960763, |
| "grad_norm": 1.5703125, |
| "learning_rate": 4.834956521739131e-06, |
| "loss": 0.2426, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.06659440711034034, |
| "grad_norm": 1.421875, |
| "learning_rate": 4.833217391304348e-06, |
| "loss": 0.2722, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.06728809885107305, |
| "grad_norm": 1.7890625, |
| "learning_rate": 4.8314782608695655e-06, |
| "loss": 0.2661, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.06798179059180577, |
| "grad_norm": 1.6015625, |
| "learning_rate": 4.829739130434783e-06, |
| "loss": 0.2329, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.06867548233253848, |
| "grad_norm": 1.578125, |
| "learning_rate": 4.828e-06, |
| "loss": 0.2414, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.06936917407327119, |
| "grad_norm": 1.9296875, |
| "learning_rate": 4.826260869565218e-06, |
| "loss": 0.264, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.07006286581400391, |
| "grad_norm": 1.5546875, |
| "learning_rate": 4.824521739130435e-06, |
| "loss": 0.2493, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.07075655755473662, |
| "grad_norm": 1.3984375, |
| "learning_rate": 4.822782608695652e-06, |
| "loss": 0.3118, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.07145024929546932, |
| "grad_norm": 1.8828125, |
| "learning_rate": 4.82104347826087e-06, |
| "loss": 0.2739, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.07214394103620204, |
| "grad_norm": 1.0234375, |
| "learning_rate": 4.819304347826088e-06, |
| "loss": 0.2743, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.07283763277693475, |
| "grad_norm": 1.1796875, |
| "learning_rate": 4.817565217391304e-06, |
| "loss": 0.2299, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.07353132451766746, |
| "grad_norm": 1.15625, |
| "learning_rate": 4.815826086956522e-06, |
| "loss": 0.2203, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.07422501625840017, |
| "grad_norm": 0.74609375, |
| "learning_rate": 4.81408695652174e-06, |
| "loss": 0.2155, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.07491870799913289, |
| "grad_norm": 1.421875, |
| "learning_rate": 4.812347826086957e-06, |
| "loss": 0.248, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.0756123997398656, |
| "grad_norm": 1.1640625, |
| "learning_rate": 4.810608695652174e-06, |
| "loss": 0.2366, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.0763060914805983, |
| "grad_norm": 1.0234375, |
| "learning_rate": 4.808869565217391e-06, |
| "loss": 0.2251, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.07699978322133103, |
| "grad_norm": 1.5859375, |
| "learning_rate": 4.8071304347826095e-06, |
| "loss": 0.2265, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.07769347496206373, |
| "grad_norm": 1.5546875, |
| "learning_rate": 4.805391304347827e-06, |
| "loss": 0.2876, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.07838716670279644, |
| "grad_norm": 1.3359375, |
| "learning_rate": 4.803652173913043e-06, |
| "loss": 0.2942, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.07908085844352916, |
| "grad_norm": 0.8984375, |
| "learning_rate": 4.801913043478262e-06, |
| "loss": 0.2531, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.07977455018426187, |
| "grad_norm": 2.265625, |
| "learning_rate": 4.800173913043479e-06, |
| "loss": 0.2473, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.08046824192499458, |
| "grad_norm": 1.2265625, |
| "learning_rate": 4.7984347826086955e-06, |
| "loss": 0.2106, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.0811619336657273, |
| "grad_norm": 1.0, |
| "learning_rate": 4.796695652173914e-06, |
| "loss": 0.2111, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.08185562540646, |
| "grad_norm": 1.734375, |
| "learning_rate": 4.794956521739131e-06, |
| "loss": 0.262, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.08254931714719271, |
| "grad_norm": 1.125, |
| "learning_rate": 4.7932173913043485e-06, |
| "loss": 0.2402, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.08324300888792543, |
| "grad_norm": 1.2421875, |
| "learning_rate": 4.791478260869565e-06, |
| "loss": 0.2325, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.08393670062865814, |
| "grad_norm": 1.296875, |
| "learning_rate": 4.789739130434783e-06, |
| "loss": 0.2355, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.08463039236939085, |
| "grad_norm": 1.234375, |
| "learning_rate": 4.7880000000000006e-06, |
| "loss": 0.2561, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.08532408411012356, |
| "grad_norm": 1.7890625, |
| "learning_rate": 4.786260869565218e-06, |
| "loss": 0.2351, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.08601777585085628, |
| "grad_norm": 1.2421875, |
| "learning_rate": 4.784521739130435e-06, |
| "loss": 0.2571, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.08671146759158899, |
| "grad_norm": 2.640625, |
| "learning_rate": 4.782782608695653e-06, |
| "loss": 0.2187, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.0874051593323217, |
| "grad_norm": 1.1640625, |
| "learning_rate": 4.78104347826087e-06, |
| "loss": 0.313, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.08809885107305442, |
| "grad_norm": 1.9140625, |
| "learning_rate": 4.7793043478260874e-06, |
| "loss": 0.3083, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.08879254281378712, |
| "grad_norm": 1.5859375, |
| "learning_rate": 4.777565217391305e-06, |
| "loss": 0.2748, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.08948623455451983, |
| "grad_norm": 1.640625, |
| "learning_rate": 4.775826086956522e-06, |
| "loss": 0.2393, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.09017992629525255, |
| "grad_norm": 1.3359375, |
| "learning_rate": 4.7740869565217395e-06, |
| "loss": 0.2355, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.09087361803598526, |
| "grad_norm": 1.1328125, |
| "learning_rate": 4.772347826086957e-06, |
| "loss": 0.2335, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.09156730977671797, |
| "grad_norm": 1.5859375, |
| "learning_rate": 4.770608695652174e-06, |
| "loss": 0.2321, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.09226100151745069, |
| "grad_norm": 1.4453125, |
| "learning_rate": 4.768869565217392e-06, |
| "loss": 0.3314, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.0929546932581834, |
| "grad_norm": 0.99609375, |
| "learning_rate": 4.767130434782609e-06, |
| "loss": 0.2046, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.0936483849989161, |
| "grad_norm": 1.328125, |
| "learning_rate": 4.765391304347826e-06, |
| "loss": 0.2461, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.09434207673964883, |
| "grad_norm": 1.140625, |
| "learning_rate": 4.763652173913044e-06, |
| "loss": 0.2129, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.09503576848038153, |
| "grad_norm": 1.2578125, |
| "learning_rate": 4.761913043478261e-06, |
| "loss": 0.2483, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.09572946022111424, |
| "grad_norm": 1.859375, |
| "learning_rate": 4.7601739130434785e-06, |
| "loss": 0.2285, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.09642315196184695, |
| "grad_norm": 1.0390625, |
| "learning_rate": 4.758434782608696e-06, |
| "loss": 0.2333, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.09711684370257967, |
| "grad_norm": 1.4375, |
| "learning_rate": 4.756695652173913e-06, |
| "loss": 0.2183, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.09781053544331238, |
| "grad_norm": 1.3828125, |
| "learning_rate": 4.754956521739131e-06, |
| "loss": 0.2621, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.09850422718404508, |
| "grad_norm": 1.5859375, |
| "learning_rate": 4.753217391304348e-06, |
| "loss": 0.2572, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.0991979189247778, |
| "grad_norm": 1.265625, |
| "learning_rate": 4.751478260869566e-06, |
| "loss": 0.2177, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.09989161066551051, |
| "grad_norm": 1.5703125, |
| "learning_rate": 4.749739130434783e-06, |
| "loss": 0.2542, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.10058530240624322, |
| "grad_norm": 1.4609375, |
| "learning_rate": 4.748e-06, |
| "loss": 0.2459, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.10127899414697594, |
| "grad_norm": 1.640625, |
| "learning_rate": 4.746260869565218e-06, |
| "loss": 0.2786, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.10197268588770865, |
| "grad_norm": 1.8359375, |
| "learning_rate": 4.744521739130435e-06, |
| "loss": 0.2875, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.10266637762844136, |
| "grad_norm": 1.9765625, |
| "learning_rate": 4.742782608695652e-06, |
| "loss": 0.2957, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.10336006936917408, |
| "grad_norm": 1.59375, |
| "learning_rate": 4.74104347826087e-06, |
| "loss": 0.2543, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.10405376110990679, |
| "grad_norm": 1.828125, |
| "learning_rate": 4.739304347826088e-06, |
| "loss": 0.2631, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.1047474528506395, |
| "grad_norm": 1.5703125, |
| "learning_rate": 4.737565217391304e-06, |
| "loss": 0.2462, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.10544114459137222, |
| "grad_norm": 1.203125, |
| "learning_rate": 4.735826086956522e-06, |
| "loss": 0.2237, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.10613483633210492, |
| "grad_norm": 1.1875, |
| "learning_rate": 4.73408695652174e-06, |
| "loss": 0.2206, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.10682852807283763, |
| "grad_norm": 1.59375, |
| "learning_rate": 4.732347826086957e-06, |
| "loss": 0.2533, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.10752221981357034, |
| "grad_norm": 1.203125, |
| "learning_rate": 4.730608695652174e-06, |
| "loss": 0.2136, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.10821591155430306, |
| "grad_norm": 2.015625, |
| "learning_rate": 4.728869565217391e-06, |
| "loss": 0.3101, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.10890960329503577, |
| "grad_norm": 2.4375, |
| "learning_rate": 4.727130434782609e-06, |
| "loss": 0.2574, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.10960329503576847, |
| "grad_norm": 1.25, |
| "learning_rate": 4.725391304347827e-06, |
| "loss": 0.2142, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.1102969867765012, |
| "grad_norm": 2.21875, |
| "learning_rate": 4.723652173913043e-06, |
| "loss": 0.1908, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.1109906785172339, |
| "grad_norm": 1.703125, |
| "learning_rate": 4.7219130434782615e-06, |
| "loss": 0.2437, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.11168437025796661, |
| "grad_norm": 1.25, |
| "learning_rate": 4.720173913043479e-06, |
| "loss": 0.2262, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.11237806199869933, |
| "grad_norm": 0.9921875, |
| "learning_rate": 4.718434782608696e-06, |
| "loss": 0.258, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.11307175373943204, |
| "grad_norm": 1.609375, |
| "learning_rate": 4.716695652173914e-06, |
| "loss": 0.2992, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.11376544548016475, |
| "grad_norm": 1.5703125, |
| "learning_rate": 4.714956521739131e-06, |
| "loss": 0.247, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.11445913722089747, |
| "grad_norm": 1.1484375, |
| "learning_rate": 4.713217391304348e-06, |
| "loss": 0.262, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.11515282896163018, |
| "grad_norm": 1.2890625, |
| "learning_rate": 4.711478260869566e-06, |
| "loss": 0.2604, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.11584652070236288, |
| "grad_norm": 1.0390625, |
| "learning_rate": 4.709739130434783e-06, |
| "loss": 0.2101, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.1165402124430956, |
| "grad_norm": 1.8671875, |
| "learning_rate": 4.7080000000000005e-06, |
| "loss": 0.2641, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.11723390418382831, |
| "grad_norm": 1.3984375, |
| "learning_rate": 4.706260869565218e-06, |
| "loss": 0.2134, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.11792759592456102, |
| "grad_norm": 1.0546875, |
| "learning_rate": 4.704521739130435e-06, |
| "loss": 0.2305, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.11862128766529373, |
| "grad_norm": 1.2265625, |
| "learning_rate": 4.702782608695653e-06, |
| "loss": 0.2542, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.11931497940602645, |
| "grad_norm": 1.328125, |
| "learning_rate": 4.70104347826087e-06, |
| "loss": 0.2722, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.12000867114675916, |
| "grad_norm": 1.015625, |
| "learning_rate": 4.699304347826087e-06, |
| "loss": 0.2172, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.12070236288749187, |
| "grad_norm": 1.1953125, |
| "learning_rate": 4.697565217391305e-06, |
| "loss": 0.3591, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.12139605462822459, |
| "grad_norm": 1.3359375, |
| "learning_rate": 4.695826086956522e-06, |
| "loss": 0.2487, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.1220897463689573, |
| "grad_norm": 1.234375, |
| "learning_rate": 4.6940869565217395e-06, |
| "loss": 0.2619, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.12278343810969, |
| "grad_norm": 1.0078125, |
| "learning_rate": 4.692347826086957e-06, |
| "loss": 0.2614, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.12347712985042272, |
| "grad_norm": 1.0625, |
| "learning_rate": 4.690608695652174e-06, |
| "loss": 0.2354, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.12417082159115543, |
| "grad_norm": 1.109375, |
| "learning_rate": 4.688869565217392e-06, |
| "loss": 0.2302, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.12486451333188814, |
| "grad_norm": 1.203125, |
| "learning_rate": 4.687130434782609e-06, |
| "loss": 0.2436, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.12555820507262086, |
| "grad_norm": 1.9140625, |
| "learning_rate": 4.685391304347826e-06, |
| "loss": 0.2219, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.12625189681335355, |
| "grad_norm": 1.3125, |
| "learning_rate": 4.683652173913044e-06, |
| "loss": 0.237, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.12694558855408627, |
| "grad_norm": 1.1640625, |
| "learning_rate": 4.681913043478261e-06, |
| "loss": 0.2562, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.127639280294819, |
| "grad_norm": 1.2734375, |
| "learning_rate": 4.6801739130434784e-06, |
| "loss": 0.2683, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.1283329720355517, |
| "grad_norm": 1.421875, |
| "learning_rate": 4.678434782608696e-06, |
| "loss": 0.2653, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.1290266637762844, |
| "grad_norm": 1.2421875, |
| "learning_rate": 4.676695652173913e-06, |
| "loss": 0.2423, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.12972035551701713, |
| "grad_norm": 0.98828125, |
| "learning_rate": 4.6749565217391305e-06, |
| "loss": 0.2275, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.13041404725774983, |
| "grad_norm": 1.046875, |
| "learning_rate": 4.673217391304348e-06, |
| "loss": 0.229, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.13110773899848255, |
| "grad_norm": 1.453125, |
| "learning_rate": 4.671478260869566e-06, |
| "loss": 0.2491, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.13180143073921527, |
| "grad_norm": 1.3359375, |
| "learning_rate": 4.669739130434783e-06, |
| "loss": 0.2106, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.13249512247994796, |
| "grad_norm": 1.7265625, |
| "learning_rate": 4.668e-06, |
| "loss": 0.3183, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.13318881422068068, |
| "grad_norm": 1.5625, |
| "learning_rate": 4.666260869565218e-06, |
| "loss": 0.2252, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.1338825059614134, |
| "grad_norm": 1.078125, |
| "learning_rate": 4.664521739130436e-06, |
| "loss": 0.2986, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.1345761977021461, |
| "grad_norm": 1.6328125, |
| "learning_rate": 4.662782608695652e-06, |
| "loss": 0.2311, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.13526988944287882, |
| "grad_norm": 2.3125, |
| "learning_rate": 4.6610434782608695e-06, |
| "loss": 0.2383, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.13596358118361154, |
| "grad_norm": 1.4609375, |
| "learning_rate": 4.659304347826088e-06, |
| "loss": 0.2228, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.13665727292434424, |
| "grad_norm": 1.6484375, |
| "learning_rate": 4.657565217391305e-06, |
| "loss": 0.2704, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.13735096466507696, |
| "grad_norm": 1.265625, |
| "learning_rate": 4.655826086956522e-06, |
| "loss": 0.2858, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.13804465640580968, |
| "grad_norm": 1.046875, |
| "learning_rate": 4.65408695652174e-06, |
| "loss": 0.2813, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.13873834814654237, |
| "grad_norm": 1.25, |
| "learning_rate": 4.652347826086957e-06, |
| "loss": 0.2458, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.1394320398872751, |
| "grad_norm": 1.1171875, |
| "learning_rate": 4.650608695652175e-06, |
| "loss": 0.2186, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.14012573162800782, |
| "grad_norm": 2.546875, |
| "learning_rate": 4.648869565217391e-06, |
| "loss": 0.2789, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.1408194233687405, |
| "grad_norm": 1.59375, |
| "learning_rate": 4.647130434782609e-06, |
| "loss": 0.2564, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.14151311510947323, |
| "grad_norm": 1.28125, |
| "learning_rate": 4.645391304347827e-06, |
| "loss": 0.2318, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.14220680685020595, |
| "grad_norm": 1.703125, |
| "learning_rate": 4.643652173913044e-06, |
| "loss": 0.2513, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.14290049859093865, |
| "grad_norm": 0.89453125, |
| "learning_rate": 4.6419130434782614e-06, |
| "loss": 0.2211, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.14359419033167137, |
| "grad_norm": 1.1328125, |
| "learning_rate": 4.640173913043479e-06, |
| "loss": 0.2939, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.1442878820724041, |
| "grad_norm": 1.296875, |
| "learning_rate": 4.638434782608696e-06, |
| "loss": 0.2321, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.14498157381313678, |
| "grad_norm": 1.484375, |
| "learning_rate": 4.6366956521739136e-06, |
| "loss": 0.226, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.1456752655538695, |
| "grad_norm": 1.0625, |
| "learning_rate": 4.634956521739131e-06, |
| "loss": 0.2262, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.1463689572946022, |
| "grad_norm": 1.328125, |
| "learning_rate": 4.633217391304348e-06, |
| "loss": 0.2198, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.14706264903533492, |
| "grad_norm": 1.421875, |
| "learning_rate": 4.631478260869566e-06, |
| "loss": 0.2517, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.14775634077606764, |
| "grad_norm": 1.125, |
| "learning_rate": 4.629739130434783e-06, |
| "loss": 0.2736, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.14845003251680033, |
| "grad_norm": 1.4609375, |
| "learning_rate": 4.628e-06, |
| "loss": 0.2483, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.14914372425753306, |
| "grad_norm": 1.1328125, |
| "learning_rate": 4.626260869565218e-06, |
| "loss": 0.2138, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.14983741599826578, |
| "grad_norm": 1.578125, |
| "learning_rate": 4.624521739130435e-06, |
| "loss": 0.2202, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.15053110773899847, |
| "grad_norm": 1.359375, |
| "learning_rate": 4.6227826086956525e-06, |
| "loss": 0.2468, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.1512247994797312, |
| "grad_norm": 1.171875, |
| "learning_rate": 4.62104347826087e-06, |
| "loss": 0.2218, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.1519184912204639, |
| "grad_norm": 1.6328125, |
| "learning_rate": 4.619304347826087e-06, |
| "loss": 0.2741, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.1526121829611966, |
| "grad_norm": 1.078125, |
| "learning_rate": 4.617565217391305e-06, |
| "loss": 0.2553, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.15330587470192933, |
| "grad_norm": 1.2265625, |
| "learning_rate": 4.615826086956522e-06, |
| "loss": 0.234, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.15399956644266205, |
| "grad_norm": 1.4453125, |
| "learning_rate": 4.614086956521739e-06, |
| "loss": 0.2321, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.15469325818339474, |
| "grad_norm": 1.2265625, |
| "learning_rate": 4.612347826086957e-06, |
| "loss": 0.2222, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.15538694992412747, |
| "grad_norm": 1.40625, |
| "learning_rate": 4.610608695652174e-06, |
| "loss": 0.2726, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.1560806416648602, |
| "grad_norm": 1.0078125, |
| "learning_rate": 4.6088695652173915e-06, |
| "loss": 0.2418, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.15677433340559288, |
| "grad_norm": 1.3828125, |
| "learning_rate": 4.607130434782609e-06, |
| "loss": 0.2499, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.1574680251463256, |
| "grad_norm": 0.984375, |
| "learning_rate": 4.605391304347826e-06, |
| "loss": 0.2741, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.15816171688705832, |
| "grad_norm": 1.3515625, |
| "learning_rate": 4.6036521739130445e-06, |
| "loss": 0.2061, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.15885540862779102, |
| "grad_norm": 1.3046875, |
| "learning_rate": 4.601913043478261e-06, |
| "loss": 0.2696, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.15954910036852374, |
| "grad_norm": 1.359375, |
| "learning_rate": 4.600173913043478e-06, |
| "loss": 0.2767, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.16024279210925646, |
| "grad_norm": 1.3515625, |
| "learning_rate": 4.598434782608696e-06, |
| "loss": 0.2485, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.16093648384998915, |
| "grad_norm": 1.2578125, |
| "learning_rate": 4.596695652173914e-06, |
| "loss": 0.2485, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.16163017559072188, |
| "grad_norm": 1.8046875, |
| "learning_rate": 4.5949565217391305e-06, |
| "loss": 0.2586, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.1623238673314546, |
| "grad_norm": 1.0390625, |
| "learning_rate": 4.593217391304348e-06, |
| "loss": 0.1902, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.1630175590721873, |
| "grad_norm": 1.7734375, |
| "learning_rate": 4.591478260869566e-06, |
| "loss": 0.3126, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.16371125081292, |
| "grad_norm": 1.8828125, |
| "learning_rate": 4.5897391304347834e-06, |
| "loss": 0.285, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.16440494255365273, |
| "grad_norm": 1.1484375, |
| "learning_rate": 4.588e-06, |
| "loss": 0.2054, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.16509863429438543, |
| "grad_norm": 2.921875, |
| "learning_rate": 4.586260869565218e-06, |
| "loss": 0.2095, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.16579232603511815, |
| "grad_norm": 1.046875, |
| "learning_rate": 4.5845217391304355e-06, |
| "loss": 0.234, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.16648601777585087, |
| "grad_norm": 1.4765625, |
| "learning_rate": 4.582782608695652e-06, |
| "loss": 0.2567, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.16717970951658356, |
| "grad_norm": 0.94140625, |
| "learning_rate": 4.5810434782608694e-06, |
| "loss": 0.2976, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.16787340125731628, |
| "grad_norm": 1.3203125, |
| "learning_rate": 4.579304347826088e-06, |
| "loss": 0.2411, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.16856709299804898, |
| "grad_norm": 1.125, |
| "learning_rate": 4.577565217391305e-06, |
| "loss": 0.2256, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.1692607847387817, |
| "grad_norm": 1.1796875, |
| "learning_rate": 4.5758260869565215e-06, |
| "loss": 0.2514, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.16995447647951442, |
| "grad_norm": 1.0625, |
| "learning_rate": 4.57408695652174e-06, |
| "loss": 0.2754, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.17064816822024712, |
| "grad_norm": 1.2890625, |
| "learning_rate": 4.572347826086957e-06, |
| "loss": 0.2486, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.17134185996097984, |
| "grad_norm": 1.1015625, |
| "learning_rate": 4.5706086956521745e-06, |
| "loss": 0.2643, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.17203555170171256, |
| "grad_norm": 1.4453125, |
| "learning_rate": 4.568869565217391e-06, |
| "loss": 0.2401, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.17272924344244525, |
| "grad_norm": 1.3671875, |
| "learning_rate": 4.567130434782609e-06, |
| "loss": 0.2471, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.17342293518317797, |
| "grad_norm": 1.6015625, |
| "learning_rate": 4.565391304347827e-06, |
| "loss": 0.2813, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.1741166269239107, |
| "grad_norm": 2.109375, |
| "learning_rate": 4.563652173913044e-06, |
| "loss": 0.2199, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.1748103186646434, |
| "grad_norm": 1.0859375, |
| "learning_rate": 4.561913043478261e-06, |
| "loss": 0.2346, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.1755040104053761, |
| "grad_norm": 0.90234375, |
| "learning_rate": 4.560173913043479e-06, |
| "loss": 0.3302, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.17619770214610883, |
| "grad_norm": 1.234375, |
| "learning_rate": 4.558434782608696e-06, |
| "loss": 0.2683, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.17689139388684152, |
| "grad_norm": 2.34375, |
| "learning_rate": 4.5566956521739135e-06, |
| "loss": 0.2297, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.17758508562757425, |
| "grad_norm": 1.3203125, |
| "learning_rate": 4.554956521739131e-06, |
| "loss": 0.2285, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.17827877736830697, |
| "grad_norm": 1.1796875, |
| "learning_rate": 4.553217391304348e-06, |
| "loss": 0.2914, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.17897246910903966, |
| "grad_norm": 1.3359375, |
| "learning_rate": 4.551478260869566e-06, |
| "loss": 0.2983, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.17966616084977238, |
| "grad_norm": 1.1484375, |
| "learning_rate": 4.549739130434783e-06, |
| "loss": 0.2334, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.1803598525905051, |
| "grad_norm": 1.4609375, |
| "learning_rate": 4.548e-06, |
| "loss": 0.2318, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.1810535443312378, |
| "grad_norm": 1.4609375, |
| "learning_rate": 4.546260869565218e-06, |
| "loss": 0.2391, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.18174723607197052, |
| "grad_norm": 0.80859375, |
| "learning_rate": 4.544521739130435e-06, |
| "loss": 0.2697, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.18244092781270324, |
| "grad_norm": 1.0703125, |
| "learning_rate": 4.5427826086956524e-06, |
| "loss": 0.263, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.18313461955343593, |
| "grad_norm": 1.6015625, |
| "learning_rate": 4.54104347826087e-06, |
| "loss": 0.2569, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.18382831129416866, |
| "grad_norm": 1.8203125, |
| "learning_rate": 4.539304347826087e-06, |
| "loss": 0.2997, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.18452200303490138, |
| "grad_norm": 1.4765625, |
| "learning_rate": 4.5375652173913046e-06, |
| "loss": 0.2352, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.18521569477563407, |
| "grad_norm": 2.0625, |
| "learning_rate": 4.535826086956523e-06, |
| "loss": 0.2896, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.1859093865163668, |
| "grad_norm": 1.921875, |
| "learning_rate": 4.534086956521739e-06, |
| "loss": 0.2807, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.18660307825709951, |
| "grad_norm": 1.2578125, |
| "learning_rate": 4.532347826086957e-06, |
| "loss": 0.2258, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.1872967699978322, |
| "grad_norm": 0.91015625, |
| "learning_rate": 4.530608695652174e-06, |
| "loss": 0.2467, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.18799046173856493, |
| "grad_norm": 1.1796875, |
| "learning_rate": 4.528869565217391e-06, |
| "loss": 0.2375, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.18868415347929765, |
| "grad_norm": 1.0859375, |
| "learning_rate": 4.527130434782609e-06, |
| "loss": 0.2648, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.18937784522003034, |
| "grad_norm": 1.1640625, |
| "learning_rate": 4.525391304347826e-06, |
| "loss": 0.241, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.19007153696076307, |
| "grad_norm": 1.59375, |
| "learning_rate": 4.523652173913044e-06, |
| "loss": 0.2694, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.19076522870149576, |
| "grad_norm": 1.0390625, |
| "learning_rate": 4.521913043478261e-06, |
| "loss": 0.213, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.19145892044222848, |
| "grad_norm": 1.0546875, |
| "learning_rate": 4.520173913043478e-06, |
| "loss": 0.2127, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.1921526121829612, |
| "grad_norm": 1.4765625, |
| "learning_rate": 4.518434782608696e-06, |
| "loss": 0.26, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.1928463039236939, |
| "grad_norm": 1.28125, |
| "learning_rate": 4.516695652173914e-06, |
| "loss": 0.2297, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.19353999566442662, |
| "grad_norm": 1.40625, |
| "learning_rate": 4.51495652173913e-06, |
| "loss": 0.2087, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.19423368740515934, |
| "grad_norm": 1.1328125, |
| "learning_rate": 4.513217391304348e-06, |
| "loss": 0.2179, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.19492737914589203, |
| "grad_norm": 1.5625, |
| "learning_rate": 4.511478260869566e-06, |
| "loss": 0.2698, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.19562107088662475, |
| "grad_norm": 1.0625, |
| "learning_rate": 4.509739130434783e-06, |
| "loss": 0.2297, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.19631476262735748, |
| "grad_norm": 1.578125, |
| "learning_rate": 4.508e-06, |
| "loss": 0.2154, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.19700845436809017, |
| "grad_norm": 1.1171875, |
| "learning_rate": 4.506260869565218e-06, |
| "loss": 0.236, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.1977021461088229, |
| "grad_norm": 1.109375, |
| "learning_rate": 4.5045217391304355e-06, |
| "loss": 0.281, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.1983958378495556, |
| "grad_norm": 1.1171875, |
| "learning_rate": 4.502782608695653e-06, |
| "loss": 0.2373, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.1990895295902883, |
| "grad_norm": 1.375, |
| "learning_rate": 4.501043478260869e-06, |
| "loss": 0.2899, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.19978322133102103, |
| "grad_norm": 1.421875, |
| "learning_rate": 4.4993043478260876e-06, |
| "loss": 0.2459, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.20047691307175375, |
| "grad_norm": 1.1015625, |
| "learning_rate": 4.497565217391305e-06, |
| "loss": 0.2292, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.20117060481248644, |
| "grad_norm": 1.0546875, |
| "learning_rate": 4.495826086956522e-06, |
| "loss": 0.2465, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.20186429655321916, |
| "grad_norm": 1.3671875, |
| "learning_rate": 4.49408695652174e-06, |
| "loss": 0.2323, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.20255798829395188, |
| "grad_norm": 2.234375, |
| "learning_rate": 4.492347826086957e-06, |
| "loss": 0.3579, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.20325168003468458, |
| "grad_norm": 1.28125, |
| "learning_rate": 4.4906086956521744e-06, |
| "loss": 0.3302, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.2039453717754173, |
| "grad_norm": 1.4375, |
| "learning_rate": 4.488869565217392e-06, |
| "loss": 0.2271, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.20463906351615002, |
| "grad_norm": 1.1796875, |
| "learning_rate": 4.487130434782609e-06, |
| "loss": 0.2119, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.20533275525688272, |
| "grad_norm": 1.140625, |
| "learning_rate": 4.4853913043478265e-06, |
| "loss": 0.2446, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.20602644699761544, |
| "grad_norm": 0.9375, |
| "learning_rate": 4.483652173913044e-06, |
| "loss": 0.2289, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.20672013873834816, |
| "grad_norm": 1.1484375, |
| "learning_rate": 4.481913043478261e-06, |
| "loss": 0.2335, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.20741383047908085, |
| "grad_norm": 0.9609375, |
| "learning_rate": 4.480173913043479e-06, |
| "loss": 0.2208, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.20810752221981357, |
| "grad_norm": 1.5859375, |
| "learning_rate": 4.478434782608696e-06, |
| "loss": 0.2224, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.2088012139605463, |
| "grad_norm": 1.234375, |
| "learning_rate": 4.476695652173913e-06, |
| "loss": 0.2543, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.209494905701279, |
| "grad_norm": 1.171875, |
| "learning_rate": 4.474956521739131e-06, |
| "loss": 0.2274, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.2101885974420117, |
| "grad_norm": 1.3046875, |
| "learning_rate": 4.473217391304348e-06, |
| "loss": 0.2999, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.21088228918274443, |
| "grad_norm": 1.1640625, |
| "learning_rate": 4.4714782608695655e-06, |
| "loss": 0.2384, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.21157598092347712, |
| "grad_norm": 1.3984375, |
| "learning_rate": 4.469739130434783e-06, |
| "loss": 0.2618, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.21226967266420985, |
| "grad_norm": 1.453125, |
| "learning_rate": 4.468e-06, |
| "loss": 0.2545, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.21296336440494254, |
| "grad_norm": 1.1171875, |
| "learning_rate": 4.466260869565218e-06, |
| "loss": 0.2794, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.21365705614567526, |
| "grad_norm": 1.0234375, |
| "learning_rate": 4.464521739130435e-06, |
| "loss": 0.2537, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.21435074788640798, |
| "grad_norm": 1.4921875, |
| "learning_rate": 4.462782608695652e-06, |
| "loss": 0.2936, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.21504443962714068, |
| "grad_norm": 1.2890625, |
| "learning_rate": 4.46104347826087e-06, |
| "loss": 0.2446, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.2157381313678734, |
| "grad_norm": 1.171875, |
| "learning_rate": 4.459304347826087e-06, |
| "loss": 0.2443, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.21643182310860612, |
| "grad_norm": 1.2578125, |
| "learning_rate": 4.4575652173913045e-06, |
| "loss": 0.2338, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.2171255148493388, |
| "grad_norm": 1.0, |
| "learning_rate": 4.455826086956523e-06, |
| "loss": 0.2197, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.21781920659007153, |
| "grad_norm": 0.96875, |
| "learning_rate": 4.454086956521739e-06, |
| "loss": 0.2222, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.21851289833080426, |
| "grad_norm": 1.15625, |
| "learning_rate": 4.452347826086957e-06, |
| "loss": 0.2494, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.21920659007153695, |
| "grad_norm": 1.2890625, |
| "learning_rate": 4.450608695652174e-06, |
| "loss": 0.2529, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.21990028181226967, |
| "grad_norm": 0.9921875, |
| "learning_rate": 4.448869565217392e-06, |
| "loss": 0.185, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.2205939735530024, |
| "grad_norm": 1.6015625, |
| "learning_rate": 4.447130434782609e-06, |
| "loss": 0.2576, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.2212876652937351, |
| "grad_norm": 1.84375, |
| "learning_rate": 4.445391304347826e-06, |
| "loss": 0.2218, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.2219813570344678, |
| "grad_norm": 1.546875, |
| "learning_rate": 4.443652173913044e-06, |
| "loss": 0.2442, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.22267504877520053, |
| "grad_norm": 1.0859375, |
| "learning_rate": 4.441913043478262e-06, |
| "loss": 0.269, |
| "step": 3210 |
| }, |
| { |
| "epoch": 0.22336874051593322, |
| "grad_norm": 1.2578125, |
| "learning_rate": 4.440173913043478e-06, |
| "loss": 0.239, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.22406243225666594, |
| "grad_norm": 0.94140625, |
| "learning_rate": 4.4384347826086956e-06, |
| "loss": 0.2284, |
| "step": 3230 |
| }, |
| { |
| "epoch": 0.22475612399739867, |
| "grad_norm": 1.2265625, |
| "learning_rate": 4.436695652173914e-06, |
| "loss": 0.2405, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.22544981573813136, |
| "grad_norm": 1.5703125, |
| "learning_rate": 4.434956521739131e-06, |
| "loss": 0.2391, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.22614350747886408, |
| "grad_norm": 1.171875, |
| "learning_rate": 4.433217391304348e-06, |
| "loss": 0.2244, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.2268371992195968, |
| "grad_norm": 1.5, |
| "learning_rate": 4.431478260869566e-06, |
| "loss": 0.2313, |
| "step": 3270 |
| }, |
| { |
| "epoch": 0.2275308909603295, |
| "grad_norm": 1.71875, |
| "learning_rate": 4.429739130434783e-06, |
| "loss": 0.2855, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.22822458270106222, |
| "grad_norm": 1.4921875, |
| "learning_rate": 4.428000000000001e-06, |
| "loss": 0.2532, |
| "step": 3290 |
| }, |
| { |
| "epoch": 0.22891827444179494, |
| "grad_norm": 1.2890625, |
| "learning_rate": 4.426260869565218e-06, |
| "loss": 0.2226, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.22961196618252763, |
| "grad_norm": 1.3203125, |
| "learning_rate": 4.424521739130435e-06, |
| "loss": 0.2389, |
| "step": 3310 |
| }, |
| { |
| "epoch": 0.23030565792326035, |
| "grad_norm": 1.1484375, |
| "learning_rate": 4.422782608695653e-06, |
| "loss": 0.2067, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.23099934966399308, |
| "grad_norm": 1.2890625, |
| "learning_rate": 4.421043478260869e-06, |
| "loss": 0.1951, |
| "step": 3330 |
| }, |
| { |
| "epoch": 0.23169304140472577, |
| "grad_norm": 1.2265625, |
| "learning_rate": 4.4193043478260875e-06, |
| "loss": 0.2054, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.2323867331454585, |
| "grad_norm": 1.125, |
| "learning_rate": 4.417565217391305e-06, |
| "loss": 0.2557, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.2330804248861912, |
| "grad_norm": 1.203125, |
| "learning_rate": 4.415826086956522e-06, |
| "loss": 0.229, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.2337741166269239, |
| "grad_norm": 1.4921875, |
| "learning_rate": 4.41408695652174e-06, |
| "loss": 0.2895, |
| "step": 3370 |
| }, |
| { |
| "epoch": 0.23446780836765663, |
| "grad_norm": 1.0859375, |
| "learning_rate": 4.412347826086957e-06, |
| "loss": 0.299, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.23516150010838932, |
| "grad_norm": 1.1015625, |
| "learning_rate": 4.410608695652174e-06, |
| "loss": 0.2132, |
| "step": 3390 |
| }, |
| { |
| "epoch": 0.23585519184912204, |
| "grad_norm": 1.078125, |
| "learning_rate": 4.408869565217392e-06, |
| "loss": 0.2573, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.23654888358985476, |
| "grad_norm": 1.109375, |
| "learning_rate": 4.407130434782609e-06, |
| "loss": 0.2225, |
| "step": 3410 |
| }, |
| { |
| "epoch": 0.23724257533058746, |
| "grad_norm": 1.2109375, |
| "learning_rate": 4.4053913043478265e-06, |
| "loss": 0.2154, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.23793626707132018, |
| "grad_norm": 0.91015625, |
| "learning_rate": 4.403652173913044e-06, |
| "loss": 0.3088, |
| "step": 3430 |
| }, |
| { |
| "epoch": 0.2386299588120529, |
| "grad_norm": 1.9375, |
| "learning_rate": 4.401913043478261e-06, |
| "loss": 0.2877, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.2393236505527856, |
| "grad_norm": 1.984375, |
| "learning_rate": 4.4001739130434786e-06, |
| "loss": 0.2145, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.24001734229351832, |
| "grad_norm": 0.8515625, |
| "learning_rate": 4.398434782608696e-06, |
| "loss": 0.2338, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.24071103403425104, |
| "grad_norm": 1.328125, |
| "learning_rate": 4.396695652173913e-06, |
| "loss": 0.2488, |
| "step": 3470 |
| }, |
| { |
| "epoch": 0.24140472577498373, |
| "grad_norm": 1.0234375, |
| "learning_rate": 4.394956521739131e-06, |
| "loss": 0.2444, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.24209841751571645, |
| "grad_norm": 1.234375, |
| "learning_rate": 4.393217391304348e-06, |
| "loss": 0.2326, |
| "step": 3490 |
| }, |
| { |
| "epoch": 0.24279210925644917, |
| "grad_norm": 0.99609375, |
| "learning_rate": 4.3914782608695654e-06, |
| "loss": 0.2431, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.24348580099718187, |
| "grad_norm": 1.4375, |
| "learning_rate": 4.389739130434783e-06, |
| "loss": 0.2479, |
| "step": 3510 |
| }, |
| { |
| "epoch": 0.2441794927379146, |
| "grad_norm": 1.46875, |
| "learning_rate": 4.388e-06, |
| "loss": 0.2517, |
| "step": 3520 |
| }, |
| { |
| "epoch": 0.2448731844786473, |
| "grad_norm": 1.1640625, |
| "learning_rate": 4.3862608695652175e-06, |
| "loss": 0.2808, |
| "step": 3530 |
| }, |
| { |
| "epoch": 0.24556687621938, |
| "grad_norm": 1.1796875, |
| "learning_rate": 4.384521739130435e-06, |
| "loss": 0.2571, |
| "step": 3540 |
| }, |
| { |
| "epoch": 0.24626056796011273, |
| "grad_norm": 1.9921875, |
| "learning_rate": 4.382782608695652e-06, |
| "loss": 0.302, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.24695425970084545, |
| "grad_norm": 1.28125, |
| "learning_rate": 4.3810434782608705e-06, |
| "loss": 0.2128, |
| "step": 3560 |
| }, |
| { |
| "epoch": 0.24764795144157814, |
| "grad_norm": 0.96875, |
| "learning_rate": 4.379304347826087e-06, |
| "loss": 0.2285, |
| "step": 3570 |
| }, |
| { |
| "epoch": 0.24834164318231086, |
| "grad_norm": 1.3671875, |
| "learning_rate": 4.377565217391304e-06, |
| "loss": 0.2401, |
| "step": 3580 |
| }, |
| { |
| "epoch": 0.24903533492304358, |
| "grad_norm": 1.109375, |
| "learning_rate": 4.375826086956523e-06, |
| "loss": 0.2543, |
| "step": 3590 |
| }, |
| { |
| "epoch": 0.24972902666377628, |
| "grad_norm": 1.2109375, |
| "learning_rate": 4.37408695652174e-06, |
| "loss": 0.2598, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.25042271840450897, |
| "grad_norm": 1.34375, |
| "learning_rate": 4.3723478260869565e-06, |
| "loss": 0.3157, |
| "step": 3610 |
| }, |
| { |
| "epoch": 0.2511164101452417, |
| "grad_norm": 1.3671875, |
| "learning_rate": 4.370608695652174e-06, |
| "loss": 0.1943, |
| "step": 3620 |
| }, |
| { |
| "epoch": 0.2518101018859744, |
| "grad_norm": 1.1953125, |
| "learning_rate": 4.368869565217392e-06, |
| "loss": 0.2737, |
| "step": 3630 |
| }, |
| { |
| "epoch": 0.2525037936267071, |
| "grad_norm": 0.9375, |
| "learning_rate": 4.367130434782609e-06, |
| "loss": 0.2737, |
| "step": 3640 |
| }, |
| { |
| "epoch": 0.25319748536743986, |
| "grad_norm": 1.078125, |
| "learning_rate": 4.365391304347826e-06, |
| "loss": 0.2254, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.25389117710817255, |
| "grad_norm": 1.0859375, |
| "learning_rate": 4.363652173913044e-06, |
| "loss": 0.2407, |
| "step": 3660 |
| }, |
| { |
| "epoch": 0.25458486884890524, |
| "grad_norm": 1.453125, |
| "learning_rate": 4.361913043478262e-06, |
| "loss": 0.2414, |
| "step": 3670 |
| }, |
| { |
| "epoch": 0.255278560589638, |
| "grad_norm": 0.97265625, |
| "learning_rate": 4.360173913043478e-06, |
| "loss": 0.2899, |
| "step": 3680 |
| }, |
| { |
| "epoch": 0.2559722523303707, |
| "grad_norm": 1.125, |
| "learning_rate": 4.3584347826086955e-06, |
| "loss": 0.2452, |
| "step": 3690 |
| }, |
| { |
| "epoch": 0.2566659440711034, |
| "grad_norm": 1.2109375, |
| "learning_rate": 4.356695652173914e-06, |
| "loss": 0.2251, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.25735963581183613, |
| "grad_norm": 1.421875, |
| "learning_rate": 4.354956521739131e-06, |
| "loss": 0.2506, |
| "step": 3710 |
| }, |
| { |
| "epoch": 0.2580533275525688, |
| "grad_norm": 1.3125, |
| "learning_rate": 4.353217391304348e-06, |
| "loss": 0.2592, |
| "step": 3720 |
| }, |
| { |
| "epoch": 0.2587470192933015, |
| "grad_norm": 1.3984375, |
| "learning_rate": 4.351478260869566e-06, |
| "loss": 0.2523, |
| "step": 3730 |
| }, |
| { |
| "epoch": 0.25944071103403427, |
| "grad_norm": 1.3515625, |
| "learning_rate": 4.349739130434783e-06, |
| "loss": 0.2409, |
| "step": 3740 |
| }, |
| { |
| "epoch": 0.26013440277476696, |
| "grad_norm": 1.3359375, |
| "learning_rate": 4.3480000000000006e-06, |
| "loss": 0.253, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.26082809451549965, |
| "grad_norm": 1.8515625, |
| "learning_rate": 4.346260869565218e-06, |
| "loss": 0.2862, |
| "step": 3760 |
| }, |
| { |
| "epoch": 0.2615217862562324, |
| "grad_norm": 1.359375, |
| "learning_rate": 4.344521739130435e-06, |
| "loss": 0.2504, |
| "step": 3770 |
| }, |
| { |
| "epoch": 0.2622154779969651, |
| "grad_norm": 0.984375, |
| "learning_rate": 4.342782608695653e-06, |
| "loss": 0.2537, |
| "step": 3780 |
| }, |
| { |
| "epoch": 0.2629091697376978, |
| "grad_norm": 1.0078125, |
| "learning_rate": 4.34104347826087e-06, |
| "loss": 0.2099, |
| "step": 3790 |
| }, |
| { |
| "epoch": 0.26360286147843054, |
| "grad_norm": 1.0625, |
| "learning_rate": 4.339304347826087e-06, |
| "loss": 0.2753, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.26429655321916323, |
| "grad_norm": 1.125, |
| "learning_rate": 4.337565217391305e-06, |
| "loss": 0.2202, |
| "step": 3810 |
| }, |
| { |
| "epoch": 0.2649902449598959, |
| "grad_norm": 1.1484375, |
| "learning_rate": 4.335826086956522e-06, |
| "loss": 0.2382, |
| "step": 3820 |
| }, |
| { |
| "epoch": 0.2656839367006287, |
| "grad_norm": 1.09375, |
| "learning_rate": 4.3340869565217395e-06, |
| "loss": 0.2092, |
| "step": 3830 |
| }, |
| { |
| "epoch": 0.26637762844136137, |
| "grad_norm": 1.1328125, |
| "learning_rate": 4.332347826086957e-06, |
| "loss": 0.3343, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.26707132018209406, |
| "grad_norm": 1.265625, |
| "learning_rate": 4.330608695652174e-06, |
| "loss": 0.2324, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.2677650119228268, |
| "grad_norm": 1.1875, |
| "learning_rate": 4.328869565217392e-06, |
| "loss": 0.2864, |
| "step": 3860 |
| }, |
| { |
| "epoch": 0.2684587036635595, |
| "grad_norm": 1.46875, |
| "learning_rate": 4.327130434782609e-06, |
| "loss": 0.2883, |
| "step": 3870 |
| }, |
| { |
| "epoch": 0.2691523954042922, |
| "grad_norm": 1.046875, |
| "learning_rate": 4.325391304347826e-06, |
| "loss": 0.2376, |
| "step": 3880 |
| }, |
| { |
| "epoch": 0.26984608714502495, |
| "grad_norm": 1.5, |
| "learning_rate": 4.323652173913044e-06, |
| "loss": 0.2556, |
| "step": 3890 |
| }, |
| { |
| "epoch": 0.27053977888575764, |
| "grad_norm": 1.3359375, |
| "learning_rate": 4.321913043478261e-06, |
| "loss": 0.3276, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.27123347062649034, |
| "grad_norm": 1.375, |
| "learning_rate": 4.3201739130434785e-06, |
| "loss": 0.2472, |
| "step": 3910 |
| }, |
| { |
| "epoch": 0.2719271623672231, |
| "grad_norm": 1.1640625, |
| "learning_rate": 4.318434782608696e-06, |
| "loss": 0.2264, |
| "step": 3920 |
| }, |
| { |
| "epoch": 0.2726208541079558, |
| "grad_norm": 1.3515625, |
| "learning_rate": 4.316695652173913e-06, |
| "loss": 0.2494, |
| "step": 3930 |
| }, |
| { |
| "epoch": 0.2733145458486885, |
| "grad_norm": 1.59375, |
| "learning_rate": 4.314956521739131e-06, |
| "loss": 0.2339, |
| "step": 3940 |
| }, |
| { |
| "epoch": 0.2740082375894212, |
| "grad_norm": 0.89453125, |
| "learning_rate": 4.313217391304348e-06, |
| "loss": 0.2365, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.2747019293301539, |
| "grad_norm": 1.359375, |
| "learning_rate": 4.311478260869565e-06, |
| "loss": 0.2584, |
| "step": 3960 |
| }, |
| { |
| "epoch": 0.2753956210708866, |
| "grad_norm": 1.28125, |
| "learning_rate": 4.309739130434783e-06, |
| "loss": 0.2279, |
| "step": 3970 |
| }, |
| { |
| "epoch": 0.27608931281161936, |
| "grad_norm": 1.3984375, |
| "learning_rate": 4.308000000000001e-06, |
| "loss": 0.2542, |
| "step": 3980 |
| }, |
| { |
| "epoch": 0.27678300455235205, |
| "grad_norm": 1.15625, |
| "learning_rate": 4.3062608695652175e-06, |
| "loss": 0.2315, |
| "step": 3990 |
| }, |
| { |
| "epoch": 0.27747669629308475, |
| "grad_norm": 1.5, |
| "learning_rate": 4.304521739130435e-06, |
| "loss": 0.2557, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.2781703880338175, |
| "grad_norm": 0.90625, |
| "learning_rate": 4.302782608695652e-06, |
| "loss": 0.2569, |
| "step": 4010 |
| }, |
| { |
| "epoch": 0.2788640797745502, |
| "grad_norm": 1.1875, |
| "learning_rate": 4.30104347826087e-06, |
| "loss": 0.2823, |
| "step": 4020 |
| }, |
| { |
| "epoch": 0.2795577715152829, |
| "grad_norm": 0.9921875, |
| "learning_rate": 4.299304347826087e-06, |
| "loss": 0.2826, |
| "step": 4030 |
| }, |
| { |
| "epoch": 0.28025146325601563, |
| "grad_norm": 1.234375, |
| "learning_rate": 4.297565217391304e-06, |
| "loss": 0.2421, |
| "step": 4040 |
| }, |
| { |
| "epoch": 0.2809451549967483, |
| "grad_norm": 1.375, |
| "learning_rate": 4.2958260869565225e-06, |
| "loss": 0.236, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.281638846737481, |
| "grad_norm": 1.0625, |
| "learning_rate": 4.29408695652174e-06, |
| "loss": 0.2304, |
| "step": 4060 |
| }, |
| { |
| "epoch": 0.28233253847821377, |
| "grad_norm": 1.2578125, |
| "learning_rate": 4.2923478260869564e-06, |
| "loss": 0.2403, |
| "step": 4070 |
| }, |
| { |
| "epoch": 0.28302623021894646, |
| "grad_norm": 2.03125, |
| "learning_rate": 4.290608695652174e-06, |
| "loss": 0.3514, |
| "step": 4080 |
| }, |
| { |
| "epoch": 0.28371992195967916, |
| "grad_norm": 1.328125, |
| "learning_rate": 4.288869565217392e-06, |
| "loss": 0.2904, |
| "step": 4090 |
| }, |
| { |
| "epoch": 0.2844136137004119, |
| "grad_norm": 1.140625, |
| "learning_rate": 4.287130434782609e-06, |
| "loss": 0.2514, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.2851073054411446, |
| "grad_norm": 1.1953125, |
| "learning_rate": 4.285391304347826e-06, |
| "loss": 0.2677, |
| "step": 4110 |
| }, |
| { |
| "epoch": 0.2858009971818773, |
| "grad_norm": 1.46875, |
| "learning_rate": 4.283652173913044e-06, |
| "loss": 0.2288, |
| "step": 4120 |
| }, |
| { |
| "epoch": 0.28649468892261004, |
| "grad_norm": 1.328125, |
| "learning_rate": 4.2819130434782615e-06, |
| "loss": 0.2551, |
| "step": 4130 |
| }, |
| { |
| "epoch": 0.28718838066334273, |
| "grad_norm": 0.98828125, |
| "learning_rate": 4.280173913043479e-06, |
| "loss": 0.2538, |
| "step": 4140 |
| }, |
| { |
| "epoch": 0.28788207240407543, |
| "grad_norm": 1.3671875, |
| "learning_rate": 4.278434782608696e-06, |
| "loss": 0.2679, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.2885757641448082, |
| "grad_norm": 1.296875, |
| "learning_rate": 4.276695652173914e-06, |
| "loss": 0.2682, |
| "step": 4160 |
| }, |
| { |
| "epoch": 0.28926945588554087, |
| "grad_norm": 1.8046875, |
| "learning_rate": 4.274956521739131e-06, |
| "loss": 0.2981, |
| "step": 4170 |
| }, |
| { |
| "epoch": 0.28996314762627357, |
| "grad_norm": 2.015625, |
| "learning_rate": 4.273217391304348e-06, |
| "loss": 0.2792, |
| "step": 4180 |
| }, |
| { |
| "epoch": 0.29065683936700626, |
| "grad_norm": 1.234375, |
| "learning_rate": 4.271478260869566e-06, |
| "loss": 0.243, |
| "step": 4190 |
| }, |
| { |
| "epoch": 0.291350531107739, |
| "grad_norm": 1.078125, |
| "learning_rate": 4.269739130434783e-06, |
| "loss": 0.217, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.2920442228484717, |
| "grad_norm": 1.28125, |
| "learning_rate": 4.2680000000000005e-06, |
| "loss": 0.2287, |
| "step": 4210 |
| }, |
| { |
| "epoch": 0.2927379145892044, |
| "grad_norm": 1.265625, |
| "learning_rate": 4.266260869565218e-06, |
| "loss": 0.2609, |
| "step": 4220 |
| }, |
| { |
| "epoch": 0.29343160632993714, |
| "grad_norm": 1.4453125, |
| "learning_rate": 4.264521739130435e-06, |
| "loss": 0.2283, |
| "step": 4230 |
| }, |
| { |
| "epoch": 0.29412529807066984, |
| "grad_norm": 1.0078125, |
| "learning_rate": 4.262782608695653e-06, |
| "loss": 0.2076, |
| "step": 4240 |
| }, |
| { |
| "epoch": 0.29481898981140253, |
| "grad_norm": 1.6953125, |
| "learning_rate": 4.26104347826087e-06, |
| "loss": 0.2701, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.2955126815521353, |
| "grad_norm": 1.34375, |
| "learning_rate": 4.259304347826087e-06, |
| "loss": 0.2239, |
| "step": 4260 |
| }, |
| { |
| "epoch": 0.296206373292868, |
| "grad_norm": 1.015625, |
| "learning_rate": 4.257565217391305e-06, |
| "loss": 0.2263, |
| "step": 4270 |
| }, |
| { |
| "epoch": 0.29690006503360067, |
| "grad_norm": 1.0234375, |
| "learning_rate": 4.255826086956522e-06, |
| "loss": 0.2316, |
| "step": 4280 |
| }, |
| { |
| "epoch": 0.2975937567743334, |
| "grad_norm": 1.4375, |
| "learning_rate": 4.2540869565217394e-06, |
| "loss": 0.2467, |
| "step": 4290 |
| }, |
| { |
| "epoch": 0.2982874485150661, |
| "grad_norm": 1.359375, |
| "learning_rate": 4.252347826086957e-06, |
| "loss": 0.2514, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.2989811402557988, |
| "grad_norm": 1.1796875, |
| "learning_rate": 4.250608695652174e-06, |
| "loss": 0.257, |
| "step": 4310 |
| }, |
| { |
| "epoch": 0.29967483199653155, |
| "grad_norm": 1.453125, |
| "learning_rate": 4.2488695652173916e-06, |
| "loss": 0.2062, |
| "step": 4320 |
| }, |
| { |
| "epoch": 0.30036852373726425, |
| "grad_norm": 1.140625, |
| "learning_rate": 4.247130434782609e-06, |
| "loss": 0.197, |
| "step": 4330 |
| }, |
| { |
| "epoch": 0.30106221547799694, |
| "grad_norm": 0.9609375, |
| "learning_rate": 4.245391304347826e-06, |
| "loss": 0.2319, |
| "step": 4340 |
| }, |
| { |
| "epoch": 0.3017559072187297, |
| "grad_norm": 1.171875, |
| "learning_rate": 4.243652173913044e-06, |
| "loss": 0.2404, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.3024495989594624, |
| "grad_norm": 1.09375, |
| "learning_rate": 4.241913043478261e-06, |
| "loss": 0.2156, |
| "step": 4360 |
| }, |
| { |
| "epoch": 0.3031432907001951, |
| "grad_norm": 1.3046875, |
| "learning_rate": 4.240173913043478e-06, |
| "loss": 0.2299, |
| "step": 4370 |
| }, |
| { |
| "epoch": 0.3038369824409278, |
| "grad_norm": 1.375, |
| "learning_rate": 4.238434782608696e-06, |
| "loss": 0.2404, |
| "step": 4380 |
| }, |
| { |
| "epoch": 0.3045306741816605, |
| "grad_norm": 1.4140625, |
| "learning_rate": 4.236695652173913e-06, |
| "loss": 0.1988, |
| "step": 4390 |
| }, |
| { |
| "epoch": 0.3052243659223932, |
| "grad_norm": 0.87890625, |
| "learning_rate": 4.2349565217391305e-06, |
| "loss": 0.2215, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.30591805766312596, |
| "grad_norm": 1.0390625, |
| "learning_rate": 4.233217391304349e-06, |
| "loss": 0.1986, |
| "step": 4410 |
| }, |
| { |
| "epoch": 0.30661174940385866, |
| "grad_norm": 1.296875, |
| "learning_rate": 4.231478260869565e-06, |
| "loss": 0.2359, |
| "step": 4420 |
| }, |
| { |
| "epoch": 0.30730544114459135, |
| "grad_norm": 1.2109375, |
| "learning_rate": 4.229739130434783e-06, |
| "loss": 0.2575, |
| "step": 4430 |
| }, |
| { |
| "epoch": 0.3079991328853241, |
| "grad_norm": 1.2734375, |
| "learning_rate": 4.228000000000001e-06, |
| "loss": 0.2429, |
| "step": 4440 |
| }, |
| { |
| "epoch": 0.3086928246260568, |
| "grad_norm": 1.28125, |
| "learning_rate": 4.226260869565218e-06, |
| "loss": 0.2587, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.3093865163667895, |
| "grad_norm": 1.6171875, |
| "learning_rate": 4.224521739130435e-06, |
| "loss": 0.348, |
| "step": 4460 |
| }, |
| { |
| "epoch": 0.31008020810752224, |
| "grad_norm": 1.2265625, |
| "learning_rate": 4.222782608695652e-06, |
| "loss": 0.2771, |
| "step": 4470 |
| }, |
| { |
| "epoch": 0.31077389984825493, |
| "grad_norm": 1.1015625, |
| "learning_rate": 4.22104347826087e-06, |
| "loss": 0.2308, |
| "step": 4480 |
| }, |
| { |
| "epoch": 0.3114675915889876, |
| "grad_norm": 1.015625, |
| "learning_rate": 4.219304347826088e-06, |
| "loss": 0.2672, |
| "step": 4490 |
| }, |
| { |
| "epoch": 0.3121612833297204, |
| "grad_norm": 1.359375, |
| "learning_rate": 4.217565217391304e-06, |
| "loss": 0.2603, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.31285497507045307, |
| "grad_norm": 0.8671875, |
| "learning_rate": 4.2158260869565225e-06, |
| "loss": 0.2212, |
| "step": 4510 |
| }, |
| { |
| "epoch": 0.31354866681118576, |
| "grad_norm": 1.1875, |
| "learning_rate": 4.21408695652174e-06, |
| "loss": 0.2073, |
| "step": 4520 |
| }, |
| { |
| "epoch": 0.3142423585519185, |
| "grad_norm": 1.546875, |
| "learning_rate": 4.212347826086957e-06, |
| "loss": 0.2602, |
| "step": 4530 |
| }, |
| { |
| "epoch": 0.3149360502926512, |
| "grad_norm": 1.140625, |
| "learning_rate": 4.210608695652174e-06, |
| "loss": 0.2279, |
| "step": 4540 |
| }, |
| { |
| "epoch": 0.3156297420333839, |
| "grad_norm": 1.40625, |
| "learning_rate": 4.208869565217392e-06, |
| "loss": 0.2433, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.31632343377411665, |
| "grad_norm": 1.171875, |
| "learning_rate": 4.207130434782609e-06, |
| "loss": 0.3271, |
| "step": 4560 |
| }, |
| { |
| "epoch": 0.31701712551484934, |
| "grad_norm": 1.046875, |
| "learning_rate": 4.205391304347826e-06, |
| "loss": 0.3058, |
| "step": 4570 |
| }, |
| { |
| "epoch": 0.31771081725558203, |
| "grad_norm": 1.921875, |
| "learning_rate": 4.203652173913044e-06, |
| "loss": 0.2724, |
| "step": 4580 |
| }, |
| { |
| "epoch": 0.3184045089963148, |
| "grad_norm": 1.09375, |
| "learning_rate": 4.201913043478261e-06, |
| "loss": 0.2538, |
| "step": 4590 |
| }, |
| { |
| "epoch": 0.3190982007370475, |
| "grad_norm": 1.328125, |
| "learning_rate": 4.200173913043479e-06, |
| "loss": 0.2394, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.31979189247778017, |
| "grad_norm": 1.265625, |
| "learning_rate": 4.198434782608696e-06, |
| "loss": 0.2433, |
| "step": 4610 |
| }, |
| { |
| "epoch": 0.3204855842185129, |
| "grad_norm": 1.2890625, |
| "learning_rate": 4.1966956521739135e-06, |
| "loss": 0.2457, |
| "step": 4620 |
| }, |
| { |
| "epoch": 0.3211792759592456, |
| "grad_norm": 1.0390625, |
| "learning_rate": 4.194956521739131e-06, |
| "loss": 0.2291, |
| "step": 4630 |
| }, |
| { |
| "epoch": 0.3218729676999783, |
| "grad_norm": 1.40625, |
| "learning_rate": 4.193217391304348e-06, |
| "loss": 0.3308, |
| "step": 4640 |
| }, |
| { |
| "epoch": 0.32256665944071106, |
| "grad_norm": 1.234375, |
| "learning_rate": 4.191478260869566e-06, |
| "loss": 0.2558, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.32326035118144375, |
| "grad_norm": 1.2265625, |
| "learning_rate": 4.189739130434783e-06, |
| "loss": 0.308, |
| "step": 4660 |
| }, |
| { |
| "epoch": 0.32395404292217644, |
| "grad_norm": 1.578125, |
| "learning_rate": 4.188e-06, |
| "loss": 0.2524, |
| "step": 4670 |
| }, |
| { |
| "epoch": 0.3246477346629092, |
| "grad_norm": 1.125, |
| "learning_rate": 4.186260869565218e-06, |
| "loss": 0.2373, |
| "step": 4680 |
| }, |
| { |
| "epoch": 0.3253414264036419, |
| "grad_norm": 1.4609375, |
| "learning_rate": 4.184521739130435e-06, |
| "loss": 0.2355, |
| "step": 4690 |
| }, |
| { |
| "epoch": 0.3260351181443746, |
| "grad_norm": 1.0234375, |
| "learning_rate": 4.1827826086956525e-06, |
| "loss": 0.2094, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.32672880988510733, |
| "grad_norm": 1.1328125, |
| "learning_rate": 4.18104347826087e-06, |
| "loss": 0.2346, |
| "step": 4710 |
| }, |
| { |
| "epoch": 0.32742250162584, |
| "grad_norm": 1.109375, |
| "learning_rate": 4.179304347826087e-06, |
| "loss": 0.2531, |
| "step": 4720 |
| }, |
| { |
| "epoch": 0.3281161933665727, |
| "grad_norm": 1.6015625, |
| "learning_rate": 4.177565217391305e-06, |
| "loss": 0.2837, |
| "step": 4730 |
| }, |
| { |
| "epoch": 0.32880988510730547, |
| "grad_norm": 1.1484375, |
| "learning_rate": 4.175826086956522e-06, |
| "loss": 0.2844, |
| "step": 4740 |
| }, |
| { |
| "epoch": 0.32950357684803816, |
| "grad_norm": 1.25, |
| "learning_rate": 4.174086956521739e-06, |
| "loss": 0.2293, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.33019726858877085, |
| "grad_norm": 1.234375, |
| "learning_rate": 4.172347826086957e-06, |
| "loss": 0.2447, |
| "step": 4760 |
| }, |
| { |
| "epoch": 0.3308909603295036, |
| "grad_norm": 1.4453125, |
| "learning_rate": 4.170608695652174e-06, |
| "loss": 0.2422, |
| "step": 4770 |
| }, |
| { |
| "epoch": 0.3315846520702363, |
| "grad_norm": 1.2578125, |
| "learning_rate": 4.1688695652173915e-06, |
| "loss": 0.2318, |
| "step": 4780 |
| }, |
| { |
| "epoch": 0.332278343810969, |
| "grad_norm": 1.7734375, |
| "learning_rate": 4.167130434782609e-06, |
| "loss": 0.2632, |
| "step": 4790 |
| }, |
| { |
| "epoch": 0.33297203555170174, |
| "grad_norm": 0.91796875, |
| "learning_rate": 4.165391304347827e-06, |
| "loss": 0.2217, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.33366572729243443, |
| "grad_norm": 1.34375, |
| "learning_rate": 4.163652173913044e-06, |
| "loss": 0.2919, |
| "step": 4810 |
| }, |
| { |
| "epoch": 0.3343594190331671, |
| "grad_norm": 1.109375, |
| "learning_rate": 4.161913043478261e-06, |
| "loss": 0.2504, |
| "step": 4820 |
| }, |
| { |
| "epoch": 0.3350531107738999, |
| "grad_norm": 0.890625, |
| "learning_rate": 4.160173913043478e-06, |
| "loss": 0.2633, |
| "step": 4830 |
| }, |
| { |
| "epoch": 0.33574680251463257, |
| "grad_norm": 1.03125, |
| "learning_rate": 4.1584347826086965e-06, |
| "loss": 0.2394, |
| "step": 4840 |
| }, |
| { |
| "epoch": 0.33644049425536526, |
| "grad_norm": 1.1484375, |
| "learning_rate": 4.156695652173913e-06, |
| "loss": 0.2634, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.33713418599609796, |
| "grad_norm": 1.265625, |
| "learning_rate": 4.1549565217391304e-06, |
| "loss": 0.2994, |
| "step": 4860 |
| }, |
| { |
| "epoch": 0.3378278777368307, |
| "grad_norm": 1.0, |
| "learning_rate": 4.153217391304349e-06, |
| "loss": 0.2641, |
| "step": 4870 |
| }, |
| { |
| "epoch": 0.3385215694775634, |
| "grad_norm": 1.1484375, |
| "learning_rate": 4.151478260869565e-06, |
| "loss": 0.233, |
| "step": 4880 |
| }, |
| { |
| "epoch": 0.3392152612182961, |
| "grad_norm": 1.0703125, |
| "learning_rate": 4.1497391304347826e-06, |
| "loss": 0.2218, |
| "step": 4890 |
| }, |
| { |
| "epoch": 0.33990895295902884, |
| "grad_norm": 1.5546875, |
| "learning_rate": 4.148000000000001e-06, |
| "loss": 0.2219, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.34060264469976154, |
| "grad_norm": 1.1015625, |
| "learning_rate": 4.146260869565218e-06, |
| "loss": 0.2372, |
| "step": 4910 |
| }, |
| { |
| "epoch": 0.34129633644049423, |
| "grad_norm": 1.3828125, |
| "learning_rate": 4.144521739130435e-06, |
| "loss": 0.2405, |
| "step": 4920 |
| }, |
| { |
| "epoch": 0.341990028181227, |
| "grad_norm": 1.1640625, |
| "learning_rate": 4.142782608695652e-06, |
| "loss": 0.2585, |
| "step": 4930 |
| }, |
| { |
| "epoch": 0.3426837199219597, |
| "grad_norm": 1.09375, |
| "learning_rate": 4.14104347826087e-06, |
| "loss": 0.2515, |
| "step": 4940 |
| }, |
| { |
| "epoch": 0.34337741166269237, |
| "grad_norm": 1.09375, |
| "learning_rate": 4.139304347826088e-06, |
| "loss": 0.3092, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.3440711034034251, |
| "grad_norm": 1.28125, |
| "learning_rate": 4.137565217391304e-06, |
| "loss": 0.237, |
| "step": 4960 |
| }, |
| { |
| "epoch": 0.3447647951441578, |
| "grad_norm": 1.359375, |
| "learning_rate": 4.135826086956522e-06, |
| "loss": 0.2279, |
| "step": 4970 |
| }, |
| { |
| "epoch": 0.3454584868848905, |
| "grad_norm": 1.3203125, |
| "learning_rate": 4.13408695652174e-06, |
| "loss": 0.2481, |
| "step": 4980 |
| }, |
| { |
| "epoch": 0.34615217862562325, |
| "grad_norm": 1.046875, |
| "learning_rate": 4.132347826086957e-06, |
| "loss": 0.2375, |
| "step": 4990 |
| }, |
| { |
| "epoch": 0.34684587036635595, |
| "grad_norm": 1.3125, |
| "learning_rate": 4.130608695652174e-06, |
| "loss": 0.2408, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.34753956210708864, |
| "grad_norm": 1.203125, |
| "learning_rate": 4.128869565217392e-06, |
| "loss": 0.2326, |
| "step": 5010 |
| }, |
| { |
| "epoch": 0.3482332538478214, |
| "grad_norm": 1.0234375, |
| "learning_rate": 4.127130434782609e-06, |
| "loss": 0.2321, |
| "step": 5020 |
| }, |
| { |
| "epoch": 0.3489269455885541, |
| "grad_norm": 1.078125, |
| "learning_rate": 4.125391304347827e-06, |
| "loss": 0.2579, |
| "step": 5030 |
| }, |
| { |
| "epoch": 0.3496206373292868, |
| "grad_norm": 1.0390625, |
| "learning_rate": 4.123652173913044e-06, |
| "loss": 0.2407, |
| "step": 5040 |
| }, |
| { |
| "epoch": 0.3503143290700195, |
| "grad_norm": 1.53125, |
| "learning_rate": 4.121913043478261e-06, |
| "loss": 0.2519, |
| "step": 5050 |
| }, |
| { |
| "epoch": 0.3510080208107522, |
| "grad_norm": 1.1953125, |
| "learning_rate": 4.120173913043479e-06, |
| "loss": 0.2598, |
| "step": 5060 |
| }, |
| { |
| "epoch": 0.3517017125514849, |
| "grad_norm": 1.203125, |
| "learning_rate": 4.118434782608696e-06, |
| "loss": 0.2603, |
| "step": 5070 |
| }, |
| { |
| "epoch": 0.35239540429221766, |
| "grad_norm": 1.296875, |
| "learning_rate": 4.1166956521739135e-06, |
| "loss": 0.2179, |
| "step": 5080 |
| }, |
| { |
| "epoch": 0.35308909603295036, |
| "grad_norm": 0.98828125, |
| "learning_rate": 4.114956521739131e-06, |
| "loss": 0.2368, |
| "step": 5090 |
| }, |
| { |
| "epoch": 0.35378278777368305, |
| "grad_norm": 1.1015625, |
| "learning_rate": 4.113217391304348e-06, |
| "loss": 0.2639, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.3544764795144158, |
| "grad_norm": 1.03125, |
| "learning_rate": 4.1114782608695656e-06, |
| "loss": 0.224, |
| "step": 5110 |
| }, |
| { |
| "epoch": 0.3551701712551485, |
| "grad_norm": 1.2578125, |
| "learning_rate": 4.109739130434783e-06, |
| "loss": 0.2546, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.3558638629958812, |
| "grad_norm": 1.3671875, |
| "learning_rate": 4.108e-06, |
| "loss": 0.2738, |
| "step": 5130 |
| }, |
| { |
| "epoch": 0.35655755473661394, |
| "grad_norm": 1.125, |
| "learning_rate": 4.106260869565218e-06, |
| "loss": 0.2176, |
| "step": 5140 |
| }, |
| { |
| "epoch": 0.35725124647734663, |
| "grad_norm": 1.125, |
| "learning_rate": 4.104521739130435e-06, |
| "loss": 0.2345, |
| "step": 5150 |
| }, |
| { |
| "epoch": 0.3579449382180793, |
| "grad_norm": 1.25, |
| "learning_rate": 4.102782608695652e-06, |
| "loss": 0.2391, |
| "step": 5160 |
| }, |
| { |
| "epoch": 0.35863862995881207, |
| "grad_norm": 1.328125, |
| "learning_rate": 4.10104347826087e-06, |
| "loss": 0.2476, |
| "step": 5170 |
| }, |
| { |
| "epoch": 0.35933232169954477, |
| "grad_norm": 1.390625, |
| "learning_rate": 4.099304347826087e-06, |
| "loss": 0.2305, |
| "step": 5180 |
| }, |
| { |
| "epoch": 0.36002601344027746, |
| "grad_norm": 1.3984375, |
| "learning_rate": 4.0975652173913045e-06, |
| "loss": 0.2072, |
| "step": 5190 |
| }, |
| { |
| "epoch": 0.3607197051810102, |
| "grad_norm": 1.3828125, |
| "learning_rate": 4.095826086956522e-06, |
| "loss": 0.2452, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.3614133969217429, |
| "grad_norm": 1.453125, |
| "learning_rate": 4.094086956521739e-06, |
| "loss": 0.2279, |
| "step": 5210 |
| }, |
| { |
| "epoch": 0.3621070886624756, |
| "grad_norm": 0.8984375, |
| "learning_rate": 4.092347826086957e-06, |
| "loss": 0.2563, |
| "step": 5220 |
| }, |
| { |
| "epoch": 0.36280078040320834, |
| "grad_norm": 1.1953125, |
| "learning_rate": 4.090608695652174e-06, |
| "loss": 0.2403, |
| "step": 5230 |
| }, |
| { |
| "epoch": 0.36349447214394104, |
| "grad_norm": 1.4375, |
| "learning_rate": 4.088869565217391e-06, |
| "loss": 0.2423, |
| "step": 5240 |
| }, |
| { |
| "epoch": 0.36418816388467373, |
| "grad_norm": 1.0234375, |
| "learning_rate": 4.087130434782609e-06, |
| "loss": 0.2361, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.3648818556254065, |
| "grad_norm": 1.125, |
| "learning_rate": 4.085391304347827e-06, |
| "loss": 0.2578, |
| "step": 5260 |
| }, |
| { |
| "epoch": 0.3655755473661392, |
| "grad_norm": 1.5703125, |
| "learning_rate": 4.0836521739130435e-06, |
| "loss": 0.2747, |
| "step": 5270 |
| }, |
| { |
| "epoch": 0.36626923910687187, |
| "grad_norm": 0.91015625, |
| "learning_rate": 4.081913043478261e-06, |
| "loss": 0.2621, |
| "step": 5280 |
| }, |
| { |
| "epoch": 0.3669629308476046, |
| "grad_norm": 1.2265625, |
| "learning_rate": 4.080173913043478e-06, |
| "loss": 0.2146, |
| "step": 5290 |
| }, |
| { |
| "epoch": 0.3676566225883373, |
| "grad_norm": 1.2734375, |
| "learning_rate": 4.0784347826086965e-06, |
| "loss": 0.2239, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.36835031432907, |
| "grad_norm": 1.0078125, |
| "learning_rate": 4.076695652173913e-06, |
| "loss": 0.2409, |
| "step": 5310 |
| }, |
| { |
| "epoch": 0.36904400606980275, |
| "grad_norm": 1.5078125, |
| "learning_rate": 4.07495652173913e-06, |
| "loss": 0.2346, |
| "step": 5320 |
| }, |
| { |
| "epoch": 0.36973769781053545, |
| "grad_norm": 1.1796875, |
| "learning_rate": 4.073217391304349e-06, |
| "loss": 0.2461, |
| "step": 5330 |
| }, |
| { |
| "epoch": 0.37043138955126814, |
| "grad_norm": 1.328125, |
| "learning_rate": 4.071478260869566e-06, |
| "loss": 0.3231, |
| "step": 5340 |
| }, |
| { |
| "epoch": 0.3711250812920009, |
| "grad_norm": 1.0546875, |
| "learning_rate": 4.0697391304347825e-06, |
| "loss": 0.2363, |
| "step": 5350 |
| }, |
| { |
| "epoch": 0.3718187730327336, |
| "grad_norm": 1.21875, |
| "learning_rate": 4.068000000000001e-06, |
| "loss": 0.2485, |
| "step": 5360 |
| }, |
| { |
| "epoch": 0.3725124647734663, |
| "grad_norm": 1.3046875, |
| "learning_rate": 4.066260869565218e-06, |
| "loss": 0.2416, |
| "step": 5370 |
| }, |
| { |
| "epoch": 0.37320615651419903, |
| "grad_norm": 1.34375, |
| "learning_rate": 4.0645217391304354e-06, |
| "loss": 0.2117, |
| "step": 5380 |
| }, |
| { |
| "epoch": 0.3738998482549317, |
| "grad_norm": 1.1640625, |
| "learning_rate": 4.062782608695652e-06, |
| "loss": 0.2054, |
| "step": 5390 |
| }, |
| { |
| "epoch": 0.3745935399956644, |
| "grad_norm": 1.078125, |
| "learning_rate": 4.06104347826087e-06, |
| "loss": 0.216, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.37528723173639716, |
| "grad_norm": 1.0390625, |
| "learning_rate": 4.0593043478260875e-06, |
| "loss": 0.251, |
| "step": 5410 |
| }, |
| { |
| "epoch": 0.37598092347712986, |
| "grad_norm": 0.92578125, |
| "learning_rate": 4.057565217391305e-06, |
| "loss": 0.2377, |
| "step": 5420 |
| }, |
| { |
| "epoch": 0.37667461521786255, |
| "grad_norm": 1.2890625, |
| "learning_rate": 4.055826086956522e-06, |
| "loss": 0.2643, |
| "step": 5430 |
| }, |
| { |
| "epoch": 0.3773683069585953, |
| "grad_norm": 2.03125, |
| "learning_rate": 4.05408695652174e-06, |
| "loss": 0.3051, |
| "step": 5440 |
| }, |
| { |
| "epoch": 0.378061998699328, |
| "grad_norm": 0.96875, |
| "learning_rate": 4.052347826086957e-06, |
| "loss": 0.2471, |
| "step": 5450 |
| }, |
| { |
| "epoch": 0.3787556904400607, |
| "grad_norm": 1.015625, |
| "learning_rate": 4.050608695652174e-06, |
| "loss": 0.2804, |
| "step": 5460 |
| }, |
| { |
| "epoch": 0.37944938218079344, |
| "grad_norm": 1.078125, |
| "learning_rate": 4.048869565217392e-06, |
| "loss": 0.2892, |
| "step": 5470 |
| }, |
| { |
| "epoch": 0.38014307392152613, |
| "grad_norm": 1.015625, |
| "learning_rate": 4.047130434782609e-06, |
| "loss": 0.2323, |
| "step": 5480 |
| }, |
| { |
| "epoch": 0.3808367656622588, |
| "grad_norm": 1.0859375, |
| "learning_rate": 4.0453913043478265e-06, |
| "loss": 0.2362, |
| "step": 5490 |
| }, |
| { |
| "epoch": 0.3815304574029915, |
| "grad_norm": 0.96875, |
| "learning_rate": 4.043652173913044e-06, |
| "loss": 0.2262, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.38222414914372427, |
| "grad_norm": 1.1015625, |
| "learning_rate": 4.041913043478261e-06, |
| "loss": 0.2667, |
| "step": 5510 |
| }, |
| { |
| "epoch": 0.38291784088445696, |
| "grad_norm": 1.03125, |
| "learning_rate": 4.040173913043479e-06, |
| "loss": 0.2642, |
| "step": 5520 |
| }, |
| { |
| "epoch": 0.38361153262518966, |
| "grad_norm": 1.4140625, |
| "learning_rate": 4.038434782608696e-06, |
| "loss": 0.2579, |
| "step": 5530 |
| }, |
| { |
| "epoch": 0.3843052243659224, |
| "grad_norm": 0.921875, |
| "learning_rate": 4.036695652173913e-06, |
| "loss": 0.2491, |
| "step": 5540 |
| }, |
| { |
| "epoch": 0.3849989161066551, |
| "grad_norm": 1.7890625, |
| "learning_rate": 4.034956521739131e-06, |
| "loss": 0.2521, |
| "step": 5550 |
| }, |
| { |
| "epoch": 0.3856926078473878, |
| "grad_norm": 1.9453125, |
| "learning_rate": 4.033217391304348e-06, |
| "loss": 0.3052, |
| "step": 5560 |
| }, |
| { |
| "epoch": 0.38638629958812054, |
| "grad_norm": 1.1171875, |
| "learning_rate": 4.0314782608695655e-06, |
| "loss": 0.2403, |
| "step": 5570 |
| }, |
| { |
| "epoch": 0.38707999132885323, |
| "grad_norm": 1.1328125, |
| "learning_rate": 4.029739130434783e-06, |
| "loss": 0.2278, |
| "step": 5580 |
| }, |
| { |
| "epoch": 0.38777368306958593, |
| "grad_norm": 1.15625, |
| "learning_rate": 4.028e-06, |
| "loss": 0.2863, |
| "step": 5590 |
| }, |
| { |
| "epoch": 0.3884673748103187, |
| "grad_norm": 1.46875, |
| "learning_rate": 4.026260869565218e-06, |
| "loss": 0.2414, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.38916106655105137, |
| "grad_norm": 1.4375, |
| "learning_rate": 4.024521739130435e-06, |
| "loss": 0.2932, |
| "step": 5610 |
| }, |
| { |
| "epoch": 0.38985475829178406, |
| "grad_norm": 1.7421875, |
| "learning_rate": 4.022782608695652e-06, |
| "loss": 0.2212, |
| "step": 5620 |
| }, |
| { |
| "epoch": 0.3905484500325168, |
| "grad_norm": 1.28125, |
| "learning_rate": 4.02104347826087e-06, |
| "loss": 0.2225, |
| "step": 5630 |
| }, |
| { |
| "epoch": 0.3912421417732495, |
| "grad_norm": 0.9296875, |
| "learning_rate": 4.019304347826087e-06, |
| "loss": 0.2289, |
| "step": 5640 |
| }, |
| { |
| "epoch": 0.3919358335139822, |
| "grad_norm": 1.25, |
| "learning_rate": 4.017565217391305e-06, |
| "loss": 0.3125, |
| "step": 5650 |
| }, |
| { |
| "epoch": 0.39262952525471495, |
| "grad_norm": 1.7578125, |
| "learning_rate": 4.015826086956522e-06, |
| "loss": 0.278, |
| "step": 5660 |
| }, |
| { |
| "epoch": 0.39332321699544764, |
| "grad_norm": 1.1875, |
| "learning_rate": 4.014086956521739e-06, |
| "loss": 0.2555, |
| "step": 5670 |
| }, |
| { |
| "epoch": 0.39401690873618034, |
| "grad_norm": 1.1171875, |
| "learning_rate": 4.0123478260869566e-06, |
| "loss": 0.2313, |
| "step": 5680 |
| }, |
| { |
| "epoch": 0.3947106004769131, |
| "grad_norm": 1.1953125, |
| "learning_rate": 4.010608695652175e-06, |
| "loss": 0.2275, |
| "step": 5690 |
| }, |
| { |
| "epoch": 0.3954042922176458, |
| "grad_norm": 1.0390625, |
| "learning_rate": 4.008869565217391e-06, |
| "loss": 0.219, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.3960979839583785, |
| "grad_norm": 1.21875, |
| "learning_rate": 4.007130434782609e-06, |
| "loss": 0.2823, |
| "step": 5710 |
| }, |
| { |
| "epoch": 0.3967916756991112, |
| "grad_norm": 1.4296875, |
| "learning_rate": 4.005391304347827e-06, |
| "loss": 0.2273, |
| "step": 5720 |
| }, |
| { |
| "epoch": 0.3974853674398439, |
| "grad_norm": 1.0234375, |
| "learning_rate": 4.003652173913044e-06, |
| "loss": 0.2356, |
| "step": 5730 |
| }, |
| { |
| "epoch": 0.3981790591805766, |
| "grad_norm": 1.0859375, |
| "learning_rate": 4.001913043478261e-06, |
| "loss": 0.2267, |
| "step": 5740 |
| }, |
| { |
| "epoch": 0.39887275092130936, |
| "grad_norm": 1.203125, |
| "learning_rate": 4.000173913043478e-06, |
| "loss": 0.2469, |
| "step": 5750 |
| }, |
| { |
| "epoch": 0.39956644266204205, |
| "grad_norm": 1.3515625, |
| "learning_rate": 3.998434782608696e-06, |
| "loss": 0.2549, |
| "step": 5760 |
| }, |
| { |
| "epoch": 0.40026013440277475, |
| "grad_norm": 0.7578125, |
| "learning_rate": 3.996695652173914e-06, |
| "loss": 0.23, |
| "step": 5770 |
| }, |
| { |
| "epoch": 0.4009538261435075, |
| "grad_norm": 1.2734375, |
| "learning_rate": 3.99495652173913e-06, |
| "loss": 0.2393, |
| "step": 5780 |
| }, |
| { |
| "epoch": 0.4016475178842402, |
| "grad_norm": 1.328125, |
| "learning_rate": 3.9932173913043485e-06, |
| "loss": 0.3191, |
| "step": 5790 |
| }, |
| { |
| "epoch": 0.4023412096249729, |
| "grad_norm": 1.09375, |
| "learning_rate": 3.991478260869566e-06, |
| "loss": 0.2143, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.40303490136570563, |
| "grad_norm": 1.4296875, |
| "learning_rate": 3.989739130434782e-06, |
| "loss": 0.2801, |
| "step": 5810 |
| }, |
| { |
| "epoch": 0.4037285931064383, |
| "grad_norm": 1.4140625, |
| "learning_rate": 3.988000000000001e-06, |
| "loss": 0.2921, |
| "step": 5820 |
| }, |
| { |
| "epoch": 0.404422284847171, |
| "grad_norm": 1.2265625, |
| "learning_rate": 3.986260869565218e-06, |
| "loss": 0.3027, |
| "step": 5830 |
| }, |
| { |
| "epoch": 0.40511597658790377, |
| "grad_norm": 1.53125, |
| "learning_rate": 3.984521739130435e-06, |
| "loss": 0.2357, |
| "step": 5840 |
| }, |
| { |
| "epoch": 0.40580966832863646, |
| "grad_norm": 0.99609375, |
| "learning_rate": 3.982782608695652e-06, |
| "loss": 0.2426, |
| "step": 5850 |
| }, |
| { |
| "epoch": 0.40650336006936916, |
| "grad_norm": 1.3515625, |
| "learning_rate": 3.98104347826087e-06, |
| "loss": 0.2232, |
| "step": 5860 |
| }, |
| { |
| "epoch": 0.4071970518101019, |
| "grad_norm": 1.296875, |
| "learning_rate": 3.9793043478260875e-06, |
| "loss": 0.2597, |
| "step": 5870 |
| }, |
| { |
| "epoch": 0.4078907435508346, |
| "grad_norm": 1.03125, |
| "learning_rate": 3.977565217391305e-06, |
| "loss": 0.2848, |
| "step": 5880 |
| }, |
| { |
| "epoch": 0.4085844352915673, |
| "grad_norm": 1.3203125, |
| "learning_rate": 3.975826086956522e-06, |
| "loss": 0.2033, |
| "step": 5890 |
| }, |
| { |
| "epoch": 0.40927812703230004, |
| "grad_norm": 1.2578125, |
| "learning_rate": 3.97408695652174e-06, |
| "loss": 0.2851, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.40997181877303274, |
| "grad_norm": 1.078125, |
| "learning_rate": 3.972347826086957e-06, |
| "loss": 0.2773, |
| "step": 5910 |
| }, |
| { |
| "epoch": 0.41066551051376543, |
| "grad_norm": 1.375, |
| "learning_rate": 3.970608695652174e-06, |
| "loss": 0.3003, |
| "step": 5920 |
| }, |
| { |
| "epoch": 0.4113592022544982, |
| "grad_norm": 1.328125, |
| "learning_rate": 3.968869565217392e-06, |
| "loss": 0.2621, |
| "step": 5930 |
| }, |
| { |
| "epoch": 0.4120528939952309, |
| "grad_norm": 1.2890625, |
| "learning_rate": 3.967130434782609e-06, |
| "loss": 0.2287, |
| "step": 5940 |
| }, |
| { |
| "epoch": 0.41274658573596357, |
| "grad_norm": 1.1640625, |
| "learning_rate": 3.9653913043478264e-06, |
| "loss": 0.2912, |
| "step": 5950 |
| }, |
| { |
| "epoch": 0.4134402774766963, |
| "grad_norm": 1.1171875, |
| "learning_rate": 3.963652173913044e-06, |
| "loss": 0.1962, |
| "step": 5960 |
| }, |
| { |
| "epoch": 0.414133969217429, |
| "grad_norm": 0.88671875, |
| "learning_rate": 3.961913043478261e-06, |
| "loss": 0.2444, |
| "step": 5970 |
| }, |
| { |
| "epoch": 0.4148276609581617, |
| "grad_norm": 1.1328125, |
| "learning_rate": 3.9601739130434785e-06, |
| "loss": 0.2362, |
| "step": 5980 |
| }, |
| { |
| "epoch": 0.41552135269889445, |
| "grad_norm": 1.15625, |
| "learning_rate": 3.958434782608696e-06, |
| "loss": 0.2421, |
| "step": 5990 |
| }, |
| { |
| "epoch": 0.41621504443962715, |
| "grad_norm": 1.0390625, |
| "learning_rate": 3.956695652173913e-06, |
| "loss": 0.2565, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.41690873618035984, |
| "grad_norm": 1.53125, |
| "learning_rate": 3.954956521739131e-06, |
| "loss": 0.2551, |
| "step": 6010 |
| }, |
| { |
| "epoch": 0.4176024279210926, |
| "grad_norm": 1.234375, |
| "learning_rate": 3.953217391304348e-06, |
| "loss": 0.2542, |
| "step": 6020 |
| }, |
| { |
| "epoch": 0.4182961196618253, |
| "grad_norm": 1.2421875, |
| "learning_rate": 3.951478260869565e-06, |
| "loss": 0.2514, |
| "step": 6030 |
| }, |
| { |
| "epoch": 0.418989811402558, |
| "grad_norm": 1.6875, |
| "learning_rate": 3.949739130434783e-06, |
| "loss": 0.2156, |
| "step": 6040 |
| }, |
| { |
| "epoch": 0.4196835031432907, |
| "grad_norm": 0.96484375, |
| "learning_rate": 3.948e-06, |
| "loss": 0.2041, |
| "step": 6050 |
| }, |
| { |
| "epoch": 0.4203771948840234, |
| "grad_norm": 1.5859375, |
| "learning_rate": 3.9462608695652175e-06, |
| "loss": 0.245, |
| "step": 6060 |
| }, |
| { |
| "epoch": 0.4210708866247561, |
| "grad_norm": 1.125, |
| "learning_rate": 3.944521739130435e-06, |
| "loss": 0.2536, |
| "step": 6070 |
| }, |
| { |
| "epoch": 0.42176457836548886, |
| "grad_norm": 1.28125, |
| "learning_rate": 3.942782608695653e-06, |
| "loss": 0.2117, |
| "step": 6080 |
| }, |
| { |
| "epoch": 0.42245827010622156, |
| "grad_norm": 1.359375, |
| "learning_rate": 3.94104347826087e-06, |
| "loss": 0.2369, |
| "step": 6090 |
| }, |
| { |
| "epoch": 0.42315196184695425, |
| "grad_norm": 1.484375, |
| "learning_rate": 3.939304347826087e-06, |
| "loss": 0.2449, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.423845653587687, |
| "grad_norm": 1.4921875, |
| "learning_rate": 3.937565217391305e-06, |
| "loss": 0.3172, |
| "step": 6110 |
| }, |
| { |
| "epoch": 0.4245393453284197, |
| "grad_norm": 0.984375, |
| "learning_rate": 3.935826086956522e-06, |
| "loss": 0.2349, |
| "step": 6120 |
| }, |
| { |
| "epoch": 0.4252330370691524, |
| "grad_norm": 1.09375, |
| "learning_rate": 3.934086956521739e-06, |
| "loss": 0.2547, |
| "step": 6130 |
| }, |
| { |
| "epoch": 0.4259267288098851, |
| "grad_norm": 1.125, |
| "learning_rate": 3.9323478260869565e-06, |
| "loss": 0.2688, |
| "step": 6140 |
| }, |
| { |
| "epoch": 0.42662042055061783, |
| "grad_norm": 1.46875, |
| "learning_rate": 3.930608695652175e-06, |
| "loss": 0.3595, |
| "step": 6150 |
| }, |
| { |
| "epoch": 0.4273141122913505, |
| "grad_norm": 1.125, |
| "learning_rate": 3.928869565217391e-06, |
| "loss": 0.2317, |
| "step": 6160 |
| }, |
| { |
| "epoch": 0.4280078040320832, |
| "grad_norm": 1.6328125, |
| "learning_rate": 3.927130434782609e-06, |
| "loss": 0.2359, |
| "step": 6170 |
| }, |
| { |
| "epoch": 0.42870149577281597, |
| "grad_norm": 1.125, |
| "learning_rate": 3.925391304347827e-06, |
| "loss": 0.2333, |
| "step": 6180 |
| }, |
| { |
| "epoch": 0.42939518751354866, |
| "grad_norm": 1.1328125, |
| "learning_rate": 3.923652173913044e-06, |
| "loss": 0.2054, |
| "step": 6190 |
| }, |
| { |
| "epoch": 0.43008887925428135, |
| "grad_norm": 1.40625, |
| "learning_rate": 3.921913043478261e-06, |
| "loss": 0.2491, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.4307825709950141, |
| "grad_norm": 1.328125, |
| "learning_rate": 3.920173913043478e-06, |
| "loss": 0.2249, |
| "step": 6210 |
| }, |
| { |
| "epoch": 0.4314762627357468, |
| "grad_norm": 1.078125, |
| "learning_rate": 3.918434782608696e-06, |
| "loss": 0.2962, |
| "step": 6220 |
| }, |
| { |
| "epoch": 0.4321699544764795, |
| "grad_norm": 1.59375, |
| "learning_rate": 3.916695652173914e-06, |
| "loss": 0.3596, |
| "step": 6230 |
| }, |
| { |
| "epoch": 0.43286364621721224, |
| "grad_norm": 1.28125, |
| "learning_rate": 3.91495652173913e-06, |
| "loss": 0.2359, |
| "step": 6240 |
| }, |
| { |
| "epoch": 0.43355733795794493, |
| "grad_norm": 1.4140625, |
| "learning_rate": 3.913217391304348e-06, |
| "loss": 0.3381, |
| "step": 6250 |
| }, |
| { |
| "epoch": 0.4342510296986776, |
| "grad_norm": 1.171875, |
| "learning_rate": 3.911478260869566e-06, |
| "loss": 0.24, |
| "step": 6260 |
| }, |
| { |
| "epoch": 0.4349447214394104, |
| "grad_norm": 1.21875, |
| "learning_rate": 3.909739130434783e-06, |
| "loss": 0.2566, |
| "step": 6270 |
| }, |
| { |
| "epoch": 0.43563841318014307, |
| "grad_norm": 1.359375, |
| "learning_rate": 3.9080000000000005e-06, |
| "loss": 0.2599, |
| "step": 6280 |
| }, |
| { |
| "epoch": 0.43633210492087576, |
| "grad_norm": 1.234375, |
| "learning_rate": 3.906260869565218e-06, |
| "loss": 0.2257, |
| "step": 6290 |
| }, |
| { |
| "epoch": 0.4370257966616085, |
| "grad_norm": 1.078125, |
| "learning_rate": 3.904521739130435e-06, |
| "loss": 0.229, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.4377194884023412, |
| "grad_norm": 1.2109375, |
| "learning_rate": 3.902782608695653e-06, |
| "loss": 0.2464, |
| "step": 6310 |
| }, |
| { |
| "epoch": 0.4384131801430739, |
| "grad_norm": 1.34375, |
| "learning_rate": 3.90104347826087e-06, |
| "loss": 0.2392, |
| "step": 6320 |
| }, |
| { |
| "epoch": 0.43910687188380665, |
| "grad_norm": 1.6015625, |
| "learning_rate": 3.899304347826087e-06, |
| "loss": 0.2661, |
| "step": 6330 |
| }, |
| { |
| "epoch": 0.43980056362453934, |
| "grad_norm": 0.94140625, |
| "learning_rate": 3.897565217391305e-06, |
| "loss": 0.2971, |
| "step": 6340 |
| }, |
| { |
| "epoch": 0.44049425536527204, |
| "grad_norm": 1.2421875, |
| "learning_rate": 3.895826086956522e-06, |
| "loss": 0.2578, |
| "step": 6350 |
| }, |
| { |
| "epoch": 0.4411879471060048, |
| "grad_norm": 1.125, |
| "learning_rate": 3.8940869565217395e-06, |
| "loss": 0.2107, |
| "step": 6360 |
| }, |
| { |
| "epoch": 0.4418816388467375, |
| "grad_norm": 1.1640625, |
| "learning_rate": 3.892347826086957e-06, |
| "loss": 0.213, |
| "step": 6370 |
| }, |
| { |
| "epoch": 0.4425753305874702, |
| "grad_norm": 1.4375, |
| "learning_rate": 3.890608695652174e-06, |
| "loss": 0.3, |
| "step": 6380 |
| }, |
| { |
| "epoch": 0.4432690223282029, |
| "grad_norm": 1.21875, |
| "learning_rate": 3.888869565217392e-06, |
| "loss": 0.2388, |
| "step": 6390 |
| }, |
| { |
| "epoch": 0.4439627140689356, |
| "grad_norm": 1.0859375, |
| "learning_rate": 3.887130434782609e-06, |
| "loss": 0.2557, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.4446564058096683, |
| "grad_norm": 0.71484375, |
| "learning_rate": 3.885391304347826e-06, |
| "loss": 0.2638, |
| "step": 6410 |
| }, |
| { |
| "epoch": 0.44535009755040106, |
| "grad_norm": 1.5234375, |
| "learning_rate": 3.883652173913044e-06, |
| "loss": 0.2884, |
| "step": 6420 |
| }, |
| { |
| "epoch": 0.44604378929113375, |
| "grad_norm": 1.2265625, |
| "learning_rate": 3.881913043478261e-06, |
| "loss": 0.2356, |
| "step": 6430 |
| }, |
| { |
| "epoch": 0.44673748103186645, |
| "grad_norm": 1.65625, |
| "learning_rate": 3.8801739130434785e-06, |
| "loss": 0.2654, |
| "step": 6440 |
| }, |
| { |
| "epoch": 0.4474311727725992, |
| "grad_norm": 1.0390625, |
| "learning_rate": 3.878434782608696e-06, |
| "loss": 0.2445, |
| "step": 6450 |
| }, |
| { |
| "epoch": 0.4481248645133319, |
| "grad_norm": 1.1796875, |
| "learning_rate": 3.876695652173913e-06, |
| "loss": 0.2363, |
| "step": 6460 |
| }, |
| { |
| "epoch": 0.4488185562540646, |
| "grad_norm": 1.234375, |
| "learning_rate": 3.874956521739131e-06, |
| "loss": 0.2402, |
| "step": 6470 |
| }, |
| { |
| "epoch": 0.44951224799479733, |
| "grad_norm": 1.0546875, |
| "learning_rate": 3.873217391304348e-06, |
| "loss": 0.2362, |
| "step": 6480 |
| }, |
| { |
| "epoch": 0.45020593973553, |
| "grad_norm": 1.2890625, |
| "learning_rate": 3.871478260869565e-06, |
| "loss": 0.3399, |
| "step": 6490 |
| }, |
| { |
| "epoch": 0.4508996314762627, |
| "grad_norm": 1.2421875, |
| "learning_rate": 3.869739130434783e-06, |
| "loss": 0.2556, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.45159332321699547, |
| "grad_norm": 1.0390625, |
| "learning_rate": 3.868e-06, |
| "loss": 0.2391, |
| "step": 6510 |
| }, |
| { |
| "epoch": 0.45228701495772816, |
| "grad_norm": 1.1875, |
| "learning_rate": 3.8662608695652174e-06, |
| "loss": 0.315, |
| "step": 6520 |
| }, |
| { |
| "epoch": 0.45298070669846086, |
| "grad_norm": 1.5546875, |
| "learning_rate": 3.864521739130435e-06, |
| "loss": 0.2522, |
| "step": 6530 |
| }, |
| { |
| "epoch": 0.4536743984391936, |
| "grad_norm": 1.2109375, |
| "learning_rate": 3.862782608695653e-06, |
| "loss": 0.2416, |
| "step": 6540 |
| }, |
| { |
| "epoch": 0.4543680901799263, |
| "grad_norm": 1.328125, |
| "learning_rate": 3.8610434782608696e-06, |
| "loss": 0.2644, |
| "step": 6550 |
| }, |
| { |
| "epoch": 0.455061781920659, |
| "grad_norm": 1.140625, |
| "learning_rate": 3.859304347826087e-06, |
| "loss": 0.2374, |
| "step": 6560 |
| }, |
| { |
| "epoch": 0.45575547366139174, |
| "grad_norm": 1.375, |
| "learning_rate": 3.857565217391305e-06, |
| "loss": 0.2737, |
| "step": 6570 |
| }, |
| { |
| "epoch": 0.45644916540212443, |
| "grad_norm": 1.015625, |
| "learning_rate": 3.8558260869565225e-06, |
| "loss": 0.2772, |
| "step": 6580 |
| }, |
| { |
| "epoch": 0.45714285714285713, |
| "grad_norm": 1.2109375, |
| "learning_rate": 3.854086956521739e-06, |
| "loss": 0.2583, |
| "step": 6590 |
| }, |
| { |
| "epoch": 0.4578365488835899, |
| "grad_norm": 1.546875, |
| "learning_rate": 3.852347826086956e-06, |
| "loss": 0.2847, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.45853024062432257, |
| "grad_norm": 1.1875, |
| "learning_rate": 3.850608695652175e-06, |
| "loss": 0.213, |
| "step": 6610 |
| }, |
| { |
| "epoch": 0.45922393236505527, |
| "grad_norm": 0.98046875, |
| "learning_rate": 3.848869565217392e-06, |
| "loss": 0.2157, |
| "step": 6620 |
| }, |
| { |
| "epoch": 0.459917624105788, |
| "grad_norm": 1.375, |
| "learning_rate": 3.8471304347826085e-06, |
| "loss": 0.2696, |
| "step": 6630 |
| }, |
| { |
| "epoch": 0.4606113158465207, |
| "grad_norm": 1.046875, |
| "learning_rate": 3.845391304347827e-06, |
| "loss": 0.2729, |
| "step": 6640 |
| }, |
| { |
| "epoch": 0.4613050075872534, |
| "grad_norm": 1.1796875, |
| "learning_rate": 3.843652173913044e-06, |
| "loss": 0.2258, |
| "step": 6650 |
| }, |
| { |
| "epoch": 0.46199869932798615, |
| "grad_norm": 1.125, |
| "learning_rate": 3.8419130434782615e-06, |
| "loss": 0.2469, |
| "step": 6660 |
| }, |
| { |
| "epoch": 0.46269239106871884, |
| "grad_norm": 0.94140625, |
| "learning_rate": 3.840173913043478e-06, |
| "loss": 0.2038, |
| "step": 6670 |
| }, |
| { |
| "epoch": 0.46338608280945154, |
| "grad_norm": 1.1171875, |
| "learning_rate": 3.838434782608696e-06, |
| "loss": 0.315, |
| "step": 6680 |
| }, |
| { |
| "epoch": 0.4640797745501843, |
| "grad_norm": 1.453125, |
| "learning_rate": 3.836695652173914e-06, |
| "loss": 0.2534, |
| "step": 6690 |
| }, |
| { |
| "epoch": 0.464773466290917, |
| "grad_norm": 1.265625, |
| "learning_rate": 3.834956521739131e-06, |
| "loss": 0.2372, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.4654671580316497, |
| "grad_norm": 1.234375, |
| "learning_rate": 3.833217391304348e-06, |
| "loss": 0.246, |
| "step": 6710 |
| }, |
| { |
| "epoch": 0.4661608497723824, |
| "grad_norm": 1.1171875, |
| "learning_rate": 3.831478260869566e-06, |
| "loss": 0.2586, |
| "step": 6720 |
| }, |
| { |
| "epoch": 0.4668545415131151, |
| "grad_norm": 1.25, |
| "learning_rate": 3.829739130434783e-06, |
| "loss": 0.2605, |
| "step": 6730 |
| }, |
| { |
| "epoch": 0.4675482332538478, |
| "grad_norm": 1.5625, |
| "learning_rate": 3.8280000000000004e-06, |
| "loss": 0.2853, |
| "step": 6740 |
| }, |
| { |
| "epoch": 0.46824192499458056, |
| "grad_norm": 1.375, |
| "learning_rate": 3.826260869565218e-06, |
| "loss": 0.2116, |
| "step": 6750 |
| }, |
| { |
| "epoch": 0.46893561673531325, |
| "grad_norm": 1.09375, |
| "learning_rate": 3.824521739130435e-06, |
| "loss": 0.3067, |
| "step": 6760 |
| }, |
| { |
| "epoch": 0.46962930847604595, |
| "grad_norm": 1.1953125, |
| "learning_rate": 3.8227826086956526e-06, |
| "loss": 0.2128, |
| "step": 6770 |
| }, |
| { |
| "epoch": 0.47032300021677864, |
| "grad_norm": 0.85546875, |
| "learning_rate": 3.82104347826087e-06, |
| "loss": 0.2067, |
| "step": 6780 |
| }, |
| { |
| "epoch": 0.4710166919575114, |
| "grad_norm": 1.296875, |
| "learning_rate": 3.819304347826087e-06, |
| "loss": 0.2094, |
| "step": 6790 |
| }, |
| { |
| "epoch": 0.4717103836982441, |
| "grad_norm": 1.4140625, |
| "learning_rate": 3.817565217391305e-06, |
| "loss": 0.2455, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.4724040754389768, |
| "grad_norm": 1.1796875, |
| "learning_rate": 3.815826086956522e-06, |
| "loss": 0.2471, |
| "step": 6810 |
| }, |
| { |
| "epoch": 0.4730977671797095, |
| "grad_norm": 1.5546875, |
| "learning_rate": 3.8140869565217394e-06, |
| "loss": 0.2839, |
| "step": 6820 |
| }, |
| { |
| "epoch": 0.4737914589204422, |
| "grad_norm": 1.5703125, |
| "learning_rate": 3.812347826086957e-06, |
| "loss": 0.2218, |
| "step": 6830 |
| }, |
| { |
| "epoch": 0.4744851506611749, |
| "grad_norm": 1.1875, |
| "learning_rate": 3.810608695652174e-06, |
| "loss": 0.2793, |
| "step": 6840 |
| }, |
| { |
| "epoch": 0.47517884240190766, |
| "grad_norm": 1.9453125, |
| "learning_rate": 3.808869565217392e-06, |
| "loss": 0.3007, |
| "step": 6850 |
| }, |
| { |
| "epoch": 0.47587253414264036, |
| "grad_norm": 0.99609375, |
| "learning_rate": 3.807130434782609e-06, |
| "loss": 0.1971, |
| "step": 6860 |
| }, |
| { |
| "epoch": 0.47656622588337305, |
| "grad_norm": 0.89453125, |
| "learning_rate": 3.8053913043478263e-06, |
| "loss": 0.2195, |
| "step": 6870 |
| }, |
| { |
| "epoch": 0.4772599176241058, |
| "grad_norm": 1.140625, |
| "learning_rate": 3.803652173913044e-06, |
| "loss": 0.2397, |
| "step": 6880 |
| }, |
| { |
| "epoch": 0.4779536093648385, |
| "grad_norm": 1.5625, |
| "learning_rate": 3.8019130434782614e-06, |
| "loss": 0.2547, |
| "step": 6890 |
| }, |
| { |
| "epoch": 0.4786473011055712, |
| "grad_norm": 1.1015625, |
| "learning_rate": 3.8001739130434784e-06, |
| "loss": 0.2687, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.47934099284630394, |
| "grad_norm": 1.46875, |
| "learning_rate": 3.7984347826086958e-06, |
| "loss": 0.2853, |
| "step": 6910 |
| }, |
| { |
| "epoch": 0.48003468458703663, |
| "grad_norm": 1.140625, |
| "learning_rate": 3.7966956521739136e-06, |
| "loss": 0.2279, |
| "step": 6920 |
| }, |
| { |
| "epoch": 0.4807283763277693, |
| "grad_norm": 1.5078125, |
| "learning_rate": 3.794956521739131e-06, |
| "loss": 0.2167, |
| "step": 6930 |
| }, |
| { |
| "epoch": 0.4814220680685021, |
| "grad_norm": 1.1875, |
| "learning_rate": 3.793217391304348e-06, |
| "loss": 0.3339, |
| "step": 6940 |
| }, |
| { |
| "epoch": 0.48211575980923477, |
| "grad_norm": 1.46875, |
| "learning_rate": 3.7914782608695657e-06, |
| "loss": 0.2604, |
| "step": 6950 |
| }, |
| { |
| "epoch": 0.48280945154996746, |
| "grad_norm": 1.0078125, |
| "learning_rate": 3.789739130434783e-06, |
| "loss": 0.2045, |
| "step": 6960 |
| }, |
| { |
| "epoch": 0.4835031432907002, |
| "grad_norm": 1.40625, |
| "learning_rate": 3.7880000000000004e-06, |
| "loss": 0.2629, |
| "step": 6970 |
| }, |
| { |
| "epoch": 0.4841968350314329, |
| "grad_norm": 1.1484375, |
| "learning_rate": 3.7862608695652174e-06, |
| "loss": 0.2602, |
| "step": 6980 |
| }, |
| { |
| "epoch": 0.4848905267721656, |
| "grad_norm": 1.6171875, |
| "learning_rate": 3.784521739130435e-06, |
| "loss": 0.3032, |
| "step": 6990 |
| }, |
| { |
| "epoch": 0.48558421851289835, |
| "grad_norm": 0.9453125, |
| "learning_rate": 3.7827826086956525e-06, |
| "loss": 0.238, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.48627791025363104, |
| "grad_norm": 1.3203125, |
| "learning_rate": 3.7810434782608703e-06, |
| "loss": 0.2422, |
| "step": 7010 |
| }, |
| { |
| "epoch": 0.48697160199436373, |
| "grad_norm": 1.0234375, |
| "learning_rate": 3.7793043478260873e-06, |
| "loss": 0.2349, |
| "step": 7020 |
| }, |
| { |
| "epoch": 0.4876652937350965, |
| "grad_norm": 1.3125, |
| "learning_rate": 3.7775652173913046e-06, |
| "loss": 0.23, |
| "step": 7030 |
| }, |
| { |
| "epoch": 0.4883589854758292, |
| "grad_norm": 1.03125, |
| "learning_rate": 3.775826086956522e-06, |
| "loss": 0.2634, |
| "step": 7040 |
| }, |
| { |
| "epoch": 0.48905267721656187, |
| "grad_norm": 1.453125, |
| "learning_rate": 3.7740869565217394e-06, |
| "loss": 0.2912, |
| "step": 7050 |
| }, |
| { |
| "epoch": 0.4897463689572946, |
| "grad_norm": 1.0234375, |
| "learning_rate": 3.7723478260869567e-06, |
| "loss": 0.232, |
| "step": 7060 |
| }, |
| { |
| "epoch": 0.4904400606980273, |
| "grad_norm": 1.0390625, |
| "learning_rate": 3.770608695652174e-06, |
| "loss": 0.2252, |
| "step": 7070 |
| }, |
| { |
| "epoch": 0.49113375243876, |
| "grad_norm": 0.8125, |
| "learning_rate": 3.768869565217392e-06, |
| "loss": 0.2311, |
| "step": 7080 |
| }, |
| { |
| "epoch": 0.49182744417949276, |
| "grad_norm": 1.234375, |
| "learning_rate": 3.767130434782609e-06, |
| "loss": 0.2493, |
| "step": 7090 |
| }, |
| { |
| "epoch": 0.49252113592022545, |
| "grad_norm": 1.3671875, |
| "learning_rate": 3.7653913043478262e-06, |
| "loss": 0.2665, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.49321482766095814, |
| "grad_norm": 1.328125, |
| "learning_rate": 3.763652173913044e-06, |
| "loss": 0.2158, |
| "step": 7110 |
| }, |
| { |
| "epoch": 0.4939085194016909, |
| "grad_norm": 1.1328125, |
| "learning_rate": 3.7619130434782614e-06, |
| "loss": 0.2555, |
| "step": 7120 |
| }, |
| { |
| "epoch": 0.4946022111424236, |
| "grad_norm": 0.9296875, |
| "learning_rate": 3.7601739130434783e-06, |
| "loss": 0.2437, |
| "step": 7130 |
| }, |
| { |
| "epoch": 0.4952959028831563, |
| "grad_norm": 1.15625, |
| "learning_rate": 3.7584347826086957e-06, |
| "loss": 0.2703, |
| "step": 7140 |
| }, |
| { |
| "epoch": 0.49598959462388903, |
| "grad_norm": 1.203125, |
| "learning_rate": 3.7566956521739135e-06, |
| "loss": 0.2314, |
| "step": 7150 |
| }, |
| { |
| "epoch": 0.4966832863646217, |
| "grad_norm": 1.265625, |
| "learning_rate": 3.754956521739131e-06, |
| "loss": 0.2534, |
| "step": 7160 |
| }, |
| { |
| "epoch": 0.4973769781053544, |
| "grad_norm": 1.2578125, |
| "learning_rate": 3.753217391304348e-06, |
| "loss": 0.2763, |
| "step": 7170 |
| }, |
| { |
| "epoch": 0.49807066984608717, |
| "grad_norm": 1.578125, |
| "learning_rate": 3.7514782608695656e-06, |
| "loss": 0.2734, |
| "step": 7180 |
| }, |
| { |
| "epoch": 0.49876436158681986, |
| "grad_norm": 1.1640625, |
| "learning_rate": 3.749739130434783e-06, |
| "loss": 0.2158, |
| "step": 7190 |
| }, |
| { |
| "epoch": 0.49945805332755255, |
| "grad_norm": 1.4765625, |
| "learning_rate": 3.7480000000000004e-06, |
| "loss": 0.2429, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.5001517450682853, |
| "grad_norm": 1.2421875, |
| "learning_rate": 3.7462608695652173e-06, |
| "loss": 0.2961, |
| "step": 7210 |
| }, |
| { |
| "epoch": 0.5008454368090179, |
| "grad_norm": 1.4375, |
| "learning_rate": 3.744521739130435e-06, |
| "loss": 0.2368, |
| "step": 7220 |
| }, |
| { |
| "epoch": 0.5015391285497507, |
| "grad_norm": 1.5625, |
| "learning_rate": 3.7427826086956525e-06, |
| "loss": 0.2605, |
| "step": 7230 |
| }, |
| { |
| "epoch": 0.5022328202904834, |
| "grad_norm": 0.83984375, |
| "learning_rate": 3.7410434782608703e-06, |
| "loss": 0.2194, |
| "step": 7240 |
| }, |
| { |
| "epoch": 0.5029265120312161, |
| "grad_norm": 1.1953125, |
| "learning_rate": 3.7393043478260872e-06, |
| "loss": 0.2153, |
| "step": 7250 |
| }, |
| { |
| "epoch": 0.5036202037719488, |
| "grad_norm": 1.671875, |
| "learning_rate": 3.7375652173913046e-06, |
| "loss": 0.2329, |
| "step": 7260 |
| }, |
| { |
| "epoch": 0.5043138955126816, |
| "grad_norm": 1.8203125, |
| "learning_rate": 3.735826086956522e-06, |
| "loss": 0.2821, |
| "step": 7270 |
| }, |
| { |
| "epoch": 0.5050075872534142, |
| "grad_norm": 1.3984375, |
| "learning_rate": 3.7340869565217398e-06, |
| "loss": 0.263, |
| "step": 7280 |
| }, |
| { |
| "epoch": 0.505701278994147, |
| "grad_norm": 1.3046875, |
| "learning_rate": 3.7323478260869567e-06, |
| "loss": 0.2428, |
| "step": 7290 |
| }, |
| { |
| "epoch": 0.5063949707348797, |
| "grad_norm": 1.015625, |
| "learning_rate": 3.730608695652174e-06, |
| "loss": 0.2353, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.5070886624756124, |
| "grad_norm": 1.0234375, |
| "learning_rate": 3.728869565217392e-06, |
| "loss": 0.2616, |
| "step": 7310 |
| }, |
| { |
| "epoch": 0.5077823542163451, |
| "grad_norm": 1.3203125, |
| "learning_rate": 3.7271304347826092e-06, |
| "loss": 0.2314, |
| "step": 7320 |
| }, |
| { |
| "epoch": 0.5084760459570778, |
| "grad_norm": 1.171875, |
| "learning_rate": 3.725391304347826e-06, |
| "loss": 0.2484, |
| "step": 7330 |
| }, |
| { |
| "epoch": 0.5091697376978105, |
| "grad_norm": 1.1796875, |
| "learning_rate": 3.723652173913044e-06, |
| "loss": 0.2182, |
| "step": 7340 |
| }, |
| { |
| "epoch": 0.5098634294385432, |
| "grad_norm": 1.296875, |
| "learning_rate": 3.7219130434782614e-06, |
| "loss": 0.1906, |
| "step": 7350 |
| }, |
| { |
| "epoch": 0.510557121179276, |
| "grad_norm": 1.2421875, |
| "learning_rate": 3.7201739130434783e-06, |
| "loss": 0.2971, |
| "step": 7360 |
| }, |
| { |
| "epoch": 0.5112508129200086, |
| "grad_norm": 1.359375, |
| "learning_rate": 3.7184347826086957e-06, |
| "loss": 0.2281, |
| "step": 7370 |
| }, |
| { |
| "epoch": 0.5119445046607414, |
| "grad_norm": 1.4375, |
| "learning_rate": 3.7166956521739135e-06, |
| "loss": 0.2597, |
| "step": 7380 |
| }, |
| { |
| "epoch": 0.5126381964014741, |
| "grad_norm": 1.3125, |
| "learning_rate": 3.714956521739131e-06, |
| "loss": 0.2685, |
| "step": 7390 |
| }, |
| { |
| "epoch": 0.5133318881422068, |
| "grad_norm": 0.98046875, |
| "learning_rate": 3.713217391304348e-06, |
| "loss": 0.2763, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.5140255798829395, |
| "grad_norm": 1.3203125, |
| "learning_rate": 3.7114782608695656e-06, |
| "loss": 0.2088, |
| "step": 7410 |
| }, |
| { |
| "epoch": 0.5147192716236723, |
| "grad_norm": 4.9375, |
| "learning_rate": 3.709739130434783e-06, |
| "loss": 0.2388, |
| "step": 7420 |
| }, |
| { |
| "epoch": 0.5154129633644049, |
| "grad_norm": 1.3046875, |
| "learning_rate": 3.7080000000000003e-06, |
| "loss": 0.2594, |
| "step": 7430 |
| }, |
| { |
| "epoch": 0.5161066551051376, |
| "grad_norm": 1.7265625, |
| "learning_rate": 3.7062608695652173e-06, |
| "loss": 0.2595, |
| "step": 7440 |
| }, |
| { |
| "epoch": 0.5168003468458704, |
| "grad_norm": 1.2578125, |
| "learning_rate": 3.704521739130435e-06, |
| "loss": 0.2404, |
| "step": 7450 |
| }, |
| { |
| "epoch": 0.517494038586603, |
| "grad_norm": 1.25, |
| "learning_rate": 3.7027826086956524e-06, |
| "loss": 0.2635, |
| "step": 7460 |
| }, |
| { |
| "epoch": 0.5181877303273358, |
| "grad_norm": 0.875, |
| "learning_rate": 3.7010434782608702e-06, |
| "loss": 0.245, |
| "step": 7470 |
| }, |
| { |
| "epoch": 0.5188814220680685, |
| "grad_norm": 1.171875, |
| "learning_rate": 3.699304347826087e-06, |
| "loss": 0.2676, |
| "step": 7480 |
| }, |
| { |
| "epoch": 0.5195751138088012, |
| "grad_norm": 1.21875, |
| "learning_rate": 3.6975652173913046e-06, |
| "loss": 0.2699, |
| "step": 7490 |
| }, |
| { |
| "epoch": 0.5202688055495339, |
| "grad_norm": 1.234375, |
| "learning_rate": 3.695826086956522e-06, |
| "loss": 0.244, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.5209624972902667, |
| "grad_norm": 1.390625, |
| "learning_rate": 3.6940869565217397e-06, |
| "loss": 0.2438, |
| "step": 7510 |
| }, |
| { |
| "epoch": 0.5216561890309993, |
| "grad_norm": 1.2890625, |
| "learning_rate": 3.6923478260869567e-06, |
| "loss": 0.2516, |
| "step": 7520 |
| }, |
| { |
| "epoch": 0.5223498807717321, |
| "grad_norm": 1.0625, |
| "learning_rate": 3.690608695652174e-06, |
| "loss": 0.2458, |
| "step": 7530 |
| }, |
| { |
| "epoch": 0.5230435725124648, |
| "grad_norm": 1.375, |
| "learning_rate": 3.688869565217392e-06, |
| "loss": 0.2696, |
| "step": 7540 |
| }, |
| { |
| "epoch": 0.5237372642531974, |
| "grad_norm": 1.453125, |
| "learning_rate": 3.687130434782609e-06, |
| "loss": 0.2365, |
| "step": 7550 |
| }, |
| { |
| "epoch": 0.5244309559939302, |
| "grad_norm": 1.609375, |
| "learning_rate": 3.685391304347826e-06, |
| "loss": 0.2247, |
| "step": 7560 |
| }, |
| { |
| "epoch": 0.5251246477346629, |
| "grad_norm": 1.390625, |
| "learning_rate": 3.683652173913044e-06, |
| "loss": 0.2388, |
| "step": 7570 |
| }, |
| { |
| "epoch": 0.5258183394753956, |
| "grad_norm": 1.171875, |
| "learning_rate": 3.6819130434782613e-06, |
| "loss": 0.2709, |
| "step": 7580 |
| }, |
| { |
| "epoch": 0.5265120312161283, |
| "grad_norm": 1.2578125, |
| "learning_rate": 3.6801739130434787e-06, |
| "loss": 0.3069, |
| "step": 7590 |
| }, |
| { |
| "epoch": 0.5272057229568611, |
| "grad_norm": 1.046875, |
| "learning_rate": 3.6784347826086956e-06, |
| "loss": 0.2254, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.5278994146975937, |
| "grad_norm": 1.0390625, |
| "learning_rate": 3.6766956521739134e-06, |
| "loss": 0.2332, |
| "step": 7610 |
| }, |
| { |
| "epoch": 0.5285931064383265, |
| "grad_norm": 1.3359375, |
| "learning_rate": 3.674956521739131e-06, |
| "loss": 0.2917, |
| "step": 7620 |
| }, |
| { |
| "epoch": 0.5292867981790592, |
| "grad_norm": 1.203125, |
| "learning_rate": 3.6732173913043486e-06, |
| "loss": 0.2575, |
| "step": 7630 |
| }, |
| { |
| "epoch": 0.5299804899197919, |
| "grad_norm": 0.96875, |
| "learning_rate": 3.6714782608695655e-06, |
| "loss": 0.2636, |
| "step": 7640 |
| }, |
| { |
| "epoch": 0.5306741816605246, |
| "grad_norm": 1.0390625, |
| "learning_rate": 3.669739130434783e-06, |
| "loss": 0.2782, |
| "step": 7650 |
| }, |
| { |
| "epoch": 0.5313678734012574, |
| "grad_norm": 1.4296875, |
| "learning_rate": 3.6680000000000003e-06, |
| "loss": 0.2155, |
| "step": 7660 |
| }, |
| { |
| "epoch": 0.53206156514199, |
| "grad_norm": 1.96875, |
| "learning_rate": 3.6662608695652172e-06, |
| "loss": 0.2726, |
| "step": 7670 |
| }, |
| { |
| "epoch": 0.5327552568827227, |
| "grad_norm": 1.453125, |
| "learning_rate": 3.664521739130435e-06, |
| "loss": 0.2456, |
| "step": 7680 |
| }, |
| { |
| "epoch": 0.5334489486234555, |
| "grad_norm": 1.328125, |
| "learning_rate": 3.6627826086956524e-06, |
| "loss": 0.2691, |
| "step": 7690 |
| }, |
| { |
| "epoch": 0.5341426403641881, |
| "grad_norm": 1.421875, |
| "learning_rate": 3.66104347826087e-06, |
| "loss": 0.2618, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.5348363321049209, |
| "grad_norm": 1.3359375, |
| "learning_rate": 3.659304347826087e-06, |
| "loss": 0.2684, |
| "step": 7710 |
| }, |
| { |
| "epoch": 0.5355300238456536, |
| "grad_norm": 0.8828125, |
| "learning_rate": 3.6575652173913045e-06, |
| "loss": 0.2165, |
| "step": 7720 |
| }, |
| { |
| "epoch": 0.5362237155863863, |
| "grad_norm": 0.79296875, |
| "learning_rate": 3.655826086956522e-06, |
| "loss": 0.2917, |
| "step": 7730 |
| }, |
| { |
| "epoch": 0.536917407327119, |
| "grad_norm": 1.203125, |
| "learning_rate": 3.6540869565217397e-06, |
| "loss": 0.2629, |
| "step": 7740 |
| }, |
| { |
| "epoch": 0.5376110990678518, |
| "grad_norm": 1.046875, |
| "learning_rate": 3.6523478260869566e-06, |
| "loss": 0.2612, |
| "step": 7750 |
| }, |
| { |
| "epoch": 0.5383047908085844, |
| "grad_norm": 1.1953125, |
| "learning_rate": 3.650608695652174e-06, |
| "loss": 0.3481, |
| "step": 7760 |
| }, |
| { |
| "epoch": 0.5389984825493171, |
| "grad_norm": 1.5859375, |
| "learning_rate": 3.648869565217392e-06, |
| "loss": 0.2939, |
| "step": 7770 |
| }, |
| { |
| "epoch": 0.5396921742900499, |
| "grad_norm": 1.1015625, |
| "learning_rate": 3.647130434782609e-06, |
| "loss": 0.232, |
| "step": 7780 |
| }, |
| { |
| "epoch": 0.5403858660307825, |
| "grad_norm": 1.3359375, |
| "learning_rate": 3.645391304347826e-06, |
| "loss": 0.2843, |
| "step": 7790 |
| }, |
| { |
| "epoch": 0.5410795577715153, |
| "grad_norm": 1.0, |
| "learning_rate": 3.643652173913044e-06, |
| "loss": 0.2311, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.541773249512248, |
| "grad_norm": 1.078125, |
| "learning_rate": 3.6419130434782613e-06, |
| "loss": 0.2587, |
| "step": 7810 |
| }, |
| { |
| "epoch": 0.5424669412529807, |
| "grad_norm": 0.95703125, |
| "learning_rate": 3.6401739130434786e-06, |
| "loss": 0.2894, |
| "step": 7820 |
| }, |
| { |
| "epoch": 0.5431606329937134, |
| "grad_norm": 1.0078125, |
| "learning_rate": 3.6384347826086956e-06, |
| "loss": 0.2337, |
| "step": 7830 |
| }, |
| { |
| "epoch": 0.5438543247344462, |
| "grad_norm": 0.9921875, |
| "learning_rate": 3.6366956521739134e-06, |
| "loss": 0.2619, |
| "step": 7840 |
| }, |
| { |
| "epoch": 0.5445480164751788, |
| "grad_norm": 1.15625, |
| "learning_rate": 3.6349565217391308e-06, |
| "loss": 0.3071, |
| "step": 7850 |
| }, |
| { |
| "epoch": 0.5452417082159116, |
| "grad_norm": 0.87109375, |
| "learning_rate": 3.6332173913043486e-06, |
| "loss": 0.2436, |
| "step": 7860 |
| }, |
| { |
| "epoch": 0.5459353999566443, |
| "grad_norm": 0.98046875, |
| "learning_rate": 3.6314782608695655e-06, |
| "loss": 0.316, |
| "step": 7870 |
| }, |
| { |
| "epoch": 0.546629091697377, |
| "grad_norm": 1.6328125, |
| "learning_rate": 3.629739130434783e-06, |
| "loss": 0.2135, |
| "step": 7880 |
| }, |
| { |
| "epoch": 0.5473227834381097, |
| "grad_norm": 1.3671875, |
| "learning_rate": 3.6280000000000002e-06, |
| "loss": 0.2578, |
| "step": 7890 |
| }, |
| { |
| "epoch": 0.5480164751788424, |
| "grad_norm": 1.2578125, |
| "learning_rate": 3.626260869565218e-06, |
| "loss": 0.2587, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.5487101669195751, |
| "grad_norm": 1.3828125, |
| "learning_rate": 3.624521739130435e-06, |
| "loss": 0.2425, |
| "step": 7910 |
| }, |
| { |
| "epoch": 0.5494038586603078, |
| "grad_norm": 1.421875, |
| "learning_rate": 3.6227826086956524e-06, |
| "loss": 0.2497, |
| "step": 7920 |
| }, |
| { |
| "epoch": 0.5500975504010406, |
| "grad_norm": 1.0546875, |
| "learning_rate": 3.62104347826087e-06, |
| "loss": 0.1969, |
| "step": 7930 |
| }, |
| { |
| "epoch": 0.5507912421417732, |
| "grad_norm": 1.25, |
| "learning_rate": 3.6193043478260875e-06, |
| "loss": 0.2794, |
| "step": 7940 |
| }, |
| { |
| "epoch": 0.551484933882506, |
| "grad_norm": 1.4921875, |
| "learning_rate": 3.6175652173913045e-06, |
| "loss": 0.2452, |
| "step": 7950 |
| }, |
| { |
| "epoch": 0.5521786256232387, |
| "grad_norm": 1.1640625, |
| "learning_rate": 3.615826086956522e-06, |
| "loss": 0.2181, |
| "step": 7960 |
| }, |
| { |
| "epoch": 0.5528723173639714, |
| "grad_norm": 1.2265625, |
| "learning_rate": 3.6140869565217396e-06, |
| "loss": 0.2407, |
| "step": 7970 |
| }, |
| { |
| "epoch": 0.5535660091047041, |
| "grad_norm": 1.0703125, |
| "learning_rate": 3.6123478260869566e-06, |
| "loss": 0.2556, |
| "step": 7980 |
| }, |
| { |
| "epoch": 0.5542597008454369, |
| "grad_norm": 1.234375, |
| "learning_rate": 3.610608695652174e-06, |
| "loss": 0.2783, |
| "step": 7990 |
| }, |
| { |
| "epoch": 0.5549533925861695, |
| "grad_norm": 1.59375, |
| "learning_rate": 3.6088695652173918e-06, |
| "loss": 0.2702, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.5556470843269022, |
| "grad_norm": 1.359375, |
| "learning_rate": 3.607130434782609e-06, |
| "loss": 0.2606, |
| "step": 8010 |
| }, |
| { |
| "epoch": 0.556340776067635, |
| "grad_norm": 1.21875, |
| "learning_rate": 3.605391304347826e-06, |
| "loss": 0.2103, |
| "step": 8020 |
| }, |
| { |
| "epoch": 0.5570344678083676, |
| "grad_norm": 1.34375, |
| "learning_rate": 3.603652173913044e-06, |
| "loss": 0.2451, |
| "step": 8030 |
| }, |
| { |
| "epoch": 0.5577281595491004, |
| "grad_norm": 1.2734375, |
| "learning_rate": 3.6019130434782612e-06, |
| "loss": 0.2489, |
| "step": 8040 |
| }, |
| { |
| "epoch": 0.5584218512898331, |
| "grad_norm": 1.2578125, |
| "learning_rate": 3.6001739130434786e-06, |
| "loss": 0.2254, |
| "step": 8050 |
| }, |
| { |
| "epoch": 0.5591155430305658, |
| "grad_norm": 1.5625, |
| "learning_rate": 3.5984347826086956e-06, |
| "loss": 0.2404, |
| "step": 8060 |
| }, |
| { |
| "epoch": 0.5598092347712985, |
| "grad_norm": 1.3125, |
| "learning_rate": 3.5966956521739134e-06, |
| "loss": 0.3151, |
| "step": 8070 |
| }, |
| { |
| "epoch": 0.5605029265120313, |
| "grad_norm": 1.4296875, |
| "learning_rate": 3.5949565217391307e-06, |
| "loss": 0.2554, |
| "step": 8080 |
| }, |
| { |
| "epoch": 0.5611966182527639, |
| "grad_norm": 1.234375, |
| "learning_rate": 3.5932173913043485e-06, |
| "loss": 0.25, |
| "step": 8090 |
| }, |
| { |
| "epoch": 0.5618903099934967, |
| "grad_norm": 1.390625, |
| "learning_rate": 3.5914782608695655e-06, |
| "loss": 0.2968, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.5625840017342294, |
| "grad_norm": 1.2109375, |
| "learning_rate": 3.589739130434783e-06, |
| "loss": 0.2523, |
| "step": 8110 |
| }, |
| { |
| "epoch": 0.563277693474962, |
| "grad_norm": 1.2421875, |
| "learning_rate": 3.588e-06, |
| "loss": 0.2254, |
| "step": 8120 |
| }, |
| { |
| "epoch": 0.5639713852156948, |
| "grad_norm": 1.578125, |
| "learning_rate": 3.586260869565218e-06, |
| "loss": 0.272, |
| "step": 8130 |
| }, |
| { |
| "epoch": 0.5646650769564275, |
| "grad_norm": 1.6953125, |
| "learning_rate": 3.584521739130435e-06, |
| "loss": 0.3524, |
| "step": 8140 |
| }, |
| { |
| "epoch": 0.5653587686971602, |
| "grad_norm": 1.1640625, |
| "learning_rate": 3.5827826086956523e-06, |
| "loss": 0.2291, |
| "step": 8150 |
| }, |
| { |
| "epoch": 0.5660524604378929, |
| "grad_norm": 1.34375, |
| "learning_rate": 3.58104347826087e-06, |
| "loss": 0.27, |
| "step": 8160 |
| }, |
| { |
| "epoch": 0.5667461521786257, |
| "grad_norm": 1.1640625, |
| "learning_rate": 3.5793043478260875e-06, |
| "loss": 0.324, |
| "step": 8170 |
| }, |
| { |
| "epoch": 0.5674398439193583, |
| "grad_norm": 1.015625, |
| "learning_rate": 3.5775652173913044e-06, |
| "loss": 0.3121, |
| "step": 8180 |
| }, |
| { |
| "epoch": 0.5681335356600911, |
| "grad_norm": 1.0078125, |
| "learning_rate": 3.575826086956522e-06, |
| "loss": 0.2035, |
| "step": 8190 |
| }, |
| { |
| "epoch": 0.5688272274008238, |
| "grad_norm": 1.1015625, |
| "learning_rate": 3.5740869565217396e-06, |
| "loss": 0.2779, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.5695209191415564, |
| "grad_norm": 1.46875, |
| "learning_rate": 3.572347826086957e-06, |
| "loss": 0.2545, |
| "step": 8210 |
| }, |
| { |
| "epoch": 0.5702146108822892, |
| "grad_norm": 1.2734375, |
| "learning_rate": 3.570608695652174e-06, |
| "loss": 0.3188, |
| "step": 8220 |
| }, |
| { |
| "epoch": 0.570908302623022, |
| "grad_norm": 1.1875, |
| "learning_rate": 3.5688695652173917e-06, |
| "loss": 0.2356, |
| "step": 8230 |
| }, |
| { |
| "epoch": 0.5716019943637546, |
| "grad_norm": 1.1484375, |
| "learning_rate": 3.567130434782609e-06, |
| "loss": 0.2316, |
| "step": 8240 |
| }, |
| { |
| "epoch": 0.5722956861044873, |
| "grad_norm": 1.5390625, |
| "learning_rate": 3.5653913043478265e-06, |
| "loss": 0.2724, |
| "step": 8250 |
| }, |
| { |
| "epoch": 0.5729893778452201, |
| "grad_norm": 1.34375, |
| "learning_rate": 3.563652173913044e-06, |
| "loss": 0.2535, |
| "step": 8260 |
| }, |
| { |
| "epoch": 0.5736830695859527, |
| "grad_norm": 1.125, |
| "learning_rate": 3.561913043478261e-06, |
| "loss": 0.2413, |
| "step": 8270 |
| }, |
| { |
| "epoch": 0.5743767613266855, |
| "grad_norm": 0.98046875, |
| "learning_rate": 3.5601739130434786e-06, |
| "loss": 0.2488, |
| "step": 8280 |
| }, |
| { |
| "epoch": 0.5750704530674182, |
| "grad_norm": 1.140625, |
| "learning_rate": 3.5584347826086955e-06, |
| "loss": 0.2248, |
| "step": 8290 |
| }, |
| { |
| "epoch": 0.5757641448081509, |
| "grad_norm": 1.1640625, |
| "learning_rate": 3.5566956521739133e-06, |
| "loss": 0.2339, |
| "step": 8300 |
| }, |
| { |
| "epoch": 0.5764578365488836, |
| "grad_norm": 2.25, |
| "learning_rate": 3.5549565217391307e-06, |
| "loss": 0.3776, |
| "step": 8310 |
| }, |
| { |
| "epoch": 0.5771515282896164, |
| "grad_norm": 1.2109375, |
| "learning_rate": 3.5532173913043485e-06, |
| "loss": 0.2102, |
| "step": 8320 |
| }, |
| { |
| "epoch": 0.577845220030349, |
| "grad_norm": 0.9921875, |
| "learning_rate": 3.5514782608695654e-06, |
| "loss": 0.2692, |
| "step": 8330 |
| }, |
| { |
| "epoch": 0.5785389117710817, |
| "grad_norm": 1.6796875, |
| "learning_rate": 3.549739130434783e-06, |
| "loss": 0.3611, |
| "step": 8340 |
| }, |
| { |
| "epoch": 0.5792326035118145, |
| "grad_norm": 1.15625, |
| "learning_rate": 3.548e-06, |
| "loss": 0.227, |
| "step": 8350 |
| }, |
| { |
| "epoch": 0.5799262952525471, |
| "grad_norm": 1.015625, |
| "learning_rate": 3.546260869565218e-06, |
| "loss": 0.242, |
| "step": 8360 |
| }, |
| { |
| "epoch": 0.5806199869932799, |
| "grad_norm": 1.09375, |
| "learning_rate": 3.544521739130435e-06, |
| "loss": 0.3021, |
| "step": 8370 |
| }, |
| { |
| "epoch": 0.5813136787340125, |
| "grad_norm": 1.2265625, |
| "learning_rate": 3.5427826086956523e-06, |
| "loss": 0.2542, |
| "step": 8380 |
| }, |
| { |
| "epoch": 0.5820073704747453, |
| "grad_norm": 1.2734375, |
| "learning_rate": 3.54104347826087e-06, |
| "loss": 0.24, |
| "step": 8390 |
| }, |
| { |
| "epoch": 0.582701062215478, |
| "grad_norm": 1.046875, |
| "learning_rate": 3.5393043478260874e-06, |
| "loss": 0.2421, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.5833947539562107, |
| "grad_norm": 0.984375, |
| "learning_rate": 3.5375652173913044e-06, |
| "loss": 0.212, |
| "step": 8410 |
| }, |
| { |
| "epoch": 0.5840884456969434, |
| "grad_norm": 1.0390625, |
| "learning_rate": 3.5358260869565218e-06, |
| "loss": 0.2281, |
| "step": 8420 |
| }, |
| { |
| "epoch": 0.5847821374376762, |
| "grad_norm": 1.40625, |
| "learning_rate": 3.5340869565217396e-06, |
| "loss": 0.2296, |
| "step": 8430 |
| }, |
| { |
| "epoch": 0.5854758291784088, |
| "grad_norm": 1.640625, |
| "learning_rate": 3.532347826086957e-06, |
| "loss": 0.2546, |
| "step": 8440 |
| }, |
| { |
| "epoch": 0.5861695209191415, |
| "grad_norm": 1.7109375, |
| "learning_rate": 3.530608695652174e-06, |
| "loss": 0.2691, |
| "step": 8450 |
| }, |
| { |
| "epoch": 0.5868632126598743, |
| "grad_norm": 1.5078125, |
| "learning_rate": 3.5288695652173917e-06, |
| "loss": 0.2716, |
| "step": 8460 |
| }, |
| { |
| "epoch": 0.5875569044006069, |
| "grad_norm": 1.3046875, |
| "learning_rate": 3.527130434782609e-06, |
| "loss": 0.3, |
| "step": 8470 |
| }, |
| { |
| "epoch": 0.5882505961413397, |
| "grad_norm": 0.98046875, |
| "learning_rate": 3.5253913043478264e-06, |
| "loss": 0.2811, |
| "step": 8480 |
| }, |
| { |
| "epoch": 0.5889442878820724, |
| "grad_norm": 1.265625, |
| "learning_rate": 3.5236521739130438e-06, |
| "loss": 0.2327, |
| "step": 8490 |
| }, |
| { |
| "epoch": 0.5896379796228051, |
| "grad_norm": 1.03125, |
| "learning_rate": 3.521913043478261e-06, |
| "loss": 0.2393, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.5903316713635378, |
| "grad_norm": 1.03125, |
| "learning_rate": 3.5201739130434785e-06, |
| "loss": 0.2633, |
| "step": 8510 |
| }, |
| { |
| "epoch": 0.5910253631042706, |
| "grad_norm": 1.1015625, |
| "learning_rate": 3.5184347826086963e-06, |
| "loss": 0.2459, |
| "step": 8520 |
| }, |
| { |
| "epoch": 0.5917190548450032, |
| "grad_norm": 1.1328125, |
| "learning_rate": 3.5166956521739133e-06, |
| "loss": 0.2063, |
| "step": 8530 |
| }, |
| { |
| "epoch": 0.592412746585736, |
| "grad_norm": 0.88671875, |
| "learning_rate": 3.5149565217391306e-06, |
| "loss": 0.2193, |
| "step": 8540 |
| }, |
| { |
| "epoch": 0.5931064383264687, |
| "grad_norm": 1.3828125, |
| "learning_rate": 3.5132173913043484e-06, |
| "loss": 0.3143, |
| "step": 8550 |
| }, |
| { |
| "epoch": 0.5938001300672013, |
| "grad_norm": 1.2421875, |
| "learning_rate": 3.511478260869566e-06, |
| "loss": 0.2205, |
| "step": 8560 |
| }, |
| { |
| "epoch": 0.5944938218079341, |
| "grad_norm": 1.2734375, |
| "learning_rate": 3.5097391304347828e-06, |
| "loss": 0.2392, |
| "step": 8570 |
| }, |
| { |
| "epoch": 0.5951875135486668, |
| "grad_norm": 1.421875, |
| "learning_rate": 3.508e-06, |
| "loss": 0.252, |
| "step": 8580 |
| }, |
| { |
| "epoch": 0.5958812052893995, |
| "grad_norm": 1.4296875, |
| "learning_rate": 3.506260869565218e-06, |
| "loss": 0.318, |
| "step": 8590 |
| }, |
| { |
| "epoch": 0.5965748970301322, |
| "grad_norm": 1.078125, |
| "learning_rate": 3.504521739130435e-06, |
| "loss": 0.2799, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.597268588770865, |
| "grad_norm": 1.25, |
| "learning_rate": 3.5027826086956522e-06, |
| "loss": 0.2301, |
| "step": 8610 |
| }, |
| { |
| "epoch": 0.5979622805115976, |
| "grad_norm": 1.28125, |
| "learning_rate": 3.50104347826087e-06, |
| "loss": 0.2516, |
| "step": 8620 |
| }, |
| { |
| "epoch": 0.5986559722523304, |
| "grad_norm": 1.375, |
| "learning_rate": 3.4993043478260874e-06, |
| "loss": 0.2338, |
| "step": 8630 |
| }, |
| { |
| "epoch": 0.5993496639930631, |
| "grad_norm": 1.09375, |
| "learning_rate": 3.4975652173913044e-06, |
| "loss": 0.2511, |
| "step": 8640 |
| }, |
| { |
| "epoch": 0.6000433557337957, |
| "grad_norm": 1.15625, |
| "learning_rate": 3.4958260869565217e-06, |
| "loss": 0.2401, |
| "step": 8650 |
| }, |
| { |
| "epoch": 0.6007370474745285, |
| "grad_norm": 1.09375, |
| "learning_rate": 3.4940869565217395e-06, |
| "loss": 0.2961, |
| "step": 8660 |
| }, |
| { |
| "epoch": 0.6014307392152612, |
| "grad_norm": 1.1953125, |
| "learning_rate": 3.492347826086957e-06, |
| "loss": 0.2639, |
| "step": 8670 |
| }, |
| { |
| "epoch": 0.6021244309559939, |
| "grad_norm": 1.2578125, |
| "learning_rate": 3.490608695652174e-06, |
| "loss": 0.2283, |
| "step": 8680 |
| }, |
| { |
| "epoch": 0.6028181226967266, |
| "grad_norm": 1.0703125, |
| "learning_rate": 3.4888695652173916e-06, |
| "loss": 0.2404, |
| "step": 8690 |
| }, |
| { |
| "epoch": 0.6035118144374594, |
| "grad_norm": 1.375, |
| "learning_rate": 3.487130434782609e-06, |
| "loss": 0.2559, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.604205506178192, |
| "grad_norm": 1.0390625, |
| "learning_rate": 3.4853913043478264e-06, |
| "loss": 0.24, |
| "step": 8710 |
| }, |
| { |
| "epoch": 0.6048991979189248, |
| "grad_norm": 1.078125, |
| "learning_rate": 3.4836521739130437e-06, |
| "loss": 0.2297, |
| "step": 8720 |
| }, |
| { |
| "epoch": 0.6055928896596575, |
| "grad_norm": 1.296875, |
| "learning_rate": 3.481913043478261e-06, |
| "loss": 0.2517, |
| "step": 8730 |
| }, |
| { |
| "epoch": 0.6062865814003902, |
| "grad_norm": 1.1875, |
| "learning_rate": 3.4801739130434785e-06, |
| "loss": 0.2096, |
| "step": 8740 |
| }, |
| { |
| "epoch": 0.6069802731411229, |
| "grad_norm": 1.4609375, |
| "learning_rate": 3.4784347826086963e-06, |
| "loss": 0.2116, |
| "step": 8750 |
| }, |
| { |
| "epoch": 0.6076739648818557, |
| "grad_norm": 0.79296875, |
| "learning_rate": 3.4766956521739132e-06, |
| "loss": 0.1944, |
| "step": 8760 |
| }, |
| { |
| "epoch": 0.6083676566225883, |
| "grad_norm": 1.3125, |
| "learning_rate": 3.4749565217391306e-06, |
| "loss": 0.2687, |
| "step": 8770 |
| }, |
| { |
| "epoch": 0.609061348363321, |
| "grad_norm": 1.2421875, |
| "learning_rate": 3.4732173913043484e-06, |
| "loss": 0.283, |
| "step": 8780 |
| }, |
| { |
| "epoch": 0.6097550401040538, |
| "grad_norm": 1.3359375, |
| "learning_rate": 3.4714782608695658e-06, |
| "loss": 0.2059, |
| "step": 8790 |
| }, |
| { |
| "epoch": 0.6104487318447864, |
| "grad_norm": 0.9765625, |
| "learning_rate": 3.4697391304347827e-06, |
| "loss": 0.3626, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.6111424235855192, |
| "grad_norm": 1.2578125, |
| "learning_rate": 3.468e-06, |
| "loss": 0.2557, |
| "step": 8810 |
| }, |
| { |
| "epoch": 0.6118361153262519, |
| "grad_norm": 1.3203125, |
| "learning_rate": 3.466260869565218e-06, |
| "loss": 0.2391, |
| "step": 8820 |
| }, |
| { |
| "epoch": 0.6125298070669846, |
| "grad_norm": 1.265625, |
| "learning_rate": 3.4645217391304353e-06, |
| "loss": 0.296, |
| "step": 8830 |
| }, |
| { |
| "epoch": 0.6132234988077173, |
| "grad_norm": 1.046875, |
| "learning_rate": 3.462782608695652e-06, |
| "loss": 0.2482, |
| "step": 8840 |
| }, |
| { |
| "epoch": 0.6139171905484501, |
| "grad_norm": 1.359375, |
| "learning_rate": 3.46104347826087e-06, |
| "loss": 0.2761, |
| "step": 8850 |
| }, |
| { |
| "epoch": 0.6146108822891827, |
| "grad_norm": 1.2890625, |
| "learning_rate": 3.4593043478260874e-06, |
| "loss": 0.2493, |
| "step": 8860 |
| }, |
| { |
| "epoch": 0.6153045740299155, |
| "grad_norm": 1.1328125, |
| "learning_rate": 3.4575652173913047e-06, |
| "loss": 0.2301, |
| "step": 8870 |
| }, |
| { |
| "epoch": 0.6159982657706482, |
| "grad_norm": 0.99609375, |
| "learning_rate": 3.4558260869565217e-06, |
| "loss": 0.2608, |
| "step": 8880 |
| }, |
| { |
| "epoch": 0.6166919575113808, |
| "grad_norm": 0.92578125, |
| "learning_rate": 3.4540869565217395e-06, |
| "loss": 0.2481, |
| "step": 8890 |
| }, |
| { |
| "epoch": 0.6173856492521136, |
| "grad_norm": 1.4140625, |
| "learning_rate": 3.452347826086957e-06, |
| "loss": 0.2292, |
| "step": 8900 |
| }, |
| { |
| "epoch": 0.6180793409928463, |
| "grad_norm": 1.1015625, |
| "learning_rate": 3.450608695652174e-06, |
| "loss": 0.2091, |
| "step": 8910 |
| }, |
| { |
| "epoch": 0.618773032733579, |
| "grad_norm": 1.2421875, |
| "learning_rate": 3.4488695652173916e-06, |
| "loss": 0.2997, |
| "step": 8920 |
| }, |
| { |
| "epoch": 0.6194667244743117, |
| "grad_norm": 1.453125, |
| "learning_rate": 3.447130434782609e-06, |
| "loss": 0.3502, |
| "step": 8930 |
| }, |
| { |
| "epoch": 0.6201604162150445, |
| "grad_norm": 1.7578125, |
| "learning_rate": 3.4453913043478263e-06, |
| "loss": 0.313, |
| "step": 8940 |
| }, |
| { |
| "epoch": 0.6208541079557771, |
| "grad_norm": 1.2109375, |
| "learning_rate": 3.4436521739130437e-06, |
| "loss": 0.2215, |
| "step": 8950 |
| }, |
| { |
| "epoch": 0.6215477996965099, |
| "grad_norm": 1.3203125, |
| "learning_rate": 3.441913043478261e-06, |
| "loss": 0.2398, |
| "step": 8960 |
| }, |
| { |
| "epoch": 0.6222414914372426, |
| "grad_norm": 1.046875, |
| "learning_rate": 3.4401739130434784e-06, |
| "loss": 0.2002, |
| "step": 8970 |
| }, |
| { |
| "epoch": 0.6229351831779752, |
| "grad_norm": 1.484375, |
| "learning_rate": 3.4384347826086962e-06, |
| "loss": 0.2549, |
| "step": 8980 |
| }, |
| { |
| "epoch": 0.623628874918708, |
| "grad_norm": 1.1328125, |
| "learning_rate": 3.436695652173913e-06, |
| "loss": 0.2009, |
| "step": 8990 |
| }, |
| { |
| "epoch": 0.6243225666594407, |
| "grad_norm": 0.9375, |
| "learning_rate": 3.4349565217391306e-06, |
| "loss": 0.22, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.6250162584001734, |
| "grad_norm": 1.1875, |
| "learning_rate": 3.4332173913043484e-06, |
| "loss": 0.2436, |
| "step": 9010 |
| }, |
| { |
| "epoch": 0.6257099501409061, |
| "grad_norm": 0.96484375, |
| "learning_rate": 3.4314782608695657e-06, |
| "loss": 0.2411, |
| "step": 9020 |
| }, |
| { |
| "epoch": 0.6264036418816389, |
| "grad_norm": 1.15625, |
| "learning_rate": 3.4297391304347827e-06, |
| "loss": 0.2155, |
| "step": 9030 |
| }, |
| { |
| "epoch": 0.6270973336223715, |
| "grad_norm": 1.765625, |
| "learning_rate": 3.428e-06, |
| "loss": 0.26, |
| "step": 9040 |
| }, |
| { |
| "epoch": 0.6277910253631043, |
| "grad_norm": 1.296875, |
| "learning_rate": 3.426260869565218e-06, |
| "loss": 0.2602, |
| "step": 9050 |
| }, |
| { |
| "epoch": 0.628484717103837, |
| "grad_norm": 1.0859375, |
| "learning_rate": 3.424521739130435e-06, |
| "loss": 0.2459, |
| "step": 9060 |
| }, |
| { |
| "epoch": 0.6291784088445697, |
| "grad_norm": 1.515625, |
| "learning_rate": 3.422782608695652e-06, |
| "loss": 0.3279, |
| "step": 9070 |
| }, |
| { |
| "epoch": 0.6298721005853024, |
| "grad_norm": 1.09375, |
| "learning_rate": 3.42104347826087e-06, |
| "loss": 0.2208, |
| "step": 9080 |
| }, |
| { |
| "epoch": 0.6305657923260352, |
| "grad_norm": 1.046875, |
| "learning_rate": 3.4193043478260873e-06, |
| "loss": 0.2717, |
| "step": 9090 |
| }, |
| { |
| "epoch": 0.6312594840667678, |
| "grad_norm": 1.0546875, |
| "learning_rate": 3.4175652173913047e-06, |
| "loss": 0.2052, |
| "step": 9100 |
| }, |
| { |
| "epoch": 0.6319531758075005, |
| "grad_norm": 1.1171875, |
| "learning_rate": 3.4158260869565216e-06, |
| "loss": 0.2791, |
| "step": 9110 |
| }, |
| { |
| "epoch": 0.6326468675482333, |
| "grad_norm": 1.109375, |
| "learning_rate": 3.4140869565217394e-06, |
| "loss": 0.2904, |
| "step": 9120 |
| }, |
| { |
| "epoch": 0.6333405592889659, |
| "grad_norm": 1.046875, |
| "learning_rate": 3.412347826086957e-06, |
| "loss": 0.2522, |
| "step": 9130 |
| }, |
| { |
| "epoch": 0.6340342510296987, |
| "grad_norm": 1.4140625, |
| "learning_rate": 3.4106086956521746e-06, |
| "loss": 0.2051, |
| "step": 9140 |
| }, |
| { |
| "epoch": 0.6347279427704314, |
| "grad_norm": 1.515625, |
| "learning_rate": 3.4088695652173915e-06, |
| "loss": 0.2321, |
| "step": 9150 |
| }, |
| { |
| "epoch": 0.6354216345111641, |
| "grad_norm": 1.203125, |
| "learning_rate": 3.407130434782609e-06, |
| "loss": 0.2734, |
| "step": 9160 |
| }, |
| { |
| "epoch": 0.6361153262518968, |
| "grad_norm": 1.1796875, |
| "learning_rate": 3.4053913043478263e-06, |
| "loss": 0.2302, |
| "step": 9170 |
| }, |
| { |
| "epoch": 0.6368090179926296, |
| "grad_norm": 1.40625, |
| "learning_rate": 3.403652173913044e-06, |
| "loss": 0.2541, |
| "step": 9180 |
| }, |
| { |
| "epoch": 0.6375027097333622, |
| "grad_norm": 1.5, |
| "learning_rate": 3.401913043478261e-06, |
| "loss": 0.2151, |
| "step": 9190 |
| }, |
| { |
| "epoch": 0.638196401474095, |
| "grad_norm": 1.2421875, |
| "learning_rate": 3.4001739130434784e-06, |
| "loss": 0.2795, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.6388900932148277, |
| "grad_norm": 1.2734375, |
| "learning_rate": 3.398434782608696e-06, |
| "loss": 0.2498, |
| "step": 9210 |
| }, |
| { |
| "epoch": 0.6395837849555603, |
| "grad_norm": 2.0625, |
| "learning_rate": 3.396695652173913e-06, |
| "loss": 0.2836, |
| "step": 9220 |
| }, |
| { |
| "epoch": 0.6402774766962931, |
| "grad_norm": 1.125, |
| "learning_rate": 3.3949565217391305e-06, |
| "loss": 0.2346, |
| "step": 9230 |
| }, |
| { |
| "epoch": 0.6409711684370258, |
| "grad_norm": 1.03125, |
| "learning_rate": 3.3932173913043483e-06, |
| "loss": 0.2447, |
| "step": 9240 |
| }, |
| { |
| "epoch": 0.6416648601777585, |
| "grad_norm": 1.2109375, |
| "learning_rate": 3.3914782608695657e-06, |
| "loss": 0.2775, |
| "step": 9250 |
| }, |
| { |
| "epoch": 0.6423585519184912, |
| "grad_norm": 1.4296875, |
| "learning_rate": 3.3897391304347826e-06, |
| "loss": 0.2225, |
| "step": 9260 |
| }, |
| { |
| "epoch": 0.643052243659224, |
| "grad_norm": 1.4609375, |
| "learning_rate": 3.388e-06, |
| "loss": 0.2327, |
| "step": 9270 |
| }, |
| { |
| "epoch": 0.6437459353999566, |
| "grad_norm": 1.078125, |
| "learning_rate": 3.386260869565218e-06, |
| "loss": 0.2091, |
| "step": 9280 |
| }, |
| { |
| "epoch": 0.6444396271406894, |
| "grad_norm": 1.484375, |
| "learning_rate": 3.384521739130435e-06, |
| "loss": 0.244, |
| "step": 9290 |
| }, |
| { |
| "epoch": 0.6451333188814221, |
| "grad_norm": 1.25, |
| "learning_rate": 3.382782608695652e-06, |
| "loss": 0.2169, |
| "step": 9300 |
| }, |
| { |
| "epoch": 0.6458270106221548, |
| "grad_norm": 1.4296875, |
| "learning_rate": 3.38104347826087e-06, |
| "loss": 0.3279, |
| "step": 9310 |
| }, |
| { |
| "epoch": 0.6465207023628875, |
| "grad_norm": 1.046875, |
| "learning_rate": 3.3793043478260873e-06, |
| "loss": 0.2176, |
| "step": 9320 |
| }, |
| { |
| "epoch": 0.6472143941036202, |
| "grad_norm": 1.96875, |
| "learning_rate": 3.3775652173913047e-06, |
| "loss": 0.315, |
| "step": 9330 |
| }, |
| { |
| "epoch": 0.6479080858443529, |
| "grad_norm": 1.2421875, |
| "learning_rate": 3.3758260869565216e-06, |
| "loss": 0.2778, |
| "step": 9340 |
| }, |
| { |
| "epoch": 0.6486017775850856, |
| "grad_norm": 0.92578125, |
| "learning_rate": 3.3740869565217394e-06, |
| "loss": 0.2681, |
| "step": 9350 |
| }, |
| { |
| "epoch": 0.6492954693258184, |
| "grad_norm": 1.078125, |
| "learning_rate": 3.3723478260869568e-06, |
| "loss": 0.2078, |
| "step": 9360 |
| }, |
| { |
| "epoch": 0.649989161066551, |
| "grad_norm": 1.8984375, |
| "learning_rate": 3.3706086956521746e-06, |
| "loss": 0.3201, |
| "step": 9370 |
| }, |
| { |
| "epoch": 0.6506828528072838, |
| "grad_norm": 1.078125, |
| "learning_rate": 3.3688695652173915e-06, |
| "loss": 0.2226, |
| "step": 9380 |
| }, |
| { |
| "epoch": 0.6513765445480165, |
| "grad_norm": 1.171875, |
| "learning_rate": 3.367130434782609e-06, |
| "loss": 0.2847, |
| "step": 9390 |
| }, |
| { |
| "epoch": 0.6520702362887492, |
| "grad_norm": 0.99609375, |
| "learning_rate": 3.3653913043478263e-06, |
| "loss": 0.2418, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.6527639280294819, |
| "grad_norm": 2.078125, |
| "learning_rate": 3.363652173913044e-06, |
| "loss": 0.2898, |
| "step": 9410 |
| }, |
| { |
| "epoch": 0.6534576197702147, |
| "grad_norm": 1.109375, |
| "learning_rate": 3.361913043478261e-06, |
| "loss": 0.2522, |
| "step": 9420 |
| }, |
| { |
| "epoch": 0.6541513115109473, |
| "grad_norm": 1.203125, |
| "learning_rate": 3.3601739130434784e-06, |
| "loss": 0.2297, |
| "step": 9430 |
| }, |
| { |
| "epoch": 0.65484500325168, |
| "grad_norm": 1.1484375, |
| "learning_rate": 3.358434782608696e-06, |
| "loss": 0.2395, |
| "step": 9440 |
| }, |
| { |
| "epoch": 0.6555386949924128, |
| "grad_norm": 1.4453125, |
| "learning_rate": 3.3566956521739135e-06, |
| "loss": 0.2372, |
| "step": 9450 |
| }, |
| { |
| "epoch": 0.6562323867331454, |
| "grad_norm": 1.265625, |
| "learning_rate": 3.3549565217391305e-06, |
| "loss": 0.2412, |
| "step": 9460 |
| }, |
| { |
| "epoch": 0.6569260784738782, |
| "grad_norm": 1.28125, |
| "learning_rate": 3.3532173913043483e-06, |
| "loss": 0.2586, |
| "step": 9470 |
| }, |
| { |
| "epoch": 0.6576197702146109, |
| "grad_norm": 1.5703125, |
| "learning_rate": 3.3514782608695656e-06, |
| "loss": 0.269, |
| "step": 9480 |
| }, |
| { |
| "epoch": 0.6583134619553436, |
| "grad_norm": 1.1796875, |
| "learning_rate": 3.349739130434783e-06, |
| "loss": 0.2298, |
| "step": 9490 |
| }, |
| { |
| "epoch": 0.6590071536960763, |
| "grad_norm": 0.9609375, |
| "learning_rate": 3.348e-06, |
| "loss": 0.2737, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.6597008454368091, |
| "grad_norm": 1.4453125, |
| "learning_rate": 3.3462608695652178e-06, |
| "loss": 0.2729, |
| "step": 9510 |
| }, |
| { |
| "epoch": 0.6603945371775417, |
| "grad_norm": 1.0625, |
| "learning_rate": 3.344521739130435e-06, |
| "loss": 0.2285, |
| "step": 9520 |
| }, |
| { |
| "epoch": 0.6610882289182745, |
| "grad_norm": 1.0390625, |
| "learning_rate": 3.342782608695652e-06, |
| "loss": 0.2581, |
| "step": 9530 |
| }, |
| { |
| "epoch": 0.6617819206590072, |
| "grad_norm": 1.21875, |
| "learning_rate": 3.34104347826087e-06, |
| "loss": 0.3278, |
| "step": 9540 |
| }, |
| { |
| "epoch": 0.6624756123997398, |
| "grad_norm": 0.9765625, |
| "learning_rate": 3.3393043478260872e-06, |
| "loss": 0.2526, |
| "step": 9550 |
| }, |
| { |
| "epoch": 0.6631693041404726, |
| "grad_norm": 1.265625, |
| "learning_rate": 3.3375652173913046e-06, |
| "loss": 0.2401, |
| "step": 9560 |
| }, |
| { |
| "epoch": 0.6638629958812053, |
| "grad_norm": 0.9765625, |
| "learning_rate": 3.3358260869565216e-06, |
| "loss": 0.2359, |
| "step": 9570 |
| }, |
| { |
| "epoch": 0.664556687621938, |
| "grad_norm": 1.2265625, |
| "learning_rate": 3.3340869565217394e-06, |
| "loss": 0.2697, |
| "step": 9580 |
| }, |
| { |
| "epoch": 0.6652503793626707, |
| "grad_norm": 1.3203125, |
| "learning_rate": 3.3323478260869567e-06, |
| "loss": 0.2191, |
| "step": 9590 |
| }, |
| { |
| "epoch": 0.6659440711034035, |
| "grad_norm": 1.0859375, |
| "learning_rate": 3.3306086956521745e-06, |
| "loss": 0.2142, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.6666377628441361, |
| "grad_norm": 1.2578125, |
| "learning_rate": 3.3288695652173915e-06, |
| "loss": 0.2708, |
| "step": 9610 |
| }, |
| { |
| "epoch": 0.6673314545848689, |
| "grad_norm": 1.265625, |
| "learning_rate": 3.327130434782609e-06, |
| "loss": 0.2285, |
| "step": 9620 |
| }, |
| { |
| "epoch": 0.6680251463256016, |
| "grad_norm": 1.4609375, |
| "learning_rate": 3.325391304347826e-06, |
| "loss": 0.2216, |
| "step": 9630 |
| }, |
| { |
| "epoch": 0.6687188380663343, |
| "grad_norm": 1.1796875, |
| "learning_rate": 3.323652173913044e-06, |
| "loss": 0.2208, |
| "step": 9640 |
| }, |
| { |
| "epoch": 0.669412529807067, |
| "grad_norm": 0.86328125, |
| "learning_rate": 3.321913043478261e-06, |
| "loss": 0.1785, |
| "step": 9650 |
| }, |
| { |
| "epoch": 0.6701062215477998, |
| "grad_norm": 1.0390625, |
| "learning_rate": 3.3201739130434783e-06, |
| "loss": 0.2453, |
| "step": 9660 |
| }, |
| { |
| "epoch": 0.6707999132885324, |
| "grad_norm": 1.09375, |
| "learning_rate": 3.318434782608696e-06, |
| "loss": 0.2761, |
| "step": 9670 |
| }, |
| { |
| "epoch": 0.6714936050292651, |
| "grad_norm": 1.1015625, |
| "learning_rate": 3.3166956521739135e-06, |
| "loss": 0.2701, |
| "step": 9680 |
| }, |
| { |
| "epoch": 0.6721872967699978, |
| "grad_norm": 1.5546875, |
| "learning_rate": 3.3149565217391304e-06, |
| "loss": 0.258, |
| "step": 9690 |
| }, |
| { |
| "epoch": 0.6728809885107305, |
| "grad_norm": 1.21875, |
| "learning_rate": 3.3132173913043482e-06, |
| "loss": 0.2648, |
| "step": 9700 |
| }, |
| { |
| "epoch": 0.6735746802514633, |
| "grad_norm": 1.3125, |
| "learning_rate": 3.3114782608695656e-06, |
| "loss": 0.2885, |
| "step": 9710 |
| }, |
| { |
| "epoch": 0.6742683719921959, |
| "grad_norm": 1.234375, |
| "learning_rate": 3.309739130434783e-06, |
| "loss": 0.264, |
| "step": 9720 |
| }, |
| { |
| "epoch": 0.6749620637329287, |
| "grad_norm": 1.09375, |
| "learning_rate": 3.308e-06, |
| "loss": 0.2387, |
| "step": 9730 |
| }, |
| { |
| "epoch": 0.6756557554736614, |
| "grad_norm": 1.375, |
| "learning_rate": 3.3062608695652177e-06, |
| "loss": 0.3259, |
| "step": 9740 |
| }, |
| { |
| "epoch": 0.676349447214394, |
| "grad_norm": 1.421875, |
| "learning_rate": 3.304521739130435e-06, |
| "loss": 0.251, |
| "step": 9750 |
| }, |
| { |
| "epoch": 0.6770431389551268, |
| "grad_norm": 1.0859375, |
| "learning_rate": 3.302782608695653e-06, |
| "loss": 0.2134, |
| "step": 9760 |
| }, |
| { |
| "epoch": 0.6777368306958595, |
| "grad_norm": 1.1171875, |
| "learning_rate": 3.30104347826087e-06, |
| "loss": 0.2788, |
| "step": 9770 |
| }, |
| { |
| "epoch": 0.6784305224365922, |
| "grad_norm": 1.265625, |
| "learning_rate": 3.299304347826087e-06, |
| "loss": 0.2509, |
| "step": 9780 |
| }, |
| { |
| "epoch": 0.6791242141773249, |
| "grad_norm": 1.3359375, |
| "learning_rate": 3.2975652173913046e-06, |
| "loss": 0.2613, |
| "step": 9790 |
| }, |
| { |
| "epoch": 0.6798179059180577, |
| "grad_norm": 1.1328125, |
| "learning_rate": 3.2958260869565224e-06, |
| "loss": 0.2278, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.6805115976587903, |
| "grad_norm": 1.0859375, |
| "learning_rate": 3.2940869565217393e-06, |
| "loss": 0.2223, |
| "step": 9810 |
| }, |
| { |
| "epoch": 0.6812052893995231, |
| "grad_norm": 1.484375, |
| "learning_rate": 3.2923478260869567e-06, |
| "loss": 0.2233, |
| "step": 9820 |
| }, |
| { |
| "epoch": 0.6818989811402558, |
| "grad_norm": 1.234375, |
| "learning_rate": 3.2906086956521745e-06, |
| "loss": 0.2087, |
| "step": 9830 |
| }, |
| { |
| "epoch": 0.6825926728809885, |
| "grad_norm": 1.125, |
| "learning_rate": 3.2888695652173914e-06, |
| "loss": 0.2244, |
| "step": 9840 |
| }, |
| { |
| "epoch": 0.6832863646217212, |
| "grad_norm": 1.03125, |
| "learning_rate": 3.287130434782609e-06, |
| "loss": 0.2528, |
| "step": 9850 |
| }, |
| { |
| "epoch": 0.683980056362454, |
| "grad_norm": 1.3203125, |
| "learning_rate": 3.2853913043478266e-06, |
| "loss": 0.2369, |
| "step": 9860 |
| }, |
| { |
| "epoch": 0.6846737481031866, |
| "grad_norm": 1.1875, |
| "learning_rate": 3.283652173913044e-06, |
| "loss": 0.2412, |
| "step": 9870 |
| }, |
| { |
| "epoch": 0.6853674398439193, |
| "grad_norm": 1.9296875, |
| "learning_rate": 3.281913043478261e-06, |
| "loss": 0.2932, |
| "step": 9880 |
| }, |
| { |
| "epoch": 0.6860611315846521, |
| "grad_norm": 1.4296875, |
| "learning_rate": 3.2801739130434783e-06, |
| "loss": 0.2391, |
| "step": 9890 |
| }, |
| { |
| "epoch": 0.6867548233253847, |
| "grad_norm": 1.015625, |
| "learning_rate": 3.278434782608696e-06, |
| "loss": 0.2181, |
| "step": 9900 |
| }, |
| { |
| "epoch": 0.6874485150661175, |
| "grad_norm": 0.890625, |
| "learning_rate": 3.2766956521739134e-06, |
| "loss": 0.2253, |
| "step": 9910 |
| }, |
| { |
| "epoch": 0.6881422068068502, |
| "grad_norm": 1.46875, |
| "learning_rate": 3.2749565217391304e-06, |
| "loss": 0.2034, |
| "step": 9920 |
| }, |
| { |
| "epoch": 0.6888358985475829, |
| "grad_norm": 1.265625, |
| "learning_rate": 3.273217391304348e-06, |
| "loss": 0.2552, |
| "step": 9930 |
| }, |
| { |
| "epoch": 0.6895295902883156, |
| "grad_norm": 1.6171875, |
| "learning_rate": 3.2714782608695656e-06, |
| "loss": 0.3157, |
| "step": 9940 |
| }, |
| { |
| "epoch": 0.6902232820290484, |
| "grad_norm": 0.96875, |
| "learning_rate": 3.269739130434783e-06, |
| "loss": 0.218, |
| "step": 9950 |
| }, |
| { |
| "epoch": 0.690916973769781, |
| "grad_norm": 1.2265625, |
| "learning_rate": 3.268e-06, |
| "loss": 0.2232, |
| "step": 9960 |
| }, |
| { |
| "epoch": 0.6916106655105138, |
| "grad_norm": 1.296875, |
| "learning_rate": 3.2662608695652177e-06, |
| "loss": 0.2971, |
| "step": 9970 |
| }, |
| { |
| "epoch": 0.6923043572512465, |
| "grad_norm": 1.1953125, |
| "learning_rate": 3.264521739130435e-06, |
| "loss": 0.2534, |
| "step": 9980 |
| }, |
| { |
| "epoch": 0.6929980489919791, |
| "grad_norm": 1.1328125, |
| "learning_rate": 3.262782608695653e-06, |
| "loss": 0.2167, |
| "step": 9990 |
| }, |
| { |
| "epoch": 0.6936917407327119, |
| "grad_norm": 0.98828125, |
| "learning_rate": 3.26104347826087e-06, |
| "loss": 0.2506, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.6943854324734446, |
| "grad_norm": 1.1484375, |
| "learning_rate": 3.259304347826087e-06, |
| "loss": 0.2381, |
| "step": 10010 |
| }, |
| { |
| "epoch": 0.6950791242141773, |
| "grad_norm": 1.3515625, |
| "learning_rate": 3.2575652173913045e-06, |
| "loss": 0.2228, |
| "step": 10020 |
| }, |
| { |
| "epoch": 0.69577281595491, |
| "grad_norm": 1.0078125, |
| "learning_rate": 3.2558260869565223e-06, |
| "loss": 0.299, |
| "step": 10030 |
| }, |
| { |
| "epoch": 0.6964665076956428, |
| "grad_norm": 1.125, |
| "learning_rate": 3.2540869565217393e-06, |
| "loss": 0.2691, |
| "step": 10040 |
| }, |
| { |
| "epoch": 0.6971601994363754, |
| "grad_norm": 1.0625, |
| "learning_rate": 3.2523478260869566e-06, |
| "loss": 0.2412, |
| "step": 10050 |
| }, |
| { |
| "epoch": 0.6978538911771082, |
| "grad_norm": 1.453125, |
| "learning_rate": 3.2506086956521744e-06, |
| "loss": 0.2672, |
| "step": 10060 |
| }, |
| { |
| "epoch": 0.6985475829178409, |
| "grad_norm": 1.2734375, |
| "learning_rate": 3.248869565217392e-06, |
| "loss": 0.2148, |
| "step": 10070 |
| }, |
| { |
| "epoch": 0.6992412746585736, |
| "grad_norm": 1.28125, |
| "learning_rate": 3.2471304347826088e-06, |
| "loss": 0.246, |
| "step": 10080 |
| }, |
| { |
| "epoch": 0.6999349663993063, |
| "grad_norm": 0.8984375, |
| "learning_rate": 3.2453913043478266e-06, |
| "loss": 0.2934, |
| "step": 10090 |
| }, |
| { |
| "epoch": 0.700628658140039, |
| "grad_norm": 1.1875, |
| "learning_rate": 3.243652173913044e-06, |
| "loss": 0.2409, |
| "step": 10100 |
| }, |
| { |
| "epoch": 0.7013223498807717, |
| "grad_norm": 1.046875, |
| "learning_rate": 3.241913043478261e-06, |
| "loss": 0.2713, |
| "step": 10110 |
| }, |
| { |
| "epoch": 0.7020160416215044, |
| "grad_norm": 1.15625, |
| "learning_rate": 3.2401739130434782e-06, |
| "loss": 0.2528, |
| "step": 10120 |
| }, |
| { |
| "epoch": 0.7027097333622372, |
| "grad_norm": 1.21875, |
| "learning_rate": 3.238434782608696e-06, |
| "loss": 0.2619, |
| "step": 10130 |
| }, |
| { |
| "epoch": 0.7034034251029698, |
| "grad_norm": 1.1875, |
| "learning_rate": 3.2366956521739134e-06, |
| "loss": 0.2569, |
| "step": 10140 |
| }, |
| { |
| "epoch": 0.7040971168437026, |
| "grad_norm": 1.2265625, |
| "learning_rate": 3.2349565217391304e-06, |
| "loss": 0.2033, |
| "step": 10150 |
| }, |
| { |
| "epoch": 0.7047908085844353, |
| "grad_norm": 1.4765625, |
| "learning_rate": 3.233217391304348e-06, |
| "loss": 0.2447, |
| "step": 10160 |
| }, |
| { |
| "epoch": 0.705484500325168, |
| "grad_norm": 0.83203125, |
| "learning_rate": 3.2314782608695655e-06, |
| "loss": 0.2063, |
| "step": 10170 |
| }, |
| { |
| "epoch": 0.7061781920659007, |
| "grad_norm": 1.234375, |
| "learning_rate": 3.229739130434783e-06, |
| "loss": 0.235, |
| "step": 10180 |
| }, |
| { |
| "epoch": 0.7068718838066335, |
| "grad_norm": 1.375, |
| "learning_rate": 3.228e-06, |
| "loss": 0.2407, |
| "step": 10190 |
| }, |
| { |
| "epoch": 0.7075655755473661, |
| "grad_norm": 1.2421875, |
| "learning_rate": 3.2262608695652176e-06, |
| "loss": 0.2973, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.7082592672880988, |
| "grad_norm": 1.34375, |
| "learning_rate": 3.224521739130435e-06, |
| "loss": 0.2291, |
| "step": 10210 |
| }, |
| { |
| "epoch": 0.7089529590288316, |
| "grad_norm": 1.1328125, |
| "learning_rate": 3.222782608695653e-06, |
| "loss": 0.2552, |
| "step": 10220 |
| }, |
| { |
| "epoch": 0.7096466507695642, |
| "grad_norm": 1.1953125, |
| "learning_rate": 3.2210434782608697e-06, |
| "loss": 0.2343, |
| "step": 10230 |
| }, |
| { |
| "epoch": 0.710340342510297, |
| "grad_norm": 1.265625, |
| "learning_rate": 3.219304347826087e-06, |
| "loss": 0.2158, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.7110340342510297, |
| "grad_norm": 1.0703125, |
| "learning_rate": 3.2175652173913045e-06, |
| "loss": 0.2512, |
| "step": 10250 |
| }, |
| { |
| "epoch": 0.7117277259917624, |
| "grad_norm": 0.890625, |
| "learning_rate": 3.2158260869565223e-06, |
| "loss": 0.2346, |
| "step": 10260 |
| }, |
| { |
| "epoch": 0.7124214177324951, |
| "grad_norm": 1.0078125, |
| "learning_rate": 3.2140869565217392e-06, |
| "loss": 0.2133, |
| "step": 10270 |
| }, |
| { |
| "epoch": 0.7131151094732279, |
| "grad_norm": 1.15625, |
| "learning_rate": 3.2123478260869566e-06, |
| "loss": 0.2386, |
| "step": 10280 |
| }, |
| { |
| "epoch": 0.7138088012139605, |
| "grad_norm": 1.109375, |
| "learning_rate": 3.2106086956521744e-06, |
| "loss": 0.1876, |
| "step": 10290 |
| }, |
| { |
| "epoch": 0.7145024929546933, |
| "grad_norm": 0.953125, |
| "learning_rate": 3.2088695652173918e-06, |
| "loss": 0.2198, |
| "step": 10300 |
| }, |
| { |
| "epoch": 0.715196184695426, |
| "grad_norm": 0.9453125, |
| "learning_rate": 3.2071304347826087e-06, |
| "loss": 0.218, |
| "step": 10310 |
| }, |
| { |
| "epoch": 0.7158898764361586, |
| "grad_norm": 0.97265625, |
| "learning_rate": 3.2053913043478265e-06, |
| "loss": 0.2749, |
| "step": 10320 |
| }, |
| { |
| "epoch": 0.7165835681768914, |
| "grad_norm": 0.99609375, |
| "learning_rate": 3.203652173913044e-06, |
| "loss": 0.2199, |
| "step": 10330 |
| }, |
| { |
| "epoch": 0.7172772599176241, |
| "grad_norm": 0.9921875, |
| "learning_rate": 3.2019130434782613e-06, |
| "loss": 0.2523, |
| "step": 10340 |
| }, |
| { |
| "epoch": 0.7179709516583568, |
| "grad_norm": 0.98828125, |
| "learning_rate": 3.200173913043478e-06, |
| "loss": 0.2313, |
| "step": 10350 |
| }, |
| { |
| "epoch": 0.7186646433990895, |
| "grad_norm": 1.25, |
| "learning_rate": 3.198434782608696e-06, |
| "loss": 0.3035, |
| "step": 10360 |
| }, |
| { |
| "epoch": 0.7193583351398223, |
| "grad_norm": 1.6015625, |
| "learning_rate": 3.1966956521739134e-06, |
| "loss": 0.3116, |
| "step": 10370 |
| }, |
| { |
| "epoch": 0.7200520268805549, |
| "grad_norm": 1.21875, |
| "learning_rate": 3.194956521739131e-06, |
| "loss": 0.2449, |
| "step": 10380 |
| }, |
| { |
| "epoch": 0.7207457186212877, |
| "grad_norm": 1.796875, |
| "learning_rate": 3.193217391304348e-06, |
| "loss": 0.3198, |
| "step": 10390 |
| }, |
| { |
| "epoch": 0.7214394103620204, |
| "grad_norm": 1.25, |
| "learning_rate": 3.1914782608695655e-06, |
| "loss": 0.2539, |
| "step": 10400 |
| }, |
| { |
| "epoch": 0.722133102102753, |
| "grad_norm": 1.125, |
| "learning_rate": 3.189739130434783e-06, |
| "loss": 0.3254, |
| "step": 10410 |
| }, |
| { |
| "epoch": 0.7228267938434858, |
| "grad_norm": 1.375, |
| "learning_rate": 3.188e-06, |
| "loss": 0.2163, |
| "step": 10420 |
| }, |
| { |
| "epoch": 0.7235204855842186, |
| "grad_norm": 1.2109375, |
| "learning_rate": 3.1862608695652176e-06, |
| "loss": 0.253, |
| "step": 10430 |
| }, |
| { |
| "epoch": 0.7242141773249512, |
| "grad_norm": 1.0234375, |
| "learning_rate": 3.184521739130435e-06, |
| "loss": 0.2237, |
| "step": 10440 |
| }, |
| { |
| "epoch": 0.7249078690656839, |
| "grad_norm": 1.234375, |
| "learning_rate": 3.1827826086956528e-06, |
| "loss": 0.2336, |
| "step": 10450 |
| }, |
| { |
| "epoch": 0.7256015608064167, |
| "grad_norm": 1.046875, |
| "learning_rate": 3.1810434782608697e-06, |
| "loss": 0.2501, |
| "step": 10460 |
| }, |
| { |
| "epoch": 0.7262952525471493, |
| "grad_norm": 1.3515625, |
| "learning_rate": 3.179304347826087e-06, |
| "loss": 0.2205, |
| "step": 10470 |
| }, |
| { |
| "epoch": 0.7269889442878821, |
| "grad_norm": 1.515625, |
| "learning_rate": 3.1775652173913045e-06, |
| "loss": 0.2509, |
| "step": 10480 |
| }, |
| { |
| "epoch": 0.7276826360286148, |
| "grad_norm": 1.109375, |
| "learning_rate": 3.1758260869565222e-06, |
| "loss": 0.2539, |
| "step": 10490 |
| }, |
| { |
| "epoch": 0.7283763277693475, |
| "grad_norm": 1.0703125, |
| "learning_rate": 3.174086956521739e-06, |
| "loss": 0.2217, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.7290700195100802, |
| "grad_norm": 1.4375, |
| "learning_rate": 3.1723478260869566e-06, |
| "loss": 0.26, |
| "step": 10510 |
| }, |
| { |
| "epoch": 0.729763711250813, |
| "grad_norm": 1.0703125, |
| "learning_rate": 3.1706086956521744e-06, |
| "loss": 0.2377, |
| "step": 10520 |
| }, |
| { |
| "epoch": 0.7304574029915456, |
| "grad_norm": 1.265625, |
| "learning_rate": 3.1688695652173917e-06, |
| "loss": 0.2349, |
| "step": 10530 |
| }, |
| { |
| "epoch": 0.7311510947322784, |
| "grad_norm": 0.953125, |
| "learning_rate": 3.1671304347826087e-06, |
| "loss": 0.2118, |
| "step": 10540 |
| }, |
| { |
| "epoch": 0.7318447864730111, |
| "grad_norm": 1.328125, |
| "learning_rate": 3.1653913043478265e-06, |
| "loss": 0.2557, |
| "step": 10550 |
| }, |
| { |
| "epoch": 0.7325384782137437, |
| "grad_norm": 1.125, |
| "learning_rate": 3.163652173913044e-06, |
| "loss": 0.272, |
| "step": 10560 |
| }, |
| { |
| "epoch": 0.7332321699544765, |
| "grad_norm": 1.078125, |
| "learning_rate": 3.1619130434782612e-06, |
| "loss": 0.2569, |
| "step": 10570 |
| }, |
| { |
| "epoch": 0.7339258616952092, |
| "grad_norm": 0.77734375, |
| "learning_rate": 3.160173913043478e-06, |
| "loss": 0.2273, |
| "step": 10580 |
| }, |
| { |
| "epoch": 0.7346195534359419, |
| "grad_norm": 1.109375, |
| "learning_rate": 3.158434782608696e-06, |
| "loss": 0.2184, |
| "step": 10590 |
| }, |
| { |
| "epoch": 0.7353132451766746, |
| "grad_norm": 1.15625, |
| "learning_rate": 3.1566956521739133e-06, |
| "loss": 0.2226, |
| "step": 10600 |
| }, |
| { |
| "epoch": 0.7360069369174074, |
| "grad_norm": 0.8984375, |
| "learning_rate": 3.154956521739131e-06, |
| "loss": 0.2256, |
| "step": 10610 |
| }, |
| { |
| "epoch": 0.73670062865814, |
| "grad_norm": 1.578125, |
| "learning_rate": 3.153217391304348e-06, |
| "loss": 0.2162, |
| "step": 10620 |
| }, |
| { |
| "epoch": 0.7373943203988728, |
| "grad_norm": 1.15625, |
| "learning_rate": 3.1514782608695654e-06, |
| "loss": 0.2247, |
| "step": 10630 |
| }, |
| { |
| "epoch": 0.7380880121396055, |
| "grad_norm": 1.6015625, |
| "learning_rate": 3.149739130434783e-06, |
| "loss": 0.2441, |
| "step": 10640 |
| }, |
| { |
| "epoch": 0.7387817038803381, |
| "grad_norm": 1.265625, |
| "learning_rate": 3.1480000000000006e-06, |
| "loss": 0.2476, |
| "step": 10650 |
| }, |
| { |
| "epoch": 0.7394753956210709, |
| "grad_norm": 1.1953125, |
| "learning_rate": 3.1462608695652176e-06, |
| "loss": 0.301, |
| "step": 10660 |
| }, |
| { |
| "epoch": 0.7401690873618036, |
| "grad_norm": 1.296875, |
| "learning_rate": 3.144521739130435e-06, |
| "loss": 0.267, |
| "step": 10670 |
| }, |
| { |
| "epoch": 0.7408627791025363, |
| "grad_norm": 1.1328125, |
| "learning_rate": 3.1427826086956527e-06, |
| "loss": 0.2188, |
| "step": 10680 |
| }, |
| { |
| "epoch": 0.741556470843269, |
| "grad_norm": 1.1875, |
| "learning_rate": 3.14104347826087e-06, |
| "loss": 0.2561, |
| "step": 10690 |
| }, |
| { |
| "epoch": 0.7422501625840018, |
| "grad_norm": 1.171875, |
| "learning_rate": 3.139304347826087e-06, |
| "loss": 0.2115, |
| "step": 10700 |
| }, |
| { |
| "epoch": 0.7429438543247344, |
| "grad_norm": 1.40625, |
| "learning_rate": 3.1375652173913044e-06, |
| "loss": 0.2606, |
| "step": 10710 |
| }, |
| { |
| "epoch": 0.7436375460654672, |
| "grad_norm": 1.0625, |
| "learning_rate": 3.135826086956522e-06, |
| "loss": 0.3279, |
| "step": 10720 |
| }, |
| { |
| "epoch": 0.7443312378061999, |
| "grad_norm": 1.59375, |
| "learning_rate": 3.134086956521739e-06, |
| "loss": 0.2229, |
| "step": 10730 |
| }, |
| { |
| "epoch": 0.7450249295469326, |
| "grad_norm": 1.546875, |
| "learning_rate": 3.1323478260869565e-06, |
| "loss": 0.3244, |
| "step": 10740 |
| }, |
| { |
| "epoch": 0.7457186212876653, |
| "grad_norm": 1.28125, |
| "learning_rate": 3.1306086956521743e-06, |
| "loss": 0.2381, |
| "step": 10750 |
| }, |
| { |
| "epoch": 0.7464123130283981, |
| "grad_norm": 0.9765625, |
| "learning_rate": 3.1288695652173917e-06, |
| "loss": 0.1984, |
| "step": 10760 |
| }, |
| { |
| "epoch": 0.7471060047691307, |
| "grad_norm": 1.140625, |
| "learning_rate": 3.1271304347826086e-06, |
| "loss": 0.2426, |
| "step": 10770 |
| }, |
| { |
| "epoch": 0.7477996965098634, |
| "grad_norm": 1.2734375, |
| "learning_rate": 3.1253913043478264e-06, |
| "loss": 0.2656, |
| "step": 10780 |
| }, |
| { |
| "epoch": 0.7484933882505962, |
| "grad_norm": 1.359375, |
| "learning_rate": 3.123652173913044e-06, |
| "loss": 0.2148, |
| "step": 10790 |
| }, |
| { |
| "epoch": 0.7491870799913288, |
| "grad_norm": 1.28125, |
| "learning_rate": 3.121913043478261e-06, |
| "loss": 0.2359, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.7498807717320616, |
| "grad_norm": 1.25, |
| "learning_rate": 3.120173913043478e-06, |
| "loss": 0.2798, |
| "step": 10810 |
| }, |
| { |
| "epoch": 0.7505744634727943, |
| "grad_norm": 1.203125, |
| "learning_rate": 3.118434782608696e-06, |
| "loss": 0.2446, |
| "step": 10820 |
| }, |
| { |
| "epoch": 0.751268155213527, |
| "grad_norm": 1.140625, |
| "learning_rate": 3.1166956521739133e-06, |
| "loss": 0.2492, |
| "step": 10830 |
| }, |
| { |
| "epoch": 0.7519618469542597, |
| "grad_norm": 1.0234375, |
| "learning_rate": 3.114956521739131e-06, |
| "loss": 0.2174, |
| "step": 10840 |
| }, |
| { |
| "epoch": 0.7526555386949925, |
| "grad_norm": 1.0234375, |
| "learning_rate": 3.113217391304348e-06, |
| "loss": 0.2919, |
| "step": 10850 |
| }, |
| { |
| "epoch": 0.7533492304357251, |
| "grad_norm": 0.97265625, |
| "learning_rate": 3.1114782608695654e-06, |
| "loss": 0.2294, |
| "step": 10860 |
| }, |
| { |
| "epoch": 0.7540429221764579, |
| "grad_norm": 1.171875, |
| "learning_rate": 3.1097391304347828e-06, |
| "loss": 0.2915, |
| "step": 10870 |
| }, |
| { |
| "epoch": 0.7547366139171906, |
| "grad_norm": 1.5234375, |
| "learning_rate": 3.1080000000000006e-06, |
| "loss": 0.2996, |
| "step": 10880 |
| }, |
| { |
| "epoch": 0.7554303056579232, |
| "grad_norm": 1.140625, |
| "learning_rate": 3.1062608695652175e-06, |
| "loss": 0.3066, |
| "step": 10890 |
| }, |
| { |
| "epoch": 0.756123997398656, |
| "grad_norm": 1.3671875, |
| "learning_rate": 3.104521739130435e-06, |
| "loss": 0.2634, |
| "step": 10900 |
| }, |
| { |
| "epoch": 0.7568176891393887, |
| "grad_norm": 1.265625, |
| "learning_rate": 3.1027826086956527e-06, |
| "loss": 0.2178, |
| "step": 10910 |
| }, |
| { |
| "epoch": 0.7575113808801214, |
| "grad_norm": 0.91015625, |
| "learning_rate": 3.10104347826087e-06, |
| "loss": 0.2464, |
| "step": 10920 |
| }, |
| { |
| "epoch": 0.7582050726208541, |
| "grad_norm": 1.375, |
| "learning_rate": 3.099304347826087e-06, |
| "loss": 0.2308, |
| "step": 10930 |
| }, |
| { |
| "epoch": 0.7588987643615869, |
| "grad_norm": 1.1015625, |
| "learning_rate": 3.0975652173913044e-06, |
| "loss": 0.2179, |
| "step": 10940 |
| }, |
| { |
| "epoch": 0.7595924561023195, |
| "grad_norm": 1.203125, |
| "learning_rate": 3.095826086956522e-06, |
| "loss": 0.2317, |
| "step": 10950 |
| }, |
| { |
| "epoch": 0.7602861478430523, |
| "grad_norm": 0.86328125, |
| "learning_rate": 3.0940869565217395e-06, |
| "loss": 0.2443, |
| "step": 10960 |
| }, |
| { |
| "epoch": 0.7609798395837849, |
| "grad_norm": 1.3515625, |
| "learning_rate": 3.0923478260869565e-06, |
| "loss": 0.3217, |
| "step": 10970 |
| }, |
| { |
| "epoch": 0.7616735313245176, |
| "grad_norm": 1.3359375, |
| "learning_rate": 3.0906086956521743e-06, |
| "loss": 0.2126, |
| "step": 10980 |
| }, |
| { |
| "epoch": 0.7623672230652504, |
| "grad_norm": 1.1953125, |
| "learning_rate": 3.0888695652173916e-06, |
| "loss": 0.2326, |
| "step": 10990 |
| }, |
| { |
| "epoch": 0.763060914805983, |
| "grad_norm": 1.296875, |
| "learning_rate": 3.087130434782609e-06, |
| "loss": 0.2266, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.7637546065467158, |
| "grad_norm": 1.078125, |
| "learning_rate": 3.0853913043478264e-06, |
| "loss": 0.2807, |
| "step": 11010 |
| }, |
| { |
| "epoch": 0.7644482982874485, |
| "grad_norm": 1.2890625, |
| "learning_rate": 3.0836521739130438e-06, |
| "loss": 0.3183, |
| "step": 11020 |
| }, |
| { |
| "epoch": 0.7651419900281812, |
| "grad_norm": 1.2890625, |
| "learning_rate": 3.081913043478261e-06, |
| "loss": 0.2456, |
| "step": 11030 |
| }, |
| { |
| "epoch": 0.7658356817689139, |
| "grad_norm": 1.203125, |
| "learning_rate": 3.080173913043478e-06, |
| "loss": 0.2157, |
| "step": 11040 |
| }, |
| { |
| "epoch": 0.7665293735096467, |
| "grad_norm": 1.171875, |
| "learning_rate": 3.078434782608696e-06, |
| "loss": 0.2612, |
| "step": 11050 |
| }, |
| { |
| "epoch": 0.7672230652503793, |
| "grad_norm": 1.1953125, |
| "learning_rate": 3.0766956521739132e-06, |
| "loss": 0.253, |
| "step": 11060 |
| }, |
| { |
| "epoch": 0.7679167569911121, |
| "grad_norm": 0.90625, |
| "learning_rate": 3.074956521739131e-06, |
| "loss": 0.225, |
| "step": 11070 |
| }, |
| { |
| "epoch": 0.7686104487318448, |
| "grad_norm": 1.703125, |
| "learning_rate": 3.073217391304348e-06, |
| "loss": 0.2247, |
| "step": 11080 |
| }, |
| { |
| "epoch": 0.7693041404725774, |
| "grad_norm": 1.2109375, |
| "learning_rate": 3.0714782608695654e-06, |
| "loss": 0.2493, |
| "step": 11090 |
| }, |
| { |
| "epoch": 0.7699978322133102, |
| "grad_norm": 1.390625, |
| "learning_rate": 3.0697391304347827e-06, |
| "loss": 0.3239, |
| "step": 11100 |
| }, |
| { |
| "epoch": 0.770691523954043, |
| "grad_norm": 1.1015625, |
| "learning_rate": 3.0680000000000005e-06, |
| "loss": 0.2053, |
| "step": 11110 |
| }, |
| { |
| "epoch": 0.7713852156947756, |
| "grad_norm": 1.2578125, |
| "learning_rate": 3.0662608695652175e-06, |
| "loss": 0.2254, |
| "step": 11120 |
| }, |
| { |
| "epoch": 0.7720789074355083, |
| "grad_norm": 1.140625, |
| "learning_rate": 3.064521739130435e-06, |
| "loss": 0.2715, |
| "step": 11130 |
| }, |
| { |
| "epoch": 0.7727725991762411, |
| "grad_norm": 1.234375, |
| "learning_rate": 3.0627826086956526e-06, |
| "loss": 0.2294, |
| "step": 11140 |
| }, |
| { |
| "epoch": 0.7734662909169737, |
| "grad_norm": 1.390625, |
| "learning_rate": 3.06104347826087e-06, |
| "loss": 0.2423, |
| "step": 11150 |
| }, |
| { |
| "epoch": 0.7741599826577065, |
| "grad_norm": 1.453125, |
| "learning_rate": 3.059304347826087e-06, |
| "loss": 0.2477, |
| "step": 11160 |
| }, |
| { |
| "epoch": 0.7748536743984392, |
| "grad_norm": 1.2578125, |
| "learning_rate": 3.0575652173913043e-06, |
| "loss": 0.2103, |
| "step": 11170 |
| }, |
| { |
| "epoch": 0.7755473661391719, |
| "grad_norm": 1.484375, |
| "learning_rate": 3.055826086956522e-06, |
| "loss": 0.2501, |
| "step": 11180 |
| }, |
| { |
| "epoch": 0.7762410578799046, |
| "grad_norm": 1.0859375, |
| "learning_rate": 3.0540869565217395e-06, |
| "loss": 0.2741, |
| "step": 11190 |
| }, |
| { |
| "epoch": 0.7769347496206374, |
| "grad_norm": 1.4453125, |
| "learning_rate": 3.0523478260869564e-06, |
| "loss": 0.2483, |
| "step": 11200 |
| }, |
| { |
| "epoch": 0.77762844136137, |
| "grad_norm": 1.53125, |
| "learning_rate": 3.0506086956521742e-06, |
| "loss": 0.2672, |
| "step": 11210 |
| }, |
| { |
| "epoch": 0.7783221331021027, |
| "grad_norm": 1.21875, |
| "learning_rate": 3.0488695652173916e-06, |
| "loss": 0.225, |
| "step": 11220 |
| }, |
| { |
| "epoch": 0.7790158248428355, |
| "grad_norm": 1.21875, |
| "learning_rate": 3.047130434782609e-06, |
| "loss": 0.2435, |
| "step": 11230 |
| }, |
| { |
| "epoch": 0.7797095165835681, |
| "grad_norm": 1.1484375, |
| "learning_rate": 3.0453913043478264e-06, |
| "loss": 0.251, |
| "step": 11240 |
| }, |
| { |
| "epoch": 0.7804032083243009, |
| "grad_norm": 1.1015625, |
| "learning_rate": 3.0436521739130437e-06, |
| "loss": 0.2056, |
| "step": 11250 |
| }, |
| { |
| "epoch": 0.7810969000650336, |
| "grad_norm": 1.4921875, |
| "learning_rate": 3.041913043478261e-06, |
| "loss": 0.2781, |
| "step": 11260 |
| }, |
| { |
| "epoch": 0.7817905918057663, |
| "grad_norm": 1.6171875, |
| "learning_rate": 3.040173913043479e-06, |
| "loss": 0.2197, |
| "step": 11270 |
| }, |
| { |
| "epoch": 0.782484283546499, |
| "grad_norm": 1.171875, |
| "learning_rate": 3.038434782608696e-06, |
| "loss": 0.2486, |
| "step": 11280 |
| }, |
| { |
| "epoch": 0.7831779752872318, |
| "grad_norm": 1.0, |
| "learning_rate": 3.036695652173913e-06, |
| "loss": 0.2142, |
| "step": 11290 |
| }, |
| { |
| "epoch": 0.7838716670279644, |
| "grad_norm": 1.5, |
| "learning_rate": 3.034956521739131e-06, |
| "loss": 0.2634, |
| "step": 11300 |
| }, |
| { |
| "epoch": 0.7845653587686972, |
| "grad_norm": 1.3984375, |
| "learning_rate": 3.0332173913043484e-06, |
| "loss": 0.2851, |
| "step": 11310 |
| }, |
| { |
| "epoch": 0.7852590505094299, |
| "grad_norm": 1.0859375, |
| "learning_rate": 3.0314782608695653e-06, |
| "loss": 0.2177, |
| "step": 11320 |
| }, |
| { |
| "epoch": 0.7859527422501625, |
| "grad_norm": 1.15625, |
| "learning_rate": 3.0297391304347827e-06, |
| "loss": 0.2772, |
| "step": 11330 |
| }, |
| { |
| "epoch": 0.7866464339908953, |
| "grad_norm": 1.40625, |
| "learning_rate": 3.0280000000000005e-06, |
| "loss": 0.2819, |
| "step": 11340 |
| }, |
| { |
| "epoch": 0.787340125731628, |
| "grad_norm": 0.87890625, |
| "learning_rate": 3.0262608695652174e-06, |
| "loss": 0.2356, |
| "step": 11350 |
| }, |
| { |
| "epoch": 0.7880338174723607, |
| "grad_norm": 0.859375, |
| "learning_rate": 3.024521739130435e-06, |
| "loss": 0.2149, |
| "step": 11360 |
| }, |
| { |
| "epoch": 0.7887275092130934, |
| "grad_norm": 1.265625, |
| "learning_rate": 3.0227826086956526e-06, |
| "loss": 0.2252, |
| "step": 11370 |
| }, |
| { |
| "epoch": 0.7894212009538262, |
| "grad_norm": 1.8515625, |
| "learning_rate": 3.02104347826087e-06, |
| "loss": 0.2917, |
| "step": 11380 |
| }, |
| { |
| "epoch": 0.7901148926945588, |
| "grad_norm": 1.4296875, |
| "learning_rate": 3.019304347826087e-06, |
| "loss": 0.257, |
| "step": 11390 |
| }, |
| { |
| "epoch": 0.7908085844352916, |
| "grad_norm": 1.296875, |
| "learning_rate": 3.0175652173913043e-06, |
| "loss": 0.2266, |
| "step": 11400 |
| }, |
| { |
| "epoch": 0.7915022761760243, |
| "grad_norm": 1.0546875, |
| "learning_rate": 3.015826086956522e-06, |
| "loss": 0.2351, |
| "step": 11410 |
| }, |
| { |
| "epoch": 0.792195967916757, |
| "grad_norm": 1.46875, |
| "learning_rate": 3.0140869565217395e-06, |
| "loss": 0.2555, |
| "step": 11420 |
| }, |
| { |
| "epoch": 0.7928896596574897, |
| "grad_norm": 1.078125, |
| "learning_rate": 3.0123478260869564e-06, |
| "loss": 0.2709, |
| "step": 11430 |
| }, |
| { |
| "epoch": 0.7935833513982224, |
| "grad_norm": 1.3359375, |
| "learning_rate": 3.010608695652174e-06, |
| "loss": 0.2159, |
| "step": 11440 |
| }, |
| { |
| "epoch": 0.7942770431389551, |
| "grad_norm": 1.4375, |
| "learning_rate": 3.0088695652173916e-06, |
| "loss": 0.3059, |
| "step": 11450 |
| }, |
| { |
| "epoch": 0.7949707348796878, |
| "grad_norm": 1.125, |
| "learning_rate": 3.007130434782609e-06, |
| "loss": 0.2167, |
| "step": 11460 |
| }, |
| { |
| "epoch": 0.7956644266204206, |
| "grad_norm": 0.84765625, |
| "learning_rate": 3.0053913043478263e-06, |
| "loss": 0.2494, |
| "step": 11470 |
| }, |
| { |
| "epoch": 0.7963581183611532, |
| "grad_norm": 1.2421875, |
| "learning_rate": 3.0036521739130437e-06, |
| "loss": 0.2852, |
| "step": 11480 |
| }, |
| { |
| "epoch": 0.797051810101886, |
| "grad_norm": 1.171875, |
| "learning_rate": 3.001913043478261e-06, |
| "loss": 0.2671, |
| "step": 11490 |
| }, |
| { |
| "epoch": 0.7977455018426187, |
| "grad_norm": 1.265625, |
| "learning_rate": 3.000173913043479e-06, |
| "loss": 0.2252, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.7984391935833514, |
| "grad_norm": 1.171875, |
| "learning_rate": 2.998434782608696e-06, |
| "loss": 0.2481, |
| "step": 11510 |
| }, |
| { |
| "epoch": 0.7991328853240841, |
| "grad_norm": 1.2578125, |
| "learning_rate": 2.996695652173913e-06, |
| "loss": 0.2194, |
| "step": 11520 |
| }, |
| { |
| "epoch": 0.7998265770648169, |
| "grad_norm": 1.125, |
| "learning_rate": 2.994956521739131e-06, |
| "loss": 0.2264, |
| "step": 11530 |
| }, |
| { |
| "epoch": 0.8005202688055495, |
| "grad_norm": 1.3828125, |
| "learning_rate": 2.9932173913043483e-06, |
| "loss": 0.2368, |
| "step": 11540 |
| }, |
| { |
| "epoch": 0.8012139605462822, |
| "grad_norm": 1.109375, |
| "learning_rate": 2.9914782608695653e-06, |
| "loss": 0.2505, |
| "step": 11550 |
| }, |
| { |
| "epoch": 0.801907652287015, |
| "grad_norm": 1.0546875, |
| "learning_rate": 2.9897391304347827e-06, |
| "loss": 0.2175, |
| "step": 11560 |
| }, |
| { |
| "epoch": 0.8026013440277476, |
| "grad_norm": 1.0703125, |
| "learning_rate": 2.9880000000000004e-06, |
| "loss": 0.2256, |
| "step": 11570 |
| }, |
| { |
| "epoch": 0.8032950357684804, |
| "grad_norm": 1.140625, |
| "learning_rate": 2.986260869565218e-06, |
| "loss": 0.2488, |
| "step": 11580 |
| }, |
| { |
| "epoch": 0.8039887275092131, |
| "grad_norm": 1.3828125, |
| "learning_rate": 2.9845217391304348e-06, |
| "loss": 0.2574, |
| "step": 11590 |
| }, |
| { |
| "epoch": 0.8046824192499458, |
| "grad_norm": 1.0234375, |
| "learning_rate": 2.9827826086956526e-06, |
| "loss": 0.2263, |
| "step": 11600 |
| }, |
| { |
| "epoch": 0.8053761109906785, |
| "grad_norm": 1.3203125, |
| "learning_rate": 2.98104347826087e-06, |
| "loss": 0.2127, |
| "step": 11610 |
| }, |
| { |
| "epoch": 0.8060698027314113, |
| "grad_norm": 1.2109375, |
| "learning_rate": 2.9793043478260873e-06, |
| "loss": 0.2328, |
| "step": 11620 |
| }, |
| { |
| "epoch": 0.8067634944721439, |
| "grad_norm": 1.25, |
| "learning_rate": 2.9775652173913042e-06, |
| "loss": 0.279, |
| "step": 11630 |
| }, |
| { |
| "epoch": 0.8074571862128767, |
| "grad_norm": 1.265625, |
| "learning_rate": 2.975826086956522e-06, |
| "loss": 0.1984, |
| "step": 11640 |
| }, |
| { |
| "epoch": 0.8081508779536094, |
| "grad_norm": 1.203125, |
| "learning_rate": 2.9740869565217394e-06, |
| "loss": 0.2232, |
| "step": 11650 |
| }, |
| { |
| "epoch": 0.808844569694342, |
| "grad_norm": 1.3359375, |
| "learning_rate": 2.9723478260869564e-06, |
| "loss": 0.2565, |
| "step": 11660 |
| }, |
| { |
| "epoch": 0.8095382614350748, |
| "grad_norm": 1.25, |
| "learning_rate": 2.970608695652174e-06, |
| "loss": 0.2786, |
| "step": 11670 |
| }, |
| { |
| "epoch": 0.8102319531758075, |
| "grad_norm": 1.3828125, |
| "learning_rate": 2.9688695652173915e-06, |
| "loss": 0.326, |
| "step": 11680 |
| }, |
| { |
| "epoch": 0.8109256449165402, |
| "grad_norm": 1.171875, |
| "learning_rate": 2.967130434782609e-06, |
| "loss": 0.3024, |
| "step": 11690 |
| }, |
| { |
| "epoch": 0.8116193366572729, |
| "grad_norm": 1.1328125, |
| "learning_rate": 2.9653913043478263e-06, |
| "loss": 0.2609, |
| "step": 11700 |
| }, |
| { |
| "epoch": 0.8123130283980057, |
| "grad_norm": 1.2109375, |
| "learning_rate": 2.9636521739130436e-06, |
| "loss": 0.2344, |
| "step": 11710 |
| }, |
| { |
| "epoch": 0.8130067201387383, |
| "grad_norm": 1.4921875, |
| "learning_rate": 2.961913043478261e-06, |
| "loss": 0.2705, |
| "step": 11720 |
| }, |
| { |
| "epoch": 0.8137004118794711, |
| "grad_norm": 1.390625, |
| "learning_rate": 2.960173913043479e-06, |
| "loss": 0.2354, |
| "step": 11730 |
| }, |
| { |
| "epoch": 0.8143941036202038, |
| "grad_norm": 0.94140625, |
| "learning_rate": 2.9584347826086958e-06, |
| "loss": 0.2062, |
| "step": 11740 |
| }, |
| { |
| "epoch": 0.8150877953609365, |
| "grad_norm": 1.3515625, |
| "learning_rate": 2.956695652173913e-06, |
| "loss": 0.2292, |
| "step": 11750 |
| }, |
| { |
| "epoch": 0.8157814871016692, |
| "grad_norm": 1.296875, |
| "learning_rate": 2.954956521739131e-06, |
| "loss": 0.2944, |
| "step": 11760 |
| }, |
| { |
| "epoch": 0.816475178842402, |
| "grad_norm": 1.2421875, |
| "learning_rate": 2.9532173913043483e-06, |
| "loss": 0.2349, |
| "step": 11770 |
| }, |
| { |
| "epoch": 0.8171688705831346, |
| "grad_norm": 1.0859375, |
| "learning_rate": 2.9514782608695652e-06, |
| "loss": 0.2075, |
| "step": 11780 |
| }, |
| { |
| "epoch": 0.8178625623238673, |
| "grad_norm": 1.171875, |
| "learning_rate": 2.9497391304347826e-06, |
| "loss": 0.2725, |
| "step": 11790 |
| }, |
| { |
| "epoch": 0.8185562540646001, |
| "grad_norm": 0.92578125, |
| "learning_rate": 2.9480000000000004e-06, |
| "loss": 0.2386, |
| "step": 11800 |
| }, |
| { |
| "epoch": 0.8192499458053327, |
| "grad_norm": 1.3515625, |
| "learning_rate": 2.9462608695652178e-06, |
| "loss": 0.2515, |
| "step": 11810 |
| }, |
| { |
| "epoch": 0.8199436375460655, |
| "grad_norm": 1.296875, |
| "learning_rate": 2.9445217391304347e-06, |
| "loss": 0.2432, |
| "step": 11820 |
| }, |
| { |
| "epoch": 0.8206373292867982, |
| "grad_norm": 1.5703125, |
| "learning_rate": 2.9427826086956525e-06, |
| "loss": 0.2537, |
| "step": 11830 |
| }, |
| { |
| "epoch": 0.8213310210275309, |
| "grad_norm": 1.234375, |
| "learning_rate": 2.94104347826087e-06, |
| "loss": 0.2072, |
| "step": 11840 |
| }, |
| { |
| "epoch": 0.8220247127682636, |
| "grad_norm": 1.0703125, |
| "learning_rate": 2.9393043478260873e-06, |
| "loss": 0.215, |
| "step": 11850 |
| }, |
| { |
| "epoch": 0.8227184045089964, |
| "grad_norm": 1.171875, |
| "learning_rate": 2.937565217391304e-06, |
| "loss": 0.2244, |
| "step": 11860 |
| }, |
| { |
| "epoch": 0.823412096249729, |
| "grad_norm": 1.1328125, |
| "learning_rate": 2.935826086956522e-06, |
| "loss": 0.2119, |
| "step": 11870 |
| }, |
| { |
| "epoch": 0.8241057879904617, |
| "grad_norm": 1.625, |
| "learning_rate": 2.9340869565217394e-06, |
| "loss": 0.2654, |
| "step": 11880 |
| }, |
| { |
| "epoch": 0.8247994797311945, |
| "grad_norm": 2.21875, |
| "learning_rate": 2.932347826086957e-06, |
| "loss": 0.2935, |
| "step": 11890 |
| }, |
| { |
| "epoch": 0.8254931714719271, |
| "grad_norm": 1.171875, |
| "learning_rate": 2.930608695652174e-06, |
| "loss": 0.3218, |
| "step": 11900 |
| }, |
| { |
| "epoch": 0.8261868632126599, |
| "grad_norm": 1.3046875, |
| "learning_rate": 2.9288695652173915e-06, |
| "loss": 0.2326, |
| "step": 11910 |
| }, |
| { |
| "epoch": 0.8268805549533926, |
| "grad_norm": 1.3203125, |
| "learning_rate": 2.927130434782609e-06, |
| "loss": 0.265, |
| "step": 11920 |
| }, |
| { |
| "epoch": 0.8275742466941253, |
| "grad_norm": 1.3515625, |
| "learning_rate": 2.9253913043478267e-06, |
| "loss": 0.2347, |
| "step": 11930 |
| }, |
| { |
| "epoch": 0.828267938434858, |
| "grad_norm": 1.0625, |
| "learning_rate": 2.9236521739130436e-06, |
| "loss": 0.2534, |
| "step": 11940 |
| }, |
| { |
| "epoch": 0.8289616301755908, |
| "grad_norm": 1.09375, |
| "learning_rate": 2.921913043478261e-06, |
| "loss": 0.2471, |
| "step": 11950 |
| }, |
| { |
| "epoch": 0.8296553219163234, |
| "grad_norm": 1.46875, |
| "learning_rate": 2.9201739130434788e-06, |
| "loss": 0.2258, |
| "step": 11960 |
| }, |
| { |
| "epoch": 0.8303490136570562, |
| "grad_norm": 1.0078125, |
| "learning_rate": 2.9184347826086957e-06, |
| "loss": 0.2336, |
| "step": 11970 |
| }, |
| { |
| "epoch": 0.8310427053977889, |
| "grad_norm": 0.9296875, |
| "learning_rate": 2.916695652173913e-06, |
| "loss": 0.1999, |
| "step": 11980 |
| }, |
| { |
| "epoch": 0.8317363971385215, |
| "grad_norm": 1.3828125, |
| "learning_rate": 2.914956521739131e-06, |
| "loss": 0.2423, |
| "step": 11990 |
| }, |
| { |
| "epoch": 0.8324300888792543, |
| "grad_norm": 0.98046875, |
| "learning_rate": 2.9132173913043483e-06, |
| "loss": 0.3134, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.833123780619987, |
| "grad_norm": 1.2109375, |
| "learning_rate": 2.911478260869565e-06, |
| "loss": 0.2421, |
| "step": 12010 |
| }, |
| { |
| "epoch": 0.8338174723607197, |
| "grad_norm": 1.2578125, |
| "learning_rate": 2.9097391304347826e-06, |
| "loss": 0.2408, |
| "step": 12020 |
| }, |
| { |
| "epoch": 0.8345111641014524, |
| "grad_norm": 1.09375, |
| "learning_rate": 2.9080000000000004e-06, |
| "loss": 0.3132, |
| "step": 12030 |
| }, |
| { |
| "epoch": 0.8352048558421852, |
| "grad_norm": 1.4921875, |
| "learning_rate": 2.9062608695652177e-06, |
| "loss": 0.3039, |
| "step": 12040 |
| }, |
| { |
| "epoch": 0.8358985475829178, |
| "grad_norm": 1.3046875, |
| "learning_rate": 2.9045217391304347e-06, |
| "loss": 0.2368, |
| "step": 12050 |
| }, |
| { |
| "epoch": 0.8365922393236506, |
| "grad_norm": 1.1015625, |
| "learning_rate": 2.9027826086956525e-06, |
| "loss": 0.2429, |
| "step": 12060 |
| }, |
| { |
| "epoch": 0.8372859310643833, |
| "grad_norm": 1.34375, |
| "learning_rate": 2.90104347826087e-06, |
| "loss": 0.2199, |
| "step": 12070 |
| }, |
| { |
| "epoch": 0.837979622805116, |
| "grad_norm": 0.9609375, |
| "learning_rate": 2.8993043478260872e-06, |
| "loss": 0.2216, |
| "step": 12080 |
| }, |
| { |
| "epoch": 0.8386733145458487, |
| "grad_norm": 1.1171875, |
| "learning_rate": 2.897565217391304e-06, |
| "loss": 0.2114, |
| "step": 12090 |
| }, |
| { |
| "epoch": 0.8393670062865815, |
| "grad_norm": 1.0859375, |
| "learning_rate": 2.895826086956522e-06, |
| "loss": 0.2314, |
| "step": 12100 |
| }, |
| { |
| "epoch": 0.8400606980273141, |
| "grad_norm": 1.2421875, |
| "learning_rate": 2.8940869565217393e-06, |
| "loss": 0.2591, |
| "step": 12110 |
| }, |
| { |
| "epoch": 0.8407543897680468, |
| "grad_norm": 1.1953125, |
| "learning_rate": 2.892347826086957e-06, |
| "loss": 0.2981, |
| "step": 12120 |
| }, |
| { |
| "epoch": 0.8414480815087796, |
| "grad_norm": 1.2734375, |
| "learning_rate": 2.890608695652174e-06, |
| "loss": 0.2615, |
| "step": 12130 |
| }, |
| { |
| "epoch": 0.8421417732495122, |
| "grad_norm": 1.1328125, |
| "learning_rate": 2.8888695652173914e-06, |
| "loss": 0.2214, |
| "step": 12140 |
| }, |
| { |
| "epoch": 0.842835464990245, |
| "grad_norm": 1.46875, |
| "learning_rate": 2.887130434782609e-06, |
| "loss": 0.2462, |
| "step": 12150 |
| }, |
| { |
| "epoch": 0.8435291567309777, |
| "grad_norm": 1.2109375, |
| "learning_rate": 2.8853913043478266e-06, |
| "loss": 0.2383, |
| "step": 12160 |
| }, |
| { |
| "epoch": 0.8442228484717104, |
| "grad_norm": 1.1484375, |
| "learning_rate": 2.8836521739130436e-06, |
| "loss": 0.2856, |
| "step": 12170 |
| }, |
| { |
| "epoch": 0.8449165402124431, |
| "grad_norm": 1.4375, |
| "learning_rate": 2.881913043478261e-06, |
| "loss": 0.2847, |
| "step": 12180 |
| }, |
| { |
| "epoch": 0.8456102319531759, |
| "grad_norm": 1.15625, |
| "learning_rate": 2.8801739130434787e-06, |
| "loss": 0.2508, |
| "step": 12190 |
| }, |
| { |
| "epoch": 0.8463039236939085, |
| "grad_norm": 1.34375, |
| "learning_rate": 2.878434782608696e-06, |
| "loss": 0.2317, |
| "step": 12200 |
| }, |
| { |
| "epoch": 0.8469976154346412, |
| "grad_norm": 1.3359375, |
| "learning_rate": 2.876695652173913e-06, |
| "loss": 0.3621, |
| "step": 12210 |
| }, |
| { |
| "epoch": 0.847691307175374, |
| "grad_norm": 1.109375, |
| "learning_rate": 2.874956521739131e-06, |
| "loss": 0.2664, |
| "step": 12220 |
| }, |
| { |
| "epoch": 0.8483849989161066, |
| "grad_norm": 0.86328125, |
| "learning_rate": 2.873217391304348e-06, |
| "loss": 0.2206, |
| "step": 12230 |
| }, |
| { |
| "epoch": 0.8490786906568394, |
| "grad_norm": 1.6484375, |
| "learning_rate": 2.8714782608695656e-06, |
| "loss": 0.3451, |
| "step": 12240 |
| }, |
| { |
| "epoch": 0.849772382397572, |
| "grad_norm": 1.5390625, |
| "learning_rate": 2.8697391304347825e-06, |
| "loss": 0.2597, |
| "step": 12250 |
| }, |
| { |
| "epoch": 0.8504660741383048, |
| "grad_norm": 1.1328125, |
| "learning_rate": 2.8680000000000003e-06, |
| "loss": 0.2391, |
| "step": 12260 |
| }, |
| { |
| "epoch": 0.8511597658790375, |
| "grad_norm": 0.92578125, |
| "learning_rate": 2.8662608695652177e-06, |
| "loss": 0.2241, |
| "step": 12270 |
| }, |
| { |
| "epoch": 0.8518534576197702, |
| "grad_norm": 1.1796875, |
| "learning_rate": 2.8645217391304346e-06, |
| "loss": 0.2928, |
| "step": 12280 |
| }, |
| { |
| "epoch": 0.8525471493605029, |
| "grad_norm": 1.1640625, |
| "learning_rate": 2.8627826086956524e-06, |
| "loss": 0.2468, |
| "step": 12290 |
| }, |
| { |
| "epoch": 0.8532408411012357, |
| "grad_norm": 1.0546875, |
| "learning_rate": 2.86104347826087e-06, |
| "loss": 0.2234, |
| "step": 12300 |
| }, |
| { |
| "epoch": 0.8539345328419683, |
| "grad_norm": 1.2421875, |
| "learning_rate": 2.859304347826087e-06, |
| "loss": 0.2329, |
| "step": 12310 |
| }, |
| { |
| "epoch": 0.854628224582701, |
| "grad_norm": 1.390625, |
| "learning_rate": 2.857565217391304e-06, |
| "loss": 0.2267, |
| "step": 12320 |
| }, |
| { |
| "epoch": 0.8553219163234338, |
| "grad_norm": 1.3203125, |
| "learning_rate": 2.855826086956522e-06, |
| "loss": 0.2083, |
| "step": 12330 |
| }, |
| { |
| "epoch": 0.8560156080641664, |
| "grad_norm": 1.0546875, |
| "learning_rate": 2.8540869565217393e-06, |
| "loss": 0.2738, |
| "step": 12340 |
| }, |
| { |
| "epoch": 0.8567092998048992, |
| "grad_norm": 1.421875, |
| "learning_rate": 2.852347826086957e-06, |
| "loss": 0.2128, |
| "step": 12350 |
| }, |
| { |
| "epoch": 0.8574029915456319, |
| "grad_norm": 1.515625, |
| "learning_rate": 2.850608695652174e-06, |
| "loss": 0.2226, |
| "step": 12360 |
| }, |
| { |
| "epoch": 0.8580966832863646, |
| "grad_norm": 1.40625, |
| "learning_rate": 2.8488695652173914e-06, |
| "loss": 0.2691, |
| "step": 12370 |
| }, |
| { |
| "epoch": 0.8587903750270973, |
| "grad_norm": 1.3359375, |
| "learning_rate": 2.8471304347826088e-06, |
| "loss": 0.2323, |
| "step": 12380 |
| }, |
| { |
| "epoch": 0.8594840667678301, |
| "grad_norm": 0.9921875, |
| "learning_rate": 2.8453913043478266e-06, |
| "loss": 0.228, |
| "step": 12390 |
| }, |
| { |
| "epoch": 0.8601777585085627, |
| "grad_norm": 0.96484375, |
| "learning_rate": 2.8436521739130435e-06, |
| "loss": 0.2741, |
| "step": 12400 |
| }, |
| { |
| "epoch": 0.8608714502492955, |
| "grad_norm": 1.3046875, |
| "learning_rate": 2.841913043478261e-06, |
| "loss": 0.2483, |
| "step": 12410 |
| }, |
| { |
| "epoch": 0.8615651419900282, |
| "grad_norm": 1.3515625, |
| "learning_rate": 2.8401739130434787e-06, |
| "loss": 0.2552, |
| "step": 12420 |
| }, |
| { |
| "epoch": 0.8622588337307608, |
| "grad_norm": 1.3828125, |
| "learning_rate": 2.838434782608696e-06, |
| "loss": 0.2283, |
| "step": 12430 |
| }, |
| { |
| "epoch": 0.8629525254714936, |
| "grad_norm": 1.2109375, |
| "learning_rate": 2.836695652173913e-06, |
| "loss": 0.226, |
| "step": 12440 |
| }, |
| { |
| "epoch": 0.8636462172122263, |
| "grad_norm": 1.3828125, |
| "learning_rate": 2.834956521739131e-06, |
| "loss": 0.2357, |
| "step": 12450 |
| }, |
| { |
| "epoch": 0.864339908952959, |
| "grad_norm": 1.1484375, |
| "learning_rate": 2.833217391304348e-06, |
| "loss": 0.2832, |
| "step": 12460 |
| }, |
| { |
| "epoch": 0.8650336006936917, |
| "grad_norm": 0.9375, |
| "learning_rate": 2.8314782608695655e-06, |
| "loss": 0.2512, |
| "step": 12470 |
| }, |
| { |
| "epoch": 0.8657272924344245, |
| "grad_norm": 1.1015625, |
| "learning_rate": 2.8297391304347825e-06, |
| "loss": 0.1749, |
| "step": 12480 |
| }, |
| { |
| "epoch": 0.8664209841751571, |
| "grad_norm": 1.578125, |
| "learning_rate": 2.8280000000000003e-06, |
| "loss": 0.3021, |
| "step": 12490 |
| }, |
| { |
| "epoch": 0.8671146759158899, |
| "grad_norm": 1.015625, |
| "learning_rate": 2.8262608695652177e-06, |
| "loss": 0.257, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.8678083676566226, |
| "grad_norm": 1.1328125, |
| "learning_rate": 2.8245217391304354e-06, |
| "loss": 0.229, |
| "step": 12510 |
| }, |
| { |
| "epoch": 0.8685020593973553, |
| "grad_norm": 1.0234375, |
| "learning_rate": 2.8227826086956524e-06, |
| "loss": 0.2163, |
| "step": 12520 |
| }, |
| { |
| "epoch": 0.869195751138088, |
| "grad_norm": 1.4609375, |
| "learning_rate": 2.8210434782608698e-06, |
| "loss": 0.264, |
| "step": 12530 |
| }, |
| { |
| "epoch": 0.8698894428788208, |
| "grad_norm": 1.1171875, |
| "learning_rate": 2.819304347826087e-06, |
| "loss": 0.2647, |
| "step": 12540 |
| }, |
| { |
| "epoch": 0.8705831346195534, |
| "grad_norm": 1.25, |
| "learning_rate": 2.817565217391305e-06, |
| "loss": 0.2262, |
| "step": 12550 |
| }, |
| { |
| "epoch": 0.8712768263602861, |
| "grad_norm": 1.2578125, |
| "learning_rate": 2.815826086956522e-06, |
| "loss": 0.2163, |
| "step": 12560 |
| }, |
| { |
| "epoch": 0.8719705181010189, |
| "grad_norm": 1.3515625, |
| "learning_rate": 2.8140869565217393e-06, |
| "loss": 0.2299, |
| "step": 12570 |
| }, |
| { |
| "epoch": 0.8726642098417515, |
| "grad_norm": 1.0703125, |
| "learning_rate": 2.812347826086957e-06, |
| "loss": 0.2756, |
| "step": 12580 |
| }, |
| { |
| "epoch": 0.8733579015824843, |
| "grad_norm": 1.3125, |
| "learning_rate": 2.810608695652174e-06, |
| "loss": 0.2429, |
| "step": 12590 |
| }, |
| { |
| "epoch": 0.874051593323217, |
| "grad_norm": 1.328125, |
| "learning_rate": 2.8088695652173914e-06, |
| "loss": 0.2404, |
| "step": 12600 |
| }, |
| { |
| "epoch": 0.8747452850639497, |
| "grad_norm": 1.1640625, |
| "learning_rate": 2.8071304347826087e-06, |
| "loss": 0.2412, |
| "step": 12610 |
| }, |
| { |
| "epoch": 0.8754389768046824, |
| "grad_norm": 1.1875, |
| "learning_rate": 2.8053913043478265e-06, |
| "loss": 0.2682, |
| "step": 12620 |
| }, |
| { |
| "epoch": 0.8761326685454152, |
| "grad_norm": 1.203125, |
| "learning_rate": 2.8036521739130435e-06, |
| "loss": 0.2255, |
| "step": 12630 |
| }, |
| { |
| "epoch": 0.8768263602861478, |
| "grad_norm": 1.1875, |
| "learning_rate": 2.801913043478261e-06, |
| "loss": 0.2766, |
| "step": 12640 |
| }, |
| { |
| "epoch": 0.8775200520268805, |
| "grad_norm": 1.1796875, |
| "learning_rate": 2.8001739130434786e-06, |
| "loss": 0.2353, |
| "step": 12650 |
| }, |
| { |
| "epoch": 0.8782137437676133, |
| "grad_norm": 0.8671875, |
| "learning_rate": 2.798434782608696e-06, |
| "loss": 0.1858, |
| "step": 12660 |
| }, |
| { |
| "epoch": 0.8789074355083459, |
| "grad_norm": 1.2890625, |
| "learning_rate": 2.796695652173913e-06, |
| "loss": 0.214, |
| "step": 12670 |
| }, |
| { |
| "epoch": 0.8796011272490787, |
| "grad_norm": 1.40625, |
| "learning_rate": 2.7949565217391308e-06, |
| "loss": 0.2371, |
| "step": 12680 |
| }, |
| { |
| "epoch": 0.8802948189898114, |
| "grad_norm": 1.2734375, |
| "learning_rate": 2.793217391304348e-06, |
| "loss": 0.23, |
| "step": 12690 |
| }, |
| { |
| "epoch": 0.8809885107305441, |
| "grad_norm": 1.28125, |
| "learning_rate": 2.7914782608695655e-06, |
| "loss": 0.2453, |
| "step": 12700 |
| }, |
| { |
| "epoch": 0.8816822024712768, |
| "grad_norm": 1.265625, |
| "learning_rate": 2.7897391304347824e-06, |
| "loss": 0.2131, |
| "step": 12710 |
| }, |
| { |
| "epoch": 0.8823758942120096, |
| "grad_norm": 1.390625, |
| "learning_rate": 2.7880000000000002e-06, |
| "loss": 0.242, |
| "step": 12720 |
| }, |
| { |
| "epoch": 0.8830695859527422, |
| "grad_norm": 0.953125, |
| "learning_rate": 2.7862608695652176e-06, |
| "loss": 0.3201, |
| "step": 12730 |
| }, |
| { |
| "epoch": 0.883763277693475, |
| "grad_norm": 0.94140625, |
| "learning_rate": 2.7845217391304354e-06, |
| "loss": 0.2541, |
| "step": 12740 |
| }, |
| { |
| "epoch": 0.8844569694342077, |
| "grad_norm": 1.0859375, |
| "learning_rate": 2.7827826086956524e-06, |
| "loss": 0.2767, |
| "step": 12750 |
| }, |
| { |
| "epoch": 0.8851506611749403, |
| "grad_norm": 1.28125, |
| "learning_rate": 2.7810434782608697e-06, |
| "loss": 0.2321, |
| "step": 12760 |
| }, |
| { |
| "epoch": 0.8858443529156731, |
| "grad_norm": 1.203125, |
| "learning_rate": 2.779304347826087e-06, |
| "loss": 0.2789, |
| "step": 12770 |
| }, |
| { |
| "epoch": 0.8865380446564058, |
| "grad_norm": 1.5546875, |
| "learning_rate": 2.777565217391305e-06, |
| "loss": 0.2188, |
| "step": 12780 |
| }, |
| { |
| "epoch": 0.8872317363971385, |
| "grad_norm": 1.21875, |
| "learning_rate": 2.775826086956522e-06, |
| "loss": 0.2593, |
| "step": 12790 |
| }, |
| { |
| "epoch": 0.8879254281378712, |
| "grad_norm": 1.015625, |
| "learning_rate": 2.774086956521739e-06, |
| "loss": 0.215, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.888619119878604, |
| "grad_norm": 1.3203125, |
| "learning_rate": 2.772347826086957e-06, |
| "loss": 0.2497, |
| "step": 12810 |
| }, |
| { |
| "epoch": 0.8893128116193366, |
| "grad_norm": 0.984375, |
| "learning_rate": 2.7706086956521744e-06, |
| "loss": 0.2003, |
| "step": 12820 |
| }, |
| { |
| "epoch": 0.8900065033600694, |
| "grad_norm": 1.171875, |
| "learning_rate": 2.7688695652173913e-06, |
| "loss": 0.2473, |
| "step": 12830 |
| }, |
| { |
| "epoch": 0.8907001951008021, |
| "grad_norm": 0.90234375, |
| "learning_rate": 2.7671304347826087e-06, |
| "loss": 0.2628, |
| "step": 12840 |
| }, |
| { |
| "epoch": 0.8913938868415348, |
| "grad_norm": 1.0546875, |
| "learning_rate": 2.7653913043478265e-06, |
| "loss": 0.2674, |
| "step": 12850 |
| }, |
| { |
| "epoch": 0.8920875785822675, |
| "grad_norm": 1.3828125, |
| "learning_rate": 2.763652173913044e-06, |
| "loss": 0.2468, |
| "step": 12860 |
| }, |
| { |
| "epoch": 0.8927812703230003, |
| "grad_norm": 1.109375, |
| "learning_rate": 2.761913043478261e-06, |
| "loss": 0.2161, |
| "step": 12870 |
| }, |
| { |
| "epoch": 0.8934749620637329, |
| "grad_norm": 1.1484375, |
| "learning_rate": 2.7601739130434786e-06, |
| "loss": 0.2284, |
| "step": 12880 |
| }, |
| { |
| "epoch": 0.8941686538044656, |
| "grad_norm": 1.1640625, |
| "learning_rate": 2.758434782608696e-06, |
| "loss": 0.2432, |
| "step": 12890 |
| }, |
| { |
| "epoch": 0.8948623455451984, |
| "grad_norm": 1.3359375, |
| "learning_rate": 2.756695652173913e-06, |
| "loss": 0.2964, |
| "step": 12900 |
| }, |
| { |
| "epoch": 0.895556037285931, |
| "grad_norm": 1.375, |
| "learning_rate": 2.7549565217391307e-06, |
| "loss": 0.2177, |
| "step": 12910 |
| }, |
| { |
| "epoch": 0.8962497290266638, |
| "grad_norm": 1.484375, |
| "learning_rate": 2.753217391304348e-06, |
| "loss": 0.2864, |
| "step": 12920 |
| }, |
| { |
| "epoch": 0.8969434207673965, |
| "grad_norm": 1.078125, |
| "learning_rate": 2.7514782608695655e-06, |
| "loss": 0.265, |
| "step": 12930 |
| }, |
| { |
| "epoch": 0.8976371125081292, |
| "grad_norm": 1.53125, |
| "learning_rate": 2.7497391304347824e-06, |
| "loss": 0.3763, |
| "step": 12940 |
| }, |
| { |
| "epoch": 0.8983308042488619, |
| "grad_norm": 1.1640625, |
| "learning_rate": 2.748e-06, |
| "loss": 0.2525, |
| "step": 12950 |
| }, |
| { |
| "epoch": 0.8990244959895947, |
| "grad_norm": 1.3984375, |
| "learning_rate": 2.7462608695652176e-06, |
| "loss": 0.2358, |
| "step": 12960 |
| }, |
| { |
| "epoch": 0.8997181877303273, |
| "grad_norm": 1.40625, |
| "learning_rate": 2.7445217391304354e-06, |
| "loss": 0.2315, |
| "step": 12970 |
| }, |
| { |
| "epoch": 0.90041187947106, |
| "grad_norm": 1.328125, |
| "learning_rate": 2.7427826086956523e-06, |
| "loss": 0.2688, |
| "step": 12980 |
| }, |
| { |
| "epoch": 0.9011055712117928, |
| "grad_norm": 1.4296875, |
| "learning_rate": 2.7410434782608697e-06, |
| "loss": 0.2491, |
| "step": 12990 |
| }, |
| { |
| "epoch": 0.9017992629525254, |
| "grad_norm": 1.3203125, |
| "learning_rate": 2.739304347826087e-06, |
| "loss": 0.2402, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.9024929546932582, |
| "grad_norm": 1.15625, |
| "learning_rate": 2.737565217391305e-06, |
| "loss": 0.258, |
| "step": 13010 |
| }, |
| { |
| "epoch": 0.9031866464339909, |
| "grad_norm": 1.109375, |
| "learning_rate": 2.735826086956522e-06, |
| "loss": 0.263, |
| "step": 13020 |
| }, |
| { |
| "epoch": 0.9038803381747236, |
| "grad_norm": 1.203125, |
| "learning_rate": 2.734086956521739e-06, |
| "loss": 0.2312, |
| "step": 13030 |
| }, |
| { |
| "epoch": 0.9045740299154563, |
| "grad_norm": 1.1875, |
| "learning_rate": 2.732347826086957e-06, |
| "loss": 0.2835, |
| "step": 13040 |
| }, |
| { |
| "epoch": 0.9052677216561891, |
| "grad_norm": 1.1796875, |
| "learning_rate": 2.7306086956521743e-06, |
| "loss": 0.2328, |
| "step": 13050 |
| }, |
| { |
| "epoch": 0.9059614133969217, |
| "grad_norm": 1.2734375, |
| "learning_rate": 2.7288695652173913e-06, |
| "loss": 0.2367, |
| "step": 13060 |
| }, |
| { |
| "epoch": 0.9066551051376545, |
| "grad_norm": 1.6796875, |
| "learning_rate": 2.7271304347826087e-06, |
| "loss": 0.2629, |
| "step": 13070 |
| }, |
| { |
| "epoch": 0.9073487968783872, |
| "grad_norm": 1.1171875, |
| "learning_rate": 2.7253913043478264e-06, |
| "loss": 0.2833, |
| "step": 13080 |
| }, |
| { |
| "epoch": 0.9080424886191198, |
| "grad_norm": 1.078125, |
| "learning_rate": 2.723652173913044e-06, |
| "loss": 0.2077, |
| "step": 13090 |
| }, |
| { |
| "epoch": 0.9087361803598526, |
| "grad_norm": 1.2734375, |
| "learning_rate": 2.7219130434782608e-06, |
| "loss": 0.213, |
| "step": 13100 |
| }, |
| { |
| "epoch": 0.9094298721005853, |
| "grad_norm": 1.3515625, |
| "learning_rate": 2.7201739130434786e-06, |
| "loss": 0.2784, |
| "step": 13110 |
| }, |
| { |
| "epoch": 0.910123563841318, |
| "grad_norm": 1.3203125, |
| "learning_rate": 2.718434782608696e-06, |
| "loss": 0.2505, |
| "step": 13120 |
| }, |
| { |
| "epoch": 0.9108172555820507, |
| "grad_norm": 1.171875, |
| "learning_rate": 2.7166956521739133e-06, |
| "loss": 0.2348, |
| "step": 13130 |
| }, |
| { |
| "epoch": 0.9115109473227835, |
| "grad_norm": 1.828125, |
| "learning_rate": 2.7149565217391307e-06, |
| "loss": 0.2753, |
| "step": 13140 |
| }, |
| { |
| "epoch": 0.9122046390635161, |
| "grad_norm": 1.5625, |
| "learning_rate": 2.713217391304348e-06, |
| "loss": 0.263, |
| "step": 13150 |
| }, |
| { |
| "epoch": 0.9128983308042489, |
| "grad_norm": 1.1875, |
| "learning_rate": 2.7114782608695654e-06, |
| "loss": 0.2445, |
| "step": 13160 |
| }, |
| { |
| "epoch": 0.9135920225449816, |
| "grad_norm": 1.5078125, |
| "learning_rate": 2.7097391304347832e-06, |
| "loss": 0.2877, |
| "step": 13170 |
| }, |
| { |
| "epoch": 0.9142857142857143, |
| "grad_norm": 1.0859375, |
| "learning_rate": 2.708e-06, |
| "loss": 0.2414, |
| "step": 13180 |
| }, |
| { |
| "epoch": 0.914979406026447, |
| "grad_norm": 1.171875, |
| "learning_rate": 2.7062608695652175e-06, |
| "loss": 0.2208, |
| "step": 13190 |
| }, |
| { |
| "epoch": 0.9156730977671798, |
| "grad_norm": 1.21875, |
| "learning_rate": 2.7045217391304353e-06, |
| "loss": 0.2381, |
| "step": 13200 |
| }, |
| { |
| "epoch": 0.9163667895079124, |
| "grad_norm": 1.5078125, |
| "learning_rate": 2.7027826086956523e-06, |
| "loss": 0.2036, |
| "step": 13210 |
| }, |
| { |
| "epoch": 0.9170604812486451, |
| "grad_norm": 1.2109375, |
| "learning_rate": 2.7010434782608696e-06, |
| "loss": 0.2259, |
| "step": 13220 |
| }, |
| { |
| "epoch": 0.9177541729893779, |
| "grad_norm": 1.0078125, |
| "learning_rate": 2.699304347826087e-06, |
| "loss": 0.2317, |
| "step": 13230 |
| }, |
| { |
| "epoch": 0.9184478647301105, |
| "grad_norm": 1.359375, |
| "learning_rate": 2.697565217391305e-06, |
| "loss": 0.2817, |
| "step": 13240 |
| }, |
| { |
| "epoch": 0.9191415564708433, |
| "grad_norm": 1.46875, |
| "learning_rate": 2.6958260869565218e-06, |
| "loss": 0.2527, |
| "step": 13250 |
| }, |
| { |
| "epoch": 0.919835248211576, |
| "grad_norm": 1.15625, |
| "learning_rate": 2.694086956521739e-06, |
| "loss": 0.2398, |
| "step": 13260 |
| }, |
| { |
| "epoch": 0.9205289399523087, |
| "grad_norm": 1.203125, |
| "learning_rate": 2.692347826086957e-06, |
| "loss": 0.2632, |
| "step": 13270 |
| }, |
| { |
| "epoch": 0.9212226316930414, |
| "grad_norm": 1.2109375, |
| "learning_rate": 2.6906086956521743e-06, |
| "loss": 0.218, |
| "step": 13280 |
| }, |
| { |
| "epoch": 0.9219163234337742, |
| "grad_norm": 1.296875, |
| "learning_rate": 2.6888695652173912e-06, |
| "loss": 0.2906, |
| "step": 13290 |
| }, |
| { |
| "epoch": 0.9226100151745068, |
| "grad_norm": 1.21875, |
| "learning_rate": 2.6871304347826086e-06, |
| "loss": 0.233, |
| "step": 13300 |
| }, |
| { |
| "epoch": 0.9233037069152396, |
| "grad_norm": 1.1875, |
| "learning_rate": 2.6853913043478264e-06, |
| "loss": 0.2417, |
| "step": 13310 |
| }, |
| { |
| "epoch": 0.9239973986559723, |
| "grad_norm": 1.3671875, |
| "learning_rate": 2.6836521739130438e-06, |
| "loss": 0.3028, |
| "step": 13320 |
| }, |
| { |
| "epoch": 0.9246910903967049, |
| "grad_norm": 1.2578125, |
| "learning_rate": 2.6819130434782607e-06, |
| "loss": 0.2826, |
| "step": 13330 |
| }, |
| { |
| "epoch": 0.9253847821374377, |
| "grad_norm": 1.3515625, |
| "learning_rate": 2.6801739130434785e-06, |
| "loss": 0.238, |
| "step": 13340 |
| }, |
| { |
| "epoch": 0.9260784738781704, |
| "grad_norm": 1.0234375, |
| "learning_rate": 2.678434782608696e-06, |
| "loss": 0.2322, |
| "step": 13350 |
| }, |
| { |
| "epoch": 0.9267721656189031, |
| "grad_norm": 1.3515625, |
| "learning_rate": 2.6766956521739133e-06, |
| "loss": 0.2983, |
| "step": 13360 |
| }, |
| { |
| "epoch": 0.9274658573596358, |
| "grad_norm": 0.9609375, |
| "learning_rate": 2.6749565217391306e-06, |
| "loss": 0.2692, |
| "step": 13370 |
| }, |
| { |
| "epoch": 0.9281595491003686, |
| "grad_norm": 1.28125, |
| "learning_rate": 2.673217391304348e-06, |
| "loss": 0.2064, |
| "step": 13380 |
| }, |
| { |
| "epoch": 0.9288532408411012, |
| "grad_norm": 1.265625, |
| "learning_rate": 2.6714782608695654e-06, |
| "loss": 0.2225, |
| "step": 13390 |
| }, |
| { |
| "epoch": 0.929546932581834, |
| "grad_norm": 1.375, |
| "learning_rate": 2.669739130434783e-06, |
| "loss": 0.2397, |
| "step": 13400 |
| }, |
| { |
| "epoch": 0.9302406243225667, |
| "grad_norm": 1.28125, |
| "learning_rate": 2.668e-06, |
| "loss": 0.238, |
| "step": 13410 |
| }, |
| { |
| "epoch": 0.9309343160632993, |
| "grad_norm": 1.8828125, |
| "learning_rate": 2.6662608695652175e-06, |
| "loss": 0.2753, |
| "step": 13420 |
| }, |
| { |
| "epoch": 0.9316280078040321, |
| "grad_norm": 0.8671875, |
| "learning_rate": 2.6645217391304353e-06, |
| "loss": 0.225, |
| "step": 13430 |
| }, |
| { |
| "epoch": 0.9323216995447648, |
| "grad_norm": 1.2421875, |
| "learning_rate": 2.6627826086956527e-06, |
| "loss": 0.2449, |
| "step": 13440 |
| }, |
| { |
| "epoch": 0.9330153912854975, |
| "grad_norm": 0.98046875, |
| "learning_rate": 2.6610434782608696e-06, |
| "loss": 0.2416, |
| "step": 13450 |
| }, |
| { |
| "epoch": 0.9337090830262302, |
| "grad_norm": 1.0703125, |
| "learning_rate": 2.659304347826087e-06, |
| "loss": 0.217, |
| "step": 13460 |
| }, |
| { |
| "epoch": 0.934402774766963, |
| "grad_norm": 1.5078125, |
| "learning_rate": 2.6575652173913048e-06, |
| "loss": 0.2213, |
| "step": 13470 |
| }, |
| { |
| "epoch": 0.9350964665076956, |
| "grad_norm": 1.0390625, |
| "learning_rate": 2.655826086956522e-06, |
| "loss": 0.3161, |
| "step": 13480 |
| }, |
| { |
| "epoch": 0.9357901582484284, |
| "grad_norm": 1.5234375, |
| "learning_rate": 2.654086956521739e-06, |
| "loss": 0.2487, |
| "step": 13490 |
| }, |
| { |
| "epoch": 0.9364838499891611, |
| "grad_norm": 1.6796875, |
| "learning_rate": 2.652347826086957e-06, |
| "loss": 0.3213, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.9371775417298938, |
| "grad_norm": 1.1875, |
| "learning_rate": 2.6506086956521743e-06, |
| "loss": 0.2486, |
| "step": 13510 |
| }, |
| { |
| "epoch": 0.9378712334706265, |
| "grad_norm": 1.5078125, |
| "learning_rate": 2.648869565217391e-06, |
| "loss": 0.2348, |
| "step": 13520 |
| }, |
| { |
| "epoch": 0.9385649252113591, |
| "grad_norm": 1.0859375, |
| "learning_rate": 2.6471304347826086e-06, |
| "loss": 0.2137, |
| "step": 13530 |
| }, |
| { |
| "epoch": 0.9392586169520919, |
| "grad_norm": 1.1640625, |
| "learning_rate": 2.6453913043478264e-06, |
| "loss": 0.2431, |
| "step": 13540 |
| }, |
| { |
| "epoch": 0.9399523086928246, |
| "grad_norm": 0.82421875, |
| "learning_rate": 2.6436521739130437e-06, |
| "loss": 0.2795, |
| "step": 13550 |
| }, |
| { |
| "epoch": 0.9406460004335573, |
| "grad_norm": 1.109375, |
| "learning_rate": 2.6419130434782607e-06, |
| "loss": 0.2674, |
| "step": 13560 |
| }, |
| { |
| "epoch": 0.94133969217429, |
| "grad_norm": 1.0859375, |
| "learning_rate": 2.6401739130434785e-06, |
| "loss": 0.2346, |
| "step": 13570 |
| }, |
| { |
| "epoch": 0.9420333839150228, |
| "grad_norm": 1.3125, |
| "learning_rate": 2.638434782608696e-06, |
| "loss": 0.2416, |
| "step": 13580 |
| }, |
| { |
| "epoch": 0.9427270756557554, |
| "grad_norm": 1.703125, |
| "learning_rate": 2.6366956521739132e-06, |
| "loss": 0.2999, |
| "step": 13590 |
| }, |
| { |
| "epoch": 0.9434207673964882, |
| "grad_norm": 1.1015625, |
| "learning_rate": 2.6349565217391306e-06, |
| "loss": 0.1903, |
| "step": 13600 |
| }, |
| { |
| "epoch": 0.9441144591372209, |
| "grad_norm": 0.96875, |
| "learning_rate": 2.633217391304348e-06, |
| "loss": 0.2281, |
| "step": 13610 |
| }, |
| { |
| "epoch": 0.9448081508779536, |
| "grad_norm": 1.203125, |
| "learning_rate": 2.6314782608695653e-06, |
| "loss": 0.2115, |
| "step": 13620 |
| }, |
| { |
| "epoch": 0.9455018426186863, |
| "grad_norm": 1.2578125, |
| "learning_rate": 2.629739130434783e-06, |
| "loss": 0.2522, |
| "step": 13630 |
| }, |
| { |
| "epoch": 0.946195534359419, |
| "grad_norm": 1.1796875, |
| "learning_rate": 2.628e-06, |
| "loss": 0.2353, |
| "step": 13640 |
| }, |
| { |
| "epoch": 0.9468892261001517, |
| "grad_norm": 1.3671875, |
| "learning_rate": 2.6262608695652175e-06, |
| "loss": 0.2585, |
| "step": 13650 |
| }, |
| { |
| "epoch": 0.9475829178408844, |
| "grad_norm": 1.0546875, |
| "learning_rate": 2.6245217391304352e-06, |
| "loss": 0.2067, |
| "step": 13660 |
| }, |
| { |
| "epoch": 0.9482766095816172, |
| "grad_norm": 1.4609375, |
| "learning_rate": 2.6227826086956526e-06, |
| "loss": 0.2414, |
| "step": 13670 |
| }, |
| { |
| "epoch": 0.9489703013223498, |
| "grad_norm": 0.9921875, |
| "learning_rate": 2.6210434782608696e-06, |
| "loss": 0.2992, |
| "step": 13680 |
| }, |
| { |
| "epoch": 0.9496639930630826, |
| "grad_norm": 1.578125, |
| "learning_rate": 2.619304347826087e-06, |
| "loss": 0.2543, |
| "step": 13690 |
| }, |
| { |
| "epoch": 0.9503576848038153, |
| "grad_norm": 1.3046875, |
| "learning_rate": 2.6175652173913047e-06, |
| "loss": 0.2452, |
| "step": 13700 |
| }, |
| { |
| "epoch": 0.951051376544548, |
| "grad_norm": 1.2421875, |
| "learning_rate": 2.615826086956522e-06, |
| "loss": 0.2564, |
| "step": 13710 |
| }, |
| { |
| "epoch": 0.9517450682852807, |
| "grad_norm": 1.2109375, |
| "learning_rate": 2.614086956521739e-06, |
| "loss": 0.2446, |
| "step": 13720 |
| }, |
| { |
| "epoch": 0.9524387600260135, |
| "grad_norm": 1.3515625, |
| "learning_rate": 2.612347826086957e-06, |
| "loss": 0.2343, |
| "step": 13730 |
| }, |
| { |
| "epoch": 0.9531324517667461, |
| "grad_norm": 1.0546875, |
| "learning_rate": 2.6106086956521742e-06, |
| "loss": 0.2255, |
| "step": 13740 |
| }, |
| { |
| "epoch": 0.9538261435074789, |
| "grad_norm": 1.6328125, |
| "learning_rate": 2.6088695652173916e-06, |
| "loss": 0.2345, |
| "step": 13750 |
| }, |
| { |
| "epoch": 0.9545198352482116, |
| "grad_norm": 1.4921875, |
| "learning_rate": 2.6071304347826085e-06, |
| "loss": 0.2453, |
| "step": 13760 |
| }, |
| { |
| "epoch": 0.9552135269889442, |
| "grad_norm": 1.2421875, |
| "learning_rate": 2.6053913043478263e-06, |
| "loss": 0.2377, |
| "step": 13770 |
| }, |
| { |
| "epoch": 0.955907218729677, |
| "grad_norm": 0.88671875, |
| "learning_rate": 2.6036521739130437e-06, |
| "loss": 0.2412, |
| "step": 13780 |
| }, |
| { |
| "epoch": 0.9566009104704097, |
| "grad_norm": 1.265625, |
| "learning_rate": 2.6019130434782615e-06, |
| "loss": 0.3032, |
| "step": 13790 |
| }, |
| { |
| "epoch": 0.9572946022111424, |
| "grad_norm": 1.5703125, |
| "learning_rate": 2.6001739130434784e-06, |
| "loss": 0.2317, |
| "step": 13800 |
| }, |
| { |
| "epoch": 0.9579882939518751, |
| "grad_norm": 1.125, |
| "learning_rate": 2.598434782608696e-06, |
| "loss": 0.2275, |
| "step": 13810 |
| }, |
| { |
| "epoch": 0.9586819856926079, |
| "grad_norm": 1.25, |
| "learning_rate": 2.596695652173913e-06, |
| "loss": 0.2155, |
| "step": 13820 |
| }, |
| { |
| "epoch": 0.9593756774333405, |
| "grad_norm": 1.2734375, |
| "learning_rate": 2.5949565217391306e-06, |
| "loss": 0.2261, |
| "step": 13830 |
| }, |
| { |
| "epoch": 0.9600693691740733, |
| "grad_norm": 1.3203125, |
| "learning_rate": 2.593217391304348e-06, |
| "loss": 0.2183, |
| "step": 13840 |
| }, |
| { |
| "epoch": 0.960763060914806, |
| "grad_norm": 1.328125, |
| "learning_rate": 2.5914782608695653e-06, |
| "loss": 0.2203, |
| "step": 13850 |
| }, |
| { |
| "epoch": 0.9614567526555386, |
| "grad_norm": 1.2421875, |
| "learning_rate": 2.589739130434783e-06, |
| "loss": 0.2219, |
| "step": 13860 |
| }, |
| { |
| "epoch": 0.9621504443962714, |
| "grad_norm": 1.0546875, |
| "learning_rate": 2.588e-06, |
| "loss": 0.25, |
| "step": 13870 |
| }, |
| { |
| "epoch": 0.9628441361370041, |
| "grad_norm": 0.921875, |
| "learning_rate": 2.5862608695652174e-06, |
| "loss": 0.2036, |
| "step": 13880 |
| }, |
| { |
| "epoch": 0.9635378278777368, |
| "grad_norm": 1.015625, |
| "learning_rate": 2.584521739130435e-06, |
| "loss": 0.2385, |
| "step": 13890 |
| }, |
| { |
| "epoch": 0.9642315196184695, |
| "grad_norm": 1.2421875, |
| "learning_rate": 2.5827826086956526e-06, |
| "loss": 0.2036, |
| "step": 13900 |
| }, |
| { |
| "epoch": 0.9649252113592023, |
| "grad_norm": 1.3203125, |
| "learning_rate": 2.5810434782608695e-06, |
| "loss": 0.2155, |
| "step": 13910 |
| }, |
| { |
| "epoch": 0.9656189030999349, |
| "grad_norm": 0.93359375, |
| "learning_rate": 2.579304347826087e-06, |
| "loss": 0.238, |
| "step": 13920 |
| }, |
| { |
| "epoch": 0.9663125948406677, |
| "grad_norm": 1.1640625, |
| "learning_rate": 2.5775652173913047e-06, |
| "loss": 0.2178, |
| "step": 13930 |
| }, |
| { |
| "epoch": 0.9670062865814004, |
| "grad_norm": 1.15625, |
| "learning_rate": 2.575826086956522e-06, |
| "loss": 0.2891, |
| "step": 13940 |
| }, |
| { |
| "epoch": 0.9676999783221331, |
| "grad_norm": 1.140625, |
| "learning_rate": 2.574086956521739e-06, |
| "loss": 0.2354, |
| "step": 13950 |
| }, |
| { |
| "epoch": 0.9683936700628658, |
| "grad_norm": 1.40625, |
| "learning_rate": 2.572347826086957e-06, |
| "loss": 0.2305, |
| "step": 13960 |
| }, |
| { |
| "epoch": 0.9690873618035986, |
| "grad_norm": 1.0625, |
| "learning_rate": 2.570608695652174e-06, |
| "loss": 0.2175, |
| "step": 13970 |
| }, |
| { |
| "epoch": 0.9697810535443312, |
| "grad_norm": 1.1484375, |
| "learning_rate": 2.5688695652173915e-06, |
| "loss": 0.232, |
| "step": 13980 |
| }, |
| { |
| "epoch": 0.9704747452850639, |
| "grad_norm": 1.03125, |
| "learning_rate": 2.5671304347826085e-06, |
| "loss": 0.2387, |
| "step": 13990 |
| }, |
| { |
| "epoch": 0.9711684370257967, |
| "grad_norm": 0.8671875, |
| "learning_rate": 2.5653913043478263e-06, |
| "loss": 0.3122, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.9718621287665293, |
| "grad_norm": 1.1171875, |
| "learning_rate": 2.5636521739130437e-06, |
| "loss": 0.2158, |
| "step": 14010 |
| }, |
| { |
| "epoch": 0.9725558205072621, |
| "grad_norm": 0.75390625, |
| "learning_rate": 2.5619130434782615e-06, |
| "loss": 0.222, |
| "step": 14020 |
| }, |
| { |
| "epoch": 0.9732495122479948, |
| "grad_norm": 1.125, |
| "learning_rate": 2.5601739130434784e-06, |
| "loss": 0.2692, |
| "step": 14030 |
| }, |
| { |
| "epoch": 0.9739432039887275, |
| "grad_norm": 1.4453125, |
| "learning_rate": 2.5584347826086958e-06, |
| "loss": 0.2721, |
| "step": 14040 |
| }, |
| { |
| "epoch": 0.9746368957294602, |
| "grad_norm": 1.5234375, |
| "learning_rate": 2.5566956521739136e-06, |
| "loss": 0.2335, |
| "step": 14050 |
| }, |
| { |
| "epoch": 0.975330587470193, |
| "grad_norm": 1.0703125, |
| "learning_rate": 2.554956521739131e-06, |
| "loss": 0.2132, |
| "step": 14060 |
| }, |
| { |
| "epoch": 0.9760242792109256, |
| "grad_norm": 1.25, |
| "learning_rate": 2.553217391304348e-06, |
| "loss": 0.2424, |
| "step": 14070 |
| }, |
| { |
| "epoch": 0.9767179709516584, |
| "grad_norm": 1.140625, |
| "learning_rate": 2.5514782608695653e-06, |
| "loss": 0.2583, |
| "step": 14080 |
| }, |
| { |
| "epoch": 0.9774116626923911, |
| "grad_norm": 1.21875, |
| "learning_rate": 2.549739130434783e-06, |
| "loss": 0.2165, |
| "step": 14090 |
| }, |
| { |
| "epoch": 0.9781053544331237, |
| "grad_norm": 1.2734375, |
| "learning_rate": 2.5480000000000004e-06, |
| "loss": 0.2442, |
| "step": 14100 |
| }, |
| { |
| "epoch": 0.9787990461738565, |
| "grad_norm": 1.21875, |
| "learning_rate": 2.5462608695652174e-06, |
| "loss": 0.3018, |
| "step": 14110 |
| }, |
| { |
| "epoch": 0.9794927379145892, |
| "grad_norm": 1.1640625, |
| "learning_rate": 2.544521739130435e-06, |
| "loss": 0.2094, |
| "step": 14120 |
| }, |
| { |
| "epoch": 0.9801864296553219, |
| "grad_norm": 1.0859375, |
| "learning_rate": 2.5427826086956525e-06, |
| "loss": 0.2546, |
| "step": 14130 |
| }, |
| { |
| "epoch": 0.9808801213960546, |
| "grad_norm": 1.3515625, |
| "learning_rate": 2.5410434782608695e-06, |
| "loss": 0.2204, |
| "step": 14140 |
| }, |
| { |
| "epoch": 0.9815738131367874, |
| "grad_norm": 1.3515625, |
| "learning_rate": 2.539304347826087e-06, |
| "loss": 0.176, |
| "step": 14150 |
| }, |
| { |
| "epoch": 0.98226750487752, |
| "grad_norm": 1.015625, |
| "learning_rate": 2.5375652173913046e-06, |
| "loss": 0.2255, |
| "step": 14160 |
| }, |
| { |
| "epoch": 0.9829611966182528, |
| "grad_norm": 0.9765625, |
| "learning_rate": 2.535826086956522e-06, |
| "loss": 0.228, |
| "step": 14170 |
| }, |
| { |
| "epoch": 0.9836548883589855, |
| "grad_norm": 1.421875, |
| "learning_rate": 2.534086956521739e-06, |
| "loss": 0.2245, |
| "step": 14180 |
| }, |
| { |
| "epoch": 0.9843485800997182, |
| "grad_norm": 1.0703125, |
| "learning_rate": 2.5323478260869568e-06, |
| "loss": 0.2602, |
| "step": 14190 |
| }, |
| { |
| "epoch": 0.9850422718404509, |
| "grad_norm": 1.453125, |
| "learning_rate": 2.530608695652174e-06, |
| "loss": 0.3022, |
| "step": 14200 |
| }, |
| { |
| "epoch": 0.9857359635811836, |
| "grad_norm": 1.2421875, |
| "learning_rate": 2.5288695652173915e-06, |
| "loss": 0.2025, |
| "step": 14210 |
| }, |
| { |
| "epoch": 0.9864296553219163, |
| "grad_norm": 1.140625, |
| "learning_rate": 2.527130434782609e-06, |
| "loss": 0.2147, |
| "step": 14220 |
| }, |
| { |
| "epoch": 0.987123347062649, |
| "grad_norm": 1.28125, |
| "learning_rate": 2.5253913043478262e-06, |
| "loss": 0.2519, |
| "step": 14230 |
| }, |
| { |
| "epoch": 0.9878170388033818, |
| "grad_norm": 1.15625, |
| "learning_rate": 2.5236521739130436e-06, |
| "loss": 0.2693, |
| "step": 14240 |
| }, |
| { |
| "epoch": 0.9885107305441144, |
| "grad_norm": 0.90625, |
| "learning_rate": 2.5219130434782614e-06, |
| "loss": 0.246, |
| "step": 14250 |
| }, |
| { |
| "epoch": 0.9892044222848472, |
| "grad_norm": 1.125, |
| "learning_rate": 2.5201739130434784e-06, |
| "loss": 0.235, |
| "step": 14260 |
| }, |
| { |
| "epoch": 0.9898981140255799, |
| "grad_norm": 1.1640625, |
| "learning_rate": 2.5184347826086957e-06, |
| "loss": 0.2056, |
| "step": 14270 |
| }, |
| { |
| "epoch": 0.9905918057663126, |
| "grad_norm": 1.2578125, |
| "learning_rate": 2.5166956521739135e-06, |
| "loss": 0.2063, |
| "step": 14280 |
| }, |
| { |
| "epoch": 0.9912854975070453, |
| "grad_norm": 1.484375, |
| "learning_rate": 2.514956521739131e-06, |
| "loss": 0.2341, |
| "step": 14290 |
| }, |
| { |
| "epoch": 0.9919791892477781, |
| "grad_norm": 1.3671875, |
| "learning_rate": 2.513217391304348e-06, |
| "loss": 0.21, |
| "step": 14300 |
| }, |
| { |
| "epoch": 0.9926728809885107, |
| "grad_norm": 1.4140625, |
| "learning_rate": 2.5114782608695652e-06, |
| "loss": 0.2236, |
| "step": 14310 |
| }, |
| { |
| "epoch": 0.9933665727292434, |
| "grad_norm": 1.2109375, |
| "learning_rate": 2.509739130434783e-06, |
| "loss": 0.2257, |
| "step": 14320 |
| }, |
| { |
| "epoch": 0.9940602644699762, |
| "grad_norm": 1.4453125, |
| "learning_rate": 2.5080000000000004e-06, |
| "loss": 0.2403, |
| "step": 14330 |
| }, |
| { |
| "epoch": 0.9947539562107088, |
| "grad_norm": 1.0390625, |
| "learning_rate": 2.5062608695652173e-06, |
| "loss": 0.2051, |
| "step": 14340 |
| }, |
| { |
| "epoch": 0.9954476479514416, |
| "grad_norm": 0.94921875, |
| "learning_rate": 2.504521739130435e-06, |
| "loss": 0.2264, |
| "step": 14350 |
| }, |
| { |
| "epoch": 0.9961413396921743, |
| "grad_norm": 1.1953125, |
| "learning_rate": 2.5027826086956525e-06, |
| "loss": 0.2702, |
| "step": 14360 |
| }, |
| { |
| "epoch": 0.996835031432907, |
| "grad_norm": 0.92578125, |
| "learning_rate": 2.50104347826087e-06, |
| "loss": 0.2241, |
| "step": 14370 |
| }, |
| { |
| "epoch": 0.9975287231736397, |
| "grad_norm": 1.0625, |
| "learning_rate": 2.4993043478260872e-06, |
| "loss": 0.2191, |
| "step": 14380 |
| }, |
| { |
| "epoch": 0.9982224149143725, |
| "grad_norm": 1.5546875, |
| "learning_rate": 2.4975652173913046e-06, |
| "loss": 0.3005, |
| "step": 14390 |
| }, |
| { |
| "epoch": 0.9989161066551051, |
| "grad_norm": 1.2578125, |
| "learning_rate": 2.495826086956522e-06, |
| "loss": 0.3011, |
| "step": 14400 |
| }, |
| { |
| "epoch": 0.9996097983958379, |
| "grad_norm": 1.7109375, |
| "learning_rate": 2.4940869565217394e-06, |
| "loss": 0.2558, |
| "step": 14410 |
| }, |
| { |
| "epoch": 1.0002774766962932, |
| "grad_norm": 1.484375, |
| "learning_rate": 2.4923478260869567e-06, |
| "loss": 0.2414, |
| "step": 14420 |
| }, |
| { |
| "epoch": 1.0009711684370257, |
| "grad_norm": 1.03125, |
| "learning_rate": 2.490608695652174e-06, |
| "loss": 0.2269, |
| "step": 14430 |
| }, |
| { |
| "epoch": 1.0016648601777585, |
| "grad_norm": 1.203125, |
| "learning_rate": 2.4888695652173915e-06, |
| "loss": 0.2052, |
| "step": 14440 |
| }, |
| { |
| "epoch": 1.0023585519184912, |
| "grad_norm": 1.09375, |
| "learning_rate": 2.487130434782609e-06, |
| "loss": 0.2343, |
| "step": 14450 |
| }, |
| { |
| "epoch": 1.003052243659224, |
| "grad_norm": 1.046875, |
| "learning_rate": 2.485391304347826e-06, |
| "loss": 0.2279, |
| "step": 14460 |
| }, |
| { |
| "epoch": 1.0037459353999567, |
| "grad_norm": 0.94921875, |
| "learning_rate": 2.4836521739130436e-06, |
| "loss": 0.2713, |
| "step": 14470 |
| }, |
| { |
| "epoch": 1.0044396271406895, |
| "grad_norm": 1.2109375, |
| "learning_rate": 2.481913043478261e-06, |
| "loss": 0.2522, |
| "step": 14480 |
| }, |
| { |
| "epoch": 1.005133318881422, |
| "grad_norm": 1.3046875, |
| "learning_rate": 2.4801739130434783e-06, |
| "loss": 0.2336, |
| "step": 14490 |
| }, |
| { |
| "epoch": 1.0058270106221547, |
| "grad_norm": 1.140625, |
| "learning_rate": 2.4784347826086957e-06, |
| "loss": 0.228, |
| "step": 14500 |
| }, |
| { |
| "epoch": 1.0065207023628875, |
| "grad_norm": 1.1640625, |
| "learning_rate": 2.4766956521739135e-06, |
| "loss": 0.2082, |
| "step": 14510 |
| }, |
| { |
| "epoch": 1.0072143941036202, |
| "grad_norm": 1.25, |
| "learning_rate": 2.4749565217391304e-06, |
| "loss": 0.2847, |
| "step": 14520 |
| }, |
| { |
| "epoch": 1.007908085844353, |
| "grad_norm": 0.9453125, |
| "learning_rate": 2.4732173913043482e-06, |
| "loss": 0.2132, |
| "step": 14530 |
| }, |
| { |
| "epoch": 1.0086017775850857, |
| "grad_norm": 1.3671875, |
| "learning_rate": 2.471478260869565e-06, |
| "loss": 0.2656, |
| "step": 14540 |
| }, |
| { |
| "epoch": 1.0092954693258183, |
| "grad_norm": 1.3046875, |
| "learning_rate": 2.469739130434783e-06, |
| "loss": 0.2238, |
| "step": 14550 |
| }, |
| { |
| "epoch": 1.009989161066551, |
| "grad_norm": 1.6640625, |
| "learning_rate": 2.468e-06, |
| "loss": 0.3057, |
| "step": 14560 |
| }, |
| { |
| "epoch": 1.0106828528072838, |
| "grad_norm": 1.1484375, |
| "learning_rate": 2.4662608695652177e-06, |
| "loss": 0.2365, |
| "step": 14570 |
| }, |
| { |
| "epoch": 1.0113765445480165, |
| "grad_norm": 0.98046875, |
| "learning_rate": 2.464521739130435e-06, |
| "loss": 0.2215, |
| "step": 14580 |
| }, |
| { |
| "epoch": 1.0120702362887493, |
| "grad_norm": 1.4921875, |
| "learning_rate": 2.4627826086956525e-06, |
| "loss": 0.2266, |
| "step": 14590 |
| }, |
| { |
| "epoch": 1.012763928029482, |
| "grad_norm": 2.265625, |
| "learning_rate": 2.46104347826087e-06, |
| "loss": 0.2978, |
| "step": 14600 |
| }, |
| { |
| "epoch": 1.0134576197702145, |
| "grad_norm": 1.328125, |
| "learning_rate": 2.459304347826087e-06, |
| "loss": 0.2397, |
| "step": 14610 |
| }, |
| { |
| "epoch": 1.0141513115109473, |
| "grad_norm": 1.2109375, |
| "learning_rate": 2.4575652173913046e-06, |
| "loss": 0.3155, |
| "step": 14620 |
| }, |
| { |
| "epoch": 1.01484500325168, |
| "grad_norm": 1.3828125, |
| "learning_rate": 2.455826086956522e-06, |
| "loss": 0.2873, |
| "step": 14630 |
| }, |
| { |
| "epoch": 1.0155386949924128, |
| "grad_norm": 1.7734375, |
| "learning_rate": 2.4540869565217393e-06, |
| "loss": 0.2612, |
| "step": 14640 |
| }, |
| { |
| "epoch": 1.0162323867331455, |
| "grad_norm": 1.1171875, |
| "learning_rate": 2.4523478260869567e-06, |
| "loss": 0.2231, |
| "step": 14650 |
| }, |
| { |
| "epoch": 1.0169260784738783, |
| "grad_norm": 1.5234375, |
| "learning_rate": 2.450608695652174e-06, |
| "loss": 0.3073, |
| "step": 14660 |
| }, |
| { |
| "epoch": 1.0176197702146108, |
| "grad_norm": 1.1875, |
| "learning_rate": 2.4488695652173914e-06, |
| "loss": 0.3001, |
| "step": 14670 |
| }, |
| { |
| "epoch": 1.0183134619553436, |
| "grad_norm": 1.2109375, |
| "learning_rate": 2.447130434782609e-06, |
| "loss": 0.2108, |
| "step": 14680 |
| }, |
| { |
| "epoch": 1.0190071536960763, |
| "grad_norm": 1.1953125, |
| "learning_rate": 2.4453913043478266e-06, |
| "loss": 0.2234, |
| "step": 14690 |
| }, |
| { |
| "epoch": 1.019700845436809, |
| "grad_norm": 1.25, |
| "learning_rate": 2.4436521739130435e-06, |
| "loss": 0.235, |
| "step": 14700 |
| }, |
| { |
| "epoch": 1.0203945371775418, |
| "grad_norm": 1.1171875, |
| "learning_rate": 2.4419130434782613e-06, |
| "loss": 0.2167, |
| "step": 14710 |
| }, |
| { |
| "epoch": 1.0210882289182743, |
| "grad_norm": 0.97265625, |
| "learning_rate": 2.4401739130434783e-06, |
| "loss": 0.2383, |
| "step": 14720 |
| }, |
| { |
| "epoch": 1.021781920659007, |
| "grad_norm": 1.375, |
| "learning_rate": 2.438434782608696e-06, |
| "loss": 0.2186, |
| "step": 14730 |
| }, |
| { |
| "epoch": 1.0224756123997398, |
| "grad_norm": 1.1015625, |
| "learning_rate": 2.4366956521739134e-06, |
| "loss": 0.2027, |
| "step": 14740 |
| }, |
| { |
| "epoch": 1.0231693041404726, |
| "grad_norm": 1.078125, |
| "learning_rate": 2.4349565217391304e-06, |
| "loss": 0.2397, |
| "step": 14750 |
| }, |
| { |
| "epoch": 1.0238629958812053, |
| "grad_norm": 0.9921875, |
| "learning_rate": 2.433217391304348e-06, |
| "loss": 0.2239, |
| "step": 14760 |
| }, |
| { |
| "epoch": 1.024556687621938, |
| "grad_norm": 1.21875, |
| "learning_rate": 2.431478260869565e-06, |
| "loss": 0.2252, |
| "step": 14770 |
| }, |
| { |
| "epoch": 1.0252503793626706, |
| "grad_norm": 1.296875, |
| "learning_rate": 2.429739130434783e-06, |
| "loss": 0.2448, |
| "step": 14780 |
| }, |
| { |
| "epoch": 1.0259440711034034, |
| "grad_norm": 1.375, |
| "learning_rate": 2.428e-06, |
| "loss": 0.2628, |
| "step": 14790 |
| }, |
| { |
| "epoch": 1.026637762844136, |
| "grad_norm": 1.5703125, |
| "learning_rate": 2.4262608695652177e-06, |
| "loss": 0.2583, |
| "step": 14800 |
| }, |
| { |
| "epoch": 1.0273314545848689, |
| "grad_norm": 1.171875, |
| "learning_rate": 2.424521739130435e-06, |
| "loss": 0.2329, |
| "step": 14810 |
| }, |
| { |
| "epoch": 1.0280251463256016, |
| "grad_norm": 1.171875, |
| "learning_rate": 2.4227826086956524e-06, |
| "loss": 0.2318, |
| "step": 14820 |
| }, |
| { |
| "epoch": 1.0287188380663344, |
| "grad_norm": 1.46875, |
| "learning_rate": 2.4210434782608698e-06, |
| "loss": 0.2351, |
| "step": 14830 |
| }, |
| { |
| "epoch": 1.0294125298070669, |
| "grad_norm": 1.078125, |
| "learning_rate": 2.419304347826087e-06, |
| "loss": 0.2533, |
| "step": 14840 |
| }, |
| { |
| "epoch": 1.0301062215477996, |
| "grad_norm": 1.6328125, |
| "learning_rate": 2.4175652173913045e-06, |
| "loss": 0.2254, |
| "step": 14850 |
| }, |
| { |
| "epoch": 1.0307999132885324, |
| "grad_norm": 0.90234375, |
| "learning_rate": 2.415826086956522e-06, |
| "loss": 0.2149, |
| "step": 14860 |
| }, |
| { |
| "epoch": 1.0314936050292651, |
| "grad_norm": 1.4453125, |
| "learning_rate": 2.4140869565217393e-06, |
| "loss": 0.2698, |
| "step": 14870 |
| }, |
| { |
| "epoch": 1.0321872967699979, |
| "grad_norm": 1.40625, |
| "learning_rate": 2.4123478260869566e-06, |
| "loss": 0.1998, |
| "step": 14880 |
| }, |
| { |
| "epoch": 1.0328809885107306, |
| "grad_norm": 1.1484375, |
| "learning_rate": 2.410608695652174e-06, |
| "loss": 0.241, |
| "step": 14890 |
| }, |
| { |
| "epoch": 1.0335746802514632, |
| "grad_norm": 1.0859375, |
| "learning_rate": 2.4088695652173914e-06, |
| "loss": 0.2182, |
| "step": 14900 |
| }, |
| { |
| "epoch": 1.034268371992196, |
| "grad_norm": 1.3359375, |
| "learning_rate": 2.4071304347826088e-06, |
| "loss": 0.2561, |
| "step": 14910 |
| }, |
| { |
| "epoch": 1.0349620637329286, |
| "grad_norm": 1.0859375, |
| "learning_rate": 2.4053913043478265e-06, |
| "loss": 0.2321, |
| "step": 14920 |
| }, |
| { |
| "epoch": 1.0356557554736614, |
| "grad_norm": 1.3828125, |
| "learning_rate": 2.4036521739130435e-06, |
| "loss": 0.2393, |
| "step": 14930 |
| }, |
| { |
| "epoch": 1.0363494472143941, |
| "grad_norm": 0.8984375, |
| "learning_rate": 2.4019130434782613e-06, |
| "loss": 0.23, |
| "step": 14940 |
| }, |
| { |
| "epoch": 1.037043138955127, |
| "grad_norm": 1.125, |
| "learning_rate": 2.4001739130434782e-06, |
| "loss": 0.2397, |
| "step": 14950 |
| }, |
| { |
| "epoch": 1.0377368306958594, |
| "grad_norm": 1.078125, |
| "learning_rate": 2.398434782608696e-06, |
| "loss": 0.3271, |
| "step": 14960 |
| }, |
| { |
| "epoch": 1.0384305224365922, |
| "grad_norm": 1.1953125, |
| "learning_rate": 2.3966956521739134e-06, |
| "loss": 0.2324, |
| "step": 14970 |
| }, |
| { |
| "epoch": 1.039124214177325, |
| "grad_norm": 1.265625, |
| "learning_rate": 2.3949565217391308e-06, |
| "loss": 0.2342, |
| "step": 14980 |
| }, |
| { |
| "epoch": 1.0398179059180577, |
| "grad_norm": 1.0625, |
| "learning_rate": 2.393217391304348e-06, |
| "loss": 0.2481, |
| "step": 14990 |
| }, |
| { |
| "epoch": 1.0405115976587904, |
| "grad_norm": 1.1875, |
| "learning_rate": 2.3914782608695655e-06, |
| "loss": 0.2157, |
| "step": 15000 |
| }, |
| { |
| "epoch": 1.0412052893995232, |
| "grad_norm": 1.515625, |
| "learning_rate": 2.389739130434783e-06, |
| "loss": 0.2339, |
| "step": 15010 |
| }, |
| { |
| "epoch": 1.0418989811402557, |
| "grad_norm": 1.1484375, |
| "learning_rate": 2.3880000000000003e-06, |
| "loss": 0.2366, |
| "step": 15020 |
| }, |
| { |
| "epoch": 1.0425926728809884, |
| "grad_norm": 1.0859375, |
| "learning_rate": 2.3862608695652176e-06, |
| "loss": 0.227, |
| "step": 15030 |
| }, |
| { |
| "epoch": 1.0432863646217212, |
| "grad_norm": 1.1796875, |
| "learning_rate": 2.384521739130435e-06, |
| "loss": 0.2355, |
| "step": 15040 |
| }, |
| { |
| "epoch": 1.043980056362454, |
| "grad_norm": 1.25, |
| "learning_rate": 2.3827826086956524e-06, |
| "loss": 0.2874, |
| "step": 15050 |
| }, |
| { |
| "epoch": 1.0446737481031867, |
| "grad_norm": 1.0703125, |
| "learning_rate": 2.3810434782608697e-06, |
| "loss": 0.235, |
| "step": 15060 |
| }, |
| { |
| "epoch": 1.0453674398439194, |
| "grad_norm": 1.0546875, |
| "learning_rate": 2.379304347826087e-06, |
| "loss": 0.2298, |
| "step": 15070 |
| }, |
| { |
| "epoch": 1.046061131584652, |
| "grad_norm": 1.328125, |
| "learning_rate": 2.3775652173913045e-06, |
| "loss": 0.2549, |
| "step": 15080 |
| }, |
| { |
| "epoch": 1.0467548233253847, |
| "grad_norm": 1.0859375, |
| "learning_rate": 2.375826086956522e-06, |
| "loss": 0.1974, |
| "step": 15090 |
| }, |
| { |
| "epoch": 1.0474485150661175, |
| "grad_norm": 1.359375, |
| "learning_rate": 2.3740869565217392e-06, |
| "loss": 0.2338, |
| "step": 15100 |
| }, |
| { |
| "epoch": 1.0481422068068502, |
| "grad_norm": 1.640625, |
| "learning_rate": 2.3723478260869566e-06, |
| "loss": 0.2567, |
| "step": 15110 |
| }, |
| { |
| "epoch": 1.048835898547583, |
| "grad_norm": 1.453125, |
| "learning_rate": 2.370608695652174e-06, |
| "loss": 0.2979, |
| "step": 15120 |
| }, |
| { |
| "epoch": 1.0495295902883157, |
| "grad_norm": 1.328125, |
| "learning_rate": 2.3688695652173913e-06, |
| "loss": 0.2831, |
| "step": 15130 |
| }, |
| { |
| "epoch": 1.0502232820290482, |
| "grad_norm": 1.15625, |
| "learning_rate": 2.3671304347826087e-06, |
| "loss": 0.2648, |
| "step": 15140 |
| }, |
| { |
| "epoch": 1.050916973769781, |
| "grad_norm": 1.1015625, |
| "learning_rate": 2.3653913043478265e-06, |
| "loss": 0.2652, |
| "step": 15150 |
| }, |
| { |
| "epoch": 1.0516106655105137, |
| "grad_norm": 0.89453125, |
| "learning_rate": 2.3636521739130435e-06, |
| "loss": 0.1974, |
| "step": 15160 |
| }, |
| { |
| "epoch": 1.0523043572512465, |
| "grad_norm": 1.0, |
| "learning_rate": 2.3619130434782613e-06, |
| "loss": 0.2288, |
| "step": 15170 |
| }, |
| { |
| "epoch": 1.0529980489919792, |
| "grad_norm": 1.0546875, |
| "learning_rate": 2.360173913043478e-06, |
| "loss": 0.2257, |
| "step": 15180 |
| }, |
| { |
| "epoch": 1.053691740732712, |
| "grad_norm": 1.0078125, |
| "learning_rate": 2.358434782608696e-06, |
| "loss": 0.2343, |
| "step": 15190 |
| }, |
| { |
| "epoch": 1.0543854324734445, |
| "grad_norm": 1.78125, |
| "learning_rate": 2.3566956521739134e-06, |
| "loss": 0.2327, |
| "step": 15200 |
| }, |
| { |
| "epoch": 1.0550791242141773, |
| "grad_norm": 1.1875, |
| "learning_rate": 2.3549565217391307e-06, |
| "loss": 0.2529, |
| "step": 15210 |
| }, |
| { |
| "epoch": 1.05577281595491, |
| "grad_norm": 1.09375, |
| "learning_rate": 2.353217391304348e-06, |
| "loss": 0.2498, |
| "step": 15220 |
| }, |
| { |
| "epoch": 1.0564665076956428, |
| "grad_norm": 1.046875, |
| "learning_rate": 2.3514782608695655e-06, |
| "loss": 0.2349, |
| "step": 15230 |
| }, |
| { |
| "epoch": 1.0571601994363755, |
| "grad_norm": 0.9765625, |
| "learning_rate": 2.349739130434783e-06, |
| "loss": 0.2656, |
| "step": 15240 |
| }, |
| { |
| "epoch": 1.0578538911771083, |
| "grad_norm": 1.25, |
| "learning_rate": 2.3480000000000002e-06, |
| "loss": 0.2531, |
| "step": 15250 |
| }, |
| { |
| "epoch": 1.0585475829178408, |
| "grad_norm": 1.6171875, |
| "learning_rate": 2.3462608695652176e-06, |
| "loss": 0.2521, |
| "step": 15260 |
| }, |
| { |
| "epoch": 1.0592412746585735, |
| "grad_norm": 1.375, |
| "learning_rate": 2.344521739130435e-06, |
| "loss": 0.266, |
| "step": 15270 |
| }, |
| { |
| "epoch": 1.0599349663993063, |
| "grad_norm": 1.171875, |
| "learning_rate": 2.3427826086956523e-06, |
| "loss": 0.2564, |
| "step": 15280 |
| }, |
| { |
| "epoch": 1.060628658140039, |
| "grad_norm": 1.1171875, |
| "learning_rate": 2.3410434782608697e-06, |
| "loss": 0.2466, |
| "step": 15290 |
| }, |
| { |
| "epoch": 1.0613223498807718, |
| "grad_norm": 1.125, |
| "learning_rate": 2.339304347826087e-06, |
| "loss": 0.2282, |
| "step": 15300 |
| }, |
| { |
| "epoch": 1.0620160416215045, |
| "grad_norm": 1.1796875, |
| "learning_rate": 2.3375652173913044e-06, |
| "loss": 0.3059, |
| "step": 15310 |
| }, |
| { |
| "epoch": 1.062709733362237, |
| "grad_norm": 1.265625, |
| "learning_rate": 2.335826086956522e-06, |
| "loss": 0.3653, |
| "step": 15320 |
| }, |
| { |
| "epoch": 1.0634034251029698, |
| "grad_norm": 1.265625, |
| "learning_rate": 2.3340869565217396e-06, |
| "loss": 0.228, |
| "step": 15330 |
| }, |
| { |
| "epoch": 1.0640971168437026, |
| "grad_norm": 1.0390625, |
| "learning_rate": 2.3323478260869566e-06, |
| "loss": 0.2498, |
| "step": 15340 |
| }, |
| { |
| "epoch": 1.0647908085844353, |
| "grad_norm": 1.0078125, |
| "learning_rate": 2.3306086956521744e-06, |
| "loss": 0.2772, |
| "step": 15350 |
| }, |
| { |
| "epoch": 1.065484500325168, |
| "grad_norm": 1.1015625, |
| "learning_rate": 2.3288695652173913e-06, |
| "loss": 0.2754, |
| "step": 15360 |
| }, |
| { |
| "epoch": 1.0661781920659008, |
| "grad_norm": 1.140625, |
| "learning_rate": 2.3271304347826087e-06, |
| "loss": 0.2358, |
| "step": 15370 |
| }, |
| { |
| "epoch": 1.0668718838066333, |
| "grad_norm": 1.265625, |
| "learning_rate": 2.3253913043478265e-06, |
| "loss": 0.3299, |
| "step": 15380 |
| }, |
| { |
| "epoch": 1.067565575547366, |
| "grad_norm": 1.1171875, |
| "learning_rate": 2.3236521739130434e-06, |
| "loss": 0.2327, |
| "step": 15390 |
| }, |
| { |
| "epoch": 1.0682592672880988, |
| "grad_norm": 1.1328125, |
| "learning_rate": 2.321913043478261e-06, |
| "loss": 0.2344, |
| "step": 15400 |
| }, |
| { |
| "epoch": 1.0689529590288316, |
| "grad_norm": 1.3203125, |
| "learning_rate": 2.320173913043478e-06, |
| "loss": 0.2527, |
| "step": 15410 |
| }, |
| { |
| "epoch": 1.0696466507695643, |
| "grad_norm": 1.625, |
| "learning_rate": 2.318434782608696e-06, |
| "loss": 0.2592, |
| "step": 15420 |
| }, |
| { |
| "epoch": 1.070340342510297, |
| "grad_norm": 1.3203125, |
| "learning_rate": 2.3166956521739133e-06, |
| "loss": 0.271, |
| "step": 15430 |
| }, |
| { |
| "epoch": 1.0710340342510296, |
| "grad_norm": 1.2109375, |
| "learning_rate": 2.3149565217391307e-06, |
| "loss": 0.2486, |
| "step": 15440 |
| }, |
| { |
| "epoch": 1.0717277259917624, |
| "grad_norm": 1.09375, |
| "learning_rate": 2.313217391304348e-06, |
| "loss": 0.2046, |
| "step": 15450 |
| }, |
| { |
| "epoch": 1.072421417732495, |
| "grad_norm": 1.3984375, |
| "learning_rate": 2.3114782608695654e-06, |
| "loss": 0.2947, |
| "step": 15460 |
| }, |
| { |
| "epoch": 1.0731151094732279, |
| "grad_norm": 1.078125, |
| "learning_rate": 2.309739130434783e-06, |
| "loss": 0.2328, |
| "step": 15470 |
| }, |
| { |
| "epoch": 1.0738088012139606, |
| "grad_norm": 1.2734375, |
| "learning_rate": 2.308e-06, |
| "loss": 0.2694, |
| "step": 15480 |
| }, |
| { |
| "epoch": 1.0745024929546934, |
| "grad_norm": 1.171875, |
| "learning_rate": 2.3062608695652176e-06, |
| "loss": 0.2282, |
| "step": 15490 |
| }, |
| { |
| "epoch": 1.0751961846954259, |
| "grad_norm": 1.15625, |
| "learning_rate": 2.304521739130435e-06, |
| "loss": 0.2505, |
| "step": 15500 |
| }, |
| { |
| "epoch": 1.0758898764361586, |
| "grad_norm": 1.34375, |
| "learning_rate": 2.3027826086956523e-06, |
| "loss": 0.219, |
| "step": 15510 |
| }, |
| { |
| "epoch": 1.0765835681768914, |
| "grad_norm": 1.2578125, |
| "learning_rate": 2.3010434782608697e-06, |
| "loss": 0.2595, |
| "step": 15520 |
| }, |
| { |
| "epoch": 1.0772772599176241, |
| "grad_norm": 0.9609375, |
| "learning_rate": 2.299304347826087e-06, |
| "loss": 0.2773, |
| "step": 15530 |
| }, |
| { |
| "epoch": 1.0779709516583569, |
| "grad_norm": 1.0546875, |
| "learning_rate": 2.2975652173913044e-06, |
| "loss": 0.2845, |
| "step": 15540 |
| }, |
| { |
| "epoch": 1.0786646433990896, |
| "grad_norm": 1.3203125, |
| "learning_rate": 2.2958260869565218e-06, |
| "loss": 0.2097, |
| "step": 15550 |
| }, |
| { |
| "epoch": 1.0793583351398222, |
| "grad_norm": 1.390625, |
| "learning_rate": 2.2940869565217396e-06, |
| "loss": 0.2575, |
| "step": 15560 |
| }, |
| { |
| "epoch": 1.080052026880555, |
| "grad_norm": 1.421875, |
| "learning_rate": 2.2923478260869565e-06, |
| "loss": 0.2349, |
| "step": 15570 |
| }, |
| { |
| "epoch": 1.0807457186212877, |
| "grad_norm": 1.203125, |
| "learning_rate": 2.2906086956521743e-06, |
| "loss": 0.2144, |
| "step": 15580 |
| }, |
| { |
| "epoch": 1.0814394103620204, |
| "grad_norm": 1.5234375, |
| "learning_rate": 2.2888695652173913e-06, |
| "loss": 0.2695, |
| "step": 15590 |
| }, |
| { |
| "epoch": 1.0821331021027532, |
| "grad_norm": 1.171875, |
| "learning_rate": 2.287130434782609e-06, |
| "loss": 0.2336, |
| "step": 15600 |
| }, |
| { |
| "epoch": 1.082826793843486, |
| "grad_norm": 1.046875, |
| "learning_rate": 2.2853913043478264e-06, |
| "loss": 0.2254, |
| "step": 15610 |
| }, |
| { |
| "epoch": 1.0835204855842184, |
| "grad_norm": 1.0546875, |
| "learning_rate": 2.283652173913044e-06, |
| "loss": 0.1932, |
| "step": 15620 |
| }, |
| { |
| "epoch": 1.0842141773249512, |
| "grad_norm": 1.40625, |
| "learning_rate": 2.281913043478261e-06, |
| "loss": 0.2222, |
| "step": 15630 |
| }, |
| { |
| "epoch": 1.084907869065684, |
| "grad_norm": 1.1640625, |
| "learning_rate": 2.2801739130434785e-06, |
| "loss": 0.197, |
| "step": 15640 |
| }, |
| { |
| "epoch": 1.0856015608064167, |
| "grad_norm": 1.203125, |
| "learning_rate": 2.278434782608696e-06, |
| "loss": 0.2951, |
| "step": 15650 |
| }, |
| { |
| "epoch": 1.0862952525471494, |
| "grad_norm": 1.1953125, |
| "learning_rate": 2.2766956521739133e-06, |
| "loss": 0.2323, |
| "step": 15660 |
| }, |
| { |
| "epoch": 1.0869889442878822, |
| "grad_norm": 1.2109375, |
| "learning_rate": 2.2749565217391307e-06, |
| "loss": 0.2331, |
| "step": 15670 |
| }, |
| { |
| "epoch": 1.0876826360286147, |
| "grad_norm": 1.0234375, |
| "learning_rate": 2.273217391304348e-06, |
| "loss": 0.2251, |
| "step": 15680 |
| }, |
| { |
| "epoch": 1.0883763277693475, |
| "grad_norm": 1.2265625, |
| "learning_rate": 2.2714782608695654e-06, |
| "loss": 0.2431, |
| "step": 15690 |
| }, |
| { |
| "epoch": 1.0890700195100802, |
| "grad_norm": 1.2109375, |
| "learning_rate": 2.2697391304347828e-06, |
| "loss": 0.2459, |
| "step": 15700 |
| }, |
| { |
| "epoch": 1.089763711250813, |
| "grad_norm": 1.046875, |
| "learning_rate": 2.268e-06, |
| "loss": 0.1969, |
| "step": 15710 |
| }, |
| { |
| "epoch": 1.0904574029915457, |
| "grad_norm": 1.3125, |
| "learning_rate": 2.2662608695652175e-06, |
| "loss": 0.2647, |
| "step": 15720 |
| }, |
| { |
| "epoch": 1.0911510947322784, |
| "grad_norm": 1.0546875, |
| "learning_rate": 2.264521739130435e-06, |
| "loss": 0.2552, |
| "step": 15730 |
| }, |
| { |
| "epoch": 1.091844786473011, |
| "grad_norm": 1.0625, |
| "learning_rate": 2.2627826086956523e-06, |
| "loss": 0.2258, |
| "step": 15740 |
| }, |
| { |
| "epoch": 1.0925384782137437, |
| "grad_norm": 1.203125, |
| "learning_rate": 2.2610434782608696e-06, |
| "loss": 0.268, |
| "step": 15750 |
| }, |
| { |
| "epoch": 1.0932321699544765, |
| "grad_norm": 1.09375, |
| "learning_rate": 2.259304347826087e-06, |
| "loss": 0.2232, |
| "step": 15760 |
| }, |
| { |
| "epoch": 1.0939258616952092, |
| "grad_norm": 1.625, |
| "learning_rate": 2.2575652173913044e-06, |
| "loss": 0.2888, |
| "step": 15770 |
| }, |
| { |
| "epoch": 1.094619553435942, |
| "grad_norm": 1.03125, |
| "learning_rate": 2.2558260869565217e-06, |
| "loss": 0.2236, |
| "step": 15780 |
| }, |
| { |
| "epoch": 1.0953132451766745, |
| "grad_norm": 1.3515625, |
| "learning_rate": 2.2540869565217395e-06, |
| "loss": 0.2405, |
| "step": 15790 |
| }, |
| { |
| "epoch": 1.0960069369174072, |
| "grad_norm": 1.484375, |
| "learning_rate": 2.2523478260869565e-06, |
| "loss": 0.2135, |
| "step": 15800 |
| }, |
| { |
| "epoch": 1.09670062865814, |
| "grad_norm": 1.171875, |
| "learning_rate": 2.2506086956521743e-06, |
| "loss": 0.2673, |
| "step": 15810 |
| }, |
| { |
| "epoch": 1.0973943203988727, |
| "grad_norm": 1.0, |
| "learning_rate": 2.2488695652173912e-06, |
| "loss": 0.2108, |
| "step": 15820 |
| }, |
| { |
| "epoch": 1.0980880121396055, |
| "grad_norm": 1.3046875, |
| "learning_rate": 2.247130434782609e-06, |
| "loss": 0.2335, |
| "step": 15830 |
| }, |
| { |
| "epoch": 1.0987817038803382, |
| "grad_norm": 1.3828125, |
| "learning_rate": 2.2453913043478264e-06, |
| "loss": 0.2494, |
| "step": 15840 |
| }, |
| { |
| "epoch": 1.099475395621071, |
| "grad_norm": 1.8125, |
| "learning_rate": 2.2436521739130438e-06, |
| "loss": 0.371, |
| "step": 15850 |
| }, |
| { |
| "epoch": 1.1001690873618035, |
| "grad_norm": 1.1171875, |
| "learning_rate": 2.241913043478261e-06, |
| "loss": 0.3074, |
| "step": 15860 |
| }, |
| { |
| "epoch": 1.1008627791025363, |
| "grad_norm": 1.15625, |
| "learning_rate": 2.2401739130434785e-06, |
| "loss": 0.2505, |
| "step": 15870 |
| }, |
| { |
| "epoch": 1.101556470843269, |
| "grad_norm": 1.0859375, |
| "learning_rate": 2.238434782608696e-06, |
| "loss": 0.2206, |
| "step": 15880 |
| }, |
| { |
| "epoch": 1.1022501625840018, |
| "grad_norm": 1.1015625, |
| "learning_rate": 2.2366956521739132e-06, |
| "loss": 0.28, |
| "step": 15890 |
| }, |
| { |
| "epoch": 1.1029438543247345, |
| "grad_norm": 1.1484375, |
| "learning_rate": 2.2349565217391306e-06, |
| "loss": 0.2172, |
| "step": 15900 |
| }, |
| { |
| "epoch": 1.103637546065467, |
| "grad_norm": 1.140625, |
| "learning_rate": 2.233217391304348e-06, |
| "loss": 0.2868, |
| "step": 15910 |
| }, |
| { |
| "epoch": 1.1043312378061998, |
| "grad_norm": 0.91796875, |
| "learning_rate": 2.2314782608695654e-06, |
| "loss": 0.2454, |
| "step": 15920 |
| }, |
| { |
| "epoch": 1.1050249295469325, |
| "grad_norm": 1.4609375, |
| "learning_rate": 2.2297391304347827e-06, |
| "loss": 0.2951, |
| "step": 15930 |
| }, |
| { |
| "epoch": 1.1057186212876653, |
| "grad_norm": 1.1640625, |
| "learning_rate": 2.228e-06, |
| "loss": 0.2183, |
| "step": 15940 |
| }, |
| { |
| "epoch": 1.106412313028398, |
| "grad_norm": 1.0546875, |
| "learning_rate": 2.226260869565218e-06, |
| "loss": 0.2182, |
| "step": 15950 |
| }, |
| { |
| "epoch": 1.1071060047691308, |
| "grad_norm": 1.1875, |
| "learning_rate": 2.224521739130435e-06, |
| "loss": 0.1977, |
| "step": 15960 |
| }, |
| { |
| "epoch": 1.1077996965098635, |
| "grad_norm": 1.4140625, |
| "learning_rate": 2.2227826086956526e-06, |
| "loss": 0.2098, |
| "step": 15970 |
| }, |
| { |
| "epoch": 1.108493388250596, |
| "grad_norm": 0.9375, |
| "learning_rate": 2.2210434782608696e-06, |
| "loss": 0.2466, |
| "step": 15980 |
| }, |
| { |
| "epoch": 1.1091870799913288, |
| "grad_norm": 1.0390625, |
| "learning_rate": 2.219304347826087e-06, |
| "loss": 0.2097, |
| "step": 15990 |
| }, |
| { |
| "epoch": 1.1098807717320616, |
| "grad_norm": 1.125, |
| "learning_rate": 2.2175652173913043e-06, |
| "loss": 0.2058, |
| "step": 16000 |
| }, |
| { |
| "epoch": 1.1105744634727943, |
| "grad_norm": 1.703125, |
| "learning_rate": 2.2158260869565217e-06, |
| "loss": 0.2597, |
| "step": 16010 |
| }, |
| { |
| "epoch": 1.111268155213527, |
| "grad_norm": 1.28125, |
| "learning_rate": 2.2140869565217395e-06, |
| "loss": 0.2305, |
| "step": 16020 |
| }, |
| { |
| "epoch": 1.1119618469542596, |
| "grad_norm": 1.2265625, |
| "learning_rate": 2.2123478260869564e-06, |
| "loss": 0.2129, |
| "step": 16030 |
| }, |
| { |
| "epoch": 1.1126555386949923, |
| "grad_norm": 0.984375, |
| "learning_rate": 2.2106086956521742e-06, |
| "loss": 0.2537, |
| "step": 16040 |
| }, |
| { |
| "epoch": 1.113349230435725, |
| "grad_norm": 1.0703125, |
| "learning_rate": 2.208869565217391e-06, |
| "loss": 0.2468, |
| "step": 16050 |
| }, |
| { |
| "epoch": 1.1140429221764578, |
| "grad_norm": 1.078125, |
| "learning_rate": 2.207130434782609e-06, |
| "loss": 0.2024, |
| "step": 16060 |
| }, |
| { |
| "epoch": 1.1147366139171906, |
| "grad_norm": 1.4375, |
| "learning_rate": 2.2053913043478263e-06, |
| "loss": 0.2803, |
| "step": 16070 |
| }, |
| { |
| "epoch": 1.1154303056579233, |
| "grad_norm": 1.2421875, |
| "learning_rate": 2.2036521739130437e-06, |
| "loss": 0.2135, |
| "step": 16080 |
| }, |
| { |
| "epoch": 1.116123997398656, |
| "grad_norm": 1.0078125, |
| "learning_rate": 2.201913043478261e-06, |
| "loss": 0.2603, |
| "step": 16090 |
| }, |
| { |
| "epoch": 1.1168176891393886, |
| "grad_norm": 1.0703125, |
| "learning_rate": 2.2001739130434785e-06, |
| "loss": 0.2369, |
| "step": 16100 |
| }, |
| { |
| "epoch": 1.1175113808801214, |
| "grad_norm": 1.1953125, |
| "learning_rate": 2.198434782608696e-06, |
| "loss": 0.2354, |
| "step": 16110 |
| }, |
| { |
| "epoch": 1.1182050726208541, |
| "grad_norm": 1.109375, |
| "learning_rate": 2.196695652173913e-06, |
| "loss": 0.1965, |
| "step": 16120 |
| }, |
| { |
| "epoch": 1.1188987643615869, |
| "grad_norm": 1.0546875, |
| "learning_rate": 2.1949565217391306e-06, |
| "loss": 0.2309, |
| "step": 16130 |
| }, |
| { |
| "epoch": 1.1195924561023196, |
| "grad_norm": 1.140625, |
| "learning_rate": 2.193217391304348e-06, |
| "loss": 0.244, |
| "step": 16140 |
| }, |
| { |
| "epoch": 1.1202861478430521, |
| "grad_norm": 1.3984375, |
| "learning_rate": 2.1914782608695653e-06, |
| "loss": 0.2881, |
| "step": 16150 |
| }, |
| { |
| "epoch": 1.1209798395837849, |
| "grad_norm": 1.109375, |
| "learning_rate": 2.1897391304347827e-06, |
| "loss": 0.2484, |
| "step": 16160 |
| }, |
| { |
| "epoch": 1.1216735313245176, |
| "grad_norm": 1.0703125, |
| "learning_rate": 2.188e-06, |
| "loss": 0.2577, |
| "step": 16170 |
| }, |
| { |
| "epoch": 1.1223672230652504, |
| "grad_norm": 1.25, |
| "learning_rate": 2.186260869565218e-06, |
| "loss": 0.2607, |
| "step": 16180 |
| }, |
| { |
| "epoch": 1.1230609148059831, |
| "grad_norm": 1.7109375, |
| "learning_rate": 2.184521739130435e-06, |
| "loss": 0.2481, |
| "step": 16190 |
| }, |
| { |
| "epoch": 1.1237546065467159, |
| "grad_norm": 1.0234375, |
| "learning_rate": 2.1827826086956526e-06, |
| "loss": 0.2204, |
| "step": 16200 |
| }, |
| { |
| "epoch": 1.1244482982874484, |
| "grad_norm": 1.328125, |
| "learning_rate": 2.1810434782608695e-06, |
| "loss": 0.2596, |
| "step": 16210 |
| }, |
| { |
| "epoch": 1.1251419900281812, |
| "grad_norm": 1.109375, |
| "learning_rate": 2.1793043478260873e-06, |
| "loss": 0.2827, |
| "step": 16220 |
| }, |
| { |
| "epoch": 1.125835681768914, |
| "grad_norm": 1.109375, |
| "learning_rate": 2.1775652173913047e-06, |
| "loss": 0.2533, |
| "step": 16230 |
| }, |
| { |
| "epoch": 1.1265293735096467, |
| "grad_norm": 0.953125, |
| "learning_rate": 2.175826086956522e-06, |
| "loss": 0.2578, |
| "step": 16240 |
| }, |
| { |
| "epoch": 1.1272230652503794, |
| "grad_norm": 1.125, |
| "learning_rate": 2.1740869565217395e-06, |
| "loss": 0.2142, |
| "step": 16250 |
| }, |
| { |
| "epoch": 1.1279167569911122, |
| "grad_norm": 1.1953125, |
| "learning_rate": 2.172347826086957e-06, |
| "loss": 0.2088, |
| "step": 16260 |
| }, |
| { |
| "epoch": 1.1286104487318447, |
| "grad_norm": 1.109375, |
| "learning_rate": 2.170608695652174e-06, |
| "loss": 0.2214, |
| "step": 16270 |
| }, |
| { |
| "epoch": 1.1293041404725774, |
| "grad_norm": 1.0703125, |
| "learning_rate": 2.1688695652173916e-06, |
| "loss": 0.2313, |
| "step": 16280 |
| }, |
| { |
| "epoch": 1.1299978322133102, |
| "grad_norm": 1.0859375, |
| "learning_rate": 2.167130434782609e-06, |
| "loss": 0.2488, |
| "step": 16290 |
| }, |
| { |
| "epoch": 1.130691523954043, |
| "grad_norm": 1.1171875, |
| "learning_rate": 2.1653913043478263e-06, |
| "loss": 0.2474, |
| "step": 16300 |
| }, |
| { |
| "epoch": 1.1313852156947757, |
| "grad_norm": 1.2421875, |
| "learning_rate": 2.1636521739130437e-06, |
| "loss": 0.2476, |
| "step": 16310 |
| }, |
| { |
| "epoch": 1.1320789074355084, |
| "grad_norm": 1.03125, |
| "learning_rate": 2.161913043478261e-06, |
| "loss": 0.2353, |
| "step": 16320 |
| }, |
| { |
| "epoch": 1.1327725991762412, |
| "grad_norm": 0.796875, |
| "learning_rate": 2.1601739130434784e-06, |
| "loss": 0.2156, |
| "step": 16330 |
| }, |
| { |
| "epoch": 1.1334662909169737, |
| "grad_norm": 1.1640625, |
| "learning_rate": 2.158434782608696e-06, |
| "loss": 0.2233, |
| "step": 16340 |
| }, |
| { |
| "epoch": 1.1341599826577065, |
| "grad_norm": 1.375, |
| "learning_rate": 2.156695652173913e-06, |
| "loss": 0.2461, |
| "step": 16350 |
| }, |
| { |
| "epoch": 1.1348536743984392, |
| "grad_norm": 0.96875, |
| "learning_rate": 2.1549565217391305e-06, |
| "loss": 0.2388, |
| "step": 16360 |
| }, |
| { |
| "epoch": 1.135547366139172, |
| "grad_norm": 1.140625, |
| "learning_rate": 2.153217391304348e-06, |
| "loss": 0.1843, |
| "step": 16370 |
| }, |
| { |
| "epoch": 1.1362410578799047, |
| "grad_norm": 1.328125, |
| "learning_rate": 2.1514782608695653e-06, |
| "loss": 0.2327, |
| "step": 16380 |
| }, |
| { |
| "epoch": 1.1369347496206372, |
| "grad_norm": 1.2421875, |
| "learning_rate": 2.1497391304347826e-06, |
| "loss": 0.364, |
| "step": 16390 |
| }, |
| { |
| "epoch": 1.13762844136137, |
| "grad_norm": 1.078125, |
| "learning_rate": 2.148e-06, |
| "loss": 0.2377, |
| "step": 16400 |
| }, |
| { |
| "epoch": 1.1383221331021027, |
| "grad_norm": 1.2734375, |
| "learning_rate": 2.146260869565218e-06, |
| "loss": 0.2195, |
| "step": 16410 |
| }, |
| { |
| "epoch": 1.1390158248428355, |
| "grad_norm": 1.2265625, |
| "learning_rate": 2.1445217391304348e-06, |
| "loss": 0.2238, |
| "step": 16420 |
| }, |
| { |
| "epoch": 1.1397095165835682, |
| "grad_norm": 1.1875, |
| "learning_rate": 2.1427826086956526e-06, |
| "loss": 0.2505, |
| "step": 16430 |
| }, |
| { |
| "epoch": 1.140403208324301, |
| "grad_norm": 1.0625, |
| "learning_rate": 2.1410434782608695e-06, |
| "loss": 0.2871, |
| "step": 16440 |
| }, |
| { |
| "epoch": 1.1410969000650335, |
| "grad_norm": 0.98828125, |
| "learning_rate": 2.1393043478260873e-06, |
| "loss": 0.248, |
| "step": 16450 |
| }, |
| { |
| "epoch": 1.1417905918057663, |
| "grad_norm": 0.98828125, |
| "learning_rate": 2.1375652173913047e-06, |
| "loss": 0.2396, |
| "step": 16460 |
| }, |
| { |
| "epoch": 1.142484283546499, |
| "grad_norm": 1.171875, |
| "learning_rate": 2.135826086956522e-06, |
| "loss": 0.225, |
| "step": 16470 |
| }, |
| { |
| "epoch": 1.1431779752872318, |
| "grad_norm": 1.0390625, |
| "learning_rate": 2.1340869565217394e-06, |
| "loss": 0.2223, |
| "step": 16480 |
| }, |
| { |
| "epoch": 1.1438716670279645, |
| "grad_norm": 1.2109375, |
| "learning_rate": 2.1323478260869568e-06, |
| "loss": 0.2289, |
| "step": 16490 |
| }, |
| { |
| "epoch": 1.1445653587686972, |
| "grad_norm": 1.2734375, |
| "learning_rate": 2.130608695652174e-06, |
| "loss": 0.2196, |
| "step": 16500 |
| }, |
| { |
| "epoch": 1.1452590505094298, |
| "grad_norm": 1.375, |
| "learning_rate": 2.1288695652173915e-06, |
| "loss": 0.3, |
| "step": 16510 |
| }, |
| { |
| "epoch": 1.1459527422501625, |
| "grad_norm": 1.078125, |
| "learning_rate": 2.127130434782609e-06, |
| "loss": 0.2349, |
| "step": 16520 |
| }, |
| { |
| "epoch": 1.1466464339908953, |
| "grad_norm": 1.2578125, |
| "learning_rate": 2.1253913043478263e-06, |
| "loss": 0.2661, |
| "step": 16530 |
| }, |
| { |
| "epoch": 1.147340125731628, |
| "grad_norm": 1.0546875, |
| "learning_rate": 2.1236521739130436e-06, |
| "loss": 0.2279, |
| "step": 16540 |
| }, |
| { |
| "epoch": 1.1480338174723608, |
| "grad_norm": 0.9375, |
| "learning_rate": 2.121913043478261e-06, |
| "loss": 0.1994, |
| "step": 16550 |
| }, |
| { |
| "epoch": 1.1487275092130935, |
| "grad_norm": 1.1328125, |
| "learning_rate": 2.1201739130434784e-06, |
| "loss": 0.2183, |
| "step": 16560 |
| }, |
| { |
| "epoch": 1.149421200953826, |
| "grad_norm": 1.2109375, |
| "learning_rate": 2.1184347826086957e-06, |
| "loss": 0.213, |
| "step": 16570 |
| }, |
| { |
| "epoch": 1.1501148926945588, |
| "grad_norm": 1.140625, |
| "learning_rate": 2.116695652173913e-06, |
| "loss": 0.2149, |
| "step": 16580 |
| }, |
| { |
| "epoch": 1.1508085844352915, |
| "grad_norm": 1.171875, |
| "learning_rate": 2.114956521739131e-06, |
| "loss": 0.2344, |
| "step": 16590 |
| }, |
| { |
| "epoch": 1.1515022761760243, |
| "grad_norm": 1.1640625, |
| "learning_rate": 2.113217391304348e-06, |
| "loss": 0.307, |
| "step": 16600 |
| }, |
| { |
| "epoch": 1.152195967916757, |
| "grad_norm": 1.125, |
| "learning_rate": 2.1114782608695652e-06, |
| "loss": 0.2201, |
| "step": 16610 |
| }, |
| { |
| "epoch": 1.1528896596574898, |
| "grad_norm": 1.234375, |
| "learning_rate": 2.1097391304347826e-06, |
| "loss": 0.2571, |
| "step": 16620 |
| }, |
| { |
| "epoch": 1.1535833513982223, |
| "grad_norm": 1.4140625, |
| "learning_rate": 2.108e-06, |
| "loss": 0.2089, |
| "step": 16630 |
| }, |
| { |
| "epoch": 1.154277043138955, |
| "grad_norm": 1.25, |
| "learning_rate": 2.1062608695652178e-06, |
| "loss": 0.3009, |
| "step": 16640 |
| }, |
| { |
| "epoch": 1.1549707348796878, |
| "grad_norm": 1.15625, |
| "learning_rate": 2.1045217391304347e-06, |
| "loss": 0.2993, |
| "step": 16650 |
| }, |
| { |
| "epoch": 1.1556644266204206, |
| "grad_norm": 1.03125, |
| "learning_rate": 2.1027826086956525e-06, |
| "loss": 0.2074, |
| "step": 16660 |
| }, |
| { |
| "epoch": 1.1563581183611533, |
| "grad_norm": 1.2578125, |
| "learning_rate": 2.1010434782608695e-06, |
| "loss": 0.2703, |
| "step": 16670 |
| }, |
| { |
| "epoch": 1.157051810101886, |
| "grad_norm": 0.91796875, |
| "learning_rate": 2.0993043478260873e-06, |
| "loss": 0.2232, |
| "step": 16680 |
| }, |
| { |
| "epoch": 1.1577455018426186, |
| "grad_norm": 1.1484375, |
| "learning_rate": 2.0975652173913046e-06, |
| "loss": 0.2249, |
| "step": 16690 |
| }, |
| { |
| "epoch": 1.1584391935833513, |
| "grad_norm": 1.3984375, |
| "learning_rate": 2.095826086956522e-06, |
| "loss": 0.2514, |
| "step": 16700 |
| }, |
| { |
| "epoch": 1.159132885324084, |
| "grad_norm": 1.6171875, |
| "learning_rate": 2.0940869565217394e-06, |
| "loss": 0.2142, |
| "step": 16710 |
| }, |
| { |
| "epoch": 1.1598265770648168, |
| "grad_norm": 1.1640625, |
| "learning_rate": 2.0923478260869567e-06, |
| "loss": 0.2381, |
| "step": 16720 |
| }, |
| { |
| "epoch": 1.1605202688055496, |
| "grad_norm": 1.359375, |
| "learning_rate": 2.090608695652174e-06, |
| "loss": 0.2305, |
| "step": 16730 |
| }, |
| { |
| "epoch": 1.1612139605462823, |
| "grad_norm": 1.203125, |
| "learning_rate": 2.0888695652173915e-06, |
| "loss": 0.2416, |
| "step": 16740 |
| }, |
| { |
| "epoch": 1.1619076522870149, |
| "grad_norm": 1.5625, |
| "learning_rate": 2.087130434782609e-06, |
| "loss": 0.3016, |
| "step": 16750 |
| }, |
| { |
| "epoch": 1.1626013440277476, |
| "grad_norm": 1.9765625, |
| "learning_rate": 2.0853913043478262e-06, |
| "loss": 0.3109, |
| "step": 16760 |
| }, |
| { |
| "epoch": 1.1632950357684804, |
| "grad_norm": 1.2734375, |
| "learning_rate": 2.0836521739130436e-06, |
| "loss": 0.2107, |
| "step": 16770 |
| }, |
| { |
| "epoch": 1.1639887275092131, |
| "grad_norm": 1.2421875, |
| "learning_rate": 2.081913043478261e-06, |
| "loss": 0.2386, |
| "step": 16780 |
| }, |
| { |
| "epoch": 1.1646824192499459, |
| "grad_norm": 1.1640625, |
| "learning_rate": 2.0801739130434783e-06, |
| "loss": 0.2904, |
| "step": 16790 |
| }, |
| { |
| "epoch": 1.1653761109906786, |
| "grad_norm": 1.078125, |
| "learning_rate": 2.0784347826086957e-06, |
| "loss": 0.2027, |
| "step": 16800 |
| }, |
| { |
| "epoch": 1.1660698027314111, |
| "grad_norm": 1.3046875, |
| "learning_rate": 2.076695652173913e-06, |
| "loss": 0.2178, |
| "step": 16810 |
| }, |
| { |
| "epoch": 1.166763494472144, |
| "grad_norm": 1.3828125, |
| "learning_rate": 2.074956521739131e-06, |
| "loss": 0.2418, |
| "step": 16820 |
| }, |
| { |
| "epoch": 1.1674571862128766, |
| "grad_norm": 1.4453125, |
| "learning_rate": 2.073217391304348e-06, |
| "loss": 0.2702, |
| "step": 16830 |
| }, |
| { |
| "epoch": 1.1681508779536094, |
| "grad_norm": 1.484375, |
| "learning_rate": 2.0714782608695656e-06, |
| "loss": 0.2903, |
| "step": 16840 |
| }, |
| { |
| "epoch": 1.1688445696943421, |
| "grad_norm": 1.7109375, |
| "learning_rate": 2.0697391304347826e-06, |
| "loss": 0.2985, |
| "step": 16850 |
| }, |
| { |
| "epoch": 1.1695382614350747, |
| "grad_norm": 1.0625, |
| "learning_rate": 2.0680000000000004e-06, |
| "loss": 0.2587, |
| "step": 16860 |
| }, |
| { |
| "epoch": 1.1702319531758074, |
| "grad_norm": 1.1171875, |
| "learning_rate": 2.0662608695652177e-06, |
| "loss": 0.2319, |
| "step": 16870 |
| }, |
| { |
| "epoch": 1.1709256449165402, |
| "grad_norm": 1.1796875, |
| "learning_rate": 2.064521739130435e-06, |
| "loss": 0.2573, |
| "step": 16880 |
| }, |
| { |
| "epoch": 1.171619336657273, |
| "grad_norm": 1.3515625, |
| "learning_rate": 2.0627826086956525e-06, |
| "loss": 0.2489, |
| "step": 16890 |
| }, |
| { |
| "epoch": 1.1723130283980057, |
| "grad_norm": 1.3828125, |
| "learning_rate": 2.06104347826087e-06, |
| "loss": 0.2214, |
| "step": 16900 |
| }, |
| { |
| "epoch": 1.1730067201387384, |
| "grad_norm": 1.15625, |
| "learning_rate": 2.0593043478260872e-06, |
| "loss": 0.2204, |
| "step": 16910 |
| }, |
| { |
| "epoch": 1.1737004118794712, |
| "grad_norm": 1.234375, |
| "learning_rate": 2.0575652173913046e-06, |
| "loss": 0.2512, |
| "step": 16920 |
| }, |
| { |
| "epoch": 1.1743941036202037, |
| "grad_norm": 1.3359375, |
| "learning_rate": 2.055826086956522e-06, |
| "loss": 0.2447, |
| "step": 16930 |
| }, |
| { |
| "epoch": 1.1750877953609364, |
| "grad_norm": 1.296875, |
| "learning_rate": 2.0540869565217393e-06, |
| "loss": 0.2006, |
| "step": 16940 |
| }, |
| { |
| "epoch": 1.1757814871016692, |
| "grad_norm": 1.09375, |
| "learning_rate": 2.0523478260869567e-06, |
| "loss": 0.2405, |
| "step": 16950 |
| }, |
| { |
| "epoch": 1.176475178842402, |
| "grad_norm": 1.5703125, |
| "learning_rate": 2.050608695652174e-06, |
| "loss": 0.2739, |
| "step": 16960 |
| }, |
| { |
| "epoch": 1.1771688705831347, |
| "grad_norm": 1.1171875, |
| "learning_rate": 2.0488695652173914e-06, |
| "loss": 0.2256, |
| "step": 16970 |
| }, |
| { |
| "epoch": 1.1778625623238672, |
| "grad_norm": 1.1640625, |
| "learning_rate": 2.047130434782609e-06, |
| "loss": 0.2264, |
| "step": 16980 |
| }, |
| { |
| "epoch": 1.1785562540646, |
| "grad_norm": 1.59375, |
| "learning_rate": 2.045391304347826e-06, |
| "loss": 0.2409, |
| "step": 16990 |
| }, |
| { |
| "epoch": 1.1792499458053327, |
| "grad_norm": 1.265625, |
| "learning_rate": 2.0436521739130436e-06, |
| "loss": 0.2711, |
| "step": 17000 |
| }, |
| { |
| "epoch": 1.1799436375460655, |
| "grad_norm": 1.1328125, |
| "learning_rate": 2.041913043478261e-06, |
| "loss": 0.2456, |
| "step": 17010 |
| }, |
| { |
| "epoch": 1.1806373292867982, |
| "grad_norm": 1.171875, |
| "learning_rate": 2.0401739130434783e-06, |
| "loss": 0.244, |
| "step": 17020 |
| }, |
| { |
| "epoch": 1.181331021027531, |
| "grad_norm": 1.5390625, |
| "learning_rate": 2.0384347826086957e-06, |
| "loss": 0.2305, |
| "step": 17030 |
| }, |
| { |
| "epoch": 1.1820247127682637, |
| "grad_norm": 0.91796875, |
| "learning_rate": 2.036695652173913e-06, |
| "loss": 0.2297, |
| "step": 17040 |
| }, |
| { |
| "epoch": 1.1827184045089962, |
| "grad_norm": 1.09375, |
| "learning_rate": 2.034956521739131e-06, |
| "loss": 0.2263, |
| "step": 17050 |
| }, |
| { |
| "epoch": 1.183412096249729, |
| "grad_norm": 1.6328125, |
| "learning_rate": 2.0332173913043478e-06, |
| "loss": 0.2718, |
| "step": 17060 |
| }, |
| { |
| "epoch": 1.1841057879904617, |
| "grad_norm": 1.3359375, |
| "learning_rate": 2.0314782608695656e-06, |
| "loss": 0.2617, |
| "step": 17070 |
| }, |
| { |
| "epoch": 1.1847994797311945, |
| "grad_norm": 1.3515625, |
| "learning_rate": 2.0297391304347825e-06, |
| "loss": 0.2891, |
| "step": 17080 |
| }, |
| { |
| "epoch": 1.1854931714719272, |
| "grad_norm": 1.25, |
| "learning_rate": 2.0280000000000003e-06, |
| "loss": 0.2944, |
| "step": 17090 |
| }, |
| { |
| "epoch": 1.1861868632126598, |
| "grad_norm": 1.140625, |
| "learning_rate": 2.0262608695652177e-06, |
| "loss": 0.2248, |
| "step": 17100 |
| }, |
| { |
| "epoch": 1.1868805549533925, |
| "grad_norm": 1.078125, |
| "learning_rate": 2.024521739130435e-06, |
| "loss": 0.2692, |
| "step": 17110 |
| }, |
| { |
| "epoch": 1.1875742466941253, |
| "grad_norm": 1.328125, |
| "learning_rate": 2.0227826086956524e-06, |
| "loss": 0.2349, |
| "step": 17120 |
| }, |
| { |
| "epoch": 1.188267938434858, |
| "grad_norm": 1.234375, |
| "learning_rate": 2.02104347826087e-06, |
| "loss": 0.2191, |
| "step": 17130 |
| }, |
| { |
| "epoch": 1.1889616301755908, |
| "grad_norm": 1.0078125, |
| "learning_rate": 2.019304347826087e-06, |
| "loss": 0.2398, |
| "step": 17140 |
| }, |
| { |
| "epoch": 1.1896553219163235, |
| "grad_norm": 1.2578125, |
| "learning_rate": 2.0175652173913045e-06, |
| "loss": 0.2761, |
| "step": 17150 |
| }, |
| { |
| "epoch": 1.1903490136570563, |
| "grad_norm": 1.171875, |
| "learning_rate": 2.015826086956522e-06, |
| "loss": 0.2036, |
| "step": 17160 |
| }, |
| { |
| "epoch": 1.1910427053977888, |
| "grad_norm": 1.609375, |
| "learning_rate": 2.0140869565217393e-06, |
| "loss": 0.2639, |
| "step": 17170 |
| }, |
| { |
| "epoch": 1.1917363971385215, |
| "grad_norm": 1.25, |
| "learning_rate": 2.0123478260869567e-06, |
| "loss": 0.195, |
| "step": 17180 |
| }, |
| { |
| "epoch": 1.1924300888792543, |
| "grad_norm": 1.3125, |
| "learning_rate": 2.010608695652174e-06, |
| "loss": 0.2145, |
| "step": 17190 |
| }, |
| { |
| "epoch": 1.193123780619987, |
| "grad_norm": 1.84375, |
| "learning_rate": 2.0088695652173914e-06, |
| "loss": 0.3457, |
| "step": 17200 |
| }, |
| { |
| "epoch": 1.1938174723607198, |
| "grad_norm": 1.28125, |
| "learning_rate": 2.007130434782609e-06, |
| "loss": 0.2548, |
| "step": 17210 |
| }, |
| { |
| "epoch": 1.1945111641014523, |
| "grad_norm": 1.0234375, |
| "learning_rate": 2.005391304347826e-06, |
| "loss": 0.24, |
| "step": 17220 |
| }, |
| { |
| "epoch": 1.195204855842185, |
| "grad_norm": 1.1640625, |
| "learning_rate": 2.0036521739130435e-06, |
| "loss": 0.2872, |
| "step": 17230 |
| }, |
| { |
| "epoch": 1.1958985475829178, |
| "grad_norm": 1.296875, |
| "learning_rate": 2.001913043478261e-06, |
| "loss": 0.231, |
| "step": 17240 |
| }, |
| { |
| "epoch": 1.1965922393236506, |
| "grad_norm": 1.3515625, |
| "learning_rate": 2.0001739130434783e-06, |
| "loss": 0.2923, |
| "step": 17250 |
| }, |
| { |
| "epoch": 1.1972859310643833, |
| "grad_norm": 1.2109375, |
| "learning_rate": 1.9984347826086956e-06, |
| "loss": 0.2832, |
| "step": 17260 |
| }, |
| { |
| "epoch": 1.197979622805116, |
| "grad_norm": 1.4921875, |
| "learning_rate": 1.996695652173913e-06, |
| "loss": 0.2875, |
| "step": 17270 |
| }, |
| { |
| "epoch": 1.1986733145458488, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.994956521739131e-06, |
| "loss": 0.2527, |
| "step": 17280 |
| }, |
| { |
| "epoch": 1.1993670062865813, |
| "grad_norm": 1.265625, |
| "learning_rate": 1.9932173913043477e-06, |
| "loss": 0.2425, |
| "step": 17290 |
| }, |
| { |
| "epoch": 1.200060698027314, |
| "grad_norm": 1.265625, |
| "learning_rate": 1.9914782608695655e-06, |
| "loss": 0.2546, |
| "step": 17300 |
| }, |
| { |
| "epoch": 1.2007543897680468, |
| "grad_norm": 1.4140625, |
| "learning_rate": 1.9897391304347825e-06, |
| "loss": 0.255, |
| "step": 17310 |
| }, |
| { |
| "epoch": 1.2014480815087796, |
| "grad_norm": 0.9609375, |
| "learning_rate": 1.9880000000000003e-06, |
| "loss": 0.2478, |
| "step": 17320 |
| }, |
| { |
| "epoch": 1.2021417732495123, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.9862608695652176e-06, |
| "loss": 0.2083, |
| "step": 17330 |
| }, |
| { |
| "epoch": 1.2028354649902449, |
| "grad_norm": 1.5078125, |
| "learning_rate": 1.984521739130435e-06, |
| "loss": 0.2753, |
| "step": 17340 |
| }, |
| { |
| "epoch": 1.2035291567309776, |
| "grad_norm": 1.2265625, |
| "learning_rate": 1.9827826086956524e-06, |
| "loss": 0.3128, |
| "step": 17350 |
| }, |
| { |
| "epoch": 1.2042228484717103, |
| "grad_norm": 1.0, |
| "learning_rate": 1.9810434782608698e-06, |
| "loss": 0.2478, |
| "step": 17360 |
| }, |
| { |
| "epoch": 1.204916540212443, |
| "grad_norm": 1.15625, |
| "learning_rate": 1.979304347826087e-06, |
| "loss": 0.328, |
| "step": 17370 |
| }, |
| { |
| "epoch": 1.2056102319531758, |
| "grad_norm": 1.1875, |
| "learning_rate": 1.9775652173913045e-06, |
| "loss": 0.2781, |
| "step": 17380 |
| }, |
| { |
| "epoch": 1.2063039236939086, |
| "grad_norm": 1.203125, |
| "learning_rate": 1.975826086956522e-06, |
| "loss": 0.3133, |
| "step": 17390 |
| }, |
| { |
| "epoch": 1.2069976154346413, |
| "grad_norm": 1.140625, |
| "learning_rate": 1.9740869565217392e-06, |
| "loss": 0.212, |
| "step": 17400 |
| }, |
| { |
| "epoch": 1.2076913071753739, |
| "grad_norm": 1.3515625, |
| "learning_rate": 1.9723478260869566e-06, |
| "loss": 0.2839, |
| "step": 17410 |
| }, |
| { |
| "epoch": 1.2083849989161066, |
| "grad_norm": 1.34375, |
| "learning_rate": 1.970608695652174e-06, |
| "loss": 0.2442, |
| "step": 17420 |
| }, |
| { |
| "epoch": 1.2090786906568394, |
| "grad_norm": 1.28125, |
| "learning_rate": 1.9688695652173914e-06, |
| "loss": 0.2751, |
| "step": 17430 |
| }, |
| { |
| "epoch": 1.2097723823975721, |
| "grad_norm": 1.265625, |
| "learning_rate": 1.967130434782609e-06, |
| "loss": 0.2529, |
| "step": 17440 |
| }, |
| { |
| "epoch": 1.2104660741383049, |
| "grad_norm": 1.3125, |
| "learning_rate": 1.965391304347826e-06, |
| "loss": 0.2791, |
| "step": 17450 |
| }, |
| { |
| "epoch": 1.2111597658790374, |
| "grad_norm": 1.2109375, |
| "learning_rate": 1.963652173913044e-06, |
| "loss": 0.2276, |
| "step": 17460 |
| }, |
| { |
| "epoch": 1.2118534576197701, |
| "grad_norm": 1.7890625, |
| "learning_rate": 1.961913043478261e-06, |
| "loss": 0.2515, |
| "step": 17470 |
| }, |
| { |
| "epoch": 1.212547149360503, |
| "grad_norm": 1.140625, |
| "learning_rate": 1.9601739130434786e-06, |
| "loss": 0.2579, |
| "step": 17480 |
| }, |
| { |
| "epoch": 1.2132408411012356, |
| "grad_norm": 0.98046875, |
| "learning_rate": 1.9584347826086956e-06, |
| "loss": 0.2222, |
| "step": 17490 |
| }, |
| { |
| "epoch": 1.2139345328419684, |
| "grad_norm": 0.98046875, |
| "learning_rate": 1.9566956521739134e-06, |
| "loss": 0.2072, |
| "step": 17500 |
| }, |
| { |
| "epoch": 1.2146282245827011, |
| "grad_norm": 1.203125, |
| "learning_rate": 1.9549565217391308e-06, |
| "loss": 0.2278, |
| "step": 17510 |
| }, |
| { |
| "epoch": 1.215321916323434, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.953217391304348e-06, |
| "loss": 0.2338, |
| "step": 17520 |
| }, |
| { |
| "epoch": 1.2160156080641664, |
| "grad_norm": 1.109375, |
| "learning_rate": 1.9514782608695655e-06, |
| "loss": 0.235, |
| "step": 17530 |
| }, |
| { |
| "epoch": 1.2167092998048992, |
| "grad_norm": 1.40625, |
| "learning_rate": 1.9497391304347824e-06, |
| "loss": 0.2458, |
| "step": 17540 |
| }, |
| { |
| "epoch": 1.217402991545632, |
| "grad_norm": 1.21875, |
| "learning_rate": 1.9480000000000002e-06, |
| "loss": 0.2322, |
| "step": 17550 |
| }, |
| { |
| "epoch": 1.2180966832863647, |
| "grad_norm": 0.8671875, |
| "learning_rate": 1.9462608695652176e-06, |
| "loss": 0.2301, |
| "step": 17560 |
| }, |
| { |
| "epoch": 1.2187903750270974, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.944521739130435e-06, |
| "loss": 0.2707, |
| "step": 17570 |
| }, |
| { |
| "epoch": 1.21948406676783, |
| "grad_norm": 1.3046875, |
| "learning_rate": 1.9427826086956524e-06, |
| "loss": 0.2158, |
| "step": 17580 |
| }, |
| { |
| "epoch": 1.2201777585085627, |
| "grad_norm": 1.28125, |
| "learning_rate": 1.9410434782608697e-06, |
| "loss": 0.2404, |
| "step": 17590 |
| }, |
| { |
| "epoch": 1.2208714502492954, |
| "grad_norm": 1.1484375, |
| "learning_rate": 1.939304347826087e-06, |
| "loss": 0.2562, |
| "step": 17600 |
| }, |
| { |
| "epoch": 1.2215651419900282, |
| "grad_norm": 1.2109375, |
| "learning_rate": 1.9375652173913045e-06, |
| "loss": 0.2154, |
| "step": 17610 |
| }, |
| { |
| "epoch": 1.222258833730761, |
| "grad_norm": 1.2890625, |
| "learning_rate": 1.935826086956522e-06, |
| "loss": 0.2309, |
| "step": 17620 |
| }, |
| { |
| "epoch": 1.2229525254714937, |
| "grad_norm": 1.296875, |
| "learning_rate": 1.934086956521739e-06, |
| "loss": 0.229, |
| "step": 17630 |
| }, |
| { |
| "epoch": 1.2236462172122264, |
| "grad_norm": 1.453125, |
| "learning_rate": 1.9323478260869566e-06, |
| "loss": 0.2428, |
| "step": 17640 |
| }, |
| { |
| "epoch": 1.224339908952959, |
| "grad_norm": 1.1171875, |
| "learning_rate": 1.930608695652174e-06, |
| "loss": 0.2227, |
| "step": 17650 |
| }, |
| { |
| "epoch": 1.2250336006936917, |
| "grad_norm": 1.59375, |
| "learning_rate": 1.9288695652173913e-06, |
| "loss": 0.3012, |
| "step": 17660 |
| }, |
| { |
| "epoch": 1.2257272924344245, |
| "grad_norm": 0.9765625, |
| "learning_rate": 1.927130434782609e-06, |
| "loss": 0.2796, |
| "step": 17670 |
| }, |
| { |
| "epoch": 1.2264209841751572, |
| "grad_norm": 0.953125, |
| "learning_rate": 1.925391304347826e-06, |
| "loss": 0.2393, |
| "step": 17680 |
| }, |
| { |
| "epoch": 1.22711467591589, |
| "grad_norm": 1.3984375, |
| "learning_rate": 1.923652173913044e-06, |
| "loss": 0.2685, |
| "step": 17690 |
| }, |
| { |
| "epoch": 1.2278083676566225, |
| "grad_norm": 1.15625, |
| "learning_rate": 1.921913043478261e-06, |
| "loss": 0.2598, |
| "step": 17700 |
| }, |
| { |
| "epoch": 1.2285020593973552, |
| "grad_norm": 1.0625, |
| "learning_rate": 1.9201739130434786e-06, |
| "loss": 0.237, |
| "step": 17710 |
| }, |
| { |
| "epoch": 1.229195751138088, |
| "grad_norm": 0.9765625, |
| "learning_rate": 1.9184347826086955e-06, |
| "loss": 0.2421, |
| "step": 17720 |
| }, |
| { |
| "epoch": 1.2298894428788207, |
| "grad_norm": 1.1484375, |
| "learning_rate": 1.9166956521739133e-06, |
| "loss": 0.2501, |
| "step": 17730 |
| }, |
| { |
| "epoch": 1.2305831346195535, |
| "grad_norm": 1.1640625, |
| "learning_rate": 1.9149565217391307e-06, |
| "loss": 0.21, |
| "step": 17740 |
| }, |
| { |
| "epoch": 1.2312768263602862, |
| "grad_norm": 1.1328125, |
| "learning_rate": 1.913217391304348e-06, |
| "loss": 0.2558, |
| "step": 17750 |
| }, |
| { |
| "epoch": 1.2319705181010188, |
| "grad_norm": 1.203125, |
| "learning_rate": 1.9114782608695655e-06, |
| "loss": 0.2314, |
| "step": 17760 |
| }, |
| { |
| "epoch": 1.2326642098417515, |
| "grad_norm": 1.6171875, |
| "learning_rate": 1.909739130434783e-06, |
| "loss": 0.2358, |
| "step": 17770 |
| }, |
| { |
| "epoch": 1.2333579015824843, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.908e-06, |
| "loss": 0.2434, |
| "step": 17780 |
| }, |
| { |
| "epoch": 1.234051593323217, |
| "grad_norm": 1.3515625, |
| "learning_rate": 1.9062608695652176e-06, |
| "loss": 0.2677, |
| "step": 17790 |
| }, |
| { |
| "epoch": 1.2347452850639498, |
| "grad_norm": 1.21875, |
| "learning_rate": 1.904521739130435e-06, |
| "loss": 0.2215, |
| "step": 17800 |
| }, |
| { |
| "epoch": 1.2354389768046825, |
| "grad_norm": 1.0, |
| "learning_rate": 1.9027826086956525e-06, |
| "loss": 0.2329, |
| "step": 17810 |
| }, |
| { |
| "epoch": 1.236132668545415, |
| "grad_norm": 1.203125, |
| "learning_rate": 1.9010434782608697e-06, |
| "loss": 0.2311, |
| "step": 17820 |
| }, |
| { |
| "epoch": 1.2368263602861478, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.8993043478260873e-06, |
| "loss": 0.2237, |
| "step": 17830 |
| }, |
| { |
| "epoch": 1.2375200520268805, |
| "grad_norm": 1.3984375, |
| "learning_rate": 1.8975652173913044e-06, |
| "loss": 0.254, |
| "step": 17840 |
| }, |
| { |
| "epoch": 1.2382137437676133, |
| "grad_norm": 1.171875, |
| "learning_rate": 1.8958260869565218e-06, |
| "loss": 0.2176, |
| "step": 17850 |
| }, |
| { |
| "epoch": 1.238907435508346, |
| "grad_norm": 1.3671875, |
| "learning_rate": 1.8940869565217394e-06, |
| "loss": 0.2342, |
| "step": 17860 |
| }, |
| { |
| "epoch": 1.2396011272490788, |
| "grad_norm": 1.0703125, |
| "learning_rate": 1.8923478260869565e-06, |
| "loss": 0.2417, |
| "step": 17870 |
| }, |
| { |
| "epoch": 1.2402948189898113, |
| "grad_norm": 1.2109375, |
| "learning_rate": 1.8906086956521741e-06, |
| "loss": 0.2707, |
| "step": 17880 |
| }, |
| { |
| "epoch": 1.240988510730544, |
| "grad_norm": 1.484375, |
| "learning_rate": 1.8888695652173913e-06, |
| "loss": 0.2278, |
| "step": 17890 |
| }, |
| { |
| "epoch": 1.2416822024712768, |
| "grad_norm": 1.203125, |
| "learning_rate": 1.8871304347826089e-06, |
| "loss": 0.2455, |
| "step": 17900 |
| }, |
| { |
| "epoch": 1.2423758942120096, |
| "grad_norm": 1.34375, |
| "learning_rate": 1.8853913043478262e-06, |
| "loss": 0.2575, |
| "step": 17910 |
| }, |
| { |
| "epoch": 1.2430695859527423, |
| "grad_norm": 1.5, |
| "learning_rate": 1.8836521739130436e-06, |
| "loss": 0.2959, |
| "step": 17920 |
| }, |
| { |
| "epoch": 1.243763277693475, |
| "grad_norm": 1.234375, |
| "learning_rate": 1.881913043478261e-06, |
| "loss": 0.2936, |
| "step": 17930 |
| }, |
| { |
| "epoch": 1.2444569694342076, |
| "grad_norm": 1.2265625, |
| "learning_rate": 1.8801739130434786e-06, |
| "loss": 0.3073, |
| "step": 17940 |
| }, |
| { |
| "epoch": 1.2451506611749403, |
| "grad_norm": 1.3125, |
| "learning_rate": 1.8784347826086957e-06, |
| "loss": 0.2216, |
| "step": 17950 |
| }, |
| { |
| "epoch": 1.245844352915673, |
| "grad_norm": 1.328125, |
| "learning_rate": 1.8766956521739133e-06, |
| "loss": 0.2443, |
| "step": 17960 |
| }, |
| { |
| "epoch": 1.2465380446564058, |
| "grad_norm": 1.15625, |
| "learning_rate": 1.8749565217391305e-06, |
| "loss": 0.2522, |
| "step": 17970 |
| }, |
| { |
| "epoch": 1.2472317363971386, |
| "grad_norm": 1.0546875, |
| "learning_rate": 1.873217391304348e-06, |
| "loss": 0.2404, |
| "step": 17980 |
| }, |
| { |
| "epoch": 1.2479254281378713, |
| "grad_norm": 1.3046875, |
| "learning_rate": 1.8714782608695652e-06, |
| "loss": 0.2511, |
| "step": 17990 |
| }, |
| { |
| "epoch": 1.2486191198786039, |
| "grad_norm": 0.95703125, |
| "learning_rate": 1.8697391304347828e-06, |
| "loss": 0.3144, |
| "step": 18000 |
| }, |
| { |
| "epoch": 1.2493128116193366, |
| "grad_norm": 1.0703125, |
| "learning_rate": 1.8680000000000002e-06, |
| "loss": 0.2274, |
| "step": 18010 |
| }, |
| { |
| "epoch": 1.2500065033600694, |
| "grad_norm": 1.2421875, |
| "learning_rate": 1.8662608695652175e-06, |
| "loss": 0.3148, |
| "step": 18020 |
| }, |
| { |
| "epoch": 1.250700195100802, |
| "grad_norm": 1.0546875, |
| "learning_rate": 1.864521739130435e-06, |
| "loss": 0.2123, |
| "step": 18030 |
| }, |
| { |
| "epoch": 1.2513938868415349, |
| "grad_norm": 1.25, |
| "learning_rate": 1.8627826086956525e-06, |
| "loss": 0.2383, |
| "step": 18040 |
| }, |
| { |
| "epoch": 1.2520875785822674, |
| "grad_norm": 1.140625, |
| "learning_rate": 1.8610434782608696e-06, |
| "loss": 0.2308, |
| "step": 18050 |
| }, |
| { |
| "epoch": 1.2527812703230001, |
| "grad_norm": 1.1328125, |
| "learning_rate": 1.8593043478260872e-06, |
| "loss": 0.2131, |
| "step": 18060 |
| }, |
| { |
| "epoch": 1.2534749620637329, |
| "grad_norm": 1.1640625, |
| "learning_rate": 1.8575652173913044e-06, |
| "loss": 0.264, |
| "step": 18070 |
| }, |
| { |
| "epoch": 1.2541686538044656, |
| "grad_norm": 1.1953125, |
| "learning_rate": 1.855826086956522e-06, |
| "loss": 0.2561, |
| "step": 18080 |
| }, |
| { |
| "epoch": 1.2548623455451984, |
| "grad_norm": 1.5078125, |
| "learning_rate": 1.8540869565217393e-06, |
| "loss": 0.3174, |
| "step": 18090 |
| }, |
| { |
| "epoch": 1.2555560372859311, |
| "grad_norm": 1.1015625, |
| "learning_rate": 1.8523478260869567e-06, |
| "loss": 0.2361, |
| "step": 18100 |
| }, |
| { |
| "epoch": 1.2562497290266639, |
| "grad_norm": 1.4765625, |
| "learning_rate": 1.850608695652174e-06, |
| "loss": 0.2928, |
| "step": 18110 |
| }, |
| { |
| "epoch": 1.2569434207673966, |
| "grad_norm": 1.21875, |
| "learning_rate": 1.8488695652173917e-06, |
| "loss": 0.1963, |
| "step": 18120 |
| }, |
| { |
| "epoch": 1.2576371125081292, |
| "grad_norm": 1.25, |
| "learning_rate": 1.8471304347826088e-06, |
| "loss": 0.261, |
| "step": 18130 |
| }, |
| { |
| "epoch": 1.258330804248862, |
| "grad_norm": 0.95703125, |
| "learning_rate": 1.8453913043478264e-06, |
| "loss": 0.2202, |
| "step": 18140 |
| }, |
| { |
| "epoch": 1.2590244959895946, |
| "grad_norm": 1.21875, |
| "learning_rate": 1.8436521739130436e-06, |
| "loss": 0.3148, |
| "step": 18150 |
| }, |
| { |
| "epoch": 1.2597181877303274, |
| "grad_norm": 1.1484375, |
| "learning_rate": 1.841913043478261e-06, |
| "loss": 0.2427, |
| "step": 18160 |
| }, |
| { |
| "epoch": 1.26041187947106, |
| "grad_norm": 1.125, |
| "learning_rate": 1.8401739130434785e-06, |
| "loss": 0.2313, |
| "step": 18170 |
| }, |
| { |
| "epoch": 1.2611055712117927, |
| "grad_norm": 1.40625, |
| "learning_rate": 1.8384347826086957e-06, |
| "loss": 0.2316, |
| "step": 18180 |
| }, |
| { |
| "epoch": 1.2617992629525254, |
| "grad_norm": 1.3203125, |
| "learning_rate": 1.8366956521739133e-06, |
| "loss": 0.2298, |
| "step": 18190 |
| }, |
| { |
| "epoch": 1.2624929546932582, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.8349565217391304e-06, |
| "loss": 0.2165, |
| "step": 18200 |
| }, |
| { |
| "epoch": 1.263186646433991, |
| "grad_norm": 1.296875, |
| "learning_rate": 1.833217391304348e-06, |
| "loss": 0.2387, |
| "step": 18210 |
| }, |
| { |
| "epoch": 1.2638803381747237, |
| "grad_norm": 1.0703125, |
| "learning_rate": 1.8314782608695652e-06, |
| "loss": 0.2883, |
| "step": 18220 |
| }, |
| { |
| "epoch": 1.2645740299154564, |
| "grad_norm": 1.6015625, |
| "learning_rate": 1.8297391304347827e-06, |
| "loss": 0.2552, |
| "step": 18230 |
| }, |
| { |
| "epoch": 1.265267721656189, |
| "grad_norm": 1.2734375, |
| "learning_rate": 1.8280000000000001e-06, |
| "loss": 0.2563, |
| "step": 18240 |
| }, |
| { |
| "epoch": 1.2659614133969217, |
| "grad_norm": 1.0859375, |
| "learning_rate": 1.8262608695652175e-06, |
| "loss": 0.2093, |
| "step": 18250 |
| }, |
| { |
| "epoch": 1.2666551051376544, |
| "grad_norm": 1.2265625, |
| "learning_rate": 1.8245217391304349e-06, |
| "loss": 0.241, |
| "step": 18260 |
| }, |
| { |
| "epoch": 1.2673487968783872, |
| "grad_norm": 1.4140625, |
| "learning_rate": 1.8227826086956524e-06, |
| "loss": 0.2887, |
| "step": 18270 |
| }, |
| { |
| "epoch": 1.26804248861912, |
| "grad_norm": 1.203125, |
| "learning_rate": 1.8210434782608696e-06, |
| "loss": 0.2639, |
| "step": 18280 |
| }, |
| { |
| "epoch": 1.2687361803598525, |
| "grad_norm": 1.1640625, |
| "learning_rate": 1.8193043478260872e-06, |
| "loss": 0.2598, |
| "step": 18290 |
| }, |
| { |
| "epoch": 1.2694298721005852, |
| "grad_norm": 1.1640625, |
| "learning_rate": 1.8175652173913043e-06, |
| "loss": 0.248, |
| "step": 18300 |
| }, |
| { |
| "epoch": 1.270123563841318, |
| "grad_norm": 1.296875, |
| "learning_rate": 1.815826086956522e-06, |
| "loss": 0.1977, |
| "step": 18310 |
| }, |
| { |
| "epoch": 1.2708172555820507, |
| "grad_norm": 1.5859375, |
| "learning_rate": 1.8140869565217393e-06, |
| "loss": 0.2536, |
| "step": 18320 |
| }, |
| { |
| "epoch": 1.2715109473227835, |
| "grad_norm": 1.1484375, |
| "learning_rate": 1.8123478260869567e-06, |
| "loss": 0.2557, |
| "step": 18330 |
| }, |
| { |
| "epoch": 1.2722046390635162, |
| "grad_norm": 1.3828125, |
| "learning_rate": 1.810608695652174e-06, |
| "loss": 0.3069, |
| "step": 18340 |
| }, |
| { |
| "epoch": 1.272898330804249, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.8088695652173916e-06, |
| "loss": 0.2315, |
| "step": 18350 |
| }, |
| { |
| "epoch": 1.2735920225449815, |
| "grad_norm": 1.2890625, |
| "learning_rate": 1.8071304347826088e-06, |
| "loss": 0.202, |
| "step": 18360 |
| }, |
| { |
| "epoch": 1.2742857142857142, |
| "grad_norm": 1.1796875, |
| "learning_rate": 1.8053913043478264e-06, |
| "loss": 0.304, |
| "step": 18370 |
| }, |
| { |
| "epoch": 1.274979406026447, |
| "grad_norm": 1.59375, |
| "learning_rate": 1.8036521739130435e-06, |
| "loss": 0.259, |
| "step": 18380 |
| }, |
| { |
| "epoch": 1.2756730977671797, |
| "grad_norm": 1.2578125, |
| "learning_rate": 1.8019130434782611e-06, |
| "loss": 0.2263, |
| "step": 18390 |
| }, |
| { |
| "epoch": 1.2763667895079125, |
| "grad_norm": 1.1328125, |
| "learning_rate": 1.8001739130434785e-06, |
| "loss": 0.2381, |
| "step": 18400 |
| }, |
| { |
| "epoch": 1.277060481248645, |
| "grad_norm": 1.5, |
| "learning_rate": 1.7984347826086958e-06, |
| "loss": 0.2154, |
| "step": 18410 |
| }, |
| { |
| "epoch": 1.2777541729893778, |
| "grad_norm": 1.28125, |
| "learning_rate": 1.7966956521739132e-06, |
| "loss": 0.2191, |
| "step": 18420 |
| }, |
| { |
| "epoch": 1.2784478647301105, |
| "grad_norm": 1.0, |
| "learning_rate": 1.7949565217391308e-06, |
| "loss": 0.2516, |
| "step": 18430 |
| }, |
| { |
| "epoch": 1.2791415564708433, |
| "grad_norm": 1.2109375, |
| "learning_rate": 1.793217391304348e-06, |
| "loss": 0.2271, |
| "step": 18440 |
| }, |
| { |
| "epoch": 1.279835248211576, |
| "grad_norm": 1.1015625, |
| "learning_rate": 1.7914782608695655e-06, |
| "loss": 0.2308, |
| "step": 18450 |
| }, |
| { |
| "epoch": 1.2805289399523088, |
| "grad_norm": 1.25, |
| "learning_rate": 1.7897391304347827e-06, |
| "loss": 0.3075, |
| "step": 18460 |
| }, |
| { |
| "epoch": 1.2812226316930415, |
| "grad_norm": 1.4609375, |
| "learning_rate": 1.788e-06, |
| "loss": 0.2446, |
| "step": 18470 |
| }, |
| { |
| "epoch": 1.281916323433774, |
| "grad_norm": 1.1953125, |
| "learning_rate": 1.7862608695652174e-06, |
| "loss": 0.2359, |
| "step": 18480 |
| }, |
| { |
| "epoch": 1.2826100151745068, |
| "grad_norm": 2.171875, |
| "learning_rate": 1.7845217391304348e-06, |
| "loss": 0.3021, |
| "step": 18490 |
| }, |
| { |
| "epoch": 1.2833037069152395, |
| "grad_norm": 1.4609375, |
| "learning_rate": 1.7827826086956524e-06, |
| "loss": 0.2255, |
| "step": 18500 |
| }, |
| { |
| "epoch": 1.2839973986559723, |
| "grad_norm": 1.2578125, |
| "learning_rate": 1.7810434782608696e-06, |
| "loss": 0.2562, |
| "step": 18510 |
| }, |
| { |
| "epoch": 1.284691090396705, |
| "grad_norm": 1.3515625, |
| "learning_rate": 1.7793043478260871e-06, |
| "loss": 0.2513, |
| "step": 18520 |
| }, |
| { |
| "epoch": 1.2853847821374376, |
| "grad_norm": 1.4140625, |
| "learning_rate": 1.7775652173913043e-06, |
| "loss": 0.2296, |
| "step": 18530 |
| }, |
| { |
| "epoch": 1.2860784738781703, |
| "grad_norm": 1.3046875, |
| "learning_rate": 1.7758260869565219e-06, |
| "loss": 0.2612, |
| "step": 18540 |
| }, |
| { |
| "epoch": 1.286772165618903, |
| "grad_norm": 1.1171875, |
| "learning_rate": 1.7740869565217393e-06, |
| "loss": 0.2088, |
| "step": 18550 |
| }, |
| { |
| "epoch": 1.2874658573596358, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.7723478260869566e-06, |
| "loss": 0.2278, |
| "step": 18560 |
| }, |
| { |
| "epoch": 1.2881595491003686, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.770608695652174e-06, |
| "loss": 0.2695, |
| "step": 18570 |
| }, |
| { |
| "epoch": 1.2888532408411013, |
| "grad_norm": 0.921875, |
| "learning_rate": 1.7688695652173916e-06, |
| "loss": 0.237, |
| "step": 18580 |
| }, |
| { |
| "epoch": 1.289546932581834, |
| "grad_norm": 1.53125, |
| "learning_rate": 1.7671304347826087e-06, |
| "loss": 0.3094, |
| "step": 18590 |
| }, |
| { |
| "epoch": 1.2902406243225666, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.7653913043478263e-06, |
| "loss": 0.2231, |
| "step": 18600 |
| }, |
| { |
| "epoch": 1.2909343160632993, |
| "grad_norm": 1.1328125, |
| "learning_rate": 1.7636521739130435e-06, |
| "loss": 0.2858, |
| "step": 18610 |
| }, |
| { |
| "epoch": 1.291628007804032, |
| "grad_norm": 1.1484375, |
| "learning_rate": 1.761913043478261e-06, |
| "loss": 0.2832, |
| "step": 18620 |
| }, |
| { |
| "epoch": 1.2923216995447648, |
| "grad_norm": 1.3359375, |
| "learning_rate": 1.7601739130434784e-06, |
| "loss": 0.2447, |
| "step": 18630 |
| }, |
| { |
| "epoch": 1.2930153912854976, |
| "grad_norm": 1.1484375, |
| "learning_rate": 1.7584347826086958e-06, |
| "loss": 0.2911, |
| "step": 18640 |
| }, |
| { |
| "epoch": 1.2937090830262301, |
| "grad_norm": 1.078125, |
| "learning_rate": 1.7566956521739132e-06, |
| "loss": 0.261, |
| "step": 18650 |
| }, |
| { |
| "epoch": 1.2944027747669629, |
| "grad_norm": 1.5, |
| "learning_rate": 1.7549565217391308e-06, |
| "loss": 0.2486, |
| "step": 18660 |
| }, |
| { |
| "epoch": 1.2950964665076956, |
| "grad_norm": 0.9765625, |
| "learning_rate": 1.753217391304348e-06, |
| "loss": 0.236, |
| "step": 18670 |
| }, |
| { |
| "epoch": 1.2957901582484284, |
| "grad_norm": 1.390625, |
| "learning_rate": 1.7514782608695655e-06, |
| "loss": 0.2216, |
| "step": 18680 |
| }, |
| { |
| "epoch": 1.296483849989161, |
| "grad_norm": 1.125, |
| "learning_rate": 1.7497391304347827e-06, |
| "loss": 0.2348, |
| "step": 18690 |
| }, |
| { |
| "epoch": 1.2971775417298939, |
| "grad_norm": 1.1328125, |
| "learning_rate": 1.7480000000000002e-06, |
| "loss": 0.2821, |
| "step": 18700 |
| }, |
| { |
| "epoch": 1.2978712334706266, |
| "grad_norm": 1.0703125, |
| "learning_rate": 1.7462608695652174e-06, |
| "loss": 0.2281, |
| "step": 18710 |
| }, |
| { |
| "epoch": 1.2985649252113591, |
| "grad_norm": 1.21875, |
| "learning_rate": 1.744521739130435e-06, |
| "loss": 0.3438, |
| "step": 18720 |
| }, |
| { |
| "epoch": 1.2992586169520919, |
| "grad_norm": 1.8515625, |
| "learning_rate": 1.7427826086956524e-06, |
| "loss": 0.307, |
| "step": 18730 |
| }, |
| { |
| "epoch": 1.2999523086928246, |
| "grad_norm": 1.3125, |
| "learning_rate": 1.7410434782608697e-06, |
| "loss": 0.2792, |
| "step": 18740 |
| }, |
| { |
| "epoch": 1.3006460004335574, |
| "grad_norm": 1.328125, |
| "learning_rate": 1.739304347826087e-06, |
| "loss": 0.246, |
| "step": 18750 |
| }, |
| { |
| "epoch": 1.3013396921742901, |
| "grad_norm": 1.203125, |
| "learning_rate": 1.7375652173913047e-06, |
| "loss": 0.2229, |
| "step": 18760 |
| }, |
| { |
| "epoch": 1.3020333839150227, |
| "grad_norm": 1.4921875, |
| "learning_rate": 1.7358260869565218e-06, |
| "loss": 0.2921, |
| "step": 18770 |
| }, |
| { |
| "epoch": 1.3027270756557554, |
| "grad_norm": 0.9921875, |
| "learning_rate": 1.7340869565217392e-06, |
| "loss": 0.2882, |
| "step": 18780 |
| }, |
| { |
| "epoch": 1.3034207673964882, |
| "grad_norm": 1.2265625, |
| "learning_rate": 1.7323478260869566e-06, |
| "loss": 0.2465, |
| "step": 18790 |
| }, |
| { |
| "epoch": 1.304114459137221, |
| "grad_norm": 1.21875, |
| "learning_rate": 1.730608695652174e-06, |
| "loss": 0.2151, |
| "step": 18800 |
| }, |
| { |
| "epoch": 1.3048081508779537, |
| "grad_norm": 1.1640625, |
| "learning_rate": 1.7288695652173915e-06, |
| "loss": 0.2568, |
| "step": 18810 |
| }, |
| { |
| "epoch": 1.3055018426186864, |
| "grad_norm": 1.5703125, |
| "learning_rate": 1.7271304347826087e-06, |
| "loss": 0.2507, |
| "step": 18820 |
| }, |
| { |
| "epoch": 1.3061955343594192, |
| "grad_norm": 1.21875, |
| "learning_rate": 1.7253913043478263e-06, |
| "loss": 0.2791, |
| "step": 18830 |
| }, |
| { |
| "epoch": 1.3068892261001517, |
| "grad_norm": 1.5390625, |
| "learning_rate": 1.7236521739130434e-06, |
| "loss": 0.2841, |
| "step": 18840 |
| }, |
| { |
| "epoch": 1.3075829178408844, |
| "grad_norm": 0.9609375, |
| "learning_rate": 1.721913043478261e-06, |
| "loss": 0.2459, |
| "step": 18850 |
| }, |
| { |
| "epoch": 1.3082766095816172, |
| "grad_norm": 1.078125, |
| "learning_rate": 1.7201739130434784e-06, |
| "loss": 0.2317, |
| "step": 18860 |
| }, |
| { |
| "epoch": 1.30897030132235, |
| "grad_norm": 1.328125, |
| "learning_rate": 1.7184347826086958e-06, |
| "loss": 0.2455, |
| "step": 18870 |
| }, |
| { |
| "epoch": 1.3096639930630825, |
| "grad_norm": 1.6953125, |
| "learning_rate": 1.7166956521739131e-06, |
| "loss": 0.2476, |
| "step": 18880 |
| }, |
| { |
| "epoch": 1.3103576848038152, |
| "grad_norm": 1.484375, |
| "learning_rate": 1.7149565217391307e-06, |
| "loss": 0.2529, |
| "step": 18890 |
| }, |
| { |
| "epoch": 1.311051376544548, |
| "grad_norm": 1.2734375, |
| "learning_rate": 1.7132173913043479e-06, |
| "loss": 0.2066, |
| "step": 18900 |
| }, |
| { |
| "epoch": 1.3117450682852807, |
| "grad_norm": 1.2109375, |
| "learning_rate": 1.7114782608695655e-06, |
| "loss": 0.2273, |
| "step": 18910 |
| }, |
| { |
| "epoch": 1.3124387600260135, |
| "grad_norm": 1.1171875, |
| "learning_rate": 1.7097391304347826e-06, |
| "loss": 0.1843, |
| "step": 18920 |
| }, |
| { |
| "epoch": 1.3131324517667462, |
| "grad_norm": 1.21875, |
| "learning_rate": 1.7080000000000002e-06, |
| "loss": 0.2144, |
| "step": 18930 |
| }, |
| { |
| "epoch": 1.313826143507479, |
| "grad_norm": 1.1875, |
| "learning_rate": 1.7062608695652174e-06, |
| "loss": 0.2223, |
| "step": 18940 |
| }, |
| { |
| "epoch": 1.3145198352482117, |
| "grad_norm": 1.296875, |
| "learning_rate": 1.704521739130435e-06, |
| "loss": 0.2149, |
| "step": 18950 |
| }, |
| { |
| "epoch": 1.3152135269889442, |
| "grad_norm": 1.28125, |
| "learning_rate": 1.7027826086956523e-06, |
| "loss": 0.2343, |
| "step": 18960 |
| }, |
| { |
| "epoch": 1.315907218729677, |
| "grad_norm": 1.59375, |
| "learning_rate": 1.7010434782608697e-06, |
| "loss": 0.2878, |
| "step": 18970 |
| }, |
| { |
| "epoch": 1.3166009104704097, |
| "grad_norm": 1.484375, |
| "learning_rate": 1.699304347826087e-06, |
| "loss": 0.2503, |
| "step": 18980 |
| }, |
| { |
| "epoch": 1.3172946022111425, |
| "grad_norm": 1.328125, |
| "learning_rate": 1.6975652173913046e-06, |
| "loss": 0.2683, |
| "step": 18990 |
| }, |
| { |
| "epoch": 1.317988293951875, |
| "grad_norm": 0.9453125, |
| "learning_rate": 1.6958260869565218e-06, |
| "loss": 0.226, |
| "step": 19000 |
| }, |
| { |
| "epoch": 1.3186819856926077, |
| "grad_norm": 1.3203125, |
| "learning_rate": 1.6940869565217394e-06, |
| "loss": 0.2354, |
| "step": 19010 |
| }, |
| { |
| "epoch": 1.3193756774333405, |
| "grad_norm": 1.3359375, |
| "learning_rate": 1.6923478260869565e-06, |
| "loss": 0.2512, |
| "step": 19020 |
| }, |
| { |
| "epoch": 1.3200693691740732, |
| "grad_norm": 1.1171875, |
| "learning_rate": 1.6906086956521741e-06, |
| "loss": 0.215, |
| "step": 19030 |
| }, |
| { |
| "epoch": 1.320763060914806, |
| "grad_norm": 1.0703125, |
| "learning_rate": 1.6888695652173915e-06, |
| "loss": 0.2324, |
| "step": 19040 |
| }, |
| { |
| "epoch": 1.3214567526555387, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.6871304347826089e-06, |
| "loss": 0.2898, |
| "step": 19050 |
| }, |
| { |
| "epoch": 1.3221504443962715, |
| "grad_norm": 1.4453125, |
| "learning_rate": 1.6853913043478262e-06, |
| "loss": 0.2415, |
| "step": 19060 |
| }, |
| { |
| "epoch": 1.3228441361370042, |
| "grad_norm": 1.46875, |
| "learning_rate": 1.6836521739130438e-06, |
| "loss": 0.2365, |
| "step": 19070 |
| }, |
| { |
| "epoch": 1.3235378278777368, |
| "grad_norm": 1.2421875, |
| "learning_rate": 1.681913043478261e-06, |
| "loss": 0.2251, |
| "step": 19080 |
| }, |
| { |
| "epoch": 1.3242315196184695, |
| "grad_norm": 0.9921875, |
| "learning_rate": 1.6801739130434784e-06, |
| "loss": 0.2137, |
| "step": 19090 |
| }, |
| { |
| "epoch": 1.3249252113592023, |
| "grad_norm": 1.3203125, |
| "learning_rate": 1.6784347826086957e-06, |
| "loss": 0.2239, |
| "step": 19100 |
| }, |
| { |
| "epoch": 1.325618903099935, |
| "grad_norm": 1.4609375, |
| "learning_rate": 1.676695652173913e-06, |
| "loss": 0.2748, |
| "step": 19110 |
| }, |
| { |
| "epoch": 1.3263125948406675, |
| "grad_norm": 1.21875, |
| "learning_rate": 1.6749565217391307e-06, |
| "loss": 0.2748, |
| "step": 19120 |
| }, |
| { |
| "epoch": 1.3270062865814003, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.6732173913043478e-06, |
| "loss": 0.2845, |
| "step": 19130 |
| }, |
| { |
| "epoch": 1.327699978322133, |
| "grad_norm": 1.265625, |
| "learning_rate": 1.6714782608695654e-06, |
| "loss": 0.2227, |
| "step": 19140 |
| }, |
| { |
| "epoch": 1.3283936700628658, |
| "grad_norm": 1.140625, |
| "learning_rate": 1.6697391304347826e-06, |
| "loss": 0.2701, |
| "step": 19150 |
| }, |
| { |
| "epoch": 1.3290873618035985, |
| "grad_norm": 1.1796875, |
| "learning_rate": 1.6680000000000002e-06, |
| "loss": 0.3026, |
| "step": 19160 |
| }, |
| { |
| "epoch": 1.3297810535443313, |
| "grad_norm": 1.34375, |
| "learning_rate": 1.6662608695652175e-06, |
| "loss": 0.2401, |
| "step": 19170 |
| }, |
| { |
| "epoch": 1.330474745285064, |
| "grad_norm": 1.28125, |
| "learning_rate": 1.664521739130435e-06, |
| "loss": 0.2907, |
| "step": 19180 |
| }, |
| { |
| "epoch": 1.3311684370257968, |
| "grad_norm": 1.09375, |
| "learning_rate": 1.6627826086956523e-06, |
| "loss": 0.2404, |
| "step": 19190 |
| }, |
| { |
| "epoch": 1.3318621287665293, |
| "grad_norm": 1.3359375, |
| "learning_rate": 1.6610434782608699e-06, |
| "loss": 0.2548, |
| "step": 19200 |
| }, |
| { |
| "epoch": 1.332555820507262, |
| "grad_norm": 1.2109375, |
| "learning_rate": 1.659304347826087e-06, |
| "loss": 0.3054, |
| "step": 19210 |
| }, |
| { |
| "epoch": 1.3332495122479948, |
| "grad_norm": 1.0859375, |
| "learning_rate": 1.6575652173913046e-06, |
| "loss": 0.2174, |
| "step": 19220 |
| }, |
| { |
| "epoch": 1.3339432039887276, |
| "grad_norm": 1.2734375, |
| "learning_rate": 1.6558260869565218e-06, |
| "loss": 0.2712, |
| "step": 19230 |
| }, |
| { |
| "epoch": 1.33463689572946, |
| "grad_norm": 1.5234375, |
| "learning_rate": 1.6540869565217393e-06, |
| "loss": 0.2191, |
| "step": 19240 |
| }, |
| { |
| "epoch": 1.3353305874701928, |
| "grad_norm": 1.0859375, |
| "learning_rate": 1.6523478260869565e-06, |
| "loss": 0.2889, |
| "step": 19250 |
| }, |
| { |
| "epoch": 1.3360242792109256, |
| "grad_norm": 1.21875, |
| "learning_rate": 1.650608695652174e-06, |
| "loss": 0.2662, |
| "step": 19260 |
| }, |
| { |
| "epoch": 1.3367179709516583, |
| "grad_norm": 1.1328125, |
| "learning_rate": 1.6488695652173915e-06, |
| "loss": 0.2417, |
| "step": 19270 |
| }, |
| { |
| "epoch": 1.337411662692391, |
| "grad_norm": 1.0703125, |
| "learning_rate": 1.6471304347826088e-06, |
| "loss": 0.2668, |
| "step": 19280 |
| }, |
| { |
| "epoch": 1.3381053544331238, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.6453913043478262e-06, |
| "loss": 0.2028, |
| "step": 19290 |
| }, |
| { |
| "epoch": 1.3387990461738566, |
| "grad_norm": 1.3203125, |
| "learning_rate": 1.6436521739130438e-06, |
| "loss": 0.2831, |
| "step": 19300 |
| }, |
| { |
| "epoch": 1.3394927379145893, |
| "grad_norm": 1.0625, |
| "learning_rate": 1.641913043478261e-06, |
| "loss": 0.2186, |
| "step": 19310 |
| }, |
| { |
| "epoch": 1.3401864296553219, |
| "grad_norm": 1.359375, |
| "learning_rate": 1.6401739130434785e-06, |
| "loss": 0.2243, |
| "step": 19320 |
| }, |
| { |
| "epoch": 1.3408801213960546, |
| "grad_norm": 1.09375, |
| "learning_rate": 1.6384347826086957e-06, |
| "loss": 0.2012, |
| "step": 19330 |
| }, |
| { |
| "epoch": 1.3415738131367874, |
| "grad_norm": 1.3046875, |
| "learning_rate": 1.6366956521739133e-06, |
| "loss": 0.2255, |
| "step": 19340 |
| }, |
| { |
| "epoch": 1.3422675048775201, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.6349565217391306e-06, |
| "loss": 0.2735, |
| "step": 19350 |
| }, |
| { |
| "epoch": 1.3429611966182526, |
| "grad_norm": 1.90625, |
| "learning_rate": 1.633217391304348e-06, |
| "loss": 0.3033, |
| "step": 19360 |
| }, |
| { |
| "epoch": 1.3436548883589854, |
| "grad_norm": 1.3046875, |
| "learning_rate": 1.6314782608695654e-06, |
| "loss": 0.2539, |
| "step": 19370 |
| }, |
| { |
| "epoch": 1.3443485800997181, |
| "grad_norm": 1.1171875, |
| "learning_rate": 1.629739130434783e-06, |
| "loss": 0.2277, |
| "step": 19380 |
| }, |
| { |
| "epoch": 1.3450422718404509, |
| "grad_norm": 1.09375, |
| "learning_rate": 1.6280000000000001e-06, |
| "loss": 0.2215, |
| "step": 19390 |
| }, |
| { |
| "epoch": 1.3457359635811836, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.6262608695652175e-06, |
| "loss": 0.2031, |
| "step": 19400 |
| }, |
| { |
| "epoch": 1.3464296553219164, |
| "grad_norm": 1.2734375, |
| "learning_rate": 1.6245217391304349e-06, |
| "loss": 0.2154, |
| "step": 19410 |
| }, |
| { |
| "epoch": 1.3471233470626491, |
| "grad_norm": 1.359375, |
| "learning_rate": 1.6227826086956522e-06, |
| "loss": 0.2311, |
| "step": 19420 |
| }, |
| { |
| "epoch": 1.3478170388033819, |
| "grad_norm": 1.0546875, |
| "learning_rate": 1.6210434782608698e-06, |
| "loss": 0.2294, |
| "step": 19430 |
| }, |
| { |
| "epoch": 1.3485107305441144, |
| "grad_norm": 1.2421875, |
| "learning_rate": 1.619304347826087e-06, |
| "loss": 0.3163, |
| "step": 19440 |
| }, |
| { |
| "epoch": 1.3492044222848472, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.6175652173913046e-06, |
| "loss": 0.1935, |
| "step": 19450 |
| }, |
| { |
| "epoch": 1.34989811402558, |
| "grad_norm": 1.453125, |
| "learning_rate": 1.6158260869565217e-06, |
| "loss": 0.2372, |
| "step": 19460 |
| }, |
| { |
| "epoch": 1.3505918057663127, |
| "grad_norm": 1.1484375, |
| "learning_rate": 1.6140869565217393e-06, |
| "loss": 0.3196, |
| "step": 19470 |
| }, |
| { |
| "epoch": 1.3512854975070452, |
| "grad_norm": 1.21875, |
| "learning_rate": 1.6123478260869565e-06, |
| "loss": 0.2137, |
| "step": 19480 |
| }, |
| { |
| "epoch": 1.351979189247778, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.610608695652174e-06, |
| "loss": 0.2553, |
| "step": 19490 |
| }, |
| { |
| "epoch": 1.3526728809885107, |
| "grad_norm": 1.0859375, |
| "learning_rate": 1.6088695652173914e-06, |
| "loss": 0.2622, |
| "step": 19500 |
| }, |
| { |
| "epoch": 1.3533665727292434, |
| "grad_norm": 0.98046875, |
| "learning_rate": 1.6071304347826088e-06, |
| "loss": 0.2121, |
| "step": 19510 |
| }, |
| { |
| "epoch": 1.3540602644699762, |
| "grad_norm": 1.953125, |
| "learning_rate": 1.6053913043478262e-06, |
| "loss": 0.2425, |
| "step": 19520 |
| }, |
| { |
| "epoch": 1.354753956210709, |
| "grad_norm": 1.0625, |
| "learning_rate": 1.6036521739130437e-06, |
| "loss": 0.2537, |
| "step": 19530 |
| }, |
| { |
| "epoch": 1.3554476479514417, |
| "grad_norm": 1.7890625, |
| "learning_rate": 1.601913043478261e-06, |
| "loss": 0.3255, |
| "step": 19540 |
| }, |
| { |
| "epoch": 1.3561413396921742, |
| "grad_norm": 1.46875, |
| "learning_rate": 1.6001739130434785e-06, |
| "loss": 0.2601, |
| "step": 19550 |
| }, |
| { |
| "epoch": 1.356835031432907, |
| "grad_norm": 1.296875, |
| "learning_rate": 1.5984347826086956e-06, |
| "loss": 0.2258, |
| "step": 19560 |
| }, |
| { |
| "epoch": 1.3575287231736397, |
| "grad_norm": 1.3203125, |
| "learning_rate": 1.5966956521739132e-06, |
| "loss": 0.2909, |
| "step": 19570 |
| }, |
| { |
| "epoch": 1.3582224149143725, |
| "grad_norm": 1.1328125, |
| "learning_rate": 1.5949565217391306e-06, |
| "loss": 0.254, |
| "step": 19580 |
| }, |
| { |
| "epoch": 1.3589161066551052, |
| "grad_norm": 0.94140625, |
| "learning_rate": 1.593217391304348e-06, |
| "loss": 0.2798, |
| "step": 19590 |
| }, |
| { |
| "epoch": 1.3596097983958377, |
| "grad_norm": 1.2890625, |
| "learning_rate": 1.5914782608695653e-06, |
| "loss": 0.2097, |
| "step": 19600 |
| }, |
| { |
| "epoch": 1.3603034901365705, |
| "grad_norm": 1.171875, |
| "learning_rate": 1.589739130434783e-06, |
| "loss": 0.3109, |
| "step": 19610 |
| }, |
| { |
| "epoch": 1.3609971818773032, |
| "grad_norm": 0.94140625, |
| "learning_rate": 1.588e-06, |
| "loss": 0.3035, |
| "step": 19620 |
| }, |
| { |
| "epoch": 1.361690873618036, |
| "grad_norm": 1.3359375, |
| "learning_rate": 1.5862608695652177e-06, |
| "loss": 0.3036, |
| "step": 19630 |
| }, |
| { |
| "epoch": 1.3623845653587687, |
| "grad_norm": 1.203125, |
| "learning_rate": 1.5845217391304348e-06, |
| "loss": 0.2218, |
| "step": 19640 |
| }, |
| { |
| "epoch": 1.3630782570995015, |
| "grad_norm": 1.2890625, |
| "learning_rate": 1.5827826086956524e-06, |
| "loss": 0.2462, |
| "step": 19650 |
| }, |
| { |
| "epoch": 1.3637719488402342, |
| "grad_norm": 0.984375, |
| "learning_rate": 1.5810434782608698e-06, |
| "loss": 0.2062, |
| "step": 19660 |
| }, |
| { |
| "epoch": 1.3644656405809668, |
| "grad_norm": 1.1875, |
| "learning_rate": 1.5793043478260872e-06, |
| "loss": 0.2235, |
| "step": 19670 |
| }, |
| { |
| "epoch": 1.3651593323216995, |
| "grad_norm": 1.5703125, |
| "learning_rate": 1.5775652173913045e-06, |
| "loss": 0.2577, |
| "step": 19680 |
| }, |
| { |
| "epoch": 1.3658530240624323, |
| "grad_norm": 1.0625, |
| "learning_rate": 1.5758260869565221e-06, |
| "loss": 0.2769, |
| "step": 19690 |
| }, |
| { |
| "epoch": 1.366546715803165, |
| "grad_norm": 1.1171875, |
| "learning_rate": 1.5740869565217393e-06, |
| "loss": 0.222, |
| "step": 19700 |
| }, |
| { |
| "epoch": 1.3672404075438978, |
| "grad_norm": 1.1640625, |
| "learning_rate": 1.5723478260869564e-06, |
| "loss": 0.2199, |
| "step": 19710 |
| }, |
| { |
| "epoch": 1.3679340992846303, |
| "grad_norm": 1.1015625, |
| "learning_rate": 1.570608695652174e-06, |
| "loss": 0.2524, |
| "step": 19720 |
| }, |
| { |
| "epoch": 1.368627791025363, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.5688695652173914e-06, |
| "loss": 0.2201, |
| "step": 19730 |
| }, |
| { |
| "epoch": 1.3693214827660958, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.5671304347826088e-06, |
| "loss": 0.3256, |
| "step": 19740 |
| }, |
| { |
| "epoch": 1.3700151745068285, |
| "grad_norm": 1.546875, |
| "learning_rate": 1.5653913043478261e-06, |
| "loss": 0.2929, |
| "step": 19750 |
| }, |
| { |
| "epoch": 1.3707088662475613, |
| "grad_norm": 1.2421875, |
| "learning_rate": 1.5636521739130437e-06, |
| "loss": 0.2536, |
| "step": 19760 |
| }, |
| { |
| "epoch": 1.371402557988294, |
| "grad_norm": 1.3828125, |
| "learning_rate": 1.5619130434782609e-06, |
| "loss": 0.2349, |
| "step": 19770 |
| }, |
| { |
| "epoch": 1.3720962497290268, |
| "grad_norm": 1.203125, |
| "learning_rate": 1.5601739130434784e-06, |
| "loss": 0.2195, |
| "step": 19780 |
| }, |
| { |
| "epoch": 1.3727899414697593, |
| "grad_norm": 1.171875, |
| "learning_rate": 1.5584347826086956e-06, |
| "loss": 0.278, |
| "step": 19790 |
| }, |
| { |
| "epoch": 1.373483633210492, |
| "grad_norm": 1.6953125, |
| "learning_rate": 1.5566956521739132e-06, |
| "loss": 0.2423, |
| "step": 19800 |
| }, |
| { |
| "epoch": 1.3741773249512248, |
| "grad_norm": 1.140625, |
| "learning_rate": 1.5549565217391306e-06, |
| "loss": 0.2768, |
| "step": 19810 |
| }, |
| { |
| "epoch": 1.3748710166919575, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.553217391304348e-06, |
| "loss": 0.2441, |
| "step": 19820 |
| }, |
| { |
| "epoch": 1.3755647084326903, |
| "grad_norm": 1.6015625, |
| "learning_rate": 1.5514782608695653e-06, |
| "loss": 0.2551, |
| "step": 19830 |
| }, |
| { |
| "epoch": 1.3762584001734228, |
| "grad_norm": 1.90625, |
| "learning_rate": 1.5497391304347829e-06, |
| "loss": 0.2861, |
| "step": 19840 |
| }, |
| { |
| "epoch": 1.3769520919141556, |
| "grad_norm": 1.5, |
| "learning_rate": 1.548e-06, |
| "loss": 0.2289, |
| "step": 19850 |
| }, |
| { |
| "epoch": 1.3776457836548883, |
| "grad_norm": 1.3828125, |
| "learning_rate": 1.5462608695652176e-06, |
| "loss": 0.2622, |
| "step": 19860 |
| }, |
| { |
| "epoch": 1.378339475395621, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.5445217391304348e-06, |
| "loss": 0.3027, |
| "step": 19870 |
| }, |
| { |
| "epoch": 1.3790331671363538, |
| "grad_norm": 1.2421875, |
| "learning_rate": 1.5427826086956524e-06, |
| "loss": 0.2128, |
| "step": 19880 |
| }, |
| { |
| "epoch": 1.3797268588770866, |
| "grad_norm": 1.1015625, |
| "learning_rate": 1.5410434782608697e-06, |
| "loss": 0.2341, |
| "step": 19890 |
| }, |
| { |
| "epoch": 1.3804205506178193, |
| "grad_norm": 1.078125, |
| "learning_rate": 1.5393043478260871e-06, |
| "loss": 0.2186, |
| "step": 19900 |
| }, |
| { |
| "epoch": 1.3811142423585518, |
| "grad_norm": 1.21875, |
| "learning_rate": 1.5375652173913045e-06, |
| "loss": 0.2346, |
| "step": 19910 |
| }, |
| { |
| "epoch": 1.3818079340992846, |
| "grad_norm": 1.15625, |
| "learning_rate": 1.535826086956522e-06, |
| "loss": 0.2231, |
| "step": 19920 |
| }, |
| { |
| "epoch": 1.3825016258400173, |
| "grad_norm": 1.1796875, |
| "learning_rate": 1.5340869565217392e-06, |
| "loss": 0.2264, |
| "step": 19930 |
| }, |
| { |
| "epoch": 1.38319531758075, |
| "grad_norm": 1.1640625, |
| "learning_rate": 1.5323478260869568e-06, |
| "loss": 0.2376, |
| "step": 19940 |
| }, |
| { |
| "epoch": 1.3838890093214828, |
| "grad_norm": 1.34375, |
| "learning_rate": 1.530608695652174e-06, |
| "loss": 0.2588, |
| "step": 19950 |
| }, |
| { |
| "epoch": 1.3845827010622154, |
| "grad_norm": 1.2578125, |
| "learning_rate": 1.5288695652173916e-06, |
| "loss": 0.2747, |
| "step": 19960 |
| }, |
| { |
| "epoch": 1.3852763928029481, |
| "grad_norm": 1.421875, |
| "learning_rate": 1.5271304347826087e-06, |
| "loss": 0.2317, |
| "step": 19970 |
| }, |
| { |
| "epoch": 1.3859700845436809, |
| "grad_norm": 1.296875, |
| "learning_rate": 1.5253913043478263e-06, |
| "loss": 0.2596, |
| "step": 19980 |
| }, |
| { |
| "epoch": 1.3866637762844136, |
| "grad_norm": 1.328125, |
| "learning_rate": 1.5236521739130437e-06, |
| "loss": 0.2658, |
| "step": 19990 |
| }, |
| { |
| "epoch": 1.3873574680251464, |
| "grad_norm": 1.140625, |
| "learning_rate": 1.521913043478261e-06, |
| "loss": 0.2576, |
| "step": 20000 |
| }, |
| { |
| "epoch": 1.3880511597658791, |
| "grad_norm": 1.5390625, |
| "learning_rate": 1.5201739130434784e-06, |
| "loss": 0.237, |
| "step": 20010 |
| }, |
| { |
| "epoch": 1.3887448515066119, |
| "grad_norm": 1.1953125, |
| "learning_rate": 1.5184347826086956e-06, |
| "loss": 0.2249, |
| "step": 20020 |
| }, |
| { |
| "epoch": 1.3894385432473444, |
| "grad_norm": 1.3359375, |
| "learning_rate": 1.5166956521739131e-06, |
| "loss": 0.2541, |
| "step": 20030 |
| }, |
| { |
| "epoch": 1.3901322349880771, |
| "grad_norm": 0.87109375, |
| "learning_rate": 1.5149565217391305e-06, |
| "loss": 0.2328, |
| "step": 20040 |
| }, |
| { |
| "epoch": 1.39082592672881, |
| "grad_norm": 0.8515625, |
| "learning_rate": 1.5132173913043479e-06, |
| "loss": 0.2299, |
| "step": 20050 |
| }, |
| { |
| "epoch": 1.3915196184695426, |
| "grad_norm": 1.21875, |
| "learning_rate": 1.5114782608695653e-06, |
| "loss": 0.2728, |
| "step": 20060 |
| }, |
| { |
| "epoch": 1.3922133102102754, |
| "grad_norm": 0.94140625, |
| "learning_rate": 1.5097391304347828e-06, |
| "loss": 0.2113, |
| "step": 20070 |
| }, |
| { |
| "epoch": 1.392907001951008, |
| "grad_norm": 1.1953125, |
| "learning_rate": 1.508e-06, |
| "loss": 0.2075, |
| "step": 20080 |
| }, |
| { |
| "epoch": 1.3936006936917407, |
| "grad_norm": 1.1484375, |
| "learning_rate": 1.5062608695652176e-06, |
| "loss": 0.2242, |
| "step": 20090 |
| }, |
| { |
| "epoch": 1.3942943854324734, |
| "grad_norm": 0.90625, |
| "learning_rate": 1.5045217391304347e-06, |
| "loss": 0.2404, |
| "step": 20100 |
| }, |
| { |
| "epoch": 1.3949880771732062, |
| "grad_norm": 1.25, |
| "learning_rate": 1.5027826086956523e-06, |
| "loss": 0.2921, |
| "step": 20110 |
| }, |
| { |
| "epoch": 1.395681768913939, |
| "grad_norm": 1.3359375, |
| "learning_rate": 1.5010434782608697e-06, |
| "loss": 0.2469, |
| "step": 20120 |
| }, |
| { |
| "epoch": 1.3963754606546717, |
| "grad_norm": 1.53125, |
| "learning_rate": 1.499304347826087e-06, |
| "loss": 0.2326, |
| "step": 20130 |
| }, |
| { |
| "epoch": 1.3970691523954044, |
| "grad_norm": 1.140625, |
| "learning_rate": 1.4975652173913044e-06, |
| "loss": 0.2378, |
| "step": 20140 |
| }, |
| { |
| "epoch": 1.397762844136137, |
| "grad_norm": 1.34375, |
| "learning_rate": 1.495826086956522e-06, |
| "loss": 0.2893, |
| "step": 20150 |
| }, |
| { |
| "epoch": 1.3984565358768697, |
| "grad_norm": 0.7890625, |
| "learning_rate": 1.4940869565217392e-06, |
| "loss": 0.2289, |
| "step": 20160 |
| }, |
| { |
| "epoch": 1.3991502276176024, |
| "grad_norm": 1.3046875, |
| "learning_rate": 1.4923478260869568e-06, |
| "loss": 0.2284, |
| "step": 20170 |
| }, |
| { |
| "epoch": 1.3998439193583352, |
| "grad_norm": 0.953125, |
| "learning_rate": 1.490608695652174e-06, |
| "loss": 0.2175, |
| "step": 20180 |
| }, |
| { |
| "epoch": 1.4005376110990677, |
| "grad_norm": 1.2421875, |
| "learning_rate": 1.4888695652173915e-06, |
| "loss": 0.213, |
| "step": 20190 |
| }, |
| { |
| "epoch": 1.4012313028398005, |
| "grad_norm": 1.6484375, |
| "learning_rate": 1.4871304347826087e-06, |
| "loss": 0.2365, |
| "step": 20200 |
| }, |
| { |
| "epoch": 1.4019249945805332, |
| "grad_norm": 1.515625, |
| "learning_rate": 1.4853913043478263e-06, |
| "loss": 0.238, |
| "step": 20210 |
| }, |
| { |
| "epoch": 1.402618686321266, |
| "grad_norm": 1.4609375, |
| "learning_rate": 1.4836521739130436e-06, |
| "loss": 0.227, |
| "step": 20220 |
| }, |
| { |
| "epoch": 1.4033123780619987, |
| "grad_norm": 1.0546875, |
| "learning_rate": 1.481913043478261e-06, |
| "loss": 0.2311, |
| "step": 20230 |
| }, |
| { |
| "epoch": 1.4040060698027315, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.4801739130434784e-06, |
| "loss": 0.2447, |
| "step": 20240 |
| }, |
| { |
| "epoch": 1.4046997615434642, |
| "grad_norm": 1.4140625, |
| "learning_rate": 1.478434782608696e-06, |
| "loss": 0.2528, |
| "step": 20250 |
| }, |
| { |
| "epoch": 1.405393453284197, |
| "grad_norm": 1.2734375, |
| "learning_rate": 1.4766956521739131e-06, |
| "loss": 0.2466, |
| "step": 20260 |
| }, |
| { |
| "epoch": 1.4060871450249295, |
| "grad_norm": 1.515625, |
| "learning_rate": 1.4749565217391307e-06, |
| "loss": 0.2668, |
| "step": 20270 |
| }, |
| { |
| "epoch": 1.4067808367656622, |
| "grad_norm": 1.25, |
| "learning_rate": 1.4732173913043478e-06, |
| "loss": 0.2382, |
| "step": 20280 |
| }, |
| { |
| "epoch": 1.407474528506395, |
| "grad_norm": 1.2265625, |
| "learning_rate": 1.4714782608695654e-06, |
| "loss": 0.2393, |
| "step": 20290 |
| }, |
| { |
| "epoch": 1.4081682202471277, |
| "grad_norm": 0.828125, |
| "learning_rate": 1.4697391304347828e-06, |
| "loss": 0.2099, |
| "step": 20300 |
| }, |
| { |
| "epoch": 1.4088619119878603, |
| "grad_norm": 1.1328125, |
| "learning_rate": 1.4680000000000002e-06, |
| "loss": 0.2382, |
| "step": 20310 |
| }, |
| { |
| "epoch": 1.409555603728593, |
| "grad_norm": 1.5703125, |
| "learning_rate": 1.4662608695652175e-06, |
| "loss": 0.3246, |
| "step": 20320 |
| }, |
| { |
| "epoch": 1.4102492954693258, |
| "grad_norm": 1.0703125, |
| "learning_rate": 1.4645217391304347e-06, |
| "loss": 0.3255, |
| "step": 20330 |
| }, |
| { |
| "epoch": 1.4109429872100585, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.4627826086956523e-06, |
| "loss": 0.1941, |
| "step": 20340 |
| }, |
| { |
| "epoch": 1.4116366789507913, |
| "grad_norm": 1.0859375, |
| "learning_rate": 1.4610434782608697e-06, |
| "loss": 0.2243, |
| "step": 20350 |
| }, |
| { |
| "epoch": 1.412330370691524, |
| "grad_norm": 1.140625, |
| "learning_rate": 1.459304347826087e-06, |
| "loss": 0.219, |
| "step": 20360 |
| }, |
| { |
| "epoch": 1.4130240624322568, |
| "grad_norm": 1.203125, |
| "learning_rate": 1.4575652173913044e-06, |
| "loss": 0.225, |
| "step": 20370 |
| }, |
| { |
| "epoch": 1.4137177541729895, |
| "grad_norm": 1.140625, |
| "learning_rate": 1.455826086956522e-06, |
| "loss": 0.2449, |
| "step": 20380 |
| }, |
| { |
| "epoch": 1.414411445913722, |
| "grad_norm": 0.984375, |
| "learning_rate": 1.4540869565217391e-06, |
| "loss": 0.2189, |
| "step": 20390 |
| }, |
| { |
| "epoch": 1.4151051376544548, |
| "grad_norm": 1.390625, |
| "learning_rate": 1.4523478260869567e-06, |
| "loss": 0.2627, |
| "step": 20400 |
| }, |
| { |
| "epoch": 1.4157988293951875, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.4506086956521739e-06, |
| "loss": 0.2063, |
| "step": 20410 |
| }, |
| { |
| "epoch": 1.4164925211359203, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.4488695652173915e-06, |
| "loss": 0.213, |
| "step": 20420 |
| }, |
| { |
| "epoch": 1.4171862128766528, |
| "grad_norm": 1.3671875, |
| "learning_rate": 1.4471304347826086e-06, |
| "loss": 0.2139, |
| "step": 20430 |
| }, |
| { |
| "epoch": 1.4178799046173856, |
| "grad_norm": 1.2265625, |
| "learning_rate": 1.4453913043478262e-06, |
| "loss": 0.2276, |
| "step": 20440 |
| }, |
| { |
| "epoch": 1.4185735963581183, |
| "grad_norm": 1.234375, |
| "learning_rate": 1.4436521739130436e-06, |
| "loss": 0.2084, |
| "step": 20450 |
| }, |
| { |
| "epoch": 1.419267288098851, |
| "grad_norm": 0.9765625, |
| "learning_rate": 1.441913043478261e-06, |
| "loss": 0.2437, |
| "step": 20460 |
| }, |
| { |
| "epoch": 1.4199609798395838, |
| "grad_norm": 1.15625, |
| "learning_rate": 1.4401739130434783e-06, |
| "loss": 0.2394, |
| "step": 20470 |
| }, |
| { |
| "epoch": 1.4206546715803166, |
| "grad_norm": 1.4765625, |
| "learning_rate": 1.438434782608696e-06, |
| "loss": 0.251, |
| "step": 20480 |
| }, |
| { |
| "epoch": 1.4213483633210493, |
| "grad_norm": 1.3125, |
| "learning_rate": 1.436695652173913e-06, |
| "loss": 0.288, |
| "step": 20490 |
| }, |
| { |
| "epoch": 1.422042055061782, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.4349565217391306e-06, |
| "loss": 0.2525, |
| "step": 20500 |
| }, |
| { |
| "epoch": 1.4227357468025146, |
| "grad_norm": 1.2578125, |
| "learning_rate": 1.4332173913043478e-06, |
| "loss": 0.2392, |
| "step": 20510 |
| }, |
| { |
| "epoch": 1.4234294385432473, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.4314782608695654e-06, |
| "loss": 0.2057, |
| "step": 20520 |
| }, |
| { |
| "epoch": 1.42412313028398, |
| "grad_norm": 1.5, |
| "learning_rate": 1.4297391304347828e-06, |
| "loss": 0.2368, |
| "step": 20530 |
| }, |
| { |
| "epoch": 1.4248168220247128, |
| "grad_norm": 1.265625, |
| "learning_rate": 1.4280000000000001e-06, |
| "loss": 0.2351, |
| "step": 20540 |
| }, |
| { |
| "epoch": 1.4255105137654454, |
| "grad_norm": 1.234375, |
| "learning_rate": 1.4262608695652175e-06, |
| "loss": 0.2066, |
| "step": 20550 |
| }, |
| { |
| "epoch": 1.426204205506178, |
| "grad_norm": 1.6328125, |
| "learning_rate": 1.424521739130435e-06, |
| "loss": 0.2405, |
| "step": 20560 |
| }, |
| { |
| "epoch": 1.4268978972469109, |
| "grad_norm": 1.328125, |
| "learning_rate": 1.4227826086956522e-06, |
| "loss": 0.2749, |
| "step": 20570 |
| }, |
| { |
| "epoch": 1.4275915889876436, |
| "grad_norm": 1.5078125, |
| "learning_rate": 1.4210434782608698e-06, |
| "loss": 0.2632, |
| "step": 20580 |
| }, |
| { |
| "epoch": 1.4282852807283763, |
| "grad_norm": 1.2109375, |
| "learning_rate": 1.419304347826087e-06, |
| "loss": 0.3155, |
| "step": 20590 |
| }, |
| { |
| "epoch": 1.428978972469109, |
| "grad_norm": 1.40625, |
| "learning_rate": 1.4175652173913046e-06, |
| "loss": 0.2016, |
| "step": 20600 |
| }, |
| { |
| "epoch": 1.4296726642098418, |
| "grad_norm": 1.1484375, |
| "learning_rate": 1.415826086956522e-06, |
| "loss": 0.2906, |
| "step": 20610 |
| }, |
| { |
| "epoch": 1.4303663559505746, |
| "grad_norm": 1.7890625, |
| "learning_rate": 1.4140869565217393e-06, |
| "loss": 0.2672, |
| "step": 20620 |
| }, |
| { |
| "epoch": 1.4310600476913071, |
| "grad_norm": 1.609375, |
| "learning_rate": 1.4123478260869567e-06, |
| "loss": 0.2527, |
| "step": 20630 |
| }, |
| { |
| "epoch": 1.4317537394320399, |
| "grad_norm": 1.1796875, |
| "learning_rate": 1.4106086956521738e-06, |
| "loss": 0.3494, |
| "step": 20640 |
| }, |
| { |
| "epoch": 1.4324474311727726, |
| "grad_norm": 0.8984375, |
| "learning_rate": 1.4088695652173914e-06, |
| "loss": 0.2156, |
| "step": 20650 |
| }, |
| { |
| "epoch": 1.4331411229135054, |
| "grad_norm": 1.2421875, |
| "learning_rate": 1.4071304347826086e-06, |
| "loss": 0.2444, |
| "step": 20660 |
| }, |
| { |
| "epoch": 1.433834814654238, |
| "grad_norm": 0.8828125, |
| "learning_rate": 1.4053913043478262e-06, |
| "loss": 0.2617, |
| "step": 20670 |
| }, |
| { |
| "epoch": 1.4345285063949706, |
| "grad_norm": 1.15625, |
| "learning_rate": 1.4036521739130435e-06, |
| "loss": 0.2533, |
| "step": 20680 |
| }, |
| { |
| "epoch": 1.4352221981357034, |
| "grad_norm": 1.1328125, |
| "learning_rate": 1.401913043478261e-06, |
| "loss": 0.2659, |
| "step": 20690 |
| }, |
| { |
| "epoch": 1.4359158898764361, |
| "grad_norm": 1.484375, |
| "learning_rate": 1.4001739130434783e-06, |
| "loss": 0.2591, |
| "step": 20700 |
| }, |
| { |
| "epoch": 1.436609581617169, |
| "grad_norm": 1.1953125, |
| "learning_rate": 1.3984347826086959e-06, |
| "loss": 0.2581, |
| "step": 20710 |
| }, |
| { |
| "epoch": 1.4373032733579016, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.396695652173913e-06, |
| "loss": 0.273, |
| "step": 20720 |
| }, |
| { |
| "epoch": 1.4379969650986344, |
| "grad_norm": 1.125, |
| "learning_rate": 1.3949565217391306e-06, |
| "loss": 0.2176, |
| "step": 20730 |
| }, |
| { |
| "epoch": 1.4386906568393671, |
| "grad_norm": 1.3359375, |
| "learning_rate": 1.3932173913043478e-06, |
| "loss": 0.2122, |
| "step": 20740 |
| }, |
| { |
| "epoch": 1.4393843485800997, |
| "grad_norm": 1.4140625, |
| "learning_rate": 1.3914782608695654e-06, |
| "loss": 0.2135, |
| "step": 20750 |
| }, |
| { |
| "epoch": 1.4400780403208324, |
| "grad_norm": 0.75, |
| "learning_rate": 1.3897391304347827e-06, |
| "loss": 0.2196, |
| "step": 20760 |
| }, |
| { |
| "epoch": 1.4407717320615652, |
| "grad_norm": 0.82421875, |
| "learning_rate": 1.388e-06, |
| "loss": 0.2397, |
| "step": 20770 |
| }, |
| { |
| "epoch": 1.441465423802298, |
| "grad_norm": 1.2578125, |
| "learning_rate": 1.3862608695652175e-06, |
| "loss": 0.2441, |
| "step": 20780 |
| }, |
| { |
| "epoch": 1.4421591155430304, |
| "grad_norm": 1.1484375, |
| "learning_rate": 1.384521739130435e-06, |
| "loss": 0.2584, |
| "step": 20790 |
| }, |
| { |
| "epoch": 1.4428528072837632, |
| "grad_norm": 1.15625, |
| "learning_rate": 1.3827826086956522e-06, |
| "loss": 0.2123, |
| "step": 20800 |
| }, |
| { |
| "epoch": 1.443546499024496, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.3810434782608698e-06, |
| "loss": 0.2157, |
| "step": 20810 |
| }, |
| { |
| "epoch": 1.4442401907652287, |
| "grad_norm": 1.4609375, |
| "learning_rate": 1.379304347826087e-06, |
| "loss": 0.2407, |
| "step": 20820 |
| }, |
| { |
| "epoch": 1.4449338825059614, |
| "grad_norm": 1.3203125, |
| "learning_rate": 1.3775652173913045e-06, |
| "loss": 0.2347, |
| "step": 20830 |
| }, |
| { |
| "epoch": 1.4456275742466942, |
| "grad_norm": 1.234375, |
| "learning_rate": 1.375826086956522e-06, |
| "loss": 0.2429, |
| "step": 20840 |
| }, |
| { |
| "epoch": 1.446321265987427, |
| "grad_norm": 1.1328125, |
| "learning_rate": 1.3740869565217393e-06, |
| "loss": 0.2441, |
| "step": 20850 |
| }, |
| { |
| "epoch": 1.4470149577281595, |
| "grad_norm": 1.421875, |
| "learning_rate": 1.3723478260869566e-06, |
| "loss": 0.2429, |
| "step": 20860 |
| }, |
| { |
| "epoch": 1.4477086494688922, |
| "grad_norm": 1.0859375, |
| "learning_rate": 1.3706086956521742e-06, |
| "loss": 0.2133, |
| "step": 20870 |
| }, |
| { |
| "epoch": 1.448402341209625, |
| "grad_norm": 1.0546875, |
| "learning_rate": 1.3688695652173914e-06, |
| "loss": 0.232, |
| "step": 20880 |
| }, |
| { |
| "epoch": 1.4490960329503577, |
| "grad_norm": 1.4375, |
| "learning_rate": 1.367130434782609e-06, |
| "loss": 0.2202, |
| "step": 20890 |
| }, |
| { |
| "epoch": 1.4497897246910905, |
| "grad_norm": 1.109375, |
| "learning_rate": 1.3653913043478261e-06, |
| "loss": 0.2187, |
| "step": 20900 |
| }, |
| { |
| "epoch": 1.450483416431823, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.3636521739130437e-06, |
| "loss": 0.2306, |
| "step": 20910 |
| }, |
| { |
| "epoch": 1.4511771081725557, |
| "grad_norm": 1.40625, |
| "learning_rate": 1.3619130434782609e-06, |
| "loss": 0.2472, |
| "step": 20920 |
| }, |
| { |
| "epoch": 1.4518707999132885, |
| "grad_norm": 1.78125, |
| "learning_rate": 1.3601739130434782e-06, |
| "loss": 0.2569, |
| "step": 20930 |
| }, |
| { |
| "epoch": 1.4525644916540212, |
| "grad_norm": 0.85546875, |
| "learning_rate": 1.3584347826086958e-06, |
| "loss": 0.2277, |
| "step": 20940 |
| }, |
| { |
| "epoch": 1.453258183394754, |
| "grad_norm": 1.140625, |
| "learning_rate": 1.356695652173913e-06, |
| "loss": 0.2238, |
| "step": 20950 |
| }, |
| { |
| "epoch": 1.4539518751354867, |
| "grad_norm": 1.375, |
| "learning_rate": 1.3549565217391306e-06, |
| "loss": 0.2185, |
| "step": 20960 |
| }, |
| { |
| "epoch": 1.4546455668762195, |
| "grad_norm": 1.2734375, |
| "learning_rate": 1.3532173913043477e-06, |
| "loss": 0.2834, |
| "step": 20970 |
| }, |
| { |
| "epoch": 1.455339258616952, |
| "grad_norm": 1.25, |
| "learning_rate": 1.3514782608695653e-06, |
| "loss": 0.2911, |
| "step": 20980 |
| }, |
| { |
| "epoch": 1.4560329503576848, |
| "grad_norm": 1.4453125, |
| "learning_rate": 1.3497391304347827e-06, |
| "loss": 0.2226, |
| "step": 20990 |
| }, |
| { |
| "epoch": 1.4567266420984175, |
| "grad_norm": 1.0546875, |
| "learning_rate": 1.348e-06, |
| "loss": 0.2248, |
| "step": 21000 |
| }, |
| { |
| "epoch": 1.4574203338391503, |
| "grad_norm": 1.1171875, |
| "learning_rate": 1.3462608695652174e-06, |
| "loss": 0.2437, |
| "step": 21010 |
| }, |
| { |
| "epoch": 1.458114025579883, |
| "grad_norm": 1.3125, |
| "learning_rate": 1.344521739130435e-06, |
| "loss": 0.2421, |
| "step": 21020 |
| }, |
| { |
| "epoch": 1.4588077173206155, |
| "grad_norm": 1.328125, |
| "learning_rate": 1.3427826086956522e-06, |
| "loss": 0.2161, |
| "step": 21030 |
| }, |
| { |
| "epoch": 1.4595014090613483, |
| "grad_norm": 1.34375, |
| "learning_rate": 1.3410434782608697e-06, |
| "loss": 0.245, |
| "step": 21040 |
| }, |
| { |
| "epoch": 1.460195100802081, |
| "grad_norm": 1.4140625, |
| "learning_rate": 1.339304347826087e-06, |
| "loss": 0.2432, |
| "step": 21050 |
| }, |
| { |
| "epoch": 1.4608887925428138, |
| "grad_norm": 1.5625, |
| "learning_rate": 1.3375652173913045e-06, |
| "loss": 0.2379, |
| "step": 21060 |
| }, |
| { |
| "epoch": 1.4615824842835465, |
| "grad_norm": 1.5859375, |
| "learning_rate": 1.3358260869565219e-06, |
| "loss": 0.2513, |
| "step": 21070 |
| }, |
| { |
| "epoch": 1.4622761760242793, |
| "grad_norm": 1.203125, |
| "learning_rate": 1.3340869565217392e-06, |
| "loss": 0.3091, |
| "step": 21080 |
| }, |
| { |
| "epoch": 1.462969867765012, |
| "grad_norm": 1.4296875, |
| "learning_rate": 1.3323478260869566e-06, |
| "loss": 0.2542, |
| "step": 21090 |
| }, |
| { |
| "epoch": 1.4636635595057446, |
| "grad_norm": 1.296875, |
| "learning_rate": 1.3306086956521742e-06, |
| "loss": 0.2138, |
| "step": 21100 |
| }, |
| { |
| "epoch": 1.4643572512464773, |
| "grad_norm": 1.1484375, |
| "learning_rate": 1.3288695652173913e-06, |
| "loss": 0.252, |
| "step": 21110 |
| }, |
| { |
| "epoch": 1.46505094298721, |
| "grad_norm": 1.25, |
| "learning_rate": 1.327130434782609e-06, |
| "loss": 0.2159, |
| "step": 21120 |
| }, |
| { |
| "epoch": 1.4657446347279428, |
| "grad_norm": 1.3359375, |
| "learning_rate": 1.325391304347826e-06, |
| "loss": 0.2535, |
| "step": 21130 |
| }, |
| { |
| "epoch": 1.4664383264686756, |
| "grad_norm": 1.6328125, |
| "learning_rate": 1.3236521739130437e-06, |
| "loss": 0.3247, |
| "step": 21140 |
| }, |
| { |
| "epoch": 1.467132018209408, |
| "grad_norm": 1.1328125, |
| "learning_rate": 1.3219130434782608e-06, |
| "loss": 0.2415, |
| "step": 21150 |
| }, |
| { |
| "epoch": 1.4678257099501408, |
| "grad_norm": 1.125, |
| "learning_rate": 1.3201739130434784e-06, |
| "loss": 0.2295, |
| "step": 21160 |
| }, |
| { |
| "epoch": 1.4685194016908736, |
| "grad_norm": 1.3828125, |
| "learning_rate": 1.3184347826086958e-06, |
| "loss": 0.2348, |
| "step": 21170 |
| }, |
| { |
| "epoch": 1.4692130934316063, |
| "grad_norm": 1.1796875, |
| "learning_rate": 1.3166956521739134e-06, |
| "loss": 0.2122, |
| "step": 21180 |
| }, |
| { |
| "epoch": 1.469906785172339, |
| "grad_norm": 1.078125, |
| "learning_rate": 1.3149565217391305e-06, |
| "loss": 0.2565, |
| "step": 21190 |
| }, |
| { |
| "epoch": 1.4706004769130718, |
| "grad_norm": 1.390625, |
| "learning_rate": 1.3132173913043481e-06, |
| "loss": 0.2724, |
| "step": 21200 |
| }, |
| { |
| "epoch": 1.4712941686538046, |
| "grad_norm": 1.140625, |
| "learning_rate": 1.3114782608695653e-06, |
| "loss": 0.2413, |
| "step": 21210 |
| }, |
| { |
| "epoch": 1.471987860394537, |
| "grad_norm": 1.7890625, |
| "learning_rate": 1.3097391304347829e-06, |
| "loss": 0.235, |
| "step": 21220 |
| }, |
| { |
| "epoch": 1.4726815521352699, |
| "grad_norm": 0.92578125, |
| "learning_rate": 1.308e-06, |
| "loss": 0.2664, |
| "step": 21230 |
| }, |
| { |
| "epoch": 1.4733752438760026, |
| "grad_norm": 1.2578125, |
| "learning_rate": 1.3062608695652174e-06, |
| "loss": 0.2151, |
| "step": 21240 |
| }, |
| { |
| "epoch": 1.4740689356167354, |
| "grad_norm": 1.34375, |
| "learning_rate": 1.304521739130435e-06, |
| "loss": 0.2598, |
| "step": 21250 |
| }, |
| { |
| "epoch": 1.474762627357468, |
| "grad_norm": 1.7265625, |
| "learning_rate": 1.3027826086956521e-06, |
| "loss": 0.2444, |
| "step": 21260 |
| }, |
| { |
| "epoch": 1.4754563190982006, |
| "grad_norm": 1.671875, |
| "learning_rate": 1.3010434782608697e-06, |
| "loss": 0.3088, |
| "step": 21270 |
| }, |
| { |
| "epoch": 1.4761500108389334, |
| "grad_norm": 1.1484375, |
| "learning_rate": 1.2993043478260869e-06, |
| "loss": 0.2525, |
| "step": 21280 |
| }, |
| { |
| "epoch": 1.4768437025796661, |
| "grad_norm": 1.2109375, |
| "learning_rate": 1.2975652173913045e-06, |
| "loss": 0.2769, |
| "step": 21290 |
| }, |
| { |
| "epoch": 1.4775373943203989, |
| "grad_norm": 1.125, |
| "learning_rate": 1.2958260869565218e-06, |
| "loss": 0.2298, |
| "step": 21300 |
| }, |
| { |
| "epoch": 1.4782310860611316, |
| "grad_norm": 2.3125, |
| "learning_rate": 1.2940869565217392e-06, |
| "loss": 0.3014, |
| "step": 21310 |
| }, |
| { |
| "epoch": 1.4789247778018644, |
| "grad_norm": 2.015625, |
| "learning_rate": 1.2923478260869566e-06, |
| "loss": 0.2391, |
| "step": 21320 |
| }, |
| { |
| "epoch": 1.4796184695425971, |
| "grad_norm": 1.1640625, |
| "learning_rate": 1.2906086956521741e-06, |
| "loss": 0.2179, |
| "step": 21330 |
| }, |
| { |
| "epoch": 1.4803121612833297, |
| "grad_norm": 1.25, |
| "learning_rate": 1.2888695652173913e-06, |
| "loss": 0.2062, |
| "step": 21340 |
| }, |
| { |
| "epoch": 1.4810058530240624, |
| "grad_norm": 1.453125, |
| "learning_rate": 1.2871304347826089e-06, |
| "loss": 0.2396, |
| "step": 21350 |
| }, |
| { |
| "epoch": 1.4816995447647952, |
| "grad_norm": 1.09375, |
| "learning_rate": 1.285391304347826e-06, |
| "loss": 0.2306, |
| "step": 21360 |
| }, |
| { |
| "epoch": 1.482393236505528, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.2836521739130436e-06, |
| "loss": 0.3046, |
| "step": 21370 |
| }, |
| { |
| "epoch": 1.4830869282462606, |
| "grad_norm": 1.1796875, |
| "learning_rate": 1.281913043478261e-06, |
| "loss": 0.2427, |
| "step": 21380 |
| }, |
| { |
| "epoch": 1.4837806199869932, |
| "grad_norm": 1.96875, |
| "learning_rate": 1.2801739130434784e-06, |
| "loss": 0.2876, |
| "step": 21390 |
| }, |
| { |
| "epoch": 1.484474311727726, |
| "grad_norm": 1.25, |
| "learning_rate": 1.2784347826086957e-06, |
| "loss": 0.2684, |
| "step": 21400 |
| }, |
| { |
| "epoch": 1.4851680034684587, |
| "grad_norm": 1.34375, |
| "learning_rate": 1.2766956521739133e-06, |
| "loss": 0.2441, |
| "step": 21410 |
| }, |
| { |
| "epoch": 1.4858616952091914, |
| "grad_norm": 1.15625, |
| "learning_rate": 1.2749565217391305e-06, |
| "loss": 0.2629, |
| "step": 21420 |
| }, |
| { |
| "epoch": 1.4865553869499242, |
| "grad_norm": 1.265625, |
| "learning_rate": 1.273217391304348e-06, |
| "loss": 0.3055, |
| "step": 21430 |
| }, |
| { |
| "epoch": 1.487249078690657, |
| "grad_norm": 1.7265625, |
| "learning_rate": 1.2714782608695652e-06, |
| "loss": 0.2489, |
| "step": 21440 |
| }, |
| { |
| "epoch": 1.4879427704313897, |
| "grad_norm": 1.0703125, |
| "learning_rate": 1.2697391304347828e-06, |
| "loss": 0.2639, |
| "step": 21450 |
| }, |
| { |
| "epoch": 1.4886364621721222, |
| "grad_norm": 1.2578125, |
| "learning_rate": 1.268e-06, |
| "loss": 0.2326, |
| "step": 21460 |
| }, |
| { |
| "epoch": 1.489330153912855, |
| "grad_norm": 1.2109375, |
| "learning_rate": 1.2662608695652176e-06, |
| "loss": 0.2768, |
| "step": 21470 |
| }, |
| { |
| "epoch": 1.4900238456535877, |
| "grad_norm": 1.3671875, |
| "learning_rate": 1.264521739130435e-06, |
| "loss": 0.2324, |
| "step": 21480 |
| }, |
| { |
| "epoch": 1.4907175373943204, |
| "grad_norm": 1.1953125, |
| "learning_rate": 1.2627826086956523e-06, |
| "loss": 0.2632, |
| "step": 21490 |
| }, |
| { |
| "epoch": 1.491411229135053, |
| "grad_norm": 1.34375, |
| "learning_rate": 1.2610434782608697e-06, |
| "loss": 0.1857, |
| "step": 21500 |
| }, |
| { |
| "epoch": 1.4921049208757857, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.2593043478260873e-06, |
| "loss": 0.2186, |
| "step": 21510 |
| }, |
| { |
| "epoch": 1.4927986126165185, |
| "grad_norm": 1.234375, |
| "learning_rate": 1.2575652173913044e-06, |
| "loss": 0.2557, |
| "step": 21520 |
| }, |
| { |
| "epoch": 1.4934923043572512, |
| "grad_norm": 1.234375, |
| "learning_rate": 1.255826086956522e-06, |
| "loss": 0.2264, |
| "step": 21530 |
| }, |
| { |
| "epoch": 1.494185996097984, |
| "grad_norm": 0.99609375, |
| "learning_rate": 1.2540869565217392e-06, |
| "loss": 0.2318, |
| "step": 21540 |
| }, |
| { |
| "epoch": 1.4948796878387167, |
| "grad_norm": 1.21875, |
| "learning_rate": 1.2523478260869565e-06, |
| "loss": 0.2262, |
| "step": 21550 |
| }, |
| { |
| "epoch": 1.4955733795794495, |
| "grad_norm": 1.3515625, |
| "learning_rate": 1.2506086956521741e-06, |
| "loss": 0.2534, |
| "step": 21560 |
| }, |
| { |
| "epoch": 1.4962670713201822, |
| "grad_norm": 1.328125, |
| "learning_rate": 1.2488695652173915e-06, |
| "loss": 0.2823, |
| "step": 21570 |
| }, |
| { |
| "epoch": 1.4969607630609147, |
| "grad_norm": 1.3671875, |
| "learning_rate": 1.2471304347826088e-06, |
| "loss": 0.2572, |
| "step": 21580 |
| }, |
| { |
| "epoch": 1.4976544548016475, |
| "grad_norm": 1.109375, |
| "learning_rate": 1.2453913043478262e-06, |
| "loss": 0.2499, |
| "step": 21590 |
| }, |
| { |
| "epoch": 1.4983481465423802, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.2436521739130436e-06, |
| "loss": 0.2307, |
| "step": 21600 |
| }, |
| { |
| "epoch": 1.499041838283113, |
| "grad_norm": 1.21875, |
| "learning_rate": 1.241913043478261e-06, |
| "loss": 0.2116, |
| "step": 21610 |
| }, |
| { |
| "epoch": 1.4997355300238455, |
| "grad_norm": 1.2109375, |
| "learning_rate": 1.2401739130434783e-06, |
| "loss": 0.2305, |
| "step": 21620 |
| }, |
| { |
| "epoch": 1.5004292217645783, |
| "grad_norm": 1.3203125, |
| "learning_rate": 1.2384347826086957e-06, |
| "loss": 0.3057, |
| "step": 21630 |
| }, |
| { |
| "epoch": 1.501122913505311, |
| "grad_norm": 1.3203125, |
| "learning_rate": 1.2366956521739133e-06, |
| "loss": 0.24, |
| "step": 21640 |
| }, |
| { |
| "epoch": 1.5018166052460438, |
| "grad_norm": 1.40625, |
| "learning_rate": 1.2349565217391307e-06, |
| "loss": 0.2924, |
| "step": 21650 |
| }, |
| { |
| "epoch": 1.5025102969867765, |
| "grad_norm": 1.125, |
| "learning_rate": 1.233217391304348e-06, |
| "loss": 0.2801, |
| "step": 21660 |
| }, |
| { |
| "epoch": 1.5032039887275093, |
| "grad_norm": 1.203125, |
| "learning_rate": 1.2314782608695654e-06, |
| "loss": 0.2273, |
| "step": 21670 |
| }, |
| { |
| "epoch": 1.503897680468242, |
| "grad_norm": 1.1640625, |
| "learning_rate": 1.2297391304347828e-06, |
| "loss": 0.2464, |
| "step": 21680 |
| }, |
| { |
| "epoch": 1.5045913722089748, |
| "grad_norm": 1.1953125, |
| "learning_rate": 1.2280000000000001e-06, |
| "loss": 0.284, |
| "step": 21690 |
| }, |
| { |
| "epoch": 1.5052850639497075, |
| "grad_norm": 1.140625, |
| "learning_rate": 1.2262608695652175e-06, |
| "loss": 0.2401, |
| "step": 21700 |
| }, |
| { |
| "epoch": 1.50597875569044, |
| "grad_norm": 1.1328125, |
| "learning_rate": 1.2245217391304349e-06, |
| "loss": 0.2709, |
| "step": 21710 |
| }, |
| { |
| "epoch": 1.5066724474311728, |
| "grad_norm": 1.1328125, |
| "learning_rate": 1.2227826086956523e-06, |
| "loss": 0.2462, |
| "step": 21720 |
| }, |
| { |
| "epoch": 1.5073661391719055, |
| "grad_norm": 1.28125, |
| "learning_rate": 1.2210434782608696e-06, |
| "loss": 0.2562, |
| "step": 21730 |
| }, |
| { |
| "epoch": 1.508059830912638, |
| "grad_norm": 0.9765625, |
| "learning_rate": 1.219304347826087e-06, |
| "loss": 0.2227, |
| "step": 21740 |
| }, |
| { |
| "epoch": 1.5087535226533708, |
| "grad_norm": 1.265625, |
| "learning_rate": 1.2175652173913044e-06, |
| "loss": 0.2564, |
| "step": 21750 |
| }, |
| { |
| "epoch": 1.5094472143941036, |
| "grad_norm": 0.921875, |
| "learning_rate": 1.2158260869565217e-06, |
| "loss": 0.2372, |
| "step": 21760 |
| }, |
| { |
| "epoch": 1.5101409061348363, |
| "grad_norm": 0.99609375, |
| "learning_rate": 1.2140869565217391e-06, |
| "loss": 0.2139, |
| "step": 21770 |
| }, |
| { |
| "epoch": 1.510834597875569, |
| "grad_norm": 1.3359375, |
| "learning_rate": 1.2123478260869565e-06, |
| "loss": 0.2311, |
| "step": 21780 |
| }, |
| { |
| "epoch": 1.5115282896163018, |
| "grad_norm": 1.1796875, |
| "learning_rate": 1.210608695652174e-06, |
| "loss": 0.2667, |
| "step": 21790 |
| }, |
| { |
| "epoch": 1.5122219813570346, |
| "grad_norm": 1.1171875, |
| "learning_rate": 1.2088695652173914e-06, |
| "loss": 0.2117, |
| "step": 21800 |
| }, |
| { |
| "epoch": 1.5129156730977673, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.2071304347826088e-06, |
| "loss": 0.2567, |
| "step": 21810 |
| }, |
| { |
| "epoch": 1.5136093648384998, |
| "grad_norm": 1.3671875, |
| "learning_rate": 1.2053913043478262e-06, |
| "loss": 0.2249, |
| "step": 21820 |
| }, |
| { |
| "epoch": 1.5143030565792326, |
| "grad_norm": 1.125, |
| "learning_rate": 1.2036521739130436e-06, |
| "loss": 0.2845, |
| "step": 21830 |
| }, |
| { |
| "epoch": 1.5149967483199653, |
| "grad_norm": 1.359375, |
| "learning_rate": 1.201913043478261e-06, |
| "loss": 0.2194, |
| "step": 21840 |
| }, |
| { |
| "epoch": 1.5156904400606979, |
| "grad_norm": 1.2734375, |
| "learning_rate": 1.2001739130434783e-06, |
| "loss": 0.2333, |
| "step": 21850 |
| }, |
| { |
| "epoch": 1.5163841318014306, |
| "grad_norm": 1.234375, |
| "learning_rate": 1.1984347826086957e-06, |
| "loss": 0.2, |
| "step": 21860 |
| }, |
| { |
| "epoch": 1.5170778235421634, |
| "grad_norm": 1.1328125, |
| "learning_rate": 1.1966956521739132e-06, |
| "loss": 0.2649, |
| "step": 21870 |
| }, |
| { |
| "epoch": 1.517771515282896, |
| "grad_norm": 1.6171875, |
| "learning_rate": 1.1949565217391306e-06, |
| "loss": 0.2606, |
| "step": 21880 |
| }, |
| { |
| "epoch": 1.5184652070236289, |
| "grad_norm": 1.109375, |
| "learning_rate": 1.193217391304348e-06, |
| "loss": 0.2429, |
| "step": 21890 |
| }, |
| { |
| "epoch": 1.5191588987643616, |
| "grad_norm": 1.359375, |
| "learning_rate": 1.1914782608695654e-06, |
| "loss": 0.2327, |
| "step": 21900 |
| }, |
| { |
| "epoch": 1.5198525905050944, |
| "grad_norm": 1.0, |
| "learning_rate": 1.1897391304347827e-06, |
| "loss": 0.2417, |
| "step": 21910 |
| }, |
| { |
| "epoch": 1.520546282245827, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.188e-06, |
| "loss": 0.2538, |
| "step": 21920 |
| }, |
| { |
| "epoch": 1.5212399739865599, |
| "grad_norm": 1.078125, |
| "learning_rate": 1.1862608695652175e-06, |
| "loss": 0.2746, |
| "step": 21930 |
| }, |
| { |
| "epoch": 1.5219336657272924, |
| "grad_norm": 1.3359375, |
| "learning_rate": 1.1845217391304348e-06, |
| "loss": 0.2102, |
| "step": 21940 |
| }, |
| { |
| "epoch": 1.5226273574680251, |
| "grad_norm": 1.2421875, |
| "learning_rate": 1.1827826086956522e-06, |
| "loss": 0.2367, |
| "step": 21950 |
| }, |
| { |
| "epoch": 1.5233210492087579, |
| "grad_norm": 0.85546875, |
| "learning_rate": 1.1810434782608698e-06, |
| "loss": 0.2139, |
| "step": 21960 |
| }, |
| { |
| "epoch": 1.5240147409494904, |
| "grad_norm": 1.15625, |
| "learning_rate": 1.1793043478260872e-06, |
| "loss": 0.3149, |
| "step": 21970 |
| }, |
| { |
| "epoch": 1.5247084326902232, |
| "grad_norm": 1.1796875, |
| "learning_rate": 1.1775652173913045e-06, |
| "loss": 0.2062, |
| "step": 21980 |
| }, |
| { |
| "epoch": 1.525402124430956, |
| "grad_norm": 1.46875, |
| "learning_rate": 1.175826086956522e-06, |
| "loss": 0.2454, |
| "step": 21990 |
| }, |
| { |
| "epoch": 1.5260958161716887, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.1740869565217393e-06, |
| "loss": 0.1977, |
| "step": 22000 |
| }, |
| { |
| "epoch": 1.5267895079124214, |
| "grad_norm": 1.5546875, |
| "learning_rate": 1.1723478260869567e-06, |
| "loss": 0.26, |
| "step": 22010 |
| }, |
| { |
| "epoch": 1.5274831996531542, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.170608695652174e-06, |
| "loss": 0.2235, |
| "step": 22020 |
| }, |
| { |
| "epoch": 1.528176891393887, |
| "grad_norm": 1.265625, |
| "learning_rate": 1.1688695652173914e-06, |
| "loss": 0.2297, |
| "step": 22030 |
| }, |
| { |
| "epoch": 1.5288705831346197, |
| "grad_norm": 1.25, |
| "learning_rate": 1.1671304347826088e-06, |
| "loss": 0.2571, |
| "step": 22040 |
| }, |
| { |
| "epoch": 1.5295642748753524, |
| "grad_norm": 1.4453125, |
| "learning_rate": 1.1653913043478261e-06, |
| "loss": 0.2845, |
| "step": 22050 |
| }, |
| { |
| "epoch": 1.530257966616085, |
| "grad_norm": 1.453125, |
| "learning_rate": 1.1636521739130435e-06, |
| "loss": 0.2432, |
| "step": 22060 |
| }, |
| { |
| "epoch": 1.5309516583568177, |
| "grad_norm": 1.96875, |
| "learning_rate": 1.1619130434782609e-06, |
| "loss": 0.2807, |
| "step": 22070 |
| }, |
| { |
| "epoch": 1.5316453500975504, |
| "grad_norm": 1.09375, |
| "learning_rate": 1.1601739130434783e-06, |
| "loss": 0.222, |
| "step": 22080 |
| }, |
| { |
| "epoch": 1.532339041838283, |
| "grad_norm": 1.1875, |
| "learning_rate": 1.1584347826086956e-06, |
| "loss": 0.2292, |
| "step": 22090 |
| }, |
| { |
| "epoch": 1.5330327335790157, |
| "grad_norm": 1.8203125, |
| "learning_rate": 1.1566956521739132e-06, |
| "loss": 0.2663, |
| "step": 22100 |
| }, |
| { |
| "epoch": 1.5337264253197485, |
| "grad_norm": 1.3359375, |
| "learning_rate": 1.1549565217391306e-06, |
| "loss": 0.2528, |
| "step": 22110 |
| }, |
| { |
| "epoch": 1.5344201170604812, |
| "grad_norm": 1.484375, |
| "learning_rate": 1.153217391304348e-06, |
| "loss": 0.2552, |
| "step": 22120 |
| }, |
| { |
| "epoch": 1.535113808801214, |
| "grad_norm": 1.0859375, |
| "learning_rate": 1.1514782608695653e-06, |
| "loss": 0.1995, |
| "step": 22130 |
| }, |
| { |
| "epoch": 1.5358075005419467, |
| "grad_norm": 1.453125, |
| "learning_rate": 1.1497391304347827e-06, |
| "loss": 0.2302, |
| "step": 22140 |
| }, |
| { |
| "epoch": 1.5365011922826795, |
| "grad_norm": 1.1640625, |
| "learning_rate": 1.148e-06, |
| "loss": 0.2027, |
| "step": 22150 |
| }, |
| { |
| "epoch": 1.5371948840234122, |
| "grad_norm": 1.1875, |
| "learning_rate": 1.1462608695652174e-06, |
| "loss": 0.234, |
| "step": 22160 |
| }, |
| { |
| "epoch": 1.537888575764145, |
| "grad_norm": 1.3359375, |
| "learning_rate": 1.1445217391304348e-06, |
| "loss": 0.2325, |
| "step": 22170 |
| }, |
| { |
| "epoch": 1.5385822675048775, |
| "grad_norm": 1.703125, |
| "learning_rate": 1.1427826086956522e-06, |
| "loss": 0.2567, |
| "step": 22180 |
| }, |
| { |
| "epoch": 1.5392759592456102, |
| "grad_norm": 1.21875, |
| "learning_rate": 1.1410434782608698e-06, |
| "loss": 0.2226, |
| "step": 22190 |
| }, |
| { |
| "epoch": 1.539969650986343, |
| "grad_norm": 1.1015625, |
| "learning_rate": 1.1393043478260871e-06, |
| "loss": 0.2206, |
| "step": 22200 |
| }, |
| { |
| "epoch": 1.5406633427270755, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.1375652173913045e-06, |
| "loss": 0.2342, |
| "step": 22210 |
| }, |
| { |
| "epoch": 1.5413570344678083, |
| "grad_norm": 1.1875, |
| "learning_rate": 1.1358260869565219e-06, |
| "loss": 0.2695, |
| "step": 22220 |
| }, |
| { |
| "epoch": 1.542050726208541, |
| "grad_norm": 1.3671875, |
| "learning_rate": 1.1340869565217392e-06, |
| "loss": 0.2812, |
| "step": 22230 |
| }, |
| { |
| "epoch": 1.5427444179492737, |
| "grad_norm": 0.95703125, |
| "learning_rate": 1.1323478260869566e-06, |
| "loss": 0.2701, |
| "step": 22240 |
| }, |
| { |
| "epoch": 1.5434381096900065, |
| "grad_norm": 1.3046875, |
| "learning_rate": 1.130608695652174e-06, |
| "loss": 0.2258, |
| "step": 22250 |
| }, |
| { |
| "epoch": 1.5441318014307392, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.1288695652173914e-06, |
| "loss": 0.2496, |
| "step": 22260 |
| }, |
| { |
| "epoch": 1.544825493171472, |
| "grad_norm": 1.1796875, |
| "learning_rate": 1.127130434782609e-06, |
| "loss": 0.2058, |
| "step": 22270 |
| }, |
| { |
| "epoch": 1.5455191849122047, |
| "grad_norm": 1.265625, |
| "learning_rate": 1.1253913043478263e-06, |
| "loss": 0.2076, |
| "step": 22280 |
| }, |
| { |
| "epoch": 1.5462128766529375, |
| "grad_norm": 0.8984375, |
| "learning_rate": 1.1236521739130437e-06, |
| "loss": 0.2568, |
| "step": 22290 |
| }, |
| { |
| "epoch": 1.54690656839367, |
| "grad_norm": 1.2578125, |
| "learning_rate": 1.121913043478261e-06, |
| "loss": 0.2804, |
| "step": 22300 |
| }, |
| { |
| "epoch": 1.5476002601344028, |
| "grad_norm": 1.109375, |
| "learning_rate": 1.1201739130434784e-06, |
| "loss": 0.2401, |
| "step": 22310 |
| }, |
| { |
| "epoch": 1.5482939518751355, |
| "grad_norm": 0.953125, |
| "learning_rate": 1.1184347826086958e-06, |
| "loss": 0.2465, |
| "step": 22320 |
| }, |
| { |
| "epoch": 1.548987643615868, |
| "grad_norm": 1.171875, |
| "learning_rate": 1.1166956521739132e-06, |
| "loss": 0.3361, |
| "step": 22330 |
| }, |
| { |
| "epoch": 1.5496813353566008, |
| "grad_norm": 1.3359375, |
| "learning_rate": 1.1149565217391305e-06, |
| "loss": 0.2217, |
| "step": 22340 |
| }, |
| { |
| "epoch": 1.5503750270973335, |
| "grad_norm": 1.1875, |
| "learning_rate": 1.113217391304348e-06, |
| "loss": 0.2402, |
| "step": 22350 |
| }, |
| { |
| "epoch": 1.5510687188380663, |
| "grad_norm": 1.09375, |
| "learning_rate": 1.1114782608695653e-06, |
| "loss": 0.2065, |
| "step": 22360 |
| }, |
| { |
| "epoch": 1.551762410578799, |
| "grad_norm": 0.921875, |
| "learning_rate": 1.1097391304347827e-06, |
| "loss": 0.237, |
| "step": 22370 |
| }, |
| { |
| "epoch": 1.5524561023195318, |
| "grad_norm": 1.03125, |
| "learning_rate": 1.108e-06, |
| "loss": 0.3074, |
| "step": 22380 |
| }, |
| { |
| "epoch": 1.5531497940602645, |
| "grad_norm": 1.1796875, |
| "learning_rate": 1.1062608695652174e-06, |
| "loss": 0.2363, |
| "step": 22390 |
| }, |
| { |
| "epoch": 1.5538434858009973, |
| "grad_norm": 1.328125, |
| "learning_rate": 1.1045217391304348e-06, |
| "loss": 0.3468, |
| "step": 22400 |
| }, |
| { |
| "epoch": 1.55453717754173, |
| "grad_norm": 1.578125, |
| "learning_rate": 1.1027826086956521e-06, |
| "loss": 0.2699, |
| "step": 22410 |
| }, |
| { |
| "epoch": 1.5552308692824626, |
| "grad_norm": 1.375, |
| "learning_rate": 1.1010434782608697e-06, |
| "loss": 0.2687, |
| "step": 22420 |
| }, |
| { |
| "epoch": 1.5559245610231953, |
| "grad_norm": 0.9375, |
| "learning_rate": 1.099304347826087e-06, |
| "loss": 0.2097, |
| "step": 22430 |
| }, |
| { |
| "epoch": 1.556618252763928, |
| "grad_norm": 1.109375, |
| "learning_rate": 1.0975652173913045e-06, |
| "loss": 0.2544, |
| "step": 22440 |
| }, |
| { |
| "epoch": 1.5573119445046606, |
| "grad_norm": 1.1015625, |
| "learning_rate": 1.0958260869565218e-06, |
| "loss": 0.229, |
| "step": 22450 |
| }, |
| { |
| "epoch": 1.5580056362453933, |
| "grad_norm": 1.15625, |
| "learning_rate": 1.0940869565217392e-06, |
| "loss": 0.3094, |
| "step": 22460 |
| }, |
| { |
| "epoch": 1.558699327986126, |
| "grad_norm": 0.9296875, |
| "learning_rate": 1.0923478260869566e-06, |
| "loss": 0.1948, |
| "step": 22470 |
| }, |
| { |
| "epoch": 1.5593930197268588, |
| "grad_norm": 1.4375, |
| "learning_rate": 1.090608695652174e-06, |
| "loss": 0.2203, |
| "step": 22480 |
| }, |
| { |
| "epoch": 1.5600867114675916, |
| "grad_norm": 1.1328125, |
| "learning_rate": 1.0888695652173913e-06, |
| "loss": 0.2523, |
| "step": 22490 |
| }, |
| { |
| "epoch": 1.5607804032083243, |
| "grad_norm": 1.203125, |
| "learning_rate": 1.087130434782609e-06, |
| "loss": 0.2415, |
| "step": 22500 |
| }, |
| { |
| "epoch": 1.561474094949057, |
| "grad_norm": 1.15625, |
| "learning_rate": 1.0853913043478263e-06, |
| "loss": 0.268, |
| "step": 22510 |
| }, |
| { |
| "epoch": 1.5621677866897898, |
| "grad_norm": 1.4453125, |
| "learning_rate": 1.0836521739130436e-06, |
| "loss": 0.2474, |
| "step": 22520 |
| }, |
| { |
| "epoch": 1.5628614784305226, |
| "grad_norm": 1.15625, |
| "learning_rate": 1.081913043478261e-06, |
| "loss": 0.3104, |
| "step": 22530 |
| }, |
| { |
| "epoch": 1.5635551701712551, |
| "grad_norm": 1.0703125, |
| "learning_rate": 1.0801739130434784e-06, |
| "loss": 0.2487, |
| "step": 22540 |
| }, |
| { |
| "epoch": 1.5642488619119879, |
| "grad_norm": 1.21875, |
| "learning_rate": 1.0784347826086958e-06, |
| "loss": 0.2224, |
| "step": 22550 |
| }, |
| { |
| "epoch": 1.5649425536527206, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.0766956521739131e-06, |
| "loss": 0.2211, |
| "step": 22560 |
| }, |
| { |
| "epoch": 1.5656362453934531, |
| "grad_norm": 0.99609375, |
| "learning_rate": 1.0749565217391305e-06, |
| "loss": 0.2382, |
| "step": 22570 |
| }, |
| { |
| "epoch": 1.566329937134186, |
| "grad_norm": 1.5078125, |
| "learning_rate": 1.0732173913043479e-06, |
| "loss": 0.2285, |
| "step": 22580 |
| }, |
| { |
| "epoch": 1.5670236288749186, |
| "grad_norm": 1.5078125, |
| "learning_rate": 1.0714782608695655e-06, |
| "loss": 0.3084, |
| "step": 22590 |
| }, |
| { |
| "epoch": 1.5677173206156514, |
| "grad_norm": 1.140625, |
| "learning_rate": 1.0697391304347828e-06, |
| "loss": 0.2316, |
| "step": 22600 |
| }, |
| { |
| "epoch": 1.5684110123563841, |
| "grad_norm": 1.203125, |
| "learning_rate": 1.0680000000000002e-06, |
| "loss": 0.2359, |
| "step": 22610 |
| }, |
| { |
| "epoch": 1.5691047040971169, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.0662608695652176e-06, |
| "loss": 0.3312, |
| "step": 22620 |
| }, |
| { |
| "epoch": 1.5697983958378496, |
| "grad_norm": 1.2734375, |
| "learning_rate": 1.064521739130435e-06, |
| "loss": 0.2094, |
| "step": 22630 |
| }, |
| { |
| "epoch": 1.5704920875785824, |
| "grad_norm": 1.2265625, |
| "learning_rate": 1.062782608695652e-06, |
| "loss": 0.2005, |
| "step": 22640 |
| }, |
| { |
| "epoch": 1.5711857793193151, |
| "grad_norm": 1.3671875, |
| "learning_rate": 1.0610434782608697e-06, |
| "loss": 0.2151, |
| "step": 22650 |
| }, |
| { |
| "epoch": 1.5718794710600477, |
| "grad_norm": 1.28125, |
| "learning_rate": 1.059304347826087e-06, |
| "loss": 0.226, |
| "step": 22660 |
| }, |
| { |
| "epoch": 1.5725731628007804, |
| "grad_norm": 1.09375, |
| "learning_rate": 1.0575652173913044e-06, |
| "loss": 0.2764, |
| "step": 22670 |
| }, |
| { |
| "epoch": 1.5732668545415132, |
| "grad_norm": 1.1484375, |
| "learning_rate": 1.0558260869565218e-06, |
| "loss": 0.2149, |
| "step": 22680 |
| }, |
| { |
| "epoch": 1.5739605462822457, |
| "grad_norm": 0.8046875, |
| "learning_rate": 1.0540869565217392e-06, |
| "loss": 0.2572, |
| "step": 22690 |
| }, |
| { |
| "epoch": 1.5746542380229784, |
| "grad_norm": 1.359375, |
| "learning_rate": 1.0523478260869565e-06, |
| "loss": 0.2572, |
| "step": 22700 |
| }, |
| { |
| "epoch": 1.5753479297637112, |
| "grad_norm": 1.0703125, |
| "learning_rate": 1.050608695652174e-06, |
| "loss": 0.241, |
| "step": 22710 |
| }, |
| { |
| "epoch": 1.576041621504444, |
| "grad_norm": 1.078125, |
| "learning_rate": 1.0488695652173913e-06, |
| "loss": 0.2464, |
| "step": 22720 |
| }, |
| { |
| "epoch": 1.5767353132451767, |
| "grad_norm": 1.5, |
| "learning_rate": 1.0471304347826089e-06, |
| "loss": 0.2494, |
| "step": 22730 |
| }, |
| { |
| "epoch": 1.5774290049859094, |
| "grad_norm": 1.1640625, |
| "learning_rate": 1.0453913043478262e-06, |
| "loss": 0.314, |
| "step": 22740 |
| }, |
| { |
| "epoch": 1.5781226967266422, |
| "grad_norm": 1.203125, |
| "learning_rate": 1.0436521739130436e-06, |
| "loss": 0.231, |
| "step": 22750 |
| }, |
| { |
| "epoch": 1.578816388467375, |
| "grad_norm": 1.4453125, |
| "learning_rate": 1.041913043478261e-06, |
| "loss": 0.3283, |
| "step": 22760 |
| }, |
| { |
| "epoch": 1.5795100802081077, |
| "grad_norm": 1.0859375, |
| "learning_rate": 1.0401739130434783e-06, |
| "loss": 0.3071, |
| "step": 22770 |
| }, |
| { |
| "epoch": 1.5802037719488402, |
| "grad_norm": 1.125, |
| "learning_rate": 1.0384347826086957e-06, |
| "loss": 0.2743, |
| "step": 22780 |
| }, |
| { |
| "epoch": 1.580897463689573, |
| "grad_norm": 1.2890625, |
| "learning_rate": 1.036695652173913e-06, |
| "loss": 0.253, |
| "step": 22790 |
| }, |
| { |
| "epoch": 1.5815911554303057, |
| "grad_norm": 1.046875, |
| "learning_rate": 1.0349565217391305e-06, |
| "loss": 0.2143, |
| "step": 22800 |
| }, |
| { |
| "epoch": 1.5822848471710382, |
| "grad_norm": 1.140625, |
| "learning_rate": 1.0332173913043478e-06, |
| "loss": 0.2481, |
| "step": 22810 |
| }, |
| { |
| "epoch": 1.582978538911771, |
| "grad_norm": 1.375, |
| "learning_rate": 1.0314782608695654e-06, |
| "loss": 0.2572, |
| "step": 22820 |
| }, |
| { |
| "epoch": 1.5836722306525037, |
| "grad_norm": 1.171875, |
| "learning_rate": 1.0297391304347828e-06, |
| "loss": 0.2987, |
| "step": 22830 |
| }, |
| { |
| "epoch": 1.5843659223932365, |
| "grad_norm": 1.375, |
| "learning_rate": 1.0280000000000002e-06, |
| "loss": 0.2331, |
| "step": 22840 |
| }, |
| { |
| "epoch": 1.5850596141339692, |
| "grad_norm": 0.95703125, |
| "learning_rate": 1.0262608695652175e-06, |
| "loss": 0.2076, |
| "step": 22850 |
| }, |
| { |
| "epoch": 1.585753305874702, |
| "grad_norm": 1.125, |
| "learning_rate": 1.024521739130435e-06, |
| "loss": 0.2365, |
| "step": 22860 |
| }, |
| { |
| "epoch": 1.5864469976154347, |
| "grad_norm": 1.8828125, |
| "learning_rate": 1.0227826086956523e-06, |
| "loss": 0.2808, |
| "step": 22870 |
| }, |
| { |
| "epoch": 1.5871406893561675, |
| "grad_norm": 1.2265625, |
| "learning_rate": 1.0210434782608696e-06, |
| "loss": 0.2484, |
| "step": 22880 |
| }, |
| { |
| "epoch": 1.5878343810969002, |
| "grad_norm": 1.625, |
| "learning_rate": 1.019304347826087e-06, |
| "loss": 0.2585, |
| "step": 22890 |
| }, |
| { |
| "epoch": 1.5885280728376328, |
| "grad_norm": 1.1015625, |
| "learning_rate": 1.0175652173913044e-06, |
| "loss": 0.2316, |
| "step": 22900 |
| }, |
| { |
| "epoch": 1.5892217645783655, |
| "grad_norm": 1.6015625, |
| "learning_rate": 1.015826086956522e-06, |
| "loss": 0.2097, |
| "step": 22910 |
| }, |
| { |
| "epoch": 1.5899154563190983, |
| "grad_norm": 1.3203125, |
| "learning_rate": 1.0140869565217393e-06, |
| "loss": 0.275, |
| "step": 22920 |
| }, |
| { |
| "epoch": 1.5906091480598308, |
| "grad_norm": 1.359375, |
| "learning_rate": 1.0123478260869567e-06, |
| "loss": 0.2504, |
| "step": 22930 |
| }, |
| { |
| "epoch": 1.5913028398005635, |
| "grad_norm": 1.1953125, |
| "learning_rate": 1.0106086956521739e-06, |
| "loss": 0.2771, |
| "step": 22940 |
| }, |
| { |
| "epoch": 1.5919965315412963, |
| "grad_norm": 1.375, |
| "learning_rate": 1.0088695652173912e-06, |
| "loss": 0.2555, |
| "step": 22950 |
| }, |
| { |
| "epoch": 1.592690223282029, |
| "grad_norm": 1.2421875, |
| "learning_rate": 1.0071304347826088e-06, |
| "loss": 0.2447, |
| "step": 22960 |
| }, |
| { |
| "epoch": 1.5933839150227618, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.0053913043478262e-06, |
| "loss": 0.2536, |
| "step": 22970 |
| }, |
| { |
| "epoch": 1.5940776067634945, |
| "grad_norm": 1.1328125, |
| "learning_rate": 1.0036521739130436e-06, |
| "loss": 0.2336, |
| "step": 22980 |
| }, |
| { |
| "epoch": 1.5947712985042273, |
| "grad_norm": 1.4765625, |
| "learning_rate": 1.001913043478261e-06, |
| "loss": 0.2328, |
| "step": 22990 |
| }, |
| { |
| "epoch": 1.59546499024496, |
| "grad_norm": 1.703125, |
| "learning_rate": 1.0001739130434783e-06, |
| "loss": 0.2339, |
| "step": 23000 |
| }, |
| { |
| "epoch": 1.5961586819856928, |
| "grad_norm": 1.4609375, |
| "learning_rate": 9.984347826086957e-07, |
| "loss": 0.2749, |
| "step": 23010 |
| }, |
| { |
| "epoch": 1.5968523737264253, |
| "grad_norm": 1.390625, |
| "learning_rate": 9.96695652173913e-07, |
| "loss": 0.2484, |
| "step": 23020 |
| }, |
| { |
| "epoch": 1.597546065467158, |
| "grad_norm": 1.2734375, |
| "learning_rate": 9.949565217391304e-07, |
| "loss": 0.2099, |
| "step": 23030 |
| }, |
| { |
| "epoch": 1.5982397572078908, |
| "grad_norm": 2.0, |
| "learning_rate": 9.932173913043478e-07, |
| "loss": 0.2897, |
| "step": 23040 |
| }, |
| { |
| "epoch": 1.5989334489486233, |
| "grad_norm": 1.296875, |
| "learning_rate": 9.914782608695654e-07, |
| "loss": 0.2628, |
| "step": 23050 |
| }, |
| { |
| "epoch": 1.599627140689356, |
| "grad_norm": 1.15625, |
| "learning_rate": 9.897391304347827e-07, |
| "loss": 0.1823, |
| "step": 23060 |
| }, |
| { |
| "epoch": 1.6003208324300888, |
| "grad_norm": 0.796875, |
| "learning_rate": 9.880000000000001e-07, |
| "loss": 0.2547, |
| "step": 23070 |
| }, |
| { |
| "epoch": 1.6010145241708216, |
| "grad_norm": 1.1328125, |
| "learning_rate": 9.862608695652175e-07, |
| "loss": 0.2082, |
| "step": 23080 |
| }, |
| { |
| "epoch": 1.6017082159115543, |
| "grad_norm": 0.78515625, |
| "learning_rate": 9.845217391304349e-07, |
| "loss": 0.2021, |
| "step": 23090 |
| }, |
| { |
| "epoch": 1.602401907652287, |
| "grad_norm": 1.0859375, |
| "learning_rate": 9.827826086956522e-07, |
| "loss": 0.2168, |
| "step": 23100 |
| }, |
| { |
| "epoch": 1.6030955993930198, |
| "grad_norm": 1.3984375, |
| "learning_rate": 9.810434782608696e-07, |
| "loss": 0.3124, |
| "step": 23110 |
| }, |
| { |
| "epoch": 1.6037892911337526, |
| "grad_norm": 1.234375, |
| "learning_rate": 9.79304347826087e-07, |
| "loss": 0.2166, |
| "step": 23120 |
| }, |
| { |
| "epoch": 1.604482982874485, |
| "grad_norm": 0.9609375, |
| "learning_rate": 9.775652173913043e-07, |
| "loss": 0.2531, |
| "step": 23130 |
| }, |
| { |
| "epoch": 1.6051766746152178, |
| "grad_norm": 1.109375, |
| "learning_rate": 9.75826086956522e-07, |
| "loss": 0.3279, |
| "step": 23140 |
| }, |
| { |
| "epoch": 1.6058703663559506, |
| "grad_norm": 1.453125, |
| "learning_rate": 9.740869565217393e-07, |
| "loss": 0.242, |
| "step": 23150 |
| }, |
| { |
| "epoch": 1.6065640580966831, |
| "grad_norm": 1.078125, |
| "learning_rate": 9.723478260869567e-07, |
| "loss": 0.1916, |
| "step": 23160 |
| }, |
| { |
| "epoch": 1.6072577498374159, |
| "grad_norm": 1.53125, |
| "learning_rate": 9.70608695652174e-07, |
| "loss": 0.3144, |
| "step": 23170 |
| }, |
| { |
| "epoch": 1.6079514415781486, |
| "grad_norm": 1.15625, |
| "learning_rate": 9.688695652173914e-07, |
| "loss": 0.251, |
| "step": 23180 |
| }, |
| { |
| "epoch": 1.6086451333188814, |
| "grad_norm": 1.359375, |
| "learning_rate": 9.671304347826088e-07, |
| "loss": 0.2514, |
| "step": 23190 |
| }, |
| { |
| "epoch": 1.6093388250596141, |
| "grad_norm": 1.8984375, |
| "learning_rate": 9.653913043478261e-07, |
| "loss": 0.3102, |
| "step": 23200 |
| }, |
| { |
| "epoch": 1.6100325168003469, |
| "grad_norm": 1.2421875, |
| "learning_rate": 9.636521739130435e-07, |
| "loss": 0.2325, |
| "step": 23210 |
| }, |
| { |
| "epoch": 1.6107262085410796, |
| "grad_norm": 0.984375, |
| "learning_rate": 9.61913043478261e-07, |
| "loss": 0.2195, |
| "step": 23220 |
| }, |
| { |
| "epoch": 1.6114199002818124, |
| "grad_norm": 1.28125, |
| "learning_rate": 9.601739130434785e-07, |
| "loss": 0.2705, |
| "step": 23230 |
| }, |
| { |
| "epoch": 1.6121135920225451, |
| "grad_norm": 1.0234375, |
| "learning_rate": 9.584347826086958e-07, |
| "loss": 0.2394, |
| "step": 23240 |
| }, |
| { |
| "epoch": 1.6128072837632776, |
| "grad_norm": 1.2578125, |
| "learning_rate": 9.56695652173913e-07, |
| "loss": 0.2567, |
| "step": 23250 |
| }, |
| { |
| "epoch": 1.6135009755040104, |
| "grad_norm": 0.9453125, |
| "learning_rate": 9.549565217391304e-07, |
| "loss": 0.2116, |
| "step": 23260 |
| }, |
| { |
| "epoch": 1.6141946672447431, |
| "grad_norm": 1.125, |
| "learning_rate": 9.532173913043479e-07, |
| "loss": 0.2329, |
| "step": 23270 |
| }, |
| { |
| "epoch": 1.6148883589854757, |
| "grad_norm": 1.4765625, |
| "learning_rate": 9.514782608695652e-07, |
| "loss": 0.2868, |
| "step": 23280 |
| }, |
| { |
| "epoch": 1.6155820507262084, |
| "grad_norm": 1.1796875, |
| "learning_rate": 9.497391304347826e-07, |
| "loss": 0.289, |
| "step": 23290 |
| }, |
| { |
| "epoch": 1.6162757424669412, |
| "grad_norm": 1.171875, |
| "learning_rate": 9.480000000000001e-07, |
| "loss": 0.2032, |
| "step": 23300 |
| }, |
| { |
| "epoch": 1.616969434207674, |
| "grad_norm": 1.2890625, |
| "learning_rate": 9.462608695652174e-07, |
| "loss": 0.257, |
| "step": 23310 |
| }, |
| { |
| "epoch": 1.6176631259484067, |
| "grad_norm": 1.3046875, |
| "learning_rate": 9.445217391304348e-07, |
| "loss": 0.2119, |
| "step": 23320 |
| }, |
| { |
| "epoch": 1.6183568176891394, |
| "grad_norm": 1.296875, |
| "learning_rate": 9.427826086956522e-07, |
| "loss": 0.247, |
| "step": 23330 |
| }, |
| { |
| "epoch": 1.6190505094298722, |
| "grad_norm": 1.296875, |
| "learning_rate": 9.410434782608697e-07, |
| "loss": 0.2446, |
| "step": 23340 |
| }, |
| { |
| "epoch": 1.619744201170605, |
| "grad_norm": 1.5625, |
| "learning_rate": 9.39304347826087e-07, |
| "loss": 0.2619, |
| "step": 23350 |
| }, |
| { |
| "epoch": 1.6204378929113377, |
| "grad_norm": 1.390625, |
| "learning_rate": 9.375652173913044e-07, |
| "loss": 0.257, |
| "step": 23360 |
| }, |
| { |
| "epoch": 1.6211315846520702, |
| "grad_norm": 1.296875, |
| "learning_rate": 9.358260869565218e-07, |
| "loss": 0.2905, |
| "step": 23370 |
| }, |
| { |
| "epoch": 1.621825276392803, |
| "grad_norm": 1.0703125, |
| "learning_rate": 9.340869565217391e-07, |
| "loss": 0.2398, |
| "step": 23380 |
| }, |
| { |
| "epoch": 1.6225189681335357, |
| "grad_norm": 1.2421875, |
| "learning_rate": 9.323478260869566e-07, |
| "loss": 0.2423, |
| "step": 23390 |
| }, |
| { |
| "epoch": 1.6232126598742682, |
| "grad_norm": 1.3046875, |
| "learning_rate": 9.30608695652174e-07, |
| "loss": 0.2582, |
| "step": 23400 |
| }, |
| { |
| "epoch": 1.623906351615001, |
| "grad_norm": 1.3359375, |
| "learning_rate": 9.288695652173914e-07, |
| "loss": 0.2746, |
| "step": 23410 |
| }, |
| { |
| "epoch": 1.6246000433557337, |
| "grad_norm": 1.125, |
| "learning_rate": 9.271304347826087e-07, |
| "loss": 0.2465, |
| "step": 23420 |
| }, |
| { |
| "epoch": 1.6252937350964665, |
| "grad_norm": 1.296875, |
| "learning_rate": 9.253913043478262e-07, |
| "loss": 0.223, |
| "step": 23430 |
| }, |
| { |
| "epoch": 1.6259874268371992, |
| "grad_norm": 1.203125, |
| "learning_rate": 9.236521739130436e-07, |
| "loss": 0.2014, |
| "step": 23440 |
| }, |
| { |
| "epoch": 1.626681118577932, |
| "grad_norm": 1.21875, |
| "learning_rate": 9.21913043478261e-07, |
| "loss": 0.2196, |
| "step": 23450 |
| }, |
| { |
| "epoch": 1.6273748103186647, |
| "grad_norm": 0.9765625, |
| "learning_rate": 9.201739130434783e-07, |
| "loss": 0.2466, |
| "step": 23460 |
| }, |
| { |
| "epoch": 1.6280685020593975, |
| "grad_norm": 1.03125, |
| "learning_rate": 9.184347826086958e-07, |
| "loss": 0.2713, |
| "step": 23470 |
| }, |
| { |
| "epoch": 1.6287621938001302, |
| "grad_norm": 0.7734375, |
| "learning_rate": 9.166956521739132e-07, |
| "loss": 0.2448, |
| "step": 23480 |
| }, |
| { |
| "epoch": 1.6294558855408627, |
| "grad_norm": 1.8203125, |
| "learning_rate": 9.149565217391305e-07, |
| "loss": 0.2153, |
| "step": 23490 |
| }, |
| { |
| "epoch": 1.6301495772815955, |
| "grad_norm": 1.1484375, |
| "learning_rate": 9.132173913043479e-07, |
| "loss": 0.2345, |
| "step": 23500 |
| }, |
| { |
| "epoch": 1.6308432690223282, |
| "grad_norm": 1.484375, |
| "learning_rate": 9.114782608695653e-07, |
| "loss": 0.2398, |
| "step": 23510 |
| }, |
| { |
| "epoch": 1.6315369607630608, |
| "grad_norm": 1.546875, |
| "learning_rate": 9.097391304347828e-07, |
| "loss": 0.2641, |
| "step": 23520 |
| }, |
| { |
| "epoch": 1.6322306525037935, |
| "grad_norm": 1.265625, |
| "learning_rate": 9.080000000000001e-07, |
| "loss": 0.216, |
| "step": 23530 |
| }, |
| { |
| "epoch": 1.6329243442445263, |
| "grad_norm": 1.109375, |
| "learning_rate": 9.062608695652175e-07, |
| "loss": 0.2306, |
| "step": 23540 |
| }, |
| { |
| "epoch": 1.633618035985259, |
| "grad_norm": 1.2109375, |
| "learning_rate": 9.045217391304349e-07, |
| "loss": 0.233, |
| "step": 23550 |
| }, |
| { |
| "epoch": 1.6343117277259918, |
| "grad_norm": 1.1953125, |
| "learning_rate": 9.027826086956521e-07, |
| "loss": 0.2194, |
| "step": 23560 |
| }, |
| { |
| "epoch": 1.6350054194667245, |
| "grad_norm": 0.96484375, |
| "learning_rate": 9.010434782608696e-07, |
| "loss": 0.1982, |
| "step": 23570 |
| }, |
| { |
| "epoch": 1.6356991112074573, |
| "grad_norm": 1.40625, |
| "learning_rate": 8.99304347826087e-07, |
| "loss": 0.2591, |
| "step": 23580 |
| }, |
| { |
| "epoch": 1.63639280294819, |
| "grad_norm": 1.3046875, |
| "learning_rate": 8.975652173913044e-07, |
| "loss": 0.2702, |
| "step": 23590 |
| }, |
| { |
| "epoch": 1.6370864946889228, |
| "grad_norm": 1.5625, |
| "learning_rate": 8.958260869565217e-07, |
| "loss": 0.2622, |
| "step": 23600 |
| }, |
| { |
| "epoch": 1.6377801864296553, |
| "grad_norm": 0.9453125, |
| "learning_rate": 8.940869565217391e-07, |
| "loss": 0.2386, |
| "step": 23610 |
| }, |
| { |
| "epoch": 1.638473878170388, |
| "grad_norm": 1.0234375, |
| "learning_rate": 8.923478260869566e-07, |
| "loss": 0.2167, |
| "step": 23620 |
| }, |
| { |
| "epoch": 1.6391675699111208, |
| "grad_norm": 1.109375, |
| "learning_rate": 8.90608695652174e-07, |
| "loss": 0.2375, |
| "step": 23630 |
| }, |
| { |
| "epoch": 1.6398612616518533, |
| "grad_norm": 1.21875, |
| "learning_rate": 8.888695652173913e-07, |
| "loss": 0.2261, |
| "step": 23640 |
| }, |
| { |
| "epoch": 1.640554953392586, |
| "grad_norm": 1.1953125, |
| "learning_rate": 8.871304347826087e-07, |
| "loss": 0.2658, |
| "step": 23650 |
| }, |
| { |
| "epoch": 1.6412486451333188, |
| "grad_norm": 1.0625, |
| "learning_rate": 8.853913043478262e-07, |
| "loss": 0.2705, |
| "step": 23660 |
| }, |
| { |
| "epoch": 1.6419423368740516, |
| "grad_norm": 1.0078125, |
| "learning_rate": 8.836521739130435e-07, |
| "loss": 0.2295, |
| "step": 23670 |
| }, |
| { |
| "epoch": 1.6426360286147843, |
| "grad_norm": 0.99609375, |
| "learning_rate": 8.819130434782609e-07, |
| "loss": 0.2608, |
| "step": 23680 |
| }, |
| { |
| "epoch": 1.643329720355517, |
| "grad_norm": 0.984375, |
| "learning_rate": 8.801739130434783e-07, |
| "loss": 0.2073, |
| "step": 23690 |
| }, |
| { |
| "epoch": 1.6440234120962498, |
| "grad_norm": 1.25, |
| "learning_rate": 8.784347826086958e-07, |
| "loss": 0.2729, |
| "step": 23700 |
| }, |
| { |
| "epoch": 1.6447171038369826, |
| "grad_norm": 1.09375, |
| "learning_rate": 8.766956521739131e-07, |
| "loss": 0.2614, |
| "step": 23710 |
| }, |
| { |
| "epoch": 1.6454107955777153, |
| "grad_norm": 1.09375, |
| "learning_rate": 8.749565217391305e-07, |
| "loss": 0.2278, |
| "step": 23720 |
| }, |
| { |
| "epoch": 1.6461044873184478, |
| "grad_norm": 1.2109375, |
| "learning_rate": 8.732173913043479e-07, |
| "loss": 0.2365, |
| "step": 23730 |
| }, |
| { |
| "epoch": 1.6467981790591806, |
| "grad_norm": 1.3125, |
| "learning_rate": 8.714782608695654e-07, |
| "loss": 0.2512, |
| "step": 23740 |
| }, |
| { |
| "epoch": 1.6474918707999133, |
| "grad_norm": 1.46875, |
| "learning_rate": 8.697391304347827e-07, |
| "loss": 0.3113, |
| "step": 23750 |
| }, |
| { |
| "epoch": 1.6481855625406459, |
| "grad_norm": 1.375, |
| "learning_rate": 8.680000000000001e-07, |
| "loss": 0.2599, |
| "step": 23760 |
| }, |
| { |
| "epoch": 1.6488792542813786, |
| "grad_norm": 0.9921875, |
| "learning_rate": 8.662608695652175e-07, |
| "loss": 0.2318, |
| "step": 23770 |
| }, |
| { |
| "epoch": 1.6495729460221114, |
| "grad_norm": 0.96484375, |
| "learning_rate": 8.645217391304348e-07, |
| "loss": 0.2484, |
| "step": 23780 |
| }, |
| { |
| "epoch": 1.650266637762844, |
| "grad_norm": 1.609375, |
| "learning_rate": 8.627826086956523e-07, |
| "loss": 0.2319, |
| "step": 23790 |
| }, |
| { |
| "epoch": 1.6509603295035769, |
| "grad_norm": 1.1953125, |
| "learning_rate": 8.610434782608697e-07, |
| "loss": 0.2098, |
| "step": 23800 |
| }, |
| { |
| "epoch": 1.6516540212443096, |
| "grad_norm": 1.0078125, |
| "learning_rate": 8.593043478260871e-07, |
| "loss": 0.2516, |
| "step": 23810 |
| }, |
| { |
| "epoch": 1.6523477129850423, |
| "grad_norm": 0.9765625, |
| "learning_rate": 8.575652173913044e-07, |
| "loss": 0.205, |
| "step": 23820 |
| }, |
| { |
| "epoch": 1.653041404725775, |
| "grad_norm": 1.4375, |
| "learning_rate": 8.558260869565219e-07, |
| "loss": 0.2468, |
| "step": 23830 |
| }, |
| { |
| "epoch": 1.6537350964665078, |
| "grad_norm": 1.375, |
| "learning_rate": 8.540869565217393e-07, |
| "loss": 0.2423, |
| "step": 23840 |
| }, |
| { |
| "epoch": 1.6544287882072404, |
| "grad_norm": 1.171875, |
| "learning_rate": 8.523478260869566e-07, |
| "loss": 0.2577, |
| "step": 23850 |
| }, |
| { |
| "epoch": 1.6551224799479731, |
| "grad_norm": 1.296875, |
| "learning_rate": 8.50608695652174e-07, |
| "loss": 0.2681, |
| "step": 23860 |
| }, |
| { |
| "epoch": 1.6558161716887059, |
| "grad_norm": 1.234375, |
| "learning_rate": 8.488695652173913e-07, |
| "loss": 0.2362, |
| "step": 23870 |
| }, |
| { |
| "epoch": 1.6565098634294384, |
| "grad_norm": 1.3203125, |
| "learning_rate": 8.471304347826087e-07, |
| "loss": 0.2296, |
| "step": 23880 |
| }, |
| { |
| "epoch": 1.6572035551701711, |
| "grad_norm": 1.265625, |
| "learning_rate": 8.453913043478261e-07, |
| "loss": 0.2451, |
| "step": 23890 |
| }, |
| { |
| "epoch": 1.657897246910904, |
| "grad_norm": 1.140625, |
| "learning_rate": 8.436521739130435e-07, |
| "loss": 0.2586, |
| "step": 23900 |
| }, |
| { |
| "epoch": 1.6585909386516366, |
| "grad_norm": 1.3203125, |
| "learning_rate": 8.419130434782609e-07, |
| "loss": 0.1875, |
| "step": 23910 |
| }, |
| { |
| "epoch": 1.6592846303923694, |
| "grad_norm": 1.2109375, |
| "learning_rate": 8.401739130434782e-07, |
| "loss": 0.2252, |
| "step": 23920 |
| }, |
| { |
| "epoch": 1.6599783221331021, |
| "grad_norm": 1.4375, |
| "learning_rate": 8.384347826086957e-07, |
| "loss": 0.2808, |
| "step": 23930 |
| }, |
| { |
| "epoch": 1.660672013873835, |
| "grad_norm": 1.3125, |
| "learning_rate": 8.366956521739131e-07, |
| "loss": 0.2576, |
| "step": 23940 |
| }, |
| { |
| "epoch": 1.6613657056145676, |
| "grad_norm": 1.1484375, |
| "learning_rate": 8.349565217391305e-07, |
| "loss": 0.2447, |
| "step": 23950 |
| }, |
| { |
| "epoch": 1.6620593973553004, |
| "grad_norm": 1.4921875, |
| "learning_rate": 8.332173913043478e-07, |
| "loss": 0.2325, |
| "step": 23960 |
| }, |
| { |
| "epoch": 1.662753089096033, |
| "grad_norm": 1.265625, |
| "learning_rate": 8.314782608695653e-07, |
| "loss": 0.1994, |
| "step": 23970 |
| }, |
| { |
| "epoch": 1.6634467808367657, |
| "grad_norm": 1.109375, |
| "learning_rate": 8.297391304347827e-07, |
| "loss": 0.2345, |
| "step": 23980 |
| }, |
| { |
| "epoch": 1.6641404725774984, |
| "grad_norm": 1.4453125, |
| "learning_rate": 8.280000000000001e-07, |
| "loss": 0.3115, |
| "step": 23990 |
| }, |
| { |
| "epoch": 1.664834164318231, |
| "grad_norm": 1.09375, |
| "learning_rate": 8.262608695652174e-07, |
| "loss": 0.2616, |
| "step": 24000 |
| }, |
| { |
| "epoch": 1.6655278560589637, |
| "grad_norm": 1.390625, |
| "learning_rate": 8.245217391304348e-07, |
| "loss": 0.2968, |
| "step": 24010 |
| }, |
| { |
| "epoch": 1.6662215477996964, |
| "grad_norm": 1.28125, |
| "learning_rate": 8.227826086956523e-07, |
| "loss": 0.2652, |
| "step": 24020 |
| }, |
| { |
| "epoch": 1.6669152395404292, |
| "grad_norm": 1.0625, |
| "learning_rate": 8.210434782608696e-07, |
| "loss": 0.275, |
| "step": 24030 |
| }, |
| { |
| "epoch": 1.667608931281162, |
| "grad_norm": 1.1171875, |
| "learning_rate": 8.19304347826087e-07, |
| "loss": 0.202, |
| "step": 24040 |
| }, |
| { |
| "epoch": 1.6683026230218947, |
| "grad_norm": 1.1796875, |
| "learning_rate": 8.175652173913044e-07, |
| "loss": 0.2373, |
| "step": 24050 |
| }, |
| { |
| "epoch": 1.6689963147626274, |
| "grad_norm": 1.21875, |
| "learning_rate": 8.158260869565219e-07, |
| "loss": 0.2368, |
| "step": 24060 |
| }, |
| { |
| "epoch": 1.6696900065033602, |
| "grad_norm": 1.546875, |
| "learning_rate": 8.140869565217392e-07, |
| "loss": 0.2361, |
| "step": 24070 |
| }, |
| { |
| "epoch": 1.670383698244093, |
| "grad_norm": 1.5078125, |
| "learning_rate": 8.123478260869566e-07, |
| "loss": 0.2362, |
| "step": 24080 |
| }, |
| { |
| "epoch": 1.6710773899848255, |
| "grad_norm": 1.28125, |
| "learning_rate": 8.10608695652174e-07, |
| "loss": 0.2434, |
| "step": 24090 |
| }, |
| { |
| "epoch": 1.6717710817255582, |
| "grad_norm": 1.2578125, |
| "learning_rate": 8.088695652173915e-07, |
| "loss": 0.2348, |
| "step": 24100 |
| }, |
| { |
| "epoch": 1.672464773466291, |
| "grad_norm": 1.0625, |
| "learning_rate": 8.071304347826088e-07, |
| "loss": 0.2239, |
| "step": 24110 |
| }, |
| { |
| "epoch": 1.6731584652070235, |
| "grad_norm": 1.3359375, |
| "learning_rate": 8.053913043478262e-07, |
| "loss": 0.2476, |
| "step": 24120 |
| }, |
| { |
| "epoch": 1.6738521569477562, |
| "grad_norm": 1.5234375, |
| "learning_rate": 8.036521739130436e-07, |
| "loss": 0.3045, |
| "step": 24130 |
| }, |
| { |
| "epoch": 1.674545848688489, |
| "grad_norm": 1.03125, |
| "learning_rate": 8.019130434782609e-07, |
| "loss": 0.2166, |
| "step": 24140 |
| }, |
| { |
| "epoch": 1.6752395404292217, |
| "grad_norm": 1.25, |
| "learning_rate": 8.001739130434784e-07, |
| "loss": 0.2401, |
| "step": 24150 |
| }, |
| { |
| "epoch": 1.6759332321699545, |
| "grad_norm": 1.0, |
| "learning_rate": 7.984347826086958e-07, |
| "loss": 0.2398, |
| "step": 24160 |
| }, |
| { |
| "epoch": 1.6766269239106872, |
| "grad_norm": 1.21875, |
| "learning_rate": 7.966956521739132e-07, |
| "loss": 0.2428, |
| "step": 24170 |
| }, |
| { |
| "epoch": 1.67732061565142, |
| "grad_norm": 1.0859375, |
| "learning_rate": 7.949565217391304e-07, |
| "loss": 0.2544, |
| "step": 24180 |
| }, |
| { |
| "epoch": 1.6780143073921527, |
| "grad_norm": 0.9765625, |
| "learning_rate": 7.932173913043478e-07, |
| "loss": 0.1938, |
| "step": 24190 |
| }, |
| { |
| "epoch": 1.6787079991328855, |
| "grad_norm": 1.2265625, |
| "learning_rate": 7.914782608695653e-07, |
| "loss": 0.2606, |
| "step": 24200 |
| }, |
| { |
| "epoch": 1.679401690873618, |
| "grad_norm": 1.125, |
| "learning_rate": 7.897391304347826e-07, |
| "loss": 0.2166, |
| "step": 24210 |
| }, |
| { |
| "epoch": 1.6800953826143508, |
| "grad_norm": 1.3125, |
| "learning_rate": 7.88e-07, |
| "loss": 0.2149, |
| "step": 24220 |
| }, |
| { |
| "epoch": 1.6807890743550835, |
| "grad_norm": 0.875, |
| "learning_rate": 7.862608695652174e-07, |
| "loss": 0.2509, |
| "step": 24230 |
| }, |
| { |
| "epoch": 1.681482766095816, |
| "grad_norm": 1.2734375, |
| "learning_rate": 7.845217391304348e-07, |
| "loss": 0.255, |
| "step": 24240 |
| }, |
| { |
| "epoch": 1.6821764578365488, |
| "grad_norm": 0.84375, |
| "learning_rate": 7.827826086956522e-07, |
| "loss": 0.2084, |
| "step": 24250 |
| }, |
| { |
| "epoch": 1.6828701495772815, |
| "grad_norm": 1.3671875, |
| "learning_rate": 7.810434782608696e-07, |
| "loss": 0.2378, |
| "step": 24260 |
| }, |
| { |
| "epoch": 1.6835638413180143, |
| "grad_norm": 1.2265625, |
| "learning_rate": 7.79304347826087e-07, |
| "loss": 0.2314, |
| "step": 24270 |
| }, |
| { |
| "epoch": 1.684257533058747, |
| "grad_norm": 1.3359375, |
| "learning_rate": 7.775652173913043e-07, |
| "loss": 0.2701, |
| "step": 24280 |
| }, |
| { |
| "epoch": 1.6849512247994798, |
| "grad_norm": 1.171875, |
| "learning_rate": 7.758260869565218e-07, |
| "loss": 0.2682, |
| "step": 24290 |
| }, |
| { |
| "epoch": 1.6856449165402125, |
| "grad_norm": 1.40625, |
| "learning_rate": 7.740869565217392e-07, |
| "loss": 0.235, |
| "step": 24300 |
| }, |
| { |
| "epoch": 1.6863386082809453, |
| "grad_norm": 1.4609375, |
| "learning_rate": 7.723478260869566e-07, |
| "loss": 0.2136, |
| "step": 24310 |
| }, |
| { |
| "epoch": 1.687032300021678, |
| "grad_norm": 1.4140625, |
| "learning_rate": 7.706086956521739e-07, |
| "loss": 0.2159, |
| "step": 24320 |
| }, |
| { |
| "epoch": 1.6877259917624106, |
| "grad_norm": 1.25, |
| "learning_rate": 7.688695652173914e-07, |
| "loss": 0.1848, |
| "step": 24330 |
| }, |
| { |
| "epoch": 1.6884196835031433, |
| "grad_norm": 1.3359375, |
| "learning_rate": 7.671304347826088e-07, |
| "loss": 0.2464, |
| "step": 24340 |
| }, |
| { |
| "epoch": 1.6891133752438758, |
| "grad_norm": 1.2109375, |
| "learning_rate": 7.653913043478262e-07, |
| "loss": 0.2249, |
| "step": 24350 |
| }, |
| { |
| "epoch": 1.6898070669846086, |
| "grad_norm": 1.3046875, |
| "learning_rate": 7.636521739130435e-07, |
| "loss": 0.2389, |
| "step": 24360 |
| }, |
| { |
| "epoch": 1.6905007587253413, |
| "grad_norm": 1.125, |
| "learning_rate": 7.619130434782609e-07, |
| "loss": 0.2296, |
| "step": 24370 |
| }, |
| { |
| "epoch": 1.691194450466074, |
| "grad_norm": 1.3984375, |
| "learning_rate": 7.601739130434784e-07, |
| "loss": 0.2403, |
| "step": 24380 |
| }, |
| { |
| "epoch": 1.6918881422068068, |
| "grad_norm": 1.515625, |
| "learning_rate": 7.584347826086957e-07, |
| "loss": 0.3061, |
| "step": 24390 |
| }, |
| { |
| "epoch": 1.6925818339475396, |
| "grad_norm": 1.4140625, |
| "learning_rate": 7.566956521739131e-07, |
| "loss": 0.225, |
| "step": 24400 |
| }, |
| { |
| "epoch": 1.6932755256882723, |
| "grad_norm": 1.21875, |
| "learning_rate": 7.549565217391305e-07, |
| "loss": 0.234, |
| "step": 24410 |
| }, |
| { |
| "epoch": 1.693969217429005, |
| "grad_norm": 1.046875, |
| "learning_rate": 7.53217391304348e-07, |
| "loss": 0.2418, |
| "step": 24420 |
| }, |
| { |
| "epoch": 1.6946629091697378, |
| "grad_norm": 1.375, |
| "learning_rate": 7.514782608695653e-07, |
| "loss": 0.3734, |
| "step": 24430 |
| }, |
| { |
| "epoch": 1.6953566009104704, |
| "grad_norm": 1.234375, |
| "learning_rate": 7.497391304347827e-07, |
| "loss": 0.2375, |
| "step": 24440 |
| }, |
| { |
| "epoch": 1.696050292651203, |
| "grad_norm": 1.3046875, |
| "learning_rate": 7.480000000000001e-07, |
| "loss": 0.2323, |
| "step": 24450 |
| }, |
| { |
| "epoch": 1.6967439843919359, |
| "grad_norm": 1.125, |
| "learning_rate": 7.462608695652176e-07, |
| "loss": 0.3004, |
| "step": 24460 |
| }, |
| { |
| "epoch": 1.6974376761326684, |
| "grad_norm": 1.28125, |
| "learning_rate": 7.445217391304349e-07, |
| "loss": 0.2572, |
| "step": 24470 |
| }, |
| { |
| "epoch": 1.6981313678734011, |
| "grad_norm": 1.4375, |
| "learning_rate": 7.427826086956523e-07, |
| "loss": 0.2329, |
| "step": 24480 |
| }, |
| { |
| "epoch": 1.6988250596141339, |
| "grad_norm": 1.125, |
| "learning_rate": 7.410434782608696e-07, |
| "loss": 0.2483, |
| "step": 24490 |
| }, |
| { |
| "epoch": 1.6995187513548666, |
| "grad_norm": 1.578125, |
| "learning_rate": 7.393043478260869e-07, |
| "loss": 0.205, |
| "step": 24500 |
| }, |
| { |
| "epoch": 1.7002124430955994, |
| "grad_norm": 1.25, |
| "learning_rate": 7.375652173913043e-07, |
| "loss": 0.2956, |
| "step": 24510 |
| }, |
| { |
| "epoch": 1.7009061348363321, |
| "grad_norm": 1.15625, |
| "learning_rate": 7.358260869565218e-07, |
| "loss": 0.2632, |
| "step": 24520 |
| }, |
| { |
| "epoch": 1.7015998265770649, |
| "grad_norm": 1.234375, |
| "learning_rate": 7.340869565217392e-07, |
| "loss": 0.244, |
| "step": 24530 |
| }, |
| { |
| "epoch": 1.7022935183177976, |
| "grad_norm": 1.1875, |
| "learning_rate": 7.323478260869565e-07, |
| "loss": 0.2039, |
| "step": 24540 |
| }, |
| { |
| "epoch": 1.7029872100585304, |
| "grad_norm": 1.734375, |
| "learning_rate": 7.306086956521739e-07, |
| "loss": 0.3392, |
| "step": 24550 |
| }, |
| { |
| "epoch": 1.703680901799263, |
| "grad_norm": 1.4140625, |
| "learning_rate": 7.288695652173914e-07, |
| "loss": 0.2325, |
| "step": 24560 |
| }, |
| { |
| "epoch": 1.7043745935399957, |
| "grad_norm": 1.171875, |
| "learning_rate": 7.271304347826087e-07, |
| "loss": 0.219, |
| "step": 24570 |
| }, |
| { |
| "epoch": 1.7050682852807284, |
| "grad_norm": 1.1875, |
| "learning_rate": 7.253913043478261e-07, |
| "loss": 0.229, |
| "step": 24580 |
| }, |
| { |
| "epoch": 1.705761977021461, |
| "grad_norm": 1.1328125, |
| "learning_rate": 7.236521739130435e-07, |
| "loss": 0.232, |
| "step": 24590 |
| }, |
| { |
| "epoch": 1.7064556687621937, |
| "grad_norm": 1.015625, |
| "learning_rate": 7.219130434782609e-07, |
| "loss": 0.2213, |
| "step": 24600 |
| }, |
| { |
| "epoch": 1.7071493605029264, |
| "grad_norm": 1.125, |
| "learning_rate": 7.201739130434783e-07, |
| "loss": 0.2818, |
| "step": 24610 |
| }, |
| { |
| "epoch": 1.7078430522436592, |
| "grad_norm": 1.375, |
| "learning_rate": 7.184347826086957e-07, |
| "loss": 0.2612, |
| "step": 24620 |
| }, |
| { |
| "epoch": 1.708536743984392, |
| "grad_norm": 1.609375, |
| "learning_rate": 7.166956521739131e-07, |
| "loss": 0.2474, |
| "step": 24630 |
| }, |
| { |
| "epoch": 1.7092304357251247, |
| "grad_norm": 1.484375, |
| "learning_rate": 7.149565217391304e-07, |
| "loss": 0.2656, |
| "step": 24640 |
| }, |
| { |
| "epoch": 1.7099241274658574, |
| "grad_norm": 1.1328125, |
| "learning_rate": 7.132173913043479e-07, |
| "loss": 0.2126, |
| "step": 24650 |
| }, |
| { |
| "epoch": 1.7106178192065902, |
| "grad_norm": 1.3203125, |
| "learning_rate": 7.114782608695653e-07, |
| "loss": 0.2265, |
| "step": 24660 |
| }, |
| { |
| "epoch": 1.711311510947323, |
| "grad_norm": 1.4609375, |
| "learning_rate": 7.097391304347827e-07, |
| "loss": 0.2468, |
| "step": 24670 |
| }, |
| { |
| "epoch": 1.7120052026880554, |
| "grad_norm": 1.1015625, |
| "learning_rate": 7.08e-07, |
| "loss": 0.2395, |
| "step": 24680 |
| }, |
| { |
| "epoch": 1.7126988944287882, |
| "grad_norm": 1.21875, |
| "learning_rate": 7.062608695652175e-07, |
| "loss": 0.2194, |
| "step": 24690 |
| }, |
| { |
| "epoch": 1.713392586169521, |
| "grad_norm": 1.265625, |
| "learning_rate": 7.045217391304349e-07, |
| "loss": 0.2355, |
| "step": 24700 |
| }, |
| { |
| "epoch": 1.7140862779102535, |
| "grad_norm": 1.125, |
| "learning_rate": 7.027826086956523e-07, |
| "loss": 0.2146, |
| "step": 24710 |
| }, |
| { |
| "epoch": 1.7147799696509862, |
| "grad_norm": 1.3125, |
| "learning_rate": 7.010434782608696e-07, |
| "loss": 0.2325, |
| "step": 24720 |
| }, |
| { |
| "epoch": 1.715473661391719, |
| "grad_norm": 1.046875, |
| "learning_rate": 6.99304347826087e-07, |
| "loss": 0.252, |
| "step": 24730 |
| }, |
| { |
| "epoch": 1.7161673531324517, |
| "grad_norm": 0.9921875, |
| "learning_rate": 6.975652173913045e-07, |
| "loss": 0.2504, |
| "step": 24740 |
| }, |
| { |
| "epoch": 1.7168610448731845, |
| "grad_norm": 0.97265625, |
| "learning_rate": 6.958260869565218e-07, |
| "loss": 0.2483, |
| "step": 24750 |
| }, |
| { |
| "epoch": 1.7175547366139172, |
| "grad_norm": 1.765625, |
| "learning_rate": 6.940869565217392e-07, |
| "loss": 0.2785, |
| "step": 24760 |
| }, |
| { |
| "epoch": 1.71824842835465, |
| "grad_norm": 1.828125, |
| "learning_rate": 6.923478260869566e-07, |
| "loss": 0.3625, |
| "step": 24770 |
| }, |
| { |
| "epoch": 1.7189421200953827, |
| "grad_norm": 1.21875, |
| "learning_rate": 6.906086956521741e-07, |
| "loss": 0.2435, |
| "step": 24780 |
| }, |
| { |
| "epoch": 1.7196358118361155, |
| "grad_norm": 1.5390625, |
| "learning_rate": 6.888695652173914e-07, |
| "loss": 0.2389, |
| "step": 24790 |
| }, |
| { |
| "epoch": 1.720329503576848, |
| "grad_norm": 1.25, |
| "learning_rate": 6.871304347826087e-07, |
| "loss": 0.2638, |
| "step": 24800 |
| }, |
| { |
| "epoch": 1.7210231953175807, |
| "grad_norm": 1.109375, |
| "learning_rate": 6.853913043478261e-07, |
| "loss": 0.2871, |
| "step": 24810 |
| }, |
| { |
| "epoch": 1.7217168870583135, |
| "grad_norm": 1.015625, |
| "learning_rate": 6.836521739130434e-07, |
| "loss": 0.2482, |
| "step": 24820 |
| }, |
| { |
| "epoch": 1.722410578799046, |
| "grad_norm": 1.6484375, |
| "learning_rate": 6.819130434782609e-07, |
| "loss": 0.2159, |
| "step": 24830 |
| }, |
| { |
| "epoch": 1.7231042705397788, |
| "grad_norm": 1.2421875, |
| "learning_rate": 6.801739130434783e-07, |
| "loss": 0.2426, |
| "step": 24840 |
| }, |
| { |
| "epoch": 1.7237979622805115, |
| "grad_norm": 1.6484375, |
| "learning_rate": 6.784347826086957e-07, |
| "loss": 0.2381, |
| "step": 24850 |
| }, |
| { |
| "epoch": 1.7244916540212443, |
| "grad_norm": 1.046875, |
| "learning_rate": 6.76695652173913e-07, |
| "loss": 0.2482, |
| "step": 24860 |
| }, |
| { |
| "epoch": 1.725185345761977, |
| "grad_norm": 1.09375, |
| "learning_rate": 6.749565217391304e-07, |
| "loss": 0.2337, |
| "step": 24870 |
| }, |
| { |
| "epoch": 1.7258790375027098, |
| "grad_norm": 0.7578125, |
| "learning_rate": 6.732173913043479e-07, |
| "loss": 0.2348, |
| "step": 24880 |
| }, |
| { |
| "epoch": 1.7265727292434425, |
| "grad_norm": 1.578125, |
| "learning_rate": 6.714782608695653e-07, |
| "loss": 0.271, |
| "step": 24890 |
| }, |
| { |
| "epoch": 1.7272664209841753, |
| "grad_norm": 0.96875, |
| "learning_rate": 6.697391304347826e-07, |
| "loss": 0.2612, |
| "step": 24900 |
| }, |
| { |
| "epoch": 1.727960112724908, |
| "grad_norm": 1.1796875, |
| "learning_rate": 6.68e-07, |
| "loss": 0.2523, |
| "step": 24910 |
| }, |
| { |
| "epoch": 1.7286538044656405, |
| "grad_norm": 1.3828125, |
| "learning_rate": 6.662608695652175e-07, |
| "loss": 0.2531, |
| "step": 24920 |
| }, |
| { |
| "epoch": 1.7293474962063733, |
| "grad_norm": 1.171875, |
| "learning_rate": 6.645217391304348e-07, |
| "loss": 0.226, |
| "step": 24930 |
| }, |
| { |
| "epoch": 1.730041187947106, |
| "grad_norm": 1.15625, |
| "learning_rate": 6.627826086956522e-07, |
| "loss": 0.2511, |
| "step": 24940 |
| }, |
| { |
| "epoch": 1.7307348796878386, |
| "grad_norm": 1.1328125, |
| "learning_rate": 6.610434782608696e-07, |
| "loss": 0.2648, |
| "step": 24950 |
| }, |
| { |
| "epoch": 1.7314285714285713, |
| "grad_norm": 1.4296875, |
| "learning_rate": 6.593043478260871e-07, |
| "loss": 0.2495, |
| "step": 24960 |
| }, |
| { |
| "epoch": 1.732122263169304, |
| "grad_norm": 1.03125, |
| "learning_rate": 6.575652173913044e-07, |
| "loss": 0.2491, |
| "step": 24970 |
| }, |
| { |
| "epoch": 1.7328159549100368, |
| "grad_norm": 1.1875, |
| "learning_rate": 6.558260869565218e-07, |
| "loss": 0.1996, |
| "step": 24980 |
| }, |
| { |
| "epoch": 1.7335096466507696, |
| "grad_norm": 1.265625, |
| "learning_rate": 6.540869565217392e-07, |
| "loss": 0.2316, |
| "step": 24990 |
| }, |
| { |
| "epoch": 1.7342033383915023, |
| "grad_norm": 1.1484375, |
| "learning_rate": 6.523478260869566e-07, |
| "loss": 0.2417, |
| "step": 25000 |
| }, |
| { |
| "epoch": 1.734897030132235, |
| "grad_norm": 1.0078125, |
| "learning_rate": 6.50608695652174e-07, |
| "loss": 0.2705, |
| "step": 25010 |
| }, |
| { |
| "epoch": 1.7355907218729678, |
| "grad_norm": 1.265625, |
| "learning_rate": 6.488695652173914e-07, |
| "loss": 0.234, |
| "step": 25020 |
| }, |
| { |
| "epoch": 1.7362844136137006, |
| "grad_norm": 1.3515625, |
| "learning_rate": 6.471304347826088e-07, |
| "loss": 0.2018, |
| "step": 25030 |
| }, |
| { |
| "epoch": 1.736978105354433, |
| "grad_norm": 1.71875, |
| "learning_rate": 6.453913043478261e-07, |
| "loss": 0.26, |
| "step": 25040 |
| }, |
| { |
| "epoch": 1.7376717970951658, |
| "grad_norm": 1.4375, |
| "learning_rate": 6.436521739130436e-07, |
| "loss": 0.2964, |
| "step": 25050 |
| }, |
| { |
| "epoch": 1.7383654888358986, |
| "grad_norm": 1.1484375, |
| "learning_rate": 6.41913043478261e-07, |
| "loss": 0.2349, |
| "step": 25060 |
| }, |
| { |
| "epoch": 1.7390591805766311, |
| "grad_norm": 1.359375, |
| "learning_rate": 6.401739130434784e-07, |
| "loss": 0.249, |
| "step": 25070 |
| }, |
| { |
| "epoch": 1.7397528723173639, |
| "grad_norm": 1.953125, |
| "learning_rate": 6.384347826086957e-07, |
| "loss": 0.2878, |
| "step": 25080 |
| }, |
| { |
| "epoch": 1.7404465640580966, |
| "grad_norm": 1.3671875, |
| "learning_rate": 6.366956521739132e-07, |
| "loss": 0.2922, |
| "step": 25090 |
| }, |
| { |
| "epoch": 1.7411402557988294, |
| "grad_norm": 1.328125, |
| "learning_rate": 6.349565217391306e-07, |
| "loss": 0.2339, |
| "step": 25100 |
| }, |
| { |
| "epoch": 1.741833947539562, |
| "grad_norm": 1.3671875, |
| "learning_rate": 6.332173913043478e-07, |
| "loss": 0.2677, |
| "step": 25110 |
| }, |
| { |
| "epoch": 1.7425276392802949, |
| "grad_norm": 1.5546875, |
| "learning_rate": 6.314782608695652e-07, |
| "loss": 0.2195, |
| "step": 25120 |
| }, |
| { |
| "epoch": 1.7432213310210276, |
| "grad_norm": 1.234375, |
| "learning_rate": 6.297391304347826e-07, |
| "loss": 0.2228, |
| "step": 25130 |
| }, |
| { |
| "epoch": 1.7439150227617604, |
| "grad_norm": 1.15625, |
| "learning_rate": 6.28e-07, |
| "loss": 0.1983, |
| "step": 25140 |
| }, |
| { |
| "epoch": 1.744608714502493, |
| "grad_norm": 1.3359375, |
| "learning_rate": 6.262608695652174e-07, |
| "loss": 0.2045, |
| "step": 25150 |
| }, |
| { |
| "epoch": 1.7453024062432256, |
| "grad_norm": 1.53125, |
| "learning_rate": 6.245217391304348e-07, |
| "loss": 0.2562, |
| "step": 25160 |
| }, |
| { |
| "epoch": 1.7459960979839584, |
| "grad_norm": 1.3984375, |
| "learning_rate": 6.227826086956523e-07, |
| "loss": 0.2394, |
| "step": 25170 |
| }, |
| { |
| "epoch": 1.7466897897246911, |
| "grad_norm": 0.98046875, |
| "learning_rate": 6.210434782608697e-07, |
| "loss": 0.2807, |
| "step": 25180 |
| }, |
| { |
| "epoch": 1.7473834814654237, |
| "grad_norm": 1.21875, |
| "learning_rate": 6.19304347826087e-07, |
| "loss": 0.2084, |
| "step": 25190 |
| }, |
| { |
| "epoch": 1.7480771732061564, |
| "grad_norm": 1.125, |
| "learning_rate": 6.175652173913044e-07, |
| "loss": 0.2275, |
| "step": 25200 |
| }, |
| { |
| "epoch": 1.7487708649468892, |
| "grad_norm": 1.9921875, |
| "learning_rate": 6.158260869565218e-07, |
| "loss": 0.3696, |
| "step": 25210 |
| }, |
| { |
| "epoch": 1.749464556687622, |
| "grad_norm": 1.6484375, |
| "learning_rate": 6.140869565217391e-07, |
| "loss": 0.3133, |
| "step": 25220 |
| }, |
| { |
| "epoch": 1.7501582484283547, |
| "grad_norm": 1.2265625, |
| "learning_rate": 6.123478260869565e-07, |
| "loss": 0.2205, |
| "step": 25230 |
| }, |
| { |
| "epoch": 1.7508519401690874, |
| "grad_norm": 0.9765625, |
| "learning_rate": 6.10608695652174e-07, |
| "loss": 0.2292, |
| "step": 25240 |
| }, |
| { |
| "epoch": 1.7515456319098202, |
| "grad_norm": 1.1640625, |
| "learning_rate": 6.088695652173914e-07, |
| "loss": 0.2705, |
| "step": 25250 |
| }, |
| { |
| "epoch": 1.752239323650553, |
| "grad_norm": 1.0625, |
| "learning_rate": 6.071304347826087e-07, |
| "loss": 0.2202, |
| "step": 25260 |
| }, |
| { |
| "epoch": 1.7529330153912857, |
| "grad_norm": 1.546875, |
| "learning_rate": 6.053913043478261e-07, |
| "loss": 0.2228, |
| "step": 25270 |
| }, |
| { |
| "epoch": 1.7536267071320182, |
| "grad_norm": 1.3046875, |
| "learning_rate": 6.036521739130436e-07, |
| "loss": 0.242, |
| "step": 25280 |
| }, |
| { |
| "epoch": 1.754320398872751, |
| "grad_norm": 0.96875, |
| "learning_rate": 6.01913043478261e-07, |
| "loss": 0.2173, |
| "step": 25290 |
| }, |
| { |
| "epoch": 1.7550140906134837, |
| "grad_norm": 1.375, |
| "learning_rate": 6.001739130434783e-07, |
| "loss": 0.2418, |
| "step": 25300 |
| }, |
| { |
| "epoch": 1.7557077823542162, |
| "grad_norm": 1.375, |
| "learning_rate": 5.984347826086957e-07, |
| "loss": 0.3065, |
| "step": 25310 |
| }, |
| { |
| "epoch": 1.756401474094949, |
| "grad_norm": 1.1875, |
| "learning_rate": 5.966956521739132e-07, |
| "loss": 0.2139, |
| "step": 25320 |
| }, |
| { |
| "epoch": 1.7570951658356817, |
| "grad_norm": 1.03125, |
| "learning_rate": 5.949565217391305e-07, |
| "loss": 0.2255, |
| "step": 25330 |
| }, |
| { |
| "epoch": 1.7577888575764145, |
| "grad_norm": 0.953125, |
| "learning_rate": 5.932173913043478e-07, |
| "loss": 0.2343, |
| "step": 25340 |
| }, |
| { |
| "epoch": 1.7584825493171472, |
| "grad_norm": 1.265625, |
| "learning_rate": 5.914782608695653e-07, |
| "loss": 0.2387, |
| "step": 25350 |
| }, |
| { |
| "epoch": 1.75917624105788, |
| "grad_norm": 1.171875, |
| "learning_rate": 5.897391304347827e-07, |
| "loss": 0.2096, |
| "step": 25360 |
| }, |
| { |
| "epoch": 1.7598699327986127, |
| "grad_norm": 1.6796875, |
| "learning_rate": 5.88e-07, |
| "loss": 0.2429, |
| "step": 25370 |
| }, |
| { |
| "epoch": 1.7605636245393455, |
| "grad_norm": 1.03125, |
| "learning_rate": 5.862608695652174e-07, |
| "loss": 0.2416, |
| "step": 25380 |
| }, |
| { |
| "epoch": 1.7612573162800782, |
| "grad_norm": 1.28125, |
| "learning_rate": 5.845217391304349e-07, |
| "loss": 0.2747, |
| "step": 25390 |
| }, |
| { |
| "epoch": 1.7619510080208107, |
| "grad_norm": 1.2265625, |
| "learning_rate": 5.827826086956522e-07, |
| "loss": 0.2036, |
| "step": 25400 |
| }, |
| { |
| "epoch": 1.7626446997615435, |
| "grad_norm": 1.1328125, |
| "learning_rate": 5.810434782608696e-07, |
| "loss": 0.2278, |
| "step": 25410 |
| }, |
| { |
| "epoch": 1.7633383915022762, |
| "grad_norm": 1.2890625, |
| "learning_rate": 5.79304347826087e-07, |
| "loss": 0.2085, |
| "step": 25420 |
| }, |
| { |
| "epoch": 1.7640320832430088, |
| "grad_norm": 0.98828125, |
| "learning_rate": 5.775652173913044e-07, |
| "loss": 0.2329, |
| "step": 25430 |
| }, |
| { |
| "epoch": 1.7647257749837415, |
| "grad_norm": 1.3203125, |
| "learning_rate": 5.758260869565218e-07, |
| "loss": 0.2339, |
| "step": 25440 |
| }, |
| { |
| "epoch": 1.7654194667244743, |
| "grad_norm": 1.140625, |
| "learning_rate": 5.740869565217392e-07, |
| "loss": 0.2597, |
| "step": 25450 |
| }, |
| { |
| "epoch": 1.766113158465207, |
| "grad_norm": 1.1796875, |
| "learning_rate": 5.723478260869566e-07, |
| "loss": 0.2197, |
| "step": 25460 |
| }, |
| { |
| "epoch": 1.7668068502059397, |
| "grad_norm": 1.0859375, |
| "learning_rate": 5.70608695652174e-07, |
| "loss": 0.2542, |
| "step": 25470 |
| }, |
| { |
| "epoch": 1.7675005419466725, |
| "grad_norm": 1.15625, |
| "learning_rate": 5.688695652173914e-07, |
| "loss": 0.2738, |
| "step": 25480 |
| }, |
| { |
| "epoch": 1.7681942336874052, |
| "grad_norm": 1.4140625, |
| "learning_rate": 5.671304347826087e-07, |
| "loss": 0.2285, |
| "step": 25490 |
| }, |
| { |
| "epoch": 1.768887925428138, |
| "grad_norm": 1.2265625, |
| "learning_rate": 5.653913043478261e-07, |
| "loss": 0.2296, |
| "step": 25500 |
| }, |
| { |
| "epoch": 1.7695816171688707, |
| "grad_norm": 1.6015625, |
| "learning_rate": 5.636521739130435e-07, |
| "loss": 0.2071, |
| "step": 25510 |
| }, |
| { |
| "epoch": 1.7702753089096033, |
| "grad_norm": 1.1953125, |
| "learning_rate": 5.619130434782609e-07, |
| "loss": 0.2482, |
| "step": 25520 |
| }, |
| { |
| "epoch": 1.770969000650336, |
| "grad_norm": 1.109375, |
| "learning_rate": 5.601739130434783e-07, |
| "loss": 0.3006, |
| "step": 25530 |
| }, |
| { |
| "epoch": 1.7716626923910688, |
| "grad_norm": 1.21875, |
| "learning_rate": 5.584347826086957e-07, |
| "loss": 0.2639, |
| "step": 25540 |
| }, |
| { |
| "epoch": 1.7723563841318013, |
| "grad_norm": 1.5546875, |
| "learning_rate": 5.566956521739131e-07, |
| "loss": 0.2591, |
| "step": 25550 |
| }, |
| { |
| "epoch": 1.773050075872534, |
| "grad_norm": 1.1875, |
| "learning_rate": 5.549565217391305e-07, |
| "loss": 0.3238, |
| "step": 25560 |
| }, |
| { |
| "epoch": 1.7737437676132668, |
| "grad_norm": 1.8515625, |
| "learning_rate": 5.532173913043479e-07, |
| "loss": 0.2856, |
| "step": 25570 |
| }, |
| { |
| "epoch": 1.7744374593539995, |
| "grad_norm": 1.1640625, |
| "learning_rate": 5.514782608695652e-07, |
| "loss": 0.3195, |
| "step": 25580 |
| }, |
| { |
| "epoch": 1.7751311510947323, |
| "grad_norm": 1.59375, |
| "learning_rate": 5.497391304347826e-07, |
| "loss": 0.2509, |
| "step": 25590 |
| }, |
| { |
| "epoch": 1.775824842835465, |
| "grad_norm": 1.140625, |
| "learning_rate": 5.480000000000001e-07, |
| "loss": 0.2397, |
| "step": 25600 |
| }, |
| { |
| "epoch": 1.7765185345761978, |
| "grad_norm": 1.1328125, |
| "learning_rate": 5.462608695652175e-07, |
| "loss": 0.2355, |
| "step": 25610 |
| }, |
| { |
| "epoch": 1.7772122263169305, |
| "grad_norm": 1.21875, |
| "learning_rate": 5.445217391304348e-07, |
| "loss": 0.1725, |
| "step": 25620 |
| }, |
| { |
| "epoch": 1.777905918057663, |
| "grad_norm": 1.1484375, |
| "learning_rate": 5.427826086956522e-07, |
| "loss": 0.3111, |
| "step": 25630 |
| }, |
| { |
| "epoch": 1.7785996097983958, |
| "grad_norm": 1.3046875, |
| "learning_rate": 5.410434782608697e-07, |
| "loss": 0.2527, |
| "step": 25640 |
| }, |
| { |
| "epoch": 1.7792933015391286, |
| "grad_norm": 1.28125, |
| "learning_rate": 5.393043478260869e-07, |
| "loss": 0.2612, |
| "step": 25650 |
| }, |
| { |
| "epoch": 1.779986993279861, |
| "grad_norm": 1.5390625, |
| "learning_rate": 5.375652173913043e-07, |
| "loss": 0.2509, |
| "step": 25660 |
| }, |
| { |
| "epoch": 1.7806806850205938, |
| "grad_norm": 1.390625, |
| "learning_rate": 5.358260869565218e-07, |
| "loss": 0.2457, |
| "step": 25670 |
| }, |
| { |
| "epoch": 1.7813743767613266, |
| "grad_norm": 1.15625, |
| "learning_rate": 5.340869565217392e-07, |
| "loss": 0.2486, |
| "step": 25680 |
| }, |
| { |
| "epoch": 1.7820680685020593, |
| "grad_norm": 1.3125, |
| "learning_rate": 5.323478260869565e-07, |
| "loss": 0.2046, |
| "step": 25690 |
| }, |
| { |
| "epoch": 1.782761760242792, |
| "grad_norm": 1.125, |
| "learning_rate": 5.306086956521739e-07, |
| "loss": 0.2119, |
| "step": 25700 |
| }, |
| { |
| "epoch": 1.7834554519835248, |
| "grad_norm": 1.0, |
| "learning_rate": 5.288695652173914e-07, |
| "loss": 0.2471, |
| "step": 25710 |
| }, |
| { |
| "epoch": 1.7841491437242576, |
| "grad_norm": 0.93359375, |
| "learning_rate": 5.271304347826088e-07, |
| "loss": 0.2273, |
| "step": 25720 |
| }, |
| { |
| "epoch": 1.7848428354649903, |
| "grad_norm": 0.95703125, |
| "learning_rate": 5.253913043478261e-07, |
| "loss": 0.2592, |
| "step": 25730 |
| }, |
| { |
| "epoch": 1.785536527205723, |
| "grad_norm": 1.3359375, |
| "learning_rate": 5.236521739130435e-07, |
| "loss": 0.2367, |
| "step": 25740 |
| }, |
| { |
| "epoch": 1.7862302189464556, |
| "grad_norm": 1.3046875, |
| "learning_rate": 5.21913043478261e-07, |
| "loss": 0.2357, |
| "step": 25750 |
| }, |
| { |
| "epoch": 1.7869239106871884, |
| "grad_norm": 1.2734375, |
| "learning_rate": 5.201739130434783e-07, |
| "loss": 0.219, |
| "step": 25760 |
| }, |
| { |
| "epoch": 1.7876176024279211, |
| "grad_norm": 1.171875, |
| "learning_rate": 5.184347826086957e-07, |
| "loss": 0.2937, |
| "step": 25770 |
| }, |
| { |
| "epoch": 1.7883112941686536, |
| "grad_norm": 0.984375, |
| "learning_rate": 5.166956521739131e-07, |
| "loss": 0.2343, |
| "step": 25780 |
| }, |
| { |
| "epoch": 1.7890049859093864, |
| "grad_norm": 1.3515625, |
| "learning_rate": 5.149565217391305e-07, |
| "loss": 0.226, |
| "step": 25790 |
| }, |
| { |
| "epoch": 1.7896986776501191, |
| "grad_norm": 0.921875, |
| "learning_rate": 5.132173913043478e-07, |
| "loss": 0.2387, |
| "step": 25800 |
| }, |
| { |
| "epoch": 1.790392369390852, |
| "grad_norm": 1.1015625, |
| "learning_rate": 5.114782608695652e-07, |
| "loss": 0.2074, |
| "step": 25810 |
| }, |
| { |
| "epoch": 1.7910860611315846, |
| "grad_norm": 1.1328125, |
| "learning_rate": 5.097391304347827e-07, |
| "loss": 0.243, |
| "step": 25820 |
| }, |
| { |
| "epoch": 1.7917797528723174, |
| "grad_norm": 1.21875, |
| "learning_rate": 5.08e-07, |
| "loss": 0.2289, |
| "step": 25830 |
| }, |
| { |
| "epoch": 1.7924734446130501, |
| "grad_norm": 1.1875, |
| "learning_rate": 5.062608695652174e-07, |
| "loss": 0.2942, |
| "step": 25840 |
| }, |
| { |
| "epoch": 1.7931671363537829, |
| "grad_norm": 1.1953125, |
| "learning_rate": 5.045217391304348e-07, |
| "loss": 0.2439, |
| "step": 25850 |
| }, |
| { |
| "epoch": 1.7938608280945156, |
| "grad_norm": 0.98046875, |
| "learning_rate": 5.027826086956522e-07, |
| "loss": 0.2651, |
| "step": 25860 |
| }, |
| { |
| "epoch": 1.7945545198352482, |
| "grad_norm": 1.4375, |
| "learning_rate": 5.010434782608696e-07, |
| "loss": 0.2474, |
| "step": 25870 |
| }, |
| { |
| "epoch": 1.795248211575981, |
| "grad_norm": 1.578125, |
| "learning_rate": 4.99304347826087e-07, |
| "loss": 0.2417, |
| "step": 25880 |
| }, |
| { |
| "epoch": 1.7959419033167137, |
| "grad_norm": 1.234375, |
| "learning_rate": 4.975652173913044e-07, |
| "loss": 0.2635, |
| "step": 25890 |
| }, |
| { |
| "epoch": 1.7966355950574462, |
| "grad_norm": 1.84375, |
| "learning_rate": 4.958260869565218e-07, |
| "loss": 0.3054, |
| "step": 25900 |
| }, |
| { |
| "epoch": 1.797329286798179, |
| "grad_norm": 1.1953125, |
| "learning_rate": 4.940869565217392e-07, |
| "loss": 0.2265, |
| "step": 25910 |
| }, |
| { |
| "epoch": 1.7980229785389117, |
| "grad_norm": 1.2109375, |
| "learning_rate": 4.923478260869566e-07, |
| "loss": 0.2727, |
| "step": 25920 |
| }, |
| { |
| "epoch": 1.7987166702796444, |
| "grad_norm": 1.0390625, |
| "learning_rate": 4.90608695652174e-07, |
| "loss": 0.2491, |
| "step": 25930 |
| }, |
| { |
| "epoch": 1.7994103620203772, |
| "grad_norm": 1.2578125, |
| "learning_rate": 4.888695652173913e-07, |
| "loss": 0.2025, |
| "step": 25940 |
| }, |
| { |
| "epoch": 1.80010405376111, |
| "grad_norm": 0.96484375, |
| "learning_rate": 4.871304347826088e-07, |
| "loss": 0.2422, |
| "step": 25950 |
| }, |
| { |
| "epoch": 1.8007977455018427, |
| "grad_norm": 1.4453125, |
| "learning_rate": 4.853913043478261e-07, |
| "loss": 0.2329, |
| "step": 25960 |
| }, |
| { |
| "epoch": 1.8014914372425754, |
| "grad_norm": 1.609375, |
| "learning_rate": 4.836521739130435e-07, |
| "loss": 0.2516, |
| "step": 25970 |
| }, |
| { |
| "epoch": 1.8021851289833082, |
| "grad_norm": 1.1953125, |
| "learning_rate": 4.819130434782609e-07, |
| "loss": 0.1947, |
| "step": 25980 |
| }, |
| { |
| "epoch": 1.8028788207240407, |
| "grad_norm": 1.390625, |
| "learning_rate": 4.801739130434783e-07, |
| "loss": 0.2751, |
| "step": 25990 |
| }, |
| { |
| "epoch": 1.8035725124647735, |
| "grad_norm": 1.5234375, |
| "learning_rate": 4.784347826086957e-07, |
| "loss": 0.2421, |
| "step": 26000 |
| }, |
| { |
| "epoch": 1.8042662042055062, |
| "grad_norm": 1.4609375, |
| "learning_rate": 4.7669565217391305e-07, |
| "loss": 0.2389, |
| "step": 26010 |
| }, |
| { |
| "epoch": 1.8049598959462387, |
| "grad_norm": 1.21875, |
| "learning_rate": 4.7495652173913047e-07, |
| "loss": 0.2199, |
| "step": 26020 |
| }, |
| { |
| "epoch": 1.8056535876869715, |
| "grad_norm": 1.71875, |
| "learning_rate": 4.7321739130434784e-07, |
| "loss": 0.2385, |
| "step": 26030 |
| }, |
| { |
| "epoch": 1.8063472794277042, |
| "grad_norm": 1.25, |
| "learning_rate": 4.7147826086956527e-07, |
| "loss": 0.2346, |
| "step": 26040 |
| }, |
| { |
| "epoch": 1.807040971168437, |
| "grad_norm": 1.3203125, |
| "learning_rate": 4.6973913043478264e-07, |
| "loss": 0.2409, |
| "step": 26050 |
| }, |
| { |
| "epoch": 1.8077346629091697, |
| "grad_norm": 1.2890625, |
| "learning_rate": 4.6800000000000006e-07, |
| "loss": 0.2329, |
| "step": 26060 |
| }, |
| { |
| "epoch": 1.8084283546499025, |
| "grad_norm": 1.28125, |
| "learning_rate": 4.6626086956521743e-07, |
| "loss": 0.2252, |
| "step": 26070 |
| }, |
| { |
| "epoch": 1.8091220463906352, |
| "grad_norm": 1.28125, |
| "learning_rate": 4.6452173913043486e-07, |
| "loss": 0.2564, |
| "step": 26080 |
| }, |
| { |
| "epoch": 1.809815738131368, |
| "grad_norm": 1.5078125, |
| "learning_rate": 4.6278260869565223e-07, |
| "loss": 0.2508, |
| "step": 26090 |
| }, |
| { |
| "epoch": 1.8105094298721007, |
| "grad_norm": 1.3828125, |
| "learning_rate": 4.6104347826086965e-07, |
| "loss": 0.2399, |
| "step": 26100 |
| }, |
| { |
| "epoch": 1.8112031216128333, |
| "grad_norm": 1.5234375, |
| "learning_rate": 4.5930434782608697e-07, |
| "loss": 0.2153, |
| "step": 26110 |
| }, |
| { |
| "epoch": 1.811896813353566, |
| "grad_norm": 1.3828125, |
| "learning_rate": 4.5756521739130434e-07, |
| "loss": 0.2163, |
| "step": 26120 |
| }, |
| { |
| "epoch": 1.8125905050942988, |
| "grad_norm": 1.3515625, |
| "learning_rate": 4.5582608695652177e-07, |
| "loss": 0.2719, |
| "step": 26130 |
| }, |
| { |
| "epoch": 1.8132841968350313, |
| "grad_norm": 0.9609375, |
| "learning_rate": 4.5408695652173914e-07, |
| "loss": 0.1888, |
| "step": 26140 |
| }, |
| { |
| "epoch": 1.813977888575764, |
| "grad_norm": 1.2734375, |
| "learning_rate": 4.5234782608695656e-07, |
| "loss": 0.3202, |
| "step": 26150 |
| }, |
| { |
| "epoch": 1.8146715803164968, |
| "grad_norm": 1.1171875, |
| "learning_rate": 4.5060869565217393e-07, |
| "loss": 0.21, |
| "step": 26160 |
| }, |
| { |
| "epoch": 1.8153652720572295, |
| "grad_norm": 1.1171875, |
| "learning_rate": 4.4886956521739136e-07, |
| "loss": 0.194, |
| "step": 26170 |
| }, |
| { |
| "epoch": 1.8160589637979623, |
| "grad_norm": 1.296875, |
| "learning_rate": 4.4713043478260873e-07, |
| "loss": 0.2186, |
| "step": 26180 |
| }, |
| { |
| "epoch": 1.816752655538695, |
| "grad_norm": 0.74609375, |
| "learning_rate": 4.4539130434782615e-07, |
| "loss": 0.2346, |
| "step": 26190 |
| }, |
| { |
| "epoch": 1.8174463472794278, |
| "grad_norm": 1.375, |
| "learning_rate": 4.436521739130435e-07, |
| "loss": 0.2373, |
| "step": 26200 |
| }, |
| { |
| "epoch": 1.8181400390201605, |
| "grad_norm": 1.0, |
| "learning_rate": 4.419130434782609e-07, |
| "loss": 0.262, |
| "step": 26210 |
| }, |
| { |
| "epoch": 1.8188337307608933, |
| "grad_norm": 0.97265625, |
| "learning_rate": 4.401739130434783e-07, |
| "loss": 0.2297, |
| "step": 26220 |
| }, |
| { |
| "epoch": 1.8195274225016258, |
| "grad_norm": 1.1171875, |
| "learning_rate": 4.384347826086957e-07, |
| "loss": 0.2302, |
| "step": 26230 |
| }, |
| { |
| "epoch": 1.8202211142423586, |
| "grad_norm": 0.94921875, |
| "learning_rate": 4.366956521739131e-07, |
| "loss": 0.2239, |
| "step": 26240 |
| }, |
| { |
| "epoch": 1.8209148059830913, |
| "grad_norm": 1.1796875, |
| "learning_rate": 4.349565217391305e-07, |
| "loss": 0.2234, |
| "step": 26250 |
| }, |
| { |
| "epoch": 1.8216084977238238, |
| "grad_norm": 1.09375, |
| "learning_rate": 4.332173913043479e-07, |
| "loss": 0.242, |
| "step": 26260 |
| }, |
| { |
| "epoch": 1.8223021894645566, |
| "grad_norm": 1.9609375, |
| "learning_rate": 4.314782608695652e-07, |
| "loss": 0.2617, |
| "step": 26270 |
| }, |
| { |
| "epoch": 1.8229958812052893, |
| "grad_norm": 1.4765625, |
| "learning_rate": 4.297391304347826e-07, |
| "loss": 0.2493, |
| "step": 26280 |
| }, |
| { |
| "epoch": 1.823689572946022, |
| "grad_norm": 1.3046875, |
| "learning_rate": 4.28e-07, |
| "loss": 0.2419, |
| "step": 26290 |
| }, |
| { |
| "epoch": 1.8243832646867548, |
| "grad_norm": 1.2265625, |
| "learning_rate": 4.262608695652174e-07, |
| "loss": 0.2478, |
| "step": 26300 |
| }, |
| { |
| "epoch": 1.8250769564274876, |
| "grad_norm": 1.3046875, |
| "learning_rate": 4.245217391304348e-07, |
| "loss": 0.2801, |
| "step": 26310 |
| }, |
| { |
| "epoch": 1.8257706481682203, |
| "grad_norm": 1.03125, |
| "learning_rate": 4.227826086956522e-07, |
| "loss": 0.2001, |
| "step": 26320 |
| }, |
| { |
| "epoch": 1.826464339908953, |
| "grad_norm": 0.890625, |
| "learning_rate": 4.210434782608696e-07, |
| "loss": 0.2197, |
| "step": 26330 |
| }, |
| { |
| "epoch": 1.8271580316496858, |
| "grad_norm": 1.28125, |
| "learning_rate": 4.19304347826087e-07, |
| "loss": 0.2616, |
| "step": 26340 |
| }, |
| { |
| "epoch": 1.8278517233904183, |
| "grad_norm": 1.40625, |
| "learning_rate": 4.175652173913044e-07, |
| "loss": 0.2663, |
| "step": 26350 |
| }, |
| { |
| "epoch": 1.828545415131151, |
| "grad_norm": 1.3515625, |
| "learning_rate": 4.158260869565218e-07, |
| "loss": 0.2307, |
| "step": 26360 |
| }, |
| { |
| "epoch": 1.8292391068718838, |
| "grad_norm": 1.3671875, |
| "learning_rate": 4.140869565217392e-07, |
| "loss": 0.2576, |
| "step": 26370 |
| }, |
| { |
| "epoch": 1.8299327986126164, |
| "grad_norm": 1.109375, |
| "learning_rate": 4.1234782608695657e-07, |
| "loss": 0.1986, |
| "step": 26380 |
| }, |
| { |
| "epoch": 1.8306264903533491, |
| "grad_norm": 1.3359375, |
| "learning_rate": 4.1060869565217394e-07, |
| "loss": 0.2378, |
| "step": 26390 |
| }, |
| { |
| "epoch": 1.8313201820940819, |
| "grad_norm": 1.78125, |
| "learning_rate": 4.0886956521739137e-07, |
| "loss": 0.2416, |
| "step": 26400 |
| }, |
| { |
| "epoch": 1.8320138738348146, |
| "grad_norm": 1.7890625, |
| "learning_rate": 4.0713043478260874e-07, |
| "loss": 0.3052, |
| "step": 26410 |
| }, |
| { |
| "epoch": 1.8327075655755474, |
| "grad_norm": 1.15625, |
| "learning_rate": 4.053913043478261e-07, |
| "loss": 0.2222, |
| "step": 26420 |
| }, |
| { |
| "epoch": 1.8334012573162801, |
| "grad_norm": 1.2890625, |
| "learning_rate": 4.036521739130435e-07, |
| "loss": 0.2901, |
| "step": 26430 |
| }, |
| { |
| "epoch": 1.8340949490570129, |
| "grad_norm": 1.4921875, |
| "learning_rate": 4.0191304347826085e-07, |
| "loss": 0.2526, |
| "step": 26440 |
| }, |
| { |
| "epoch": 1.8347886407977456, |
| "grad_norm": 1.234375, |
| "learning_rate": 4.001739130434783e-07, |
| "loss": 0.2955, |
| "step": 26450 |
| }, |
| { |
| "epoch": 1.8354823325384784, |
| "grad_norm": 1.8359375, |
| "learning_rate": 3.9843478260869565e-07, |
| "loss": 0.301, |
| "step": 26460 |
| }, |
| { |
| "epoch": 1.836176024279211, |
| "grad_norm": 1.28125, |
| "learning_rate": 3.9669565217391307e-07, |
| "loss": 0.2689, |
| "step": 26470 |
| }, |
| { |
| "epoch": 1.8368697160199436, |
| "grad_norm": 0.921875, |
| "learning_rate": 3.9495652173913044e-07, |
| "loss": 0.2165, |
| "step": 26480 |
| }, |
| { |
| "epoch": 1.8375634077606764, |
| "grad_norm": 1.96875, |
| "learning_rate": 3.9321739130434787e-07, |
| "loss": 0.3363, |
| "step": 26490 |
| }, |
| { |
| "epoch": 1.838257099501409, |
| "grad_norm": 1.5625, |
| "learning_rate": 3.9147826086956524e-07, |
| "loss": 0.2889, |
| "step": 26500 |
| }, |
| { |
| "epoch": 1.8389507912421417, |
| "grad_norm": 1.140625, |
| "learning_rate": 3.8973913043478266e-07, |
| "loss": 0.205, |
| "step": 26510 |
| }, |
| { |
| "epoch": 1.8396444829828744, |
| "grad_norm": 1.265625, |
| "learning_rate": 3.8800000000000003e-07, |
| "loss": 0.2951, |
| "step": 26520 |
| }, |
| { |
| "epoch": 1.8403381747236072, |
| "grad_norm": 1.25, |
| "learning_rate": 3.8626086956521746e-07, |
| "loss": 0.2299, |
| "step": 26530 |
| }, |
| { |
| "epoch": 1.84103186646434, |
| "grad_norm": 1.40625, |
| "learning_rate": 3.8452173913043483e-07, |
| "loss": 0.233, |
| "step": 26540 |
| }, |
| { |
| "epoch": 1.8417255582050727, |
| "grad_norm": 1.234375, |
| "learning_rate": 3.8278260869565225e-07, |
| "loss": 0.2723, |
| "step": 26550 |
| }, |
| { |
| "epoch": 1.8424192499458054, |
| "grad_norm": 1.21875, |
| "learning_rate": 3.810434782608696e-07, |
| "loss": 0.2245, |
| "step": 26560 |
| }, |
| { |
| "epoch": 1.8431129416865382, |
| "grad_norm": 1.0, |
| "learning_rate": 3.7930434782608705e-07, |
| "loss": 0.2378, |
| "step": 26570 |
| }, |
| { |
| "epoch": 1.843806633427271, |
| "grad_norm": 1.2734375, |
| "learning_rate": 3.7756521739130437e-07, |
| "loss": 0.2427, |
| "step": 26580 |
| }, |
| { |
| "epoch": 1.8445003251680034, |
| "grad_norm": 1.0703125, |
| "learning_rate": 3.7582608695652174e-07, |
| "loss": 0.2357, |
| "step": 26590 |
| }, |
| { |
| "epoch": 1.8451940169087362, |
| "grad_norm": 1.359375, |
| "learning_rate": 3.7408695652173916e-07, |
| "loss": 0.3183, |
| "step": 26600 |
| }, |
| { |
| "epoch": 1.845887708649469, |
| "grad_norm": 1.03125, |
| "learning_rate": 3.7234782608695653e-07, |
| "loss": 0.3006, |
| "step": 26610 |
| }, |
| { |
| "epoch": 1.8465814003902015, |
| "grad_norm": 1.5546875, |
| "learning_rate": 3.7060869565217396e-07, |
| "loss": 0.2224, |
| "step": 26620 |
| }, |
| { |
| "epoch": 1.8472750921309342, |
| "grad_norm": 1.140625, |
| "learning_rate": 3.6886956521739133e-07, |
| "loss": 0.221, |
| "step": 26630 |
| }, |
| { |
| "epoch": 1.847968783871667, |
| "grad_norm": 1.109375, |
| "learning_rate": 3.671304347826087e-07, |
| "loss": 0.2246, |
| "step": 26640 |
| }, |
| { |
| "epoch": 1.8486624756123997, |
| "grad_norm": 1.53125, |
| "learning_rate": 3.653913043478261e-07, |
| "loss": 0.2503, |
| "step": 26650 |
| }, |
| { |
| "epoch": 1.8493561673531325, |
| "grad_norm": 1.34375, |
| "learning_rate": 3.636521739130435e-07, |
| "loss": 0.1998, |
| "step": 26660 |
| }, |
| { |
| "epoch": 1.8500498590938652, |
| "grad_norm": 1.171875, |
| "learning_rate": 3.619130434782609e-07, |
| "loss": 0.2254, |
| "step": 26670 |
| }, |
| { |
| "epoch": 1.850743550834598, |
| "grad_norm": 1.25, |
| "learning_rate": 3.601739130434783e-07, |
| "loss": 0.2482, |
| "step": 26680 |
| }, |
| { |
| "epoch": 1.8514372425753307, |
| "grad_norm": 1.0, |
| "learning_rate": 3.584347826086957e-07, |
| "loss": 0.2333, |
| "step": 26690 |
| }, |
| { |
| "epoch": 1.8521309343160635, |
| "grad_norm": 1.515625, |
| "learning_rate": 3.566956521739131e-07, |
| "loss": 0.2732, |
| "step": 26700 |
| }, |
| { |
| "epoch": 1.852824626056796, |
| "grad_norm": 1.15625, |
| "learning_rate": 3.549565217391305e-07, |
| "loss": 0.2439, |
| "step": 26710 |
| }, |
| { |
| "epoch": 1.8535183177975287, |
| "grad_norm": 1.25, |
| "learning_rate": 3.532173913043479e-07, |
| "loss": 0.196, |
| "step": 26720 |
| }, |
| { |
| "epoch": 1.8542120095382615, |
| "grad_norm": 1.625, |
| "learning_rate": 3.514782608695652e-07, |
| "loss": 0.2994, |
| "step": 26730 |
| }, |
| { |
| "epoch": 1.854905701278994, |
| "grad_norm": 1.2421875, |
| "learning_rate": 3.497391304347826e-07, |
| "loss": 0.2272, |
| "step": 26740 |
| }, |
| { |
| "epoch": 1.8555993930197268, |
| "grad_norm": 1.2734375, |
| "learning_rate": 3.48e-07, |
| "loss": 0.2331, |
| "step": 26750 |
| }, |
| { |
| "epoch": 1.8562930847604595, |
| "grad_norm": 1.4453125, |
| "learning_rate": 3.462608695652174e-07, |
| "loss": 0.2274, |
| "step": 26760 |
| }, |
| { |
| "epoch": 1.8569867765011923, |
| "grad_norm": 1.53125, |
| "learning_rate": 3.445217391304348e-07, |
| "loss": 0.2571, |
| "step": 26770 |
| }, |
| { |
| "epoch": 1.857680468241925, |
| "grad_norm": 1.453125, |
| "learning_rate": 3.427826086956522e-07, |
| "loss": 0.2547, |
| "step": 26780 |
| }, |
| { |
| "epoch": 1.8583741599826578, |
| "grad_norm": 1.140625, |
| "learning_rate": 3.410434782608696e-07, |
| "loss": 0.2549, |
| "step": 26790 |
| }, |
| { |
| "epoch": 1.8590678517233905, |
| "grad_norm": 1.140625, |
| "learning_rate": 3.39304347826087e-07, |
| "loss": 0.278, |
| "step": 26800 |
| }, |
| { |
| "epoch": 1.8597615434641233, |
| "grad_norm": 0.97265625, |
| "learning_rate": 3.375652173913044e-07, |
| "loss": 0.2091, |
| "step": 26810 |
| }, |
| { |
| "epoch": 1.860455235204856, |
| "grad_norm": 2.015625, |
| "learning_rate": 3.3582608695652175e-07, |
| "loss": 0.2894, |
| "step": 26820 |
| }, |
| { |
| "epoch": 1.8611489269455885, |
| "grad_norm": 0.85546875, |
| "learning_rate": 3.3408695652173917e-07, |
| "loss": 0.2035, |
| "step": 26830 |
| }, |
| { |
| "epoch": 1.8618426186863213, |
| "grad_norm": 1.65625, |
| "learning_rate": 3.3234782608695654e-07, |
| "loss": 0.2096, |
| "step": 26840 |
| }, |
| { |
| "epoch": 1.862536310427054, |
| "grad_norm": 1.296875, |
| "learning_rate": 3.3060869565217397e-07, |
| "loss": 0.2182, |
| "step": 26850 |
| }, |
| { |
| "epoch": 1.8632300021677866, |
| "grad_norm": 1.2109375, |
| "learning_rate": 3.2886956521739134e-07, |
| "loss": 0.2323, |
| "step": 26860 |
| }, |
| { |
| "epoch": 1.8639236939085193, |
| "grad_norm": 0.96875, |
| "learning_rate": 3.2713043478260876e-07, |
| "loss": 0.307, |
| "step": 26870 |
| }, |
| { |
| "epoch": 1.864617385649252, |
| "grad_norm": 1.125, |
| "learning_rate": 3.2539130434782614e-07, |
| "loss": 0.2165, |
| "step": 26880 |
| }, |
| { |
| "epoch": 1.8653110773899848, |
| "grad_norm": 1.1015625, |
| "learning_rate": 3.2365217391304345e-07, |
| "loss": 0.2663, |
| "step": 26890 |
| }, |
| { |
| "epoch": 1.8660047691307176, |
| "grad_norm": 0.9765625, |
| "learning_rate": 3.219130434782609e-07, |
| "loss": 0.2491, |
| "step": 26900 |
| }, |
| { |
| "epoch": 1.8666984608714503, |
| "grad_norm": 1.609375, |
| "learning_rate": 3.2017391304347825e-07, |
| "loss": 0.3144, |
| "step": 26910 |
| }, |
| { |
| "epoch": 1.867392152612183, |
| "grad_norm": 1.578125, |
| "learning_rate": 3.1843478260869567e-07, |
| "loss": 0.2495, |
| "step": 26920 |
| }, |
| { |
| "epoch": 1.8680858443529158, |
| "grad_norm": 1.28125, |
| "learning_rate": 3.1669565217391304e-07, |
| "loss": 0.3146, |
| "step": 26930 |
| }, |
| { |
| "epoch": 1.8687795360936483, |
| "grad_norm": 1.2734375, |
| "learning_rate": 3.1495652173913047e-07, |
| "loss": 0.2254, |
| "step": 26940 |
| }, |
| { |
| "epoch": 1.869473227834381, |
| "grad_norm": 1.2890625, |
| "learning_rate": 3.1321739130434784e-07, |
| "loss": 0.2286, |
| "step": 26950 |
| }, |
| { |
| "epoch": 1.8701669195751138, |
| "grad_norm": 1.328125, |
| "learning_rate": 3.1147826086956526e-07, |
| "loss": 0.238, |
| "step": 26960 |
| }, |
| { |
| "epoch": 1.8708606113158464, |
| "grad_norm": 1.1015625, |
| "learning_rate": 3.0973913043478263e-07, |
| "loss": 0.2336, |
| "step": 26970 |
| }, |
| { |
| "epoch": 1.871554303056579, |
| "grad_norm": 1.15625, |
| "learning_rate": 3.0800000000000006e-07, |
| "loss": 0.2426, |
| "step": 26980 |
| }, |
| { |
| "epoch": 1.8722479947973119, |
| "grad_norm": 1.0390625, |
| "learning_rate": 3.0626086956521743e-07, |
| "loss": 0.2485, |
| "step": 26990 |
| }, |
| { |
| "epoch": 1.8729416865380446, |
| "grad_norm": 1.0625, |
| "learning_rate": 3.045217391304348e-07, |
| "loss": 0.2637, |
| "step": 27000 |
| }, |
| { |
| "epoch": 1.8736353782787774, |
| "grad_norm": 1.0859375, |
| "learning_rate": 3.0278260869565217e-07, |
| "loss": 0.2391, |
| "step": 27010 |
| }, |
| { |
| "epoch": 1.87432907001951, |
| "grad_norm": 1.3359375, |
| "learning_rate": 3.010434782608696e-07, |
| "loss": 0.2819, |
| "step": 27020 |
| }, |
| { |
| "epoch": 1.8750227617602429, |
| "grad_norm": 1.21875, |
| "learning_rate": 2.9930434782608697e-07, |
| "loss": 0.238, |
| "step": 27030 |
| }, |
| { |
| "epoch": 1.8757164535009756, |
| "grad_norm": 1.3125, |
| "learning_rate": 2.975652173913044e-07, |
| "loss": 0.2429, |
| "step": 27040 |
| }, |
| { |
| "epoch": 1.8764101452417083, |
| "grad_norm": 1.53125, |
| "learning_rate": 2.9582608695652176e-07, |
| "loss": 0.2977, |
| "step": 27050 |
| }, |
| { |
| "epoch": 1.8771038369824409, |
| "grad_norm": 1.375, |
| "learning_rate": 2.940869565217392e-07, |
| "loss": 0.2399, |
| "step": 27060 |
| }, |
| { |
| "epoch": 1.8777975287231736, |
| "grad_norm": 1.5234375, |
| "learning_rate": 2.9234782608695656e-07, |
| "loss": 0.2749, |
| "step": 27070 |
| }, |
| { |
| "epoch": 1.8784912204639064, |
| "grad_norm": 1.3203125, |
| "learning_rate": 2.9060869565217393e-07, |
| "loss": 0.2985, |
| "step": 27080 |
| }, |
| { |
| "epoch": 1.879184912204639, |
| "grad_norm": 1.3046875, |
| "learning_rate": 2.888695652173913e-07, |
| "loss": 0.228, |
| "step": 27090 |
| }, |
| { |
| "epoch": 1.8798786039453717, |
| "grad_norm": 1.3828125, |
| "learning_rate": 2.871304347826087e-07, |
| "loss": 0.2449, |
| "step": 27100 |
| }, |
| { |
| "epoch": 1.8805722956861044, |
| "grad_norm": 1.171875, |
| "learning_rate": 2.853913043478261e-07, |
| "loss": 0.1995, |
| "step": 27110 |
| }, |
| { |
| "epoch": 1.8812659874268371, |
| "grad_norm": 0.9609375, |
| "learning_rate": 2.836521739130435e-07, |
| "loss": 0.2751, |
| "step": 27120 |
| }, |
| { |
| "epoch": 1.88195967916757, |
| "grad_norm": 1.234375, |
| "learning_rate": 2.819130434782609e-07, |
| "loss": 0.2217, |
| "step": 27130 |
| }, |
| { |
| "epoch": 1.8826533709083026, |
| "grad_norm": 1.359375, |
| "learning_rate": 2.801739130434783e-07, |
| "loss": 0.2758, |
| "step": 27140 |
| }, |
| { |
| "epoch": 1.8833470626490354, |
| "grad_norm": 1.296875, |
| "learning_rate": 2.784347826086957e-07, |
| "loss": 0.2817, |
| "step": 27150 |
| }, |
| { |
| "epoch": 1.8840407543897681, |
| "grad_norm": 1.703125, |
| "learning_rate": 2.7669565217391306e-07, |
| "loss": 0.2296, |
| "step": 27160 |
| }, |
| { |
| "epoch": 1.884734446130501, |
| "grad_norm": 1.3046875, |
| "learning_rate": 2.7495652173913043e-07, |
| "loss": 0.219, |
| "step": 27170 |
| }, |
| { |
| "epoch": 1.8854281378712334, |
| "grad_norm": 1.2421875, |
| "learning_rate": 2.7321739130434785e-07, |
| "loss": 0.2468, |
| "step": 27180 |
| }, |
| { |
| "epoch": 1.8861218296119662, |
| "grad_norm": 1.6953125, |
| "learning_rate": 2.714782608695652e-07, |
| "loss": 0.2361, |
| "step": 27190 |
| }, |
| { |
| "epoch": 1.886815521352699, |
| "grad_norm": 1.296875, |
| "learning_rate": 2.6973913043478265e-07, |
| "loss": 0.2341, |
| "step": 27200 |
| }, |
| { |
| "epoch": 1.8875092130934314, |
| "grad_norm": 1.4453125, |
| "learning_rate": 2.68e-07, |
| "loss": 0.258, |
| "step": 27210 |
| }, |
| { |
| "epoch": 1.8882029048341642, |
| "grad_norm": 1.390625, |
| "learning_rate": 2.6626086956521744e-07, |
| "loss": 0.2434, |
| "step": 27220 |
| }, |
| { |
| "epoch": 1.888896596574897, |
| "grad_norm": 1.1171875, |
| "learning_rate": 2.645217391304348e-07, |
| "loss": 0.2194, |
| "step": 27230 |
| }, |
| { |
| "epoch": 1.8895902883156297, |
| "grad_norm": 1.1328125, |
| "learning_rate": 2.627826086956522e-07, |
| "loss": 0.2651, |
| "step": 27240 |
| }, |
| { |
| "epoch": 1.8902839800563624, |
| "grad_norm": 1.25, |
| "learning_rate": 2.6104347826086955e-07, |
| "loss": 0.2527, |
| "step": 27250 |
| }, |
| { |
| "epoch": 1.8909776717970952, |
| "grad_norm": 1.21875, |
| "learning_rate": 2.59304347826087e-07, |
| "loss": 0.2297, |
| "step": 27260 |
| }, |
| { |
| "epoch": 1.891671363537828, |
| "grad_norm": 1.34375, |
| "learning_rate": 2.5756521739130435e-07, |
| "loss": 0.2108, |
| "step": 27270 |
| }, |
| { |
| "epoch": 1.8923650552785607, |
| "grad_norm": 1.0234375, |
| "learning_rate": 2.558260869565218e-07, |
| "loss": 0.2001, |
| "step": 27280 |
| }, |
| { |
| "epoch": 1.8930587470192934, |
| "grad_norm": 0.9921875, |
| "learning_rate": 2.5408695652173915e-07, |
| "loss": 0.2906, |
| "step": 27290 |
| }, |
| { |
| "epoch": 1.893752438760026, |
| "grad_norm": 1.6796875, |
| "learning_rate": 2.5234782608695657e-07, |
| "loss": 0.2958, |
| "step": 27300 |
| }, |
| { |
| "epoch": 1.8944461305007587, |
| "grad_norm": 1.140625, |
| "learning_rate": 2.5060869565217394e-07, |
| "loss": 0.32, |
| "step": 27310 |
| }, |
| { |
| "epoch": 1.8951398222414915, |
| "grad_norm": 1.25, |
| "learning_rate": 2.488695652173913e-07, |
| "loss": 0.2223, |
| "step": 27320 |
| }, |
| { |
| "epoch": 1.895833513982224, |
| "grad_norm": 1.421875, |
| "learning_rate": 2.4713043478260874e-07, |
| "loss": 0.323, |
| "step": 27330 |
| }, |
| { |
| "epoch": 1.8965272057229567, |
| "grad_norm": 1.34375, |
| "learning_rate": 2.453913043478261e-07, |
| "loss": 0.2378, |
| "step": 27340 |
| }, |
| { |
| "epoch": 1.8972208974636895, |
| "grad_norm": 1.2890625, |
| "learning_rate": 2.436521739130435e-07, |
| "loss": 0.26, |
| "step": 27350 |
| }, |
| { |
| "epoch": 1.8979145892044222, |
| "grad_norm": 1.2421875, |
| "learning_rate": 2.419130434782609e-07, |
| "loss": 0.2459, |
| "step": 27360 |
| }, |
| { |
| "epoch": 1.898608280945155, |
| "grad_norm": 1.125, |
| "learning_rate": 2.4017391304347827e-07, |
| "loss": 0.2385, |
| "step": 27370 |
| }, |
| { |
| "epoch": 1.8993019726858877, |
| "grad_norm": 1.390625, |
| "learning_rate": 2.384347826086957e-07, |
| "loss": 0.2089, |
| "step": 27380 |
| }, |
| { |
| "epoch": 1.8999956644266205, |
| "grad_norm": 0.99609375, |
| "learning_rate": 2.3669565217391304e-07, |
| "loss": 0.2246, |
| "step": 27390 |
| }, |
| { |
| "epoch": 1.9006893561673532, |
| "grad_norm": 1.3515625, |
| "learning_rate": 2.3495652173913044e-07, |
| "loss": 0.2281, |
| "step": 27400 |
| }, |
| { |
| "epoch": 1.901383047908086, |
| "grad_norm": 1.4765625, |
| "learning_rate": 2.3321739130434784e-07, |
| "loss": 0.2276, |
| "step": 27410 |
| }, |
| { |
| "epoch": 1.9020767396488185, |
| "grad_norm": 1.0234375, |
| "learning_rate": 2.3147826086956523e-07, |
| "loss": 0.3057, |
| "step": 27420 |
| }, |
| { |
| "epoch": 1.9027704313895513, |
| "grad_norm": 1.203125, |
| "learning_rate": 2.2973913043478263e-07, |
| "loss": 0.237, |
| "step": 27430 |
| }, |
| { |
| "epoch": 1.903464123130284, |
| "grad_norm": 1.3828125, |
| "learning_rate": 2.2800000000000003e-07, |
| "loss": 0.2445, |
| "step": 27440 |
| }, |
| { |
| "epoch": 1.9041578148710165, |
| "grad_norm": 1.171875, |
| "learning_rate": 2.2626086956521743e-07, |
| "loss": 0.2948, |
| "step": 27450 |
| }, |
| { |
| "epoch": 1.9048515066117493, |
| "grad_norm": 1.203125, |
| "learning_rate": 2.2452173913043483e-07, |
| "loss": 0.2938, |
| "step": 27460 |
| }, |
| { |
| "epoch": 1.905545198352482, |
| "grad_norm": 1.21875, |
| "learning_rate": 2.2278260869565217e-07, |
| "loss": 0.3153, |
| "step": 27470 |
| }, |
| { |
| "epoch": 1.9062388900932148, |
| "grad_norm": 1.234375, |
| "learning_rate": 2.2104347826086957e-07, |
| "loss": 0.2448, |
| "step": 27480 |
| }, |
| { |
| "epoch": 1.9069325818339475, |
| "grad_norm": 1.046875, |
| "learning_rate": 2.1930434782608696e-07, |
| "loss": 0.2489, |
| "step": 27490 |
| }, |
| { |
| "epoch": 1.9076262735746803, |
| "grad_norm": 0.95703125, |
| "learning_rate": 2.1756521739130436e-07, |
| "loss": 0.2393, |
| "step": 27500 |
| }, |
| { |
| "epoch": 1.908319965315413, |
| "grad_norm": 0.90234375, |
| "learning_rate": 2.1582608695652176e-07, |
| "loss": 0.2245, |
| "step": 27510 |
| }, |
| { |
| "epoch": 1.9090136570561458, |
| "grad_norm": 1.3359375, |
| "learning_rate": 2.1408695652173916e-07, |
| "loss": 0.2445, |
| "step": 27520 |
| }, |
| { |
| "epoch": 1.9097073487968785, |
| "grad_norm": 1.078125, |
| "learning_rate": 2.1234782608695656e-07, |
| "loss": 0.2321, |
| "step": 27530 |
| }, |
| { |
| "epoch": 1.910401040537611, |
| "grad_norm": 1.03125, |
| "learning_rate": 2.1060869565217393e-07, |
| "loss": 0.2311, |
| "step": 27540 |
| }, |
| { |
| "epoch": 1.9110947322783438, |
| "grad_norm": 1.3359375, |
| "learning_rate": 2.088695652173913e-07, |
| "loss": 0.2688, |
| "step": 27550 |
| }, |
| { |
| "epoch": 1.9117884240190766, |
| "grad_norm": 1.1875, |
| "learning_rate": 2.071304347826087e-07, |
| "loss": 0.2381, |
| "step": 27560 |
| }, |
| { |
| "epoch": 1.912482115759809, |
| "grad_norm": 1.09375, |
| "learning_rate": 2.053913043478261e-07, |
| "loss": 0.2505, |
| "step": 27570 |
| }, |
| { |
| "epoch": 1.9131758075005418, |
| "grad_norm": 1.1796875, |
| "learning_rate": 2.036521739130435e-07, |
| "loss": 0.2384, |
| "step": 27580 |
| }, |
| { |
| "epoch": 1.9138694992412746, |
| "grad_norm": 1.3359375, |
| "learning_rate": 2.019130434782609e-07, |
| "loss": 0.2604, |
| "step": 27590 |
| }, |
| { |
| "epoch": 1.9145631909820073, |
| "grad_norm": 1.2734375, |
| "learning_rate": 2.0017391304347829e-07, |
| "loss": 0.2229, |
| "step": 27600 |
| }, |
| { |
| "epoch": 1.91525688272274, |
| "grad_norm": 1.7421875, |
| "learning_rate": 1.9843478260869568e-07, |
| "loss": 0.3559, |
| "step": 27610 |
| }, |
| { |
| "epoch": 1.9159505744634728, |
| "grad_norm": 1.5390625, |
| "learning_rate": 1.9669565217391305e-07, |
| "loss": 0.2457, |
| "step": 27620 |
| }, |
| { |
| "epoch": 1.9166442662042056, |
| "grad_norm": 1.21875, |
| "learning_rate": 1.9495652173913045e-07, |
| "loss": 0.2954, |
| "step": 27630 |
| }, |
| { |
| "epoch": 1.9173379579449383, |
| "grad_norm": 1.1640625, |
| "learning_rate": 1.9321739130434782e-07, |
| "loss": 0.3314, |
| "step": 27640 |
| }, |
| { |
| "epoch": 1.918031649685671, |
| "grad_norm": 1.0546875, |
| "learning_rate": 1.9147826086956522e-07, |
| "loss": 0.2632, |
| "step": 27650 |
| }, |
| { |
| "epoch": 1.9187253414264036, |
| "grad_norm": 1.3046875, |
| "learning_rate": 1.8973913043478262e-07, |
| "loss": 0.2361, |
| "step": 27660 |
| }, |
| { |
| "epoch": 1.9194190331671364, |
| "grad_norm": 1.34375, |
| "learning_rate": 1.8800000000000002e-07, |
| "loss": 0.2164, |
| "step": 27670 |
| }, |
| { |
| "epoch": 1.920112724907869, |
| "grad_norm": 1.265625, |
| "learning_rate": 1.8626086956521741e-07, |
| "loss": 0.2366, |
| "step": 27680 |
| }, |
| { |
| "epoch": 1.9208064166486016, |
| "grad_norm": 1.421875, |
| "learning_rate": 1.845217391304348e-07, |
| "loss": 0.2236, |
| "step": 27690 |
| }, |
| { |
| "epoch": 1.9215001083893344, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.8278260869565218e-07, |
| "loss": 0.2692, |
| "step": 27700 |
| }, |
| { |
| "epoch": 1.9221938001300671, |
| "grad_norm": 1.96875, |
| "learning_rate": 1.8104347826086958e-07, |
| "loss": 0.3485, |
| "step": 27710 |
| }, |
| { |
| "epoch": 1.9228874918707999, |
| "grad_norm": 1.171875, |
| "learning_rate": 1.7930434782608698e-07, |
| "loss": 0.3303, |
| "step": 27720 |
| }, |
| { |
| "epoch": 1.9235811836115326, |
| "grad_norm": 1.3828125, |
| "learning_rate": 1.7756521739130437e-07, |
| "loss": 0.2459, |
| "step": 27730 |
| }, |
| { |
| "epoch": 1.9242748753522654, |
| "grad_norm": 1.609375, |
| "learning_rate": 1.7582608695652175e-07, |
| "loss": 0.2606, |
| "step": 27740 |
| }, |
| { |
| "epoch": 1.9249685670929981, |
| "grad_norm": 1.3359375, |
| "learning_rate": 1.7408695652173914e-07, |
| "loss": 0.2583, |
| "step": 27750 |
| }, |
| { |
| "epoch": 1.9256622588337309, |
| "grad_norm": 1.015625, |
| "learning_rate": 1.7234782608695654e-07, |
| "loss": 0.2222, |
| "step": 27760 |
| }, |
| { |
| "epoch": 1.9263559505744636, |
| "grad_norm": 1.1171875, |
| "learning_rate": 1.706086956521739e-07, |
| "loss": 0.2098, |
| "step": 27770 |
| }, |
| { |
| "epoch": 1.9270496423151962, |
| "grad_norm": 1.1796875, |
| "learning_rate": 1.688695652173913e-07, |
| "loss": 0.2314, |
| "step": 27780 |
| }, |
| { |
| "epoch": 1.927743334055929, |
| "grad_norm": 1.265625, |
| "learning_rate": 1.671304347826087e-07, |
| "loss": 0.2344, |
| "step": 27790 |
| }, |
| { |
| "epoch": 1.9284370257966617, |
| "grad_norm": 1.234375, |
| "learning_rate": 1.653913043478261e-07, |
| "loss": 0.2579, |
| "step": 27800 |
| }, |
| { |
| "epoch": 1.9291307175373942, |
| "grad_norm": 1.078125, |
| "learning_rate": 1.636521739130435e-07, |
| "loss": 0.2024, |
| "step": 27810 |
| }, |
| { |
| "epoch": 1.929824409278127, |
| "grad_norm": 1.2890625, |
| "learning_rate": 1.619130434782609e-07, |
| "loss": 0.2661, |
| "step": 27820 |
| }, |
| { |
| "epoch": 1.9305181010188597, |
| "grad_norm": 1.0859375, |
| "learning_rate": 1.6017391304347827e-07, |
| "loss": 0.2161, |
| "step": 27830 |
| }, |
| { |
| "epoch": 1.9312117927595924, |
| "grad_norm": 1.1171875, |
| "learning_rate": 1.5843478260869567e-07, |
| "loss": 0.2477, |
| "step": 27840 |
| }, |
| { |
| "epoch": 1.9319054845003252, |
| "grad_norm": 1.140625, |
| "learning_rate": 1.5669565217391304e-07, |
| "loss": 0.2482, |
| "step": 27850 |
| }, |
| { |
| "epoch": 1.932599176241058, |
| "grad_norm": 1.7265625, |
| "learning_rate": 1.5495652173913046e-07, |
| "loss": 0.2296, |
| "step": 27860 |
| }, |
| { |
| "epoch": 1.9332928679817907, |
| "grad_norm": 1.09375, |
| "learning_rate": 1.5321739130434784e-07, |
| "loss": 0.2083, |
| "step": 27870 |
| }, |
| { |
| "epoch": 1.9339865597225234, |
| "grad_norm": 1.2109375, |
| "learning_rate": 1.5147826086956523e-07, |
| "loss": 0.2222, |
| "step": 27880 |
| }, |
| { |
| "epoch": 1.9346802514632562, |
| "grad_norm": 0.9453125, |
| "learning_rate": 1.4973913043478263e-07, |
| "loss": 0.2657, |
| "step": 27890 |
| }, |
| { |
| "epoch": 1.9353739432039887, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.4800000000000003e-07, |
| "loss": 0.2672, |
| "step": 27900 |
| }, |
| { |
| "epoch": 1.9360676349447214, |
| "grad_norm": 0.9609375, |
| "learning_rate": 1.462608695652174e-07, |
| "loss": 0.2201, |
| "step": 27910 |
| }, |
| { |
| "epoch": 1.9367613266854542, |
| "grad_norm": 1.078125, |
| "learning_rate": 1.445217391304348e-07, |
| "loss": 0.2807, |
| "step": 27920 |
| }, |
| { |
| "epoch": 1.9374550184261867, |
| "grad_norm": 1.4375, |
| "learning_rate": 1.427826086956522e-07, |
| "loss": 0.2759, |
| "step": 27930 |
| }, |
| { |
| "epoch": 1.9381487101669195, |
| "grad_norm": 1.15625, |
| "learning_rate": 1.410434782608696e-07, |
| "loss": 0.2086, |
| "step": 27940 |
| }, |
| { |
| "epoch": 1.9388424019076522, |
| "grad_norm": 1.3984375, |
| "learning_rate": 1.3930434782608696e-07, |
| "loss": 0.2209, |
| "step": 27950 |
| }, |
| { |
| "epoch": 1.939536093648385, |
| "grad_norm": 2.015625, |
| "learning_rate": 1.3756521739130436e-07, |
| "loss": 0.2617, |
| "step": 27960 |
| }, |
| { |
| "epoch": 1.9402297853891177, |
| "grad_norm": 1.3046875, |
| "learning_rate": 1.3582608695652176e-07, |
| "loss": 0.25, |
| "step": 27970 |
| }, |
| { |
| "epoch": 1.9409234771298505, |
| "grad_norm": 1.2109375, |
| "learning_rate": 1.3408695652173916e-07, |
| "loss": 0.1882, |
| "step": 27980 |
| }, |
| { |
| "epoch": 1.9416171688705832, |
| "grad_norm": 1.078125, |
| "learning_rate": 1.3234782608695653e-07, |
| "loss": 0.279, |
| "step": 27990 |
| }, |
| { |
| "epoch": 1.942310860611316, |
| "grad_norm": 1.25, |
| "learning_rate": 1.3060869565217392e-07, |
| "loss": 0.2479, |
| "step": 28000 |
| }, |
| { |
| "epoch": 1.9430045523520487, |
| "grad_norm": 1.140625, |
| "learning_rate": 1.2886956521739132e-07, |
| "loss": 0.2482, |
| "step": 28010 |
| }, |
| { |
| "epoch": 1.9436982440927812, |
| "grad_norm": 1.1328125, |
| "learning_rate": 1.2713043478260872e-07, |
| "loss": 0.2375, |
| "step": 28020 |
| }, |
| { |
| "epoch": 1.944391935833514, |
| "grad_norm": 1.0390625, |
| "learning_rate": 1.253913043478261e-07, |
| "loss": 0.215, |
| "step": 28030 |
| }, |
| { |
| "epoch": 1.9450856275742467, |
| "grad_norm": 1.3125, |
| "learning_rate": 1.236521739130435e-07, |
| "loss": 0.2363, |
| "step": 28040 |
| }, |
| { |
| "epoch": 1.9457793193149793, |
| "grad_norm": 0.93359375, |
| "learning_rate": 1.2191304347826089e-07, |
| "loss": 0.3115, |
| "step": 28050 |
| }, |
| { |
| "epoch": 1.946473011055712, |
| "grad_norm": 1.1328125, |
| "learning_rate": 1.2017391304347826e-07, |
| "loss": 0.2421, |
| "step": 28060 |
| }, |
| { |
| "epoch": 1.9471667027964448, |
| "grad_norm": 1.1328125, |
| "learning_rate": 1.1843478260869566e-07, |
| "loss": 0.204, |
| "step": 28070 |
| }, |
| { |
| "epoch": 1.9478603945371775, |
| "grad_norm": 1.0859375, |
| "learning_rate": 1.1669565217391305e-07, |
| "loss": 0.2407, |
| "step": 28080 |
| }, |
| { |
| "epoch": 1.9485540862779103, |
| "grad_norm": 1.3828125, |
| "learning_rate": 1.1495652173913045e-07, |
| "loss": 0.2806, |
| "step": 28090 |
| }, |
| { |
| "epoch": 1.949247778018643, |
| "grad_norm": 1.0703125, |
| "learning_rate": 1.1321739130434782e-07, |
| "loss": 0.205, |
| "step": 28100 |
| }, |
| { |
| "epoch": 1.9499414697593758, |
| "grad_norm": 1.4609375, |
| "learning_rate": 1.1147826086956522e-07, |
| "loss": 0.29, |
| "step": 28110 |
| }, |
| { |
| "epoch": 1.9506351615001085, |
| "grad_norm": 1.1484375, |
| "learning_rate": 1.0973913043478262e-07, |
| "loss": 0.256, |
| "step": 28120 |
| }, |
| { |
| "epoch": 1.9513288532408413, |
| "grad_norm": 1.328125, |
| "learning_rate": 1.0800000000000001e-07, |
| "loss": 0.2424, |
| "step": 28130 |
| }, |
| { |
| "epoch": 1.9520225449815738, |
| "grad_norm": 1.15625, |
| "learning_rate": 1.062608695652174e-07, |
| "loss": 0.2434, |
| "step": 28140 |
| }, |
| { |
| "epoch": 1.9527162367223065, |
| "grad_norm": 1.34375, |
| "learning_rate": 1.0452173913043478e-07, |
| "loss": 0.273, |
| "step": 28150 |
| }, |
| { |
| "epoch": 1.9534099284630393, |
| "grad_norm": 1.265625, |
| "learning_rate": 1.0278260869565218e-07, |
| "loss": 0.2397, |
| "step": 28160 |
| }, |
| { |
| "epoch": 1.9541036202037718, |
| "grad_norm": 1.28125, |
| "learning_rate": 1.0104347826086958e-07, |
| "loss": 0.2082, |
| "step": 28170 |
| }, |
| { |
| "epoch": 1.9547973119445046, |
| "grad_norm": 1.140625, |
| "learning_rate": 9.930434782608696e-08, |
| "loss": 0.2713, |
| "step": 28180 |
| }, |
| { |
| "epoch": 1.9554910036852373, |
| "grad_norm": 1.25, |
| "learning_rate": 9.756521739130436e-08, |
| "loss": 0.2822, |
| "step": 28190 |
| }, |
| { |
| "epoch": 1.95618469542597, |
| "grad_norm": 1.296875, |
| "learning_rate": 9.582608695652174e-08, |
| "loss": 0.2366, |
| "step": 28200 |
| }, |
| { |
| "epoch": 1.9568783871667028, |
| "grad_norm": 1.734375, |
| "learning_rate": 9.408695652173914e-08, |
| "loss": 0.2482, |
| "step": 28210 |
| }, |
| { |
| "epoch": 1.9575720789074356, |
| "grad_norm": 1.1171875, |
| "learning_rate": 9.234782608695653e-08, |
| "loss": 0.2706, |
| "step": 28220 |
| }, |
| { |
| "epoch": 1.9582657706481683, |
| "grad_norm": 1.203125, |
| "learning_rate": 9.060869565217392e-08, |
| "loss": 0.2504, |
| "step": 28230 |
| }, |
| { |
| "epoch": 1.958959462388901, |
| "grad_norm": 0.8984375, |
| "learning_rate": 8.886956521739131e-08, |
| "loss": 0.3229, |
| "step": 28240 |
| }, |
| { |
| "epoch": 1.9596531541296336, |
| "grad_norm": 1.09375, |
| "learning_rate": 8.71304347826087e-08, |
| "loss": 0.2716, |
| "step": 28250 |
| }, |
| { |
| "epoch": 1.9603468458703663, |
| "grad_norm": 1.6640625, |
| "learning_rate": 8.539130434782609e-08, |
| "loss": 0.2757, |
| "step": 28260 |
| }, |
| { |
| "epoch": 1.961040537611099, |
| "grad_norm": 1.21875, |
| "learning_rate": 8.365217391304349e-08, |
| "loss": 0.2666, |
| "step": 28270 |
| }, |
| { |
| "epoch": 1.9617342293518316, |
| "grad_norm": 1.203125, |
| "learning_rate": 8.191304347826089e-08, |
| "loss": 0.2189, |
| "step": 28280 |
| }, |
| { |
| "epoch": 1.9624279210925644, |
| "grad_norm": 1.3828125, |
| "learning_rate": 8.017391304347827e-08, |
| "loss": 0.2109, |
| "step": 28290 |
| }, |
| { |
| "epoch": 1.9631216128332971, |
| "grad_norm": 1.1640625, |
| "learning_rate": 7.843478260869565e-08, |
| "loss": 0.2154, |
| "step": 28300 |
| }, |
| { |
| "epoch": 1.9638153045740299, |
| "grad_norm": 1.046875, |
| "learning_rate": 7.669565217391305e-08, |
| "loss": 0.2053, |
| "step": 28310 |
| }, |
| { |
| "epoch": 1.9645089963147626, |
| "grad_norm": 1.171875, |
| "learning_rate": 7.495652173913045e-08, |
| "loss": 0.3179, |
| "step": 28320 |
| }, |
| { |
| "epoch": 1.9652026880554954, |
| "grad_norm": 1.234375, |
| "learning_rate": 7.321739130434783e-08, |
| "loss": 0.2356, |
| "step": 28330 |
| }, |
| { |
| "epoch": 1.965896379796228, |
| "grad_norm": 1.3203125, |
| "learning_rate": 7.147826086956522e-08, |
| "loss": 0.223, |
| "step": 28340 |
| }, |
| { |
| "epoch": 1.9665900715369609, |
| "grad_norm": 1.2109375, |
| "learning_rate": 6.973913043478262e-08, |
| "loss": 0.27, |
| "step": 28350 |
| }, |
| { |
| "epoch": 1.9672837632776936, |
| "grad_norm": 1.0859375, |
| "learning_rate": 6.8e-08, |
| "loss": 0.2565, |
| "step": 28360 |
| }, |
| { |
| "epoch": 1.9679774550184261, |
| "grad_norm": 1.015625, |
| "learning_rate": 6.62608695652174e-08, |
| "loss": 0.2209, |
| "step": 28370 |
| }, |
| { |
| "epoch": 1.9686711467591589, |
| "grad_norm": 1.1484375, |
| "learning_rate": 6.452173913043478e-08, |
| "loss": 0.2149, |
| "step": 28380 |
| }, |
| { |
| "epoch": 1.9693648384998916, |
| "grad_norm": 1.3046875, |
| "learning_rate": 6.278260869565218e-08, |
| "loss": 0.2276, |
| "step": 28390 |
| }, |
| { |
| "epoch": 1.9700585302406242, |
| "grad_norm": 1.5, |
| "learning_rate": 6.104347826086956e-08, |
| "loss": 0.2533, |
| "step": 28400 |
| }, |
| { |
| "epoch": 1.970752221981357, |
| "grad_norm": 1.296875, |
| "learning_rate": 5.930434782608696e-08, |
| "loss": 0.2304, |
| "step": 28410 |
| }, |
| { |
| "epoch": 1.9714459137220897, |
| "grad_norm": 0.97265625, |
| "learning_rate": 5.756521739130435e-08, |
| "loss": 0.2486, |
| "step": 28420 |
| }, |
| { |
| "epoch": 1.9721396054628224, |
| "grad_norm": 1.0, |
| "learning_rate": 5.5826086956521744e-08, |
| "loss": 0.234, |
| "step": 28430 |
| }, |
| { |
| "epoch": 1.9728332972035552, |
| "grad_norm": 1.421875, |
| "learning_rate": 5.4086956521739135e-08, |
| "loss": 0.2633, |
| "step": 28440 |
| }, |
| { |
| "epoch": 1.973526988944288, |
| "grad_norm": 1.046875, |
| "learning_rate": 5.2347826086956526e-08, |
| "loss": 0.2266, |
| "step": 28450 |
| }, |
| { |
| "epoch": 1.9742206806850207, |
| "grad_norm": 1.3359375, |
| "learning_rate": 5.0608695652173917e-08, |
| "loss": 0.2356, |
| "step": 28460 |
| }, |
| { |
| "epoch": 1.9749143724257534, |
| "grad_norm": 1.0234375, |
| "learning_rate": 4.886956521739131e-08, |
| "loss": 0.2128, |
| "step": 28470 |
| }, |
| { |
| "epoch": 1.9756080641664862, |
| "grad_norm": 1.09375, |
| "learning_rate": 4.71304347826087e-08, |
| "loss": 0.2174, |
| "step": 28480 |
| }, |
| { |
| "epoch": 1.9763017559072187, |
| "grad_norm": 1.3046875, |
| "learning_rate": 4.5391304347826096e-08, |
| "loss": 0.2517, |
| "step": 28490 |
| }, |
| { |
| "epoch": 1.9769954476479514, |
| "grad_norm": 1.25, |
| "learning_rate": 4.365217391304348e-08, |
| "loss": 0.227, |
| "step": 28500 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 28750, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6.3842958733451e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|