| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 19351, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.000516769159216578, |
| "grad_norm": 2.2854151725769043, |
| "learning_rate": 2.58384579608289e-05, |
| "loss": 8.3208, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.001033538318433156, |
| "grad_norm": 1.7825466394424438, |
| "learning_rate": 5.16769159216578e-05, |
| "loss": 7.8091, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.001550307477649734, |
| "grad_norm": 1.5718376636505127, |
| "learning_rate": 7.751537388248669e-05, |
| "loss": 7.23, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.002067076636866312, |
| "grad_norm": 0.9627140164375305, |
| "learning_rate": 0.0001033538318433156, |
| "loss": 6.6397, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0025838457960828897, |
| "grad_norm": 0.7302472591400146, |
| "learning_rate": 0.0001291922898041445, |
| "loss": 6.2658, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.003100614955299468, |
| "grad_norm": 0.6241187453269958, |
| "learning_rate": 0.00015503074776497338, |
| "loss": 5.9946, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.0036173841145160455, |
| "grad_norm": 1.97952139377594, |
| "learning_rate": 0.0001808692057258023, |
| "loss": 5.7746, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.004134153273732624, |
| "grad_norm": 0.8876756429672241, |
| "learning_rate": 0.0002067076636866312, |
| "loss": 5.6108, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.004650922432949202, |
| "grad_norm": 0.9505946040153503, |
| "learning_rate": 0.00023254612164746007, |
| "loss": 5.4343, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.005167691592165779, |
| "grad_norm": 0.8270168304443359, |
| "learning_rate": 0.000258384579608289, |
| "loss": 5.1914, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.005684460751382358, |
| "grad_norm": 1.4167860746383667, |
| "learning_rate": 0.0002842230375691179, |
| "loss": 4.9526, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.006201229910598936, |
| "grad_norm": 0.8067450523376465, |
| "learning_rate": 0.00031006149552994676, |
| "loss": 4.7455, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.006717999069815513, |
| "grad_norm": 1.0726768970489502, |
| "learning_rate": 0.0003358999534907757, |
| "loss": 4.5333, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.007234768229032091, |
| "grad_norm": 0.9126196503639221, |
| "learning_rate": 0.0003617384114516046, |
| "loss": 4.3803, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.0077515373882486695, |
| "grad_norm": 1.0933046340942383, |
| "learning_rate": 0.0003875768694124335, |
| "loss": 4.2341, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.008268306547465248, |
| "grad_norm": 1.2751045227050781, |
| "learning_rate": 0.0004134153273732624, |
| "loss": 4.1177, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.008785075706681826, |
| "grad_norm": 1.0147113800048828, |
| "learning_rate": 0.0004392537853340913, |
| "loss": 4.006, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.009301844865898403, |
| "grad_norm": 1.2339316606521606, |
| "learning_rate": 0.00046509224329492014, |
| "loss": 3.9218, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.009818614025114981, |
| "grad_norm": 1.2789371013641357, |
| "learning_rate": 0.0004909307012557491, |
| "loss": 3.8193, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.010335383184331559, |
| "grad_norm": 1.337181568145752, |
| "learning_rate": 0.0005, |
| "loss": 3.729, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.010852152343548136, |
| "grad_norm": 1.29481840133667, |
| "learning_rate": 0.0005, |
| "loss": 3.6638, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.011368921502764716, |
| "grad_norm": 1.2725201845169067, |
| "learning_rate": 0.0005, |
| "loss": 3.6127, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.011885690661981294, |
| "grad_norm": 1.4627310037612915, |
| "learning_rate": 0.0005, |
| "loss": 3.5382, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.012402459821197871, |
| "grad_norm": 1.0869964361190796, |
| "learning_rate": 0.0005, |
| "loss": 3.5016, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.012919228980414449, |
| "grad_norm": 1.142592191696167, |
| "learning_rate": 0.0005, |
| "loss": 3.4392, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.013435998139631027, |
| "grad_norm": 1.0130606889724731, |
| "learning_rate": 0.0005, |
| "loss": 3.3923, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.013952767298847604, |
| "grad_norm": 1.2244436740875244, |
| "learning_rate": 0.0005, |
| "loss": 3.3526, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.014469536458064182, |
| "grad_norm": 1.164255976676941, |
| "learning_rate": 0.0005, |
| "loss": 3.2819, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.014986305617280761, |
| "grad_norm": 0.9863433241844177, |
| "learning_rate": 0.0005, |
| "loss": 3.2612, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.015503074776497339, |
| "grad_norm": 1.1939715147018433, |
| "learning_rate": 0.0005, |
| "loss": 3.2136, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.016019843935713915, |
| "grad_norm": 1.0233927965164185, |
| "learning_rate": 0.0005, |
| "loss": 3.1914, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.016536613094930496, |
| "grad_norm": 1.3167331218719482, |
| "learning_rate": 0.0005, |
| "loss": 3.1614, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.017053382254147074, |
| "grad_norm": 1.10019850730896, |
| "learning_rate": 0.0005, |
| "loss": 3.1133, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.01757015141336365, |
| "grad_norm": 1.267260193824768, |
| "learning_rate": 0.0005, |
| "loss": 3.0851, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.01808692057258023, |
| "grad_norm": 1.134069800376892, |
| "learning_rate": 0.0005, |
| "loss": 3.0513, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.018603689731796807, |
| "grad_norm": 1.3286008834838867, |
| "learning_rate": 0.0005, |
| "loss": 3.0269, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.019120458891013385, |
| "grad_norm": 1.3457223176956177, |
| "learning_rate": 0.0005, |
| "loss": 2.9893, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.019637228050229962, |
| "grad_norm": 1.254515290260315, |
| "learning_rate": 0.0005, |
| "loss": 2.9698, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.02015399720944654, |
| "grad_norm": 1.1265138387680054, |
| "learning_rate": 0.0005, |
| "loss": 2.9409, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.020670766368663118, |
| "grad_norm": 1.1716142892837524, |
| "learning_rate": 0.0005, |
| "loss": 2.9289, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.021187535527879695, |
| "grad_norm": 1.7391645908355713, |
| "learning_rate": 0.0005, |
| "loss": 2.9168, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.021704304687096273, |
| "grad_norm": 1.2796969413757324, |
| "learning_rate": 0.0005, |
| "loss": 2.8871, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.02222107384631285, |
| "grad_norm": 1.192845106124878, |
| "learning_rate": 0.0005, |
| "loss": 2.85, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.02273784300552943, |
| "grad_norm": 1.2399319410324097, |
| "learning_rate": 0.0005, |
| "loss": 2.8469, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.02325461216474601, |
| "grad_norm": 2.0635640621185303, |
| "learning_rate": 0.0005, |
| "loss": 2.8174, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.023771381323962587, |
| "grad_norm": 1.2288042306900024, |
| "learning_rate": 0.0005, |
| "loss": 2.8065, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.024288150483179165, |
| "grad_norm": 1.1476280689239502, |
| "learning_rate": 0.0005, |
| "loss": 2.7702, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.024804919642395742, |
| "grad_norm": 1.0889836549758911, |
| "learning_rate": 0.0005, |
| "loss": 2.7561, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.02532168880161232, |
| "grad_norm": 1.2561166286468506, |
| "learning_rate": 0.0005, |
| "loss": 2.7407, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.025838457960828898, |
| "grad_norm": 1.105919599533081, |
| "learning_rate": 0.0005, |
| "loss": 2.7356, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.026355227120045475, |
| "grad_norm": 1.2789875268936157, |
| "learning_rate": 0.0005, |
| "loss": 2.6941, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.026871996279262053, |
| "grad_norm": 1.2486238479614258, |
| "learning_rate": 0.0005, |
| "loss": 2.7009, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.02738876543847863, |
| "grad_norm": 1.3023815155029297, |
| "learning_rate": 0.0005, |
| "loss": 2.6761, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.02790553459769521, |
| "grad_norm": 1.3703725337982178, |
| "learning_rate": 0.0005, |
| "loss": 2.659, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.028422303756911786, |
| "grad_norm": 1.1285632848739624, |
| "learning_rate": 0.0005, |
| "loss": 2.6444, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.028939072916128364, |
| "grad_norm": 1.6290286779403687, |
| "learning_rate": 0.0005, |
| "loss": 2.607, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.029455842075344945, |
| "grad_norm": 1.2040055990219116, |
| "learning_rate": 0.0005, |
| "loss": 2.6061, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.029972611234561523, |
| "grad_norm": 1.13448166847229, |
| "learning_rate": 0.0005, |
| "loss": 2.5709, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.0304893803937781, |
| "grad_norm": 1.9924914836883545, |
| "learning_rate": 0.0005, |
| "loss": 2.5914, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.031006149552994678, |
| "grad_norm": 1.6680738925933838, |
| "learning_rate": 0.0005, |
| "loss": 2.5523, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.03152291871221125, |
| "grad_norm": 1.5603681802749634, |
| "learning_rate": 0.0005, |
| "loss": 2.5157, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.03203968787142783, |
| "grad_norm": 1.2942813634872437, |
| "learning_rate": 0.0005, |
| "loss": 2.4997, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.032556457030644415, |
| "grad_norm": 1.4313350915908813, |
| "learning_rate": 0.0005, |
| "loss": 2.4648, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.03307322618986099, |
| "grad_norm": 1.41900634765625, |
| "learning_rate": 0.0005, |
| "loss": 2.4899, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.03358999534907757, |
| "grad_norm": 2.0101678371429443, |
| "learning_rate": 0.0005, |
| "loss": 2.4705, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.03410676450829415, |
| "grad_norm": 1.567264437675476, |
| "learning_rate": 0.0005, |
| "loss": 2.4655, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.034623533667510725, |
| "grad_norm": 1.504845142364502, |
| "learning_rate": 0.0005, |
| "loss": 2.4357, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.0351403028267273, |
| "grad_norm": 1.3784253597259521, |
| "learning_rate": 0.0005, |
| "loss": 2.4484, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.03565707198594388, |
| "grad_norm": 1.2612375020980835, |
| "learning_rate": 0.0005, |
| "loss": 2.4108, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.03617384114516046, |
| "grad_norm": 1.2809542417526245, |
| "learning_rate": 0.0005, |
| "loss": 2.429, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.036690610304377036, |
| "grad_norm": 1.282116174697876, |
| "learning_rate": 0.0005, |
| "loss": 2.4136, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.037207379463593614, |
| "grad_norm": 2.6279282569885254, |
| "learning_rate": 0.0005, |
| "loss": 2.4043, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.03772414862281019, |
| "grad_norm": 1.158665657043457, |
| "learning_rate": 0.0005, |
| "loss": 2.3666, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.03824091778202677, |
| "grad_norm": 1.0581611394882202, |
| "learning_rate": 0.0005, |
| "loss": 2.3588, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.03875768694124335, |
| "grad_norm": 1.225664496421814, |
| "learning_rate": 0.0005, |
| "loss": 2.3499, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.039274456100459924, |
| "grad_norm": 1.9708060026168823, |
| "learning_rate": 0.0005, |
| "loss": 2.3462, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.0397912252596765, |
| "grad_norm": 1.856858730316162, |
| "learning_rate": 0.0005, |
| "loss": 2.3443, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.04030799441889308, |
| "grad_norm": 1.4437602758407593, |
| "learning_rate": 0.0005, |
| "loss": 2.289, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.04082476357810966, |
| "grad_norm": 1.289876937866211, |
| "learning_rate": 0.0005, |
| "loss": 2.2881, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.041341532737326235, |
| "grad_norm": 1.2440109252929688, |
| "learning_rate": 0.0005, |
| "loss": 2.2639, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.04185830189654281, |
| "grad_norm": 1.2062422037124634, |
| "learning_rate": 0.0005, |
| "loss": 2.263, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.04237507105575939, |
| "grad_norm": 1.171238660812378, |
| "learning_rate": 0.0005, |
| "loss": 2.259, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.04289184021497597, |
| "grad_norm": 1.1757316589355469, |
| "learning_rate": 0.0005, |
| "loss": 2.2527, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.043408609374192546, |
| "grad_norm": 1.3125736713409424, |
| "learning_rate": 0.0005, |
| "loss": 2.2429, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.04392537853340912, |
| "grad_norm": 1.2145166397094727, |
| "learning_rate": 0.0005, |
| "loss": 2.2615, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.0444421476926257, |
| "grad_norm": 1.3434226512908936, |
| "learning_rate": 0.0005, |
| "loss": 2.2398, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.04495891685184228, |
| "grad_norm": 1.5729234218597412, |
| "learning_rate": 0.0005, |
| "loss": 2.2263, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.04547568601105886, |
| "grad_norm": 1.4341133832931519, |
| "learning_rate": 0.0005, |
| "loss": 2.193, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.04599245517027544, |
| "grad_norm": 1.1859130859375, |
| "learning_rate": 0.0005, |
| "loss": 2.1928, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.04650922432949202, |
| "grad_norm": 1.8495599031448364, |
| "learning_rate": 0.0005, |
| "loss": 2.182, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.047025993488708596, |
| "grad_norm": 1.416601538658142, |
| "learning_rate": 0.0005, |
| "loss": 2.1748, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.047542762647925174, |
| "grad_norm": 1.2966763973236084, |
| "learning_rate": 0.0005, |
| "loss": 2.1481, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.04805953180714175, |
| "grad_norm": 1.499526858329773, |
| "learning_rate": 0.0005, |
| "loss": 2.1442, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.04857630096635833, |
| "grad_norm": 1.2409976720809937, |
| "learning_rate": 0.0005, |
| "loss": 2.1788, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.04909307012557491, |
| "grad_norm": 1.2645729780197144, |
| "learning_rate": 0.0005, |
| "loss": 2.1577, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.049609839284791485, |
| "grad_norm": 1.297904372215271, |
| "learning_rate": 0.0005, |
| "loss": 2.1412, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.05012660844400806, |
| "grad_norm": 1.3194257020950317, |
| "learning_rate": 0.0005, |
| "loss": 2.1242, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.05064337760322464, |
| "grad_norm": 1.4184504747390747, |
| "learning_rate": 0.0005, |
| "loss": 2.1414, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.05116014676244122, |
| "grad_norm": 2.1643896102905273, |
| "learning_rate": 0.0005, |
| "loss": 2.12, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.051676915921657796, |
| "grad_norm": 2.3233911991119385, |
| "learning_rate": 0.0005, |
| "loss": 2.0972, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.05219368508087437, |
| "grad_norm": 2.2393977642059326, |
| "learning_rate": 0.0005, |
| "loss": 2.0906, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.05271045424009095, |
| "grad_norm": 1.298572063446045, |
| "learning_rate": 0.0005, |
| "loss": 2.0664, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.05322722339930753, |
| "grad_norm": 1.8283194303512573, |
| "learning_rate": 0.0005, |
| "loss": 2.0726, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.053743992558524106, |
| "grad_norm": 1.4193987846374512, |
| "learning_rate": 0.0005, |
| "loss": 2.0643, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.054260761717740684, |
| "grad_norm": 1.3053640127182007, |
| "learning_rate": 0.0005, |
| "loss": 2.0124, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.05477753087695726, |
| "grad_norm": 1.598849892616272, |
| "learning_rate": 0.0005, |
| "loss": 2.0588, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.05529430003617384, |
| "grad_norm": 1.3975911140441895, |
| "learning_rate": 0.0005, |
| "loss": 2.0349, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.05581106919539042, |
| "grad_norm": 1.1616796255111694, |
| "learning_rate": 0.0005, |
| "loss": 2.0226, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.056327838354606995, |
| "grad_norm": 1.3756109476089478, |
| "learning_rate": 0.0005, |
| "loss": 2.0305, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.05684460751382357, |
| "grad_norm": 1.4646899700164795, |
| "learning_rate": 0.0005, |
| "loss": 2.0329, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.05736137667304015, |
| "grad_norm": 1.215995192527771, |
| "learning_rate": 0.0005, |
| "loss": 2.039, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.05787814583225673, |
| "grad_norm": 1.6965135335922241, |
| "learning_rate": 0.0005, |
| "loss": 2.0142, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.05839491499147331, |
| "grad_norm": 1.466676950454712, |
| "learning_rate": 0.0005, |
| "loss": 2.0231, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.05891168415068989, |
| "grad_norm": 1.5624445676803589, |
| "learning_rate": 0.0005, |
| "loss": 1.9893, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.05942845330990647, |
| "grad_norm": 1.3193562030792236, |
| "learning_rate": 0.0005, |
| "loss": 1.9744, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.059945222469123045, |
| "grad_norm": 1.1906757354736328, |
| "learning_rate": 0.0005, |
| "loss": 1.9866, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.06046199162833962, |
| "grad_norm": 2.6814322471618652, |
| "learning_rate": 0.0005, |
| "loss": 1.9743, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.0609787607875562, |
| "grad_norm": 1.7349072694778442, |
| "learning_rate": 0.0005, |
| "loss": 1.9815, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.06149552994677278, |
| "grad_norm": 2.099928617477417, |
| "learning_rate": 0.0005, |
| "loss": 1.9469, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.062012299105989356, |
| "grad_norm": 1.141414999961853, |
| "learning_rate": 0.0005, |
| "loss": 1.9422, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.06252906826520593, |
| "grad_norm": 1.1726713180541992, |
| "learning_rate": 0.0005, |
| "loss": 1.956, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.0630458374244225, |
| "grad_norm": 1.7521125078201294, |
| "learning_rate": 0.0005, |
| "loss": 1.9423, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.06356260658363909, |
| "grad_norm": 1.1364134550094604, |
| "learning_rate": 0.0005, |
| "loss": 1.9614, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.06407937574285566, |
| "grad_norm": 1.3168714046478271, |
| "learning_rate": 0.0005, |
| "loss": 1.9571, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.06459614490207224, |
| "grad_norm": 1.787176251411438, |
| "learning_rate": 0.0005, |
| "loss": 1.9566, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.06511291406128883, |
| "grad_norm": 1.2864240407943726, |
| "learning_rate": 0.0005, |
| "loss": 1.9349, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.0656296832205054, |
| "grad_norm": 1.5742415189743042, |
| "learning_rate": 0.0005, |
| "loss": 1.9006, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.06614645237972198, |
| "grad_norm": 1.349160075187683, |
| "learning_rate": 0.0005, |
| "loss": 1.9083, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.06666322153893856, |
| "grad_norm": 2.4660980701446533, |
| "learning_rate": 0.0005, |
| "loss": 1.9296, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.06717999069815514, |
| "grad_norm": 2.0999865531921387, |
| "learning_rate": 0.0005, |
| "loss": 1.876, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.06769675985737171, |
| "grad_norm": 1.3416152000427246, |
| "learning_rate": 0.0005, |
| "loss": 1.8874, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.0682135290165883, |
| "grad_norm": 1.2447683811187744, |
| "learning_rate": 0.0005, |
| "loss": 1.8892, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.06873029817580487, |
| "grad_norm": 1.1316670179367065, |
| "learning_rate": 0.0005, |
| "loss": 1.8754, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.06924706733502145, |
| "grad_norm": 1.354366421699524, |
| "learning_rate": 0.0005, |
| "loss": 1.8778, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.06976383649423802, |
| "grad_norm": 1.7485758066177368, |
| "learning_rate": 0.0005, |
| "loss": 1.8982, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.0702806056534546, |
| "grad_norm": 1.5367459058761597, |
| "learning_rate": 0.0005, |
| "loss": 1.8862, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.07079737481267118, |
| "grad_norm": 1.112423062324524, |
| "learning_rate": 0.0005, |
| "loss": 1.8748, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.07131414397188776, |
| "grad_norm": 1.269370436668396, |
| "learning_rate": 0.0005, |
| "loss": 1.8389, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.07183091313110433, |
| "grad_norm": 1.6045186519622803, |
| "learning_rate": 0.0005, |
| "loss": 1.8814, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.07234768229032092, |
| "grad_norm": 2.199096918106079, |
| "learning_rate": 0.0005, |
| "loss": 1.8576, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.07286445144953749, |
| "grad_norm": 1.2949317693710327, |
| "learning_rate": 0.0005, |
| "loss": 1.837, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.07338122060875407, |
| "grad_norm": 1.2082808017730713, |
| "learning_rate": 0.0005, |
| "loss": 1.8191, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.07389798976797064, |
| "grad_norm": 1.632419466972351, |
| "learning_rate": 0.0005, |
| "loss": 1.8192, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.07441475892718723, |
| "grad_norm": 1.1036083698272705, |
| "learning_rate": 0.0005, |
| "loss": 1.8046, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.0749315280864038, |
| "grad_norm": 1.6769006252288818, |
| "learning_rate": 0.0005, |
| "loss": 1.8174, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.07544829724562038, |
| "grad_norm": 1.587368130683899, |
| "learning_rate": 0.0005, |
| "loss": 1.8311, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.07596506640483695, |
| "grad_norm": 1.063362956047058, |
| "learning_rate": 0.0005, |
| "loss": 1.7886, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.07648183556405354, |
| "grad_norm": 1.258238673210144, |
| "learning_rate": 0.0005, |
| "loss": 1.8063, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.07699860472327011, |
| "grad_norm": 1.3020492792129517, |
| "learning_rate": 0.0005, |
| "loss": 1.8289, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.0775153738824867, |
| "grad_norm": 2.0609872341156006, |
| "learning_rate": 0.0005, |
| "loss": 1.7893, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.07803214304170328, |
| "grad_norm": 1.1070424318313599, |
| "learning_rate": 0.0005, |
| "loss": 1.768, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.07854891220091985, |
| "grad_norm": 2.055048704147339, |
| "learning_rate": 0.0005, |
| "loss": 1.7597, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.07906568136013643, |
| "grad_norm": 1.4444563388824463, |
| "learning_rate": 0.0005, |
| "loss": 1.7827, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.079582450519353, |
| "grad_norm": 1.388077735900879, |
| "learning_rate": 0.0005, |
| "loss": 1.7753, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.08009921967856959, |
| "grad_norm": 1.2297486066818237, |
| "learning_rate": 0.0005, |
| "loss": 1.7371, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.08061598883778616, |
| "grad_norm": 1.1055219173431396, |
| "learning_rate": 0.0005, |
| "loss": 1.7616, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.08113275799700274, |
| "grad_norm": 1.330352783203125, |
| "learning_rate": 0.0005, |
| "loss": 1.7753, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.08164952715621931, |
| "grad_norm": 1.0750646591186523, |
| "learning_rate": 0.0005, |
| "loss": 1.7551, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.0821662963154359, |
| "grad_norm": 1.1137466430664062, |
| "learning_rate": 0.0005, |
| "loss": 1.7686, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.08268306547465247, |
| "grad_norm": 1.2276798486709595, |
| "learning_rate": 0.0005, |
| "loss": 1.7617, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.08319983463386905, |
| "grad_norm": 1.0940239429473877, |
| "learning_rate": 0.0005, |
| "loss": 1.7269, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.08371660379308563, |
| "grad_norm": 1.1361453533172607, |
| "learning_rate": 0.0005, |
| "loss": 1.7481, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.08423337295230221, |
| "grad_norm": 1.482571005821228, |
| "learning_rate": 0.0005, |
| "loss": 1.7265, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.08475014211151878, |
| "grad_norm": 1.2309211492538452, |
| "learning_rate": 0.0005, |
| "loss": 1.7087, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.08526691127073537, |
| "grad_norm": 1.162300705909729, |
| "learning_rate": 0.0005, |
| "loss": 1.708, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.08578368042995194, |
| "grad_norm": 1.1956666707992554, |
| "learning_rate": 0.0005, |
| "loss": 1.73, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.08630044958916852, |
| "grad_norm": 1.5038352012634277, |
| "learning_rate": 0.0005, |
| "loss": 1.7213, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.08681721874838509, |
| "grad_norm": 1.2151919603347778, |
| "learning_rate": 0.0005, |
| "loss": 1.7224, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.08733398790760168, |
| "grad_norm": 1.0433135032653809, |
| "learning_rate": 0.0005, |
| "loss": 1.7049, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.08785075706681825, |
| "grad_norm": 1.8113486766815186, |
| "learning_rate": 0.0005, |
| "loss": 1.7132, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.08836752622603483, |
| "grad_norm": 0.9753373861312866, |
| "learning_rate": 0.0005, |
| "loss": 1.7109, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.0888842953852514, |
| "grad_norm": 1.355560064315796, |
| "learning_rate": 0.0005, |
| "loss": 1.7041, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.08940106454446799, |
| "grad_norm": 1.1716082096099854, |
| "learning_rate": 0.0005, |
| "loss": 1.7114, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.08991783370368456, |
| "grad_norm": 1.10747492313385, |
| "learning_rate": 0.0005, |
| "loss": 1.7068, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.09043460286290114, |
| "grad_norm": 1.0477211475372314, |
| "learning_rate": 0.0005, |
| "loss": 1.6935, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.09095137202211773, |
| "grad_norm": 1.1489983797073364, |
| "learning_rate": 0.0005, |
| "loss": 1.6976, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.0914681411813343, |
| "grad_norm": 1.2262177467346191, |
| "learning_rate": 0.0005, |
| "loss": 1.689, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.09198491034055088, |
| "grad_norm": 1.111374020576477, |
| "learning_rate": 0.0005, |
| "loss": 1.6811, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.09250167949976745, |
| "grad_norm": 1.0549476146697998, |
| "learning_rate": 0.0005, |
| "loss": 1.6539, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.09301844865898404, |
| "grad_norm": 1.2341543436050415, |
| "learning_rate": 0.0005, |
| "loss": 1.6643, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.09353521781820061, |
| "grad_norm": 1.6305192708969116, |
| "learning_rate": 0.0005, |
| "loss": 1.6553, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.09405198697741719, |
| "grad_norm": 1.0614426136016846, |
| "learning_rate": 0.0005, |
| "loss": 1.6474, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.09456875613663376, |
| "grad_norm": 1.1092963218688965, |
| "learning_rate": 0.0005, |
| "loss": 1.6686, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.09508552529585035, |
| "grad_norm": 1.521255373954773, |
| "learning_rate": 0.0005, |
| "loss": 1.6586, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.09560229445506692, |
| "grad_norm": 1.353458285331726, |
| "learning_rate": 0.0005, |
| "loss": 1.6632, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.0961190636142835, |
| "grad_norm": 1.0622385740280151, |
| "learning_rate": 0.0005, |
| "loss": 1.6417, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.09663583277350007, |
| "grad_norm": 1.1304274797439575, |
| "learning_rate": 0.0005, |
| "loss": 1.6374, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.09715260193271666, |
| "grad_norm": 1.6776567697525024, |
| "learning_rate": 0.0005, |
| "loss": 1.648, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.09766937109193323, |
| "grad_norm": 1.2316774129867554, |
| "learning_rate": 0.0005, |
| "loss": 1.6055, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.09818614025114981, |
| "grad_norm": 1.1291395425796509, |
| "learning_rate": 0.0005, |
| "loss": 1.6199, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.09870290941036639, |
| "grad_norm": 1.2423152923583984, |
| "learning_rate": 0.0005, |
| "loss": 1.6343, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.09921967856958297, |
| "grad_norm": 1.6953014135360718, |
| "learning_rate": 0.0005, |
| "loss": 1.6011, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.09973644772879954, |
| "grad_norm": 1.078352451324463, |
| "learning_rate": 0.0005, |
| "loss": 1.5773, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.10025321688801613, |
| "grad_norm": 1.1383408308029175, |
| "learning_rate": 0.0005, |
| "loss": 1.6175, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.1007699860472327, |
| "grad_norm": 0.998919665813446, |
| "learning_rate": 0.0005, |
| "loss": 1.6388, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.10128675520644928, |
| "grad_norm": 1.6332008838653564, |
| "learning_rate": 0.0005, |
| "loss": 1.5987, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.10180352436566585, |
| "grad_norm": 1.041397213935852, |
| "learning_rate": 0.0005, |
| "loss": 1.6043, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.10232029352488244, |
| "grad_norm": 1.1090408563613892, |
| "learning_rate": 0.0005, |
| "loss": 1.5859, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.102837062684099, |
| "grad_norm": 1.0914579629898071, |
| "learning_rate": 0.0005, |
| "loss": 1.6069, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.10335383184331559, |
| "grad_norm": 1.5213651657104492, |
| "learning_rate": 0.0005, |
| "loss": 1.5897, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.10387060100253218, |
| "grad_norm": 1.5415380001068115, |
| "learning_rate": 0.0005, |
| "loss": 1.6035, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.10438737016174875, |
| "grad_norm": 1.1095470190048218, |
| "learning_rate": 0.0005, |
| "loss": 1.5697, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.10490413932096533, |
| "grad_norm": 1.3773058652877808, |
| "learning_rate": 0.0005, |
| "loss": 1.5827, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.1054209084801819, |
| "grad_norm": 0.9746466279029846, |
| "learning_rate": 0.0005, |
| "loss": 1.5612, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.10593767763939849, |
| "grad_norm": 1.047061800956726, |
| "learning_rate": 0.0005, |
| "loss": 1.5707, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.10645444679861506, |
| "grad_norm": 0.9137332439422607, |
| "learning_rate": 0.0005, |
| "loss": 1.5646, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.10697121595783164, |
| "grad_norm": 0.9967837929725647, |
| "learning_rate": 0.0005, |
| "loss": 1.5659, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.10748798511704821, |
| "grad_norm": 1.2617110013961792, |
| "learning_rate": 0.0005, |
| "loss": 1.5673, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.1080047542762648, |
| "grad_norm": 0.9831250309944153, |
| "learning_rate": 0.0005, |
| "loss": 1.5742, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.10852152343548137, |
| "grad_norm": 1.1735457181930542, |
| "learning_rate": 0.0005, |
| "loss": 1.5811, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.10903829259469795, |
| "grad_norm": 1.1183675527572632, |
| "learning_rate": 0.0005, |
| "loss": 1.5546, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.10955506175391452, |
| "grad_norm": 1.3536667823791504, |
| "learning_rate": 0.0005, |
| "loss": 1.5551, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.11007183091313111, |
| "grad_norm": 0.9417304396629333, |
| "learning_rate": 0.0005, |
| "loss": 1.562, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.11058860007234768, |
| "grad_norm": 0.9261025786399841, |
| "learning_rate": 0.0005, |
| "loss": 1.5736, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.11110536923156426, |
| "grad_norm": 1.1396183967590332, |
| "learning_rate": 0.0005, |
| "loss": 1.5417, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.11162213839078083, |
| "grad_norm": 0.9720540642738342, |
| "learning_rate": 0.0005, |
| "loss": 1.5231, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.11213890754999742, |
| "grad_norm": 0.9784930348396301, |
| "learning_rate": 0.0005, |
| "loss": 1.5428, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.11265567670921399, |
| "grad_norm": 1.037022590637207, |
| "learning_rate": 0.0005, |
| "loss": 1.5562, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.11317244586843057, |
| "grad_norm": 1.3437378406524658, |
| "learning_rate": 0.0005, |
| "loss": 1.5452, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.11368921502764714, |
| "grad_norm": 1.2525360584259033, |
| "learning_rate": 0.0005, |
| "loss": 1.5372, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.11420598418686373, |
| "grad_norm": 1.0389316082000732, |
| "learning_rate": 0.0005, |
| "loss": 1.5273, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.1147227533460803, |
| "grad_norm": 1.2379904985427856, |
| "learning_rate": 0.0005, |
| "loss": 1.5281, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.11523952250529688, |
| "grad_norm": 1.0728790760040283, |
| "learning_rate": 0.0005, |
| "loss": 1.5228, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.11575629166451346, |
| "grad_norm": 1.54011070728302, |
| "learning_rate": 0.0005, |
| "loss": 1.5257, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.11627306082373004, |
| "grad_norm": 1.4011873006820679, |
| "learning_rate": 0.0005, |
| "loss": 1.5258, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.11678982998294662, |
| "grad_norm": 1.2126344442367554, |
| "learning_rate": 0.0005, |
| "loss": 1.5249, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.1173065991421632, |
| "grad_norm": 1.1125898361206055, |
| "learning_rate": 0.0005, |
| "loss": 1.5034, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.11782336830137978, |
| "grad_norm": 1.0404047966003418, |
| "learning_rate": 0.0005, |
| "loss": 1.5243, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.11834013746059635, |
| "grad_norm": 0.9504315257072449, |
| "learning_rate": 0.0005, |
| "loss": 1.501, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.11885690661981294, |
| "grad_norm": 1.0554097890853882, |
| "learning_rate": 0.0005, |
| "loss": 1.5115, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.1193736757790295, |
| "grad_norm": 0.9352626204490662, |
| "learning_rate": 0.0005, |
| "loss": 1.5038, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.11989044493824609, |
| "grad_norm": 0.9765718579292297, |
| "learning_rate": 0.0005, |
| "loss": 1.5019, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.12040721409746266, |
| "grad_norm": 1.2419780492782593, |
| "learning_rate": 0.0005, |
| "loss": 1.4993, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.12092398325667925, |
| "grad_norm": 1.0337820053100586, |
| "learning_rate": 0.0005, |
| "loss": 1.4843, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.12144075241589582, |
| "grad_norm": 1.0803256034851074, |
| "learning_rate": 0.0005, |
| "loss": 1.4902, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.1219575215751124, |
| "grad_norm": 0.9424406886100769, |
| "learning_rate": 0.0005, |
| "loss": 1.5031, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.12247429073432897, |
| "grad_norm": 0.9924182891845703, |
| "learning_rate": 0.0005, |
| "loss": 1.489, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.12299105989354556, |
| "grad_norm": 1.0602052211761475, |
| "learning_rate": 0.0005, |
| "loss": 1.4801, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.12350782905276213, |
| "grad_norm": 0.9463520646095276, |
| "learning_rate": 0.0005, |
| "loss": 1.49, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.12402459821197871, |
| "grad_norm": 0.9301887154579163, |
| "learning_rate": 0.0005, |
| "loss": 1.4923, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.12454136737119528, |
| "grad_norm": 0.9018756151199341, |
| "learning_rate": 0.0005, |
| "loss": 1.457, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.12505813653041187, |
| "grad_norm": 0.9669187068939209, |
| "learning_rate": 0.0005, |
| "loss": 1.4691, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.12557490568962845, |
| "grad_norm": 0.9768301248550415, |
| "learning_rate": 0.0005, |
| "loss": 1.4448, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.126091674848845, |
| "grad_norm": 0.9736414551734924, |
| "learning_rate": 0.0005, |
| "loss": 1.4671, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.1266084440080616, |
| "grad_norm": 1.3117995262145996, |
| "learning_rate": 0.0005, |
| "loss": 1.4577, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.12712521316727818, |
| "grad_norm": 0.976732075214386, |
| "learning_rate": 0.0005, |
| "loss": 1.4624, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.12764198232649476, |
| "grad_norm": 1.1756422519683838, |
| "learning_rate": 0.0005, |
| "loss": 1.4675, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.12815875148571132, |
| "grad_norm": 0.9411507844924927, |
| "learning_rate": 0.0005, |
| "loss": 1.4634, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.1286755206449279, |
| "grad_norm": 1.6214072704315186, |
| "learning_rate": 0.0005, |
| "loss": 1.4685, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.1291922898041445, |
| "grad_norm": 1.0801911354064941, |
| "learning_rate": 0.0005, |
| "loss": 1.4468, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.12970905896336107, |
| "grad_norm": 0.9756599068641663, |
| "learning_rate": 0.0005, |
| "loss": 1.4438, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.13022582812257766, |
| "grad_norm": 1.1823363304138184, |
| "learning_rate": 0.0005, |
| "loss": 1.4522, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.13074259728179422, |
| "grad_norm": 1.0005122423171997, |
| "learning_rate": 0.0005, |
| "loss": 1.436, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.1312593664410108, |
| "grad_norm": 1.4303867816925049, |
| "learning_rate": 0.0005, |
| "loss": 1.4411, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.13177613560022738, |
| "grad_norm": 0.867132842540741, |
| "learning_rate": 0.0005, |
| "loss": 1.4558, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.13229290475944397, |
| "grad_norm": 0.9243984222412109, |
| "learning_rate": 0.0005, |
| "loss": 1.4282, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.13280967391866053, |
| "grad_norm": 1.1926263570785522, |
| "learning_rate": 0.0005, |
| "loss": 1.4187, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.1333264430778771, |
| "grad_norm": 1.1110721826553345, |
| "learning_rate": 0.0005, |
| "loss": 1.4302, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.1338432122370937, |
| "grad_norm": 0.9598495960235596, |
| "learning_rate": 0.0005, |
| "loss": 1.4459, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.13435998139631028, |
| "grad_norm": 0.9147258996963501, |
| "learning_rate": 0.0005, |
| "loss": 1.4174, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.13487675055552684, |
| "grad_norm": 0.8530228734016418, |
| "learning_rate": 0.0005, |
| "loss": 1.4348, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.13539351971474342, |
| "grad_norm": 1.0487037897109985, |
| "learning_rate": 0.0005, |
| "loss": 1.4302, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.13591028887396, |
| "grad_norm": 1.0711545944213867, |
| "learning_rate": 0.0005, |
| "loss": 1.425, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.1364270580331766, |
| "grad_norm": 1.0053889751434326, |
| "learning_rate": 0.0005, |
| "loss": 1.4099, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.13694382719239315, |
| "grad_norm": 0.8895754814147949, |
| "learning_rate": 0.0005, |
| "loss": 1.4101, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.13746059635160973, |
| "grad_norm": 1.1464654207229614, |
| "learning_rate": 0.0005, |
| "loss": 1.409, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.13797736551082632, |
| "grad_norm": 1.4213604927062988, |
| "learning_rate": 0.0005, |
| "loss": 1.4333, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.1384941346700429, |
| "grad_norm": 0.8963467478752136, |
| "learning_rate": 0.0005, |
| "loss": 1.4047, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.13901090382925946, |
| "grad_norm": 0.9514134526252747, |
| "learning_rate": 0.0005, |
| "loss": 1.3923, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.13952767298847604, |
| "grad_norm": 0.8818897604942322, |
| "learning_rate": 0.0005, |
| "loss": 1.4031, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.14004444214769263, |
| "grad_norm": 0.8554843664169312, |
| "learning_rate": 0.0005, |
| "loss": 1.4005, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.1405612113069092, |
| "grad_norm": 0.9477766752243042, |
| "learning_rate": 0.0005, |
| "loss": 1.3871, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.14107798046612577, |
| "grad_norm": 0.9560056924819946, |
| "learning_rate": 0.0005, |
| "loss": 1.388, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.14159474962534235, |
| "grad_norm": 1.325939655303955, |
| "learning_rate": 0.0005, |
| "loss": 1.372, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.14211151878455894, |
| "grad_norm": 0.9184489846229553, |
| "learning_rate": 0.0005, |
| "loss": 1.3901, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.14262828794377552, |
| "grad_norm": 0.905005693435669, |
| "learning_rate": 0.0005, |
| "loss": 1.3652, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.1431450571029921, |
| "grad_norm": 0.9112023115158081, |
| "learning_rate": 0.0005, |
| "loss": 1.3805, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.14366182626220866, |
| "grad_norm": 0.909542977809906, |
| "learning_rate": 0.0005, |
| "loss": 1.3851, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.14417859542142525, |
| "grad_norm": 0.8679105639457703, |
| "learning_rate": 0.0005, |
| "loss": 1.3776, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.14469536458064183, |
| "grad_norm": 0.884416401386261, |
| "learning_rate": 0.0005, |
| "loss": 1.3787, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.14521213373985842, |
| "grad_norm": 0.8939566612243652, |
| "learning_rate": 0.0005, |
| "loss": 1.3695, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.14572890289907497, |
| "grad_norm": 1.2388486862182617, |
| "learning_rate": 0.0005, |
| "loss": 1.3926, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.14624567205829156, |
| "grad_norm": 1.2662867307662964, |
| "learning_rate": 0.0005, |
| "loss": 1.3804, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.14676244121750814, |
| "grad_norm": 0.8967621326446533, |
| "learning_rate": 0.0005, |
| "loss": 1.3513, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.14727921037672473, |
| "grad_norm": 0.8640676736831665, |
| "learning_rate": 0.0005, |
| "loss": 1.3546, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.14779597953594129, |
| "grad_norm": 1.0147978067398071, |
| "learning_rate": 0.0005, |
| "loss": 1.3699, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.14831274869515787, |
| "grad_norm": 0.8949346542358398, |
| "learning_rate": 0.0005, |
| "loss": 1.345, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.14882951785437445, |
| "grad_norm": 0.8535652756690979, |
| "learning_rate": 0.0005, |
| "loss": 1.3724, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.14934628701359104, |
| "grad_norm": 0.840876042842865, |
| "learning_rate": 0.0005, |
| "loss": 1.3692, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.1498630561728076, |
| "grad_norm": 0.8421388864517212, |
| "learning_rate": 0.0005, |
| "loss": 1.3639, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.15037982533202418, |
| "grad_norm": 0.8401720523834229, |
| "learning_rate": 0.0005, |
| "loss": 1.348, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.15089659449124077, |
| "grad_norm": 0.8139095306396484, |
| "learning_rate": 0.0005, |
| "loss": 1.365, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.15141336365045735, |
| "grad_norm": 0.8704052567481995, |
| "learning_rate": 0.0005, |
| "loss": 1.3482, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.1519301328096739, |
| "grad_norm": 0.8963611125946045, |
| "learning_rate": 0.0005, |
| "loss": 1.3336, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.1524469019688905, |
| "grad_norm": 0.8725153207778931, |
| "learning_rate": 0.0005, |
| "loss": 1.3724, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.15296367112810708, |
| "grad_norm": 0.9125774502754211, |
| "learning_rate": 0.0005, |
| "loss": 1.3377, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.15348044028732366, |
| "grad_norm": 1.1160928010940552, |
| "learning_rate": 0.0005, |
| "loss": 1.3582, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.15399720944654022, |
| "grad_norm": 0.8732350468635559, |
| "learning_rate": 0.0005, |
| "loss": 1.3471, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.1545139786057568, |
| "grad_norm": 0.8881607055664062, |
| "learning_rate": 0.0005, |
| "loss": 1.3552, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.1550307477649734, |
| "grad_norm": 1.0814484357833862, |
| "learning_rate": 0.0005, |
| "loss": 1.3628, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.15554751692418997, |
| "grad_norm": 0.81389319896698, |
| "learning_rate": 0.0005, |
| "loss": 1.3249, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.15606428608340656, |
| "grad_norm": 0.8424196839332581, |
| "learning_rate": 0.0005, |
| "loss": 1.323, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.1565810552426231, |
| "grad_norm": 0.8028131127357483, |
| "learning_rate": 0.0005, |
| "loss": 1.3302, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.1570978244018397, |
| "grad_norm": 0.8348473906517029, |
| "learning_rate": 0.0005, |
| "loss": 1.3314, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.15761459356105628, |
| "grad_norm": 1.2074034214019775, |
| "learning_rate": 0.0005, |
| "loss": 1.3355, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.15813136272027287, |
| "grad_norm": 0.8177675604820251, |
| "learning_rate": 0.0005, |
| "loss": 1.3427, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.15864813187948942, |
| "grad_norm": 0.796273410320282, |
| "learning_rate": 0.0005, |
| "loss": 1.3088, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.159164901038706, |
| "grad_norm": 1.0104438066482544, |
| "learning_rate": 0.0005, |
| "loss": 1.3255, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.1596816701979226, |
| "grad_norm": 0.9192485809326172, |
| "learning_rate": 0.0005, |
| "loss": 1.3347, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.16019843935713918, |
| "grad_norm": 0.912550151348114, |
| "learning_rate": 0.0005, |
| "loss": 1.3157, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.16071520851635573, |
| "grad_norm": 0.9644028544425964, |
| "learning_rate": 0.0005, |
| "loss": 1.3242, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.16123197767557232, |
| "grad_norm": 0.9894726872444153, |
| "learning_rate": 0.0005, |
| "loss": 1.2968, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.1617487468347889, |
| "grad_norm": 0.9292682409286499, |
| "learning_rate": 0.0005, |
| "loss": 1.3342, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.1622655159940055, |
| "grad_norm": 0.9219216704368591, |
| "learning_rate": 0.0005, |
| "loss": 1.3242, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.16278228515322204, |
| "grad_norm": 1.1059894561767578, |
| "learning_rate": 0.0005, |
| "loss": 1.3238, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.16329905431243863, |
| "grad_norm": 0.8726058602333069, |
| "learning_rate": 0.0005, |
| "loss": 1.315, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.16381582347165521, |
| "grad_norm": 0.8204345107078552, |
| "learning_rate": 0.0005, |
| "loss": 1.3085, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.1643325926308718, |
| "grad_norm": 0.9515188932418823, |
| "learning_rate": 0.0005, |
| "loss": 1.2986, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.16484936179008836, |
| "grad_norm": 0.8825114369392395, |
| "learning_rate": 0.0005, |
| "loss": 1.2921, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.16536613094930494, |
| "grad_norm": 0.8144583702087402, |
| "learning_rate": 0.0005, |
| "loss": 1.2991, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.16588290010852152, |
| "grad_norm": 0.8747395873069763, |
| "learning_rate": 0.0005, |
| "loss": 1.2936, |
| "step": 3210 |
| }, |
| { |
| "epoch": 0.1663996692677381, |
| "grad_norm": 0.9829278588294983, |
| "learning_rate": 0.0005, |
| "loss": 1.2898, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.16691643842695467, |
| "grad_norm": 0.917072594165802, |
| "learning_rate": 0.0005, |
| "loss": 1.3056, |
| "step": 3230 |
| }, |
| { |
| "epoch": 0.16743320758617125, |
| "grad_norm": 0.893224835395813, |
| "learning_rate": 0.0005, |
| "loss": 1.2958, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.16794997674538784, |
| "grad_norm": 0.8513831496238708, |
| "learning_rate": 0.0005, |
| "loss": 1.3073, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.16846674590460442, |
| "grad_norm": 0.7902063727378845, |
| "learning_rate": 0.0005, |
| "loss": 1.2962, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.168983515063821, |
| "grad_norm": 0.8533388376235962, |
| "learning_rate": 0.0005, |
| "loss": 1.3034, |
| "step": 3270 |
| }, |
| { |
| "epoch": 0.16950028422303756, |
| "grad_norm": 0.89384526014328, |
| "learning_rate": 0.0005, |
| "loss": 1.306, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.17001705338225415, |
| "grad_norm": 1.1740915775299072, |
| "learning_rate": 0.0005, |
| "loss": 1.2861, |
| "step": 3290 |
| }, |
| { |
| "epoch": 0.17053382254147073, |
| "grad_norm": 0.7941210269927979, |
| "learning_rate": 0.0005, |
| "loss": 1.29, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.17105059170068732, |
| "grad_norm": 0.82374107837677, |
| "learning_rate": 0.0005, |
| "loss": 1.2715, |
| "step": 3310 |
| }, |
| { |
| "epoch": 0.17156736085990387, |
| "grad_norm": 0.9856778979301453, |
| "learning_rate": 0.0005, |
| "loss": 1.2908, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.17208413001912046, |
| "grad_norm": 0.777244508266449, |
| "learning_rate": 0.0005, |
| "loss": 1.2891, |
| "step": 3330 |
| }, |
| { |
| "epoch": 0.17260089917833704, |
| "grad_norm": 0.8938208222389221, |
| "learning_rate": 0.0005, |
| "loss": 1.285, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.17311766833755363, |
| "grad_norm": 0.8124037384986877, |
| "learning_rate": 0.0005, |
| "loss": 1.2908, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.17363443749677018, |
| "grad_norm": 0.9345457553863525, |
| "learning_rate": 0.0005, |
| "loss": 1.2964, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.17415120665598677, |
| "grad_norm": 0.7821003794670105, |
| "learning_rate": 0.0005, |
| "loss": 1.2767, |
| "step": 3370 |
| }, |
| { |
| "epoch": 0.17466797581520335, |
| "grad_norm": 0.8330212831497192, |
| "learning_rate": 0.0005, |
| "loss": 1.2779, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.17518474497441994, |
| "grad_norm": 0.764042854309082, |
| "learning_rate": 0.0005, |
| "loss": 1.2698, |
| "step": 3390 |
| }, |
| { |
| "epoch": 0.1757015141336365, |
| "grad_norm": 0.9339214563369751, |
| "learning_rate": 0.0005, |
| "loss": 1.2777, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.17621828329285308, |
| "grad_norm": 0.8121135830879211, |
| "learning_rate": 0.0005, |
| "loss": 1.2869, |
| "step": 3410 |
| }, |
| { |
| "epoch": 0.17673505245206966, |
| "grad_norm": 0.8460163474082947, |
| "learning_rate": 0.0005, |
| "loss": 1.2913, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.17725182161128625, |
| "grad_norm": 1.3961695432662964, |
| "learning_rate": 0.0005, |
| "loss": 1.2971, |
| "step": 3430 |
| }, |
| { |
| "epoch": 0.1777685907705028, |
| "grad_norm": 0.8089907765388489, |
| "learning_rate": 0.0005, |
| "loss": 1.2612, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.1782853599297194, |
| "grad_norm": 0.8770979046821594, |
| "learning_rate": 0.0005, |
| "loss": 1.2739, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.17880212908893597, |
| "grad_norm": 0.8448237776756287, |
| "learning_rate": 0.0005, |
| "loss": 1.2735, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.17931889824815256, |
| "grad_norm": 0.9335261583328247, |
| "learning_rate": 0.0005, |
| "loss": 1.2671, |
| "step": 3470 |
| }, |
| { |
| "epoch": 0.17983566740736912, |
| "grad_norm": 0.7510360479354858, |
| "learning_rate": 0.0005, |
| "loss": 1.2691, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.1803524365665857, |
| "grad_norm": 0.7871717810630798, |
| "learning_rate": 0.0005, |
| "loss": 1.2642, |
| "step": 3490 |
| }, |
| { |
| "epoch": 0.18086920572580228, |
| "grad_norm": 1.1407464742660522, |
| "learning_rate": 0.0005, |
| "loss": 1.248, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.18138597488501887, |
| "grad_norm": 0.8027787208557129, |
| "learning_rate": 0.0005, |
| "loss": 1.2557, |
| "step": 3510 |
| }, |
| { |
| "epoch": 0.18190274404423545, |
| "grad_norm": 0.8517947793006897, |
| "learning_rate": 0.0005, |
| "loss": 1.2529, |
| "step": 3520 |
| }, |
| { |
| "epoch": 0.182419513203452, |
| "grad_norm": 0.9083014726638794, |
| "learning_rate": 0.0005, |
| "loss": 1.2489, |
| "step": 3530 |
| }, |
| { |
| "epoch": 0.1829362823626686, |
| "grad_norm": 1.0628485679626465, |
| "learning_rate": 0.0005, |
| "loss": 1.2669, |
| "step": 3540 |
| }, |
| { |
| "epoch": 0.18345305152188518, |
| "grad_norm": 1.0175726413726807, |
| "learning_rate": 0.0005, |
| "loss": 1.2473, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.18396982068110176, |
| "grad_norm": 0.7979172468185425, |
| "learning_rate": 0.0005, |
| "loss": 1.2471, |
| "step": 3560 |
| }, |
| { |
| "epoch": 0.18448658984031832, |
| "grad_norm": 0.7472112774848938, |
| "learning_rate": 0.0005, |
| "loss": 1.2413, |
| "step": 3570 |
| }, |
| { |
| "epoch": 0.1850033589995349, |
| "grad_norm": 0.8240432739257812, |
| "learning_rate": 0.0005, |
| "loss": 1.2521, |
| "step": 3580 |
| }, |
| { |
| "epoch": 0.1855201281587515, |
| "grad_norm": 0.8023159503936768, |
| "learning_rate": 0.0005, |
| "loss": 1.2471, |
| "step": 3590 |
| }, |
| { |
| "epoch": 0.18603689731796808, |
| "grad_norm": 0.7950299978256226, |
| "learning_rate": 0.0005, |
| "loss": 1.2327, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.18655366647718463, |
| "grad_norm": 0.7718859314918518, |
| "learning_rate": 0.0005, |
| "loss": 1.2417, |
| "step": 3610 |
| }, |
| { |
| "epoch": 0.18707043563640122, |
| "grad_norm": 0.8416433334350586, |
| "learning_rate": 0.0005, |
| "loss": 1.2531, |
| "step": 3620 |
| }, |
| { |
| "epoch": 0.1875872047956178, |
| "grad_norm": 0.7842203974723816, |
| "learning_rate": 0.0005, |
| "loss": 1.2435, |
| "step": 3630 |
| }, |
| { |
| "epoch": 0.18810397395483439, |
| "grad_norm": 0.8708809614181519, |
| "learning_rate": 0.0005, |
| "loss": 1.245, |
| "step": 3640 |
| }, |
| { |
| "epoch": 0.18862074311405094, |
| "grad_norm": 0.8131195902824402, |
| "learning_rate": 0.0005, |
| "loss": 1.244, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.18913751227326753, |
| "grad_norm": 0.8010774254798889, |
| "learning_rate": 0.0005, |
| "loss": 1.245, |
| "step": 3660 |
| }, |
| { |
| "epoch": 0.1896542814324841, |
| "grad_norm": 0.7978084087371826, |
| "learning_rate": 0.0005, |
| "loss": 1.2475, |
| "step": 3670 |
| }, |
| { |
| "epoch": 0.1901710505917007, |
| "grad_norm": 0.7844563722610474, |
| "learning_rate": 0.0005, |
| "loss": 1.2325, |
| "step": 3680 |
| }, |
| { |
| "epoch": 0.19068781975091725, |
| "grad_norm": 0.8755462765693665, |
| "learning_rate": 0.0005, |
| "loss": 1.2243, |
| "step": 3690 |
| }, |
| { |
| "epoch": 0.19120458891013384, |
| "grad_norm": 0.7727536559104919, |
| "learning_rate": 0.0005, |
| "loss": 1.2447, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.19172135806935042, |
| "grad_norm": 0.7509860396385193, |
| "learning_rate": 0.0005, |
| "loss": 1.2324, |
| "step": 3710 |
| }, |
| { |
| "epoch": 0.192238127228567, |
| "grad_norm": 0.9001826047897339, |
| "learning_rate": 0.0005, |
| "loss": 1.2175, |
| "step": 3720 |
| }, |
| { |
| "epoch": 0.19275489638778356, |
| "grad_norm": 0.7595515847206116, |
| "learning_rate": 0.0005, |
| "loss": 1.2536, |
| "step": 3730 |
| }, |
| { |
| "epoch": 0.19327166554700015, |
| "grad_norm": 0.746465802192688, |
| "learning_rate": 0.0005, |
| "loss": 1.2439, |
| "step": 3740 |
| }, |
| { |
| "epoch": 0.19378843470621673, |
| "grad_norm": 0.8454607725143433, |
| "learning_rate": 0.0005, |
| "loss": 1.2319, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.19430520386543332, |
| "grad_norm": 0.7905994057655334, |
| "learning_rate": 0.0005, |
| "loss": 1.2335, |
| "step": 3760 |
| }, |
| { |
| "epoch": 0.1948219730246499, |
| "grad_norm": 1.1130495071411133, |
| "learning_rate": 0.0005, |
| "loss": 1.2444, |
| "step": 3770 |
| }, |
| { |
| "epoch": 0.19533874218386646, |
| "grad_norm": 0.9213355183601379, |
| "learning_rate": 0.0005, |
| "loss": 1.2188, |
| "step": 3780 |
| }, |
| { |
| "epoch": 0.19585551134308304, |
| "grad_norm": 0.8003748655319214, |
| "learning_rate": 0.0005, |
| "loss": 1.2478, |
| "step": 3790 |
| }, |
| { |
| "epoch": 0.19637228050229963, |
| "grad_norm": 0.7667946815490723, |
| "learning_rate": 0.0005, |
| "loss": 1.2286, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.1968890496615162, |
| "grad_norm": 0.7806205153465271, |
| "learning_rate": 0.0005, |
| "loss": 1.2152, |
| "step": 3810 |
| }, |
| { |
| "epoch": 0.19740581882073277, |
| "grad_norm": 1.1093833446502686, |
| "learning_rate": 0.0005, |
| "loss": 1.2281, |
| "step": 3820 |
| }, |
| { |
| "epoch": 0.19792258797994935, |
| "grad_norm": 0.8750317692756653, |
| "learning_rate": 0.0005, |
| "loss": 1.2418, |
| "step": 3830 |
| }, |
| { |
| "epoch": 0.19843935713916594, |
| "grad_norm": 0.9322946071624756, |
| "learning_rate": 0.0005, |
| "loss": 1.2168, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.19895612629838252, |
| "grad_norm": 0.9042627215385437, |
| "learning_rate": 0.0005, |
| "loss": 1.229, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.19947289545759908, |
| "grad_norm": 0.8162991404533386, |
| "learning_rate": 0.0005, |
| "loss": 1.2044, |
| "step": 3860 |
| }, |
| { |
| "epoch": 0.19998966461681567, |
| "grad_norm": 0.7078894972801208, |
| "learning_rate": 0.0005, |
| "loss": 1.2077, |
| "step": 3870 |
| }, |
| { |
| "epoch": 0.20050643377603225, |
| "grad_norm": 0.8144243955612183, |
| "learning_rate": 0.0005, |
| "loss": 1.1932, |
| "step": 3880 |
| }, |
| { |
| "epoch": 0.20102320293524883, |
| "grad_norm": 0.7456822991371155, |
| "learning_rate": 0.0005, |
| "loss": 1.2187, |
| "step": 3890 |
| }, |
| { |
| "epoch": 0.2015399720944654, |
| "grad_norm": 0.7855635285377502, |
| "learning_rate": 0.0005, |
| "loss": 1.2096, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.20205674125368198, |
| "grad_norm": 0.7501581311225891, |
| "learning_rate": 0.0005, |
| "loss": 1.2083, |
| "step": 3910 |
| }, |
| { |
| "epoch": 0.20257351041289856, |
| "grad_norm": 0.7569208145141602, |
| "learning_rate": 0.0005, |
| "loss": 1.2208, |
| "step": 3920 |
| }, |
| { |
| "epoch": 0.20309027957211515, |
| "grad_norm": 0.7520230412483215, |
| "learning_rate": 0.0005, |
| "loss": 1.2031, |
| "step": 3930 |
| }, |
| { |
| "epoch": 0.2036070487313317, |
| "grad_norm": 0.9110859632492065, |
| "learning_rate": 0.0005, |
| "loss": 1.2135, |
| "step": 3940 |
| }, |
| { |
| "epoch": 0.2041238178905483, |
| "grad_norm": 0.738043487071991, |
| "learning_rate": 0.0005, |
| "loss": 1.2066, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.20464058704976487, |
| "grad_norm": 0.7910060286521912, |
| "learning_rate": 0.0005, |
| "loss": 1.2089, |
| "step": 3960 |
| }, |
| { |
| "epoch": 0.20515735620898146, |
| "grad_norm": 0.7672162652015686, |
| "learning_rate": 0.0005, |
| "loss": 1.216, |
| "step": 3970 |
| }, |
| { |
| "epoch": 0.205674125368198, |
| "grad_norm": 0.7567201852798462, |
| "learning_rate": 0.0005, |
| "loss": 1.1915, |
| "step": 3980 |
| }, |
| { |
| "epoch": 0.2061908945274146, |
| "grad_norm": 0.759067714214325, |
| "learning_rate": 0.0005, |
| "loss": 1.2111, |
| "step": 3990 |
| }, |
| { |
| "epoch": 0.20670766368663118, |
| "grad_norm": 0.7911349534988403, |
| "learning_rate": 0.0005, |
| "loss": 1.211, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.20722443284584777, |
| "grad_norm": 1.0086050033569336, |
| "learning_rate": 0.0005, |
| "loss": 1.2122, |
| "step": 4010 |
| }, |
| { |
| "epoch": 0.20774120200506435, |
| "grad_norm": 1.1961076259613037, |
| "learning_rate": 0.0005, |
| "loss": 1.1972, |
| "step": 4020 |
| }, |
| { |
| "epoch": 0.2082579711642809, |
| "grad_norm": 0.8429704308509827, |
| "learning_rate": 0.0005, |
| "loss": 1.2038, |
| "step": 4030 |
| }, |
| { |
| "epoch": 0.2087747403234975, |
| "grad_norm": 1.0080244541168213, |
| "learning_rate": 0.0005, |
| "loss": 1.1981, |
| "step": 4040 |
| }, |
| { |
| "epoch": 0.20929150948271408, |
| "grad_norm": 0.7220394611358643, |
| "learning_rate": 0.0005, |
| "loss": 1.2083, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.20980827864193066, |
| "grad_norm": 0.7594371438026428, |
| "learning_rate": 0.0005, |
| "loss": 1.1976, |
| "step": 4060 |
| }, |
| { |
| "epoch": 0.21032504780114722, |
| "grad_norm": 0.7990491986274719, |
| "learning_rate": 0.0005, |
| "loss": 1.1938, |
| "step": 4070 |
| }, |
| { |
| "epoch": 0.2108418169603638, |
| "grad_norm": 1.0034983158111572, |
| "learning_rate": 0.0005, |
| "loss": 1.1769, |
| "step": 4080 |
| }, |
| { |
| "epoch": 0.2113585861195804, |
| "grad_norm": 0.8476843237876892, |
| "learning_rate": 0.0005, |
| "loss": 1.1914, |
| "step": 4090 |
| }, |
| { |
| "epoch": 0.21187535527879697, |
| "grad_norm": 0.7301702499389648, |
| "learning_rate": 0.0005, |
| "loss": 1.2054, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.21239212443801353, |
| "grad_norm": 0.7379107475280762, |
| "learning_rate": 0.0005, |
| "loss": 1.1945, |
| "step": 4110 |
| }, |
| { |
| "epoch": 0.21290889359723011, |
| "grad_norm": 0.7332804203033447, |
| "learning_rate": 0.0005, |
| "loss": 1.1921, |
| "step": 4120 |
| }, |
| { |
| "epoch": 0.2134256627564467, |
| "grad_norm": 0.7600969672203064, |
| "learning_rate": 0.0005, |
| "loss": 1.1957, |
| "step": 4130 |
| }, |
| { |
| "epoch": 0.21394243191566328, |
| "grad_norm": 0.9124670028686523, |
| "learning_rate": 0.0005, |
| "loss": 1.199, |
| "step": 4140 |
| }, |
| { |
| "epoch": 0.21445920107487984, |
| "grad_norm": 0.7995319962501526, |
| "learning_rate": 0.0005, |
| "loss": 1.1806, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.21497597023409643, |
| "grad_norm": 0.7137150168418884, |
| "learning_rate": 0.0005, |
| "loss": 1.1944, |
| "step": 4160 |
| }, |
| { |
| "epoch": 0.215492739393313, |
| "grad_norm": 0.8427070379257202, |
| "learning_rate": 0.0005, |
| "loss": 1.204, |
| "step": 4170 |
| }, |
| { |
| "epoch": 0.2160095085525296, |
| "grad_norm": 0.6893758177757263, |
| "learning_rate": 0.0005, |
| "loss": 1.2056, |
| "step": 4180 |
| }, |
| { |
| "epoch": 0.21652627771174615, |
| "grad_norm": 0.777153730392456, |
| "learning_rate": 0.0005, |
| "loss": 1.1834, |
| "step": 4190 |
| }, |
| { |
| "epoch": 0.21704304687096274, |
| "grad_norm": 0.7304201126098633, |
| "learning_rate": 0.0005, |
| "loss": 1.1918, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.21755981603017932, |
| "grad_norm": 0.7642196416854858, |
| "learning_rate": 0.0005, |
| "loss": 1.2043, |
| "step": 4210 |
| }, |
| { |
| "epoch": 0.2180765851893959, |
| "grad_norm": 0.703868567943573, |
| "learning_rate": 0.0005, |
| "loss": 1.1717, |
| "step": 4220 |
| }, |
| { |
| "epoch": 0.21859335434861246, |
| "grad_norm": 0.751356840133667, |
| "learning_rate": 0.0005, |
| "loss": 1.1975, |
| "step": 4230 |
| }, |
| { |
| "epoch": 0.21911012350782905, |
| "grad_norm": 0.8302937150001526, |
| "learning_rate": 0.0005, |
| "loss": 1.1981, |
| "step": 4240 |
| }, |
| { |
| "epoch": 0.21962689266704563, |
| "grad_norm": 0.8335602879524231, |
| "learning_rate": 0.0005, |
| "loss": 1.1863, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.22014366182626222, |
| "grad_norm": 0.7479858994483948, |
| "learning_rate": 0.0005, |
| "loss": 1.1788, |
| "step": 4260 |
| }, |
| { |
| "epoch": 0.2206604309854788, |
| "grad_norm": 0.9171736836433411, |
| "learning_rate": 0.0005, |
| "loss": 1.1773, |
| "step": 4270 |
| }, |
| { |
| "epoch": 0.22117720014469536, |
| "grad_norm": 0.7626177668571472, |
| "learning_rate": 0.0005, |
| "loss": 1.1869, |
| "step": 4280 |
| }, |
| { |
| "epoch": 0.22169396930391194, |
| "grad_norm": 0.7428616881370544, |
| "learning_rate": 0.0005, |
| "loss": 1.1698, |
| "step": 4290 |
| }, |
| { |
| "epoch": 0.22221073846312853, |
| "grad_norm": 0.8029087781906128, |
| "learning_rate": 0.0005, |
| "loss": 1.1884, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.2227275076223451, |
| "grad_norm": 0.7876361608505249, |
| "learning_rate": 0.0005, |
| "loss": 1.1843, |
| "step": 4310 |
| }, |
| { |
| "epoch": 0.22324427678156167, |
| "grad_norm": 0.6730009913444519, |
| "learning_rate": 0.0005, |
| "loss": 1.1703, |
| "step": 4320 |
| }, |
| { |
| "epoch": 0.22376104594077825, |
| "grad_norm": 0.7202760577201843, |
| "learning_rate": 0.0005, |
| "loss": 1.1753, |
| "step": 4330 |
| }, |
| { |
| "epoch": 0.22427781509999484, |
| "grad_norm": 0.7547861337661743, |
| "learning_rate": 0.0005, |
| "loss": 1.1755, |
| "step": 4340 |
| }, |
| { |
| "epoch": 0.22479458425921142, |
| "grad_norm": 0.7263453602790833, |
| "learning_rate": 0.0005, |
| "loss": 1.1783, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.22531135341842798, |
| "grad_norm": 0.7226181030273438, |
| "learning_rate": 0.0005, |
| "loss": 1.1829, |
| "step": 4360 |
| }, |
| { |
| "epoch": 0.22582812257764456, |
| "grad_norm": 0.7433076500892639, |
| "learning_rate": 0.0005, |
| "loss": 1.1821, |
| "step": 4370 |
| }, |
| { |
| "epoch": 0.22634489173686115, |
| "grad_norm": 0.8025347590446472, |
| "learning_rate": 0.0005, |
| "loss": 1.1548, |
| "step": 4380 |
| }, |
| { |
| "epoch": 0.22686166089607773, |
| "grad_norm": 0.8330517411231995, |
| "learning_rate": 0.0005, |
| "loss": 1.1757, |
| "step": 4390 |
| }, |
| { |
| "epoch": 0.2273784300552943, |
| "grad_norm": 0.7150396704673767, |
| "learning_rate": 0.0005, |
| "loss": 1.1592, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.22789519921451087, |
| "grad_norm": 0.8366827368736267, |
| "learning_rate": 0.0005, |
| "loss": 1.1614, |
| "step": 4410 |
| }, |
| { |
| "epoch": 0.22841196837372746, |
| "grad_norm": 0.8655450344085693, |
| "learning_rate": 0.0005, |
| "loss": 1.1553, |
| "step": 4420 |
| }, |
| { |
| "epoch": 0.22892873753294404, |
| "grad_norm": 0.6938055753707886, |
| "learning_rate": 0.0005, |
| "loss": 1.1657, |
| "step": 4430 |
| }, |
| { |
| "epoch": 0.2294455066921606, |
| "grad_norm": 0.7177290320396423, |
| "learning_rate": 0.0005, |
| "loss": 1.1728, |
| "step": 4440 |
| }, |
| { |
| "epoch": 0.22996227585137718, |
| "grad_norm": 0.7082594037055969, |
| "learning_rate": 0.0005, |
| "loss": 1.1659, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.23047904501059377, |
| "grad_norm": 0.7543273568153381, |
| "learning_rate": 0.0005, |
| "loss": 1.1517, |
| "step": 4460 |
| }, |
| { |
| "epoch": 0.23099581416981035, |
| "grad_norm": 0.722029983997345, |
| "learning_rate": 0.0005, |
| "loss": 1.1593, |
| "step": 4470 |
| }, |
| { |
| "epoch": 0.2315125833290269, |
| "grad_norm": 0.7107385396957397, |
| "learning_rate": 0.0005, |
| "loss": 1.1499, |
| "step": 4480 |
| }, |
| { |
| "epoch": 0.2320293524882435, |
| "grad_norm": 0.8118393421173096, |
| "learning_rate": 0.0005, |
| "loss": 1.1614, |
| "step": 4490 |
| }, |
| { |
| "epoch": 0.23254612164746008, |
| "grad_norm": 0.7901565432548523, |
| "learning_rate": 0.0005, |
| "loss": 1.1627, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.23306289080667666, |
| "grad_norm": 0.6997384428977966, |
| "learning_rate": 0.0005, |
| "loss": 1.1694, |
| "step": 4510 |
| }, |
| { |
| "epoch": 0.23357965996589325, |
| "grad_norm": 0.7574887871742249, |
| "learning_rate": 0.0005, |
| "loss": 1.1772, |
| "step": 4520 |
| }, |
| { |
| "epoch": 0.2340964291251098, |
| "grad_norm": 0.709123432636261, |
| "learning_rate": 0.0005, |
| "loss": 1.1793, |
| "step": 4530 |
| }, |
| { |
| "epoch": 0.2346131982843264, |
| "grad_norm": 0.7011120915412903, |
| "learning_rate": 0.0005, |
| "loss": 1.1569, |
| "step": 4540 |
| }, |
| { |
| "epoch": 0.23512996744354298, |
| "grad_norm": 0.7826752662658691, |
| "learning_rate": 0.0005, |
| "loss": 1.1551, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.23564673660275956, |
| "grad_norm": 0.7468019723892212, |
| "learning_rate": 0.0005, |
| "loss": 1.177, |
| "step": 4560 |
| }, |
| { |
| "epoch": 0.23616350576197612, |
| "grad_norm": 0.8336277604103088, |
| "learning_rate": 0.0005, |
| "loss": 1.1437, |
| "step": 4570 |
| }, |
| { |
| "epoch": 0.2366802749211927, |
| "grad_norm": 0.7412180304527283, |
| "learning_rate": 0.0005, |
| "loss": 1.1371, |
| "step": 4580 |
| }, |
| { |
| "epoch": 0.23719704408040929, |
| "grad_norm": 0.7702532410621643, |
| "learning_rate": 0.0005, |
| "loss": 1.1539, |
| "step": 4590 |
| }, |
| { |
| "epoch": 0.23771381323962587, |
| "grad_norm": 0.7170100808143616, |
| "learning_rate": 0.0005, |
| "loss": 1.1493, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.23823058239884243, |
| "grad_norm": 0.6973877549171448, |
| "learning_rate": 0.0005, |
| "loss": 1.1686, |
| "step": 4610 |
| }, |
| { |
| "epoch": 0.238747351558059, |
| "grad_norm": 0.7682148218154907, |
| "learning_rate": 0.0005, |
| "loss": 1.1374, |
| "step": 4620 |
| }, |
| { |
| "epoch": 0.2392641207172756, |
| "grad_norm": 0.7360324263572693, |
| "learning_rate": 0.0005, |
| "loss": 1.1461, |
| "step": 4630 |
| }, |
| { |
| "epoch": 0.23978088987649218, |
| "grad_norm": 0.6636998057365417, |
| "learning_rate": 0.0005, |
| "loss": 1.1468, |
| "step": 4640 |
| }, |
| { |
| "epoch": 0.24029765903570874, |
| "grad_norm": 0.9023354053497314, |
| "learning_rate": 0.0005, |
| "loss": 1.1523, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.24081442819492532, |
| "grad_norm": 0.6802653074264526, |
| "learning_rate": 0.0005, |
| "loss": 1.1354, |
| "step": 4660 |
| }, |
| { |
| "epoch": 0.2413311973541419, |
| "grad_norm": 0.917087972164154, |
| "learning_rate": 0.0005, |
| "loss": 1.1402, |
| "step": 4670 |
| }, |
| { |
| "epoch": 0.2418479665133585, |
| "grad_norm": 0.8304193019866943, |
| "learning_rate": 0.0005, |
| "loss": 1.1526, |
| "step": 4680 |
| }, |
| { |
| "epoch": 0.24236473567257505, |
| "grad_norm": 0.833188533782959, |
| "learning_rate": 0.0005, |
| "loss": 1.165, |
| "step": 4690 |
| }, |
| { |
| "epoch": 0.24288150483179163, |
| "grad_norm": 0.7147198915481567, |
| "learning_rate": 0.0005, |
| "loss": 1.1431, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.24339827399100822, |
| "grad_norm": 0.6784700155258179, |
| "learning_rate": 0.0005, |
| "loss": 1.138, |
| "step": 4710 |
| }, |
| { |
| "epoch": 0.2439150431502248, |
| "grad_norm": 0.6933045983314514, |
| "learning_rate": 0.0005, |
| "loss": 1.1173, |
| "step": 4720 |
| }, |
| { |
| "epoch": 0.24443181230944136, |
| "grad_norm": 0.7840824127197266, |
| "learning_rate": 0.0005, |
| "loss": 1.1384, |
| "step": 4730 |
| }, |
| { |
| "epoch": 0.24494858146865794, |
| "grad_norm": 0.8129291534423828, |
| "learning_rate": 0.0005, |
| "loss": 1.151, |
| "step": 4740 |
| }, |
| { |
| "epoch": 0.24546535062787453, |
| "grad_norm": 0.7420192360877991, |
| "learning_rate": 0.0005, |
| "loss": 1.1218, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.2459821197870911, |
| "grad_norm": 0.6665251851081848, |
| "learning_rate": 0.0005, |
| "loss": 1.1278, |
| "step": 4760 |
| }, |
| { |
| "epoch": 0.2464988889463077, |
| "grad_norm": 0.7529242038726807, |
| "learning_rate": 0.0005, |
| "loss": 1.1417, |
| "step": 4770 |
| }, |
| { |
| "epoch": 0.24701565810552426, |
| "grad_norm": 0.6908478140830994, |
| "learning_rate": 0.0005, |
| "loss": 1.1353, |
| "step": 4780 |
| }, |
| { |
| "epoch": 0.24753242726474084, |
| "grad_norm": 0.6860882043838501, |
| "learning_rate": 0.0005, |
| "loss": 1.1278, |
| "step": 4790 |
| }, |
| { |
| "epoch": 0.24804919642395742, |
| "grad_norm": 0.7322950959205627, |
| "learning_rate": 0.0005, |
| "loss": 1.1447, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.248565965583174, |
| "grad_norm": 0.679210364818573, |
| "learning_rate": 0.0005, |
| "loss": 1.146, |
| "step": 4810 |
| }, |
| { |
| "epoch": 0.24908273474239057, |
| "grad_norm": 0.7133141756057739, |
| "learning_rate": 0.0005, |
| "loss": 1.1389, |
| "step": 4820 |
| }, |
| { |
| "epoch": 0.24959950390160715, |
| "grad_norm": 0.6991278529167175, |
| "learning_rate": 0.0005, |
| "loss": 1.1324, |
| "step": 4830 |
| }, |
| { |
| "epoch": 0.25011627306082374, |
| "grad_norm": 0.7213752865791321, |
| "learning_rate": 0.0005, |
| "loss": 1.1303, |
| "step": 4840 |
| }, |
| { |
| "epoch": 0.2506330422200403, |
| "grad_norm": 0.6555566191673279, |
| "learning_rate": 0.0005, |
| "loss": 1.1277, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.2511498113792569, |
| "grad_norm": 0.7012516260147095, |
| "learning_rate": 0.0005, |
| "loss": 1.1267, |
| "step": 4860 |
| }, |
| { |
| "epoch": 0.25166658053847346, |
| "grad_norm": 0.74920654296875, |
| "learning_rate": 0.0005, |
| "loss": 1.1432, |
| "step": 4870 |
| }, |
| { |
| "epoch": 0.25218334969769, |
| "grad_norm": 0.721111536026001, |
| "learning_rate": 0.0005, |
| "loss": 1.1393, |
| "step": 4880 |
| }, |
| { |
| "epoch": 0.25270011885690663, |
| "grad_norm": 0.7633620500564575, |
| "learning_rate": 0.0005, |
| "loss": 1.135, |
| "step": 4890 |
| }, |
| { |
| "epoch": 0.2532168880161232, |
| "grad_norm": 0.7658079266548157, |
| "learning_rate": 0.0005, |
| "loss": 1.1223, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.2537336571753398, |
| "grad_norm": 0.6615222692489624, |
| "learning_rate": 0.0005, |
| "loss": 1.1476, |
| "step": 4910 |
| }, |
| { |
| "epoch": 0.25425042633455636, |
| "grad_norm": 0.6398602724075317, |
| "learning_rate": 0.0005, |
| "loss": 1.1044, |
| "step": 4920 |
| }, |
| { |
| "epoch": 0.2547671954937729, |
| "grad_norm": 0.7086970210075378, |
| "learning_rate": 0.0005, |
| "loss": 1.1253, |
| "step": 4930 |
| }, |
| { |
| "epoch": 0.2552839646529895, |
| "grad_norm": 0.6913731694221497, |
| "learning_rate": 0.0005, |
| "loss": 1.1356, |
| "step": 4940 |
| }, |
| { |
| "epoch": 0.2558007338122061, |
| "grad_norm": 0.7111396789550781, |
| "learning_rate": 0.0005, |
| "loss": 1.1219, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.25631750297142264, |
| "grad_norm": 0.699747622013092, |
| "learning_rate": 0.0005, |
| "loss": 1.1198, |
| "step": 4960 |
| }, |
| { |
| "epoch": 0.25683427213063925, |
| "grad_norm": 0.6903569102287292, |
| "learning_rate": 0.0005, |
| "loss": 1.1384, |
| "step": 4970 |
| }, |
| { |
| "epoch": 0.2573510412898558, |
| "grad_norm": 0.7051145434379578, |
| "learning_rate": 0.0005, |
| "loss": 1.1439, |
| "step": 4980 |
| }, |
| { |
| "epoch": 0.2578678104490724, |
| "grad_norm": 0.7983745336532593, |
| "learning_rate": 0.0005, |
| "loss": 1.1171, |
| "step": 4990 |
| }, |
| { |
| "epoch": 0.258384579608289, |
| "grad_norm": 0.7234880924224854, |
| "learning_rate": 0.0005, |
| "loss": 1.1083, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.25890134876750553, |
| "grad_norm": 0.740550696849823, |
| "learning_rate": 0.0005, |
| "loss": 1.1211, |
| "step": 5010 |
| }, |
| { |
| "epoch": 0.25941811792672215, |
| "grad_norm": 0.7128597497940063, |
| "learning_rate": 0.0005, |
| "loss": 1.1432, |
| "step": 5020 |
| }, |
| { |
| "epoch": 0.2599348870859387, |
| "grad_norm": 0.6916446089744568, |
| "learning_rate": 0.0005, |
| "loss": 1.1358, |
| "step": 5030 |
| }, |
| { |
| "epoch": 0.2604516562451553, |
| "grad_norm": 0.776382327079773, |
| "learning_rate": 0.0005, |
| "loss": 1.125, |
| "step": 5040 |
| }, |
| { |
| "epoch": 0.2609684254043719, |
| "grad_norm": 0.720817506313324, |
| "learning_rate": 0.0005, |
| "loss": 1.11, |
| "step": 5050 |
| }, |
| { |
| "epoch": 0.26148519456358843, |
| "grad_norm": 0.6699787378311157, |
| "learning_rate": 0.0005, |
| "loss": 1.1143, |
| "step": 5060 |
| }, |
| { |
| "epoch": 0.26200196372280504, |
| "grad_norm": 0.7283949851989746, |
| "learning_rate": 0.0005, |
| "loss": 1.1094, |
| "step": 5070 |
| }, |
| { |
| "epoch": 0.2625187328820216, |
| "grad_norm": 0.6964280009269714, |
| "learning_rate": 0.0005, |
| "loss": 1.1332, |
| "step": 5080 |
| }, |
| { |
| "epoch": 0.26303550204123816, |
| "grad_norm": 0.7906248569488525, |
| "learning_rate": 0.0005, |
| "loss": 1.1242, |
| "step": 5090 |
| }, |
| { |
| "epoch": 0.26355227120045477, |
| "grad_norm": 0.7149584889411926, |
| "learning_rate": 0.0005, |
| "loss": 1.1215, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.2640690403596713, |
| "grad_norm": 0.6400547027587891, |
| "learning_rate": 0.0005, |
| "loss": 1.1319, |
| "step": 5110 |
| }, |
| { |
| "epoch": 0.26458580951888794, |
| "grad_norm": 0.6504139304161072, |
| "learning_rate": 0.0005, |
| "loss": 1.1145, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.2651025786781045, |
| "grad_norm": 0.724251389503479, |
| "learning_rate": 0.0005, |
| "loss": 1.1185, |
| "step": 5130 |
| }, |
| { |
| "epoch": 0.26561934783732105, |
| "grad_norm": 0.7142144441604614, |
| "learning_rate": 0.0005, |
| "loss": 1.1296, |
| "step": 5140 |
| }, |
| { |
| "epoch": 0.26613611699653766, |
| "grad_norm": 0.7482824325561523, |
| "learning_rate": 0.0005, |
| "loss": 1.1035, |
| "step": 5150 |
| }, |
| { |
| "epoch": 0.2666528861557542, |
| "grad_norm": 0.7604995369911194, |
| "learning_rate": 0.0005, |
| "loss": 1.113, |
| "step": 5160 |
| }, |
| { |
| "epoch": 0.2671696553149708, |
| "grad_norm": 0.7642651200294495, |
| "learning_rate": 0.0005, |
| "loss": 1.0964, |
| "step": 5170 |
| }, |
| { |
| "epoch": 0.2676864244741874, |
| "grad_norm": 0.9142786860466003, |
| "learning_rate": 0.0005, |
| "loss": 1.101, |
| "step": 5180 |
| }, |
| { |
| "epoch": 0.26820319363340395, |
| "grad_norm": 0.6688016057014465, |
| "learning_rate": 0.0005, |
| "loss": 1.1125, |
| "step": 5190 |
| }, |
| { |
| "epoch": 0.26871996279262056, |
| "grad_norm": 0.7352325916290283, |
| "learning_rate": 0.0005, |
| "loss": 1.1081, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.2692367319518371, |
| "grad_norm": 0.696356475353241, |
| "learning_rate": 0.0005, |
| "loss": 1.0972, |
| "step": 5210 |
| }, |
| { |
| "epoch": 0.2697535011110537, |
| "grad_norm": 0.6730584502220154, |
| "learning_rate": 0.0005, |
| "loss": 1.1173, |
| "step": 5220 |
| }, |
| { |
| "epoch": 0.2702702702702703, |
| "grad_norm": 0.6800664067268372, |
| "learning_rate": 0.0005, |
| "loss": 1.0942, |
| "step": 5230 |
| }, |
| { |
| "epoch": 0.27078703942948684, |
| "grad_norm": 0.6622713208198547, |
| "learning_rate": 0.0005, |
| "loss": 1.1297, |
| "step": 5240 |
| }, |
| { |
| "epoch": 0.2713038085887034, |
| "grad_norm": 0.7148898839950562, |
| "learning_rate": 0.0005, |
| "loss": 1.0997, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.27182057774792, |
| "grad_norm": 0.6884311437606812, |
| "learning_rate": 0.0005, |
| "loss": 1.1031, |
| "step": 5260 |
| }, |
| { |
| "epoch": 0.27233734690713657, |
| "grad_norm": 0.6427676677703857, |
| "learning_rate": 0.0005, |
| "loss": 1.1102, |
| "step": 5270 |
| }, |
| { |
| "epoch": 0.2728541160663532, |
| "grad_norm": 0.6422214508056641, |
| "learning_rate": 0.0005, |
| "loss": 1.1116, |
| "step": 5280 |
| }, |
| { |
| "epoch": 0.27337088522556974, |
| "grad_norm": 0.6933507919311523, |
| "learning_rate": 0.0005, |
| "loss": 1.1179, |
| "step": 5290 |
| }, |
| { |
| "epoch": 0.2738876543847863, |
| "grad_norm": 0.6655607223510742, |
| "learning_rate": 0.0005, |
| "loss": 1.0943, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.2744044235440029, |
| "grad_norm": 0.7125523686408997, |
| "learning_rate": 0.0005, |
| "loss": 1.1065, |
| "step": 5310 |
| }, |
| { |
| "epoch": 0.27492119270321946, |
| "grad_norm": 0.8208178281784058, |
| "learning_rate": 0.0005, |
| "loss": 1.1193, |
| "step": 5320 |
| }, |
| { |
| "epoch": 0.2754379618624361, |
| "grad_norm": 0.715416669845581, |
| "learning_rate": 0.0005, |
| "loss": 1.1064, |
| "step": 5330 |
| }, |
| { |
| "epoch": 0.27595473102165263, |
| "grad_norm": 0.7992897629737854, |
| "learning_rate": 0.0005, |
| "loss": 1.1008, |
| "step": 5340 |
| }, |
| { |
| "epoch": 0.2764715001808692, |
| "grad_norm": 0.6610242128372192, |
| "learning_rate": 0.0005, |
| "loss": 1.1132, |
| "step": 5350 |
| }, |
| { |
| "epoch": 0.2769882693400858, |
| "grad_norm": 0.7205715775489807, |
| "learning_rate": 0.0005, |
| "loss": 1.0994, |
| "step": 5360 |
| }, |
| { |
| "epoch": 0.27750503849930236, |
| "grad_norm": 0.6824073791503906, |
| "learning_rate": 0.0005, |
| "loss": 1.0882, |
| "step": 5370 |
| }, |
| { |
| "epoch": 0.2780218076585189, |
| "grad_norm": 0.7015029191970825, |
| "learning_rate": 0.0005, |
| "loss": 1.1182, |
| "step": 5380 |
| }, |
| { |
| "epoch": 0.27853857681773553, |
| "grad_norm": 0.6447197794914246, |
| "learning_rate": 0.0005, |
| "loss": 1.105, |
| "step": 5390 |
| }, |
| { |
| "epoch": 0.2790553459769521, |
| "grad_norm": 0.7455316781997681, |
| "learning_rate": 0.0005, |
| "loss": 1.1069, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.2795721151361687, |
| "grad_norm": 0.8284129500389099, |
| "learning_rate": 0.0005, |
| "loss": 1.108, |
| "step": 5410 |
| }, |
| { |
| "epoch": 0.28008888429538525, |
| "grad_norm": 0.6697763204574585, |
| "learning_rate": 0.0005, |
| "loss": 1.1079, |
| "step": 5420 |
| }, |
| { |
| "epoch": 0.2806056534546018, |
| "grad_norm": 0.6729034781455994, |
| "learning_rate": 0.0005, |
| "loss": 1.1004, |
| "step": 5430 |
| }, |
| { |
| "epoch": 0.2811224226138184, |
| "grad_norm": 0.6567364931106567, |
| "learning_rate": 0.0005, |
| "loss": 1.0876, |
| "step": 5440 |
| }, |
| { |
| "epoch": 0.281639191773035, |
| "grad_norm": 0.6983076333999634, |
| "learning_rate": 0.0005, |
| "loss": 1.0979, |
| "step": 5450 |
| }, |
| { |
| "epoch": 0.28215596093225154, |
| "grad_norm": 0.6503905057907104, |
| "learning_rate": 0.0005, |
| "loss": 1.0884, |
| "step": 5460 |
| }, |
| { |
| "epoch": 0.28267273009146815, |
| "grad_norm": 0.6191208362579346, |
| "learning_rate": 0.0005, |
| "loss": 1.1057, |
| "step": 5470 |
| }, |
| { |
| "epoch": 0.2831894992506847, |
| "grad_norm": 0.7421597838401794, |
| "learning_rate": 0.0005, |
| "loss": 1.0992, |
| "step": 5480 |
| }, |
| { |
| "epoch": 0.2837062684099013, |
| "grad_norm": 0.6919003129005432, |
| "learning_rate": 0.0005, |
| "loss": 1.0961, |
| "step": 5490 |
| }, |
| { |
| "epoch": 0.2842230375691179, |
| "grad_norm": 0.6625383496284485, |
| "learning_rate": 0.0005, |
| "loss": 1.1108, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.28473980672833443, |
| "grad_norm": 0.6479719877243042, |
| "learning_rate": 0.0005, |
| "loss": 1.0969, |
| "step": 5510 |
| }, |
| { |
| "epoch": 0.28525657588755104, |
| "grad_norm": 0.765210747718811, |
| "learning_rate": 0.0005, |
| "loss": 1.0857, |
| "step": 5520 |
| }, |
| { |
| "epoch": 0.2857733450467676, |
| "grad_norm": 0.6934791803359985, |
| "learning_rate": 0.0005, |
| "loss": 1.0945, |
| "step": 5530 |
| }, |
| { |
| "epoch": 0.2862901142059842, |
| "grad_norm": 0.6789985299110413, |
| "learning_rate": 0.0005, |
| "loss": 1.1165, |
| "step": 5540 |
| }, |
| { |
| "epoch": 0.28680688336520077, |
| "grad_norm": 0.6476292014122009, |
| "learning_rate": 0.0005, |
| "loss": 1.0886, |
| "step": 5550 |
| }, |
| { |
| "epoch": 0.28732365252441733, |
| "grad_norm": 0.8015202283859253, |
| "learning_rate": 0.0005, |
| "loss": 1.09, |
| "step": 5560 |
| }, |
| { |
| "epoch": 0.28784042168363394, |
| "grad_norm": 0.8759499192237854, |
| "learning_rate": 0.0005, |
| "loss": 1.0962, |
| "step": 5570 |
| }, |
| { |
| "epoch": 0.2883571908428505, |
| "grad_norm": 0.6740782856941223, |
| "learning_rate": 0.0005, |
| "loss": 1.0803, |
| "step": 5580 |
| }, |
| { |
| "epoch": 0.28887396000206705, |
| "grad_norm": 0.6475633978843689, |
| "learning_rate": 0.0005, |
| "loss": 1.0825, |
| "step": 5590 |
| }, |
| { |
| "epoch": 0.28939072916128367, |
| "grad_norm": 0.7087163329124451, |
| "learning_rate": 0.0005, |
| "loss": 1.0982, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.2899074983205002, |
| "grad_norm": 0.6702967882156372, |
| "learning_rate": 0.0005, |
| "loss": 1.1146, |
| "step": 5610 |
| }, |
| { |
| "epoch": 0.29042426747971684, |
| "grad_norm": 0.6150313019752502, |
| "learning_rate": 0.0005, |
| "loss": 1.0919, |
| "step": 5620 |
| }, |
| { |
| "epoch": 0.2909410366389334, |
| "grad_norm": 0.6218642592430115, |
| "learning_rate": 0.0005, |
| "loss": 1.0874, |
| "step": 5630 |
| }, |
| { |
| "epoch": 0.29145780579814995, |
| "grad_norm": 0.670069694519043, |
| "learning_rate": 0.0005, |
| "loss": 1.0764, |
| "step": 5640 |
| }, |
| { |
| "epoch": 0.29197457495736656, |
| "grad_norm": 0.7384163737297058, |
| "learning_rate": 0.0005, |
| "loss": 1.0888, |
| "step": 5650 |
| }, |
| { |
| "epoch": 0.2924913441165831, |
| "grad_norm": 0.6525676250457764, |
| "learning_rate": 0.0005, |
| "loss": 1.0955, |
| "step": 5660 |
| }, |
| { |
| "epoch": 0.2930081132757997, |
| "grad_norm": 0.6424722075462341, |
| "learning_rate": 0.0005, |
| "loss": 1.077, |
| "step": 5670 |
| }, |
| { |
| "epoch": 0.2935248824350163, |
| "grad_norm": 0.6522981524467468, |
| "learning_rate": 0.0005, |
| "loss": 1.0996, |
| "step": 5680 |
| }, |
| { |
| "epoch": 0.29404165159423284, |
| "grad_norm": 0.686553955078125, |
| "learning_rate": 0.0005, |
| "loss": 1.0776, |
| "step": 5690 |
| }, |
| { |
| "epoch": 0.29455842075344946, |
| "grad_norm": 0.6501746773719788, |
| "learning_rate": 0.0005, |
| "loss": 1.09, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.295075189912666, |
| "grad_norm": 0.661805272102356, |
| "learning_rate": 0.0005, |
| "loss": 1.0987, |
| "step": 5710 |
| }, |
| { |
| "epoch": 0.29559195907188257, |
| "grad_norm": 0.6171291470527649, |
| "learning_rate": 0.0005, |
| "loss": 1.0896, |
| "step": 5720 |
| }, |
| { |
| "epoch": 0.2961087282310992, |
| "grad_norm": 0.6660189032554626, |
| "learning_rate": 0.0005, |
| "loss": 1.0795, |
| "step": 5730 |
| }, |
| { |
| "epoch": 0.29662549739031574, |
| "grad_norm": 0.7182852625846863, |
| "learning_rate": 0.0005, |
| "loss": 1.0888, |
| "step": 5740 |
| }, |
| { |
| "epoch": 0.2971422665495323, |
| "grad_norm": 0.6748793125152588, |
| "learning_rate": 0.0005, |
| "loss": 1.1066, |
| "step": 5750 |
| }, |
| { |
| "epoch": 0.2976590357087489, |
| "grad_norm": 0.9658355712890625, |
| "learning_rate": 0.0005, |
| "loss": 1.0788, |
| "step": 5760 |
| }, |
| { |
| "epoch": 0.29817580486796547, |
| "grad_norm": 0.7361212968826294, |
| "learning_rate": 0.0005, |
| "loss": 1.0963, |
| "step": 5770 |
| }, |
| { |
| "epoch": 0.2986925740271821, |
| "grad_norm": 0.6640811562538147, |
| "learning_rate": 0.0005, |
| "loss": 1.0872, |
| "step": 5780 |
| }, |
| { |
| "epoch": 0.29920934318639864, |
| "grad_norm": 0.6937102675437927, |
| "learning_rate": 0.0005, |
| "loss": 1.0777, |
| "step": 5790 |
| }, |
| { |
| "epoch": 0.2997261123456152, |
| "grad_norm": 0.7803467512130737, |
| "learning_rate": 0.0005, |
| "loss": 1.0986, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.3002428815048318, |
| "grad_norm": 0.8593279719352722, |
| "learning_rate": 0.0005, |
| "loss": 1.0796, |
| "step": 5810 |
| }, |
| { |
| "epoch": 0.30075965066404836, |
| "grad_norm": 0.6236810088157654, |
| "learning_rate": 0.0005, |
| "loss": 1.0932, |
| "step": 5820 |
| }, |
| { |
| "epoch": 0.301276419823265, |
| "grad_norm": 0.6399732828140259, |
| "learning_rate": 0.0005, |
| "loss": 1.0614, |
| "step": 5830 |
| }, |
| { |
| "epoch": 0.30179318898248153, |
| "grad_norm": 0.6762784123420715, |
| "learning_rate": 0.0005, |
| "loss": 1.0763, |
| "step": 5840 |
| }, |
| { |
| "epoch": 0.3023099581416981, |
| "grad_norm": 0.7428263425827026, |
| "learning_rate": 0.0005, |
| "loss": 1.0701, |
| "step": 5850 |
| }, |
| { |
| "epoch": 0.3028267273009147, |
| "grad_norm": 0.6435476541519165, |
| "learning_rate": 0.0005, |
| "loss": 1.0782, |
| "step": 5860 |
| }, |
| { |
| "epoch": 0.30334349646013126, |
| "grad_norm": 0.6325916647911072, |
| "learning_rate": 0.0005, |
| "loss": 1.0858, |
| "step": 5870 |
| }, |
| { |
| "epoch": 0.3038602656193478, |
| "grad_norm": 0.6759895086288452, |
| "learning_rate": 0.0005, |
| "loss": 1.082, |
| "step": 5880 |
| }, |
| { |
| "epoch": 0.3043770347785644, |
| "grad_norm": 0.705319881439209, |
| "learning_rate": 0.0005, |
| "loss": 1.0587, |
| "step": 5890 |
| }, |
| { |
| "epoch": 0.304893803937781, |
| "grad_norm": 0.6924307346343994, |
| "learning_rate": 0.0005, |
| "loss": 1.0756, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.3054105730969976, |
| "grad_norm": 0.6262795925140381, |
| "learning_rate": 0.0005, |
| "loss": 1.0875, |
| "step": 5910 |
| }, |
| { |
| "epoch": 0.30592734225621415, |
| "grad_norm": 0.6304033398628235, |
| "learning_rate": 0.0005, |
| "loss": 1.0889, |
| "step": 5920 |
| }, |
| { |
| "epoch": 0.3064441114154307, |
| "grad_norm": 0.6266285181045532, |
| "learning_rate": 0.0005, |
| "loss": 1.0734, |
| "step": 5930 |
| }, |
| { |
| "epoch": 0.3069608805746473, |
| "grad_norm": 0.66020268201828, |
| "learning_rate": 0.0005, |
| "loss": 1.0756, |
| "step": 5940 |
| }, |
| { |
| "epoch": 0.3074776497338639, |
| "grad_norm": 0.6455373764038086, |
| "learning_rate": 0.0005, |
| "loss": 1.088, |
| "step": 5950 |
| }, |
| { |
| "epoch": 0.30799441889308043, |
| "grad_norm": 0.6743224263191223, |
| "learning_rate": 0.0005, |
| "loss": 1.0777, |
| "step": 5960 |
| }, |
| { |
| "epoch": 0.30851118805229705, |
| "grad_norm": 0.6214370131492615, |
| "learning_rate": 0.0005, |
| "loss": 1.069, |
| "step": 5970 |
| }, |
| { |
| "epoch": 0.3090279572115136, |
| "grad_norm": 0.6882118582725525, |
| "learning_rate": 0.0005, |
| "loss": 1.0713, |
| "step": 5980 |
| }, |
| { |
| "epoch": 0.3095447263707302, |
| "grad_norm": 0.6656840443611145, |
| "learning_rate": 0.0005, |
| "loss": 1.0783, |
| "step": 5990 |
| }, |
| { |
| "epoch": 0.3100614955299468, |
| "grad_norm": 0.7134031653404236, |
| "learning_rate": 0.0005, |
| "loss": 1.049, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.31057826468916333, |
| "grad_norm": 0.7211028933525085, |
| "learning_rate": 0.0005, |
| "loss": 1.067, |
| "step": 6010 |
| }, |
| { |
| "epoch": 0.31109503384837994, |
| "grad_norm": 0.6382066607475281, |
| "learning_rate": 0.0005, |
| "loss": 1.0771, |
| "step": 6020 |
| }, |
| { |
| "epoch": 0.3116118030075965, |
| "grad_norm": 0.7246118187904358, |
| "learning_rate": 0.0005, |
| "loss": 1.0877, |
| "step": 6030 |
| }, |
| { |
| "epoch": 0.3121285721668131, |
| "grad_norm": 0.6753916144371033, |
| "learning_rate": 0.0005, |
| "loss": 1.0655, |
| "step": 6040 |
| }, |
| { |
| "epoch": 0.31264534132602967, |
| "grad_norm": 0.6585648655891418, |
| "learning_rate": 0.0005, |
| "loss": 1.0557, |
| "step": 6050 |
| }, |
| { |
| "epoch": 0.3131621104852462, |
| "grad_norm": 0.6378208994865417, |
| "learning_rate": 0.0005, |
| "loss": 1.0657, |
| "step": 6060 |
| }, |
| { |
| "epoch": 0.31367887964446284, |
| "grad_norm": 0.6496950387954712, |
| "learning_rate": 0.0005, |
| "loss": 1.0743, |
| "step": 6070 |
| }, |
| { |
| "epoch": 0.3141956488036794, |
| "grad_norm": 0.6112158298492432, |
| "learning_rate": 0.0005, |
| "loss": 1.076, |
| "step": 6080 |
| }, |
| { |
| "epoch": 0.31471241796289595, |
| "grad_norm": 0.6267996430397034, |
| "learning_rate": 0.0005, |
| "loss": 1.0882, |
| "step": 6090 |
| }, |
| { |
| "epoch": 0.31522918712211256, |
| "grad_norm": 0.6258119940757751, |
| "learning_rate": 0.0005, |
| "loss": 1.0747, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.3157459562813291, |
| "grad_norm": 0.6293036341667175, |
| "learning_rate": 0.0005, |
| "loss": 1.0648, |
| "step": 6110 |
| }, |
| { |
| "epoch": 0.31626272544054573, |
| "grad_norm": 0.6443596482276917, |
| "learning_rate": 0.0005, |
| "loss": 1.0898, |
| "step": 6120 |
| }, |
| { |
| "epoch": 0.3167794945997623, |
| "grad_norm": 0.6488006711006165, |
| "learning_rate": 0.0005, |
| "loss": 1.0533, |
| "step": 6130 |
| }, |
| { |
| "epoch": 0.31729626375897885, |
| "grad_norm": 0.6419286131858826, |
| "learning_rate": 0.0005, |
| "loss": 1.0755, |
| "step": 6140 |
| }, |
| { |
| "epoch": 0.31781303291819546, |
| "grad_norm": 0.6659611463546753, |
| "learning_rate": 0.0005, |
| "loss": 1.0526, |
| "step": 6150 |
| }, |
| { |
| "epoch": 0.318329802077412, |
| "grad_norm": 0.6645331382751465, |
| "learning_rate": 0.0005, |
| "loss": 1.0528, |
| "step": 6160 |
| }, |
| { |
| "epoch": 0.3188465712366286, |
| "grad_norm": 0.7420417070388794, |
| "learning_rate": 0.0005, |
| "loss": 1.0637, |
| "step": 6170 |
| }, |
| { |
| "epoch": 0.3193633403958452, |
| "grad_norm": 0.6399688720703125, |
| "learning_rate": 0.0005, |
| "loss": 1.0575, |
| "step": 6180 |
| }, |
| { |
| "epoch": 0.31988010955506174, |
| "grad_norm": 0.6128381490707397, |
| "learning_rate": 0.0005, |
| "loss": 1.0692, |
| "step": 6190 |
| }, |
| { |
| "epoch": 0.32039687871427835, |
| "grad_norm": 0.6373854279518127, |
| "learning_rate": 0.0005, |
| "loss": 1.0543, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.3209136478734949, |
| "grad_norm": 0.8587968349456787, |
| "learning_rate": 0.0005, |
| "loss": 1.0697, |
| "step": 6210 |
| }, |
| { |
| "epoch": 0.32143041703271147, |
| "grad_norm": 0.6043888926506042, |
| "learning_rate": 0.0005, |
| "loss": 1.0748, |
| "step": 6220 |
| }, |
| { |
| "epoch": 0.3219471861919281, |
| "grad_norm": 0.6279845237731934, |
| "learning_rate": 0.0005, |
| "loss": 1.0746, |
| "step": 6230 |
| }, |
| { |
| "epoch": 0.32246395535114464, |
| "grad_norm": 0.6751164793968201, |
| "learning_rate": 0.0005, |
| "loss": 1.0715, |
| "step": 6240 |
| }, |
| { |
| "epoch": 0.3229807245103612, |
| "grad_norm": 0.5915717482566833, |
| "learning_rate": 0.0005, |
| "loss": 1.0705, |
| "step": 6250 |
| }, |
| { |
| "epoch": 0.3234974936695778, |
| "grad_norm": 0.6816694140434265, |
| "learning_rate": 0.0005, |
| "loss": 1.0666, |
| "step": 6260 |
| }, |
| { |
| "epoch": 0.32401426282879436, |
| "grad_norm": 0.7093113660812378, |
| "learning_rate": 0.0005, |
| "loss": 1.0585, |
| "step": 6270 |
| }, |
| { |
| "epoch": 0.324531031988011, |
| "grad_norm": 0.6673592925071716, |
| "learning_rate": 0.0005, |
| "loss": 1.0767, |
| "step": 6280 |
| }, |
| { |
| "epoch": 0.32504780114722753, |
| "grad_norm": 0.5884393453598022, |
| "learning_rate": 0.0005, |
| "loss": 1.0662, |
| "step": 6290 |
| }, |
| { |
| "epoch": 0.3255645703064441, |
| "grad_norm": 0.6808472871780396, |
| "learning_rate": 0.0005, |
| "loss": 1.0442, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.3260813394656607, |
| "grad_norm": 0.6658387184143066, |
| "learning_rate": 0.0005, |
| "loss": 1.0627, |
| "step": 6310 |
| }, |
| { |
| "epoch": 0.32659810862487726, |
| "grad_norm": 0.6469089388847351, |
| "learning_rate": 0.0005, |
| "loss": 1.0645, |
| "step": 6320 |
| }, |
| { |
| "epoch": 0.32711487778409387, |
| "grad_norm": 0.6215671896934509, |
| "learning_rate": 0.0005, |
| "loss": 1.0544, |
| "step": 6330 |
| }, |
| { |
| "epoch": 0.32763164694331043, |
| "grad_norm": 0.6409225463867188, |
| "learning_rate": 0.0005, |
| "loss": 1.0555, |
| "step": 6340 |
| }, |
| { |
| "epoch": 0.328148416102527, |
| "grad_norm": 0.6427381038665771, |
| "learning_rate": 0.0005, |
| "loss": 1.0696, |
| "step": 6350 |
| }, |
| { |
| "epoch": 0.3286651852617436, |
| "grad_norm": 0.5856565833091736, |
| "learning_rate": 0.0005, |
| "loss": 1.0518, |
| "step": 6360 |
| }, |
| { |
| "epoch": 0.32918195442096015, |
| "grad_norm": 0.6217045187950134, |
| "learning_rate": 0.0005, |
| "loss": 1.066, |
| "step": 6370 |
| }, |
| { |
| "epoch": 0.3296987235801767, |
| "grad_norm": 0.7256447672843933, |
| "learning_rate": 0.0005, |
| "loss": 1.0514, |
| "step": 6380 |
| }, |
| { |
| "epoch": 0.3302154927393933, |
| "grad_norm": 0.6222741007804871, |
| "learning_rate": 0.0005, |
| "loss": 1.0509, |
| "step": 6390 |
| }, |
| { |
| "epoch": 0.3307322618986099, |
| "grad_norm": 0.6448323726654053, |
| "learning_rate": 0.0005, |
| "loss": 1.0622, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.3312490310578265, |
| "grad_norm": 0.6215245723724365, |
| "learning_rate": 0.0005, |
| "loss": 1.0637, |
| "step": 6410 |
| }, |
| { |
| "epoch": 0.33176580021704305, |
| "grad_norm": 0.6422061920166016, |
| "learning_rate": 0.0005, |
| "loss": 1.0599, |
| "step": 6420 |
| }, |
| { |
| "epoch": 0.3322825693762596, |
| "grad_norm": 0.8208865523338318, |
| "learning_rate": 0.0005, |
| "loss": 1.0524, |
| "step": 6430 |
| }, |
| { |
| "epoch": 0.3327993385354762, |
| "grad_norm": 1.1319376230239868, |
| "learning_rate": 0.0005, |
| "loss": 1.0577, |
| "step": 6440 |
| }, |
| { |
| "epoch": 0.3333161076946928, |
| "grad_norm": 0.63709956407547, |
| "learning_rate": 0.0005, |
| "loss": 1.0515, |
| "step": 6450 |
| }, |
| { |
| "epoch": 0.33383287685390933, |
| "grad_norm": 0.6338751912117004, |
| "learning_rate": 0.0005, |
| "loss": 1.0428, |
| "step": 6460 |
| }, |
| { |
| "epoch": 0.33434964601312595, |
| "grad_norm": 0.6264437437057495, |
| "learning_rate": 0.0005, |
| "loss": 1.0545, |
| "step": 6470 |
| }, |
| { |
| "epoch": 0.3348664151723425, |
| "grad_norm": 0.6507226228713989, |
| "learning_rate": 0.0005, |
| "loss": 1.0478, |
| "step": 6480 |
| }, |
| { |
| "epoch": 0.3353831843315591, |
| "grad_norm": 0.6316462755203247, |
| "learning_rate": 0.0005, |
| "loss": 1.05, |
| "step": 6490 |
| }, |
| { |
| "epoch": 0.33589995349077567, |
| "grad_norm": 0.8337516188621521, |
| "learning_rate": 0.0005, |
| "loss": 1.0486, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.33641672264999223, |
| "grad_norm": 0.9597588777542114, |
| "learning_rate": 0.0005, |
| "loss": 1.0551, |
| "step": 6510 |
| }, |
| { |
| "epoch": 0.33693349180920884, |
| "grad_norm": 0.6857469081878662, |
| "learning_rate": 0.0005, |
| "loss": 1.0651, |
| "step": 6520 |
| }, |
| { |
| "epoch": 0.3374502609684254, |
| "grad_norm": 0.6196707487106323, |
| "learning_rate": 0.0005, |
| "loss": 1.0425, |
| "step": 6530 |
| }, |
| { |
| "epoch": 0.337967030127642, |
| "grad_norm": 0.6072001457214355, |
| "learning_rate": 0.0005, |
| "loss": 1.0698, |
| "step": 6540 |
| }, |
| { |
| "epoch": 0.33848379928685857, |
| "grad_norm": 0.6677159667015076, |
| "learning_rate": 0.0005, |
| "loss": 1.0646, |
| "step": 6550 |
| }, |
| { |
| "epoch": 0.3390005684460751, |
| "grad_norm": 0.6435421109199524, |
| "learning_rate": 0.0005, |
| "loss": 1.0342, |
| "step": 6560 |
| }, |
| { |
| "epoch": 0.33951733760529174, |
| "grad_norm": 0.5953618288040161, |
| "learning_rate": 0.0005, |
| "loss": 1.0512, |
| "step": 6570 |
| }, |
| { |
| "epoch": 0.3400341067645083, |
| "grad_norm": 0.6292535066604614, |
| "learning_rate": 0.0005, |
| "loss": 1.0502, |
| "step": 6580 |
| }, |
| { |
| "epoch": 0.34055087592372485, |
| "grad_norm": 0.7501185536384583, |
| "learning_rate": 0.0005, |
| "loss": 1.0556, |
| "step": 6590 |
| }, |
| { |
| "epoch": 0.34106764508294146, |
| "grad_norm": 0.58536696434021, |
| "learning_rate": 0.0005, |
| "loss": 1.0534, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.341584414242158, |
| "grad_norm": 0.6455935835838318, |
| "learning_rate": 0.0005, |
| "loss": 1.0366, |
| "step": 6610 |
| }, |
| { |
| "epoch": 0.34210118340137463, |
| "grad_norm": 0.6323394179344177, |
| "learning_rate": 0.0005, |
| "loss": 1.052, |
| "step": 6620 |
| }, |
| { |
| "epoch": 0.3426179525605912, |
| "grad_norm": 0.6140257120132446, |
| "learning_rate": 0.0005, |
| "loss": 1.0452, |
| "step": 6630 |
| }, |
| { |
| "epoch": 0.34313472171980774, |
| "grad_norm": 0.6486880779266357, |
| "learning_rate": 0.0005, |
| "loss": 1.0422, |
| "step": 6640 |
| }, |
| { |
| "epoch": 0.34365149087902436, |
| "grad_norm": 0.6136801838874817, |
| "learning_rate": 0.0005, |
| "loss": 1.0529, |
| "step": 6650 |
| }, |
| { |
| "epoch": 0.3441682600382409, |
| "grad_norm": 0.78439861536026, |
| "learning_rate": 0.0005, |
| "loss": 1.0616, |
| "step": 6660 |
| }, |
| { |
| "epoch": 0.34468502919745747, |
| "grad_norm": 0.6717984080314636, |
| "learning_rate": 0.0005, |
| "loss": 1.0471, |
| "step": 6670 |
| }, |
| { |
| "epoch": 0.3452017983566741, |
| "grad_norm": 0.632985532283783, |
| "learning_rate": 0.0005, |
| "loss": 1.0426, |
| "step": 6680 |
| }, |
| { |
| "epoch": 0.34571856751589064, |
| "grad_norm": 0.6086390018463135, |
| "learning_rate": 0.0005, |
| "loss": 1.0384, |
| "step": 6690 |
| }, |
| { |
| "epoch": 0.34623533667510725, |
| "grad_norm": 0.7206865549087524, |
| "learning_rate": 0.0005, |
| "loss": 1.0441, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.3467521058343238, |
| "grad_norm": 0.6115614771842957, |
| "learning_rate": 0.0005, |
| "loss": 1.0486, |
| "step": 6710 |
| }, |
| { |
| "epoch": 0.34726887499354037, |
| "grad_norm": 0.6737103462219238, |
| "learning_rate": 0.0005, |
| "loss": 1.0679, |
| "step": 6720 |
| }, |
| { |
| "epoch": 0.347785644152757, |
| "grad_norm": 0.632331132888794, |
| "learning_rate": 0.0005, |
| "loss": 1.0327, |
| "step": 6730 |
| }, |
| { |
| "epoch": 0.34830241331197354, |
| "grad_norm": 0.7133494019508362, |
| "learning_rate": 0.0005, |
| "loss": 1.0412, |
| "step": 6740 |
| }, |
| { |
| "epoch": 0.3488191824711901, |
| "grad_norm": 0.5726544260978699, |
| "learning_rate": 0.0005, |
| "loss": 1.0503, |
| "step": 6750 |
| }, |
| { |
| "epoch": 0.3493359516304067, |
| "grad_norm": 0.719832181930542, |
| "learning_rate": 0.0005, |
| "loss": 1.0477, |
| "step": 6760 |
| }, |
| { |
| "epoch": 0.34985272078962326, |
| "grad_norm": 0.7709729671478271, |
| "learning_rate": 0.0005, |
| "loss": 1.0424, |
| "step": 6770 |
| }, |
| { |
| "epoch": 0.3503694899488399, |
| "grad_norm": 0.6043444275856018, |
| "learning_rate": 0.0005, |
| "loss": 1.0527, |
| "step": 6780 |
| }, |
| { |
| "epoch": 0.35088625910805643, |
| "grad_norm": 0.5770915746688843, |
| "learning_rate": 0.0005, |
| "loss": 1.033, |
| "step": 6790 |
| }, |
| { |
| "epoch": 0.351403028267273, |
| "grad_norm": 0.6332295536994934, |
| "learning_rate": 0.0005, |
| "loss": 1.0405, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.3519197974264896, |
| "grad_norm": 0.6505199670791626, |
| "learning_rate": 0.0005, |
| "loss": 1.0389, |
| "step": 6810 |
| }, |
| { |
| "epoch": 0.35243656658570616, |
| "grad_norm": 0.6215615272521973, |
| "learning_rate": 0.0005, |
| "loss": 1.0591, |
| "step": 6820 |
| }, |
| { |
| "epoch": 0.35295333574492277, |
| "grad_norm": 0.6917248368263245, |
| "learning_rate": 0.0005, |
| "loss": 1.0384, |
| "step": 6830 |
| }, |
| { |
| "epoch": 0.3534701049041393, |
| "grad_norm": 0.6240680813789368, |
| "learning_rate": 0.0005, |
| "loss": 1.0491, |
| "step": 6840 |
| }, |
| { |
| "epoch": 0.3539868740633559, |
| "grad_norm": 0.6082044243812561, |
| "learning_rate": 0.0005, |
| "loss": 1.0495, |
| "step": 6850 |
| }, |
| { |
| "epoch": 0.3545036432225725, |
| "grad_norm": 0.6314426064491272, |
| "learning_rate": 0.0005, |
| "loss": 1.0274, |
| "step": 6860 |
| }, |
| { |
| "epoch": 0.35502041238178905, |
| "grad_norm": 0.6714574694633484, |
| "learning_rate": 0.0005, |
| "loss": 1.0275, |
| "step": 6870 |
| }, |
| { |
| "epoch": 0.3555371815410056, |
| "grad_norm": 0.6438120603561401, |
| "learning_rate": 0.0005, |
| "loss": 1.0383, |
| "step": 6880 |
| }, |
| { |
| "epoch": 0.3560539507002222, |
| "grad_norm": 0.7354781031608582, |
| "learning_rate": 0.0005, |
| "loss": 1.0524, |
| "step": 6890 |
| }, |
| { |
| "epoch": 0.3565707198594388, |
| "grad_norm": 0.6491745114326477, |
| "learning_rate": 0.0005, |
| "loss": 1.0386, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.3570874890186554, |
| "grad_norm": 0.5888579487800598, |
| "learning_rate": 0.0005, |
| "loss": 1.0417, |
| "step": 6910 |
| }, |
| { |
| "epoch": 0.35760425817787195, |
| "grad_norm": 0.6474457383155823, |
| "learning_rate": 0.0005, |
| "loss": 1.0514, |
| "step": 6920 |
| }, |
| { |
| "epoch": 0.3581210273370885, |
| "grad_norm": 0.6235959529876709, |
| "learning_rate": 0.0005, |
| "loss": 1.03, |
| "step": 6930 |
| }, |
| { |
| "epoch": 0.3586377964963051, |
| "grad_norm": 0.6418899297714233, |
| "learning_rate": 0.0005, |
| "loss": 1.0436, |
| "step": 6940 |
| }, |
| { |
| "epoch": 0.3591545656555217, |
| "grad_norm": 0.671491801738739, |
| "learning_rate": 0.0005, |
| "loss": 1.0494, |
| "step": 6950 |
| }, |
| { |
| "epoch": 0.35967133481473823, |
| "grad_norm": 0.6662471890449524, |
| "learning_rate": 0.0005, |
| "loss": 1.0339, |
| "step": 6960 |
| }, |
| { |
| "epoch": 0.36018810397395484, |
| "grad_norm": 0.6041388511657715, |
| "learning_rate": 0.0005, |
| "loss": 1.0242, |
| "step": 6970 |
| }, |
| { |
| "epoch": 0.3607048731331714, |
| "grad_norm": 0.6014126539230347, |
| "learning_rate": 0.0005, |
| "loss": 1.0327, |
| "step": 6980 |
| }, |
| { |
| "epoch": 0.361221642292388, |
| "grad_norm": 0.611056387424469, |
| "learning_rate": 0.0005, |
| "loss": 1.0537, |
| "step": 6990 |
| }, |
| { |
| "epoch": 0.36173841145160457, |
| "grad_norm": 0.605475127696991, |
| "learning_rate": 0.0005, |
| "loss": 1.0255, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.3622551806108211, |
| "grad_norm": 0.5799763798713684, |
| "learning_rate": 0.0005, |
| "loss": 1.0396, |
| "step": 7010 |
| }, |
| { |
| "epoch": 0.36277194977003774, |
| "grad_norm": 0.5857988595962524, |
| "learning_rate": 0.0005, |
| "loss": 1.0362, |
| "step": 7020 |
| }, |
| { |
| "epoch": 0.3632887189292543, |
| "grad_norm": 0.6305558085441589, |
| "learning_rate": 0.0005, |
| "loss": 1.0378, |
| "step": 7030 |
| }, |
| { |
| "epoch": 0.3638054880884709, |
| "grad_norm": 0.5987147688865662, |
| "learning_rate": 0.0005, |
| "loss": 1.0407, |
| "step": 7040 |
| }, |
| { |
| "epoch": 0.36432225724768746, |
| "grad_norm": 0.5889327526092529, |
| "learning_rate": 0.0005, |
| "loss": 1.044, |
| "step": 7050 |
| }, |
| { |
| "epoch": 0.364839026406904, |
| "grad_norm": 0.5972746014595032, |
| "learning_rate": 0.0005, |
| "loss": 1.0333, |
| "step": 7060 |
| }, |
| { |
| "epoch": 0.36535579556612063, |
| "grad_norm": 0.6437240839004517, |
| "learning_rate": 0.0005, |
| "loss": 1.0219, |
| "step": 7070 |
| }, |
| { |
| "epoch": 0.3658725647253372, |
| "grad_norm": 0.6240195631980896, |
| "learning_rate": 0.0005, |
| "loss": 1.0268, |
| "step": 7080 |
| }, |
| { |
| "epoch": 0.36638933388455375, |
| "grad_norm": 0.6170317530632019, |
| "learning_rate": 0.0005, |
| "loss": 1.0402, |
| "step": 7090 |
| }, |
| { |
| "epoch": 0.36690610304377036, |
| "grad_norm": 0.661592423915863, |
| "learning_rate": 0.0005, |
| "loss": 1.0255, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.3674228722029869, |
| "grad_norm": 0.6611010432243347, |
| "learning_rate": 0.0005, |
| "loss": 1.0387, |
| "step": 7110 |
| }, |
| { |
| "epoch": 0.36793964136220353, |
| "grad_norm": 0.6037949323654175, |
| "learning_rate": 0.0005, |
| "loss": 1.0398, |
| "step": 7120 |
| }, |
| { |
| "epoch": 0.3684564105214201, |
| "grad_norm": 0.6260375380516052, |
| "learning_rate": 0.0005, |
| "loss": 1.0223, |
| "step": 7130 |
| }, |
| { |
| "epoch": 0.36897317968063664, |
| "grad_norm": 0.7400781512260437, |
| "learning_rate": 0.0005, |
| "loss": 1.0331, |
| "step": 7140 |
| }, |
| { |
| "epoch": 0.36948994883985325, |
| "grad_norm": 0.8144364356994629, |
| "learning_rate": 0.0005, |
| "loss": 1.0341, |
| "step": 7150 |
| }, |
| { |
| "epoch": 0.3700067179990698, |
| "grad_norm": 0.6299716830253601, |
| "learning_rate": 0.0005, |
| "loss": 1.0428, |
| "step": 7160 |
| }, |
| { |
| "epoch": 0.37052348715828637, |
| "grad_norm": 0.605995774269104, |
| "learning_rate": 0.0005, |
| "loss": 1.0628, |
| "step": 7170 |
| }, |
| { |
| "epoch": 0.371040256317503, |
| "grad_norm": 0.5977038145065308, |
| "learning_rate": 0.0005, |
| "loss": 1.0227, |
| "step": 7180 |
| }, |
| { |
| "epoch": 0.37155702547671954, |
| "grad_norm": 0.6418441534042358, |
| "learning_rate": 0.0005, |
| "loss": 1.0216, |
| "step": 7190 |
| }, |
| { |
| "epoch": 0.37207379463593615, |
| "grad_norm": 0.6550008654594421, |
| "learning_rate": 0.0005, |
| "loss": 1.0317, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.3725905637951527, |
| "grad_norm": 0.6023372411727905, |
| "learning_rate": 0.0005, |
| "loss": 1.0291, |
| "step": 7210 |
| }, |
| { |
| "epoch": 0.37310733295436926, |
| "grad_norm": 0.6071696877479553, |
| "learning_rate": 0.0005, |
| "loss": 1.041, |
| "step": 7220 |
| }, |
| { |
| "epoch": 0.3736241021135859, |
| "grad_norm": 0.6096029877662659, |
| "learning_rate": 0.0005, |
| "loss": 1.0316, |
| "step": 7230 |
| }, |
| { |
| "epoch": 0.37414087127280243, |
| "grad_norm": 0.5897752642631531, |
| "learning_rate": 0.0005, |
| "loss": 1.0289, |
| "step": 7240 |
| }, |
| { |
| "epoch": 0.374657640432019, |
| "grad_norm": 0.6093285083770752, |
| "learning_rate": 0.0005, |
| "loss": 1.0368, |
| "step": 7250 |
| }, |
| { |
| "epoch": 0.3751744095912356, |
| "grad_norm": 0.6444416046142578, |
| "learning_rate": 0.0005, |
| "loss": 1.0116, |
| "step": 7260 |
| }, |
| { |
| "epoch": 0.37569117875045216, |
| "grad_norm": 0.6363521814346313, |
| "learning_rate": 0.0005, |
| "loss": 1.0198, |
| "step": 7270 |
| }, |
| { |
| "epoch": 0.37620794790966877, |
| "grad_norm": 0.6633175611495972, |
| "learning_rate": 0.0005, |
| "loss": 1.0178, |
| "step": 7280 |
| }, |
| { |
| "epoch": 0.37672471706888533, |
| "grad_norm": 0.5611307621002197, |
| "learning_rate": 0.0005, |
| "loss": 1.0319, |
| "step": 7290 |
| }, |
| { |
| "epoch": 0.3772414862281019, |
| "grad_norm": 0.5733465552330017, |
| "learning_rate": 0.0005, |
| "loss": 1.0095, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.3777582553873185, |
| "grad_norm": 0.6538148522377014, |
| "learning_rate": 0.0005, |
| "loss": 1.0405, |
| "step": 7310 |
| }, |
| { |
| "epoch": 0.37827502454653505, |
| "grad_norm": 0.6904069781303406, |
| "learning_rate": 0.0005, |
| "loss": 1.0322, |
| "step": 7320 |
| }, |
| { |
| "epoch": 0.37879179370575167, |
| "grad_norm": 0.6486346125602722, |
| "learning_rate": 0.0005, |
| "loss": 1.0162, |
| "step": 7330 |
| }, |
| { |
| "epoch": 0.3793085628649682, |
| "grad_norm": 0.5600974559783936, |
| "learning_rate": 0.0005, |
| "loss": 1.0255, |
| "step": 7340 |
| }, |
| { |
| "epoch": 0.3798253320241848, |
| "grad_norm": 0.5800735354423523, |
| "learning_rate": 0.0005, |
| "loss": 1.0228, |
| "step": 7350 |
| }, |
| { |
| "epoch": 0.3803421011834014, |
| "grad_norm": 0.6365842819213867, |
| "learning_rate": 0.0005, |
| "loss": 1.0229, |
| "step": 7360 |
| }, |
| { |
| "epoch": 0.38085887034261795, |
| "grad_norm": 0.6074081659317017, |
| "learning_rate": 0.0005, |
| "loss": 1.0325, |
| "step": 7370 |
| }, |
| { |
| "epoch": 0.3813756395018345, |
| "grad_norm": 0.5998241901397705, |
| "learning_rate": 0.0005, |
| "loss": 1.0164, |
| "step": 7380 |
| }, |
| { |
| "epoch": 0.3818924086610511, |
| "grad_norm": 0.6576969623565674, |
| "learning_rate": 0.0005, |
| "loss": 1.0153, |
| "step": 7390 |
| }, |
| { |
| "epoch": 0.3824091778202677, |
| "grad_norm": 0.6602439284324646, |
| "learning_rate": 0.0005, |
| "loss": 1.0197, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.3829259469794843, |
| "grad_norm": 0.6058171987533569, |
| "learning_rate": 0.0005, |
| "loss": 1.0289, |
| "step": 7410 |
| }, |
| { |
| "epoch": 0.38344271613870085, |
| "grad_norm": 0.7188865542411804, |
| "learning_rate": 0.0005, |
| "loss": 1.0216, |
| "step": 7420 |
| }, |
| { |
| "epoch": 0.3839594852979174, |
| "grad_norm": 0.6025785803794861, |
| "learning_rate": 0.0005, |
| "loss": 1.0195, |
| "step": 7430 |
| }, |
| { |
| "epoch": 0.384476254457134, |
| "grad_norm": 0.6643381118774414, |
| "learning_rate": 0.0005, |
| "loss": 1.0059, |
| "step": 7440 |
| }, |
| { |
| "epoch": 0.38499302361635057, |
| "grad_norm": 0.6015246510505676, |
| "learning_rate": 0.0005, |
| "loss": 1.0181, |
| "step": 7450 |
| }, |
| { |
| "epoch": 0.38550979277556713, |
| "grad_norm": 0.6102477312088013, |
| "learning_rate": 0.0005, |
| "loss": 1.0268, |
| "step": 7460 |
| }, |
| { |
| "epoch": 0.38602656193478374, |
| "grad_norm": 0.6054964661598206, |
| "learning_rate": 0.0005, |
| "loss": 1.0207, |
| "step": 7470 |
| }, |
| { |
| "epoch": 0.3865433310940003, |
| "grad_norm": 0.5937122106552124, |
| "learning_rate": 0.0005, |
| "loss": 1.0214, |
| "step": 7480 |
| }, |
| { |
| "epoch": 0.3870601002532169, |
| "grad_norm": 0.5697932839393616, |
| "learning_rate": 0.0005, |
| "loss": 0.9999, |
| "step": 7490 |
| }, |
| { |
| "epoch": 0.38757686941243347, |
| "grad_norm": 0.6040372848510742, |
| "learning_rate": 0.0005, |
| "loss": 1.0244, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.38809363857165, |
| "grad_norm": 0.666986346244812, |
| "learning_rate": 0.0005, |
| "loss": 1.0253, |
| "step": 7510 |
| }, |
| { |
| "epoch": 0.38861040773086664, |
| "grad_norm": 0.5957795977592468, |
| "learning_rate": 0.0005, |
| "loss": 1.015, |
| "step": 7520 |
| }, |
| { |
| "epoch": 0.3891271768900832, |
| "grad_norm": 0.7224922776222229, |
| "learning_rate": 0.0005, |
| "loss": 1.018, |
| "step": 7530 |
| }, |
| { |
| "epoch": 0.3896439460492998, |
| "grad_norm": 0.6356753706932068, |
| "learning_rate": 0.0005, |
| "loss": 1.028, |
| "step": 7540 |
| }, |
| { |
| "epoch": 0.39016071520851636, |
| "grad_norm": 0.6179920434951782, |
| "learning_rate": 0.0005, |
| "loss": 1.022, |
| "step": 7550 |
| }, |
| { |
| "epoch": 0.3906774843677329, |
| "grad_norm": 0.7617205381393433, |
| "learning_rate": 0.0005, |
| "loss": 1.0124, |
| "step": 7560 |
| }, |
| { |
| "epoch": 0.39119425352694953, |
| "grad_norm": 0.6080652475357056, |
| "learning_rate": 0.0005, |
| "loss": 1.0189, |
| "step": 7570 |
| }, |
| { |
| "epoch": 0.3917110226861661, |
| "grad_norm": 0.6190568804740906, |
| "learning_rate": 0.0005, |
| "loss": 1.02, |
| "step": 7580 |
| }, |
| { |
| "epoch": 0.39222779184538265, |
| "grad_norm": 0.584118127822876, |
| "learning_rate": 0.0005, |
| "loss": 1.0134, |
| "step": 7590 |
| }, |
| { |
| "epoch": 0.39274456100459926, |
| "grad_norm": 0.5745325088500977, |
| "learning_rate": 0.0005, |
| "loss": 1.0145, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.3932613301638158, |
| "grad_norm": 0.586669385433197, |
| "learning_rate": 0.0005, |
| "loss": 1.0215, |
| "step": 7610 |
| }, |
| { |
| "epoch": 0.3937780993230324, |
| "grad_norm": 0.6320251822471619, |
| "learning_rate": 0.0005, |
| "loss": 1.0075, |
| "step": 7620 |
| }, |
| { |
| "epoch": 0.394294868482249, |
| "grad_norm": 0.6066457033157349, |
| "learning_rate": 0.0005, |
| "loss": 1.0084, |
| "step": 7630 |
| }, |
| { |
| "epoch": 0.39481163764146554, |
| "grad_norm": 0.5506545305252075, |
| "learning_rate": 0.0005, |
| "loss": 1.0187, |
| "step": 7640 |
| }, |
| { |
| "epoch": 0.39532840680068215, |
| "grad_norm": 0.6136749982833862, |
| "learning_rate": 0.0005, |
| "loss": 1.0125, |
| "step": 7650 |
| }, |
| { |
| "epoch": 0.3958451759598987, |
| "grad_norm": 0.7134038805961609, |
| "learning_rate": 0.0005, |
| "loss": 1.0056, |
| "step": 7660 |
| }, |
| { |
| "epoch": 0.39636194511911527, |
| "grad_norm": 0.6053097248077393, |
| "learning_rate": 0.0005, |
| "loss": 1.0099, |
| "step": 7670 |
| }, |
| { |
| "epoch": 0.3968787142783319, |
| "grad_norm": 0.5632675290107727, |
| "learning_rate": 0.0005, |
| "loss": 1.0134, |
| "step": 7680 |
| }, |
| { |
| "epoch": 0.39739548343754844, |
| "grad_norm": 0.6165273189544678, |
| "learning_rate": 0.0005, |
| "loss": 1.0235, |
| "step": 7690 |
| }, |
| { |
| "epoch": 0.39791225259676505, |
| "grad_norm": 0.6279580593109131, |
| "learning_rate": 0.0005, |
| "loss": 1.0103, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.3984290217559816, |
| "grad_norm": 0.6073136329650879, |
| "learning_rate": 0.0005, |
| "loss": 1.0134, |
| "step": 7710 |
| }, |
| { |
| "epoch": 0.39894579091519816, |
| "grad_norm": 0.5953530073165894, |
| "learning_rate": 0.0005, |
| "loss": 1.0249, |
| "step": 7720 |
| }, |
| { |
| "epoch": 0.3994625600744148, |
| "grad_norm": 0.5744448900222778, |
| "learning_rate": 0.0005, |
| "loss": 1.0138, |
| "step": 7730 |
| }, |
| { |
| "epoch": 0.39997932923363133, |
| "grad_norm": 0.5618404746055603, |
| "learning_rate": 0.0005, |
| "loss": 1.0079, |
| "step": 7740 |
| }, |
| { |
| "epoch": 0.4004960983928479, |
| "grad_norm": 0.567597508430481, |
| "learning_rate": 0.0005, |
| "loss": 1.0139, |
| "step": 7750 |
| }, |
| { |
| "epoch": 0.4010128675520645, |
| "grad_norm": 0.5764487981796265, |
| "learning_rate": 0.0005, |
| "loss": 1.0379, |
| "step": 7760 |
| }, |
| { |
| "epoch": 0.40152963671128106, |
| "grad_norm": 0.6651884913444519, |
| "learning_rate": 0.0005, |
| "loss": 1.0082, |
| "step": 7770 |
| }, |
| { |
| "epoch": 0.40204640587049767, |
| "grad_norm": 0.7175072431564331, |
| "learning_rate": 0.0005, |
| "loss": 1.0144, |
| "step": 7780 |
| }, |
| { |
| "epoch": 0.4025631750297142, |
| "grad_norm": 0.591261625289917, |
| "learning_rate": 0.0005, |
| "loss": 1.0103, |
| "step": 7790 |
| }, |
| { |
| "epoch": 0.4030799441889308, |
| "grad_norm": 0.5823299884796143, |
| "learning_rate": 0.0005, |
| "loss": 1.0016, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.4035967133481474, |
| "grad_norm": 0.5339162945747375, |
| "learning_rate": 0.0005, |
| "loss": 1.0124, |
| "step": 7810 |
| }, |
| { |
| "epoch": 0.40411348250736395, |
| "grad_norm": 0.6042317748069763, |
| "learning_rate": 0.0005, |
| "loss": 1.0006, |
| "step": 7820 |
| }, |
| { |
| "epoch": 0.40463025166658056, |
| "grad_norm": 0.6178877353668213, |
| "learning_rate": 0.0005, |
| "loss": 1.0166, |
| "step": 7830 |
| }, |
| { |
| "epoch": 0.4051470208257971, |
| "grad_norm": 0.6470639705657959, |
| "learning_rate": 0.0005, |
| "loss": 0.9899, |
| "step": 7840 |
| }, |
| { |
| "epoch": 0.4056637899850137, |
| "grad_norm": 0.5468031167984009, |
| "learning_rate": 0.0005, |
| "loss": 1.0, |
| "step": 7850 |
| }, |
| { |
| "epoch": 0.4061805591442303, |
| "grad_norm": 0.566137433052063, |
| "learning_rate": 0.0005, |
| "loss": 1.0187, |
| "step": 7860 |
| }, |
| { |
| "epoch": 0.40669732830344685, |
| "grad_norm": 0.6000310182571411, |
| "learning_rate": 0.0005, |
| "loss": 1.0221, |
| "step": 7870 |
| }, |
| { |
| "epoch": 0.4072140974626634, |
| "grad_norm": 0.5763528943061829, |
| "learning_rate": 0.0005, |
| "loss": 1.0318, |
| "step": 7880 |
| }, |
| { |
| "epoch": 0.40773086662188, |
| "grad_norm": 0.5767903327941895, |
| "learning_rate": 0.0005, |
| "loss": 1.014, |
| "step": 7890 |
| }, |
| { |
| "epoch": 0.4082476357810966, |
| "grad_norm": 0.6295961737632751, |
| "learning_rate": 0.0005, |
| "loss": 0.9885, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.4087644049403132, |
| "grad_norm": 0.6416009068489075, |
| "learning_rate": 0.0005, |
| "loss": 1.0013, |
| "step": 7910 |
| }, |
| { |
| "epoch": 0.40928117409952974, |
| "grad_norm": 0.6039779186248779, |
| "learning_rate": 0.0005, |
| "loss": 0.9986, |
| "step": 7920 |
| }, |
| { |
| "epoch": 0.4097979432587463, |
| "grad_norm": 0.6459826827049255, |
| "learning_rate": 0.0005, |
| "loss": 1.0051, |
| "step": 7930 |
| }, |
| { |
| "epoch": 0.4103147124179629, |
| "grad_norm": 0.597352147102356, |
| "learning_rate": 0.0005, |
| "loss": 1.002, |
| "step": 7940 |
| }, |
| { |
| "epoch": 0.41083148157717947, |
| "grad_norm": 0.5876639485359192, |
| "learning_rate": 0.0005, |
| "loss": 1.0076, |
| "step": 7950 |
| }, |
| { |
| "epoch": 0.411348250736396, |
| "grad_norm": 0.5862469673156738, |
| "learning_rate": 0.0005, |
| "loss": 0.9972, |
| "step": 7960 |
| }, |
| { |
| "epoch": 0.41186501989561264, |
| "grad_norm": 0.5829436779022217, |
| "learning_rate": 0.0005, |
| "loss": 1.0233, |
| "step": 7970 |
| }, |
| { |
| "epoch": 0.4123817890548292, |
| "grad_norm": 0.5912736058235168, |
| "learning_rate": 0.0005, |
| "loss": 1.0038, |
| "step": 7980 |
| }, |
| { |
| "epoch": 0.4128985582140458, |
| "grad_norm": 0.5810758471488953, |
| "learning_rate": 0.0005, |
| "loss": 1.0077, |
| "step": 7990 |
| }, |
| { |
| "epoch": 0.41341532737326236, |
| "grad_norm": 0.5771864056587219, |
| "learning_rate": 0.0005, |
| "loss": 0.9976, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.4139320965324789, |
| "grad_norm": 0.5928204655647278, |
| "learning_rate": 0.0005, |
| "loss": 1.0186, |
| "step": 8010 |
| }, |
| { |
| "epoch": 0.41444886569169553, |
| "grad_norm": 0.603636622428894, |
| "learning_rate": 0.0005, |
| "loss": 1.0009, |
| "step": 8020 |
| }, |
| { |
| "epoch": 0.4149656348509121, |
| "grad_norm": 0.5715627670288086, |
| "learning_rate": 0.0005, |
| "loss": 1.014, |
| "step": 8030 |
| }, |
| { |
| "epoch": 0.4154824040101287, |
| "grad_norm": 0.5580553412437439, |
| "learning_rate": 0.0005, |
| "loss": 0.9957, |
| "step": 8040 |
| }, |
| { |
| "epoch": 0.41599917316934526, |
| "grad_norm": 0.5680859088897705, |
| "learning_rate": 0.0005, |
| "loss": 1.0027, |
| "step": 8050 |
| }, |
| { |
| "epoch": 0.4165159423285618, |
| "grad_norm": 0.5446572303771973, |
| "learning_rate": 0.0005, |
| "loss": 1.0412, |
| "step": 8060 |
| }, |
| { |
| "epoch": 0.41703271148777843, |
| "grad_norm": 0.5877604484558105, |
| "learning_rate": 0.0005, |
| "loss": 1.0087, |
| "step": 8070 |
| }, |
| { |
| "epoch": 0.417549480646995, |
| "grad_norm": 0.5905182361602783, |
| "learning_rate": 0.0005, |
| "loss": 0.9889, |
| "step": 8080 |
| }, |
| { |
| "epoch": 0.41806624980621154, |
| "grad_norm": 0.6025214195251465, |
| "learning_rate": 0.0005, |
| "loss": 1.0171, |
| "step": 8090 |
| }, |
| { |
| "epoch": 0.41858301896542816, |
| "grad_norm": 0.5762201547622681, |
| "learning_rate": 0.0005, |
| "loss": 1.0199, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.4190997881246447, |
| "grad_norm": 0.5564827919006348, |
| "learning_rate": 0.0005, |
| "loss": 1.0049, |
| "step": 8110 |
| }, |
| { |
| "epoch": 0.4196165572838613, |
| "grad_norm": 0.5517228841781616, |
| "learning_rate": 0.0005, |
| "loss": 1.0036, |
| "step": 8120 |
| }, |
| { |
| "epoch": 0.4201333264430779, |
| "grad_norm": 0.6581810712814331, |
| "learning_rate": 0.0005, |
| "loss": 1.0042, |
| "step": 8130 |
| }, |
| { |
| "epoch": 0.42065009560229444, |
| "grad_norm": 0.5902772545814514, |
| "learning_rate": 0.0005, |
| "loss": 0.9956, |
| "step": 8140 |
| }, |
| { |
| "epoch": 0.42116686476151105, |
| "grad_norm": 0.5903311967849731, |
| "learning_rate": 0.0005, |
| "loss": 0.9994, |
| "step": 8150 |
| }, |
| { |
| "epoch": 0.4216836339207276, |
| "grad_norm": 0.5883710980415344, |
| "learning_rate": 0.0005, |
| "loss": 1.001, |
| "step": 8160 |
| }, |
| { |
| "epoch": 0.42220040307994416, |
| "grad_norm": 0.5694506764411926, |
| "learning_rate": 0.0005, |
| "loss": 0.997, |
| "step": 8170 |
| }, |
| { |
| "epoch": 0.4227171722391608, |
| "grad_norm": 0.5448591113090515, |
| "learning_rate": 0.0005, |
| "loss": 0.9987, |
| "step": 8180 |
| }, |
| { |
| "epoch": 0.42323394139837733, |
| "grad_norm": 0.5763291120529175, |
| "learning_rate": 0.0005, |
| "loss": 0.9957, |
| "step": 8190 |
| }, |
| { |
| "epoch": 0.42375071055759395, |
| "grad_norm": 0.5763616561889648, |
| "learning_rate": 0.0005, |
| "loss": 1.009, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.4242674797168105, |
| "grad_norm": 0.5575286149978638, |
| "learning_rate": 0.0005, |
| "loss": 1.01, |
| "step": 8210 |
| }, |
| { |
| "epoch": 0.42478424887602706, |
| "grad_norm": 0.5435507297515869, |
| "learning_rate": 0.0005, |
| "loss": 0.9947, |
| "step": 8220 |
| }, |
| { |
| "epoch": 0.42530101803524367, |
| "grad_norm": 0.6307750344276428, |
| "learning_rate": 0.0005, |
| "loss": 1.0098, |
| "step": 8230 |
| }, |
| { |
| "epoch": 0.42581778719446023, |
| "grad_norm": 0.5419248342514038, |
| "learning_rate": 0.0005, |
| "loss": 1.013, |
| "step": 8240 |
| }, |
| { |
| "epoch": 0.4263345563536768, |
| "grad_norm": 0.5558311343193054, |
| "learning_rate": 0.0005, |
| "loss": 0.9956, |
| "step": 8250 |
| }, |
| { |
| "epoch": 0.4268513255128934, |
| "grad_norm": 0.5593147277832031, |
| "learning_rate": 0.0005, |
| "loss": 1.0142, |
| "step": 8260 |
| }, |
| { |
| "epoch": 0.42736809467210995, |
| "grad_norm": 0.5839881896972656, |
| "learning_rate": 0.0005, |
| "loss": 1.001, |
| "step": 8270 |
| }, |
| { |
| "epoch": 0.42788486383132657, |
| "grad_norm": 0.5981064438819885, |
| "learning_rate": 0.0005, |
| "loss": 0.9952, |
| "step": 8280 |
| }, |
| { |
| "epoch": 0.4284016329905431, |
| "grad_norm": 0.6945583820343018, |
| "learning_rate": 0.0005, |
| "loss": 0.9971, |
| "step": 8290 |
| }, |
| { |
| "epoch": 0.4289184021497597, |
| "grad_norm": 0.5536506772041321, |
| "learning_rate": 0.0005, |
| "loss": 0.9929, |
| "step": 8300 |
| }, |
| { |
| "epoch": 0.4294351713089763, |
| "grad_norm": 0.557338297367096, |
| "learning_rate": 0.0005, |
| "loss": 0.9833, |
| "step": 8310 |
| }, |
| { |
| "epoch": 0.42995194046819285, |
| "grad_norm": 0.5480133295059204, |
| "learning_rate": 0.0005, |
| "loss": 1.0008, |
| "step": 8320 |
| }, |
| { |
| "epoch": 0.43046870962740946, |
| "grad_norm": 0.5495566129684448, |
| "learning_rate": 0.0005, |
| "loss": 1.0126, |
| "step": 8330 |
| }, |
| { |
| "epoch": 0.430985478786626, |
| "grad_norm": 0.5759509801864624, |
| "learning_rate": 0.0005, |
| "loss": 0.9866, |
| "step": 8340 |
| }, |
| { |
| "epoch": 0.4315022479458426, |
| "grad_norm": 0.5602892637252808, |
| "learning_rate": 0.0005, |
| "loss": 0.9893, |
| "step": 8350 |
| }, |
| { |
| "epoch": 0.4320190171050592, |
| "grad_norm": 0.560892641544342, |
| "learning_rate": 0.0005, |
| "loss": 1.0125, |
| "step": 8360 |
| }, |
| { |
| "epoch": 0.43253578626427575, |
| "grad_norm": 0.582815408706665, |
| "learning_rate": 0.0005, |
| "loss": 1.0, |
| "step": 8370 |
| }, |
| { |
| "epoch": 0.4330525554234923, |
| "grad_norm": 0.6133496165275574, |
| "learning_rate": 0.0005, |
| "loss": 0.9928, |
| "step": 8380 |
| }, |
| { |
| "epoch": 0.4335693245827089, |
| "grad_norm": 0.5611013174057007, |
| "learning_rate": 0.0005, |
| "loss": 0.998, |
| "step": 8390 |
| }, |
| { |
| "epoch": 0.43408609374192547, |
| "grad_norm": 0.5589267611503601, |
| "learning_rate": 0.0005, |
| "loss": 0.999, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.4346028629011421, |
| "grad_norm": 0.5508078932762146, |
| "learning_rate": 0.0005, |
| "loss": 0.9954, |
| "step": 8410 |
| }, |
| { |
| "epoch": 0.43511963206035864, |
| "grad_norm": 0.5803013443946838, |
| "learning_rate": 0.0005, |
| "loss": 0.9891, |
| "step": 8420 |
| }, |
| { |
| "epoch": 0.4356364012195752, |
| "grad_norm": 0.532085120677948, |
| "learning_rate": 0.0005, |
| "loss": 0.9935, |
| "step": 8430 |
| }, |
| { |
| "epoch": 0.4361531703787918, |
| "grad_norm": 0.6158758401870728, |
| "learning_rate": 0.0005, |
| "loss": 0.9927, |
| "step": 8440 |
| }, |
| { |
| "epoch": 0.43666993953800837, |
| "grad_norm": 0.5444722771644592, |
| "learning_rate": 0.0005, |
| "loss": 0.9754, |
| "step": 8450 |
| }, |
| { |
| "epoch": 0.4371867086972249, |
| "grad_norm": 0.5872038006782532, |
| "learning_rate": 0.0005, |
| "loss": 0.9803, |
| "step": 8460 |
| }, |
| { |
| "epoch": 0.43770347785644154, |
| "grad_norm": 0.5382379293441772, |
| "learning_rate": 0.0005, |
| "loss": 1.0025, |
| "step": 8470 |
| }, |
| { |
| "epoch": 0.4382202470156581, |
| "grad_norm": 0.5538324117660522, |
| "learning_rate": 0.0005, |
| "loss": 0.9764, |
| "step": 8480 |
| }, |
| { |
| "epoch": 0.4387370161748747, |
| "grad_norm": 0.5917341709136963, |
| "learning_rate": 0.0005, |
| "loss": 0.9732, |
| "step": 8490 |
| }, |
| { |
| "epoch": 0.43925378533409126, |
| "grad_norm": 0.5395458340644836, |
| "learning_rate": 0.0005, |
| "loss": 0.9948, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.4397705544933078, |
| "grad_norm": 0.5973149538040161, |
| "learning_rate": 0.0005, |
| "loss": 0.9971, |
| "step": 8510 |
| }, |
| { |
| "epoch": 0.44028732365252443, |
| "grad_norm": 0.579712450504303, |
| "learning_rate": 0.0005, |
| "loss": 0.9836, |
| "step": 8520 |
| }, |
| { |
| "epoch": 0.440804092811741, |
| "grad_norm": 0.5590643882751465, |
| "learning_rate": 0.0005, |
| "loss": 0.9896, |
| "step": 8530 |
| }, |
| { |
| "epoch": 0.4413208619709576, |
| "grad_norm": 0.5443204045295715, |
| "learning_rate": 0.0005, |
| "loss": 0.9868, |
| "step": 8540 |
| }, |
| { |
| "epoch": 0.44183763113017416, |
| "grad_norm": 0.5973614454269409, |
| "learning_rate": 0.0005, |
| "loss": 0.9881, |
| "step": 8550 |
| }, |
| { |
| "epoch": 0.4423544002893907, |
| "grad_norm": 0.6157576441764832, |
| "learning_rate": 0.0005, |
| "loss": 1.0007, |
| "step": 8560 |
| }, |
| { |
| "epoch": 0.4428711694486073, |
| "grad_norm": 0.5678598880767822, |
| "learning_rate": 0.0005, |
| "loss": 0.9878, |
| "step": 8570 |
| }, |
| { |
| "epoch": 0.4433879386078239, |
| "grad_norm": 0.5606565475463867, |
| "learning_rate": 0.0005, |
| "loss": 0.9899, |
| "step": 8580 |
| }, |
| { |
| "epoch": 0.44390470776704044, |
| "grad_norm": 0.651261031627655, |
| "learning_rate": 0.0005, |
| "loss": 1.0053, |
| "step": 8590 |
| }, |
| { |
| "epoch": 0.44442147692625705, |
| "grad_norm": 0.6717237830162048, |
| "learning_rate": 0.0005, |
| "loss": 0.9783, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.4449382460854736, |
| "grad_norm": 0.5981956720352173, |
| "learning_rate": 0.0005, |
| "loss": 0.9846, |
| "step": 8610 |
| }, |
| { |
| "epoch": 0.4454550152446902, |
| "grad_norm": 0.6338360905647278, |
| "learning_rate": 0.0005, |
| "loss": 0.9903, |
| "step": 8620 |
| }, |
| { |
| "epoch": 0.4459717844039068, |
| "grad_norm": 0.6431187987327576, |
| "learning_rate": 0.0005, |
| "loss": 0.9967, |
| "step": 8630 |
| }, |
| { |
| "epoch": 0.44648855356312334, |
| "grad_norm": 0.6032900810241699, |
| "learning_rate": 0.0005, |
| "loss": 0.9814, |
| "step": 8640 |
| }, |
| { |
| "epoch": 0.44700532272233995, |
| "grad_norm": 0.5607067942619324, |
| "learning_rate": 0.0005, |
| "loss": 1.0021, |
| "step": 8650 |
| }, |
| { |
| "epoch": 0.4475220918815565, |
| "grad_norm": 0.5442407727241516, |
| "learning_rate": 0.0005, |
| "loss": 0.9911, |
| "step": 8660 |
| }, |
| { |
| "epoch": 0.44803886104077306, |
| "grad_norm": 0.5274026989936829, |
| "learning_rate": 0.0005, |
| "loss": 0.9798, |
| "step": 8670 |
| }, |
| { |
| "epoch": 0.4485556301999897, |
| "grad_norm": 0.5678251385688782, |
| "learning_rate": 0.0005, |
| "loss": 0.9869, |
| "step": 8680 |
| }, |
| { |
| "epoch": 0.44907239935920623, |
| "grad_norm": 0.5528420805931091, |
| "learning_rate": 0.0005, |
| "loss": 0.9963, |
| "step": 8690 |
| }, |
| { |
| "epoch": 0.44958916851842284, |
| "grad_norm": 0.5485315918922424, |
| "learning_rate": 0.0005, |
| "loss": 0.974, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.4501059376776394, |
| "grad_norm": 0.566852331161499, |
| "learning_rate": 0.0005, |
| "loss": 0.9891, |
| "step": 8710 |
| }, |
| { |
| "epoch": 0.45062270683685596, |
| "grad_norm": 0.5270015597343445, |
| "learning_rate": 0.0005, |
| "loss": 0.979, |
| "step": 8720 |
| }, |
| { |
| "epoch": 0.45113947599607257, |
| "grad_norm": 0.5595947504043579, |
| "learning_rate": 0.0005, |
| "loss": 0.9632, |
| "step": 8730 |
| }, |
| { |
| "epoch": 0.4516562451552891, |
| "grad_norm": 0.5901986360549927, |
| "learning_rate": 0.0005, |
| "loss": 0.9952, |
| "step": 8740 |
| }, |
| { |
| "epoch": 0.4521730143145057, |
| "grad_norm": 0.5500153303146362, |
| "learning_rate": 0.0005, |
| "loss": 0.9826, |
| "step": 8750 |
| }, |
| { |
| "epoch": 0.4526897834737223, |
| "grad_norm": 0.6439850926399231, |
| "learning_rate": 0.0005, |
| "loss": 0.9783, |
| "step": 8760 |
| }, |
| { |
| "epoch": 0.45320655263293885, |
| "grad_norm": 0.6534972190856934, |
| "learning_rate": 0.0005, |
| "loss": 0.9905, |
| "step": 8770 |
| }, |
| { |
| "epoch": 0.45372332179215547, |
| "grad_norm": 0.6489924192428589, |
| "learning_rate": 0.0005, |
| "loss": 1.0066, |
| "step": 8780 |
| }, |
| { |
| "epoch": 0.454240090951372, |
| "grad_norm": 0.5541792511940002, |
| "learning_rate": 0.0005, |
| "loss": 0.989, |
| "step": 8790 |
| }, |
| { |
| "epoch": 0.4547568601105886, |
| "grad_norm": 0.5128721594810486, |
| "learning_rate": 0.0005, |
| "loss": 0.9943, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.4552736292698052, |
| "grad_norm": 0.5839647054672241, |
| "learning_rate": 0.0005, |
| "loss": 0.9824, |
| "step": 8810 |
| }, |
| { |
| "epoch": 0.45579039842902175, |
| "grad_norm": 0.6303303241729736, |
| "learning_rate": 0.0005, |
| "loss": 0.9975, |
| "step": 8820 |
| }, |
| { |
| "epoch": 0.45630716758823836, |
| "grad_norm": 0.5413320064544678, |
| "learning_rate": 0.0005, |
| "loss": 0.973, |
| "step": 8830 |
| }, |
| { |
| "epoch": 0.4568239367474549, |
| "grad_norm": 0.5503526926040649, |
| "learning_rate": 0.0005, |
| "loss": 0.997, |
| "step": 8840 |
| }, |
| { |
| "epoch": 0.4573407059066715, |
| "grad_norm": 0.5337091684341431, |
| "learning_rate": 0.0005, |
| "loss": 0.9785, |
| "step": 8850 |
| }, |
| { |
| "epoch": 0.4578574750658881, |
| "grad_norm": 0.5215671062469482, |
| "learning_rate": 0.0005, |
| "loss": 0.988, |
| "step": 8860 |
| }, |
| { |
| "epoch": 0.45837424422510464, |
| "grad_norm": 0.5596259236335754, |
| "learning_rate": 0.0005, |
| "loss": 0.9619, |
| "step": 8870 |
| }, |
| { |
| "epoch": 0.4588910133843212, |
| "grad_norm": 0.644656777381897, |
| "learning_rate": 0.0005, |
| "loss": 1.0018, |
| "step": 8880 |
| }, |
| { |
| "epoch": 0.4594077825435378, |
| "grad_norm": 0.546576976776123, |
| "learning_rate": 0.0005, |
| "loss": 0.9895, |
| "step": 8890 |
| }, |
| { |
| "epoch": 0.45992455170275437, |
| "grad_norm": 0.5912691354751587, |
| "learning_rate": 0.0005, |
| "loss": 0.9769, |
| "step": 8900 |
| }, |
| { |
| "epoch": 0.460441320861971, |
| "grad_norm": 0.5670520663261414, |
| "learning_rate": 0.0005, |
| "loss": 0.9841, |
| "step": 8910 |
| }, |
| { |
| "epoch": 0.46095809002118754, |
| "grad_norm": 0.5410053730010986, |
| "learning_rate": 0.0005, |
| "loss": 0.9842, |
| "step": 8920 |
| }, |
| { |
| "epoch": 0.4614748591804041, |
| "grad_norm": 0.5501711964607239, |
| "learning_rate": 0.0005, |
| "loss": 0.9833, |
| "step": 8930 |
| }, |
| { |
| "epoch": 0.4619916283396207, |
| "grad_norm": 0.5702757835388184, |
| "learning_rate": 0.0005, |
| "loss": 0.996, |
| "step": 8940 |
| }, |
| { |
| "epoch": 0.46250839749883726, |
| "grad_norm": 0.5536521077156067, |
| "learning_rate": 0.0005, |
| "loss": 0.9808, |
| "step": 8950 |
| }, |
| { |
| "epoch": 0.4630251666580538, |
| "grad_norm": 0.5470142364501953, |
| "learning_rate": 0.0005, |
| "loss": 0.9701, |
| "step": 8960 |
| }, |
| { |
| "epoch": 0.46354193581727043, |
| "grad_norm": 0.5773063898086548, |
| "learning_rate": 0.0005, |
| "loss": 0.9648, |
| "step": 8970 |
| }, |
| { |
| "epoch": 0.464058704976487, |
| "grad_norm": 0.5552759170532227, |
| "learning_rate": 0.0005, |
| "loss": 0.9801, |
| "step": 8980 |
| }, |
| { |
| "epoch": 0.4645754741357036, |
| "grad_norm": 0.5589256882667542, |
| "learning_rate": 0.0005, |
| "loss": 0.9762, |
| "step": 8990 |
| }, |
| { |
| "epoch": 0.46509224329492016, |
| "grad_norm": 0.5548306703567505, |
| "learning_rate": 0.0005, |
| "loss": 0.9536, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.4656090124541367, |
| "grad_norm": 0.5578811168670654, |
| "learning_rate": 0.0005, |
| "loss": 0.9758, |
| "step": 9010 |
| }, |
| { |
| "epoch": 0.46612578161335333, |
| "grad_norm": 0.542353630065918, |
| "learning_rate": 0.0005, |
| "loss": 0.9754, |
| "step": 9020 |
| }, |
| { |
| "epoch": 0.4666425507725699, |
| "grad_norm": 0.5240308046340942, |
| "learning_rate": 0.0005, |
| "loss": 0.9527, |
| "step": 9030 |
| }, |
| { |
| "epoch": 0.4671593199317865, |
| "grad_norm": 0.5662107467651367, |
| "learning_rate": 0.0005, |
| "loss": 0.9812, |
| "step": 9040 |
| }, |
| { |
| "epoch": 0.46767608909100306, |
| "grad_norm": 0.5549916625022888, |
| "learning_rate": 0.0005, |
| "loss": 0.9881, |
| "step": 9050 |
| }, |
| { |
| "epoch": 0.4681928582502196, |
| "grad_norm": 0.5178738832473755, |
| "learning_rate": 0.0005, |
| "loss": 0.9641, |
| "step": 9060 |
| }, |
| { |
| "epoch": 0.4687096274094362, |
| "grad_norm": 0.52500981092453, |
| "learning_rate": 0.0005, |
| "loss": 0.969, |
| "step": 9070 |
| }, |
| { |
| "epoch": 0.4692263965686528, |
| "grad_norm": 0.5403527617454529, |
| "learning_rate": 0.0005, |
| "loss": 0.9853, |
| "step": 9080 |
| }, |
| { |
| "epoch": 0.46974316572786934, |
| "grad_norm": 0.6338274478912354, |
| "learning_rate": 0.0005, |
| "loss": 0.9762, |
| "step": 9090 |
| }, |
| { |
| "epoch": 0.47025993488708595, |
| "grad_norm": 0.5694402456283569, |
| "learning_rate": 0.0005, |
| "loss": 0.9947, |
| "step": 9100 |
| }, |
| { |
| "epoch": 0.4707767040463025, |
| "grad_norm": 0.5308618545532227, |
| "learning_rate": 0.0005, |
| "loss": 1.0035, |
| "step": 9110 |
| }, |
| { |
| "epoch": 0.4712934732055191, |
| "grad_norm": 0.5705435872077942, |
| "learning_rate": 0.0005, |
| "loss": 0.979, |
| "step": 9120 |
| }, |
| { |
| "epoch": 0.4718102423647357, |
| "grad_norm": 0.5150364637374878, |
| "learning_rate": 0.0005, |
| "loss": 0.9907, |
| "step": 9130 |
| }, |
| { |
| "epoch": 0.47232701152395223, |
| "grad_norm": 0.6099853515625, |
| "learning_rate": 0.0005, |
| "loss": 0.9834, |
| "step": 9140 |
| }, |
| { |
| "epoch": 0.47284378068316885, |
| "grad_norm": 0.5578297972679138, |
| "learning_rate": 0.0005, |
| "loss": 0.9758, |
| "step": 9150 |
| }, |
| { |
| "epoch": 0.4733605498423854, |
| "grad_norm": 0.5842065811157227, |
| "learning_rate": 0.0005, |
| "loss": 0.9831, |
| "step": 9160 |
| }, |
| { |
| "epoch": 0.47387731900160196, |
| "grad_norm": 0.54753577709198, |
| "learning_rate": 0.0005, |
| "loss": 0.9767, |
| "step": 9170 |
| }, |
| { |
| "epoch": 0.47439408816081857, |
| "grad_norm": 0.5472375750541687, |
| "learning_rate": 0.0005, |
| "loss": 0.9844, |
| "step": 9180 |
| }, |
| { |
| "epoch": 0.47491085732003513, |
| "grad_norm": 0.6289487481117249, |
| "learning_rate": 0.0005, |
| "loss": 0.9806, |
| "step": 9190 |
| }, |
| { |
| "epoch": 0.47542762647925174, |
| "grad_norm": 0.5702399015426636, |
| "learning_rate": 0.0005, |
| "loss": 0.973, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.4759443956384683, |
| "grad_norm": 0.5393164753913879, |
| "learning_rate": 0.0005, |
| "loss": 0.9862, |
| "step": 9210 |
| }, |
| { |
| "epoch": 0.47646116479768486, |
| "grad_norm": 0.5307340621948242, |
| "learning_rate": 0.0005, |
| "loss": 0.9949, |
| "step": 9220 |
| }, |
| { |
| "epoch": 0.47697793395690147, |
| "grad_norm": 0.6061729788780212, |
| "learning_rate": 0.0005, |
| "loss": 0.9869, |
| "step": 9230 |
| }, |
| { |
| "epoch": 0.477494703116118, |
| "grad_norm": 0.5458270311355591, |
| "learning_rate": 0.0005, |
| "loss": 0.9782, |
| "step": 9240 |
| }, |
| { |
| "epoch": 0.4780114722753346, |
| "grad_norm": 0.5837684869766235, |
| "learning_rate": 0.0005, |
| "loss": 0.9794, |
| "step": 9250 |
| }, |
| { |
| "epoch": 0.4785282414345512, |
| "grad_norm": 0.557824432849884, |
| "learning_rate": 0.0005, |
| "loss": 0.9723, |
| "step": 9260 |
| }, |
| { |
| "epoch": 0.47904501059376775, |
| "grad_norm": 0.57038414478302, |
| "learning_rate": 0.0005, |
| "loss": 0.9782, |
| "step": 9270 |
| }, |
| { |
| "epoch": 0.47956177975298436, |
| "grad_norm": 0.5163660645484924, |
| "learning_rate": 0.0005, |
| "loss": 0.9615, |
| "step": 9280 |
| }, |
| { |
| "epoch": 0.4800785489122009, |
| "grad_norm": 0.5604984760284424, |
| "learning_rate": 0.0005, |
| "loss": 0.9806, |
| "step": 9290 |
| }, |
| { |
| "epoch": 0.4805953180714175, |
| "grad_norm": 0.5169503092765808, |
| "learning_rate": 0.0005, |
| "loss": 0.9594, |
| "step": 9300 |
| }, |
| { |
| "epoch": 0.4811120872306341, |
| "grad_norm": 0.547803521156311, |
| "learning_rate": 0.0005, |
| "loss": 0.9795, |
| "step": 9310 |
| }, |
| { |
| "epoch": 0.48162885638985065, |
| "grad_norm": 0.5462937951087952, |
| "learning_rate": 0.0005, |
| "loss": 0.9756, |
| "step": 9320 |
| }, |
| { |
| "epoch": 0.48214562554906726, |
| "grad_norm": 0.5670326352119446, |
| "learning_rate": 0.0005, |
| "loss": 0.9726, |
| "step": 9330 |
| }, |
| { |
| "epoch": 0.4826623947082838, |
| "grad_norm": 0.5633768439292908, |
| "learning_rate": 0.0005, |
| "loss": 0.958, |
| "step": 9340 |
| }, |
| { |
| "epoch": 0.48317916386750037, |
| "grad_norm": 0.5781881213188171, |
| "learning_rate": 0.0005, |
| "loss": 0.9531, |
| "step": 9350 |
| }, |
| { |
| "epoch": 0.483695933026717, |
| "grad_norm": 0.6162354350090027, |
| "learning_rate": 0.0005, |
| "loss": 0.9584, |
| "step": 9360 |
| }, |
| { |
| "epoch": 0.48421270218593354, |
| "grad_norm": 0.5659033060073853, |
| "learning_rate": 0.0005, |
| "loss": 0.9691, |
| "step": 9370 |
| }, |
| { |
| "epoch": 0.4847294713451501, |
| "grad_norm": 0.5409724116325378, |
| "learning_rate": 0.0005, |
| "loss": 0.9654, |
| "step": 9380 |
| }, |
| { |
| "epoch": 0.4852462405043667, |
| "grad_norm": 0.5185449719429016, |
| "learning_rate": 0.0005, |
| "loss": 0.9767, |
| "step": 9390 |
| }, |
| { |
| "epoch": 0.48576300966358327, |
| "grad_norm": 0.5317234992980957, |
| "learning_rate": 0.0005, |
| "loss": 0.9797, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.4862797788227999, |
| "grad_norm": 0.5362582802772522, |
| "learning_rate": 0.0005, |
| "loss": 0.9691, |
| "step": 9410 |
| }, |
| { |
| "epoch": 0.48679654798201644, |
| "grad_norm": 0.5296323895454407, |
| "learning_rate": 0.0005, |
| "loss": 0.9714, |
| "step": 9420 |
| }, |
| { |
| "epoch": 0.487313317141233, |
| "grad_norm": 0.5387376546859741, |
| "learning_rate": 0.0005, |
| "loss": 0.9857, |
| "step": 9430 |
| }, |
| { |
| "epoch": 0.4878300863004496, |
| "grad_norm": 0.5592471957206726, |
| "learning_rate": 0.0005, |
| "loss": 0.9687, |
| "step": 9440 |
| }, |
| { |
| "epoch": 0.48834685545966616, |
| "grad_norm": 0.5368979573249817, |
| "learning_rate": 0.0005, |
| "loss": 0.9624, |
| "step": 9450 |
| }, |
| { |
| "epoch": 0.4888636246188827, |
| "grad_norm": 0.559069037437439, |
| "learning_rate": 0.0005, |
| "loss": 0.9713, |
| "step": 9460 |
| }, |
| { |
| "epoch": 0.48938039377809933, |
| "grad_norm": 0.5417030453681946, |
| "learning_rate": 0.0005, |
| "loss": 0.9749, |
| "step": 9470 |
| }, |
| { |
| "epoch": 0.4898971629373159, |
| "grad_norm": 0.6302499771118164, |
| "learning_rate": 0.0005, |
| "loss": 0.976, |
| "step": 9480 |
| }, |
| { |
| "epoch": 0.4904139320965325, |
| "grad_norm": 0.5580116510391235, |
| "learning_rate": 0.0005, |
| "loss": 0.9696, |
| "step": 9490 |
| }, |
| { |
| "epoch": 0.49093070125574906, |
| "grad_norm": 0.5281049013137817, |
| "learning_rate": 0.0005, |
| "loss": 0.9626, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.4914474704149656, |
| "grad_norm": 0.6579439043998718, |
| "learning_rate": 0.0005, |
| "loss": 0.9915, |
| "step": 9510 |
| }, |
| { |
| "epoch": 0.4919642395741822, |
| "grad_norm": 0.6327407956123352, |
| "learning_rate": 0.0005, |
| "loss": 0.976, |
| "step": 9520 |
| }, |
| { |
| "epoch": 0.4924810087333988, |
| "grad_norm": 0.5917522311210632, |
| "learning_rate": 0.0005, |
| "loss": 0.9698, |
| "step": 9530 |
| }, |
| { |
| "epoch": 0.4929977778926154, |
| "grad_norm": 0.5556752681732178, |
| "learning_rate": 0.0005, |
| "loss": 0.974, |
| "step": 9540 |
| }, |
| { |
| "epoch": 0.49351454705183195, |
| "grad_norm": 0.6051674485206604, |
| "learning_rate": 0.0005, |
| "loss": 0.9673, |
| "step": 9550 |
| }, |
| { |
| "epoch": 0.4940313162110485, |
| "grad_norm": 0.6255143880844116, |
| "learning_rate": 0.0005, |
| "loss": 0.9741, |
| "step": 9560 |
| }, |
| { |
| "epoch": 0.4945480853702651, |
| "grad_norm": 0.5358819961547852, |
| "learning_rate": 0.0005, |
| "loss": 0.965, |
| "step": 9570 |
| }, |
| { |
| "epoch": 0.4950648545294817, |
| "grad_norm": 0.5503594279289246, |
| "learning_rate": 0.0005, |
| "loss": 0.9668, |
| "step": 9580 |
| }, |
| { |
| "epoch": 0.49558162368869824, |
| "grad_norm": 0.510237455368042, |
| "learning_rate": 0.0005, |
| "loss": 0.9685, |
| "step": 9590 |
| }, |
| { |
| "epoch": 0.49609839284791485, |
| "grad_norm": 0.5995839238166809, |
| "learning_rate": 0.0005, |
| "loss": 0.9709, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.4966151620071314, |
| "grad_norm": 0.5354804992675781, |
| "learning_rate": 0.0005, |
| "loss": 0.9618, |
| "step": 9610 |
| }, |
| { |
| "epoch": 0.497131931166348, |
| "grad_norm": 0.5301372408866882, |
| "learning_rate": 0.0005, |
| "loss": 0.9644, |
| "step": 9620 |
| }, |
| { |
| "epoch": 0.4976487003255646, |
| "grad_norm": 0.6010123491287231, |
| "learning_rate": 0.0005, |
| "loss": 0.9834, |
| "step": 9630 |
| }, |
| { |
| "epoch": 0.49816546948478113, |
| "grad_norm": 0.5131679177284241, |
| "learning_rate": 0.0005, |
| "loss": 0.9695, |
| "step": 9640 |
| }, |
| { |
| "epoch": 0.49868223864399774, |
| "grad_norm": 0.5364587903022766, |
| "learning_rate": 0.0005, |
| "loss": 0.9572, |
| "step": 9650 |
| }, |
| { |
| "epoch": 0.4991990078032143, |
| "grad_norm": 0.5561274290084839, |
| "learning_rate": 0.0005, |
| "loss": 0.9739, |
| "step": 9660 |
| }, |
| { |
| "epoch": 0.49971577696243086, |
| "grad_norm": 0.5267083048820496, |
| "learning_rate": 0.0005, |
| "loss": 0.9659, |
| "step": 9670 |
| }, |
| { |
| "epoch": 0.5002325461216475, |
| "grad_norm": 0.5306525230407715, |
| "learning_rate": 0.0005, |
| "loss": 0.9698, |
| "step": 9680 |
| }, |
| { |
| "epoch": 0.5007493152808641, |
| "grad_norm": 0.6048880219459534, |
| "learning_rate": 0.0005, |
| "loss": 0.9702, |
| "step": 9690 |
| }, |
| { |
| "epoch": 0.5012660844400806, |
| "grad_norm": 0.5528176426887512, |
| "learning_rate": 0.0005, |
| "loss": 0.981, |
| "step": 9700 |
| }, |
| { |
| "epoch": 0.5017828535992972, |
| "grad_norm": 0.5247277021408081, |
| "learning_rate": 0.0005, |
| "loss": 0.9587, |
| "step": 9710 |
| }, |
| { |
| "epoch": 0.5022996227585138, |
| "grad_norm": 0.5636876225471497, |
| "learning_rate": 0.0005, |
| "loss": 0.9627, |
| "step": 9720 |
| }, |
| { |
| "epoch": 0.5028163919177303, |
| "grad_norm": 0.5214900970458984, |
| "learning_rate": 0.0005, |
| "loss": 0.9644, |
| "step": 9730 |
| }, |
| { |
| "epoch": 0.5033331610769469, |
| "grad_norm": 0.5302378535270691, |
| "learning_rate": 0.0005, |
| "loss": 0.9612, |
| "step": 9740 |
| }, |
| { |
| "epoch": 0.5038499302361635, |
| "grad_norm": 0.5830851197242737, |
| "learning_rate": 0.0005, |
| "loss": 0.9563, |
| "step": 9750 |
| }, |
| { |
| "epoch": 0.50436669939538, |
| "grad_norm": 0.5303472876548767, |
| "learning_rate": 0.0005, |
| "loss": 0.9542, |
| "step": 9760 |
| }, |
| { |
| "epoch": 0.5048834685545966, |
| "grad_norm": 0.5632893443107605, |
| "learning_rate": 0.0005, |
| "loss": 0.9828, |
| "step": 9770 |
| }, |
| { |
| "epoch": 0.5054002377138133, |
| "grad_norm": 0.5968844890594482, |
| "learning_rate": 0.0005, |
| "loss": 0.9855, |
| "step": 9780 |
| }, |
| { |
| "epoch": 0.5059170068730299, |
| "grad_norm": 0.580721378326416, |
| "learning_rate": 0.0005, |
| "loss": 0.9903, |
| "step": 9790 |
| }, |
| { |
| "epoch": 0.5064337760322464, |
| "grad_norm": 0.5187913179397583, |
| "learning_rate": 0.0005, |
| "loss": 0.9512, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.506950545191463, |
| "grad_norm": 0.5946047902107239, |
| "learning_rate": 0.0005, |
| "loss": 0.9661, |
| "step": 9810 |
| }, |
| { |
| "epoch": 0.5074673143506796, |
| "grad_norm": 0.5428043603897095, |
| "learning_rate": 0.0005, |
| "loss": 0.9669, |
| "step": 9820 |
| }, |
| { |
| "epoch": 0.5079840835098961, |
| "grad_norm": 0.562601625919342, |
| "learning_rate": 0.0005, |
| "loss": 0.958, |
| "step": 9830 |
| }, |
| { |
| "epoch": 0.5085008526691127, |
| "grad_norm": 0.5812455415725708, |
| "learning_rate": 0.0005, |
| "loss": 0.969, |
| "step": 9840 |
| }, |
| { |
| "epoch": 0.5090176218283293, |
| "grad_norm": 0.6318747997283936, |
| "learning_rate": 0.0005, |
| "loss": 0.9512, |
| "step": 9850 |
| }, |
| { |
| "epoch": 0.5095343909875458, |
| "grad_norm": 0.6214849352836609, |
| "learning_rate": 0.0005, |
| "loss": 0.9727, |
| "step": 9860 |
| }, |
| { |
| "epoch": 0.5100511601467624, |
| "grad_norm": 0.5631205439567566, |
| "learning_rate": 0.0005, |
| "loss": 0.9564, |
| "step": 9870 |
| }, |
| { |
| "epoch": 0.510567929305979, |
| "grad_norm": 0.626625657081604, |
| "learning_rate": 0.0005, |
| "loss": 0.9597, |
| "step": 9880 |
| }, |
| { |
| "epoch": 0.5110846984651956, |
| "grad_norm": 0.4959418475627899, |
| "learning_rate": 0.0005, |
| "loss": 0.9591, |
| "step": 9890 |
| }, |
| { |
| "epoch": 0.5116014676244122, |
| "grad_norm": 0.5196536779403687, |
| "learning_rate": 0.0005, |
| "loss": 0.9771, |
| "step": 9900 |
| }, |
| { |
| "epoch": 0.5121182367836288, |
| "grad_norm": 0.6234534382820129, |
| "learning_rate": 0.0005, |
| "loss": 0.9609, |
| "step": 9910 |
| }, |
| { |
| "epoch": 0.5126350059428453, |
| "grad_norm": 0.5823763012886047, |
| "learning_rate": 0.0005, |
| "loss": 0.9757, |
| "step": 9920 |
| }, |
| { |
| "epoch": 0.5131517751020619, |
| "grad_norm": 0.5576559901237488, |
| "learning_rate": 0.0005, |
| "loss": 0.9502, |
| "step": 9930 |
| }, |
| { |
| "epoch": 0.5136685442612785, |
| "grad_norm": 0.5374221801757812, |
| "learning_rate": 0.0005, |
| "loss": 0.9513, |
| "step": 9940 |
| }, |
| { |
| "epoch": 0.5141853134204951, |
| "grad_norm": 0.5272248387336731, |
| "learning_rate": 0.0005, |
| "loss": 0.9586, |
| "step": 9950 |
| }, |
| { |
| "epoch": 0.5147020825797116, |
| "grad_norm": 0.5568712949752808, |
| "learning_rate": 0.0005, |
| "loss": 0.957, |
| "step": 9960 |
| }, |
| { |
| "epoch": 0.5152188517389282, |
| "grad_norm": 0.5274987816810608, |
| "learning_rate": 0.0005, |
| "loss": 0.9432, |
| "step": 9970 |
| }, |
| { |
| "epoch": 0.5157356208981448, |
| "grad_norm": 0.5364307165145874, |
| "learning_rate": 0.0005, |
| "loss": 0.9548, |
| "step": 9980 |
| }, |
| { |
| "epoch": 0.5162523900573613, |
| "grad_norm": 0.5436477065086365, |
| "learning_rate": 0.0005, |
| "loss": 0.9572, |
| "step": 9990 |
| }, |
| { |
| "epoch": 0.516769159216578, |
| "grad_norm": 0.5213954448699951, |
| "learning_rate": 0.0005, |
| "loss": 0.9589, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.5172859283757946, |
| "grad_norm": 0.5076503157615662, |
| "learning_rate": 0.0005, |
| "loss": 0.9498, |
| "step": 10010 |
| }, |
| { |
| "epoch": 0.5178026975350111, |
| "grad_norm": 0.5266632437705994, |
| "learning_rate": 0.0005, |
| "loss": 0.9641, |
| "step": 10020 |
| }, |
| { |
| "epoch": 0.5183194666942277, |
| "grad_norm": 0.5237132906913757, |
| "learning_rate": 0.0005, |
| "loss": 0.9717, |
| "step": 10030 |
| }, |
| { |
| "epoch": 0.5188362358534443, |
| "grad_norm": 0.5496323704719543, |
| "learning_rate": 0.0005, |
| "loss": 0.952, |
| "step": 10040 |
| }, |
| { |
| "epoch": 0.5193530050126608, |
| "grad_norm": 0.5751678347587585, |
| "learning_rate": 0.0005, |
| "loss": 0.9508, |
| "step": 10050 |
| }, |
| { |
| "epoch": 0.5198697741718774, |
| "grad_norm": 0.5333780646324158, |
| "learning_rate": 0.0005, |
| "loss": 0.9442, |
| "step": 10060 |
| }, |
| { |
| "epoch": 0.520386543331094, |
| "grad_norm": 0.5529361367225647, |
| "learning_rate": 0.0005, |
| "loss": 0.9662, |
| "step": 10070 |
| }, |
| { |
| "epoch": 0.5209033124903106, |
| "grad_norm": 0.5695346593856812, |
| "learning_rate": 0.0005, |
| "loss": 0.9648, |
| "step": 10080 |
| }, |
| { |
| "epoch": 0.5214200816495271, |
| "grad_norm": 0.528101921081543, |
| "learning_rate": 0.0005, |
| "loss": 0.9581, |
| "step": 10090 |
| }, |
| { |
| "epoch": 0.5219368508087437, |
| "grad_norm": 0.5323454141616821, |
| "learning_rate": 0.0005, |
| "loss": 0.9842, |
| "step": 10100 |
| }, |
| { |
| "epoch": 0.5224536199679604, |
| "grad_norm": 0.5791360139846802, |
| "learning_rate": 0.0005, |
| "loss": 0.9755, |
| "step": 10110 |
| }, |
| { |
| "epoch": 0.5229703891271769, |
| "grad_norm": 0.5297543406486511, |
| "learning_rate": 0.0005, |
| "loss": 0.9706, |
| "step": 10120 |
| }, |
| { |
| "epoch": 0.5234871582863935, |
| "grad_norm": 0.5344191789627075, |
| "learning_rate": 0.0005, |
| "loss": 0.9557, |
| "step": 10130 |
| }, |
| { |
| "epoch": 0.5240039274456101, |
| "grad_norm": 0.5307314395904541, |
| "learning_rate": 0.0005, |
| "loss": 0.9561, |
| "step": 10140 |
| }, |
| { |
| "epoch": 0.5245206966048266, |
| "grad_norm": 0.5625677108764648, |
| "learning_rate": 0.0005, |
| "loss": 0.9639, |
| "step": 10150 |
| }, |
| { |
| "epoch": 0.5250374657640432, |
| "grad_norm": 0.5287933945655823, |
| "learning_rate": 0.0005, |
| "loss": 0.9458, |
| "step": 10160 |
| }, |
| { |
| "epoch": 0.5255542349232598, |
| "grad_norm": 0.4987037777900696, |
| "learning_rate": 0.0005, |
| "loss": 0.9542, |
| "step": 10170 |
| }, |
| { |
| "epoch": 0.5260710040824763, |
| "grad_norm": 0.5192455053329468, |
| "learning_rate": 0.0005, |
| "loss": 0.9534, |
| "step": 10180 |
| }, |
| { |
| "epoch": 0.5265877732416929, |
| "grad_norm": 0.5038531422615051, |
| "learning_rate": 0.0005, |
| "loss": 0.9534, |
| "step": 10190 |
| }, |
| { |
| "epoch": 0.5271045424009095, |
| "grad_norm": 0.5356433391571045, |
| "learning_rate": 0.0005, |
| "loss": 0.9657, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.527621311560126, |
| "grad_norm": 0.5290383696556091, |
| "learning_rate": 0.0005, |
| "loss": 0.9405, |
| "step": 10210 |
| }, |
| { |
| "epoch": 0.5281380807193427, |
| "grad_norm": 0.5376208424568176, |
| "learning_rate": 0.0005, |
| "loss": 0.9581, |
| "step": 10220 |
| }, |
| { |
| "epoch": 0.5286548498785593, |
| "grad_norm": 0.5011909604072571, |
| "learning_rate": 0.0005, |
| "loss": 0.9383, |
| "step": 10230 |
| }, |
| { |
| "epoch": 0.5291716190377759, |
| "grad_norm": 0.503073513507843, |
| "learning_rate": 0.0005, |
| "loss": 0.9525, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.5296883881969924, |
| "grad_norm": 0.5255160927772522, |
| "learning_rate": 0.0005, |
| "loss": 0.9525, |
| "step": 10250 |
| }, |
| { |
| "epoch": 0.530205157356209, |
| "grad_norm": 0.5147885084152222, |
| "learning_rate": 0.0005, |
| "loss": 0.9465, |
| "step": 10260 |
| }, |
| { |
| "epoch": 0.5307219265154256, |
| "grad_norm": 0.5343205332756042, |
| "learning_rate": 0.0005, |
| "loss": 0.9441, |
| "step": 10270 |
| }, |
| { |
| "epoch": 0.5312386956746421, |
| "grad_norm": 0.5480389595031738, |
| "learning_rate": 0.0005, |
| "loss": 0.9551, |
| "step": 10280 |
| }, |
| { |
| "epoch": 0.5317554648338587, |
| "grad_norm": 0.5425328612327576, |
| "learning_rate": 0.0005, |
| "loss": 0.951, |
| "step": 10290 |
| }, |
| { |
| "epoch": 0.5322722339930753, |
| "grad_norm": 0.6197424530982971, |
| "learning_rate": 0.0005, |
| "loss": 0.9467, |
| "step": 10300 |
| }, |
| { |
| "epoch": 0.5327890031522918, |
| "grad_norm": 0.5289689898490906, |
| "learning_rate": 0.0005, |
| "loss": 0.9615, |
| "step": 10310 |
| }, |
| { |
| "epoch": 0.5333057723115084, |
| "grad_norm": 0.5715579986572266, |
| "learning_rate": 0.0005, |
| "loss": 0.9572, |
| "step": 10320 |
| }, |
| { |
| "epoch": 0.533822541470725, |
| "grad_norm": 0.5315567851066589, |
| "learning_rate": 0.0005, |
| "loss": 0.961, |
| "step": 10330 |
| }, |
| { |
| "epoch": 0.5343393106299416, |
| "grad_norm": 0.5441263318061829, |
| "learning_rate": 0.0005, |
| "loss": 0.9581, |
| "step": 10340 |
| }, |
| { |
| "epoch": 0.5348560797891582, |
| "grad_norm": 0.5785178542137146, |
| "learning_rate": 0.0005, |
| "loss": 0.9479, |
| "step": 10350 |
| }, |
| { |
| "epoch": 0.5353728489483748, |
| "grad_norm": 0.5260955691337585, |
| "learning_rate": 0.0005, |
| "loss": 0.9729, |
| "step": 10360 |
| }, |
| { |
| "epoch": 0.5358896181075914, |
| "grad_norm": 0.5125389099121094, |
| "learning_rate": 0.0005, |
| "loss": 0.9568, |
| "step": 10370 |
| }, |
| { |
| "epoch": 0.5364063872668079, |
| "grad_norm": 0.5203437209129333, |
| "learning_rate": 0.0005, |
| "loss": 0.9603, |
| "step": 10380 |
| }, |
| { |
| "epoch": 0.5369231564260245, |
| "grad_norm": 0.5585212707519531, |
| "learning_rate": 0.0005, |
| "loss": 0.9599, |
| "step": 10390 |
| }, |
| { |
| "epoch": 0.5374399255852411, |
| "grad_norm": 0.48404642939567566, |
| "learning_rate": 0.0005, |
| "loss": 0.9494, |
| "step": 10400 |
| }, |
| { |
| "epoch": 0.5379566947444576, |
| "grad_norm": 0.65147465467453, |
| "learning_rate": 0.0005, |
| "loss": 0.9469, |
| "step": 10410 |
| }, |
| { |
| "epoch": 0.5384734639036742, |
| "grad_norm": 0.5233981013298035, |
| "learning_rate": 0.0005, |
| "loss": 0.9564, |
| "step": 10420 |
| }, |
| { |
| "epoch": 0.5389902330628908, |
| "grad_norm": 0.5470656156539917, |
| "learning_rate": 0.0005, |
| "loss": 0.9377, |
| "step": 10430 |
| }, |
| { |
| "epoch": 0.5395070022221073, |
| "grad_norm": 0.522283673286438, |
| "learning_rate": 0.0005, |
| "loss": 0.9431, |
| "step": 10440 |
| }, |
| { |
| "epoch": 0.540023771381324, |
| "grad_norm": 0.5491459965705872, |
| "learning_rate": 0.0005, |
| "loss": 0.9565, |
| "step": 10450 |
| }, |
| { |
| "epoch": 0.5405405405405406, |
| "grad_norm": 0.5251693725585938, |
| "learning_rate": 0.0005, |
| "loss": 0.9485, |
| "step": 10460 |
| }, |
| { |
| "epoch": 0.5410573096997571, |
| "grad_norm": 0.5080156922340393, |
| "learning_rate": 0.0005, |
| "loss": 0.9577, |
| "step": 10470 |
| }, |
| { |
| "epoch": 0.5415740788589737, |
| "grad_norm": 0.5703207850456238, |
| "learning_rate": 0.0005, |
| "loss": 0.969, |
| "step": 10480 |
| }, |
| { |
| "epoch": 0.5420908480181903, |
| "grad_norm": 0.5768096446990967, |
| "learning_rate": 0.0005, |
| "loss": 0.9481, |
| "step": 10490 |
| }, |
| { |
| "epoch": 0.5426076171774068, |
| "grad_norm": 0.5119413733482361, |
| "learning_rate": 0.0005, |
| "loss": 0.9491, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.5431243863366234, |
| "grad_norm": 0.5329270958900452, |
| "learning_rate": 0.0005, |
| "loss": 0.9625, |
| "step": 10510 |
| }, |
| { |
| "epoch": 0.54364115549584, |
| "grad_norm": 0.528266966342926, |
| "learning_rate": 0.0005, |
| "loss": 0.9477, |
| "step": 10520 |
| }, |
| { |
| "epoch": 0.5441579246550566, |
| "grad_norm": 0.5584282279014587, |
| "learning_rate": 0.0005, |
| "loss": 0.9555, |
| "step": 10530 |
| }, |
| { |
| "epoch": 0.5446746938142731, |
| "grad_norm": 0.5280376672744751, |
| "learning_rate": 0.0005, |
| "loss": 0.9404, |
| "step": 10540 |
| }, |
| { |
| "epoch": 0.5451914629734897, |
| "grad_norm": 0.512711763381958, |
| "learning_rate": 0.0005, |
| "loss": 0.9551, |
| "step": 10550 |
| }, |
| { |
| "epoch": 0.5457082321327064, |
| "grad_norm": 0.5412839651107788, |
| "learning_rate": 0.0005, |
| "loss": 0.946, |
| "step": 10560 |
| }, |
| { |
| "epoch": 0.5462250012919229, |
| "grad_norm": 0.5105991363525391, |
| "learning_rate": 0.0005, |
| "loss": 0.9507, |
| "step": 10570 |
| }, |
| { |
| "epoch": 0.5467417704511395, |
| "grad_norm": 0.5690359473228455, |
| "learning_rate": 0.0005, |
| "loss": 0.9532, |
| "step": 10580 |
| }, |
| { |
| "epoch": 0.5472585396103561, |
| "grad_norm": 0.5333488583564758, |
| "learning_rate": 0.0005, |
| "loss": 0.9634, |
| "step": 10590 |
| }, |
| { |
| "epoch": 0.5477753087695726, |
| "grad_norm": 0.5984283089637756, |
| "learning_rate": 0.0005, |
| "loss": 0.9624, |
| "step": 10600 |
| }, |
| { |
| "epoch": 0.5482920779287892, |
| "grad_norm": 0.5076044201850891, |
| "learning_rate": 0.0005, |
| "loss": 0.9426, |
| "step": 10610 |
| }, |
| { |
| "epoch": 0.5488088470880058, |
| "grad_norm": 0.5287521481513977, |
| "learning_rate": 0.0005, |
| "loss": 0.9571, |
| "step": 10620 |
| }, |
| { |
| "epoch": 0.5493256162472223, |
| "grad_norm": 0.5479470491409302, |
| "learning_rate": 0.0005, |
| "loss": 0.9424, |
| "step": 10630 |
| }, |
| { |
| "epoch": 0.5498423854064389, |
| "grad_norm": 0.5120390057563782, |
| "learning_rate": 0.0005, |
| "loss": 0.9471, |
| "step": 10640 |
| }, |
| { |
| "epoch": 0.5503591545656555, |
| "grad_norm": 0.5130133032798767, |
| "learning_rate": 0.0005, |
| "loss": 0.9605, |
| "step": 10650 |
| }, |
| { |
| "epoch": 0.5508759237248722, |
| "grad_norm": 0.5507628917694092, |
| "learning_rate": 0.0005, |
| "loss": 0.9545, |
| "step": 10660 |
| }, |
| { |
| "epoch": 0.5513926928840887, |
| "grad_norm": 0.4929947555065155, |
| "learning_rate": 0.0005, |
| "loss": 0.9415, |
| "step": 10670 |
| }, |
| { |
| "epoch": 0.5519094620433053, |
| "grad_norm": 0.5119226574897766, |
| "learning_rate": 0.0005, |
| "loss": 0.9564, |
| "step": 10680 |
| }, |
| { |
| "epoch": 0.5524262312025219, |
| "grad_norm": 0.5126231908798218, |
| "learning_rate": 0.0005, |
| "loss": 0.9467, |
| "step": 10690 |
| }, |
| { |
| "epoch": 0.5529430003617384, |
| "grad_norm": 0.5123251676559448, |
| "learning_rate": 0.0005, |
| "loss": 0.9412, |
| "step": 10700 |
| }, |
| { |
| "epoch": 0.553459769520955, |
| "grad_norm": 0.5106756687164307, |
| "learning_rate": 0.0005, |
| "loss": 0.9583, |
| "step": 10710 |
| }, |
| { |
| "epoch": 0.5539765386801716, |
| "grad_norm": 0.520325243473053, |
| "learning_rate": 0.0005, |
| "loss": 0.9593, |
| "step": 10720 |
| }, |
| { |
| "epoch": 0.5544933078393881, |
| "grad_norm": 0.6005384922027588, |
| "learning_rate": 0.0005, |
| "loss": 0.9617, |
| "step": 10730 |
| }, |
| { |
| "epoch": 0.5550100769986047, |
| "grad_norm": 0.49362891912460327, |
| "learning_rate": 0.0005, |
| "loss": 0.9476, |
| "step": 10740 |
| }, |
| { |
| "epoch": 0.5555268461578213, |
| "grad_norm": 0.5586000084877014, |
| "learning_rate": 0.0005, |
| "loss": 0.9594, |
| "step": 10750 |
| }, |
| { |
| "epoch": 0.5560436153170378, |
| "grad_norm": 0.5586140155792236, |
| "learning_rate": 0.0005, |
| "loss": 0.9343, |
| "step": 10760 |
| }, |
| { |
| "epoch": 0.5565603844762544, |
| "grad_norm": 0.5251288414001465, |
| "learning_rate": 0.0005, |
| "loss": 0.945, |
| "step": 10770 |
| }, |
| { |
| "epoch": 0.5570771536354711, |
| "grad_norm": 0.5328302383422852, |
| "learning_rate": 0.0005, |
| "loss": 0.9479, |
| "step": 10780 |
| }, |
| { |
| "epoch": 0.5575939227946876, |
| "grad_norm": 0.49472010135650635, |
| "learning_rate": 0.0005, |
| "loss": 0.9668, |
| "step": 10790 |
| }, |
| { |
| "epoch": 0.5581106919539042, |
| "grad_norm": 0.5159969925880432, |
| "learning_rate": 0.0005, |
| "loss": 0.9559, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.5586274611131208, |
| "grad_norm": 0.5159046649932861, |
| "learning_rate": 0.0005, |
| "loss": 0.9397, |
| "step": 10810 |
| }, |
| { |
| "epoch": 0.5591442302723374, |
| "grad_norm": 0.5191036462783813, |
| "learning_rate": 0.0005, |
| "loss": 0.9466, |
| "step": 10820 |
| }, |
| { |
| "epoch": 0.5596609994315539, |
| "grad_norm": 0.5178474187850952, |
| "learning_rate": 0.0005, |
| "loss": 0.9439, |
| "step": 10830 |
| }, |
| { |
| "epoch": 0.5601777685907705, |
| "grad_norm": 0.5447880625724792, |
| "learning_rate": 0.0005, |
| "loss": 0.9576, |
| "step": 10840 |
| }, |
| { |
| "epoch": 0.5606945377499871, |
| "grad_norm": 0.5056577920913696, |
| "learning_rate": 0.0005, |
| "loss": 0.9514, |
| "step": 10850 |
| }, |
| { |
| "epoch": 0.5612113069092036, |
| "grad_norm": 0.5639669299125671, |
| "learning_rate": 0.0005, |
| "loss": 0.9482, |
| "step": 10860 |
| }, |
| { |
| "epoch": 0.5617280760684202, |
| "grad_norm": 0.570584774017334, |
| "learning_rate": 0.0005, |
| "loss": 0.9602, |
| "step": 10870 |
| }, |
| { |
| "epoch": 0.5622448452276368, |
| "grad_norm": 0.5161934494972229, |
| "learning_rate": 0.0005, |
| "loss": 0.9366, |
| "step": 10880 |
| }, |
| { |
| "epoch": 0.5627616143868533, |
| "grad_norm": 0.5521616339683533, |
| "learning_rate": 0.0005, |
| "loss": 0.9535, |
| "step": 10890 |
| }, |
| { |
| "epoch": 0.56327838354607, |
| "grad_norm": 0.5411272644996643, |
| "learning_rate": 0.0005, |
| "loss": 0.9324, |
| "step": 10900 |
| }, |
| { |
| "epoch": 0.5637951527052866, |
| "grad_norm": 0.5098778605461121, |
| "learning_rate": 0.0005, |
| "loss": 0.9408, |
| "step": 10910 |
| }, |
| { |
| "epoch": 0.5643119218645031, |
| "grad_norm": 0.4730329215526581, |
| "learning_rate": 0.0005, |
| "loss": 0.946, |
| "step": 10920 |
| }, |
| { |
| "epoch": 0.5648286910237197, |
| "grad_norm": 0.5085341334342957, |
| "learning_rate": 0.0005, |
| "loss": 0.9469, |
| "step": 10930 |
| }, |
| { |
| "epoch": 0.5653454601829363, |
| "grad_norm": 0.5201531052589417, |
| "learning_rate": 0.0005, |
| "loss": 0.9583, |
| "step": 10940 |
| }, |
| { |
| "epoch": 0.5658622293421529, |
| "grad_norm": 0.4958653748035431, |
| "learning_rate": 0.0005, |
| "loss": 0.9542, |
| "step": 10950 |
| }, |
| { |
| "epoch": 0.5663789985013694, |
| "grad_norm": 0.5279732942581177, |
| "learning_rate": 0.0005, |
| "loss": 0.9503, |
| "step": 10960 |
| }, |
| { |
| "epoch": 0.566895767660586, |
| "grad_norm": 0.5014291405677795, |
| "learning_rate": 0.0005, |
| "loss": 0.9562, |
| "step": 10970 |
| }, |
| { |
| "epoch": 0.5674125368198026, |
| "grad_norm": 0.5004532337188721, |
| "learning_rate": 0.0005, |
| "loss": 0.937, |
| "step": 10980 |
| }, |
| { |
| "epoch": 0.5679293059790191, |
| "grad_norm": 0.5091339349746704, |
| "learning_rate": 0.0005, |
| "loss": 0.9442, |
| "step": 10990 |
| }, |
| { |
| "epoch": 0.5684460751382358, |
| "grad_norm": 0.5625014901161194, |
| "learning_rate": 0.0005, |
| "loss": 0.9426, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.5689628442974524, |
| "grad_norm": 0.5026536583900452, |
| "learning_rate": 0.0005, |
| "loss": 0.952, |
| "step": 11010 |
| }, |
| { |
| "epoch": 0.5694796134566689, |
| "grad_norm": 0.4980801045894623, |
| "learning_rate": 0.0005, |
| "loss": 0.9347, |
| "step": 11020 |
| }, |
| { |
| "epoch": 0.5699963826158855, |
| "grad_norm": 0.4974989593029022, |
| "learning_rate": 0.0005, |
| "loss": 0.9442, |
| "step": 11030 |
| }, |
| { |
| "epoch": 0.5705131517751021, |
| "grad_norm": 0.5242035388946533, |
| "learning_rate": 0.0005, |
| "loss": 0.9464, |
| "step": 11040 |
| }, |
| { |
| "epoch": 0.5710299209343186, |
| "grad_norm": 0.5066283941268921, |
| "learning_rate": 0.0005, |
| "loss": 0.9276, |
| "step": 11050 |
| }, |
| { |
| "epoch": 0.5715466900935352, |
| "grad_norm": 0.508834958076477, |
| "learning_rate": 0.0005, |
| "loss": 0.9402, |
| "step": 11060 |
| }, |
| { |
| "epoch": 0.5720634592527518, |
| "grad_norm": 0.5046612024307251, |
| "learning_rate": 0.0005, |
| "loss": 0.9487, |
| "step": 11070 |
| }, |
| { |
| "epoch": 0.5725802284119684, |
| "grad_norm": 0.5268915891647339, |
| "learning_rate": 0.0005, |
| "loss": 0.9415, |
| "step": 11080 |
| }, |
| { |
| "epoch": 0.5730969975711849, |
| "grad_norm": 0.5040035247802734, |
| "learning_rate": 0.0005, |
| "loss": 0.9326, |
| "step": 11090 |
| }, |
| { |
| "epoch": 0.5736137667304015, |
| "grad_norm": 0.500636100769043, |
| "learning_rate": 0.0005, |
| "loss": 0.9422, |
| "step": 11100 |
| }, |
| { |
| "epoch": 0.5741305358896182, |
| "grad_norm": 0.5215865969657898, |
| "learning_rate": 0.0005, |
| "loss": 0.9414, |
| "step": 11110 |
| }, |
| { |
| "epoch": 0.5746473050488347, |
| "grad_norm": 0.5058110356330872, |
| "learning_rate": 0.0005, |
| "loss": 0.9522, |
| "step": 11120 |
| }, |
| { |
| "epoch": 0.5751640742080513, |
| "grad_norm": 0.5117678046226501, |
| "learning_rate": 0.0005, |
| "loss": 0.9518, |
| "step": 11130 |
| }, |
| { |
| "epoch": 0.5756808433672679, |
| "grad_norm": 0.5039757490158081, |
| "learning_rate": 0.0005, |
| "loss": 0.9418, |
| "step": 11140 |
| }, |
| { |
| "epoch": 0.5761976125264844, |
| "grad_norm": 0.5518759489059448, |
| "learning_rate": 0.0005, |
| "loss": 0.9407, |
| "step": 11150 |
| }, |
| { |
| "epoch": 0.576714381685701, |
| "grad_norm": 0.5106251239776611, |
| "learning_rate": 0.0005, |
| "loss": 0.9367, |
| "step": 11160 |
| }, |
| { |
| "epoch": 0.5772311508449176, |
| "grad_norm": 0.5682827830314636, |
| "learning_rate": 0.0005, |
| "loss": 0.945, |
| "step": 11170 |
| }, |
| { |
| "epoch": 0.5777479200041341, |
| "grad_norm": 0.521513044834137, |
| "learning_rate": 0.0005, |
| "loss": 0.9453, |
| "step": 11180 |
| }, |
| { |
| "epoch": 0.5782646891633507, |
| "grad_norm": 0.5230028629302979, |
| "learning_rate": 0.0005, |
| "loss": 0.9544, |
| "step": 11190 |
| }, |
| { |
| "epoch": 0.5787814583225673, |
| "grad_norm": 0.5285042524337769, |
| "learning_rate": 0.0005, |
| "loss": 0.9459, |
| "step": 11200 |
| }, |
| { |
| "epoch": 0.5792982274817838, |
| "grad_norm": 0.5230273604393005, |
| "learning_rate": 0.0005, |
| "loss": 0.9354, |
| "step": 11210 |
| }, |
| { |
| "epoch": 0.5798149966410004, |
| "grad_norm": 0.5298386216163635, |
| "learning_rate": 0.0005, |
| "loss": 0.9578, |
| "step": 11220 |
| }, |
| { |
| "epoch": 0.5803317658002171, |
| "grad_norm": 0.5199642181396484, |
| "learning_rate": 0.0005, |
| "loss": 0.9559, |
| "step": 11230 |
| }, |
| { |
| "epoch": 0.5808485349594337, |
| "grad_norm": 0.5283148884773254, |
| "learning_rate": 0.0005, |
| "loss": 0.9315, |
| "step": 11240 |
| }, |
| { |
| "epoch": 0.5813653041186502, |
| "grad_norm": 0.5081456303596497, |
| "learning_rate": 0.0005, |
| "loss": 0.936, |
| "step": 11250 |
| }, |
| { |
| "epoch": 0.5818820732778668, |
| "grad_norm": 0.4844646751880646, |
| "learning_rate": 0.0005, |
| "loss": 0.9478, |
| "step": 11260 |
| }, |
| { |
| "epoch": 0.5823988424370834, |
| "grad_norm": 0.5176190733909607, |
| "learning_rate": 0.0005, |
| "loss": 0.918, |
| "step": 11270 |
| }, |
| { |
| "epoch": 0.5829156115962999, |
| "grad_norm": 0.5267295241355896, |
| "learning_rate": 0.0005, |
| "loss": 0.9462, |
| "step": 11280 |
| }, |
| { |
| "epoch": 0.5834323807555165, |
| "grad_norm": 0.5780160427093506, |
| "learning_rate": 0.0005, |
| "loss": 0.9302, |
| "step": 11290 |
| }, |
| { |
| "epoch": 0.5839491499147331, |
| "grad_norm": 0.47616294026374817, |
| "learning_rate": 0.0005, |
| "loss": 0.945, |
| "step": 11300 |
| }, |
| { |
| "epoch": 0.5844659190739496, |
| "grad_norm": 0.556125283241272, |
| "learning_rate": 0.0005, |
| "loss": 0.9306, |
| "step": 11310 |
| }, |
| { |
| "epoch": 0.5849826882331662, |
| "grad_norm": 0.5071564316749573, |
| "learning_rate": 0.0005, |
| "loss": 0.9611, |
| "step": 11320 |
| }, |
| { |
| "epoch": 0.5854994573923828, |
| "grad_norm": 0.5186158418655396, |
| "learning_rate": 0.0005, |
| "loss": 0.9311, |
| "step": 11330 |
| }, |
| { |
| "epoch": 0.5860162265515994, |
| "grad_norm": 0.48720046877861023, |
| "learning_rate": 0.0005, |
| "loss": 0.9609, |
| "step": 11340 |
| }, |
| { |
| "epoch": 0.586532995710816, |
| "grad_norm": 0.49717170000076294, |
| "learning_rate": 0.0005, |
| "loss": 0.957, |
| "step": 11350 |
| }, |
| { |
| "epoch": 0.5870497648700326, |
| "grad_norm": 0.534752368927002, |
| "learning_rate": 0.0005, |
| "loss": 0.94, |
| "step": 11360 |
| }, |
| { |
| "epoch": 0.5875665340292492, |
| "grad_norm": 0.523997962474823, |
| "learning_rate": 0.0005, |
| "loss": 0.9373, |
| "step": 11370 |
| }, |
| { |
| "epoch": 0.5880833031884657, |
| "grad_norm": 0.49437177181243896, |
| "learning_rate": 0.0005, |
| "loss": 0.9327, |
| "step": 11380 |
| }, |
| { |
| "epoch": 0.5886000723476823, |
| "grad_norm": 0.4986345171928406, |
| "learning_rate": 0.0005, |
| "loss": 0.9353, |
| "step": 11390 |
| }, |
| { |
| "epoch": 0.5891168415068989, |
| "grad_norm": 0.49254122376441956, |
| "learning_rate": 0.0005, |
| "loss": 0.9451, |
| "step": 11400 |
| }, |
| { |
| "epoch": 0.5896336106661154, |
| "grad_norm": 0.5066004991531372, |
| "learning_rate": 0.0005, |
| "loss": 0.9307, |
| "step": 11410 |
| }, |
| { |
| "epoch": 0.590150379825332, |
| "grad_norm": 0.4954734444618225, |
| "learning_rate": 0.0005, |
| "loss": 0.9345, |
| "step": 11420 |
| }, |
| { |
| "epoch": 0.5906671489845486, |
| "grad_norm": 0.4814952313899994, |
| "learning_rate": 0.0005, |
| "loss": 0.9383, |
| "step": 11430 |
| }, |
| { |
| "epoch": 0.5911839181437651, |
| "grad_norm": 0.48946642875671387, |
| "learning_rate": 0.0005, |
| "loss": 0.9314, |
| "step": 11440 |
| }, |
| { |
| "epoch": 0.5917006873029818, |
| "grad_norm": 0.5009201765060425, |
| "learning_rate": 0.0005, |
| "loss": 0.9532, |
| "step": 11450 |
| }, |
| { |
| "epoch": 0.5922174564621984, |
| "grad_norm": 0.5228848457336426, |
| "learning_rate": 0.0005, |
| "loss": 0.9346, |
| "step": 11460 |
| }, |
| { |
| "epoch": 0.5927342256214149, |
| "grad_norm": 0.5121431350708008, |
| "learning_rate": 0.0005, |
| "loss": 0.9367, |
| "step": 11470 |
| }, |
| { |
| "epoch": 0.5932509947806315, |
| "grad_norm": 0.49431100487709045, |
| "learning_rate": 0.0005, |
| "loss": 0.9261, |
| "step": 11480 |
| }, |
| { |
| "epoch": 0.5937677639398481, |
| "grad_norm": 0.516291081905365, |
| "learning_rate": 0.0005, |
| "loss": 0.9452, |
| "step": 11490 |
| }, |
| { |
| "epoch": 0.5942845330990646, |
| "grad_norm": 0.5128830671310425, |
| "learning_rate": 0.0005, |
| "loss": 0.9446, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.5948013022582812, |
| "grad_norm": 0.5089874267578125, |
| "learning_rate": 0.0005, |
| "loss": 0.9321, |
| "step": 11510 |
| }, |
| { |
| "epoch": 0.5953180714174978, |
| "grad_norm": 0.5457943677902222, |
| "learning_rate": 0.0005, |
| "loss": 0.9349, |
| "step": 11520 |
| }, |
| { |
| "epoch": 0.5958348405767144, |
| "grad_norm": 0.5342771410942078, |
| "learning_rate": 0.0005, |
| "loss": 0.9326, |
| "step": 11530 |
| }, |
| { |
| "epoch": 0.5963516097359309, |
| "grad_norm": 0.511667788028717, |
| "learning_rate": 0.0005, |
| "loss": 0.933, |
| "step": 11540 |
| }, |
| { |
| "epoch": 0.5968683788951475, |
| "grad_norm": 0.5304045677185059, |
| "learning_rate": 0.0005, |
| "loss": 0.9278, |
| "step": 11550 |
| }, |
| { |
| "epoch": 0.5973851480543642, |
| "grad_norm": 0.5285548567771912, |
| "learning_rate": 0.0005, |
| "loss": 0.9451, |
| "step": 11560 |
| }, |
| { |
| "epoch": 0.5979019172135807, |
| "grad_norm": 0.5200523734092712, |
| "learning_rate": 0.0005, |
| "loss": 0.9256, |
| "step": 11570 |
| }, |
| { |
| "epoch": 0.5984186863727973, |
| "grad_norm": 0.49133771657943726, |
| "learning_rate": 0.0005, |
| "loss": 0.9107, |
| "step": 11580 |
| }, |
| { |
| "epoch": 0.5989354555320139, |
| "grad_norm": 0.5477631092071533, |
| "learning_rate": 0.0005, |
| "loss": 0.9273, |
| "step": 11590 |
| }, |
| { |
| "epoch": 0.5994522246912304, |
| "grad_norm": 0.5735862255096436, |
| "learning_rate": 0.0005, |
| "loss": 0.9358, |
| "step": 11600 |
| }, |
| { |
| "epoch": 0.599968993850447, |
| "grad_norm": 0.48721542954444885, |
| "learning_rate": 0.0005, |
| "loss": 0.9273, |
| "step": 11610 |
| }, |
| { |
| "epoch": 0.6004857630096636, |
| "grad_norm": 0.5106229186058044, |
| "learning_rate": 0.0005, |
| "loss": 0.9283, |
| "step": 11620 |
| }, |
| { |
| "epoch": 0.6010025321688801, |
| "grad_norm": 0.4914691746234894, |
| "learning_rate": 0.0005, |
| "loss": 0.9303, |
| "step": 11630 |
| }, |
| { |
| "epoch": 0.6015193013280967, |
| "grad_norm": 0.5924090147018433, |
| "learning_rate": 0.0005, |
| "loss": 0.9199, |
| "step": 11640 |
| }, |
| { |
| "epoch": 0.6020360704873133, |
| "grad_norm": 0.4983723759651184, |
| "learning_rate": 0.0005, |
| "loss": 0.9384, |
| "step": 11650 |
| }, |
| { |
| "epoch": 0.60255283964653, |
| "grad_norm": 0.52519690990448, |
| "learning_rate": 0.0005, |
| "loss": 0.934, |
| "step": 11660 |
| }, |
| { |
| "epoch": 0.6030696088057464, |
| "grad_norm": 0.5365654826164246, |
| "learning_rate": 0.0005, |
| "loss": 0.9342, |
| "step": 11670 |
| }, |
| { |
| "epoch": 0.6035863779649631, |
| "grad_norm": 0.4914066195487976, |
| "learning_rate": 0.0005, |
| "loss": 0.9453, |
| "step": 11680 |
| }, |
| { |
| "epoch": 0.6041031471241797, |
| "grad_norm": 0.4888913929462433, |
| "learning_rate": 0.0005, |
| "loss": 0.9322, |
| "step": 11690 |
| }, |
| { |
| "epoch": 0.6046199162833962, |
| "grad_norm": 0.4911440908908844, |
| "learning_rate": 0.0005, |
| "loss": 0.9327, |
| "step": 11700 |
| }, |
| { |
| "epoch": 0.6051366854426128, |
| "grad_norm": 0.5005333423614502, |
| "learning_rate": 0.0005, |
| "loss": 0.9467, |
| "step": 11710 |
| }, |
| { |
| "epoch": 0.6056534546018294, |
| "grad_norm": 0.5367693901062012, |
| "learning_rate": 0.0005, |
| "loss": 0.9384, |
| "step": 11720 |
| }, |
| { |
| "epoch": 0.6061702237610459, |
| "grad_norm": 0.48554107546806335, |
| "learning_rate": 0.0005, |
| "loss": 0.9446, |
| "step": 11730 |
| }, |
| { |
| "epoch": 0.6066869929202625, |
| "grad_norm": 0.514530599117279, |
| "learning_rate": 0.0005, |
| "loss": 0.914, |
| "step": 11740 |
| }, |
| { |
| "epoch": 0.6072037620794791, |
| "grad_norm": 0.5004679560661316, |
| "learning_rate": 0.0005, |
| "loss": 0.9342, |
| "step": 11750 |
| }, |
| { |
| "epoch": 0.6077205312386956, |
| "grad_norm": 0.516576886177063, |
| "learning_rate": 0.0005, |
| "loss": 0.9325, |
| "step": 11760 |
| }, |
| { |
| "epoch": 0.6082373003979122, |
| "grad_norm": 0.5298195481300354, |
| "learning_rate": 0.0005, |
| "loss": 0.9324, |
| "step": 11770 |
| }, |
| { |
| "epoch": 0.6087540695571289, |
| "grad_norm": 0.4899151921272278, |
| "learning_rate": 0.0005, |
| "loss": 0.9161, |
| "step": 11780 |
| }, |
| { |
| "epoch": 0.6092708387163454, |
| "grad_norm": 0.5261816382408142, |
| "learning_rate": 0.0005, |
| "loss": 0.9393, |
| "step": 11790 |
| }, |
| { |
| "epoch": 0.609787607875562, |
| "grad_norm": 0.5143525004386902, |
| "learning_rate": 0.0005, |
| "loss": 0.9393, |
| "step": 11800 |
| }, |
| { |
| "epoch": 0.6103043770347786, |
| "grad_norm": 0.521551251411438, |
| "learning_rate": 0.0005, |
| "loss": 0.9291, |
| "step": 11810 |
| }, |
| { |
| "epoch": 0.6108211461939952, |
| "grad_norm": 0.4708675444126129, |
| "learning_rate": 0.0005, |
| "loss": 0.9462, |
| "step": 11820 |
| }, |
| { |
| "epoch": 0.6113379153532117, |
| "grad_norm": 0.47985512018203735, |
| "learning_rate": 0.0005, |
| "loss": 0.9355, |
| "step": 11830 |
| }, |
| { |
| "epoch": 0.6118546845124283, |
| "grad_norm": 0.5093055367469788, |
| "learning_rate": 0.0005, |
| "loss": 0.9301, |
| "step": 11840 |
| }, |
| { |
| "epoch": 0.6123714536716449, |
| "grad_norm": 0.5011575222015381, |
| "learning_rate": 0.0005, |
| "loss": 0.9382, |
| "step": 11850 |
| }, |
| { |
| "epoch": 0.6128882228308614, |
| "grad_norm": 0.5071706771850586, |
| "learning_rate": 0.0005, |
| "loss": 0.9425, |
| "step": 11860 |
| }, |
| { |
| "epoch": 0.613404991990078, |
| "grad_norm": 0.49520188570022583, |
| "learning_rate": 0.0005, |
| "loss": 0.9402, |
| "step": 11870 |
| }, |
| { |
| "epoch": 0.6139217611492946, |
| "grad_norm": 0.46812620759010315, |
| "learning_rate": 0.0005, |
| "loss": 0.9325, |
| "step": 11880 |
| }, |
| { |
| "epoch": 0.6144385303085111, |
| "grad_norm": 0.524341344833374, |
| "learning_rate": 0.0005, |
| "loss": 0.9267, |
| "step": 11890 |
| }, |
| { |
| "epoch": 0.6149552994677278, |
| "grad_norm": 0.48518240451812744, |
| "learning_rate": 0.0005, |
| "loss": 0.938, |
| "step": 11900 |
| }, |
| { |
| "epoch": 0.6154720686269444, |
| "grad_norm": 0.5080456137657166, |
| "learning_rate": 0.0005, |
| "loss": 0.9341, |
| "step": 11910 |
| }, |
| { |
| "epoch": 0.6159888377861609, |
| "grad_norm": 0.5626226663589478, |
| "learning_rate": 0.0005, |
| "loss": 0.9258, |
| "step": 11920 |
| }, |
| { |
| "epoch": 0.6165056069453775, |
| "grad_norm": 0.47337082028388977, |
| "learning_rate": 0.0005, |
| "loss": 0.9421, |
| "step": 11930 |
| }, |
| { |
| "epoch": 0.6170223761045941, |
| "grad_norm": 0.4747110903263092, |
| "learning_rate": 0.0005, |
| "loss": 0.9339, |
| "step": 11940 |
| }, |
| { |
| "epoch": 0.6175391452638107, |
| "grad_norm": 0.5242559909820557, |
| "learning_rate": 0.0005, |
| "loss": 0.942, |
| "step": 11950 |
| }, |
| { |
| "epoch": 0.6180559144230272, |
| "grad_norm": 0.5247402191162109, |
| "learning_rate": 0.0005, |
| "loss": 0.9269, |
| "step": 11960 |
| }, |
| { |
| "epoch": 0.6185726835822438, |
| "grad_norm": 0.5551696419715881, |
| "learning_rate": 0.0005, |
| "loss": 0.9268, |
| "step": 11970 |
| }, |
| { |
| "epoch": 0.6190894527414604, |
| "grad_norm": 0.5222793817520142, |
| "learning_rate": 0.0005, |
| "loss": 0.9331, |
| "step": 11980 |
| }, |
| { |
| "epoch": 0.6196062219006769, |
| "grad_norm": 0.49412423372268677, |
| "learning_rate": 0.0005, |
| "loss": 0.9292, |
| "step": 11990 |
| }, |
| { |
| "epoch": 0.6201229910598935, |
| "grad_norm": 0.49935638904571533, |
| "learning_rate": 0.0005, |
| "loss": 0.9168, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.6206397602191102, |
| "grad_norm": 0.5514285564422607, |
| "learning_rate": 0.0005, |
| "loss": 0.9289, |
| "step": 12010 |
| }, |
| { |
| "epoch": 0.6211565293783267, |
| "grad_norm": 0.5182361602783203, |
| "learning_rate": 0.0005, |
| "loss": 0.9359, |
| "step": 12020 |
| }, |
| { |
| "epoch": 0.6216732985375433, |
| "grad_norm": 0.5162422060966492, |
| "learning_rate": 0.0005, |
| "loss": 0.9257, |
| "step": 12030 |
| }, |
| { |
| "epoch": 0.6221900676967599, |
| "grad_norm": 0.4926648437976837, |
| "learning_rate": 0.0005, |
| "loss": 0.935, |
| "step": 12040 |
| }, |
| { |
| "epoch": 0.6227068368559764, |
| "grad_norm": 0.5213857293128967, |
| "learning_rate": 0.0005, |
| "loss": 0.9353, |
| "step": 12050 |
| }, |
| { |
| "epoch": 0.623223606015193, |
| "grad_norm": 0.5043472051620483, |
| "learning_rate": 0.0005, |
| "loss": 0.9499, |
| "step": 12060 |
| }, |
| { |
| "epoch": 0.6237403751744096, |
| "grad_norm": 0.48353925347328186, |
| "learning_rate": 0.0005, |
| "loss": 0.9319, |
| "step": 12070 |
| }, |
| { |
| "epoch": 0.6242571443336262, |
| "grad_norm": 0.5488812923431396, |
| "learning_rate": 0.0005, |
| "loss": 0.9262, |
| "step": 12080 |
| }, |
| { |
| "epoch": 0.6247739134928427, |
| "grad_norm": 0.5349071621894836, |
| "learning_rate": 0.0005, |
| "loss": 0.9317, |
| "step": 12090 |
| }, |
| { |
| "epoch": 0.6252906826520593, |
| "grad_norm": 0.5111981630325317, |
| "learning_rate": 0.0005, |
| "loss": 0.9128, |
| "step": 12100 |
| }, |
| { |
| "epoch": 0.625807451811276, |
| "grad_norm": 0.525330126285553, |
| "learning_rate": 0.0005, |
| "loss": 0.9212, |
| "step": 12110 |
| }, |
| { |
| "epoch": 0.6263242209704925, |
| "grad_norm": 0.5191537141799927, |
| "learning_rate": 0.0005, |
| "loss": 0.9313, |
| "step": 12120 |
| }, |
| { |
| "epoch": 0.6268409901297091, |
| "grad_norm": 0.49418073892593384, |
| "learning_rate": 0.0005, |
| "loss": 0.9408, |
| "step": 12130 |
| }, |
| { |
| "epoch": 0.6273577592889257, |
| "grad_norm": 0.49373695254325867, |
| "learning_rate": 0.0005, |
| "loss": 0.9226, |
| "step": 12140 |
| }, |
| { |
| "epoch": 0.6278745284481422, |
| "grad_norm": 0.488068550825119, |
| "learning_rate": 0.0005, |
| "loss": 0.9407, |
| "step": 12150 |
| }, |
| { |
| "epoch": 0.6283912976073588, |
| "grad_norm": 0.5186513662338257, |
| "learning_rate": 0.0005, |
| "loss": 0.9351, |
| "step": 12160 |
| }, |
| { |
| "epoch": 0.6289080667665754, |
| "grad_norm": 0.532514750957489, |
| "learning_rate": 0.0005, |
| "loss": 0.9323, |
| "step": 12170 |
| }, |
| { |
| "epoch": 0.6294248359257919, |
| "grad_norm": 0.4832149147987366, |
| "learning_rate": 0.0005, |
| "loss": 0.9303, |
| "step": 12180 |
| }, |
| { |
| "epoch": 0.6299416050850085, |
| "grad_norm": 0.5020478963851929, |
| "learning_rate": 0.0005, |
| "loss": 0.9278, |
| "step": 12190 |
| }, |
| { |
| "epoch": 0.6304583742442251, |
| "grad_norm": 0.45874807238578796, |
| "learning_rate": 0.0005, |
| "loss": 0.9205, |
| "step": 12200 |
| }, |
| { |
| "epoch": 0.6309751434034416, |
| "grad_norm": 0.5273077487945557, |
| "learning_rate": 0.0005, |
| "loss": 0.9133, |
| "step": 12210 |
| }, |
| { |
| "epoch": 0.6314919125626582, |
| "grad_norm": 0.49270930886268616, |
| "learning_rate": 0.0005, |
| "loss": 0.9228, |
| "step": 12220 |
| }, |
| { |
| "epoch": 0.6320086817218749, |
| "grad_norm": 0.47435376048088074, |
| "learning_rate": 0.0005, |
| "loss": 0.937, |
| "step": 12230 |
| }, |
| { |
| "epoch": 0.6325254508810915, |
| "grad_norm": 0.49013498425483704, |
| "learning_rate": 0.0005, |
| "loss": 0.925, |
| "step": 12240 |
| }, |
| { |
| "epoch": 0.633042220040308, |
| "grad_norm": 0.481581449508667, |
| "learning_rate": 0.0005, |
| "loss": 0.9209, |
| "step": 12250 |
| }, |
| { |
| "epoch": 0.6335589891995246, |
| "grad_norm": 0.5189198851585388, |
| "learning_rate": 0.0005, |
| "loss": 0.9206, |
| "step": 12260 |
| }, |
| { |
| "epoch": 0.6340757583587412, |
| "grad_norm": 0.47871729731559753, |
| "learning_rate": 0.0005, |
| "loss": 0.9279, |
| "step": 12270 |
| }, |
| { |
| "epoch": 0.6345925275179577, |
| "grad_norm": 0.4953111410140991, |
| "learning_rate": 0.0005, |
| "loss": 0.93, |
| "step": 12280 |
| }, |
| { |
| "epoch": 0.6351092966771743, |
| "grad_norm": 0.5199342370033264, |
| "learning_rate": 0.0005, |
| "loss": 0.9246, |
| "step": 12290 |
| }, |
| { |
| "epoch": 0.6356260658363909, |
| "grad_norm": 0.48852893710136414, |
| "learning_rate": 0.0005, |
| "loss": 0.9222, |
| "step": 12300 |
| }, |
| { |
| "epoch": 0.6361428349956074, |
| "grad_norm": 0.5054774284362793, |
| "learning_rate": 0.0005, |
| "loss": 0.9346, |
| "step": 12310 |
| }, |
| { |
| "epoch": 0.636659604154824, |
| "grad_norm": 0.5030813813209534, |
| "learning_rate": 0.0005, |
| "loss": 0.9238, |
| "step": 12320 |
| }, |
| { |
| "epoch": 0.6371763733140406, |
| "grad_norm": 0.47299617528915405, |
| "learning_rate": 0.0005, |
| "loss": 0.9317, |
| "step": 12330 |
| }, |
| { |
| "epoch": 0.6376931424732571, |
| "grad_norm": 0.5473576784133911, |
| "learning_rate": 0.0005, |
| "loss": 0.9206, |
| "step": 12340 |
| }, |
| { |
| "epoch": 0.6382099116324738, |
| "grad_norm": 0.4999616742134094, |
| "learning_rate": 0.0005, |
| "loss": 0.9449, |
| "step": 12350 |
| }, |
| { |
| "epoch": 0.6387266807916904, |
| "grad_norm": 0.5508975982666016, |
| "learning_rate": 0.0005, |
| "loss": 0.921, |
| "step": 12360 |
| }, |
| { |
| "epoch": 0.639243449950907, |
| "grad_norm": 0.5574737191200256, |
| "learning_rate": 0.0005, |
| "loss": 0.935, |
| "step": 12370 |
| }, |
| { |
| "epoch": 0.6397602191101235, |
| "grad_norm": 0.5615907907485962, |
| "learning_rate": 0.0005, |
| "loss": 0.9263, |
| "step": 12380 |
| }, |
| { |
| "epoch": 0.6402769882693401, |
| "grad_norm": 0.5180084109306335, |
| "learning_rate": 0.0005, |
| "loss": 0.9235, |
| "step": 12390 |
| }, |
| { |
| "epoch": 0.6407937574285567, |
| "grad_norm": 0.46675363183021545, |
| "learning_rate": 0.0005, |
| "loss": 0.9237, |
| "step": 12400 |
| }, |
| { |
| "epoch": 0.6413105265877732, |
| "grad_norm": 0.4773077070713043, |
| "learning_rate": 0.0005, |
| "loss": 0.9098, |
| "step": 12410 |
| }, |
| { |
| "epoch": 0.6418272957469898, |
| "grad_norm": 0.5147991180419922, |
| "learning_rate": 0.0005, |
| "loss": 0.9215, |
| "step": 12420 |
| }, |
| { |
| "epoch": 0.6423440649062064, |
| "grad_norm": 0.47254249453544617, |
| "learning_rate": 0.0005, |
| "loss": 0.925, |
| "step": 12430 |
| }, |
| { |
| "epoch": 0.6428608340654229, |
| "grad_norm": 0.48444342613220215, |
| "learning_rate": 0.0005, |
| "loss": 0.9138, |
| "step": 12440 |
| }, |
| { |
| "epoch": 0.6433776032246395, |
| "grad_norm": 0.4626687169075012, |
| "learning_rate": 0.0005, |
| "loss": 0.9239, |
| "step": 12450 |
| }, |
| { |
| "epoch": 0.6438943723838562, |
| "grad_norm": 0.48663684725761414, |
| "learning_rate": 0.0005, |
| "loss": 0.9365, |
| "step": 12460 |
| }, |
| { |
| "epoch": 0.6444111415430727, |
| "grad_norm": 0.5721457600593567, |
| "learning_rate": 0.0005, |
| "loss": 0.9228, |
| "step": 12470 |
| }, |
| { |
| "epoch": 0.6449279107022893, |
| "grad_norm": 0.4997864067554474, |
| "learning_rate": 0.0005, |
| "loss": 0.9203, |
| "step": 12480 |
| }, |
| { |
| "epoch": 0.6454446798615059, |
| "grad_norm": 0.4961699843406677, |
| "learning_rate": 0.0005, |
| "loss": 0.909, |
| "step": 12490 |
| }, |
| { |
| "epoch": 0.6459614490207224, |
| "grad_norm": 0.49018388986587524, |
| "learning_rate": 0.0005, |
| "loss": 0.9242, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.646478218179939, |
| "grad_norm": 0.5205206871032715, |
| "learning_rate": 0.0005, |
| "loss": 0.923, |
| "step": 12510 |
| }, |
| { |
| "epoch": 0.6469949873391556, |
| "grad_norm": 0.527740478515625, |
| "learning_rate": 0.0005, |
| "loss": 0.9267, |
| "step": 12520 |
| }, |
| { |
| "epoch": 0.6475117564983722, |
| "grad_norm": 0.4962241053581238, |
| "learning_rate": 0.0005, |
| "loss": 0.9206, |
| "step": 12530 |
| }, |
| { |
| "epoch": 0.6480285256575887, |
| "grad_norm": 0.47836676239967346, |
| "learning_rate": 0.0005, |
| "loss": 0.9134, |
| "step": 12540 |
| }, |
| { |
| "epoch": 0.6485452948168053, |
| "grad_norm": 0.48245546221733093, |
| "learning_rate": 0.0005, |
| "loss": 0.9326, |
| "step": 12550 |
| }, |
| { |
| "epoch": 0.649062063976022, |
| "grad_norm": 0.503021240234375, |
| "learning_rate": 0.0005, |
| "loss": 0.9361, |
| "step": 12560 |
| }, |
| { |
| "epoch": 0.6495788331352385, |
| "grad_norm": 0.5059377551078796, |
| "learning_rate": 0.0005, |
| "loss": 0.8998, |
| "step": 12570 |
| }, |
| { |
| "epoch": 0.6500956022944551, |
| "grad_norm": 0.49928557872772217, |
| "learning_rate": 0.0004994267553729553, |
| "loss": 0.929, |
| "step": 12580 |
| }, |
| { |
| "epoch": 0.6506123714536717, |
| "grad_norm": 0.4804401099681854, |
| "learning_rate": 0.0004963394943411699, |
| "loss": 0.9173, |
| "step": 12590 |
| }, |
| { |
| "epoch": 0.6511291406128882, |
| "grad_norm": 0.4649386405944824, |
| "learning_rate": 0.0004932713175506187, |
| "loss": 0.9256, |
| "step": 12600 |
| }, |
| { |
| "epoch": 0.6516459097721048, |
| "grad_norm": 0.47866883873939514, |
| "learning_rate": 0.0004902221070299804, |
| "loss": 0.9185, |
| "step": 12610 |
| }, |
| { |
| "epoch": 0.6521626789313214, |
| "grad_norm": 0.4801424443721771, |
| "learning_rate": 0.00048719174553718596, |
| "loss": 0.9276, |
| "step": 12620 |
| }, |
| { |
| "epoch": 0.6526794480905379, |
| "grad_norm": 0.4797857105731964, |
| "learning_rate": 0.0004841801165549115, |
| "loss": 0.9262, |
| "step": 12630 |
| }, |
| { |
| "epoch": 0.6531962172497545, |
| "grad_norm": 0.4703647494316101, |
| "learning_rate": 0.0004811871042860973, |
| "loss": 0.9113, |
| "step": 12640 |
| }, |
| { |
| "epoch": 0.6537129864089711, |
| "grad_norm": 0.4952949583530426, |
| "learning_rate": 0.00047821259364949593, |
| "loss": 0.9372, |
| "step": 12650 |
| }, |
| { |
| "epoch": 0.6542297555681877, |
| "grad_norm": 0.48347562551498413, |
| "learning_rate": 0.0004752564702752473, |
| "loss": 0.9224, |
| "step": 12660 |
| }, |
| { |
| "epoch": 0.6547465247274042, |
| "grad_norm": 0.4917808473110199, |
| "learning_rate": 0.0004723186205004811, |
| "loss": 0.91, |
| "step": 12670 |
| }, |
| { |
| "epoch": 0.6552632938866209, |
| "grad_norm": 0.5070691704750061, |
| "learning_rate": 0.00046939893136494626, |
| "loss": 0.9147, |
| "step": 12680 |
| }, |
| { |
| "epoch": 0.6557800630458375, |
| "grad_norm": 0.49811315536499023, |
| "learning_rate": 0.0004664972906066682, |
| "loss": 0.903, |
| "step": 12690 |
| }, |
| { |
| "epoch": 0.656296832205054, |
| "grad_norm": 0.5315011739730835, |
| "learning_rate": 0.0004636135866576317, |
| "loss": 0.9087, |
| "step": 12700 |
| }, |
| { |
| "epoch": 0.6568136013642706, |
| "grad_norm": 0.4951007068157196, |
| "learning_rate": 0.00046074770863949155, |
| "loss": 0.9282, |
| "step": 12710 |
| }, |
| { |
| "epoch": 0.6573303705234872, |
| "grad_norm": 0.49288272857666016, |
| "learning_rate": 0.00045789954635930914, |
| "loss": 0.9279, |
| "step": 12720 |
| }, |
| { |
| "epoch": 0.6578471396827037, |
| "grad_norm": 0.4682476222515106, |
| "learning_rate": 0.00045506899030531544, |
| "loss": 0.9122, |
| "step": 12730 |
| }, |
| { |
| "epoch": 0.6583639088419203, |
| "grad_norm": 0.5064340233802795, |
| "learning_rate": 0.0004522559316427005, |
| "loss": 0.9114, |
| "step": 12740 |
| }, |
| { |
| "epoch": 0.6588806780011369, |
| "grad_norm": 0.4566449224948883, |
| "learning_rate": 0.00044946026220942865, |
| "loss": 0.9133, |
| "step": 12750 |
| }, |
| { |
| "epoch": 0.6593974471603534, |
| "grad_norm": 0.4679611623287201, |
| "learning_rate": 0.00044668187451207944, |
| "loss": 0.8991, |
| "step": 12760 |
| }, |
| { |
| "epoch": 0.65991421631957, |
| "grad_norm": 0.48330655694007874, |
| "learning_rate": 0.00044392066172171496, |
| "loss": 0.9103, |
| "step": 12770 |
| }, |
| { |
| "epoch": 0.6604309854787866, |
| "grad_norm": 0.5204933285713196, |
| "learning_rate": 0.00044117651766977195, |
| "loss": 0.9149, |
| "step": 12780 |
| }, |
| { |
| "epoch": 0.6609477546380031, |
| "grad_norm": 0.48776623606681824, |
| "learning_rate": 0.00043844933684397984, |
| "loss": 0.9185, |
| "step": 12790 |
| }, |
| { |
| "epoch": 0.6614645237972198, |
| "grad_norm": 0.4869120419025421, |
| "learning_rate": 0.0004357390143843035, |
| "loss": 0.9096, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.6619812929564364, |
| "grad_norm": 0.4783307611942291, |
| "learning_rate": 0.0004330454460789117, |
| "loss": 0.8977, |
| "step": 12810 |
| }, |
| { |
| "epoch": 0.662498062115653, |
| "grad_norm": 0.4555026888847351, |
| "learning_rate": 0.00043036852836016994, |
| "loss": 0.9039, |
| "step": 12820 |
| }, |
| { |
| "epoch": 0.6630148312748695, |
| "grad_norm": 0.47510290145874023, |
| "learning_rate": 0.00042770815830065834, |
| "loss": 0.9051, |
| "step": 12830 |
| }, |
| { |
| "epoch": 0.6635316004340861, |
| "grad_norm": 0.4920065999031067, |
| "learning_rate": 0.0004250642336092143, |
| "loss": 0.9138, |
| "step": 12840 |
| }, |
| { |
| "epoch": 0.6640483695933027, |
| "grad_norm": 0.47680869698524475, |
| "learning_rate": 0.000422436652626999, |
| "loss": 0.9131, |
| "step": 12850 |
| }, |
| { |
| "epoch": 0.6645651387525192, |
| "grad_norm": 0.5098276138305664, |
| "learning_rate": 0.00041982531432358883, |
| "loss": 0.9158, |
| "step": 12860 |
| }, |
| { |
| "epoch": 0.6650819079117358, |
| "grad_norm": 0.4745832085609436, |
| "learning_rate": 0.000417230118293091, |
| "loss": 0.9019, |
| "step": 12870 |
| }, |
| { |
| "epoch": 0.6655986770709524, |
| "grad_norm": 0.456750750541687, |
| "learning_rate": 0.00041465096475028256, |
| "loss": 0.8881, |
| "step": 12880 |
| }, |
| { |
| "epoch": 0.6661154462301689, |
| "grad_norm": 0.49757450819015503, |
| "learning_rate": 0.00041208775452677374, |
| "loss": 0.8971, |
| "step": 12890 |
| }, |
| { |
| "epoch": 0.6666322153893856, |
| "grad_norm": 0.4721812605857849, |
| "learning_rate": 0.0004095403890671951, |
| "loss": 0.8896, |
| "step": 12900 |
| }, |
| { |
| "epoch": 0.6671489845486022, |
| "grad_norm": 0.4674829840660095, |
| "learning_rate": 0.00040700877042540803, |
| "loss": 0.8978, |
| "step": 12910 |
| }, |
| { |
| "epoch": 0.6676657537078187, |
| "grad_norm": 0.45353659987449646, |
| "learning_rate": 0.0004044928012607386, |
| "loss": 0.9012, |
| "step": 12920 |
| }, |
| { |
| "epoch": 0.6681825228670353, |
| "grad_norm": 0.44594326615333557, |
| "learning_rate": 0.0004019923848342348, |
| "loss": 0.8864, |
| "step": 12930 |
| }, |
| { |
| "epoch": 0.6686992920262519, |
| "grad_norm": 0.4606136083602905, |
| "learning_rate": 0.0003995074250049472, |
| "loss": 0.9042, |
| "step": 12940 |
| }, |
| { |
| "epoch": 0.6692160611854685, |
| "grad_norm": 0.4778830111026764, |
| "learning_rate": 0.000397037826226232, |
| "loss": 0.8883, |
| "step": 12950 |
| }, |
| { |
| "epoch": 0.669732830344685, |
| "grad_norm": 0.4795719385147095, |
| "learning_rate": 0.00039458349354207754, |
| "loss": 0.8943, |
| "step": 12960 |
| }, |
| { |
| "epoch": 0.6702495995039016, |
| "grad_norm": 0.46150490641593933, |
| "learning_rate": 0.000392144332583453, |
| "loss": 0.8986, |
| "step": 12970 |
| }, |
| { |
| "epoch": 0.6707663686631182, |
| "grad_norm": 0.4591388404369354, |
| "learning_rate": 0.00038972024956468015, |
| "loss": 0.8973, |
| "step": 12980 |
| }, |
| { |
| "epoch": 0.6712831378223347, |
| "grad_norm": 0.447889506816864, |
| "learning_rate": 0.00038731115127982704, |
| "loss": 0.8982, |
| "step": 12990 |
| }, |
| { |
| "epoch": 0.6717999069815513, |
| "grad_norm": 0.4567711651325226, |
| "learning_rate": 0.00038491694509912446, |
| "loss": 0.8946, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.672316676140768, |
| "grad_norm": 0.4653710424900055, |
| "learning_rate": 0.00038253753896540417, |
| "loss": 0.8805, |
| "step": 13010 |
| }, |
| { |
| "epoch": 0.6728334452999845, |
| "grad_norm": 0.47622108459472656, |
| "learning_rate": 0.00038017284139055935, |
| "loss": 0.8971, |
| "step": 13020 |
| }, |
| { |
| "epoch": 0.6733502144592011, |
| "grad_norm": 0.46596968173980713, |
| "learning_rate": 0.0003778227614520272, |
| "loss": 0.8872, |
| "step": 13030 |
| }, |
| { |
| "epoch": 0.6738669836184177, |
| "grad_norm": 0.47842490673065186, |
| "learning_rate": 0.0003754872087892921, |
| "loss": 0.8844, |
| "step": 13040 |
| }, |
| { |
| "epoch": 0.6743837527776342, |
| "grad_norm": 0.5763306617736816, |
| "learning_rate": 0.00037316609360041244, |
| "loss": 0.884, |
| "step": 13050 |
| }, |
| { |
| "epoch": 0.6749005219368508, |
| "grad_norm": 0.4681786298751831, |
| "learning_rate": 0.00037085932663856664, |
| "loss": 0.8957, |
| "step": 13060 |
| }, |
| { |
| "epoch": 0.6754172910960674, |
| "grad_norm": 0.4536014199256897, |
| "learning_rate": 0.0003685668192086224, |
| "loss": 0.8962, |
| "step": 13070 |
| }, |
| { |
| "epoch": 0.675934060255284, |
| "grad_norm": 0.4593828320503235, |
| "learning_rate": 0.0003662884831637259, |
| "loss": 0.8792, |
| "step": 13080 |
| }, |
| { |
| "epoch": 0.6764508294145005, |
| "grad_norm": 0.4837941527366638, |
| "learning_rate": 0.00036402423090191283, |
| "loss": 0.8928, |
| "step": 13090 |
| }, |
| { |
| "epoch": 0.6769675985737171, |
| "grad_norm": 0.47275635600090027, |
| "learning_rate": 0.0003617739753627399, |
| "loss": 0.8885, |
| "step": 13100 |
| }, |
| { |
| "epoch": 0.6774843677329337, |
| "grad_norm": 0.465971976518631, |
| "learning_rate": 0.00035953763002393753, |
| "loss": 0.8859, |
| "step": 13110 |
| }, |
| { |
| "epoch": 0.6780011368921502, |
| "grad_norm": 0.46785497665405273, |
| "learning_rate": 0.00035731510889808296, |
| "loss": 0.8829, |
| "step": 13120 |
| }, |
| { |
| "epoch": 0.6785179060513669, |
| "grad_norm": 0.44653069972991943, |
| "learning_rate": 0.0003551063265292941, |
| "loss": 0.8694, |
| "step": 13130 |
| }, |
| { |
| "epoch": 0.6790346752105835, |
| "grad_norm": 0.46585527062416077, |
| "learning_rate": 0.0003529111979899436, |
| "loss": 0.8871, |
| "step": 13140 |
| }, |
| { |
| "epoch": 0.6795514443698, |
| "grad_norm": 0.5283601880073547, |
| "learning_rate": 0.00035072963887739373, |
| "loss": 0.8863, |
| "step": 13150 |
| }, |
| { |
| "epoch": 0.6800682135290166, |
| "grad_norm": 0.4678700864315033, |
| "learning_rate": 0.0003485615653107508, |
| "loss": 0.8859, |
| "step": 13160 |
| }, |
| { |
| "epoch": 0.6805849826882332, |
| "grad_norm": 0.4804142713546753, |
| "learning_rate": 0.0003464068939276399, |
| "loss": 0.8994, |
| "step": 13170 |
| }, |
| { |
| "epoch": 0.6811017518474497, |
| "grad_norm": 0.450847864151001, |
| "learning_rate": 0.0003442655418809999, |
| "loss": 0.8894, |
| "step": 13180 |
| }, |
| { |
| "epoch": 0.6816185210066663, |
| "grad_norm": 0.46586012840270996, |
| "learning_rate": 0.00034213742683589774, |
| "loss": 0.8768, |
| "step": 13190 |
| }, |
| { |
| "epoch": 0.6821352901658829, |
| "grad_norm": 0.439656525850296, |
| "learning_rate": 0.0003400224669663629, |
| "loss": 0.8855, |
| "step": 13200 |
| }, |
| { |
| "epoch": 0.6826520593250994, |
| "grad_norm": 0.4356318712234497, |
| "learning_rate": 0.00033792058095224076, |
| "loss": 0.8772, |
| "step": 13210 |
| }, |
| { |
| "epoch": 0.683168828484316, |
| "grad_norm": 0.460469514131546, |
| "learning_rate": 0.0003358316879760663, |
| "loss": 0.8681, |
| "step": 13220 |
| }, |
| { |
| "epoch": 0.6836855976435326, |
| "grad_norm": 0.43120890855789185, |
| "learning_rate": 0.0003337557077199565, |
| "loss": 0.8611, |
| "step": 13230 |
| }, |
| { |
| "epoch": 0.6842023668027493, |
| "grad_norm": 0.45166271924972534, |
| "learning_rate": 0.000331692560362522, |
| "loss": 0.8771, |
| "step": 13240 |
| }, |
| { |
| "epoch": 0.6847191359619658, |
| "grad_norm": 0.44746896624565125, |
| "learning_rate": 0.0003296421665757981, |
| "loss": 0.8781, |
| "step": 13250 |
| }, |
| { |
| "epoch": 0.6852359051211824, |
| "grad_norm": 0.4466201663017273, |
| "learning_rate": 0.0003276044475221947, |
| "loss": 0.8647, |
| "step": 13260 |
| }, |
| { |
| "epoch": 0.685752674280399, |
| "grad_norm": 0.48084691166877747, |
| "learning_rate": 0.00032557932485146473, |
| "loss": 0.9078, |
| "step": 13270 |
| }, |
| { |
| "epoch": 0.6862694434396155, |
| "grad_norm": 0.46723824739456177, |
| "learning_rate": 0.0003235667206976918, |
| "loss": 0.8802, |
| "step": 13280 |
| }, |
| { |
| "epoch": 0.6867862125988321, |
| "grad_norm": 0.4841623902320862, |
| "learning_rate": 0.00032156655767629616, |
| "loss": 0.8721, |
| "step": 13290 |
| }, |
| { |
| "epoch": 0.6873029817580487, |
| "grad_norm": 0.4535221755504608, |
| "learning_rate": 0.0003195787588810593, |
| "loss": 0.8609, |
| "step": 13300 |
| }, |
| { |
| "epoch": 0.6878197509172652, |
| "grad_norm": 0.47944900393486023, |
| "learning_rate": 0.00031760324788116683, |
| "loss": 0.8803, |
| "step": 13310 |
| }, |
| { |
| "epoch": 0.6883365200764818, |
| "grad_norm": 0.4466581344604492, |
| "learning_rate": 0.00031563994871826995, |
| "loss": 0.867, |
| "step": 13320 |
| }, |
| { |
| "epoch": 0.6888532892356984, |
| "grad_norm": 0.4529067277908325, |
| "learning_rate": 0.00031368878590356457, |
| "loss": 0.8861, |
| "step": 13330 |
| }, |
| { |
| "epoch": 0.6893700583949149, |
| "grad_norm": 0.45706498622894287, |
| "learning_rate": 0.00031174968441488886, |
| "loss": 0.8754, |
| "step": 13340 |
| }, |
| { |
| "epoch": 0.6898868275541316, |
| "grad_norm": 0.46450352668762207, |
| "learning_rate": 0.00030982256969383883, |
| "loss": 0.8669, |
| "step": 13350 |
| }, |
| { |
| "epoch": 0.6904035967133482, |
| "grad_norm": 0.45960313081741333, |
| "learning_rate": 0.0003079073676429011, |
| "loss": 0.8669, |
| "step": 13360 |
| }, |
| { |
| "epoch": 0.6909203658725648, |
| "grad_norm": 0.4698009192943573, |
| "learning_rate": 0.00030600400462260457, |
| "loss": 0.8697, |
| "step": 13370 |
| }, |
| { |
| "epoch": 0.6914371350317813, |
| "grad_norm": 0.4546875059604645, |
| "learning_rate": 0.0003041124074486883, |
| "loss": 0.863, |
| "step": 13380 |
| }, |
| { |
| "epoch": 0.6919539041909979, |
| "grad_norm": 0.4646720588207245, |
| "learning_rate": 0.00030223250338928787, |
| "loss": 0.8664, |
| "step": 13390 |
| }, |
| { |
| "epoch": 0.6924706733502145, |
| "grad_norm": 0.6140843629837036, |
| "learning_rate": 0.0003003642201621389, |
| "loss": 0.8636, |
| "step": 13400 |
| }, |
| { |
| "epoch": 0.692987442509431, |
| "grad_norm": 0.46629661321640015, |
| "learning_rate": 0.0002985074859317977, |
| "loss": 0.8776, |
| "step": 13410 |
| }, |
| { |
| "epoch": 0.6935042116686476, |
| "grad_norm": 0.4489153027534485, |
| "learning_rate": 0.00029666222930687926, |
| "loss": 0.8663, |
| "step": 13420 |
| }, |
| { |
| "epoch": 0.6940209808278642, |
| "grad_norm": 0.45471352338790894, |
| "learning_rate": 0.00029482837933731207, |
| "loss": 0.8514, |
| "step": 13430 |
| }, |
| { |
| "epoch": 0.6945377499870807, |
| "grad_norm": 0.4706459045410156, |
| "learning_rate": 0.00029300586551161034, |
| "loss": 0.866, |
| "step": 13440 |
| }, |
| { |
| "epoch": 0.6950545191462973, |
| "grad_norm": 0.44388100504875183, |
| "learning_rate": 0.00029119461775416286, |
| "loss": 0.862, |
| "step": 13450 |
| }, |
| { |
| "epoch": 0.695571288305514, |
| "grad_norm": 0.5106334090232849, |
| "learning_rate": 0.0002893945664225381, |
| "loss": 0.8563, |
| "step": 13460 |
| }, |
| { |
| "epoch": 0.6960880574647305, |
| "grad_norm": 0.4586535096168518, |
| "learning_rate": 0.00028760564230480724, |
| "loss": 0.8564, |
| "step": 13470 |
| }, |
| { |
| "epoch": 0.6966048266239471, |
| "grad_norm": 0.5277544856071472, |
| "learning_rate": 0.0002858277766168823, |
| "loss": 0.8685, |
| "step": 13480 |
| }, |
| { |
| "epoch": 0.6971215957831637, |
| "grad_norm": 0.48058634996414185, |
| "learning_rate": 0.0002840609009998717, |
| "loss": 0.8645, |
| "step": 13490 |
| }, |
| { |
| "epoch": 0.6976383649423802, |
| "grad_norm": 0.4804344177246094, |
| "learning_rate": 0.0002823049475174519, |
| "loss": 0.8754, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.6981551341015968, |
| "grad_norm": 0.4439767003059387, |
| "learning_rate": 0.00028055984865325503, |
| "loss": 0.8514, |
| "step": 13510 |
| }, |
| { |
| "epoch": 0.6986719032608134, |
| "grad_norm": 0.4501279294490814, |
| "learning_rate": 0.0002788255373082731, |
| "loss": 0.856, |
| "step": 13520 |
| }, |
| { |
| "epoch": 0.69918867242003, |
| "grad_norm": 0.5022059679031372, |
| "learning_rate": 0.000277101946798278, |
| "loss": 0.8647, |
| "step": 13530 |
| }, |
| { |
| "epoch": 0.6997054415792465, |
| "grad_norm": 0.45433667302131653, |
| "learning_rate": 0.00027538901085125735, |
| "loss": 0.8719, |
| "step": 13540 |
| }, |
| { |
| "epoch": 0.7002222107384631, |
| "grad_norm": 0.46493837237358093, |
| "learning_rate": 0.0002736866636048666, |
| "loss": 0.8599, |
| "step": 13550 |
| }, |
| { |
| "epoch": 0.7007389798976797, |
| "grad_norm": 0.45873501896858215, |
| "learning_rate": 0.0002719948396038963, |
| "loss": 0.8648, |
| "step": 13560 |
| }, |
| { |
| "epoch": 0.7012557490568962, |
| "grad_norm": 0.4426117539405823, |
| "learning_rate": 0.0002703134737977557, |
| "loss": 0.8574, |
| "step": 13570 |
| }, |
| { |
| "epoch": 0.7017725182161129, |
| "grad_norm": 0.44519364833831787, |
| "learning_rate": 0.0002686425015379712, |
| "loss": 0.854, |
| "step": 13580 |
| }, |
| { |
| "epoch": 0.7022892873753295, |
| "grad_norm": 0.47185274958610535, |
| "learning_rate": 0.00026698185857570094, |
| "loss": 0.8565, |
| "step": 13590 |
| }, |
| { |
| "epoch": 0.702806056534546, |
| "grad_norm": 0.43223652243614197, |
| "learning_rate": 0.00026533148105926436, |
| "loss": 0.8721, |
| "step": 13600 |
| }, |
| { |
| "epoch": 0.7033228256937626, |
| "grad_norm": 0.4602532386779785, |
| "learning_rate": 0.0002636913055316868, |
| "loss": 0.8518, |
| "step": 13610 |
| }, |
| { |
| "epoch": 0.7038395948529792, |
| "grad_norm": 0.45018014311790466, |
| "learning_rate": 0.00026206126892826, |
| "loss": 0.8685, |
| "step": 13620 |
| }, |
| { |
| "epoch": 0.7043563640121957, |
| "grad_norm": 0.49739015102386475, |
| "learning_rate": 0.000260441308574117, |
| "loss": 0.8483, |
| "step": 13630 |
| }, |
| { |
| "epoch": 0.7048731331714123, |
| "grad_norm": 0.4658418595790863, |
| "learning_rate": 0.00025883136218182235, |
| "loss": 0.8545, |
| "step": 13640 |
| }, |
| { |
| "epoch": 0.7053899023306289, |
| "grad_norm": 0.4808160066604614, |
| "learning_rate": 0.0002572313678489773, |
| "loss": 0.8622, |
| "step": 13650 |
| }, |
| { |
| "epoch": 0.7059066714898455, |
| "grad_norm": 0.4521915316581726, |
| "learning_rate": 0.0002556412640558396, |
| "loss": 0.8632, |
| "step": 13660 |
| }, |
| { |
| "epoch": 0.706423440649062, |
| "grad_norm": 0.456153005361557, |
| "learning_rate": 0.0002540609896629577, |
| "loss": 0.861, |
| "step": 13670 |
| }, |
| { |
| "epoch": 0.7069402098082787, |
| "grad_norm": 0.43279728293418884, |
| "learning_rate": 0.00025249048390882053, |
| "loss": 0.8593, |
| "step": 13680 |
| }, |
| { |
| "epoch": 0.7074569789674953, |
| "grad_norm": 0.4601012170314789, |
| "learning_rate": 0.0002509296864075207, |
| "loss": 0.8629, |
| "step": 13690 |
| }, |
| { |
| "epoch": 0.7079737481267118, |
| "grad_norm": 0.47351303696632385, |
| "learning_rate": 0.0002493785371464332, |
| "loss": 0.8622, |
| "step": 13700 |
| }, |
| { |
| "epoch": 0.7084905172859284, |
| "grad_norm": 0.4869425594806671, |
| "learning_rate": 0.0002478369764839074, |
| "loss": 0.8546, |
| "step": 13710 |
| }, |
| { |
| "epoch": 0.709007286445145, |
| "grad_norm": 0.4412122964859009, |
| "learning_rate": 0.0002463049451469741, |
| "loss": 0.8444, |
| "step": 13720 |
| }, |
| { |
| "epoch": 0.7095240556043615, |
| "grad_norm": 0.4480939209461212, |
| "learning_rate": 0.0002447823842290664, |
| "loss": 0.848, |
| "step": 13730 |
| }, |
| { |
| "epoch": 0.7100408247635781, |
| "grad_norm": 0.4651864767074585, |
| "learning_rate": 0.00024326923518775486, |
| "loss": 0.8455, |
| "step": 13740 |
| }, |
| { |
| "epoch": 0.7105575939227947, |
| "grad_norm": 0.4487757384777069, |
| "learning_rate": 0.0002417654398424963, |
| "loss": 0.841, |
| "step": 13750 |
| }, |
| { |
| "epoch": 0.7110743630820112, |
| "grad_norm": 0.44667768478393555, |
| "learning_rate": 0.00024027094037239717, |
| "loss": 0.8454, |
| "step": 13760 |
| }, |
| { |
| "epoch": 0.7115911322412278, |
| "grad_norm": 0.44757676124572754, |
| "learning_rate": 0.0002387856793139899, |
| "loss": 0.8438, |
| "step": 13770 |
| }, |
| { |
| "epoch": 0.7121079014004444, |
| "grad_norm": 0.47068849205970764, |
| "learning_rate": 0.00023730959955902366, |
| "loss": 0.8434, |
| "step": 13780 |
| }, |
| { |
| "epoch": 0.7126246705596609, |
| "grad_norm": 0.4390396773815155, |
| "learning_rate": 0.00023584264435226848, |
| "loss": 0.8461, |
| "step": 13790 |
| }, |
| { |
| "epoch": 0.7131414397188776, |
| "grad_norm": 0.4566657543182373, |
| "learning_rate": 0.00023438475728933318, |
| "loss": 0.8473, |
| "step": 13800 |
| }, |
| { |
| "epoch": 0.7136582088780942, |
| "grad_norm": 0.49407103657722473, |
| "learning_rate": 0.0002329358823144963, |
| "loss": 0.8431, |
| "step": 13810 |
| }, |
| { |
| "epoch": 0.7141749780373108, |
| "grad_norm": 0.47513094544410706, |
| "learning_rate": 0.00023149596371855103, |
| "loss": 0.8425, |
| "step": 13820 |
| }, |
| { |
| "epoch": 0.7146917471965273, |
| "grad_norm": 0.4418255686759949, |
| "learning_rate": 0.00023006494613666317, |
| "loss": 0.8394, |
| "step": 13830 |
| }, |
| { |
| "epoch": 0.7152085163557439, |
| "grad_norm": 0.45882540941238403, |
| "learning_rate": 0.0002286427745462422, |
| "loss": 0.844, |
| "step": 13840 |
| }, |
| { |
| "epoch": 0.7157252855149605, |
| "grad_norm": 0.44126296043395996, |
| "learning_rate": 0.00022722939426482577, |
| "loss": 0.8438, |
| "step": 13850 |
| }, |
| { |
| "epoch": 0.716242054674177, |
| "grad_norm": 0.44302189350128174, |
| "learning_rate": 0.00022582475094797713, |
| "loss": 0.8597, |
| "step": 13860 |
| }, |
| { |
| "epoch": 0.7167588238333936, |
| "grad_norm": 0.46645456552505493, |
| "learning_rate": 0.00022442879058719568, |
| "loss": 0.8218, |
| "step": 13870 |
| }, |
| { |
| "epoch": 0.7172755929926102, |
| "grad_norm": 0.4451071619987488, |
| "learning_rate": 0.00022304145950784017, |
| "loss": 0.852, |
| "step": 13880 |
| }, |
| { |
| "epoch": 0.7177923621518267, |
| "grad_norm": 0.47982582449913025, |
| "learning_rate": 0.00022166270436706502, |
| "loss": 0.8408, |
| "step": 13890 |
| }, |
| { |
| "epoch": 0.7183091313110433, |
| "grad_norm": 0.4596095085144043, |
| "learning_rate": 0.00022029247215176934, |
| "loss": 0.8333, |
| "step": 13900 |
| }, |
| { |
| "epoch": 0.71882590047026, |
| "grad_norm": 0.4595165550708771, |
| "learning_rate": 0.00021893071017655845, |
| "loss": 0.8426, |
| "step": 13910 |
| }, |
| { |
| "epoch": 0.7193426696294765, |
| "grad_norm": 0.4321739375591278, |
| "learning_rate": 0.00021757736608171818, |
| "loss": 0.8419, |
| "step": 13920 |
| }, |
| { |
| "epoch": 0.7198594387886931, |
| "grad_norm": 0.4603961706161499, |
| "learning_rate": 0.00021623238783120176, |
| "loss": 0.8471, |
| "step": 13930 |
| }, |
| { |
| "epoch": 0.7203762079479097, |
| "grad_norm": 0.47230657935142517, |
| "learning_rate": 0.00021489572371062883, |
| "loss": 0.8326, |
| "step": 13940 |
| }, |
| { |
| "epoch": 0.7208929771071263, |
| "grad_norm": 0.45762136578559875, |
| "learning_rate": 0.0002135673223252971, |
| "loss": 0.8425, |
| "step": 13950 |
| }, |
| { |
| "epoch": 0.7214097462663428, |
| "grad_norm": 0.4551469385623932, |
| "learning_rate": 0.00021224713259820633, |
| "loss": 0.8335, |
| "step": 13960 |
| }, |
| { |
| "epoch": 0.7219265154255594, |
| "grad_norm": 0.4409978985786438, |
| "learning_rate": 0.00021093510376809428, |
| "loss": 0.8388, |
| "step": 13970 |
| }, |
| { |
| "epoch": 0.722443284584776, |
| "grad_norm": 0.444934219121933, |
| "learning_rate": 0.00020963118538748493, |
| "loss": 0.8313, |
| "step": 13980 |
| }, |
| { |
| "epoch": 0.7229600537439925, |
| "grad_norm": 0.4529027044773102, |
| "learning_rate": 0.00020833532732074907, |
| "loss": 0.8298, |
| "step": 13990 |
| }, |
| { |
| "epoch": 0.7234768229032091, |
| "grad_norm": 0.44308820366859436, |
| "learning_rate": 0.00020704747974217608, |
| "loss": 0.8132, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.7239935920624258, |
| "grad_norm": 0.451187402009964, |
| "learning_rate": 0.0002057675931340586, |
| "loss": 0.8465, |
| "step": 14010 |
| }, |
| { |
| "epoch": 0.7245103612216423, |
| "grad_norm": 0.4436304569244385, |
| "learning_rate": 0.00020449561828478832, |
| "loss": 0.8502, |
| "step": 14020 |
| }, |
| { |
| "epoch": 0.7250271303808589, |
| "grad_norm": 0.4516158401966095, |
| "learning_rate": 0.00020323150628696383, |
| "loss": 0.8323, |
| "step": 14030 |
| }, |
| { |
| "epoch": 0.7255438995400755, |
| "grad_norm": 0.4490114450454712, |
| "learning_rate": 0.00020197520853551025, |
| "loss": 0.8366, |
| "step": 14040 |
| }, |
| { |
| "epoch": 0.726060668699292, |
| "grad_norm": 0.4692043364048004, |
| "learning_rate": 0.00020072667672581016, |
| "loss": 0.8537, |
| "step": 14050 |
| }, |
| { |
| "epoch": 0.7265774378585086, |
| "grad_norm": 0.47233638167381287, |
| "learning_rate": 0.00019948586285184656, |
| "loss": 0.8387, |
| "step": 14060 |
| }, |
| { |
| "epoch": 0.7270942070177252, |
| "grad_norm": 0.43632131814956665, |
| "learning_rate": 0.00019825271920435674, |
| "loss": 0.836, |
| "step": 14070 |
| }, |
| { |
| "epoch": 0.7276109761769418, |
| "grad_norm": 0.4420956075191498, |
| "learning_rate": 0.00019702719836899813, |
| "loss": 0.8381, |
| "step": 14080 |
| }, |
| { |
| "epoch": 0.7281277453361583, |
| "grad_norm": 0.4486638009548187, |
| "learning_rate": 0.00019580925322452495, |
| "loss": 0.8382, |
| "step": 14090 |
| }, |
| { |
| "epoch": 0.7286445144953749, |
| "grad_norm": 0.45652589201927185, |
| "learning_rate": 0.0001945988369409767, |
| "loss": 0.8538, |
| "step": 14100 |
| }, |
| { |
| "epoch": 0.7291612836545915, |
| "grad_norm": 0.4422604739665985, |
| "learning_rate": 0.00019339590297787735, |
| "loss": 0.8321, |
| "step": 14110 |
| }, |
| { |
| "epoch": 0.729678052813808, |
| "grad_norm": 0.4418606758117676, |
| "learning_rate": 0.00019220040508244581, |
| "loss": 0.8362, |
| "step": 14120 |
| }, |
| { |
| "epoch": 0.7301948219730247, |
| "grad_norm": 0.43576526641845703, |
| "learning_rate": 0.00019101229728781774, |
| "loss": 0.8131, |
| "step": 14130 |
| }, |
| { |
| "epoch": 0.7307115911322413, |
| "grad_norm": 0.4448246657848358, |
| "learning_rate": 0.0001898315339112779, |
| "loss": 0.8425, |
| "step": 14140 |
| }, |
| { |
| "epoch": 0.7312283602914578, |
| "grad_norm": 0.43587714433670044, |
| "learning_rate": 0.0001886580695525038, |
| "loss": 0.8283, |
| "step": 14150 |
| }, |
| { |
| "epoch": 0.7317451294506744, |
| "grad_norm": 0.4598979353904724, |
| "learning_rate": 0.00018749185909182, |
| "loss": 0.8441, |
| "step": 14160 |
| }, |
| { |
| "epoch": 0.732261898609891, |
| "grad_norm": 0.5122143626213074, |
| "learning_rate": 0.0001863328576884632, |
| "loss": 0.8497, |
| "step": 14170 |
| }, |
| { |
| "epoch": 0.7327786677691075, |
| "grad_norm": 0.45913758873939514, |
| "learning_rate": 0.00018518102077885824, |
| "loss": 0.8324, |
| "step": 14180 |
| }, |
| { |
| "epoch": 0.7332954369283241, |
| "grad_norm": 0.46700534224510193, |
| "learning_rate": 0.00018403630407490455, |
| "loss": 0.8165, |
| "step": 14190 |
| }, |
| { |
| "epoch": 0.7338122060875407, |
| "grad_norm": 0.4529505670070648, |
| "learning_rate": 0.0001828986635622732, |
| "loss": 0.8345, |
| "step": 14200 |
| }, |
| { |
| "epoch": 0.7343289752467572, |
| "grad_norm": 0.4726906716823578, |
| "learning_rate": 0.0001817680554987149, |
| "loss": 0.8283, |
| "step": 14210 |
| }, |
| { |
| "epoch": 0.7348457444059738, |
| "grad_norm": 0.4485037326812744, |
| "learning_rate": 0.00018064443641237752, |
| "loss": 0.8403, |
| "step": 14220 |
| }, |
| { |
| "epoch": 0.7353625135651904, |
| "grad_norm": 0.46243423223495483, |
| "learning_rate": 0.00017952776310013513, |
| "loss": 0.8292, |
| "step": 14230 |
| }, |
| { |
| "epoch": 0.7358792827244071, |
| "grad_norm": 0.45175400376319885, |
| "learning_rate": 0.00017841799262592663, |
| "loss": 0.837, |
| "step": 14240 |
| }, |
| { |
| "epoch": 0.7363960518836236, |
| "grad_norm": 0.4575372040271759, |
| "learning_rate": 0.0001773150823191048, |
| "loss": 0.8224, |
| "step": 14250 |
| }, |
| { |
| "epoch": 0.7369128210428402, |
| "grad_norm": 0.4672216773033142, |
| "learning_rate": 0.00017621898977279577, |
| "loss": 0.8351, |
| "step": 14260 |
| }, |
| { |
| "epoch": 0.7374295902020568, |
| "grad_norm": 0.45373353362083435, |
| "learning_rate": 0.0001751296728422683, |
| "loss": 0.8334, |
| "step": 14270 |
| }, |
| { |
| "epoch": 0.7379463593612733, |
| "grad_norm": 0.472469687461853, |
| "learning_rate": 0.0001740470896433135, |
| "loss": 0.8346, |
| "step": 14280 |
| }, |
| { |
| "epoch": 0.7384631285204899, |
| "grad_norm": 0.4568733274936676, |
| "learning_rate": 0.00017297119855063422, |
| "loss": 0.8223, |
| "step": 14290 |
| }, |
| { |
| "epoch": 0.7389798976797065, |
| "grad_norm": 0.4490255117416382, |
| "learning_rate": 0.00017190195819624467, |
| "loss": 0.8298, |
| "step": 14300 |
| }, |
| { |
| "epoch": 0.739496666838923, |
| "grad_norm": 0.4388444125652313, |
| "learning_rate": 0.0001708393274678798, |
| "loss": 0.8301, |
| "step": 14310 |
| }, |
| { |
| "epoch": 0.7400134359981396, |
| "grad_norm": 0.4393922686576843, |
| "learning_rate": 0.00016978326550741443, |
| "loss": 0.8379, |
| "step": 14320 |
| }, |
| { |
| "epoch": 0.7405302051573562, |
| "grad_norm": 0.44879150390625, |
| "learning_rate": 0.00016873373170929243, |
| "loss": 0.8205, |
| "step": 14330 |
| }, |
| { |
| "epoch": 0.7410469743165727, |
| "grad_norm": 0.4404836595058441, |
| "learning_rate": 0.00016769068571896532, |
| "loss": 0.8197, |
| "step": 14340 |
| }, |
| { |
| "epoch": 0.7415637434757893, |
| "grad_norm": 0.47884973883628845, |
| "learning_rate": 0.00016665408743134062, |
| "loss": 0.8433, |
| "step": 14350 |
| }, |
| { |
| "epoch": 0.742080512635006, |
| "grad_norm": 0.4363346993923187, |
| "learning_rate": 0.00016562389698924, |
| "loss": 0.8255, |
| "step": 14360 |
| }, |
| { |
| "epoch": 0.7425972817942226, |
| "grad_norm": 0.4692130982875824, |
| "learning_rate": 0.00016460007478186648, |
| "loss": 0.8146, |
| "step": 14370 |
| }, |
| { |
| "epoch": 0.7431140509534391, |
| "grad_norm": 0.45265311002731323, |
| "learning_rate": 0.00016358258144328163, |
| "loss": 0.8166, |
| "step": 14380 |
| }, |
| { |
| "epoch": 0.7436308201126557, |
| "grad_norm": 0.46352484822273254, |
| "learning_rate": 0.00016257137785089182, |
| "loss": 0.8262, |
| "step": 14390 |
| }, |
| { |
| "epoch": 0.7441475892718723, |
| "grad_norm": 0.4382546842098236, |
| "learning_rate": 0.00016156642512394405, |
| "loss": 0.8118, |
| "step": 14400 |
| }, |
| { |
| "epoch": 0.7446643584310888, |
| "grad_norm": 0.4326501190662384, |
| "learning_rate": 0.0001605676846220309, |
| "loss": 0.832, |
| "step": 14410 |
| }, |
| { |
| "epoch": 0.7451811275903054, |
| "grad_norm": 0.4683341979980469, |
| "learning_rate": 0.0001595751179436049, |
| "loss": 0.8202, |
| "step": 14420 |
| }, |
| { |
| "epoch": 0.745697896749522, |
| "grad_norm": 0.4519064426422119, |
| "learning_rate": 0.0001585886869245019, |
| "loss": 0.8055, |
| "step": 14430 |
| }, |
| { |
| "epoch": 0.7462146659087385, |
| "grad_norm": 0.45761948823928833, |
| "learning_rate": 0.00015760835363647367, |
| "loss": 0.8128, |
| "step": 14440 |
| }, |
| { |
| "epoch": 0.7467314350679551, |
| "grad_norm": 0.4355948269367218, |
| "learning_rate": 0.00015663408038572963, |
| "loss": 0.816, |
| "step": 14450 |
| }, |
| { |
| "epoch": 0.7472482042271718, |
| "grad_norm": 0.4464154839515686, |
| "learning_rate": 0.00015566582971148748, |
| "loss": 0.8211, |
| "step": 14460 |
| }, |
| { |
| "epoch": 0.7477649733863883, |
| "grad_norm": 0.4529094696044922, |
| "learning_rate": 0.0001547035643845329, |
| "loss": 0.8124, |
| "step": 14470 |
| }, |
| { |
| "epoch": 0.7482817425456049, |
| "grad_norm": 0.48181021213531494, |
| "learning_rate": 0.00015374724740578792, |
| "loss": 0.8092, |
| "step": 14480 |
| }, |
| { |
| "epoch": 0.7487985117048215, |
| "grad_norm": 0.46071046590805054, |
| "learning_rate": 0.0001527968420048884, |
| "loss": 0.7989, |
| "step": 14490 |
| }, |
| { |
| "epoch": 0.749315280864038, |
| "grad_norm": 0.4348960220813751, |
| "learning_rate": 0.00015185231163877035, |
| "loss": 0.834, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.7498320500232546, |
| "grad_norm": 0.42849427461624146, |
| "learning_rate": 0.00015091361999026458, |
| "loss": 0.7947, |
| "step": 14510 |
| }, |
| { |
| "epoch": 0.7503488191824712, |
| "grad_norm": 0.42904916405677795, |
| "learning_rate": 0.00014998073096670058, |
| "loss": 0.8235, |
| "step": 14520 |
| }, |
| { |
| "epoch": 0.7508655883416878, |
| "grad_norm": 0.4777064919471741, |
| "learning_rate": 0.0001490536086985185, |
| "loss": 0.8273, |
| "step": 14530 |
| }, |
| { |
| "epoch": 0.7513823575009043, |
| "grad_norm": 0.44165903329849243, |
| "learning_rate": 0.00014813221753789016, |
| "loss": 0.825, |
| "step": 14540 |
| }, |
| { |
| "epoch": 0.7518991266601209, |
| "grad_norm": 0.4439583122730255, |
| "learning_rate": 0.00014721652205734831, |
| "loss": 0.827, |
| "step": 14550 |
| }, |
| { |
| "epoch": 0.7524158958193375, |
| "grad_norm": 0.455435186624527, |
| "learning_rate": 0.00014630648704842445, |
| "loss": 0.8198, |
| "step": 14560 |
| }, |
| { |
| "epoch": 0.752932664978554, |
| "grad_norm": 0.4566732347011566, |
| "learning_rate": 0.00014540207752029508, |
| "loss": 0.8284, |
| "step": 14570 |
| }, |
| { |
| "epoch": 0.7534494341377707, |
| "grad_norm": 0.44228848814964294, |
| "learning_rate": 0.00014450325869843633, |
| "loss": 0.8191, |
| "step": 14580 |
| }, |
| { |
| "epoch": 0.7539662032969873, |
| "grad_norm": 0.445332795381546, |
| "learning_rate": 0.0001436099960232868, |
| "loss": 0.8131, |
| "step": 14590 |
| }, |
| { |
| "epoch": 0.7544829724562038, |
| "grad_norm": 0.4628824293613434, |
| "learning_rate": 0.0001427222551489188, |
| "loss": 0.8257, |
| "step": 14600 |
| }, |
| { |
| "epoch": 0.7549997416154204, |
| "grad_norm": 0.46374180912971497, |
| "learning_rate": 0.00014184000194171777, |
| "loss": 0.8334, |
| "step": 14610 |
| }, |
| { |
| "epoch": 0.755516510774637, |
| "grad_norm": 0.4505828320980072, |
| "learning_rate": 0.00014096320247906978, |
| "loss": 0.8203, |
| "step": 14620 |
| }, |
| { |
| "epoch": 0.7560332799338535, |
| "grad_norm": 0.4418148100376129, |
| "learning_rate": 0.00014009182304805726, |
| "loss": 0.8071, |
| "step": 14630 |
| }, |
| { |
| "epoch": 0.7565500490930701, |
| "grad_norm": 0.43000486493110657, |
| "learning_rate": 0.0001392258301441627, |
| "loss": 0.8223, |
| "step": 14640 |
| }, |
| { |
| "epoch": 0.7570668182522867, |
| "grad_norm": 0.4482291340827942, |
| "learning_rate": 0.0001383651904699805, |
| "loss": 0.8106, |
| "step": 14650 |
| }, |
| { |
| "epoch": 0.7575835874115033, |
| "grad_norm": 0.4472900629043579, |
| "learning_rate": 0.00013750987093393656, |
| "loss": 0.8196, |
| "step": 14660 |
| }, |
| { |
| "epoch": 0.7581003565707198, |
| "grad_norm": 0.45943567156791687, |
| "learning_rate": 0.00013665983864901587, |
| "loss": 0.8197, |
| "step": 14670 |
| }, |
| { |
| "epoch": 0.7586171257299364, |
| "grad_norm": 0.43818199634552, |
| "learning_rate": 0.00013581506093149825, |
| "loss": 0.8003, |
| "step": 14680 |
| }, |
| { |
| "epoch": 0.7591338948891531, |
| "grad_norm": 0.43463850021362305, |
| "learning_rate": 0.0001349755052997014, |
| "loss": 0.8086, |
| "step": 14690 |
| }, |
| { |
| "epoch": 0.7596506640483696, |
| "grad_norm": 0.4578488767147064, |
| "learning_rate": 0.00013414113947273217, |
| "loss": 0.8011, |
| "step": 14700 |
| }, |
| { |
| "epoch": 0.7601674332075862, |
| "grad_norm": 0.44629108905792236, |
| "learning_rate": 0.00013331193136924515, |
| "loss": 0.8086, |
| "step": 14710 |
| }, |
| { |
| "epoch": 0.7606842023668028, |
| "grad_norm": 0.4482209384441376, |
| "learning_rate": 0.00013248784910620945, |
| "loss": 0.7996, |
| "step": 14720 |
| }, |
| { |
| "epoch": 0.7612009715260193, |
| "grad_norm": 0.4447433650493622, |
| "learning_rate": 0.00013166886099768245, |
| "loss": 0.8162, |
| "step": 14730 |
| }, |
| { |
| "epoch": 0.7617177406852359, |
| "grad_norm": 0.44065767526626587, |
| "learning_rate": 0.00013085493555359173, |
| "loss": 0.826, |
| "step": 14740 |
| }, |
| { |
| "epoch": 0.7622345098444525, |
| "grad_norm": 0.47181805968284607, |
| "learning_rate": 0.00013004604147852416, |
| "loss": 0.8074, |
| "step": 14750 |
| }, |
| { |
| "epoch": 0.762751279003669, |
| "grad_norm": 0.44598037004470825, |
| "learning_rate": 0.00012924214767052268, |
| "loss": 0.8047, |
| "step": 14760 |
| }, |
| { |
| "epoch": 0.7632680481628856, |
| "grad_norm": 0.4688059091567993, |
| "learning_rate": 0.00012844322321989025, |
| "loss": 0.8076, |
| "step": 14770 |
| }, |
| { |
| "epoch": 0.7637848173221022, |
| "grad_norm": 0.47695672512054443, |
| "learning_rate": 0.00012764923740800162, |
| "loss": 0.7913, |
| "step": 14780 |
| }, |
| { |
| "epoch": 0.7643015864813187, |
| "grad_norm": 0.4601481556892395, |
| "learning_rate": 0.00012686015970612207, |
| "loss": 0.8122, |
| "step": 14790 |
| }, |
| { |
| "epoch": 0.7648183556405354, |
| "grad_norm": 0.46827730536460876, |
| "learning_rate": 0.0001260759597742335, |
| "loss": 0.8136, |
| "step": 14800 |
| }, |
| { |
| "epoch": 0.765335124799752, |
| "grad_norm": 0.43789979815483093, |
| "learning_rate": 0.00012529660745986808, |
| "loss": 0.8131, |
| "step": 14810 |
| }, |
| { |
| "epoch": 0.7658518939589686, |
| "grad_norm": 0.44412630796432495, |
| "learning_rate": 0.00012452207279694858, |
| "loss": 0.7994, |
| "step": 14820 |
| }, |
| { |
| "epoch": 0.7663686631181851, |
| "grad_norm": 0.44957849383354187, |
| "learning_rate": 0.00012375232600463646, |
| "loss": 0.801, |
| "step": 14830 |
| }, |
| { |
| "epoch": 0.7668854322774017, |
| "grad_norm": 0.4659784436225891, |
| "learning_rate": 0.0001229873374861867, |
| "loss": 0.8011, |
| "step": 14840 |
| }, |
| { |
| "epoch": 0.7674022014366183, |
| "grad_norm": 0.4447031617164612, |
| "learning_rate": 0.00012222707782780977, |
| "loss": 0.8132, |
| "step": 14850 |
| }, |
| { |
| "epoch": 0.7679189705958348, |
| "grad_norm": 0.45082828402519226, |
| "learning_rate": 0.00012147151779754062, |
| "loss": 0.8067, |
| "step": 14860 |
| }, |
| { |
| "epoch": 0.7684357397550514, |
| "grad_norm": 0.42726126313209534, |
| "learning_rate": 0.00012072062834411491, |
| "loss": 0.81, |
| "step": 14870 |
| }, |
| { |
| "epoch": 0.768952508914268, |
| "grad_norm": 0.46154364943504333, |
| "learning_rate": 0.00011997438059585174, |
| "loss": 0.8063, |
| "step": 14880 |
| }, |
| { |
| "epoch": 0.7694692780734845, |
| "grad_norm": 0.45202165842056274, |
| "learning_rate": 0.00011923274585954376, |
| "loss": 0.8066, |
| "step": 14890 |
| }, |
| { |
| "epoch": 0.7699860472327011, |
| "grad_norm": 0.43574896454811096, |
| "learning_rate": 0.00011849569561935377, |
| "loss": 0.8024, |
| "step": 14900 |
| }, |
| { |
| "epoch": 0.7705028163919178, |
| "grad_norm": 0.4647500514984131, |
| "learning_rate": 0.00011776320153571831, |
| "loss": 0.8047, |
| "step": 14910 |
| }, |
| { |
| "epoch": 0.7710195855511343, |
| "grad_norm": 0.4715510308742523, |
| "learning_rate": 0.00011703523544425804, |
| "loss": 0.8242, |
| "step": 14920 |
| }, |
| { |
| "epoch": 0.7715363547103509, |
| "grad_norm": 0.48043355345726013, |
| "learning_rate": 0.00011631176935469487, |
| "loss": 0.8014, |
| "step": 14930 |
| }, |
| { |
| "epoch": 0.7720531238695675, |
| "grad_norm": 0.45127764344215393, |
| "learning_rate": 0.00011559277544977559, |
| "loss": 0.8143, |
| "step": 14940 |
| }, |
| { |
| "epoch": 0.7725698930287841, |
| "grad_norm": 0.447942852973938, |
| "learning_rate": 0.0001148782260842024, |
| "loss": 0.815, |
| "step": 14950 |
| }, |
| { |
| "epoch": 0.7730866621880006, |
| "grad_norm": 0.4494159519672394, |
| "learning_rate": 0.00011416809378356995, |
| "loss": 0.8193, |
| "step": 14960 |
| }, |
| { |
| "epoch": 0.7736034313472172, |
| "grad_norm": 0.4411426782608032, |
| "learning_rate": 0.00011346235124330891, |
| "loss": 0.7971, |
| "step": 14970 |
| }, |
| { |
| "epoch": 0.7741202005064338, |
| "grad_norm": 0.4652232229709625, |
| "learning_rate": 0.0001127609713276361, |
| "loss": 0.8108, |
| "step": 14980 |
| }, |
| { |
| "epoch": 0.7746369696656503, |
| "grad_norm": 0.48985597491264343, |
| "learning_rate": 0.00011206392706851122, |
| "loss": 0.8061, |
| "step": 14990 |
| }, |
| { |
| "epoch": 0.7751537388248669, |
| "grad_norm": 0.4511886239051819, |
| "learning_rate": 0.00011137119166459977, |
| "loss": 0.8046, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.7756705079840835, |
| "grad_norm": 0.4621480405330658, |
| "learning_rate": 0.00011068273848024272, |
| "loss": 0.8116, |
| "step": 15010 |
| }, |
| { |
| "epoch": 0.7761872771433, |
| "grad_norm": 0.45318228006362915, |
| "learning_rate": 0.00010999854104443217, |
| "loss": 0.7992, |
| "step": 15020 |
| }, |
| { |
| "epoch": 0.7767040463025167, |
| "grad_norm": 0.46225494146347046, |
| "learning_rate": 0.00010931857304979372, |
| "loss": 0.8055, |
| "step": 15030 |
| }, |
| { |
| "epoch": 0.7772208154617333, |
| "grad_norm": 0.4576970934867859, |
| "learning_rate": 0.00010864280835157488, |
| "loss": 0.7918, |
| "step": 15040 |
| }, |
| { |
| "epoch": 0.7777375846209498, |
| "grad_norm": 0.43827998638153076, |
| "learning_rate": 0.00010797122096663975, |
| "loss": 0.8124, |
| "step": 15050 |
| }, |
| { |
| "epoch": 0.7782543537801664, |
| "grad_norm": 0.4270840883255005, |
| "learning_rate": 0.00010730378507247009, |
| "loss": 0.8027, |
| "step": 15060 |
| }, |
| { |
| "epoch": 0.778771122939383, |
| "grad_norm": 0.4645536243915558, |
| "learning_rate": 0.00010664047500617232, |
| "loss": 0.8103, |
| "step": 15070 |
| }, |
| { |
| "epoch": 0.7792878920985996, |
| "grad_norm": 0.4405182898044586, |
| "learning_rate": 0.00010598126526349083, |
| "loss": 0.7886, |
| "step": 15080 |
| }, |
| { |
| "epoch": 0.7798046612578161, |
| "grad_norm": 0.4572370648384094, |
| "learning_rate": 0.00010532613049782744, |
| "loss": 0.8021, |
| "step": 15090 |
| }, |
| { |
| "epoch": 0.7803214304170327, |
| "grad_norm": 0.4464896321296692, |
| "learning_rate": 0.00010467504551926664, |
| "loss": 0.7897, |
| "step": 15100 |
| }, |
| { |
| "epoch": 0.7808381995762493, |
| "grad_norm": 0.470245897769928, |
| "learning_rate": 0.00010402798529360717, |
| "loss": 0.8053, |
| "step": 15110 |
| }, |
| { |
| "epoch": 0.7813549687354658, |
| "grad_norm": 0.4271971583366394, |
| "learning_rate": 0.00010338492494139942, |
| "loss": 0.8144, |
| "step": 15120 |
| }, |
| { |
| "epoch": 0.7818717378946825, |
| "grad_norm": 0.45670023560523987, |
| "learning_rate": 0.00010274583973698883, |
| "loss": 0.8012, |
| "step": 15130 |
| }, |
| { |
| "epoch": 0.7823885070538991, |
| "grad_norm": 0.4224714934825897, |
| "learning_rate": 0.0001021107051075651, |
| "loss": 0.785, |
| "step": 15140 |
| }, |
| { |
| "epoch": 0.7829052762131156, |
| "grad_norm": 0.43493083119392395, |
| "learning_rate": 0.00010147949663221759, |
| "loss": 0.8028, |
| "step": 15150 |
| }, |
| { |
| "epoch": 0.7834220453723322, |
| "grad_norm": 0.4562802016735077, |
| "learning_rate": 0.00010085219004099603, |
| "loss": 0.8052, |
| "step": 15160 |
| }, |
| { |
| "epoch": 0.7839388145315488, |
| "grad_norm": 0.44530564546585083, |
| "learning_rate": 0.00010022876121397758, |
| "loss": 0.8073, |
| "step": 15170 |
| }, |
| { |
| "epoch": 0.7844555836907653, |
| "grad_norm": 0.5228975415229797, |
| "learning_rate": 9.960918618033934e-05, |
| "loss": 0.8089, |
| "step": 15180 |
| }, |
| { |
| "epoch": 0.7849723528499819, |
| "grad_norm": 0.44067102670669556, |
| "learning_rate": 9.899344111743661e-05, |
| "loss": 0.7955, |
| "step": 15190 |
| }, |
| { |
| "epoch": 0.7854891220091985, |
| "grad_norm": 0.474118173122406, |
| "learning_rate": 9.838150234988704e-05, |
| "loss": 0.7932, |
| "step": 15200 |
| }, |
| { |
| "epoch": 0.786005891168415, |
| "grad_norm": 0.4493066668510437, |
| "learning_rate": 9.777334634866019e-05, |
| "loss": 0.7938, |
| "step": 15210 |
| }, |
| { |
| "epoch": 0.7865226603276316, |
| "grad_norm": 0.44325533509254456, |
| "learning_rate": 9.716894973017291e-05, |
| "loss": 0.8098, |
| "step": 15220 |
| }, |
| { |
| "epoch": 0.7870394294868482, |
| "grad_norm": 0.44017842411994934, |
| "learning_rate": 9.656828925539026e-05, |
| "loss": 0.7872, |
| "step": 15230 |
| }, |
| { |
| "epoch": 0.7875561986460649, |
| "grad_norm": 0.4537578225135803, |
| "learning_rate": 9.597134182893185e-05, |
| "loss": 0.8046, |
| "step": 15240 |
| }, |
| { |
| "epoch": 0.7880729678052814, |
| "grad_norm": 0.43279150128364563, |
| "learning_rate": 9.5378084498184e-05, |
| "loss": 0.8155, |
| "step": 15250 |
| }, |
| { |
| "epoch": 0.788589736964498, |
| "grad_norm": 0.45793530344963074, |
| "learning_rate": 9.478849445241703e-05, |
| "loss": 0.8033, |
| "step": 15260 |
| }, |
| { |
| "epoch": 0.7891065061237146, |
| "grad_norm": 0.45037081837654114, |
| "learning_rate": 9.420254902190833e-05, |
| "loss": 0.7985, |
| "step": 15270 |
| }, |
| { |
| "epoch": 0.7896232752829311, |
| "grad_norm": 0.4623776972293854, |
| "learning_rate": 9.362022567707067e-05, |
| "loss": 0.8197, |
| "step": 15280 |
| }, |
| { |
| "epoch": 0.7901400444421477, |
| "grad_norm": 0.4537854790687561, |
| "learning_rate": 9.30415020275859e-05, |
| "loss": 0.7926, |
| "step": 15290 |
| }, |
| { |
| "epoch": 0.7906568136013643, |
| "grad_norm": 0.4492059648036957, |
| "learning_rate": 9.246635582154403e-05, |
| "loss": 0.7938, |
| "step": 15300 |
| }, |
| { |
| "epoch": 0.7911735827605808, |
| "grad_norm": 0.4396090805530548, |
| "learning_rate": 9.189476494458775e-05, |
| "loss": 0.7999, |
| "step": 15310 |
| }, |
| { |
| "epoch": 0.7916903519197974, |
| "grad_norm": 0.43469393253326416, |
| "learning_rate": 9.132670741906201e-05, |
| "loss": 0.7994, |
| "step": 15320 |
| }, |
| { |
| "epoch": 0.792207121079014, |
| "grad_norm": 0.44428810477256775, |
| "learning_rate": 9.076216140316906e-05, |
| "loss": 0.8043, |
| "step": 15330 |
| }, |
| { |
| "epoch": 0.7927238902382305, |
| "grad_norm": 0.4329991638660431, |
| "learning_rate": 9.02011051901286e-05, |
| "loss": 0.7877, |
| "step": 15340 |
| }, |
| { |
| "epoch": 0.7932406593974471, |
| "grad_norm": 0.4495084583759308, |
| "learning_rate": 8.964351720734322e-05, |
| "loss": 0.7969, |
| "step": 15350 |
| }, |
| { |
| "epoch": 0.7937574285566638, |
| "grad_norm": 0.4632558822631836, |
| "learning_rate": 8.908937601556875e-05, |
| "loss": 0.7895, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.7942741977158804, |
| "grad_norm": 0.44832077622413635, |
| "learning_rate": 8.853866030809016e-05, |
| "loss": 0.7928, |
| "step": 15370 |
| }, |
| { |
| "epoch": 0.7947909668750969, |
| "grad_norm": 0.4608152210712433, |
| "learning_rate": 8.799134890990218e-05, |
| "loss": 0.8033, |
| "step": 15380 |
| }, |
| { |
| "epoch": 0.7953077360343135, |
| "grad_norm": 0.45813852548599243, |
| "learning_rate": 8.744742077689513e-05, |
| "loss": 0.8127, |
| "step": 15390 |
| }, |
| { |
| "epoch": 0.7958245051935301, |
| "grad_norm": 0.4426814317703247, |
| "learning_rate": 8.69068549950458e-05, |
| "loss": 0.7939, |
| "step": 15400 |
| }, |
| { |
| "epoch": 0.7963412743527466, |
| "grad_norm": 0.4528840482234955, |
| "learning_rate": 8.636963077961332e-05, |
| "loss": 0.7889, |
| "step": 15410 |
| }, |
| { |
| "epoch": 0.7968580435119632, |
| "grad_norm": 0.4318794310092926, |
| "learning_rate": 8.583572747433989e-05, |
| "loss": 0.79, |
| "step": 15420 |
| }, |
| { |
| "epoch": 0.7973748126711798, |
| "grad_norm": 0.4563692808151245, |
| "learning_rate": 8.530512455065673e-05, |
| "loss": 0.7922, |
| "step": 15430 |
| }, |
| { |
| "epoch": 0.7978915818303963, |
| "grad_norm": 0.44473403692245483, |
| "learning_rate": 8.477780160689458e-05, |
| "loss": 0.7999, |
| "step": 15440 |
| }, |
| { |
| "epoch": 0.7984083509896129, |
| "grad_norm": 0.45080122351646423, |
| "learning_rate": 8.425373836749934e-05, |
| "loss": 0.7854, |
| "step": 15450 |
| }, |
| { |
| "epoch": 0.7989251201488295, |
| "grad_norm": 0.4660671055316925, |
| "learning_rate": 8.373291468225247e-05, |
| "loss": 0.8033, |
| "step": 15460 |
| }, |
| { |
| "epoch": 0.799441889308046, |
| "grad_norm": 0.43612638115882874, |
| "learning_rate": 8.321531052549621e-05, |
| "loss": 0.7975, |
| "step": 15470 |
| }, |
| { |
| "epoch": 0.7999586584672627, |
| "grad_norm": 0.44829973578453064, |
| "learning_rate": 8.270090599536357e-05, |
| "loss": 0.7865, |
| "step": 15480 |
| }, |
| { |
| "epoch": 0.8004754276264793, |
| "grad_norm": 0.4527774751186371, |
| "learning_rate": 8.218968131301314e-05, |
| "loss": 0.7994, |
| "step": 15490 |
| }, |
| { |
| "epoch": 0.8009921967856958, |
| "grad_norm": 0.46482163667678833, |
| "learning_rate": 8.16816168218686e-05, |
| "loss": 0.7949, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.8015089659449124, |
| "grad_norm": 0.4425605535507202, |
| "learning_rate": 8.117669298686285e-05, |
| "loss": 0.7708, |
| "step": 15510 |
| }, |
| { |
| "epoch": 0.802025735104129, |
| "grad_norm": 0.4287862777709961, |
| "learning_rate": 8.0674890393687e-05, |
| "loss": 0.801, |
| "step": 15520 |
| }, |
| { |
| "epoch": 0.8025425042633456, |
| "grad_norm": 0.4485211670398712, |
| "learning_rate": 8.017618974804377e-05, |
| "loss": 0.7876, |
| "step": 15530 |
| }, |
| { |
| "epoch": 0.8030592734225621, |
| "grad_norm": 0.43715623021125793, |
| "learning_rate": 7.968057187490574e-05, |
| "loss": 0.7984, |
| "step": 15540 |
| }, |
| { |
| "epoch": 0.8035760425817787, |
| "grad_norm": 0.4431898891925812, |
| "learning_rate": 7.918801771777797e-05, |
| "loss": 0.787, |
| "step": 15550 |
| }, |
| { |
| "epoch": 0.8040928117409953, |
| "grad_norm": 0.4634036421775818, |
| "learning_rate": 7.869850833796537e-05, |
| "loss": 0.8002, |
| "step": 15560 |
| }, |
| { |
| "epoch": 0.8046095809002118, |
| "grad_norm": 0.4434111416339874, |
| "learning_rate": 7.821202491384445e-05, |
| "loss": 0.7827, |
| "step": 15570 |
| }, |
| { |
| "epoch": 0.8051263500594285, |
| "grad_norm": 0.4345285892486572, |
| "learning_rate": 7.77285487401396e-05, |
| "loss": 0.7983, |
| "step": 15580 |
| }, |
| { |
| "epoch": 0.8056431192186451, |
| "grad_norm": 0.4299919605255127, |
| "learning_rate": 7.724806122720396e-05, |
| "loss": 0.7777, |
| "step": 15590 |
| }, |
| { |
| "epoch": 0.8061598883778616, |
| "grad_norm": 0.44167646765708923, |
| "learning_rate": 7.677054390030455e-05, |
| "loss": 0.7967, |
| "step": 15600 |
| }, |
| { |
| "epoch": 0.8066766575370782, |
| "grad_norm": 0.4805566370487213, |
| "learning_rate": 7.629597839891209e-05, |
| "loss": 0.809, |
| "step": 15610 |
| }, |
| { |
| "epoch": 0.8071934266962948, |
| "grad_norm": 0.4554888606071472, |
| "learning_rate": 7.582434647599476e-05, |
| "loss": 0.792, |
| "step": 15620 |
| }, |
| { |
| "epoch": 0.8077101958555113, |
| "grad_norm": 0.4604235887527466, |
| "learning_rate": 7.535562999731686e-05, |
| "loss": 0.7825, |
| "step": 15630 |
| }, |
| { |
| "epoch": 0.8082269650147279, |
| "grad_norm": 0.47276201844215393, |
| "learning_rate": 7.488981094074143e-05, |
| "loss": 0.7981, |
| "step": 15640 |
| }, |
| { |
| "epoch": 0.8087437341739445, |
| "grad_norm": 0.46937987208366394, |
| "learning_rate": 7.442687139553729e-05, |
| "loss": 0.7825, |
| "step": 15650 |
| }, |
| { |
| "epoch": 0.8092605033331611, |
| "grad_norm": 0.44667670130729675, |
| "learning_rate": 7.396679356169044e-05, |
| "loss": 0.7788, |
| "step": 15660 |
| }, |
| { |
| "epoch": 0.8097772724923776, |
| "grad_norm": 0.4452296197414398, |
| "learning_rate": 7.35095597492196e-05, |
| "loss": 0.7962, |
| "step": 15670 |
| }, |
| { |
| "epoch": 0.8102940416515942, |
| "grad_norm": 0.47155633568763733, |
| "learning_rate": 7.3055152377496e-05, |
| "loss": 0.7937, |
| "step": 15680 |
| }, |
| { |
| "epoch": 0.8108108108108109, |
| "grad_norm": 0.4572817087173462, |
| "learning_rate": 7.260355397456748e-05, |
| "loss": 0.7911, |
| "step": 15690 |
| }, |
| { |
| "epoch": 0.8113275799700274, |
| "grad_norm": 0.4582803547382355, |
| "learning_rate": 7.21547471764867e-05, |
| "loss": 0.7832, |
| "step": 15700 |
| }, |
| { |
| "epoch": 0.811844349129244, |
| "grad_norm": 0.45184165239334106, |
| "learning_rate": 7.170871472664335e-05, |
| "loss": 0.7896, |
| "step": 15710 |
| }, |
| { |
| "epoch": 0.8123611182884606, |
| "grad_norm": 0.462866872549057, |
| "learning_rate": 7.126543947510089e-05, |
| "loss": 0.8053, |
| "step": 15720 |
| }, |
| { |
| "epoch": 0.8128778874476771, |
| "grad_norm": 0.4350687265396118, |
| "learning_rate": 7.082490437793685e-05, |
| "loss": 0.7901, |
| "step": 15730 |
| }, |
| { |
| "epoch": 0.8133946566068937, |
| "grad_norm": 0.48868757486343384, |
| "learning_rate": 7.03870924965877e-05, |
| "loss": 0.7932, |
| "step": 15740 |
| }, |
| { |
| "epoch": 0.8139114257661103, |
| "grad_norm": 0.4378123879432678, |
| "learning_rate": 6.995198699719745e-05, |
| "loss": 0.8041, |
| "step": 15750 |
| }, |
| { |
| "epoch": 0.8144281949253268, |
| "grad_norm": 0.43519341945648193, |
| "learning_rate": 6.95195711499705e-05, |
| "loss": 0.7868, |
| "step": 15760 |
| }, |
| { |
| "epoch": 0.8149449640845434, |
| "grad_norm": 0.434491366147995, |
| "learning_rate": 6.908982832852821e-05, |
| "loss": 0.7872, |
| "step": 15770 |
| }, |
| { |
| "epoch": 0.81546173324376, |
| "grad_norm": 0.44694221019744873, |
| "learning_rate": 6.86627420092698e-05, |
| "loss": 0.7804, |
| "step": 15780 |
| }, |
| { |
| "epoch": 0.8159785024029765, |
| "grad_norm": 0.4496343731880188, |
| "learning_rate": 6.823829577073686e-05, |
| "loss": 0.7805, |
| "step": 15790 |
| }, |
| { |
| "epoch": 0.8164952715621931, |
| "grad_norm": 0.4403352737426758, |
| "learning_rate": 6.781647329298209e-05, |
| "loss": 0.7783, |
| "step": 15800 |
| }, |
| { |
| "epoch": 0.8170120407214098, |
| "grad_norm": 0.43307387828826904, |
| "learning_rate": 6.739725835694167e-05, |
| "loss": 0.7883, |
| "step": 15810 |
| }, |
| { |
| "epoch": 0.8175288098806264, |
| "grad_norm": 0.4405989646911621, |
| "learning_rate": 6.698063484381174e-05, |
| "loss": 0.7945, |
| "step": 15820 |
| }, |
| { |
| "epoch": 0.8180455790398429, |
| "grad_norm": 0.46816104650497437, |
| "learning_rate": 6.656658673442854e-05, |
| "loss": 0.7719, |
| "step": 15830 |
| }, |
| { |
| "epoch": 0.8185623481990595, |
| "grad_norm": 0.4712413251399994, |
| "learning_rate": 6.615509810865257e-05, |
| "loss": 0.8033, |
| "step": 15840 |
| }, |
| { |
| "epoch": 0.8190791173582761, |
| "grad_norm": 0.45156368613243103, |
| "learning_rate": 6.574615314475637e-05, |
| "loss": 0.7981, |
| "step": 15850 |
| }, |
| { |
| "epoch": 0.8195958865174926, |
| "grad_norm": 0.44122111797332764, |
| "learning_rate": 6.533973611881624e-05, |
| "loss": 0.7945, |
| "step": 15860 |
| }, |
| { |
| "epoch": 0.8201126556767092, |
| "grad_norm": 0.4496499001979828, |
| "learning_rate": 6.493583140410763e-05, |
| "loss": 0.7858, |
| "step": 15870 |
| }, |
| { |
| "epoch": 0.8206294248359258, |
| "grad_norm": 0.4501078128814697, |
| "learning_rate": 6.453442347050426e-05, |
| "loss": 0.7928, |
| "step": 15880 |
| }, |
| { |
| "epoch": 0.8211461939951423, |
| "grad_norm": 0.4360281825065613, |
| "learning_rate": 6.413549688388107e-05, |
| "loss": 0.787, |
| "step": 15890 |
| }, |
| { |
| "epoch": 0.8216629631543589, |
| "grad_norm": 0.4398462176322937, |
| "learning_rate": 6.37390363055207e-05, |
| "loss": 0.7736, |
| "step": 15900 |
| }, |
| { |
| "epoch": 0.8221797323135756, |
| "grad_norm": 0.44592639803886414, |
| "learning_rate": 6.334502649152376e-05, |
| "loss": 0.7869, |
| "step": 15910 |
| }, |
| { |
| "epoch": 0.822696501472792, |
| "grad_norm": 0.44563406705856323, |
| "learning_rate": 6.295345229222268e-05, |
| "loss": 0.7859, |
| "step": 15920 |
| }, |
| { |
| "epoch": 0.8232132706320087, |
| "grad_norm": 0.46638575196266174, |
| "learning_rate": 6.256429865159924e-05, |
| "loss": 0.7921, |
| "step": 15930 |
| }, |
| { |
| "epoch": 0.8237300397912253, |
| "grad_norm": 0.458056777715683, |
| "learning_rate": 6.217755060670557e-05, |
| "loss": 0.7799, |
| "step": 15940 |
| }, |
| { |
| "epoch": 0.8242468089504419, |
| "grad_norm": 0.4988017976284027, |
| "learning_rate": 6.1793193287089e-05, |
| "loss": 0.7771, |
| "step": 15950 |
| }, |
| { |
| "epoch": 0.8247635781096584, |
| "grad_norm": 0.44715121388435364, |
| "learning_rate": 6.141121191422011e-05, |
| "loss": 0.7974, |
| "step": 15960 |
| }, |
| { |
| "epoch": 0.825280347268875, |
| "grad_norm": 0.45090383291244507, |
| "learning_rate": 6.1031591800924596e-05, |
| "loss": 0.7683, |
| "step": 15970 |
| }, |
| { |
| "epoch": 0.8257971164280916, |
| "grad_norm": 0.43011826276779175, |
| "learning_rate": 6.0654318350818545e-05, |
| "loss": 0.7791, |
| "step": 15980 |
| }, |
| { |
| "epoch": 0.8263138855873081, |
| "grad_norm": 0.4606122672557831, |
| "learning_rate": 6.027937705774713e-05, |
| "loss": 0.7998, |
| "step": 15990 |
| }, |
| { |
| "epoch": 0.8268306547465247, |
| "grad_norm": 0.4207383096218109, |
| "learning_rate": 5.9906753505226956e-05, |
| "loss": 0.7785, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.8273474239057413, |
| "grad_norm": 0.4336974620819092, |
| "learning_rate": 5.953643336589173e-05, |
| "loss": 0.7834, |
| "step": 16010 |
| }, |
| { |
| "epoch": 0.8278641930649578, |
| "grad_norm": 0.4548156261444092, |
| "learning_rate": 5.916840240094121e-05, |
| "loss": 0.7922, |
| "step": 16020 |
| }, |
| { |
| "epoch": 0.8283809622241745, |
| "grad_norm": 0.43436485528945923, |
| "learning_rate": 5.880264645959399e-05, |
| "loss": 0.7804, |
| "step": 16030 |
| }, |
| { |
| "epoch": 0.8288977313833911, |
| "grad_norm": 0.4377012252807617, |
| "learning_rate": 5.843915147854316e-05, |
| "loss": 0.7718, |
| "step": 16040 |
| }, |
| { |
| "epoch": 0.8294145005426076, |
| "grad_norm": 0.46145206689834595, |
| "learning_rate": 5.807790348141579e-05, |
| "loss": 0.7888, |
| "step": 16050 |
| }, |
| { |
| "epoch": 0.8299312697018242, |
| "grad_norm": 0.444749116897583, |
| "learning_rate": 5.771888857823527e-05, |
| "loss": 0.7978, |
| "step": 16060 |
| }, |
| { |
| "epoch": 0.8304480388610408, |
| "grad_norm": 0.4541518986225128, |
| "learning_rate": 5.736209296488757e-05, |
| "loss": 0.7849, |
| "step": 16070 |
| }, |
| { |
| "epoch": 0.8309648080202574, |
| "grad_norm": 0.43136441707611084, |
| "learning_rate": 5.7007502922590154e-05, |
| "loss": 0.7924, |
| "step": 16080 |
| }, |
| { |
| "epoch": 0.8314815771794739, |
| "grad_norm": 0.4634501338005066, |
| "learning_rate": 5.665510481736475e-05, |
| "loss": 0.7966, |
| "step": 16090 |
| }, |
| { |
| "epoch": 0.8319983463386905, |
| "grad_norm": 0.45138517022132874, |
| "learning_rate": 5.63048850995129e-05, |
| "loss": 0.783, |
| "step": 16100 |
| }, |
| { |
| "epoch": 0.8325151154979071, |
| "grad_norm": 0.45926496386528015, |
| "learning_rate": 5.59568303030952e-05, |
| "loss": 0.7903, |
| "step": 16110 |
| }, |
| { |
| "epoch": 0.8330318846571236, |
| "grad_norm": 0.4217846691608429, |
| "learning_rate": 5.561092704541337e-05, |
| "loss": 0.765, |
| "step": 16120 |
| }, |
| { |
| "epoch": 0.8335486538163402, |
| "grad_norm": 0.46820348501205444, |
| "learning_rate": 5.526716202649569e-05, |
| "loss": 0.7917, |
| "step": 16130 |
| }, |
| { |
| "epoch": 0.8340654229755569, |
| "grad_norm": 0.45810696482658386, |
| "learning_rate": 5.492552202858579e-05, |
| "loss": 0.7771, |
| "step": 16140 |
| }, |
| { |
| "epoch": 0.8345821921347734, |
| "grad_norm": 0.45739495754241943, |
| "learning_rate": 5.458599391563416e-05, |
| "loss": 0.7949, |
| "step": 16150 |
| }, |
| { |
| "epoch": 0.83509896129399, |
| "grad_norm": 0.45775654911994934, |
| "learning_rate": 5.4248564632793354e-05, |
| "loss": 0.7748, |
| "step": 16160 |
| }, |
| { |
| "epoch": 0.8356157304532066, |
| "grad_norm": 0.471780925989151, |
| "learning_rate": 5.3913221205915764e-05, |
| "loss": 0.7908, |
| "step": 16170 |
| }, |
| { |
| "epoch": 0.8361324996124231, |
| "grad_norm": 0.4380318522453308, |
| "learning_rate": 5.3579950741055e-05, |
| "loss": 0.7871, |
| "step": 16180 |
| }, |
| { |
| "epoch": 0.8366492687716397, |
| "grad_norm": 0.45614588260650635, |
| "learning_rate": 5.324874042396992e-05, |
| "loss": 0.7717, |
| "step": 16190 |
| }, |
| { |
| "epoch": 0.8371660379308563, |
| "grad_norm": 0.42838895320892334, |
| "learning_rate": 5.29195775196321e-05, |
| "loss": 0.7816, |
| "step": 16200 |
| }, |
| { |
| "epoch": 0.8376828070900728, |
| "grad_norm": 0.47133561968803406, |
| "learning_rate": 5.259244937173599e-05, |
| "loss": 0.7732, |
| "step": 16210 |
| }, |
| { |
| "epoch": 0.8381995762492894, |
| "grad_norm": 0.42173993587493896, |
| "learning_rate": 5.226734340221249e-05, |
| "loss": 0.7687, |
| "step": 16220 |
| }, |
| { |
| "epoch": 0.838716345408506, |
| "grad_norm": 0.42915183305740356, |
| "learning_rate": 5.194424711074507e-05, |
| "loss": 0.7866, |
| "step": 16230 |
| }, |
| { |
| "epoch": 0.8392331145677226, |
| "grad_norm": 0.4370039999485016, |
| "learning_rate": 5.1623148074289386e-05, |
| "loss": 0.7855, |
| "step": 16240 |
| }, |
| { |
| "epoch": 0.8397498837269392, |
| "grad_norm": 0.4343273937702179, |
| "learning_rate": 5.130403394659548e-05, |
| "loss": 0.7871, |
| "step": 16250 |
| }, |
| { |
| "epoch": 0.8402666528861558, |
| "grad_norm": 0.4628264009952545, |
| "learning_rate": 5.0986892457733016e-05, |
| "loss": 0.7929, |
| "step": 16260 |
| }, |
| { |
| "epoch": 0.8407834220453724, |
| "grad_norm": 0.4544295072555542, |
| "learning_rate": 5.067171141361967e-05, |
| "loss": 0.7823, |
| "step": 16270 |
| }, |
| { |
| "epoch": 0.8413001912045889, |
| "grad_norm": 0.46135464310646057, |
| "learning_rate": 5.035847869555207e-05, |
| "loss": 0.7747, |
| "step": 16280 |
| }, |
| { |
| "epoch": 0.8418169603638055, |
| "grad_norm": 0.44259122014045715, |
| "learning_rate": 5.004718225974004e-05, |
| "loss": 0.7836, |
| "step": 16290 |
| }, |
| { |
| "epoch": 0.8423337295230221, |
| "grad_norm": 0.44478118419647217, |
| "learning_rate": 4.9737810136843286e-05, |
| "loss": 0.7664, |
| "step": 16300 |
| }, |
| { |
| "epoch": 0.8428504986822386, |
| "grad_norm": 0.44629231095314026, |
| "learning_rate": 4.943035043151143e-05, |
| "loss": 0.7906, |
| "step": 16310 |
| }, |
| { |
| "epoch": 0.8433672678414552, |
| "grad_norm": 0.4398927092552185, |
| "learning_rate": 4.912479132192638e-05, |
| "loss": 0.7835, |
| "step": 16320 |
| }, |
| { |
| "epoch": 0.8438840370006718, |
| "grad_norm": 0.4557620882987976, |
| "learning_rate": 4.882112105934801e-05, |
| "loss": 0.7727, |
| "step": 16330 |
| }, |
| { |
| "epoch": 0.8444008061598883, |
| "grad_norm": 0.45272544026374817, |
| "learning_rate": 4.851932796766221e-05, |
| "loss": 0.781, |
| "step": 16340 |
| }, |
| { |
| "epoch": 0.8449175753191049, |
| "grad_norm": 0.44196563959121704, |
| "learning_rate": 4.821940044293212e-05, |
| "loss": 0.7867, |
| "step": 16350 |
| }, |
| { |
| "epoch": 0.8454343444783216, |
| "grad_norm": 0.44495323300361633, |
| "learning_rate": 4.79213269529519e-05, |
| "loss": 0.7791, |
| "step": 16360 |
| }, |
| { |
| "epoch": 0.8459511136375382, |
| "grad_norm": 0.4298705756664276, |
| "learning_rate": 4.76250960368032e-05, |
| "loss": 0.7924, |
| "step": 16370 |
| }, |
| { |
| "epoch": 0.8464678827967547, |
| "grad_norm": 0.4538145065307617, |
| "learning_rate": 4.7330696304414696e-05, |
| "loss": 0.801, |
| "step": 16380 |
| }, |
| { |
| "epoch": 0.8469846519559713, |
| "grad_norm": 0.437732458114624, |
| "learning_rate": 4.703811643612394e-05, |
| "loss": 0.7953, |
| "step": 16390 |
| }, |
| { |
| "epoch": 0.8475014211151879, |
| "grad_norm": 0.441617876291275, |
| "learning_rate": 4.674734518224231e-05, |
| "loss": 0.772, |
| "step": 16400 |
| }, |
| { |
| "epoch": 0.8480181902744044, |
| "grad_norm": 0.42918652296066284, |
| "learning_rate": 4.645837136262228e-05, |
| "loss": 0.7839, |
| "step": 16410 |
| }, |
| { |
| "epoch": 0.848534959433621, |
| "grad_norm": 0.44365042448043823, |
| "learning_rate": 4.617118386622768e-05, |
| "loss": 0.7774, |
| "step": 16420 |
| }, |
| { |
| "epoch": 0.8490517285928376, |
| "grad_norm": 0.43790024518966675, |
| "learning_rate": 4.588577165070638e-05, |
| "loss": 0.7821, |
| "step": 16430 |
| }, |
| { |
| "epoch": 0.8495684977520541, |
| "grad_norm": 0.4523584246635437, |
| "learning_rate": 4.5602123741965806e-05, |
| "loss": 0.7689, |
| "step": 16440 |
| }, |
| { |
| "epoch": 0.8500852669112707, |
| "grad_norm": 0.438987135887146, |
| "learning_rate": 4.5320229233750884e-05, |
| "loss": 0.7774, |
| "step": 16450 |
| }, |
| { |
| "epoch": 0.8506020360704873, |
| "grad_norm": 0.4385901987552643, |
| "learning_rate": 4.504007728722478e-05, |
| "loss": 0.7767, |
| "step": 16460 |
| }, |
| { |
| "epoch": 0.8511188052297038, |
| "grad_norm": 0.44286254048347473, |
| "learning_rate": 4.4761657130552136e-05, |
| "loss": 0.7893, |
| "step": 16470 |
| }, |
| { |
| "epoch": 0.8516355743889205, |
| "grad_norm": 0.43227192759513855, |
| "learning_rate": 4.448495805848479e-05, |
| "loss": 0.7632, |
| "step": 16480 |
| }, |
| { |
| "epoch": 0.8521523435481371, |
| "grad_norm": 0.4544907510280609, |
| "learning_rate": 4.420996943195034e-05, |
| "loss": 0.7812, |
| "step": 16490 |
| }, |
| { |
| "epoch": 0.8526691127073536, |
| "grad_norm": 0.46841660141944885, |
| "learning_rate": 4.393668067764288e-05, |
| "loss": 0.7712, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.8531858818665702, |
| "grad_norm": 0.45919257402420044, |
| "learning_rate": 4.3665081287616635e-05, |
| "loss": 0.7757, |
| "step": 16510 |
| }, |
| { |
| "epoch": 0.8537026510257868, |
| "grad_norm": 0.44672319293022156, |
| "learning_rate": 4.339516081888175e-05, |
| "loss": 0.7787, |
| "step": 16520 |
| }, |
| { |
| "epoch": 0.8542194201850034, |
| "grad_norm": 0.445287823677063, |
| "learning_rate": 4.312690889300296e-05, |
| "loss": 0.7787, |
| "step": 16530 |
| }, |
| { |
| "epoch": 0.8547361893442199, |
| "grad_norm": 0.46268194913864136, |
| "learning_rate": 4.286031519570033e-05, |
| "loss": 0.7757, |
| "step": 16540 |
| }, |
| { |
| "epoch": 0.8552529585034365, |
| "grad_norm": 0.434190034866333, |
| "learning_rate": 4.2595369476452845e-05, |
| "loss": 0.7733, |
| "step": 16550 |
| }, |
| { |
| "epoch": 0.8557697276626531, |
| "grad_norm": 0.4440845549106598, |
| "learning_rate": 4.233206154810416e-05, |
| "loss": 0.7667, |
| "step": 16560 |
| }, |
| { |
| "epoch": 0.8562864968218696, |
| "grad_norm": 0.43531450629234314, |
| "learning_rate": 4.2070381286470965e-05, |
| "loss": 0.7712, |
| "step": 16570 |
| }, |
| { |
| "epoch": 0.8568032659810862, |
| "grad_norm": 0.46447721123695374, |
| "learning_rate": 4.181031862995373e-05, |
| "loss": 0.7679, |
| "step": 16580 |
| }, |
| { |
| "epoch": 0.8573200351403029, |
| "grad_norm": 0.43442919850349426, |
| "learning_rate": 4.155186357914973e-05, |
| "loss": 0.7959, |
| "step": 16590 |
| }, |
| { |
| "epoch": 0.8578368042995194, |
| "grad_norm": 0.4343065917491913, |
| "learning_rate": 4.129500619646871e-05, |
| "loss": 0.7829, |
| "step": 16600 |
| }, |
| { |
| "epoch": 0.858353573458736, |
| "grad_norm": 0.4585905969142914, |
| "learning_rate": 4.103973660575065e-05, |
| "loss": 0.7786, |
| "step": 16610 |
| }, |
| { |
| "epoch": 0.8588703426179526, |
| "grad_norm": 0.43392133712768555, |
| "learning_rate": 4.078604499188617e-05, |
| "loss": 0.7773, |
| "step": 16620 |
| }, |
| { |
| "epoch": 0.8593871117771691, |
| "grad_norm": 0.43312516808509827, |
| "learning_rate": 4.053392160043896e-05, |
| "loss": 0.7678, |
| "step": 16630 |
| }, |
| { |
| "epoch": 0.8599038809363857, |
| "grad_norm": 0.4381249248981476, |
| "learning_rate": 4.028335673727093e-05, |
| "loss": 0.7724, |
| "step": 16640 |
| }, |
| { |
| "epoch": 0.8604206500956023, |
| "grad_norm": 0.4337814152240753, |
| "learning_rate": 4.0034340768169274e-05, |
| "loss": 0.7823, |
| "step": 16650 |
| }, |
| { |
| "epoch": 0.8609374192548189, |
| "grad_norm": 0.4437348246574402, |
| "learning_rate": 3.978686411847619e-05, |
| "loss": 0.7926, |
| "step": 16660 |
| }, |
| { |
| "epoch": 0.8614541884140354, |
| "grad_norm": 0.4341773986816406, |
| "learning_rate": 3.954091727272062e-05, |
| "loss": 0.7826, |
| "step": 16670 |
| }, |
| { |
| "epoch": 0.861970957573252, |
| "grad_norm": 0.44881367683410645, |
| "learning_rate": 3.929649077425246e-05, |
| "loss": 0.7704, |
| "step": 16680 |
| }, |
| { |
| "epoch": 0.8624877267324687, |
| "grad_norm": 0.4502032697200775, |
| "learning_rate": 3.9053575224878926e-05, |
| "loss": 0.7816, |
| "step": 16690 |
| }, |
| { |
| "epoch": 0.8630044958916852, |
| "grad_norm": 0.47224000096321106, |
| "learning_rate": 3.881216128450315e-05, |
| "loss": 0.7736, |
| "step": 16700 |
| }, |
| { |
| "epoch": 0.8635212650509018, |
| "grad_norm": 0.4375690221786499, |
| "learning_rate": 3.857223967076515e-05, |
| "loss": 0.7812, |
| "step": 16710 |
| }, |
| { |
| "epoch": 0.8640380342101184, |
| "grad_norm": 0.4506520926952362, |
| "learning_rate": 3.833380115868479e-05, |
| "loss": 0.7993, |
| "step": 16720 |
| }, |
| { |
| "epoch": 0.8645548033693349, |
| "grad_norm": 0.45109614729881287, |
| "learning_rate": 3.809683658030725e-05, |
| "loss": 0.7912, |
| "step": 16730 |
| }, |
| { |
| "epoch": 0.8650715725285515, |
| "grad_norm": 0.4557834267616272, |
| "learning_rate": 3.7861336824350335e-05, |
| "loss": 0.7775, |
| "step": 16740 |
| }, |
| { |
| "epoch": 0.8655883416877681, |
| "grad_norm": 0.43183183670043945, |
| "learning_rate": 3.7627292835854304e-05, |
| "loss": 0.7656, |
| "step": 16750 |
| }, |
| { |
| "epoch": 0.8661051108469846, |
| "grad_norm": 0.43516460061073303, |
| "learning_rate": 3.7394695615833586e-05, |
| "loss": 0.7817, |
| "step": 16760 |
| }, |
| { |
| "epoch": 0.8666218800062012, |
| "grad_norm": 0.45719340443611145, |
| "learning_rate": 3.7163536220930875e-05, |
| "loss": 0.7886, |
| "step": 16770 |
| }, |
| { |
| "epoch": 0.8671386491654178, |
| "grad_norm": 0.45268991589546204, |
| "learning_rate": 3.693380576307314e-05, |
| "loss": 0.7874, |
| "step": 16780 |
| }, |
| { |
| "epoch": 0.8676554183246343, |
| "grad_norm": 0.43802937865257263, |
| "learning_rate": 3.6705495409130015e-05, |
| "loss": 0.7802, |
| "step": 16790 |
| }, |
| { |
| "epoch": 0.8681721874838509, |
| "grad_norm": 0.44268324971199036, |
| "learning_rate": 3.647859638057403e-05, |
| "loss": 0.7695, |
| "step": 16800 |
| }, |
| { |
| "epoch": 0.8686889566430676, |
| "grad_norm": 0.4444487988948822, |
| "learning_rate": 3.625309995314319e-05, |
| "loss": 0.7831, |
| "step": 16810 |
| }, |
| { |
| "epoch": 0.8692057258022842, |
| "grad_norm": 0.4433843493461609, |
| "learning_rate": 3.602899745650546e-05, |
| "loss": 0.7795, |
| "step": 16820 |
| }, |
| { |
| "epoch": 0.8697224949615007, |
| "grad_norm": 0.45644548535346985, |
| "learning_rate": 3.580628027392539e-05, |
| "loss": 0.7705, |
| "step": 16830 |
| }, |
| { |
| "epoch": 0.8702392641207173, |
| "grad_norm": 0.4484211802482605, |
| "learning_rate": 3.558493984193286e-05, |
| "loss": 0.7708, |
| "step": 16840 |
| }, |
| { |
| "epoch": 0.8707560332799339, |
| "grad_norm": 0.46782976388931274, |
| "learning_rate": 3.536496764999374e-05, |
| "loss": 0.7723, |
| "step": 16850 |
| }, |
| { |
| "epoch": 0.8712728024391504, |
| "grad_norm": 0.4290997087955475, |
| "learning_rate": 3.5146355240182734e-05, |
| "loss": 0.7832, |
| "step": 16860 |
| }, |
| { |
| "epoch": 0.871789571598367, |
| "grad_norm": 0.449011892080307, |
| "learning_rate": 3.492909420685807e-05, |
| "loss": 0.786, |
| "step": 16870 |
| }, |
| { |
| "epoch": 0.8723063407575836, |
| "grad_norm": 0.4471029043197632, |
| "learning_rate": 3.471317619633846e-05, |
| "loss": 0.7797, |
| "step": 16880 |
| }, |
| { |
| "epoch": 0.8728231099168001, |
| "grad_norm": 0.41699501872062683, |
| "learning_rate": 3.449859290658173e-05, |
| "loss": 0.7732, |
| "step": 16890 |
| }, |
| { |
| "epoch": 0.8733398790760167, |
| "grad_norm": 0.42831024527549744, |
| "learning_rate": 3.428533608686573e-05, |
| "loss": 0.7711, |
| "step": 16900 |
| }, |
| { |
| "epoch": 0.8738566482352333, |
| "grad_norm": 0.44072601199150085, |
| "learning_rate": 3.407339753747102e-05, |
| "loss": 0.7796, |
| "step": 16910 |
| }, |
| { |
| "epoch": 0.8743734173944498, |
| "grad_norm": 0.43595975637435913, |
| "learning_rate": 3.386276910936564e-05, |
| "loss": 0.7583, |
| "step": 16920 |
| }, |
| { |
| "epoch": 0.8748901865536665, |
| "grad_norm": 0.47178915143013, |
| "learning_rate": 3.365344270389179e-05, |
| "loss": 0.7815, |
| "step": 16930 |
| }, |
| { |
| "epoch": 0.8754069557128831, |
| "grad_norm": 0.4295157790184021, |
| "learning_rate": 3.344541027245434e-05, |
| "loss": 0.7664, |
| "step": 16940 |
| }, |
| { |
| "epoch": 0.8759237248720997, |
| "grad_norm": 0.43913745880126953, |
| "learning_rate": 3.323866381621149e-05, |
| "loss": 0.767, |
| "step": 16950 |
| }, |
| { |
| "epoch": 0.8764404940313162, |
| "grad_norm": 0.4710383415222168, |
| "learning_rate": 3.3033195385767116e-05, |
| "loss": 0.7841, |
| "step": 16960 |
| }, |
| { |
| "epoch": 0.8769572631905328, |
| "grad_norm": 0.4420885443687439, |
| "learning_rate": 3.282899708086518e-05, |
| "loss": 0.7809, |
| "step": 16970 |
| }, |
| { |
| "epoch": 0.8774740323497494, |
| "grad_norm": 0.4158540666103363, |
| "learning_rate": 3.262606105008591e-05, |
| "loss": 0.7677, |
| "step": 16980 |
| }, |
| { |
| "epoch": 0.8779908015089659, |
| "grad_norm": 0.4570242464542389, |
| "learning_rate": 3.242437949054398e-05, |
| "loss": 0.7651, |
| "step": 16990 |
| }, |
| { |
| "epoch": 0.8785075706681825, |
| "grad_norm": 0.4389027953147888, |
| "learning_rate": 3.2223944647588423e-05, |
| "loss": 0.7688, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.8790243398273991, |
| "grad_norm": 0.4603040814399719, |
| "learning_rate": 3.202474881450452e-05, |
| "loss": 0.7836, |
| "step": 17010 |
| }, |
| { |
| "epoch": 0.8795411089866156, |
| "grad_norm": 0.43595853447914124, |
| "learning_rate": 3.18267843322174e-05, |
| "loss": 0.7632, |
| "step": 17020 |
| }, |
| { |
| "epoch": 0.8800578781458323, |
| "grad_norm": 0.45017024874687195, |
| "learning_rate": 3.163004358899766e-05, |
| "loss": 0.7783, |
| "step": 17030 |
| }, |
| { |
| "epoch": 0.8805746473050489, |
| "grad_norm": 0.4486757516860962, |
| "learning_rate": 3.143451902016862e-05, |
| "loss": 0.7764, |
| "step": 17040 |
| }, |
| { |
| "epoch": 0.8810914164642654, |
| "grad_norm": 0.44407910108566284, |
| "learning_rate": 3.124020310781543e-05, |
| "loss": 0.768, |
| "step": 17050 |
| }, |
| { |
| "epoch": 0.881608185623482, |
| "grad_norm": 0.43660351634025574, |
| "learning_rate": 3.1047088380496114e-05, |
| "loss": 0.7758, |
| "step": 17060 |
| }, |
| { |
| "epoch": 0.8821249547826986, |
| "grad_norm": 0.4449329674243927, |
| "learning_rate": 3.0855167412954175e-05, |
| "loss": 0.7875, |
| "step": 17070 |
| }, |
| { |
| "epoch": 0.8826417239419152, |
| "grad_norm": 0.43863120675086975, |
| "learning_rate": 3.066443282583321e-05, |
| "loss": 0.7723, |
| "step": 17080 |
| }, |
| { |
| "epoch": 0.8831584931011317, |
| "grad_norm": 0.4402186870574951, |
| "learning_rate": 3.0474877285393036e-05, |
| "loss": 0.7713, |
| "step": 17090 |
| }, |
| { |
| "epoch": 0.8836752622603483, |
| "grad_norm": 0.47123128175735474, |
| "learning_rate": 3.028649350322787e-05, |
| "loss": 0.7822, |
| "step": 17100 |
| }, |
| { |
| "epoch": 0.8841920314195649, |
| "grad_norm": 0.44672438502311707, |
| "learning_rate": 3.0099274235985934e-05, |
| "loss": 0.7716, |
| "step": 17110 |
| }, |
| { |
| "epoch": 0.8847088005787814, |
| "grad_norm": 0.4311140179634094, |
| "learning_rate": 2.9913212285091083e-05, |
| "loss": 0.7735, |
| "step": 17120 |
| }, |
| { |
| "epoch": 0.885225569737998, |
| "grad_norm": 0.42859673500061035, |
| "learning_rate": 2.9728300496465886e-05, |
| "loss": 0.768, |
| "step": 17130 |
| }, |
| { |
| "epoch": 0.8857423388972147, |
| "grad_norm": 0.4675106406211853, |
| "learning_rate": 2.954453176025668e-05, |
| "loss": 0.7915, |
| "step": 17140 |
| }, |
| { |
| "epoch": 0.8862591080564312, |
| "grad_norm": 0.44611257314682007, |
| "learning_rate": 2.936189901056014e-05, |
| "loss": 0.7661, |
| "step": 17150 |
| }, |
| { |
| "epoch": 0.8867758772156478, |
| "grad_norm": 0.4537068009376526, |
| "learning_rate": 2.918039522515154e-05, |
| "loss": 0.7732, |
| "step": 17160 |
| }, |
| { |
| "epoch": 0.8872926463748644, |
| "grad_norm": 0.451235830783844, |
| "learning_rate": 2.900001342521487e-05, |
| "loss": 0.7765, |
| "step": 17170 |
| }, |
| { |
| "epoch": 0.8878094155340809, |
| "grad_norm": 0.42030608654022217, |
| "learning_rate": 2.882074667507437e-05, |
| "loss": 0.764, |
| "step": 17180 |
| }, |
| { |
| "epoch": 0.8883261846932975, |
| "grad_norm": 0.4544169306755066, |
| "learning_rate": 2.8642588081927974e-05, |
| "loss": 0.7751, |
| "step": 17190 |
| }, |
| { |
| "epoch": 0.8888429538525141, |
| "grad_norm": 0.4388182759284973, |
| "learning_rate": 2.8465530795582176e-05, |
| "loss": 0.7677, |
| "step": 17200 |
| }, |
| { |
| "epoch": 0.8893597230117306, |
| "grad_norm": 0.4463309645652771, |
| "learning_rate": 2.8289568008188735e-05, |
| "loss": 0.7847, |
| "step": 17210 |
| }, |
| { |
| "epoch": 0.8898764921709472, |
| "grad_norm": 0.42829135060310364, |
| "learning_rate": 2.8114692953982826e-05, |
| "loss": 0.7622, |
| "step": 17220 |
| }, |
| { |
| "epoch": 0.8903932613301638, |
| "grad_norm": 0.4384378492832184, |
| "learning_rate": 2.7940898909022972e-05, |
| "loss": 0.7695, |
| "step": 17230 |
| }, |
| { |
| "epoch": 0.8909100304893804, |
| "grad_norm": 0.4420071542263031, |
| "learning_rate": 2.7768179190932436e-05, |
| "loss": 0.7716, |
| "step": 17240 |
| }, |
| { |
| "epoch": 0.891426799648597, |
| "grad_norm": 0.4406958818435669, |
| "learning_rate": 2.7596527158642362e-05, |
| "loss": 0.772, |
| "step": 17250 |
| }, |
| { |
| "epoch": 0.8919435688078136, |
| "grad_norm": 0.46476542949676514, |
| "learning_rate": 2.7425936212136382e-05, |
| "loss": 0.7747, |
| "step": 17260 |
| }, |
| { |
| "epoch": 0.8924603379670302, |
| "grad_norm": 0.44601190090179443, |
| "learning_rate": 2.7256399792196816e-05, |
| "loss": 0.7739, |
| "step": 17270 |
| }, |
| { |
| "epoch": 0.8929771071262467, |
| "grad_norm": 0.4409795105457306, |
| "learning_rate": 2.7087911380152546e-05, |
| "loss": 0.7703, |
| "step": 17280 |
| }, |
| { |
| "epoch": 0.8934938762854633, |
| "grad_norm": 0.4447353780269623, |
| "learning_rate": 2.6920464497628288e-05, |
| "loss": 0.7713, |
| "step": 17290 |
| }, |
| { |
| "epoch": 0.8940106454446799, |
| "grad_norm": 0.42424049973487854, |
| "learning_rate": 2.6754052706295595e-05, |
| "loss": 0.7662, |
| "step": 17300 |
| }, |
| { |
| "epoch": 0.8945274146038964, |
| "grad_norm": 0.4320373237133026, |
| "learning_rate": 2.6588669607625194e-05, |
| "loss": 0.764, |
| "step": 17310 |
| }, |
| { |
| "epoch": 0.895044183763113, |
| "grad_norm": 0.4584170877933502, |
| "learning_rate": 2.6424308842641074e-05, |
| "loss": 0.7697, |
| "step": 17320 |
| }, |
| { |
| "epoch": 0.8955609529223296, |
| "grad_norm": 0.4255240261554718, |
| "learning_rate": 2.6260964091675873e-05, |
| "loss": 0.7638, |
| "step": 17330 |
| }, |
| { |
| "epoch": 0.8960777220815461, |
| "grad_norm": 0.4410153329372406, |
| "learning_rate": 2.6098629074128e-05, |
| "loss": 0.7722, |
| "step": 17340 |
| }, |
| { |
| "epoch": 0.8965944912407627, |
| "grad_norm": 0.4603617787361145, |
| "learning_rate": 2.593729754822004e-05, |
| "loss": 0.7764, |
| "step": 17350 |
| }, |
| { |
| "epoch": 0.8971112603999793, |
| "grad_norm": 0.4616399109363556, |
| "learning_rate": 2.5776963310758847e-05, |
| "loss": 0.7828, |
| "step": 17360 |
| }, |
| { |
| "epoch": 0.897628029559196, |
| "grad_norm": 0.4478990435600281, |
| "learning_rate": 2.5617620196896944e-05, |
| "loss": 0.7677, |
| "step": 17370 |
| }, |
| { |
| "epoch": 0.8981447987184125, |
| "grad_norm": 0.4245089292526245, |
| "learning_rate": 2.545926207989558e-05, |
| "loss": 0.7751, |
| "step": 17380 |
| }, |
| { |
| "epoch": 0.8986615678776291, |
| "grad_norm": 0.4588530957698822, |
| "learning_rate": 2.530188287088909e-05, |
| "loss": 0.7735, |
| "step": 17390 |
| }, |
| { |
| "epoch": 0.8991783370368457, |
| "grad_norm": 0.4587204158306122, |
| "learning_rate": 2.5145476518650782e-05, |
| "loss": 0.7804, |
| "step": 17400 |
| }, |
| { |
| "epoch": 0.8996951061960622, |
| "grad_norm": 0.4349258244037628, |
| "learning_rate": 2.499003700936031e-05, |
| "loss": 0.78, |
| "step": 17410 |
| }, |
| { |
| "epoch": 0.9002118753552788, |
| "grad_norm": 0.46240687370300293, |
| "learning_rate": 2.4835558366372383e-05, |
| "loss": 0.7741, |
| "step": 17420 |
| }, |
| { |
| "epoch": 0.9007286445144954, |
| "grad_norm": 0.43434906005859375, |
| "learning_rate": 2.4682034649987037e-05, |
| "loss": 0.7757, |
| "step": 17430 |
| }, |
| { |
| "epoch": 0.9012454136737119, |
| "grad_norm": 0.45485690236091614, |
| "learning_rate": 2.4529459957221164e-05, |
| "loss": 0.7614, |
| "step": 17440 |
| }, |
| { |
| "epoch": 0.9017621828329285, |
| "grad_norm": 0.451511025428772, |
| "learning_rate": 2.4377828421581636e-05, |
| "loss": 0.775, |
| "step": 17450 |
| }, |
| { |
| "epoch": 0.9022789519921451, |
| "grad_norm": 0.44211798906326294, |
| "learning_rate": 2.422713421283965e-05, |
| "loss": 0.7715, |
| "step": 17460 |
| }, |
| { |
| "epoch": 0.9027957211513616, |
| "grad_norm": 0.43941619992256165, |
| "learning_rate": 2.4077371536806647e-05, |
| "loss": 0.7762, |
| "step": 17470 |
| }, |
| { |
| "epoch": 0.9033124903105783, |
| "grad_norm": 0.4729272723197937, |
| "learning_rate": 2.392853463511143e-05, |
| "loss": 0.7889, |
| "step": 17480 |
| }, |
| { |
| "epoch": 0.9038292594697949, |
| "grad_norm": 0.45001113414764404, |
| "learning_rate": 2.3780617784978833e-05, |
| "loss": 0.7644, |
| "step": 17490 |
| }, |
| { |
| "epoch": 0.9043460286290114, |
| "grad_norm": 0.44931286573410034, |
| "learning_rate": 2.3633615299009652e-05, |
| "loss": 0.7628, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.904862797788228, |
| "grad_norm": 0.43167644739151, |
| "learning_rate": 2.348752152496193e-05, |
| "loss": 0.7707, |
| "step": 17510 |
| }, |
| { |
| "epoch": 0.9053795669474446, |
| "grad_norm": 0.4542749226093292, |
| "learning_rate": 2.33423308455337e-05, |
| "loss": 0.7687, |
| "step": 17520 |
| }, |
| { |
| "epoch": 0.9058963361066612, |
| "grad_norm": 0.4356542229652405, |
| "learning_rate": 2.319803767814693e-05, |
| "loss": 0.7656, |
| "step": 17530 |
| }, |
| { |
| "epoch": 0.9064131052658777, |
| "grad_norm": 0.4345816373825073, |
| "learning_rate": 2.305463647473293e-05, |
| "loss": 0.7564, |
| "step": 17540 |
| }, |
| { |
| "epoch": 0.9069298744250943, |
| "grad_norm": 0.4554193317890167, |
| "learning_rate": 2.291212172151897e-05, |
| "loss": 0.7659, |
| "step": 17550 |
| }, |
| { |
| "epoch": 0.9074466435843109, |
| "grad_norm": 0.4463479518890381, |
| "learning_rate": 2.2770487938816346e-05, |
| "loss": 0.7608, |
| "step": 17560 |
| }, |
| { |
| "epoch": 0.9079634127435274, |
| "grad_norm": 0.4824206829071045, |
| "learning_rate": 2.262972968080962e-05, |
| "loss": 0.7768, |
| "step": 17570 |
| }, |
| { |
| "epoch": 0.908480181902744, |
| "grad_norm": 0.4427326023578644, |
| "learning_rate": 2.248984153534727e-05, |
| "loss": 0.7791, |
| "step": 17580 |
| }, |
| { |
| "epoch": 0.9089969510619607, |
| "grad_norm": 0.4576285779476166, |
| "learning_rate": 2.2350818123733565e-05, |
| "loss": 0.7788, |
| "step": 17590 |
| }, |
| { |
| "epoch": 0.9095137202211772, |
| "grad_norm": 0.40807288885116577, |
| "learning_rate": 2.2212654100521793e-05, |
| "loss": 0.7733, |
| "step": 17600 |
| }, |
| { |
| "epoch": 0.9100304893803938, |
| "grad_norm": 0.4429195821285248, |
| "learning_rate": 2.20753441533087e-05, |
| "loss": 0.796, |
| "step": 17610 |
| }, |
| { |
| "epoch": 0.9105472585396104, |
| "grad_norm": 0.4344060719013214, |
| "learning_rate": 2.19388830025302e-05, |
| "loss": 0.7661, |
| "step": 17620 |
| }, |
| { |
| "epoch": 0.9110640276988269, |
| "grad_norm": 0.4657835364341736, |
| "learning_rate": 2.180326540125846e-05, |
| "loss": 0.7738, |
| "step": 17630 |
| }, |
| { |
| "epoch": 0.9115807968580435, |
| "grad_norm": 0.4533781111240387, |
| "learning_rate": 2.166848613500005e-05, |
| "loss": 0.7719, |
| "step": 17640 |
| }, |
| { |
| "epoch": 0.9120975660172601, |
| "grad_norm": 0.43933114409446716, |
| "learning_rate": 2.1534540021495556e-05, |
| "loss": 0.769, |
| "step": 17650 |
| }, |
| { |
| "epoch": 0.9126143351764767, |
| "grad_norm": 0.439761221408844, |
| "learning_rate": 2.140142191052022e-05, |
| "loss": 0.7698, |
| "step": 17660 |
| }, |
| { |
| "epoch": 0.9131311043356932, |
| "grad_norm": 0.471292644739151, |
| "learning_rate": 2.1269126683685998e-05, |
| "loss": 0.7586, |
| "step": 17670 |
| }, |
| { |
| "epoch": 0.9136478734949098, |
| "grad_norm": 0.45629554986953735, |
| "learning_rate": 2.1137649254244677e-05, |
| "loss": 0.794, |
| "step": 17680 |
| }, |
| { |
| "epoch": 0.9141646426541264, |
| "grad_norm": 0.4637652039527893, |
| "learning_rate": 2.1006984566892386e-05, |
| "loss": 0.7757, |
| "step": 17690 |
| }, |
| { |
| "epoch": 0.914681411813343, |
| "grad_norm": 0.4626142382621765, |
| "learning_rate": 2.087712759757512e-05, |
| "loss": 0.7778, |
| "step": 17700 |
| }, |
| { |
| "epoch": 0.9151981809725596, |
| "grad_norm": 0.4568713903427124, |
| "learning_rate": 2.074807335329564e-05, |
| "loss": 0.7972, |
| "step": 17710 |
| }, |
| { |
| "epoch": 0.9157149501317762, |
| "grad_norm": 0.43964695930480957, |
| "learning_rate": 2.061981687192147e-05, |
| "loss": 0.7651, |
| "step": 17720 |
| }, |
| { |
| "epoch": 0.9162317192909927, |
| "grad_norm": 0.45957452058792114, |
| "learning_rate": 2.0492353221994066e-05, |
| "loss": 0.7744, |
| "step": 17730 |
| }, |
| { |
| "epoch": 0.9167484884502093, |
| "grad_norm": 0.42849215865135193, |
| "learning_rate": 2.0365677502539268e-05, |
| "loss": 0.7602, |
| "step": 17740 |
| }, |
| { |
| "epoch": 0.9172652576094259, |
| "grad_norm": 0.4392319619655609, |
| "learning_rate": 2.0239784842878798e-05, |
| "loss": 0.7822, |
| "step": 17750 |
| }, |
| { |
| "epoch": 0.9177820267686424, |
| "grad_norm": 0.43897444009780884, |
| "learning_rate": 2.011467040244303e-05, |
| "loss": 0.7793, |
| "step": 17760 |
| }, |
| { |
| "epoch": 0.918298795927859, |
| "grad_norm": 0.4271240532398224, |
| "learning_rate": 1.9990329370584816e-05, |
| "loss": 0.7727, |
| "step": 17770 |
| }, |
| { |
| "epoch": 0.9188155650870756, |
| "grad_norm": 0.43358883261680603, |
| "learning_rate": 1.9866756966394584e-05, |
| "loss": 0.7884, |
| "step": 17780 |
| }, |
| { |
| "epoch": 0.9193323342462921, |
| "grad_norm": 0.4576852023601532, |
| "learning_rate": 1.9743948438516452e-05, |
| "loss": 0.7845, |
| "step": 17790 |
| }, |
| { |
| "epoch": 0.9198491034055087, |
| "grad_norm": 0.4521750211715698, |
| "learning_rate": 1.962189906496559e-05, |
| "loss": 0.7652, |
| "step": 17800 |
| }, |
| { |
| "epoch": 0.9203658725647254, |
| "grad_norm": 0.4462205469608307, |
| "learning_rate": 1.9500604152946586e-05, |
| "loss": 0.7748, |
| "step": 17810 |
| }, |
| { |
| "epoch": 0.920882641723942, |
| "grad_norm": 0.4531271457672119, |
| "learning_rate": 1.9380059038673104e-05, |
| "loss": 0.7843, |
| "step": 17820 |
| }, |
| { |
| "epoch": 0.9213994108831585, |
| "grad_norm": 0.4446341097354889, |
| "learning_rate": 1.9260259087188497e-05, |
| "loss": 0.7529, |
| "step": 17830 |
| }, |
| { |
| "epoch": 0.9219161800423751, |
| "grad_norm": 0.4507541060447693, |
| "learning_rate": 1.9141199692187586e-05, |
| "loss": 0.7641, |
| "step": 17840 |
| }, |
| { |
| "epoch": 0.9224329492015917, |
| "grad_norm": 0.4495556056499481, |
| "learning_rate": 1.9022876275839615e-05, |
| "loss": 0.7679, |
| "step": 17850 |
| }, |
| { |
| "epoch": 0.9229497183608082, |
| "grad_norm": 0.448811799287796, |
| "learning_rate": 1.890528428861213e-05, |
| "loss": 0.7744, |
| "step": 17860 |
| }, |
| { |
| "epoch": 0.9234664875200248, |
| "grad_norm": 0.45697128772735596, |
| "learning_rate": 1.8788419209096178e-05, |
| "loss": 0.7723, |
| "step": 17870 |
| }, |
| { |
| "epoch": 0.9239832566792414, |
| "grad_norm": 0.43319204449653625, |
| "learning_rate": 1.8672276543832325e-05, |
| "loss": 0.7901, |
| "step": 17880 |
| }, |
| { |
| "epoch": 0.9245000258384579, |
| "grad_norm": 0.4573897123336792, |
| "learning_rate": 1.855685182713799e-05, |
| "loss": 0.7739, |
| "step": 17890 |
| }, |
| { |
| "epoch": 0.9250167949976745, |
| "grad_norm": 0.4467730224132538, |
| "learning_rate": 1.8442140620935673e-05, |
| "loss": 0.7709, |
| "step": 17900 |
| }, |
| { |
| "epoch": 0.9255335641568911, |
| "grad_norm": 0.4632819592952728, |
| "learning_rate": 1.8328138514582353e-05, |
| "loss": 0.7597, |
| "step": 17910 |
| }, |
| { |
| "epoch": 0.9260503333161076, |
| "grad_norm": 0.45948299765586853, |
| "learning_rate": 1.821484112469986e-05, |
| "loss": 0.7795, |
| "step": 17920 |
| }, |
| { |
| "epoch": 0.9265671024753243, |
| "grad_norm": 0.464005708694458, |
| "learning_rate": 1.810224409500637e-05, |
| "loss": 0.7693, |
| "step": 17930 |
| }, |
| { |
| "epoch": 0.9270838716345409, |
| "grad_norm": 0.4494501054286957, |
| "learning_rate": 1.79903430961489e-05, |
| "loss": 0.7754, |
| "step": 17940 |
| }, |
| { |
| "epoch": 0.9276006407937575, |
| "grad_norm": 0.4453310966491699, |
| "learning_rate": 1.7879133825536803e-05, |
| "loss": 0.7703, |
| "step": 17950 |
| }, |
| { |
| "epoch": 0.928117409952974, |
| "grad_norm": 0.4534304141998291, |
| "learning_rate": 1.7768612007176403e-05, |
| "loss": 0.7694, |
| "step": 17960 |
| }, |
| { |
| "epoch": 0.9286341791121906, |
| "grad_norm": 0.42768940329551697, |
| "learning_rate": 1.7658773391506503e-05, |
| "loss": 0.7753, |
| "step": 17970 |
| }, |
| { |
| "epoch": 0.9291509482714072, |
| "grad_norm": 0.4579961597919464, |
| "learning_rate": 1.754961375523509e-05, |
| "loss": 0.7756, |
| "step": 17980 |
| }, |
| { |
| "epoch": 0.9296677174306237, |
| "grad_norm": 0.43378955125808716, |
| "learning_rate": 1.744112890117683e-05, |
| "loss": 0.7584, |
| "step": 17990 |
| }, |
| { |
| "epoch": 0.9301844865898403, |
| "grad_norm": 0.4437185823917389, |
| "learning_rate": 1.7333314658091796e-05, |
| "loss": 0.7636, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.9307012557490569, |
| "grad_norm": 0.4335078299045563, |
| "learning_rate": 1.7226166880525008e-05, |
| "loss": 0.7676, |
| "step": 18010 |
| }, |
| { |
| "epoch": 0.9312180249082734, |
| "grad_norm": 0.4542897343635559, |
| "learning_rate": 1.711968144864709e-05, |
| "loss": 0.7743, |
| "step": 18020 |
| }, |
| { |
| "epoch": 0.93173479406749, |
| "grad_norm": 0.46580132842063904, |
| "learning_rate": 1.7013854268095815e-05, |
| "loss": 0.7722, |
| "step": 18030 |
| }, |
| { |
| "epoch": 0.9322515632267067, |
| "grad_norm": 0.4515324532985687, |
| "learning_rate": 1.6908681269818735e-05, |
| "loss": 0.7711, |
| "step": 18040 |
| }, |
| { |
| "epoch": 0.9327683323859232, |
| "grad_norm": 0.4366278350353241, |
| "learning_rate": 1.6804158409916664e-05, |
| "loss": 0.7707, |
| "step": 18050 |
| }, |
| { |
| "epoch": 0.9332851015451398, |
| "grad_norm": 0.45202723145484924, |
| "learning_rate": 1.6700281669488236e-05, |
| "loss": 0.7733, |
| "step": 18060 |
| }, |
| { |
| "epoch": 0.9338018707043564, |
| "grad_norm": 0.4829843044281006, |
| "learning_rate": 1.6597047054475375e-05, |
| "loss": 0.7772, |
| "step": 18070 |
| }, |
| { |
| "epoch": 0.934318639863573, |
| "grad_norm": 0.45102638006210327, |
| "learning_rate": 1.6494450595509677e-05, |
| "loss": 0.7736, |
| "step": 18080 |
| }, |
| { |
| "epoch": 0.9348354090227895, |
| "grad_norm": 0.43405377864837646, |
| "learning_rate": 1.639248834775986e-05, |
| "loss": 0.7655, |
| "step": 18090 |
| }, |
| { |
| "epoch": 0.9353521781820061, |
| "grad_norm": 0.44487160444259644, |
| "learning_rate": 1.6291156390780006e-05, |
| "loss": 0.7617, |
| "step": 18100 |
| }, |
| { |
| "epoch": 0.9358689473412227, |
| "grad_norm": 0.4330504238605499, |
| "learning_rate": 1.6190450828358913e-05, |
| "loss": 0.7771, |
| "step": 18110 |
| }, |
| { |
| "epoch": 0.9363857165004392, |
| "grad_norm": 0.44895511865615845, |
| "learning_rate": 1.6090367788370184e-05, |
| "loss": 0.7787, |
| "step": 18120 |
| }, |
| { |
| "epoch": 0.9369024856596558, |
| "grad_norm": 0.4521077275276184, |
| "learning_rate": 1.599090342262343e-05, |
| "loss": 0.7599, |
| "step": 18130 |
| }, |
| { |
| "epoch": 0.9374192548188724, |
| "grad_norm": 0.4501364529132843, |
| "learning_rate": 1.589205390671625e-05, |
| "loss": 0.7611, |
| "step": 18140 |
| }, |
| { |
| "epoch": 0.937936023978089, |
| "grad_norm": 0.45777976512908936, |
| "learning_rate": 1.5793815439887217e-05, |
| "loss": 0.7609, |
| "step": 18150 |
| }, |
| { |
| "epoch": 0.9384527931373056, |
| "grad_norm": 0.4469406306743622, |
| "learning_rate": 1.569618424486971e-05, |
| "loss": 0.7669, |
| "step": 18160 |
| }, |
| { |
| "epoch": 0.9389695622965222, |
| "grad_norm": 0.44795021414756775, |
| "learning_rate": 1.5599156567746714e-05, |
| "loss": 0.7748, |
| "step": 18170 |
| }, |
| { |
| "epoch": 0.9394863314557387, |
| "grad_norm": 0.46077170968055725, |
| "learning_rate": 1.5502728677806457e-05, |
| "loss": 0.7829, |
| "step": 18180 |
| }, |
| { |
| "epoch": 0.9400031006149553, |
| "grad_norm": 0.4519754946231842, |
| "learning_rate": 1.5406896867398952e-05, |
| "loss": 0.7608, |
| "step": 18190 |
| }, |
| { |
| "epoch": 0.9405198697741719, |
| "grad_norm": 0.43412908911705017, |
| "learning_rate": 1.5311657451793483e-05, |
| "loss": 0.7739, |
| "step": 18200 |
| }, |
| { |
| "epoch": 0.9410366389333884, |
| "grad_norm": 0.44264018535614014, |
| "learning_rate": 1.5217006769036868e-05, |
| "loss": 0.7754, |
| "step": 18210 |
| }, |
| { |
| "epoch": 0.941553408092605, |
| "grad_norm": 0.42187464237213135, |
| "learning_rate": 1.5122941179812719e-05, |
| "loss": 0.7649, |
| "step": 18220 |
| }, |
| { |
| "epoch": 0.9420701772518216, |
| "grad_norm": 0.44390153884887695, |
| "learning_rate": 1.5029457067301455e-05, |
| "loss": 0.759, |
| "step": 18230 |
| }, |
| { |
| "epoch": 0.9425869464110382, |
| "grad_norm": 0.43942004442214966, |
| "learning_rate": 1.4936550837041282e-05, |
| "loss": 0.7693, |
| "step": 18240 |
| }, |
| { |
| "epoch": 0.9431037155702547, |
| "grad_norm": 0.44910815358161926, |
| "learning_rate": 1.4844218916789941e-05, |
| "loss": 0.7672, |
| "step": 18250 |
| }, |
| { |
| "epoch": 0.9436204847294714, |
| "grad_norm": 0.4458234906196594, |
| "learning_rate": 1.4752457756387405e-05, |
| "loss": 0.7841, |
| "step": 18260 |
| }, |
| { |
| "epoch": 0.944137253888688, |
| "grad_norm": 0.42799797654151917, |
| "learning_rate": 1.4661263827619318e-05, |
| "loss": 0.7717, |
| "step": 18270 |
| }, |
| { |
| "epoch": 0.9446540230479045, |
| "grad_norm": 0.4394701421260834, |
| "learning_rate": 1.4570633624081393e-05, |
| "loss": 0.7702, |
| "step": 18280 |
| }, |
| { |
| "epoch": 0.9451707922071211, |
| "grad_norm": 0.44984373450279236, |
| "learning_rate": 1.4480563661044558e-05, |
| "loss": 0.7719, |
| "step": 18290 |
| }, |
| { |
| "epoch": 0.9456875613663377, |
| "grad_norm": 0.446482390165329, |
| "learning_rate": 1.4391050475320961e-05, |
| "loss": 0.7572, |
| "step": 18300 |
| }, |
| { |
| "epoch": 0.9462043305255542, |
| "grad_norm": 0.4424509108066559, |
| "learning_rate": 1.4302090625130843e-05, |
| "loss": 0.7773, |
| "step": 18310 |
| }, |
| { |
| "epoch": 0.9467210996847708, |
| "grad_norm": 0.4587627649307251, |
| "learning_rate": 1.4213680689970162e-05, |
| "loss": 0.7723, |
| "step": 18320 |
| }, |
| { |
| "epoch": 0.9472378688439874, |
| "grad_norm": 0.4332590699195862, |
| "learning_rate": 1.4125817270479119e-05, |
| "loss": 0.7649, |
| "step": 18330 |
| }, |
| { |
| "epoch": 0.9477546380032039, |
| "grad_norm": 0.4457739591598511, |
| "learning_rate": 1.4038496988311402e-05, |
| "loss": 0.7722, |
| "step": 18340 |
| }, |
| { |
| "epoch": 0.9482714071624205, |
| "grad_norm": 0.4352693557739258, |
| "learning_rate": 1.3951716486004345e-05, |
| "loss": 0.7592, |
| "step": 18350 |
| }, |
| { |
| "epoch": 0.9487881763216371, |
| "grad_norm": 0.44573667645454407, |
| "learning_rate": 1.3865472426849772e-05, |
| "loss": 0.7637, |
| "step": 18360 |
| }, |
| { |
| "epoch": 0.9493049454808538, |
| "grad_norm": 0.4508999288082123, |
| "learning_rate": 1.3779761494765763e-05, |
| "loss": 0.7627, |
| "step": 18370 |
| }, |
| { |
| "epoch": 0.9498217146400703, |
| "grad_norm": 0.46261972188949585, |
| "learning_rate": 1.3694580394169099e-05, |
| "loss": 0.7798, |
| "step": 18380 |
| }, |
| { |
| "epoch": 0.9503384837992869, |
| "grad_norm": 0.446575790643692, |
| "learning_rate": 1.360992584984858e-05, |
| "loss": 0.7636, |
| "step": 18390 |
| }, |
| { |
| "epoch": 0.9508552529585035, |
| "grad_norm": 0.4478476941585541, |
| "learning_rate": 1.3525794606839085e-05, |
| "loss": 0.7757, |
| "step": 18400 |
| }, |
| { |
| "epoch": 0.95137202211772, |
| "grad_norm": 0.4484612047672272, |
| "learning_rate": 1.3442183430296398e-05, |
| "loss": 0.7695, |
| "step": 18410 |
| }, |
| { |
| "epoch": 0.9518887912769366, |
| "grad_norm": 0.45452138781547546, |
| "learning_rate": 1.3359089105372866e-05, |
| "loss": 0.7659, |
| "step": 18420 |
| }, |
| { |
| "epoch": 0.9524055604361532, |
| "grad_norm": 0.4534998834133148, |
| "learning_rate": 1.3276508437093752e-05, |
| "loss": 0.763, |
| "step": 18430 |
| }, |
| { |
| "epoch": 0.9529223295953697, |
| "grad_norm": 0.43683722615242004, |
| "learning_rate": 1.3194438250234418e-05, |
| "loss": 0.7744, |
| "step": 18440 |
| }, |
| { |
| "epoch": 0.9534390987545863, |
| "grad_norm": 0.4494810998439789, |
| "learning_rate": 1.3112875389198208e-05, |
| "loss": 0.7645, |
| "step": 18450 |
| }, |
| { |
| "epoch": 0.9539558679138029, |
| "grad_norm": 0.449897825717926, |
| "learning_rate": 1.3031816717895151e-05, |
| "loss": 0.7641, |
| "step": 18460 |
| }, |
| { |
| "epoch": 0.9544726370730194, |
| "grad_norm": 0.4382020831108093, |
| "learning_rate": 1.2951259119621336e-05, |
| "loss": 0.7748, |
| "step": 18470 |
| }, |
| { |
| "epoch": 0.954989406232236, |
| "grad_norm": 0.46431413292884827, |
| "learning_rate": 1.2871199496939121e-05, |
| "loss": 0.7683, |
| "step": 18480 |
| }, |
| { |
| "epoch": 0.9555061753914527, |
| "grad_norm": 0.4337891936302185, |
| "learning_rate": 1.2791634771557991e-05, |
| "loss": 0.7561, |
| "step": 18490 |
| }, |
| { |
| "epoch": 0.9560229445506692, |
| "grad_norm": 0.46482157707214355, |
| "learning_rate": 1.2712561884216234e-05, |
| "loss": 0.7601, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.9565397137098858, |
| "grad_norm": 0.4410005211830139, |
| "learning_rate": 1.2633977794563303e-05, |
| "loss": 0.773, |
| "step": 18510 |
| }, |
| { |
| "epoch": 0.9570564828691024, |
| "grad_norm": 0.46581384539604187, |
| "learning_rate": 1.2555879481042893e-05, |
| "loss": 0.7753, |
| "step": 18520 |
| }, |
| { |
| "epoch": 0.957573252028319, |
| "grad_norm": 0.45101165771484375, |
| "learning_rate": 1.2478263940776792e-05, |
| "loss": 0.7647, |
| "step": 18530 |
| }, |
| { |
| "epoch": 0.9580900211875355, |
| "grad_norm": 0.44979819655418396, |
| "learning_rate": 1.2401128189449399e-05, |
| "loss": 0.775, |
| "step": 18540 |
| }, |
| { |
| "epoch": 0.9586067903467521, |
| "grad_norm": 0.4470668435096741, |
| "learning_rate": 1.2324469261193e-05, |
| "loss": 0.7579, |
| "step": 18550 |
| }, |
| { |
| "epoch": 0.9591235595059687, |
| "grad_norm": 0.4402695596218109, |
| "learning_rate": 1.2248284208473693e-05, |
| "loss": 0.7793, |
| "step": 18560 |
| }, |
| { |
| "epoch": 0.9596403286651852, |
| "grad_norm": 0.4400414526462555, |
| "learning_rate": 1.2172570101978107e-05, |
| "loss": 0.7725, |
| "step": 18570 |
| }, |
| { |
| "epoch": 0.9601570978244018, |
| "grad_norm": 0.43797457218170166, |
| "learning_rate": 1.2097324030500717e-05, |
| "loss": 0.7474, |
| "step": 18580 |
| }, |
| { |
| "epoch": 0.9606738669836185, |
| "grad_norm": 0.47379326820373535, |
| "learning_rate": 1.2022543100831949e-05, |
| "loss": 0.7644, |
| "step": 18590 |
| }, |
| { |
| "epoch": 0.961190636142835, |
| "grad_norm": 0.4277331829071045, |
| "learning_rate": 1.1948224437646907e-05, |
| "loss": 0.7698, |
| "step": 18600 |
| }, |
| { |
| "epoch": 0.9617074053020516, |
| "grad_norm": 0.46481338143348694, |
| "learning_rate": 1.1874365183394848e-05, |
| "loss": 0.7575, |
| "step": 18610 |
| }, |
| { |
| "epoch": 0.9622241744612682, |
| "grad_norm": 0.4436621367931366, |
| "learning_rate": 1.1800962498189266e-05, |
| "loss": 0.7714, |
| "step": 18620 |
| }, |
| { |
| "epoch": 0.9627409436204847, |
| "grad_norm": 0.44922277331352234, |
| "learning_rate": 1.1728013559698744e-05, |
| "loss": 0.7711, |
| "step": 18630 |
| }, |
| { |
| "epoch": 0.9632577127797013, |
| "grad_norm": 0.4406448006629944, |
| "learning_rate": 1.1655515563038412e-05, |
| "loss": 0.7645, |
| "step": 18640 |
| }, |
| { |
| "epoch": 0.9637744819389179, |
| "grad_norm": 0.4575316607952118, |
| "learning_rate": 1.1583465720662092e-05, |
| "loss": 0.7774, |
| "step": 18650 |
| }, |
| { |
| "epoch": 0.9642912510981345, |
| "grad_norm": 0.44259268045425415, |
| "learning_rate": 1.1511861262255142e-05, |
| "loss": 0.7791, |
| "step": 18660 |
| }, |
| { |
| "epoch": 0.964808020257351, |
| "grad_norm": 0.43396565318107605, |
| "learning_rate": 1.14406994346279e-05, |
| "loss": 0.7552, |
| "step": 18670 |
| }, |
| { |
| "epoch": 0.9653247894165676, |
| "grad_norm": 0.4611850082874298, |
| "learning_rate": 1.1369977501609877e-05, |
| "loss": 0.7747, |
| "step": 18680 |
| }, |
| { |
| "epoch": 0.9658415585757842, |
| "grad_norm": 0.4555375277996063, |
| "learning_rate": 1.129969274394449e-05, |
| "loss": 0.7726, |
| "step": 18690 |
| }, |
| { |
| "epoch": 0.9663583277350007, |
| "grad_norm": 0.4663475453853607, |
| "learning_rate": 1.1229842459184562e-05, |
| "loss": 0.7596, |
| "step": 18700 |
| }, |
| { |
| "epoch": 0.9668750968942174, |
| "grad_norm": 0.45513424277305603, |
| "learning_rate": 1.1160423961588368e-05, |
| "loss": 0.7813, |
| "step": 18710 |
| }, |
| { |
| "epoch": 0.967391866053434, |
| "grad_norm": 0.4629857838153839, |
| "learning_rate": 1.1091434582016413e-05, |
| "loss": 0.7668, |
| "step": 18720 |
| }, |
| { |
| "epoch": 0.9679086352126505, |
| "grad_norm": 0.45282307267189026, |
| "learning_rate": 1.1022871667828753e-05, |
| "loss": 0.7543, |
| "step": 18730 |
| }, |
| { |
| "epoch": 0.9684254043718671, |
| "grad_norm": 0.4608106315135956, |
| "learning_rate": 1.0954732582783043e-05, |
| "loss": 0.7588, |
| "step": 18740 |
| }, |
| { |
| "epoch": 0.9689421735310837, |
| "grad_norm": 0.44871219992637634, |
| "learning_rate": 1.088701470693316e-05, |
| "loss": 0.7681, |
| "step": 18750 |
| }, |
| { |
| "epoch": 0.9694589426903002, |
| "grad_norm": 0.4576722979545593, |
| "learning_rate": 1.081971543652845e-05, |
| "loss": 0.7618, |
| "step": 18760 |
| }, |
| { |
| "epoch": 0.9699757118495168, |
| "grad_norm": 0.4332127571105957, |
| "learning_rate": 1.0752832183913647e-05, |
| "loss": 0.7586, |
| "step": 18770 |
| }, |
| { |
| "epoch": 0.9704924810087334, |
| "grad_norm": 0.44485628604888916, |
| "learning_rate": 1.0686362377429339e-05, |
| "loss": 0.7737, |
| "step": 18780 |
| }, |
| { |
| "epoch": 0.9710092501679499, |
| "grad_norm": 0.45990100502967834, |
| "learning_rate": 1.0620303461313126e-05, |
| "loss": 0.7679, |
| "step": 18790 |
| }, |
| { |
| "epoch": 0.9715260193271665, |
| "grad_norm": 0.4547218084335327, |
| "learning_rate": 1.0554652895601313e-05, |
| "loss": 0.7559, |
| "step": 18800 |
| }, |
| { |
| "epoch": 0.9720427884863831, |
| "grad_norm": 0.43457552790641785, |
| "learning_rate": 1.0489408156031289e-05, |
| "loss": 0.7512, |
| "step": 18810 |
| }, |
| { |
| "epoch": 0.9725595576455998, |
| "grad_norm": 0.44039562344551086, |
| "learning_rate": 1.0424566733944429e-05, |
| "loss": 0.7791, |
| "step": 18820 |
| }, |
| { |
| "epoch": 0.9730763268048163, |
| "grad_norm": 0.4435688257217407, |
| "learning_rate": 1.0360126136189671e-05, |
| "loss": 0.7738, |
| "step": 18830 |
| }, |
| { |
| "epoch": 0.9735930959640329, |
| "grad_norm": 0.4358065128326416, |
| "learning_rate": 1.0296083885027623e-05, |
| "loss": 0.7595, |
| "step": 18840 |
| }, |
| { |
| "epoch": 0.9741098651232495, |
| "grad_norm": 0.4542253613471985, |
| "learning_rate": 1.0232437518035322e-05, |
| "loss": 0.7802, |
| "step": 18850 |
| }, |
| { |
| "epoch": 0.974626634282466, |
| "grad_norm": 0.4499568045139313, |
| "learning_rate": 1.0169184588011541e-05, |
| "loss": 0.7556, |
| "step": 18860 |
| }, |
| { |
| "epoch": 0.9751434034416826, |
| "grad_norm": 0.42469751834869385, |
| "learning_rate": 1.0106322662882686e-05, |
| "loss": 0.7747, |
| "step": 18870 |
| }, |
| { |
| "epoch": 0.9756601726008992, |
| "grad_norm": 0.45162233710289, |
| "learning_rate": 1.00438493256093e-05, |
| "loss": 0.7716, |
| "step": 18880 |
| }, |
| { |
| "epoch": 0.9761769417601157, |
| "grad_norm": 0.45597076416015625, |
| "learning_rate": 9.981762174093112e-06, |
| "loss": 0.7779, |
| "step": 18890 |
| }, |
| { |
| "epoch": 0.9766937109193323, |
| "grad_norm": 0.4463193714618683, |
| "learning_rate": 9.920058821084695e-06, |
| "loss": 0.7686, |
| "step": 18900 |
| }, |
| { |
| "epoch": 0.9772104800785489, |
| "grad_norm": 0.4148988425731659, |
| "learning_rate": 9.858736894091644e-06, |
| "loss": 0.753, |
| "step": 18910 |
| }, |
| { |
| "epoch": 0.9777272492377654, |
| "grad_norm": 0.4257926940917969, |
| "learning_rate": 9.797794035287406e-06, |
| "loss": 0.7675, |
| "step": 18920 |
| }, |
| { |
| "epoch": 0.978244018396982, |
| "grad_norm": 0.4566889703273773, |
| "learning_rate": 9.737227901420558e-06, |
| "loss": 0.7674, |
| "step": 18930 |
| }, |
| { |
| "epoch": 0.9787607875561987, |
| "grad_norm": 0.46036675572395325, |
| "learning_rate": 9.677036163724766e-06, |
| "loss": 0.7701, |
| "step": 18940 |
| }, |
| { |
| "epoch": 0.9792775567154153, |
| "grad_norm": 0.4719618260860443, |
| "learning_rate": 9.617216507829204e-06, |
| "loss": 0.7577, |
| "step": 18950 |
| }, |
| { |
| "epoch": 0.9797943258746318, |
| "grad_norm": 0.45223793387413025, |
| "learning_rate": 9.557766633669592e-06, |
| "loss": 0.7618, |
| "step": 18960 |
| }, |
| { |
| "epoch": 0.9803110950338484, |
| "grad_norm": 0.44620633125305176, |
| "learning_rate": 9.498684255399747e-06, |
| "loss": 0.7623, |
| "step": 18970 |
| }, |
| { |
| "epoch": 0.980827864193065, |
| "grad_norm": 0.4350356459617615, |
| "learning_rate": 9.439967101303683e-06, |
| "loss": 0.7659, |
| "step": 18980 |
| }, |
| { |
| "epoch": 0.9813446333522815, |
| "grad_norm": 0.434857040643692, |
| "learning_rate": 9.381612913708292e-06, |
| "loss": 0.7637, |
| "step": 18990 |
| }, |
| { |
| "epoch": 0.9818614025114981, |
| "grad_norm": 0.44825971126556396, |
| "learning_rate": 9.323619448896502e-06, |
| "loss": 0.766, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.9823781716707147, |
| "grad_norm": 0.4420020282268524, |
| "learning_rate": 9.26598447702104e-06, |
| "loss": 0.7644, |
| "step": 19010 |
| }, |
| { |
| "epoch": 0.9828949408299312, |
| "grad_norm": 0.44582831859588623, |
| "learning_rate": 9.208705782018656e-06, |
| "loss": 0.7606, |
| "step": 19020 |
| }, |
| { |
| "epoch": 0.9834117099891478, |
| "grad_norm": 0.4383075535297394, |
| "learning_rate": 9.151781161524964e-06, |
| "loss": 0.7662, |
| "step": 19030 |
| }, |
| { |
| "epoch": 0.9839284791483645, |
| "grad_norm": 0.4672369062900543, |
| "learning_rate": 9.095208426789703e-06, |
| "loss": 0.7623, |
| "step": 19040 |
| }, |
| { |
| "epoch": 0.984445248307581, |
| "grad_norm": 0.4448625445365906, |
| "learning_rate": 9.03898540259264e-06, |
| "loss": 0.7767, |
| "step": 19050 |
| }, |
| { |
| "epoch": 0.9849620174667976, |
| "grad_norm": 0.45743006467819214, |
| "learning_rate": 8.983109927159886e-06, |
| "loss": 0.7655, |
| "step": 19060 |
| }, |
| { |
| "epoch": 0.9854787866260142, |
| "grad_norm": 0.4571949243545532, |
| "learning_rate": 8.927579852080794e-06, |
| "loss": 0.7569, |
| "step": 19070 |
| }, |
| { |
| "epoch": 0.9859955557852308, |
| "grad_norm": 0.4542441666126251, |
| "learning_rate": 8.872393042225366e-06, |
| "loss": 0.7726, |
| "step": 19080 |
| }, |
| { |
| "epoch": 0.9865123249444473, |
| "grad_norm": 0.4544001817703247, |
| "learning_rate": 8.817547375662121e-06, |
| "loss": 0.7624, |
| "step": 19090 |
| }, |
| { |
| "epoch": 0.9870290941036639, |
| "grad_norm": 0.44613394141197205, |
| "learning_rate": 8.763040743576555e-06, |
| "loss": 0.7729, |
| "step": 19100 |
| }, |
| { |
| "epoch": 0.9875458632628805, |
| "grad_norm": 0.4503871202468872, |
| "learning_rate": 8.708871050190002e-06, |
| "loss": 0.7619, |
| "step": 19110 |
| }, |
| { |
| "epoch": 0.988062632422097, |
| "grad_norm": 0.45252034068107605, |
| "learning_rate": 8.65503621267911e-06, |
| "loss": 0.7617, |
| "step": 19120 |
| }, |
| { |
| "epoch": 0.9885794015813136, |
| "grad_norm": 0.4656429886817932, |
| "learning_rate": 8.601534161095704e-06, |
| "loss": 0.7733, |
| "step": 19130 |
| }, |
| { |
| "epoch": 0.9890961707405302, |
| "grad_norm": 0.44941556453704834, |
| "learning_rate": 8.548362838287236e-06, |
| "loss": 0.765, |
| "step": 19140 |
| }, |
| { |
| "epoch": 0.9896129398997467, |
| "grad_norm": 0.4554784893989563, |
| "learning_rate": 8.495520199817657e-06, |
| "loss": 0.7708, |
| "step": 19150 |
| }, |
| { |
| "epoch": 0.9901297090589634, |
| "grad_norm": 0.44851189851760864, |
| "learning_rate": 8.443004213888836e-06, |
| "loss": 0.7548, |
| "step": 19160 |
| }, |
| { |
| "epoch": 0.99064647821818, |
| "grad_norm": 0.43213942646980286, |
| "learning_rate": 8.390812861262414e-06, |
| "loss": 0.7583, |
| "step": 19170 |
| }, |
| { |
| "epoch": 0.9911632473773965, |
| "grad_norm": 0.4359610676765442, |
| "learning_rate": 8.33894413518218e-06, |
| "loss": 0.7451, |
| "step": 19180 |
| }, |
| { |
| "epoch": 0.9916800165366131, |
| "grad_norm": 0.4492233693599701, |
| "learning_rate": 8.287396041296902e-06, |
| "loss": 0.7648, |
| "step": 19190 |
| }, |
| { |
| "epoch": 0.9921967856958297, |
| "grad_norm": 0.45256808400154114, |
| "learning_rate": 8.236166597583653e-06, |
| "loss": 0.781, |
| "step": 19200 |
| }, |
| { |
| "epoch": 0.9927135548550462, |
| "grad_norm": 0.45061782002449036, |
| "learning_rate": 8.185253834271597e-06, |
| "loss": 0.7828, |
| "step": 19210 |
| }, |
| { |
| "epoch": 0.9932303240142628, |
| "grad_norm": 0.43763041496276855, |
| "learning_rate": 8.134655793766237e-06, |
| "loss": 0.7523, |
| "step": 19220 |
| }, |
| { |
| "epoch": 0.9937470931734794, |
| "grad_norm": 0.4337799847126007, |
| "learning_rate": 8.084370530574186e-06, |
| "loss": 0.7738, |
| "step": 19230 |
| }, |
| { |
| "epoch": 0.994263862332696, |
| "grad_norm": 0.45650362968444824, |
| "learning_rate": 8.034396111228312e-06, |
| "loss": 0.7676, |
| "step": 19240 |
| }, |
| { |
| "epoch": 0.9947806314919125, |
| "grad_norm": 0.458556205034256, |
| "learning_rate": 7.98473061421344e-06, |
| "loss": 0.7812, |
| "step": 19250 |
| }, |
| { |
| "epoch": 0.9952974006511291, |
| "grad_norm": 0.4379122853279114, |
| "learning_rate": 7.935372129892435e-06, |
| "loss": 0.7653, |
| "step": 19260 |
| }, |
| { |
| "epoch": 0.9958141698103458, |
| "grad_norm": 0.453417032957077, |
| "learning_rate": 7.886318760432809e-06, |
| "loss": 0.7701, |
| "step": 19270 |
| }, |
| { |
| "epoch": 0.9963309389695623, |
| "grad_norm": 0.4366815388202667, |
| "learning_rate": 7.837568619733714e-06, |
| "loss": 0.7665, |
| "step": 19280 |
| }, |
| { |
| "epoch": 0.9968477081287789, |
| "grad_norm": 0.4635095000267029, |
| "learning_rate": 7.78911983335346e-06, |
| "loss": 0.7694, |
| "step": 19290 |
| }, |
| { |
| "epoch": 0.9973644772879955, |
| "grad_norm": 0.4435023069381714, |
| "learning_rate": 7.740970538437405e-06, |
| "loss": 0.7689, |
| "step": 19300 |
| }, |
| { |
| "epoch": 0.997881246447212, |
| "grad_norm": 0.432817667722702, |
| "learning_rate": 7.693118883646362e-06, |
| "loss": 0.7592, |
| "step": 19310 |
| }, |
| { |
| "epoch": 0.9983980156064286, |
| "grad_norm": 0.45705628395080566, |
| "learning_rate": 7.64556302908539e-06, |
| "loss": 0.77, |
| "step": 19320 |
| }, |
| { |
| "epoch": 0.9989147847656452, |
| "grad_norm": 0.45206621289253235, |
| "learning_rate": 7.598301146233062e-06, |
| "loss": 0.7665, |
| "step": 19330 |
| }, |
| { |
| "epoch": 0.9994315539248617, |
| "grad_norm": 0.42955172061920166, |
| "learning_rate": 7.551331417871156e-06, |
| "loss": 0.7619, |
| "step": 19340 |
| }, |
| { |
| "epoch": 0.9999483230840783, |
| "grad_norm": 0.436574250459671, |
| "learning_rate": 7.50465203801478e-06, |
| "loss": 0.7581, |
| "step": 19350 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 19351, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.2816606598299008e+17, |
| "train_batch_size": 512, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|