| { |
| "best_global_step": 1800, |
| "best_metric": 0.74, |
| "best_model_checkpoint": "/mnt/parscratch/users/acr24wz/etu/topcon/qwen3_4B/cpt_model/balanced/finetuned/all/checkpoint-1800", |
| "epoch": 2.0642662458757712, |
| "eval_steps": 100, |
| "global_step": 1800, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0011476115334959117, |
| "grad_norm": 201.0, |
| "learning_rate": 0.0, |
| "loss": 18.5701, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0022952230669918234, |
| "grad_norm": 1392.0, |
| "learning_rate": 4.587155963302753e-08, |
| "loss": 12.0441, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0034428346004877347, |
| "grad_norm": 161.0, |
| "learning_rate": 9.174311926605506e-08, |
| "loss": 14.3223, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.004590446133983647, |
| "grad_norm": 150.0, |
| "learning_rate": 1.376146788990826e-07, |
| "loss": 10.3759, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.005738057667479558, |
| "grad_norm": 141.0, |
| "learning_rate": 1.8348623853211012e-07, |
| "loss": 11.4624, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.006885669200975469, |
| "grad_norm": 159.0, |
| "learning_rate": 2.2935779816513764e-07, |
| "loss": 10.5127, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.008033280734471382, |
| "grad_norm": 166.0, |
| "learning_rate": 2.752293577981652e-07, |
| "loss": 15.7339, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.009180892267967294, |
| "grad_norm": 310.0, |
| "learning_rate": 3.211009174311927e-07, |
| "loss": 21.2237, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.010328503801463204, |
| "grad_norm": 153.0, |
| "learning_rate": 3.6697247706422023e-07, |
| "loss": 11.4438, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.011476115334959116, |
| "grad_norm": 146.0, |
| "learning_rate": 4.128440366972478e-07, |
| "loss": 14.27, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.012623726868455028, |
| "grad_norm": 185.0, |
| "learning_rate": 4.587155963302753e-07, |
| "loss": 17.4331, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.013771338401950939, |
| "grad_norm": 157.0, |
| "learning_rate": 5.045871559633028e-07, |
| "loss": 16.0972, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.014918949935446851, |
| "grad_norm": 146.0, |
| "learning_rate": 5.504587155963304e-07, |
| "loss": 10.9198, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.016066561468942763, |
| "grad_norm": 194.0, |
| "learning_rate": 5.963302752293579e-07, |
| "loss": 14.1635, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.017214173002438674, |
| "grad_norm": 162.0, |
| "learning_rate": 6.422018348623854e-07, |
| "loss": 11.853, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.018361784535934587, |
| "grad_norm": 160.0, |
| "learning_rate": 6.880733944954129e-07, |
| "loss": 12.7435, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.019509396069430498, |
| "grad_norm": 158.0, |
| "learning_rate": 7.339449541284405e-07, |
| "loss": 11.8396, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.020657007602926408, |
| "grad_norm": 165.0, |
| "learning_rate": 7.79816513761468e-07, |
| "loss": 11.5206, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.021804619136422322, |
| "grad_norm": 177.0, |
| "learning_rate": 8.256880733944956e-07, |
| "loss": 11.5111, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.022952230669918233, |
| "grad_norm": 157.0, |
| "learning_rate": 8.71559633027523e-07, |
| "loss": 14.7131, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.024099842203414143, |
| "grad_norm": 172.0, |
| "learning_rate": 9.174311926605506e-07, |
| "loss": 11.1313, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.025247453736910057, |
| "grad_norm": 194.0, |
| "learning_rate": 9.633027522935782e-07, |
| "loss": 16.6221, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.026395065270405967, |
| "grad_norm": 268.0, |
| "learning_rate": 1.0091743119266057e-06, |
| "loss": 15.6201, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.027542676803901878, |
| "grad_norm": 218.0, |
| "learning_rate": 1.055045871559633e-06, |
| "loss": 11.6275, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.02869028833739779, |
| "grad_norm": 158.0, |
| "learning_rate": 1.1009174311926608e-06, |
| "loss": 15.5678, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.029837899870893702, |
| "grad_norm": 236.0, |
| "learning_rate": 1.1467889908256882e-06, |
| "loss": 15.4133, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.030985511404389616, |
| "grad_norm": 131.0, |
| "learning_rate": 1.1926605504587159e-06, |
| "loss": 12.6406, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.032133122937885526, |
| "grad_norm": 139.0, |
| "learning_rate": 1.2385321100917433e-06, |
| "loss": 15.0131, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.03328073447138144, |
| "grad_norm": 150.0, |
| "learning_rate": 1.2844036697247707e-06, |
| "loss": 13.4583, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.03442834600487735, |
| "grad_norm": 166.0, |
| "learning_rate": 1.3302752293577984e-06, |
| "loss": 15.6894, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.03557595753837326, |
| "grad_norm": 168.0, |
| "learning_rate": 1.3761467889908258e-06, |
| "loss": 13.8435, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.036723569071869175, |
| "grad_norm": 191.0, |
| "learning_rate": 1.4220183486238535e-06, |
| "loss": 8.6607, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.03787118060536508, |
| "grad_norm": 140.0, |
| "learning_rate": 1.467889908256881e-06, |
| "loss": 12.4132, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.039018792138860996, |
| "grad_norm": 201.0, |
| "learning_rate": 1.5137614678899084e-06, |
| "loss": 13.8843, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.04016640367235691, |
| "grad_norm": 164.0, |
| "learning_rate": 1.559633027522936e-06, |
| "loss": 15.1008, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.041314015205852817, |
| "grad_norm": 284.0, |
| "learning_rate": 1.6055045871559635e-06, |
| "loss": 21.6636, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.04246162673934873, |
| "grad_norm": 394.0, |
| "learning_rate": 1.6513761467889911e-06, |
| "loss": 14.0359, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.043609238272844644, |
| "grad_norm": 161.0, |
| "learning_rate": 1.6972477064220186e-06, |
| "loss": 13.4677, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.04475684980634055, |
| "grad_norm": 164.0, |
| "learning_rate": 1.743119266055046e-06, |
| "loss": 8.3447, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.045904461339836465, |
| "grad_norm": 148.0, |
| "learning_rate": 1.7889908256880737e-06, |
| "loss": 6.5679, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.04705207287333238, |
| "grad_norm": 168.0, |
| "learning_rate": 1.8348623853211011e-06, |
| "loss": 15.6762, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.048199684406828286, |
| "grad_norm": 164.0, |
| "learning_rate": 1.8807339449541288e-06, |
| "loss": 6.2052, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.0493472959403242, |
| "grad_norm": 103.5, |
| "learning_rate": 1.9266055045871564e-06, |
| "loss": 8.8464, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.050494907473820114, |
| "grad_norm": 372.0, |
| "learning_rate": 1.9724770642201837e-06, |
| "loss": 15.5934, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.05164251900731602, |
| "grad_norm": 210.0, |
| "learning_rate": 2.0183486238532113e-06, |
| "loss": 11.6431, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.052790130540811935, |
| "grad_norm": 125.5, |
| "learning_rate": 2.064220183486239e-06, |
| "loss": 8.5935, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.05393774207430785, |
| "grad_norm": 102.5, |
| "learning_rate": 2.110091743119266e-06, |
| "loss": 9.1192, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.055085353607803755, |
| "grad_norm": 143.0, |
| "learning_rate": 2.155963302752294e-06, |
| "loss": 8.8696, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.05623296514129967, |
| "grad_norm": 137.0, |
| "learning_rate": 2.2018348623853215e-06, |
| "loss": 11.2497, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.05738057667479558, |
| "grad_norm": 129.0, |
| "learning_rate": 2.2477064220183487e-06, |
| "loss": 11.6115, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.05852818820829149, |
| "grad_norm": 149.0, |
| "learning_rate": 2.2935779816513764e-06, |
| "loss": 13.9466, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.059675799741787404, |
| "grad_norm": 120.5, |
| "learning_rate": 2.339449541284404e-06, |
| "loss": 9.2116, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.06082341127528332, |
| "grad_norm": 240.0, |
| "learning_rate": 2.3853211009174317e-06, |
| "loss": 21.5138, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.06197102280877923, |
| "grad_norm": 112.0, |
| "learning_rate": 2.431192660550459e-06, |
| "loss": 9.7849, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.06311863434227515, |
| "grad_norm": 135.0, |
| "learning_rate": 2.4770642201834866e-06, |
| "loss": 12.795, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.06426624587577105, |
| "grad_norm": 222.0, |
| "learning_rate": 2.522935779816514e-06, |
| "loss": 15.822, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.06541385740926696, |
| "grad_norm": 137.0, |
| "learning_rate": 2.5688073394495415e-06, |
| "loss": 14.4134, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.06656146894276288, |
| "grad_norm": 100.0, |
| "learning_rate": 2.6146788990825687e-06, |
| "loss": 10.1907, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.06770908047625879, |
| "grad_norm": 154.0, |
| "learning_rate": 2.6605504587155968e-06, |
| "loss": 14.2824, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.0688566920097547, |
| "grad_norm": 109.0, |
| "learning_rate": 2.706422018348624e-06, |
| "loss": 8.6855, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.07000430354325061, |
| "grad_norm": 198.0, |
| "learning_rate": 2.7522935779816517e-06, |
| "loss": 14.2233, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.07115191507674652, |
| "grad_norm": 134.0, |
| "learning_rate": 2.798165137614679e-06, |
| "loss": 11.8776, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.07229952661024243, |
| "grad_norm": 112.5, |
| "learning_rate": 2.844036697247707e-06, |
| "loss": 9.321, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.07344713814373835, |
| "grad_norm": 118.0, |
| "learning_rate": 2.8899082568807342e-06, |
| "loss": 10.727, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.07459474967723426, |
| "grad_norm": 167.0, |
| "learning_rate": 2.935779816513762e-06, |
| "loss": 13.9879, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.07574236121073016, |
| "grad_norm": 100.0, |
| "learning_rate": 2.981651376146789e-06, |
| "loss": 7.9334, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.07688997274422608, |
| "grad_norm": 153.0, |
| "learning_rate": 3.0275229357798168e-06, |
| "loss": 14.9343, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.07803758427772199, |
| "grad_norm": 119.0, |
| "learning_rate": 3.073394495412844e-06, |
| "loss": 8.6676, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.0791851958112179, |
| "grad_norm": 117.5, |
| "learning_rate": 3.119266055045872e-06, |
| "loss": 10.101, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.08033280734471382, |
| "grad_norm": 85.5, |
| "learning_rate": 3.1651376146788993e-06, |
| "loss": 7.5899, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.08148041887820973, |
| "grad_norm": 174.0, |
| "learning_rate": 3.211009174311927e-06, |
| "loss": 15.9673, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.08262803041170563, |
| "grad_norm": 221.0, |
| "learning_rate": 3.256880733944954e-06, |
| "loss": 14.1455, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.08377564194520155, |
| "grad_norm": 172.0, |
| "learning_rate": 3.3027522935779823e-06, |
| "loss": 15.9228, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.08492325347869746, |
| "grad_norm": 144.0, |
| "learning_rate": 3.3486238532110095e-06, |
| "loss": 12.6043, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.08607086501219337, |
| "grad_norm": 118.5, |
| "learning_rate": 3.394495412844037e-06, |
| "loss": 9.2068, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.08721847654568929, |
| "grad_norm": 147.0, |
| "learning_rate": 3.4403669724770644e-06, |
| "loss": 11.8722, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.0883660880791852, |
| "grad_norm": 119.5, |
| "learning_rate": 3.486238532110092e-06, |
| "loss": 10.4207, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.0895136996126811, |
| "grad_norm": 170.0, |
| "learning_rate": 3.5321100917431193e-06, |
| "loss": 14.4936, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.09066131114617702, |
| "grad_norm": 183.0, |
| "learning_rate": 3.5779816513761473e-06, |
| "loss": 14.2192, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.09180892267967293, |
| "grad_norm": 128.0, |
| "learning_rate": 3.6238532110091746e-06, |
| "loss": 12.4628, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.09295653421316884, |
| "grad_norm": 100.0, |
| "learning_rate": 3.6697247706422022e-06, |
| "loss": 5.9004, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.09410414574666476, |
| "grad_norm": 163.0, |
| "learning_rate": 3.7155963302752295e-06, |
| "loss": 12.766, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.09525175728016066, |
| "grad_norm": 202.0, |
| "learning_rate": 3.7614678899082575e-06, |
| "loss": 14.3118, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.09639936881365657, |
| "grad_norm": 314.0, |
| "learning_rate": 3.8073394495412848e-06, |
| "loss": 12.7559, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.09754698034715249, |
| "grad_norm": 100.0, |
| "learning_rate": 3.853211009174313e-06, |
| "loss": 7.6448, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.0986945918806484, |
| "grad_norm": 135.0, |
| "learning_rate": 3.89908256880734e-06, |
| "loss": 11.1222, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.0998422034141443, |
| "grad_norm": 176.0, |
| "learning_rate": 3.944954128440367e-06, |
| "loss": 11.0153, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.10098981494764023, |
| "grad_norm": 130.0, |
| "learning_rate": 3.9908256880733945e-06, |
| "loss": 11.3109, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.10213742648113613, |
| "grad_norm": 176.0, |
| "learning_rate": 4.036697247706423e-06, |
| "loss": 11.3729, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.10328503801463204, |
| "grad_norm": 132.0, |
| "learning_rate": 4.08256880733945e-06, |
| "loss": 10.5579, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.10443264954812796, |
| "grad_norm": 126.0, |
| "learning_rate": 4.128440366972478e-06, |
| "loss": 9.2442, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.10558026108162387, |
| "grad_norm": 151.0, |
| "learning_rate": 4.174311926605505e-06, |
| "loss": 13.6998, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.10672787261511978, |
| "grad_norm": 99.5, |
| "learning_rate": 4.220183486238532e-06, |
| "loss": 8.6871, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.1078754841486157, |
| "grad_norm": 128.0, |
| "learning_rate": 4.26605504587156e-06, |
| "loss": 8.4919, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.1090230956821116, |
| "grad_norm": 132.0, |
| "learning_rate": 4.311926605504588e-06, |
| "loss": 8.9568, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.11017070721560751, |
| "grad_norm": 135.0, |
| "learning_rate": 4.357798165137615e-06, |
| "loss": 11.2536, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.11131831874910343, |
| "grad_norm": 141.0, |
| "learning_rate": 4.403669724770643e-06, |
| "loss": 10.4686, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.11246593028259934, |
| "grad_norm": 78.5, |
| "learning_rate": 4.44954128440367e-06, |
| "loss": 4.7855, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.11361354181609525, |
| "grad_norm": 126.5, |
| "learning_rate": 4.4954128440366975e-06, |
| "loss": 8.6237, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.11476115334959117, |
| "grad_norm": 104.5, |
| "learning_rate": 4.541284403669725e-06, |
| "loss": 6.5662, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.11476115334959117, |
| "eval_accuracy": 0.46, |
| "eval_loss": 10.765486717224121, |
| "eval_runtime": 49.6485, |
| "eval_samples_per_second": 2.014, |
| "eval_steps_per_second": 2.014, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.11590876488308707, |
| "grad_norm": 103.0, |
| "learning_rate": 4.587155963302753e-06, |
| "loss": 6.5649, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.11705637641658298, |
| "grad_norm": 144.0, |
| "learning_rate": 4.63302752293578e-06, |
| "loss": 8.2535, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.1182039879500789, |
| "grad_norm": 135.0, |
| "learning_rate": 4.678899082568808e-06, |
| "loss": 11.0001, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.11935159948357481, |
| "grad_norm": 109.0, |
| "learning_rate": 4.724770642201835e-06, |
| "loss": 8.2321, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.12049921101707071, |
| "grad_norm": 134.0, |
| "learning_rate": 4.770642201834863e-06, |
| "loss": 10.8236, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.12164682255056664, |
| "grad_norm": 133.0, |
| "learning_rate": 4.816513761467891e-06, |
| "loss": 10.03, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.12279443408406254, |
| "grad_norm": 148.0, |
| "learning_rate": 4.862385321100918e-06, |
| "loss": 13.1908, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.12394204561755846, |
| "grad_norm": 64.0, |
| "learning_rate": 4.908256880733945e-06, |
| "loss": 3.6086, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.12508965715105436, |
| "grad_norm": 139.0, |
| "learning_rate": 4.954128440366973e-06, |
| "loss": 10.9146, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.1262372686845503, |
| "grad_norm": 100.5, |
| "learning_rate": 5e-06, |
| "loss": 7.6266, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.1273848802180462, |
| "grad_norm": 102.0, |
| "learning_rate": 5.045871559633028e-06, |
| "loss": 8.3553, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.1285324917515421, |
| "grad_norm": 145.0, |
| "learning_rate": 5.091743119266055e-06, |
| "loss": 8.7646, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.129680103285038, |
| "grad_norm": 178.0, |
| "learning_rate": 5.137614678899083e-06, |
| "loss": 12.6374, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.13082771481853392, |
| "grad_norm": 91.5, |
| "learning_rate": 5.18348623853211e-06, |
| "loss": 5.8455, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.13197532635202983, |
| "grad_norm": 122.0, |
| "learning_rate": 5.229357798165137e-06, |
| "loss": 9.7438, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.13312293788552576, |
| "grad_norm": 192.0, |
| "learning_rate": 5.275229357798165e-06, |
| "loss": 9.9915, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.13427054941902167, |
| "grad_norm": 108.0, |
| "learning_rate": 5.3211009174311936e-06, |
| "loss": 7.6686, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.13541816095251757, |
| "grad_norm": 153.0, |
| "learning_rate": 5.366972477064221e-06, |
| "loss": 10.4111, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.13656577248601348, |
| "grad_norm": 139.0, |
| "learning_rate": 5.412844036697248e-06, |
| "loss": 6.46, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.1377133840195094, |
| "grad_norm": 79.0, |
| "learning_rate": 5.458715596330275e-06, |
| "loss": 5.2337, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.1388609955530053, |
| "grad_norm": 114.5, |
| "learning_rate": 5.504587155963303e-06, |
| "loss": 5.2836, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.14000860708650123, |
| "grad_norm": 99.5, |
| "learning_rate": 5.5504587155963306e-06, |
| "loss": 7.6412, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.14115621861999714, |
| "grad_norm": 147.0, |
| "learning_rate": 5.596330275229358e-06, |
| "loss": 9.4328, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.14230383015349304, |
| "grad_norm": 114.5, |
| "learning_rate": 5.642201834862385e-06, |
| "loss": 7.6121, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.14345144168698895, |
| "grad_norm": 131.0, |
| "learning_rate": 5.688073394495414e-06, |
| "loss": 8.1481, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.14459905322048486, |
| "grad_norm": 124.5, |
| "learning_rate": 5.733944954128441e-06, |
| "loss": 6.9154, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.14574666475398076, |
| "grad_norm": 125.0, |
| "learning_rate": 5.7798165137614684e-06, |
| "loss": 7.5579, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.1468942762874767, |
| "grad_norm": 69.0, |
| "learning_rate": 5.825688073394496e-06, |
| "loss": 4.5767, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.1480418878209726, |
| "grad_norm": 136.0, |
| "learning_rate": 5.871559633027524e-06, |
| "loss": 8.226, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.1491894993544685, |
| "grad_norm": 592.0, |
| "learning_rate": 5.917431192660551e-06, |
| "loss": 4.7686, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.15033711088796442, |
| "grad_norm": 199.0, |
| "learning_rate": 5.963302752293578e-06, |
| "loss": 8.1914, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.15148472242146033, |
| "grad_norm": 99.0, |
| "learning_rate": 6.0091743119266054e-06, |
| "loss": 4.6827, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.15263233395495623, |
| "grad_norm": 79.0, |
| "learning_rate": 6.0550458715596335e-06, |
| "loss": 4.0036, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.15377994548845217, |
| "grad_norm": 104.0, |
| "learning_rate": 6.100917431192661e-06, |
| "loss": 5.5383, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.15492755702194808, |
| "grad_norm": 89.5, |
| "learning_rate": 6.146788990825688e-06, |
| "loss": 5.6737, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.15607516855544398, |
| "grad_norm": 126.0, |
| "learning_rate": 6.192660550458715e-06, |
| "loss": 6.3379, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.1572227800889399, |
| "grad_norm": 106.0, |
| "learning_rate": 6.238532110091744e-06, |
| "loss": 5.8609, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.1583703916224358, |
| "grad_norm": 74.0, |
| "learning_rate": 6.284403669724771e-06, |
| "loss": 2.5903, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.1595180031559317, |
| "grad_norm": 166.0, |
| "learning_rate": 6.330275229357799e-06, |
| "loss": 6.5836, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.16066561468942764, |
| "grad_norm": 132.0, |
| "learning_rate": 6.376146788990826e-06, |
| "loss": 4.7941, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.16181322622292355, |
| "grad_norm": 79.5, |
| "learning_rate": 6.422018348623854e-06, |
| "loss": 3.4315, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.16296083775641945, |
| "grad_norm": 90.0, |
| "learning_rate": 6.467889908256881e-06, |
| "loss": 2.8439, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.16410844928991536, |
| "grad_norm": 147.0, |
| "learning_rate": 6.513761467889908e-06, |
| "loss": 5.9459, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.16525606082341127, |
| "grad_norm": 127.5, |
| "learning_rate": 6.559633027522936e-06, |
| "loss": 5.9421, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.1664036723569072, |
| "grad_norm": 108.5, |
| "learning_rate": 6.6055045871559645e-06, |
| "loss": 4.3347, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.1675512838904031, |
| "grad_norm": 110.0, |
| "learning_rate": 6.651376146788992e-06, |
| "loss": 2.862, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.16869889542389901, |
| "grad_norm": 91.5, |
| "learning_rate": 6.697247706422019e-06, |
| "loss": 3.0382, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.16984650695739492, |
| "grad_norm": 90.0, |
| "learning_rate": 6.743119266055046e-06, |
| "loss": 2.4137, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.17099411849089083, |
| "grad_norm": 205.0, |
| "learning_rate": 6.788990825688074e-06, |
| "loss": 3.6585, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.17214173002438674, |
| "grad_norm": 132.0, |
| "learning_rate": 6.8348623853211015e-06, |
| "loss": 3.3452, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.17328934155788267, |
| "grad_norm": 102.5, |
| "learning_rate": 6.880733944954129e-06, |
| "loss": 2.6872, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.17443695309137858, |
| "grad_norm": 92.5, |
| "learning_rate": 6.926605504587156e-06, |
| "loss": 2.7081, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.17558456462487448, |
| "grad_norm": 97.0, |
| "learning_rate": 6.972477064220184e-06, |
| "loss": 1.789, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.1767321761583704, |
| "grad_norm": 96.5, |
| "learning_rate": 7.018348623853211e-06, |
| "loss": 2.1933, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.1778797876918663, |
| "grad_norm": 117.0, |
| "learning_rate": 7.0642201834862385e-06, |
| "loss": 1.5972, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.1790273992253622, |
| "grad_norm": 70.0, |
| "learning_rate": 7.110091743119267e-06, |
| "loss": 1.6302, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.18017501075885814, |
| "grad_norm": 50.0, |
| "learning_rate": 7.155963302752295e-06, |
| "loss": 1.1936, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.18132262229235405, |
| "grad_norm": 71.5, |
| "learning_rate": 7.201834862385322e-06, |
| "loss": 1.2134, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.18247023382584995, |
| "grad_norm": 37.75, |
| "learning_rate": 7.247706422018349e-06, |
| "loss": 0.8042, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.18361784535934586, |
| "grad_norm": 54.75, |
| "learning_rate": 7.293577981651376e-06, |
| "loss": 0.7016, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.18476545689284177, |
| "grad_norm": 141.0, |
| "learning_rate": 7.3394495412844045e-06, |
| "loss": 1.6214, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.18591306842633767, |
| "grad_norm": 118.0, |
| "learning_rate": 7.385321100917432e-06, |
| "loss": 1.4091, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.1870606799598336, |
| "grad_norm": 42.0, |
| "learning_rate": 7.431192660550459e-06, |
| "loss": 0.636, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.18820829149332952, |
| "grad_norm": 165.0, |
| "learning_rate": 7.477064220183486e-06, |
| "loss": 1.8344, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.18935590302682542, |
| "grad_norm": 62.0, |
| "learning_rate": 7.522935779816515e-06, |
| "loss": 0.5202, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.19050351456032133, |
| "grad_norm": 87.5, |
| "learning_rate": 7.568807339449542e-06, |
| "loss": 1.1639, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.19165112609381724, |
| "grad_norm": 53.0, |
| "learning_rate": 7.6146788990825695e-06, |
| "loss": 0.907, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.19279873762731314, |
| "grad_norm": 59.5, |
| "learning_rate": 7.660550458715596e-06, |
| "loss": 1.0624, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.19394634916080908, |
| "grad_norm": 37.0, |
| "learning_rate": 7.706422018348626e-06, |
| "loss": 0.6051, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.19509396069430499, |
| "grad_norm": 50.75, |
| "learning_rate": 7.752293577981652e-06, |
| "loss": 0.9568, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.1962415722278009, |
| "grad_norm": 99.5, |
| "learning_rate": 7.79816513761468e-06, |
| "loss": 1.0009, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.1973891837612968, |
| "grad_norm": 58.75, |
| "learning_rate": 7.844036697247707e-06, |
| "loss": 1.2179, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.1985367952947927, |
| "grad_norm": 30.375, |
| "learning_rate": 7.889908256880735e-06, |
| "loss": 0.2789, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.1996844068282886, |
| "grad_norm": 48.5, |
| "learning_rate": 7.935779816513763e-06, |
| "loss": 0.7911, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.20083201836178455, |
| "grad_norm": 42.25, |
| "learning_rate": 7.981651376146789e-06, |
| "loss": 0.8686, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.20197962989528045, |
| "grad_norm": 141.0, |
| "learning_rate": 8.027522935779817e-06, |
| "loss": 1.1276, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.20312724142877636, |
| "grad_norm": 156.0, |
| "learning_rate": 8.073394495412845e-06, |
| "loss": 0.8758, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.20427485296227227, |
| "grad_norm": 32.75, |
| "learning_rate": 8.119266055045872e-06, |
| "loss": 0.6642, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.20542246449576818, |
| "grad_norm": 56.25, |
| "learning_rate": 8.1651376146789e-06, |
| "loss": 1.0594, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.20657007602926408, |
| "grad_norm": 34.5, |
| "learning_rate": 8.211009174311926e-06, |
| "loss": 0.6556, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.20771768756276002, |
| "grad_norm": 80.5, |
| "learning_rate": 8.256880733944956e-06, |
| "loss": 0.8868, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.20886529909625592, |
| "grad_norm": 47.5, |
| "learning_rate": 8.302752293577982e-06, |
| "loss": 0.7725, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.21001291062975183, |
| "grad_norm": 23.0, |
| "learning_rate": 8.34862385321101e-06, |
| "loss": 0.6719, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.21116052216324774, |
| "grad_norm": 72.0, |
| "learning_rate": 8.394495412844037e-06, |
| "loss": 0.8492, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.21230813369674364, |
| "grad_norm": 73.0, |
| "learning_rate": 8.440366972477065e-06, |
| "loss": 0.7163, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.21345574523023955, |
| "grad_norm": 82.0, |
| "learning_rate": 8.486238532110093e-06, |
| "loss": 0.7227, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.2146033567637355, |
| "grad_norm": 48.75, |
| "learning_rate": 8.53211009174312e-06, |
| "loss": 0.8237, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.2157509682972314, |
| "grad_norm": 30.75, |
| "learning_rate": 8.577981651376147e-06, |
| "loss": 0.7007, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.2168985798307273, |
| "grad_norm": 76.0, |
| "learning_rate": 8.623853211009175e-06, |
| "loss": 0.7568, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.2180461913642232, |
| "grad_norm": 126.0, |
| "learning_rate": 8.669724770642203e-06, |
| "loss": 0.8139, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.21919380289771911, |
| "grad_norm": 67.5, |
| "learning_rate": 8.71559633027523e-06, |
| "loss": 0.7062, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.22034141443121502, |
| "grad_norm": 26.375, |
| "learning_rate": 8.761467889908258e-06, |
| "loss": 0.5425, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.22148902596471096, |
| "grad_norm": 105.5, |
| "learning_rate": 8.807339449541286e-06, |
| "loss": 0.8822, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.22263663749820686, |
| "grad_norm": 131.0, |
| "learning_rate": 8.853211009174312e-06, |
| "loss": 0.9047, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.22378424903170277, |
| "grad_norm": 56.5, |
| "learning_rate": 8.89908256880734e-06, |
| "loss": 0.5039, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.22493186056519868, |
| "grad_norm": 73.5, |
| "learning_rate": 8.944954128440367e-06, |
| "loss": 0.7597, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.22607947209869458, |
| "grad_norm": 56.25, |
| "learning_rate": 8.990825688073395e-06, |
| "loss": 0.742, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.2272270836321905, |
| "grad_norm": 71.5, |
| "learning_rate": 9.036697247706423e-06, |
| "loss": 0.892, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.22837469516568643, |
| "grad_norm": 33.0, |
| "learning_rate": 9.08256880733945e-06, |
| "loss": 0.6746, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.22952230669918233, |
| "grad_norm": 95.0, |
| "learning_rate": 9.128440366972477e-06, |
| "loss": 0.8428, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.22952230669918233, |
| "eval_accuracy": 0.23, |
| "eval_loss": 0.7526699900627136, |
| "eval_runtime": 49.2923, |
| "eval_samples_per_second": 2.029, |
| "eval_steps_per_second": 2.029, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.23066991823267824, |
| "grad_norm": 43.0, |
| "learning_rate": 9.174311926605506e-06, |
| "loss": 0.6504, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.23181752976617415, |
| "grad_norm": 46.75, |
| "learning_rate": 9.220183486238534e-06, |
| "loss": 0.7568, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.23296514129967005, |
| "grad_norm": 76.5, |
| "learning_rate": 9.26605504587156e-06, |
| "loss": 0.5601, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.23411275283316596, |
| "grad_norm": 82.0, |
| "learning_rate": 9.311926605504588e-06, |
| "loss": 0.6661, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.2352603643666619, |
| "grad_norm": 63.75, |
| "learning_rate": 9.357798165137616e-06, |
| "loss": 0.7619, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.2364079759001578, |
| "grad_norm": 28.5, |
| "learning_rate": 9.403669724770643e-06, |
| "loss": 0.6332, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.2375555874336537, |
| "grad_norm": 48.75, |
| "learning_rate": 9.44954128440367e-06, |
| "loss": 0.8103, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.23870319896714962, |
| "grad_norm": 32.25, |
| "learning_rate": 9.495412844036697e-06, |
| "loss": 0.8623, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.23985081050064552, |
| "grad_norm": 51.25, |
| "learning_rate": 9.541284403669727e-06, |
| "loss": 0.6734, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.24099842203414143, |
| "grad_norm": 106.0, |
| "learning_rate": 9.587155963302753e-06, |
| "loss": 0.7637, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.24214603356763736, |
| "grad_norm": 43.5, |
| "learning_rate": 9.633027522935781e-06, |
| "loss": 0.6827, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.24329364510113327, |
| "grad_norm": 56.25, |
| "learning_rate": 9.678899082568808e-06, |
| "loss": 0.9193, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.24444125663462918, |
| "grad_norm": 67.5, |
| "learning_rate": 9.724770642201836e-06, |
| "loss": 0.8784, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.24558886816812509, |
| "grad_norm": 61.0, |
| "learning_rate": 9.770642201834864e-06, |
| "loss": 0.6853, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.246736479701621, |
| "grad_norm": 33.5, |
| "learning_rate": 9.81651376146789e-06, |
| "loss": 0.6893, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.24788409123511693, |
| "grad_norm": 20.5, |
| "learning_rate": 9.862385321100918e-06, |
| "loss": 0.6858, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.24903170276861283, |
| "grad_norm": 51.0, |
| "learning_rate": 9.908256880733946e-06, |
| "loss": 0.5894, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.2501793143021087, |
| "grad_norm": 61.0, |
| "learning_rate": 9.954128440366973e-06, |
| "loss": 0.9096, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.2513269258356047, |
| "grad_norm": 28.625, |
| "learning_rate": 1e-05, |
| "loss": 0.801, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.2524745373691006, |
| "grad_norm": 41.0, |
| "learning_rate": 1.0045871559633029e-05, |
| "loss": 0.6585, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.2536221489025965, |
| "grad_norm": 39.75, |
| "learning_rate": 1.0091743119266055e-05, |
| "loss": 0.7587, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.2547697604360924, |
| "grad_norm": 40.0, |
| "learning_rate": 1.0137614678899083e-05, |
| "loss": 0.7094, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.2559173719695883, |
| "grad_norm": 684.0, |
| "learning_rate": 1.018348623853211e-05, |
| "loss": 0.7388, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.2570649835030842, |
| "grad_norm": 54.5, |
| "learning_rate": 1.0229357798165138e-05, |
| "loss": 0.7495, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.2582125950365801, |
| "grad_norm": 65.5, |
| "learning_rate": 1.0275229357798166e-05, |
| "loss": 0.834, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.259360206570076, |
| "grad_norm": 68.5, |
| "learning_rate": 1.0321100917431192e-05, |
| "loss": 0.9911, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.26050781810357193, |
| "grad_norm": 59.75, |
| "learning_rate": 1.036697247706422e-05, |
| "loss": 0.7996, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.26165542963706784, |
| "grad_norm": 39.0, |
| "learning_rate": 1.041284403669725e-05, |
| "loss": 0.7586, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.26280304117056374, |
| "grad_norm": 41.0, |
| "learning_rate": 1.0458715596330275e-05, |
| "loss": 0.6575, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.26395065270405965, |
| "grad_norm": 25.75, |
| "learning_rate": 1.0504587155963305e-05, |
| "loss": 0.5676, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.2650982642375556, |
| "grad_norm": 38.5, |
| "learning_rate": 1.055045871559633e-05, |
| "loss": 0.7107, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.2662458757710515, |
| "grad_norm": 29.0, |
| "learning_rate": 1.0596330275229359e-05, |
| "loss": 0.5768, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.26739348730454743, |
| "grad_norm": 67.0, |
| "learning_rate": 1.0642201834862387e-05, |
| "loss": 0.8002, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.26854109883804334, |
| "grad_norm": 92.0, |
| "learning_rate": 1.0688073394495414e-05, |
| "loss": 0.9373, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.26968871037153924, |
| "grad_norm": 95.0, |
| "learning_rate": 1.0733944954128442e-05, |
| "loss": 0.9883, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.27083632190503515, |
| "grad_norm": 32.25, |
| "learning_rate": 1.077981651376147e-05, |
| "loss": 0.3327, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.27198393343853106, |
| "grad_norm": 38.75, |
| "learning_rate": 1.0825688073394496e-05, |
| "loss": 0.9128, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.27313154497202696, |
| "grad_norm": 113.5, |
| "learning_rate": 1.0871559633027524e-05, |
| "loss": 0.7185, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.27427915650552287, |
| "grad_norm": 78.5, |
| "learning_rate": 1.091743119266055e-05, |
| "loss": 0.7406, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.2754267680390188, |
| "grad_norm": 54.25, |
| "learning_rate": 1.0963302752293579e-05, |
| "loss": 0.5355, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.2765743795725147, |
| "grad_norm": 88.0, |
| "learning_rate": 1.1009174311926607e-05, |
| "loss": 0.7876, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.2777219911060106, |
| "grad_norm": 25.125, |
| "learning_rate": 1.1055045871559633e-05, |
| "loss": 0.7005, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.27886960263950655, |
| "grad_norm": 62.0, |
| "learning_rate": 1.1100917431192661e-05, |
| "loss": 0.6772, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.28001721417300246, |
| "grad_norm": 88.5, |
| "learning_rate": 1.114678899082569e-05, |
| "loss": 0.7296, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.28116482570649837, |
| "grad_norm": 29.375, |
| "learning_rate": 1.1192660550458716e-05, |
| "loss": 0.7339, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.2823124372399943, |
| "grad_norm": 21.75, |
| "learning_rate": 1.1238532110091744e-05, |
| "loss": 0.5743, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.2834600487734902, |
| "grad_norm": 127.5, |
| "learning_rate": 1.128440366972477e-05, |
| "loss": 0.9532, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.2846076603069861, |
| "grad_norm": 97.0, |
| "learning_rate": 1.1330275229357798e-05, |
| "loss": 0.9855, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.285755271840482, |
| "grad_norm": 54.25, |
| "learning_rate": 1.1376146788990828e-05, |
| "loss": 0.6011, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.2869028833739779, |
| "grad_norm": 27.125, |
| "learning_rate": 1.1422018348623853e-05, |
| "loss": 0.4934, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.2880504949074738, |
| "grad_norm": 156.0, |
| "learning_rate": 1.1467889908256882e-05, |
| "loss": 1.0312, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.2891981064409697, |
| "grad_norm": 31.5, |
| "learning_rate": 1.151376146788991e-05, |
| "loss": 0.6735, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.2903457179744656, |
| "grad_norm": 26.0, |
| "learning_rate": 1.1559633027522937e-05, |
| "loss": 0.5176, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.29149332950796153, |
| "grad_norm": 28.0, |
| "learning_rate": 1.1605504587155965e-05, |
| "loss": 0.7067, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.2926409410414575, |
| "grad_norm": 50.75, |
| "learning_rate": 1.1651376146788991e-05, |
| "loss": 0.5816, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.2937885525749534, |
| "grad_norm": 33.0, |
| "learning_rate": 1.169724770642202e-05, |
| "loss": 0.5099, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.2949361641084493, |
| "grad_norm": 63.25, |
| "learning_rate": 1.1743119266055047e-05, |
| "loss": 0.6038, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.2960837756419452, |
| "grad_norm": 152.0, |
| "learning_rate": 1.1788990825688074e-05, |
| "loss": 1.2612, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.2972313871754411, |
| "grad_norm": 55.5, |
| "learning_rate": 1.1834862385321102e-05, |
| "loss": 0.8309, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.298378998708937, |
| "grad_norm": 49.75, |
| "learning_rate": 1.188073394495413e-05, |
| "loss": 0.7434, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.29952661024243293, |
| "grad_norm": 38.25, |
| "learning_rate": 1.1926605504587156e-05, |
| "loss": 0.6988, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.30067422177592884, |
| "grad_norm": 31.25, |
| "learning_rate": 1.1972477064220184e-05, |
| "loss": 0.674, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.30182183330942475, |
| "grad_norm": 61.25, |
| "learning_rate": 1.2018348623853211e-05, |
| "loss": 0.8105, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.30296944484292065, |
| "grad_norm": 67.0, |
| "learning_rate": 1.2064220183486239e-05, |
| "loss": 0.7834, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.30411705637641656, |
| "grad_norm": 34.0, |
| "learning_rate": 1.2110091743119267e-05, |
| "loss": 0.6694, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.30526466790991247, |
| "grad_norm": 48.75, |
| "learning_rate": 1.2155963302752293e-05, |
| "loss": 0.4389, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.30641227944340843, |
| "grad_norm": 45.0, |
| "learning_rate": 1.2201834862385321e-05, |
| "loss": 0.9619, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.30755989097690434, |
| "grad_norm": 27.25, |
| "learning_rate": 1.2247706422018351e-05, |
| "loss": 0.8181, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.30870750251040024, |
| "grad_norm": 78.5, |
| "learning_rate": 1.2293577981651376e-05, |
| "loss": 0.8289, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.30985511404389615, |
| "grad_norm": 29.625, |
| "learning_rate": 1.2339449541284406e-05, |
| "loss": 0.66, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.31100272557739206, |
| "grad_norm": 51.25, |
| "learning_rate": 1.238532110091743e-05, |
| "loss": 0.6833, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.31215033711088797, |
| "grad_norm": 45.0, |
| "learning_rate": 1.243119266055046e-05, |
| "loss": 0.6545, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.3132979486443839, |
| "grad_norm": 35.5, |
| "learning_rate": 1.2477064220183488e-05, |
| "loss": 0.6642, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.3144455601778798, |
| "grad_norm": 27.75, |
| "learning_rate": 1.2522935779816515e-05, |
| "loss": 0.7786, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.3155931717113757, |
| "grad_norm": 103.0, |
| "learning_rate": 1.2568807339449543e-05, |
| "loss": 0.9578, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.3167407832448716, |
| "grad_norm": 61.75, |
| "learning_rate": 1.261467889908257e-05, |
| "loss": 0.5513, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.3178883947783675, |
| "grad_norm": 86.5, |
| "learning_rate": 1.2660550458715597e-05, |
| "loss": 0.855, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.3190360063118634, |
| "grad_norm": 47.0, |
| "learning_rate": 1.2706422018348625e-05, |
| "loss": 0.7903, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.32018361784535937, |
| "grad_norm": 21.125, |
| "learning_rate": 1.2752293577981652e-05, |
| "loss": 0.6084, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.3213312293788553, |
| "grad_norm": 53.0, |
| "learning_rate": 1.279816513761468e-05, |
| "loss": 0.7655, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.3224788409123512, |
| "grad_norm": 69.0, |
| "learning_rate": 1.2844036697247708e-05, |
| "loss": 0.7763, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.3236264524458471, |
| "grad_norm": 98.0, |
| "learning_rate": 1.2889908256880734e-05, |
| "loss": 0.8355, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.324774063979343, |
| "grad_norm": 65.0, |
| "learning_rate": 1.2935779816513762e-05, |
| "loss": 0.7071, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.3259216755128389, |
| "grad_norm": 25.75, |
| "learning_rate": 1.298165137614679e-05, |
| "loss": 0.8358, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.3270692870463348, |
| "grad_norm": 48.25, |
| "learning_rate": 1.3027522935779817e-05, |
| "loss": 0.7069, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.3282168985798307, |
| "grad_norm": 27.75, |
| "learning_rate": 1.3073394495412845e-05, |
| "loss": 0.601, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.3293645101133266, |
| "grad_norm": 44.25, |
| "learning_rate": 1.3119266055045871e-05, |
| "loss": 0.6844, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.33051212164682253, |
| "grad_norm": 73.0, |
| "learning_rate": 1.31651376146789e-05, |
| "loss": 1.5458, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.33165973318031844, |
| "grad_norm": 36.0, |
| "learning_rate": 1.3211009174311929e-05, |
| "loss": 0.8631, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.3328073447138144, |
| "grad_norm": 60.25, |
| "learning_rate": 1.3256880733944954e-05, |
| "loss": 0.7894, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.3339549562473103, |
| "grad_norm": 46.75, |
| "learning_rate": 1.3302752293577984e-05, |
| "loss": 0.7715, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.3351025677808062, |
| "grad_norm": 17.5, |
| "learning_rate": 1.3348623853211012e-05, |
| "loss": 0.5756, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.3362501793143021, |
| "grad_norm": 32.0, |
| "learning_rate": 1.3394495412844038e-05, |
| "loss": 0.7155, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.33739779084779803, |
| "grad_norm": 96.5, |
| "learning_rate": 1.3440366972477066e-05, |
| "loss": 0.6518, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.33854540238129394, |
| "grad_norm": 37.75, |
| "learning_rate": 1.3486238532110092e-05, |
| "loss": 0.6962, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.33969301391478984, |
| "grad_norm": 57.5, |
| "learning_rate": 1.353211009174312e-05, |
| "loss": 0.5687, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.34084062544828575, |
| "grad_norm": 20.125, |
| "learning_rate": 1.3577981651376149e-05, |
| "loss": 0.7647, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.34198823698178166, |
| "grad_norm": 22.75, |
| "learning_rate": 1.3623853211009175e-05, |
| "loss": 0.7313, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.34313584851527756, |
| "grad_norm": 71.0, |
| "learning_rate": 1.3669724770642203e-05, |
| "loss": 0.8702, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.34428346004877347, |
| "grad_norm": 70.0, |
| "learning_rate": 1.3715596330275231e-05, |
| "loss": 0.7895, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.34428346004877347, |
| "eval_accuracy": 0.22, |
| "eval_loss": 0.6987403631210327, |
| "eval_runtime": 49.3136, |
| "eval_samples_per_second": 2.028, |
| "eval_steps_per_second": 2.028, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.3454310715822694, |
| "grad_norm": 32.0, |
| "learning_rate": 1.3761467889908258e-05, |
| "loss": 0.6857, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.34657868311576534, |
| "grad_norm": 58.25, |
| "learning_rate": 1.3807339449541286e-05, |
| "loss": 0.6662, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.34772629464926125, |
| "grad_norm": 26.875, |
| "learning_rate": 1.3853211009174312e-05, |
| "loss": 0.5594, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.34887390618275715, |
| "grad_norm": 36.5, |
| "learning_rate": 1.389908256880734e-05, |
| "loss": 0.6889, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.35002151771625306, |
| "grad_norm": 49.0, |
| "learning_rate": 1.3944954128440368e-05, |
| "loss": 0.6969, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.35116912924974897, |
| "grad_norm": 173.0, |
| "learning_rate": 1.3990825688073395e-05, |
| "loss": 0.6462, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.3523167407832449, |
| "grad_norm": 60.0, |
| "learning_rate": 1.4036697247706423e-05, |
| "loss": 0.539, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.3534643523167408, |
| "grad_norm": 27.125, |
| "learning_rate": 1.4082568807339452e-05, |
| "loss": 0.855, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.3546119638502367, |
| "grad_norm": 61.5, |
| "learning_rate": 1.4128440366972477e-05, |
| "loss": 0.7295, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.3557595753837326, |
| "grad_norm": 56.25, |
| "learning_rate": 1.4174311926605507e-05, |
| "loss": 0.8013, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.3569071869172285, |
| "grad_norm": 19.375, |
| "learning_rate": 1.4220183486238533e-05, |
| "loss": 0.6061, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.3580547984507244, |
| "grad_norm": 22.75, |
| "learning_rate": 1.4266055045871561e-05, |
| "loss": 0.2982, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.3592024099842203, |
| "grad_norm": 102.0, |
| "learning_rate": 1.431192660550459e-05, |
| "loss": 1.1087, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.3603500215177163, |
| "grad_norm": 42.25, |
| "learning_rate": 1.4357798165137616e-05, |
| "loss": 0.8645, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.3614976330512122, |
| "grad_norm": 112.0, |
| "learning_rate": 1.4403669724770644e-05, |
| "loss": 1.2745, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.3626452445847081, |
| "grad_norm": 21.125, |
| "learning_rate": 1.4449541284403672e-05, |
| "loss": 0.3714, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.363792856118204, |
| "grad_norm": 87.0, |
| "learning_rate": 1.4495412844036698e-05, |
| "loss": 0.9653, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.3649404676516999, |
| "grad_norm": 82.0, |
| "learning_rate": 1.4541284403669726e-05, |
| "loss": 0.982, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.3660880791851958, |
| "grad_norm": 27.375, |
| "learning_rate": 1.4587155963302753e-05, |
| "loss": 0.4173, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.3672356907186917, |
| "grad_norm": 74.0, |
| "learning_rate": 1.463302752293578e-05, |
| "loss": 0.9199, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.3683833022521876, |
| "grad_norm": 57.0, |
| "learning_rate": 1.4678899082568809e-05, |
| "loss": 0.6555, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.36953091378568353, |
| "grad_norm": 40.25, |
| "learning_rate": 1.4724770642201835e-05, |
| "loss": 0.5512, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.37067852531917944, |
| "grad_norm": 51.0, |
| "learning_rate": 1.4770642201834863e-05, |
| "loss": 0.8541, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.37182613685267535, |
| "grad_norm": 112.0, |
| "learning_rate": 1.4816513761467891e-05, |
| "loss": 1.031, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.37297374838617126, |
| "grad_norm": 17.625, |
| "learning_rate": 1.4862385321100918e-05, |
| "loss": 0.448, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.3741213599196672, |
| "grad_norm": 109.0, |
| "learning_rate": 1.4908256880733946e-05, |
| "loss": 1.0731, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.3752689714531631, |
| "grad_norm": 76.0, |
| "learning_rate": 1.4954128440366972e-05, |
| "loss": 0.9293, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.37641658298665903, |
| "grad_norm": 142.0, |
| "learning_rate": 1.5000000000000002e-05, |
| "loss": 1.2647, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.37756419452015494, |
| "grad_norm": 17.0, |
| "learning_rate": 1.504587155963303e-05, |
| "loss": 0.5858, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.37871180605365085, |
| "grad_norm": 63.5, |
| "learning_rate": 1.5091743119266057e-05, |
| "loss": 1.1073, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.37985941758714675, |
| "grad_norm": 23.125, |
| "learning_rate": 1.5137614678899085e-05, |
| "loss": 0.6616, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.38100702912064266, |
| "grad_norm": 23.5, |
| "learning_rate": 1.5183486238532111e-05, |
| "loss": 0.81, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.38215464065413857, |
| "grad_norm": 44.5, |
| "learning_rate": 1.5229357798165139e-05, |
| "loss": 0.7774, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.3833022521876345, |
| "grad_norm": 53.0, |
| "learning_rate": 1.5275229357798167e-05, |
| "loss": 0.7527, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.3844498637211304, |
| "grad_norm": 26.0, |
| "learning_rate": 1.5321100917431192e-05, |
| "loss": 0.5953, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.3855974752546263, |
| "grad_norm": 81.5, |
| "learning_rate": 1.536697247706422e-05, |
| "loss": 1.1549, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.3867450867881222, |
| "grad_norm": 40.25, |
| "learning_rate": 1.541284403669725e-05, |
| "loss": 0.6953, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.38789269832161816, |
| "grad_norm": 59.5, |
| "learning_rate": 1.5458715596330276e-05, |
| "loss": 0.9157, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.38904030985511406, |
| "grad_norm": 37.25, |
| "learning_rate": 1.5504587155963304e-05, |
| "loss": 0.6101, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.39018792138860997, |
| "grad_norm": 47.75, |
| "learning_rate": 1.555045871559633e-05, |
| "loss": 0.6971, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.3913355329221059, |
| "grad_norm": 35.5, |
| "learning_rate": 1.559633027522936e-05, |
| "loss": 0.6038, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.3924831444556018, |
| "grad_norm": 258.0, |
| "learning_rate": 1.564220183486239e-05, |
| "loss": 0.7838, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.3936307559890977, |
| "grad_norm": 19.625, |
| "learning_rate": 1.5688073394495413e-05, |
| "loss": 0.6458, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.3947783675225936, |
| "grad_norm": 78.5, |
| "learning_rate": 1.573394495412844e-05, |
| "loss": 0.8405, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.3959259790560895, |
| "grad_norm": 118.5, |
| "learning_rate": 1.577981651376147e-05, |
| "loss": 1.0364, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.3970735905895854, |
| "grad_norm": 30.0, |
| "learning_rate": 1.5825688073394497e-05, |
| "loss": 0.5703, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.3982212021230813, |
| "grad_norm": 60.75, |
| "learning_rate": 1.5871559633027525e-05, |
| "loss": 0.8595, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.3993688136565772, |
| "grad_norm": 78.5, |
| "learning_rate": 1.591743119266055e-05, |
| "loss": 0.8161, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.40051642519007313, |
| "grad_norm": 33.75, |
| "learning_rate": 1.5963302752293578e-05, |
| "loss": 0.7062, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.4016640367235691, |
| "grad_norm": 20.75, |
| "learning_rate": 1.6009174311926606e-05, |
| "loss": 0.825, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.402811648257065, |
| "grad_norm": 20.375, |
| "learning_rate": 1.6055045871559634e-05, |
| "loss": 0.5635, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.4039592597905609, |
| "grad_norm": 26.0, |
| "learning_rate": 1.6100917431192662e-05, |
| "loss": 0.7392, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.4051068713240568, |
| "grad_norm": 39.75, |
| "learning_rate": 1.614678899082569e-05, |
| "loss": 0.6261, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.4062544828575527, |
| "grad_norm": 40.0, |
| "learning_rate": 1.6192660550458715e-05, |
| "loss": 0.6046, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.40740209439104863, |
| "grad_norm": 106.0, |
| "learning_rate": 1.6238532110091743e-05, |
| "loss": 0.9682, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.40854970592454454, |
| "grad_norm": 51.25, |
| "learning_rate": 1.628440366972477e-05, |
| "loss": 0.7811, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.40969731745804044, |
| "grad_norm": 38.0, |
| "learning_rate": 1.63302752293578e-05, |
| "loss": 0.8129, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.41084492899153635, |
| "grad_norm": 25.375, |
| "learning_rate": 1.6376146788990827e-05, |
| "loss": 0.5273, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.41199254052503226, |
| "grad_norm": 34.0, |
| "learning_rate": 1.6422018348623852e-05, |
| "loss": 0.5413, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.41314015205852817, |
| "grad_norm": 19.25, |
| "learning_rate": 1.6467889908256884e-05, |
| "loss": 0.5278, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.4142877635920241, |
| "grad_norm": 24.0, |
| "learning_rate": 1.6513761467889912e-05, |
| "loss": 0.6991, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.41543537512552003, |
| "grad_norm": 180.0, |
| "learning_rate": 1.6559633027522936e-05, |
| "loss": 0.7462, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.41658298665901594, |
| "grad_norm": 27.125, |
| "learning_rate": 1.6605504587155964e-05, |
| "loss": 0.7057, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.41773059819251185, |
| "grad_norm": 75.0, |
| "learning_rate": 1.6651376146788993e-05, |
| "loss": 0.8487, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.41887820972600776, |
| "grad_norm": 52.5, |
| "learning_rate": 1.669724770642202e-05, |
| "loss": 0.83, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.42002582125950366, |
| "grad_norm": 18.5, |
| "learning_rate": 1.674311926605505e-05, |
| "loss": 0.4915, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.42117343279299957, |
| "grad_norm": 76.0, |
| "learning_rate": 1.6788990825688073e-05, |
| "loss": 0.6684, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.4223210443264955, |
| "grad_norm": 21.625, |
| "learning_rate": 1.68348623853211e-05, |
| "loss": 0.5262, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.4234686558599914, |
| "grad_norm": 13.375, |
| "learning_rate": 1.688073394495413e-05, |
| "loss": 0.5778, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.4246162673934873, |
| "grad_norm": 20.375, |
| "learning_rate": 1.6926605504587158e-05, |
| "loss": 0.5191, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.4257638789269832, |
| "grad_norm": 91.5, |
| "learning_rate": 1.6972477064220186e-05, |
| "loss": 1.0079, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.4269114904604791, |
| "grad_norm": 55.25, |
| "learning_rate": 1.701834862385321e-05, |
| "loss": 0.6378, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.42805910199397507, |
| "grad_norm": 39.5, |
| "learning_rate": 1.706422018348624e-05, |
| "loss": 0.836, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.429206713527471, |
| "grad_norm": 42.75, |
| "learning_rate": 1.7110091743119267e-05, |
| "loss": 0.5683, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.4303543250609669, |
| "grad_norm": 60.25, |
| "learning_rate": 1.7155963302752295e-05, |
| "loss": 0.4543, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.4315019365944628, |
| "grad_norm": 21.375, |
| "learning_rate": 1.7201834862385323e-05, |
| "loss": 0.5242, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.4326495481279587, |
| "grad_norm": 17.625, |
| "learning_rate": 1.724770642201835e-05, |
| "loss": 0.6393, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.4337971596614546, |
| "grad_norm": 21.875, |
| "learning_rate": 1.7293577981651376e-05, |
| "loss": 0.5476, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.4349447711949505, |
| "grad_norm": 56.25, |
| "learning_rate": 1.7339449541284407e-05, |
| "loss": 0.7973, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.4360923827284464, |
| "grad_norm": 80.0, |
| "learning_rate": 1.738532110091743e-05, |
| "loss": 0.8487, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.4372399942619423, |
| "grad_norm": 46.5, |
| "learning_rate": 1.743119266055046e-05, |
| "loss": 0.8605, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.43838760579543823, |
| "grad_norm": 65.0, |
| "learning_rate": 1.7477064220183488e-05, |
| "loss": 0.8858, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.43953521732893414, |
| "grad_norm": 87.0, |
| "learning_rate": 1.7522935779816516e-05, |
| "loss": 0.7342, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.44068282886243004, |
| "grad_norm": 108.5, |
| "learning_rate": 1.7568807339449544e-05, |
| "loss": 0.8372, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.441830440395926, |
| "grad_norm": 38.5, |
| "learning_rate": 1.7614678899082572e-05, |
| "loss": 1.0963, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.4429780519294219, |
| "grad_norm": 21.875, |
| "learning_rate": 1.7660550458715597e-05, |
| "loss": 0.737, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.4441256634629178, |
| "grad_norm": 34.25, |
| "learning_rate": 1.7706422018348625e-05, |
| "loss": 0.7902, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.4452732749964137, |
| "grad_norm": 116.0, |
| "learning_rate": 1.7752293577981653e-05, |
| "loss": 0.875, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.44642088652990963, |
| "grad_norm": 66.5, |
| "learning_rate": 1.779816513761468e-05, |
| "loss": 0.9535, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.44756849806340554, |
| "grad_norm": 46.25, |
| "learning_rate": 1.784403669724771e-05, |
| "loss": 0.7879, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.44871610959690145, |
| "grad_norm": 48.75, |
| "learning_rate": 1.7889908256880734e-05, |
| "loss": 0.6081, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.44986372113039735, |
| "grad_norm": 32.5, |
| "learning_rate": 1.7935779816513762e-05, |
| "loss": 0.6908, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.45101133266389326, |
| "grad_norm": 34.0, |
| "learning_rate": 1.798165137614679e-05, |
| "loss": 0.6664, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.45215894419738917, |
| "grad_norm": 74.5, |
| "learning_rate": 1.8027522935779818e-05, |
| "loss": 0.6012, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.4533065557308851, |
| "grad_norm": 33.25, |
| "learning_rate": 1.8073394495412846e-05, |
| "loss": 0.6278, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.454454167264381, |
| "grad_norm": 19.25, |
| "learning_rate": 1.811926605504587e-05, |
| "loss": 0.6279, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.45560177879787694, |
| "grad_norm": 33.5, |
| "learning_rate": 1.81651376146789e-05, |
| "loss": 0.689, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.45674939033137285, |
| "grad_norm": 34.25, |
| "learning_rate": 1.821100917431193e-05, |
| "loss": 0.4764, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.45789700186486876, |
| "grad_norm": 144.0, |
| "learning_rate": 1.8256880733944955e-05, |
| "loss": 1.3598, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.45904461339836466, |
| "grad_norm": 105.5, |
| "learning_rate": 1.8302752293577983e-05, |
| "loss": 0.9441, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.45904461339836466, |
| "eval_accuracy": 0.37, |
| "eval_loss": 0.7958357334136963, |
| "eval_runtime": 49.9294, |
| "eval_samples_per_second": 2.003, |
| "eval_steps_per_second": 2.003, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.46019222493186057, |
| "grad_norm": 125.5, |
| "learning_rate": 1.834862385321101e-05, |
| "loss": 1.1444, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.4613398364653565, |
| "grad_norm": 67.0, |
| "learning_rate": 1.839449541284404e-05, |
| "loss": 0.694, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.4624874479988524, |
| "grad_norm": 48.75, |
| "learning_rate": 1.8440366972477067e-05, |
| "loss": 0.7125, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.4636350595323483, |
| "grad_norm": 60.5, |
| "learning_rate": 1.8486238532110092e-05, |
| "loss": 0.6703, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.4647826710658442, |
| "grad_norm": 51.0, |
| "learning_rate": 1.853211009174312e-05, |
| "loss": 0.6631, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.4659302825993401, |
| "grad_norm": 51.75, |
| "learning_rate": 1.8577981651376148e-05, |
| "loss": 0.7814, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.467077894132836, |
| "grad_norm": 22.875, |
| "learning_rate": 1.8623853211009176e-05, |
| "loss": 0.5642, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.4682255056663319, |
| "grad_norm": 105.0, |
| "learning_rate": 1.8669724770642204e-05, |
| "loss": 0.884, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.4693731171998279, |
| "grad_norm": 48.5, |
| "learning_rate": 1.8715596330275232e-05, |
| "loss": 0.5543, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.4705207287333238, |
| "grad_norm": 26.875, |
| "learning_rate": 1.8761467889908257e-05, |
| "loss": 0.6461, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.4716683402668197, |
| "grad_norm": 23.125, |
| "learning_rate": 1.8807339449541285e-05, |
| "loss": 0.5786, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.4728159518003156, |
| "grad_norm": 37.75, |
| "learning_rate": 1.8853211009174313e-05, |
| "loss": 0.6921, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.4739635633338115, |
| "grad_norm": 23.625, |
| "learning_rate": 1.889908256880734e-05, |
| "loss": 0.4189, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.4751111748673074, |
| "grad_norm": 108.5, |
| "learning_rate": 1.894495412844037e-05, |
| "loss": 1.0126, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.4762587864008033, |
| "grad_norm": 138.0, |
| "learning_rate": 1.8990825688073394e-05, |
| "loss": 1.2399, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.47740639793429923, |
| "grad_norm": 31.375, |
| "learning_rate": 1.9036697247706422e-05, |
| "loss": 0.4347, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.47855400946779514, |
| "grad_norm": 120.5, |
| "learning_rate": 1.9082568807339454e-05, |
| "loss": 1.1874, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.47970162100129105, |
| "grad_norm": 25.5, |
| "learning_rate": 1.912844036697248e-05, |
| "loss": 0.6172, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.48084923253478695, |
| "grad_norm": 29.0, |
| "learning_rate": 1.9174311926605506e-05, |
| "loss": 0.7072, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.48199684406828286, |
| "grad_norm": 40.75, |
| "learning_rate": 1.9220183486238534e-05, |
| "loss": 0.8408, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.4831444556017788, |
| "grad_norm": 17.5, |
| "learning_rate": 1.9266055045871563e-05, |
| "loss": 0.6384, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.48429206713527473, |
| "grad_norm": 26.375, |
| "learning_rate": 1.931192660550459e-05, |
| "loss": 0.7132, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.48543967866877064, |
| "grad_norm": 41.0, |
| "learning_rate": 1.9357798165137615e-05, |
| "loss": 0.6823, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.48658729020226654, |
| "grad_norm": 36.0, |
| "learning_rate": 1.9403669724770643e-05, |
| "loss": 0.5629, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.48773490173576245, |
| "grad_norm": 93.5, |
| "learning_rate": 1.944954128440367e-05, |
| "loss": 1.028, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.48888251326925836, |
| "grad_norm": 70.5, |
| "learning_rate": 1.94954128440367e-05, |
| "loss": 0.7085, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.49003012480275426, |
| "grad_norm": 17.875, |
| "learning_rate": 1.9541284403669728e-05, |
| "loss": 0.5299, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.49117773633625017, |
| "grad_norm": 58.75, |
| "learning_rate": 1.9587155963302752e-05, |
| "loss": 0.9028, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.4923253478697461, |
| "grad_norm": 45.25, |
| "learning_rate": 1.963302752293578e-05, |
| "loss": 0.8021, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.493472959403242, |
| "grad_norm": 69.0, |
| "learning_rate": 1.967889908256881e-05, |
| "loss": 0.696, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.4946205709367379, |
| "grad_norm": 44.0, |
| "learning_rate": 1.9724770642201837e-05, |
| "loss": 0.5913, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.49576818247023385, |
| "grad_norm": 71.5, |
| "learning_rate": 1.9770642201834865e-05, |
| "loss": 0.8661, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.49691579400372976, |
| "grad_norm": 80.5, |
| "learning_rate": 1.9816513761467893e-05, |
| "loss": 1.109, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.49806340553722567, |
| "grad_norm": 86.0, |
| "learning_rate": 1.9862385321100917e-05, |
| "loss": 1.0316, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.4992110170707216, |
| "grad_norm": 30.375, |
| "learning_rate": 1.9908256880733945e-05, |
| "loss": 0.7336, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.5003586286042174, |
| "grad_norm": 42.75, |
| "learning_rate": 1.9954128440366974e-05, |
| "loss": 0.7081, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.5015062401377134, |
| "grad_norm": 20.75, |
| "learning_rate": 2e-05, |
| "loss": 0.5407, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.5026538516712094, |
| "grad_norm": 117.0, |
| "learning_rate": 1.999490316004078e-05, |
| "loss": 1.1049, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.5038014632047052, |
| "grad_norm": 126.5, |
| "learning_rate": 1.998980632008155e-05, |
| "loss": 1.1828, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.5049490747382012, |
| "grad_norm": 120.5, |
| "learning_rate": 1.9984709480122327e-05, |
| "loss": 1.3274, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.506096686271697, |
| "grad_norm": 80.0, |
| "learning_rate": 1.99796126401631e-05, |
| "loss": 0.9327, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.507244297805193, |
| "grad_norm": 105.0, |
| "learning_rate": 1.9974515800203875e-05, |
| "loss": 1.1003, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.5083919093386888, |
| "grad_norm": 106.5, |
| "learning_rate": 1.9969418960244652e-05, |
| "loss": 1.0261, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.5095395208721848, |
| "grad_norm": 36.5, |
| "learning_rate": 1.9964322120285426e-05, |
| "loss": 0.695, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.5106871324056806, |
| "grad_norm": 15.8125, |
| "learning_rate": 1.99592252803262e-05, |
| "loss": 0.6983, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.5118347439391766, |
| "grad_norm": 53.5, |
| "learning_rate": 1.9954128440366974e-05, |
| "loss": 0.7186, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.5129823554726725, |
| "grad_norm": 12.25, |
| "learning_rate": 1.9949031600407747e-05, |
| "loss": 0.6272, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.5141299670061684, |
| "grad_norm": 16.125, |
| "learning_rate": 1.9943934760448525e-05, |
| "loss": 0.5849, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.5152775785396643, |
| "grad_norm": 36.25, |
| "learning_rate": 1.99388379204893e-05, |
| "loss": 0.5905, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.5164251900731602, |
| "grad_norm": 24.25, |
| "learning_rate": 1.9933741080530073e-05, |
| "loss": 0.4751, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.5175728016066562, |
| "grad_norm": 15.9375, |
| "learning_rate": 1.9928644240570846e-05, |
| "loss": 0.4372, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.518720413140152, |
| "grad_norm": 18.375, |
| "learning_rate": 1.9923547400611624e-05, |
| "loss": 0.6552, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.519868024673648, |
| "grad_norm": 13.6875, |
| "learning_rate": 1.9918450560652398e-05, |
| "loss": 0.6515, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.5210156362071439, |
| "grad_norm": 13.6875, |
| "learning_rate": 1.991335372069317e-05, |
| "loss": 0.5219, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.5221632477406398, |
| "grad_norm": 70.0, |
| "learning_rate": 1.9908256880733945e-05, |
| "loss": 0.694, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.5233108592741357, |
| "grad_norm": 47.75, |
| "learning_rate": 1.990316004077472e-05, |
| "loss": 1.0051, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.5244584708076316, |
| "grad_norm": 98.5, |
| "learning_rate": 1.9898063200815497e-05, |
| "loss": 0.8809, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.5256060823411275, |
| "grad_norm": 22.875, |
| "learning_rate": 1.989296636085627e-05, |
| "loss": 0.6882, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.5267536938746235, |
| "grad_norm": 103.0, |
| "learning_rate": 1.9887869520897044e-05, |
| "loss": 0.8227, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.5279013054081193, |
| "grad_norm": 41.5, |
| "learning_rate": 1.9882772680937822e-05, |
| "loss": 0.5851, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.5290489169416153, |
| "grad_norm": 16.125, |
| "learning_rate": 1.9877675840978596e-05, |
| "loss": 0.6286, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.5301965284751112, |
| "grad_norm": 40.0, |
| "learning_rate": 1.987257900101937e-05, |
| "loss": 0.3909, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.5313441400086071, |
| "grad_norm": 180.0, |
| "learning_rate": 1.9867482161060147e-05, |
| "loss": 1.4089, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.532491751542103, |
| "grad_norm": 67.5, |
| "learning_rate": 1.9862385321100917e-05, |
| "loss": 0.7342, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.5336393630755989, |
| "grad_norm": 146.0, |
| "learning_rate": 1.9857288481141695e-05, |
| "loss": 1.1024, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.5347869746090949, |
| "grad_norm": 112.0, |
| "learning_rate": 1.985219164118247e-05, |
| "loss": 0.7631, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.5359345861425907, |
| "grad_norm": 51.25, |
| "learning_rate": 1.9847094801223243e-05, |
| "loss": 0.8592, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.5370821976760867, |
| "grad_norm": 53.5, |
| "learning_rate": 1.984199796126402e-05, |
| "loss": 0.772, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.5382298092095825, |
| "grad_norm": 168.0, |
| "learning_rate": 1.9836901121304794e-05, |
| "loss": 1.3238, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.5393774207430785, |
| "grad_norm": 56.25, |
| "learning_rate": 1.9831804281345568e-05, |
| "loss": 0.686, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.5405250322765743, |
| "grad_norm": 140.0, |
| "learning_rate": 1.982670744138634e-05, |
| "loss": 1.3487, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.5416726438100703, |
| "grad_norm": 26.25, |
| "learning_rate": 1.9821610601427115e-05, |
| "loss": 0.654, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.5428202553435661, |
| "grad_norm": 96.0, |
| "learning_rate": 1.9816513761467893e-05, |
| "loss": 0.8932, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.5439678668770621, |
| "grad_norm": 118.5, |
| "learning_rate": 1.9811416921508667e-05, |
| "loss": 0.9886, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.5451154784105581, |
| "grad_norm": 34.75, |
| "learning_rate": 1.980632008154944e-05, |
| "loss": 0.5023, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.5462630899440539, |
| "grad_norm": 28.125, |
| "learning_rate": 1.9801223241590214e-05, |
| "loss": 0.4678, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.5474107014775499, |
| "grad_norm": 27.875, |
| "learning_rate": 1.9796126401630992e-05, |
| "loss": 0.5802, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.5485583130110457, |
| "grad_norm": 9.5625, |
| "learning_rate": 1.9791029561671766e-05, |
| "loss": 0.3969, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.5497059245445417, |
| "grad_norm": 30.875, |
| "learning_rate": 1.978593272171254e-05, |
| "loss": 0.4119, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.5508535360780376, |
| "grad_norm": 17.25, |
| "learning_rate": 1.9780835881753317e-05, |
| "loss": 0.5645, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.5520011476115335, |
| "grad_norm": 47.75, |
| "learning_rate": 1.9775739041794087e-05, |
| "loss": 0.6168, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.5531487591450294, |
| "grad_norm": 14.1875, |
| "learning_rate": 1.9770642201834865e-05, |
| "loss": 0.5325, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.5542963706785253, |
| "grad_norm": 41.25, |
| "learning_rate": 1.976554536187564e-05, |
| "loss": 0.5013, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.5554439822120212, |
| "grad_norm": 18.75, |
| "learning_rate": 1.9760448521916412e-05, |
| "loss": 0.4441, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.5565915937455171, |
| "grad_norm": 147.0, |
| "learning_rate": 1.975535168195719e-05, |
| "loss": 1.585, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.5577392052790131, |
| "grad_norm": 64.0, |
| "learning_rate": 1.9750254841997964e-05, |
| "loss": 0.958, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.558886816812509, |
| "grad_norm": 135.0, |
| "learning_rate": 1.9745158002038738e-05, |
| "loss": 1.4838, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.5600344283460049, |
| "grad_norm": 141.0, |
| "learning_rate": 1.974006116207951e-05, |
| "loss": 1.6651, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.5611820398795008, |
| "grad_norm": 108.0, |
| "learning_rate": 1.9734964322120285e-05, |
| "loss": 0.9729, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.5623296514129967, |
| "grad_norm": 35.25, |
| "learning_rate": 1.9729867482161063e-05, |
| "loss": 0.5966, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.5634772629464926, |
| "grad_norm": 34.5, |
| "learning_rate": 1.9724770642201837e-05, |
| "loss": 0.7337, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.5646248744799885, |
| "grad_norm": 19.875, |
| "learning_rate": 1.971967380224261e-05, |
| "loss": 0.4022, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.5657724860134844, |
| "grad_norm": 29.375, |
| "learning_rate": 1.9714576962283384e-05, |
| "loss": 0.618, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.5669200975469804, |
| "grad_norm": 75.5, |
| "learning_rate": 1.970948012232416e-05, |
| "loss": 0.6627, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.5680677090804762, |
| "grad_norm": 104.5, |
| "learning_rate": 1.9704383282364936e-05, |
| "loss": 0.9524, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.5692153206139722, |
| "grad_norm": 91.0, |
| "learning_rate": 1.969928644240571e-05, |
| "loss": 0.7282, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.570362932147468, |
| "grad_norm": 95.5, |
| "learning_rate": 1.9694189602446487e-05, |
| "loss": 0.9184, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.571510543680964, |
| "grad_norm": 23.625, |
| "learning_rate": 1.9689092762487257e-05, |
| "loss": 0.6252, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.57265815521446, |
| "grad_norm": 55.25, |
| "learning_rate": 1.9683995922528035e-05, |
| "loss": 0.77, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.5738057667479558, |
| "grad_norm": 49.75, |
| "learning_rate": 1.967889908256881e-05, |
| "loss": 0.5024, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.5738057667479558, |
| "eval_accuracy": 0.56, |
| "eval_loss": 0.5818310379981995, |
| "eval_runtime": 49.317, |
| "eval_samples_per_second": 2.028, |
| "eval_steps_per_second": 2.028, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.5749533782814518, |
| "grad_norm": 72.0, |
| "learning_rate": 1.9673802242609582e-05, |
| "loss": 0.5565, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.5761009898149476, |
| "grad_norm": 17.0, |
| "learning_rate": 1.966870540265036e-05, |
| "loss": 0.5465, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.5772486013484436, |
| "grad_norm": 16.5, |
| "learning_rate": 1.9663608562691134e-05, |
| "loss": 0.7208, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.5783962128819394, |
| "grad_norm": 13.75, |
| "learning_rate": 1.9658511722731907e-05, |
| "loss": 0.5784, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.5795438244154354, |
| "grad_norm": 17.875, |
| "learning_rate": 1.9653414882772685e-05, |
| "loss": 0.7433, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.5806914359489312, |
| "grad_norm": 55.5, |
| "learning_rate": 1.9648318042813455e-05, |
| "loss": 0.5593, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.5818390474824272, |
| "grad_norm": 26.0, |
| "learning_rate": 1.9643221202854233e-05, |
| "loss": 0.4981, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.5829866590159231, |
| "grad_norm": 45.5, |
| "learning_rate": 1.9638124362895006e-05, |
| "loss": 0.6998, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.584134270549419, |
| "grad_norm": 64.5, |
| "learning_rate": 1.963302752293578e-05, |
| "loss": 0.331, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.585281882082915, |
| "grad_norm": 15.75, |
| "learning_rate": 1.9627930682976558e-05, |
| "loss": 0.5757, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.5864294936164108, |
| "grad_norm": 78.0, |
| "learning_rate": 1.962283384301733e-05, |
| "loss": 0.5458, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.5875771051499068, |
| "grad_norm": 12.6875, |
| "learning_rate": 1.9617737003058106e-05, |
| "loss": 0.4577, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.5887247166834026, |
| "grad_norm": 94.5, |
| "learning_rate": 1.961264016309888e-05, |
| "loss": 1.0295, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.5898723282168986, |
| "grad_norm": 62.75, |
| "learning_rate": 1.9607543323139657e-05, |
| "loss": 0.6586, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.5910199397503945, |
| "grad_norm": 12.8125, |
| "learning_rate": 1.960244648318043e-05, |
| "loss": 0.4499, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.5921675512838904, |
| "grad_norm": 41.25, |
| "learning_rate": 1.9597349643221205e-05, |
| "loss": 0.6115, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.5933151628173863, |
| "grad_norm": 33.5, |
| "learning_rate": 1.959225280326198e-05, |
| "loss": 0.6823, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.5944627743508822, |
| "grad_norm": 67.5, |
| "learning_rate": 1.9587155963302752e-05, |
| "loss": 0.7254, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.5956103858843781, |
| "grad_norm": 21.75, |
| "learning_rate": 1.958205912334353e-05, |
| "loss": 0.6258, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.596757997417874, |
| "grad_norm": 20.125, |
| "learning_rate": 1.9576962283384304e-05, |
| "loss": 0.782, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.59790560895137, |
| "grad_norm": 55.0, |
| "learning_rate": 1.9571865443425077e-05, |
| "loss": 0.6427, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.5990532204848659, |
| "grad_norm": 21.375, |
| "learning_rate": 1.9566768603465855e-05, |
| "loss": 0.5042, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.6002008320183618, |
| "grad_norm": 84.0, |
| "learning_rate": 1.9561671763506625e-05, |
| "loss": 0.7413, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.6013484435518577, |
| "grad_norm": 32.25, |
| "learning_rate": 1.9556574923547403e-05, |
| "loss": 0.4809, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.6024960550853536, |
| "grad_norm": 29.25, |
| "learning_rate": 1.9551478083588176e-05, |
| "loss": 0.5245, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.6036436666188495, |
| "grad_norm": 115.0, |
| "learning_rate": 1.954638124362895e-05, |
| "loss": 1.0439, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.6047912781523455, |
| "grad_norm": 155.0, |
| "learning_rate": 1.9541284403669728e-05, |
| "loss": 1.6477, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.6059388896858413, |
| "grad_norm": 39.25, |
| "learning_rate": 1.95361875637105e-05, |
| "loss": 0.555, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.6070865012193373, |
| "grad_norm": 23.0, |
| "learning_rate": 1.9531090723751275e-05, |
| "loss": 0.478, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.6082341127528331, |
| "grad_norm": 43.25, |
| "learning_rate": 1.9525993883792053e-05, |
| "loss": 0.6068, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.6093817242863291, |
| "grad_norm": 44.5, |
| "learning_rate": 1.9520897043832823e-05, |
| "loss": 0.248, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.6105293358198249, |
| "grad_norm": 20.75, |
| "learning_rate": 1.95158002038736e-05, |
| "loss": 0.4505, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.6116769473533209, |
| "grad_norm": 41.5, |
| "learning_rate": 1.9510703363914374e-05, |
| "loss": 0.3902, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.6128245588868169, |
| "grad_norm": 38.75, |
| "learning_rate": 1.950560652395515e-05, |
| "loss": 0.5029, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.6139721704203127, |
| "grad_norm": 131.0, |
| "learning_rate": 1.9500509683995926e-05, |
| "loss": 1.2225, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.6151197819538087, |
| "grad_norm": 85.5, |
| "learning_rate": 1.94954128440367e-05, |
| "loss": 0.8337, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.6162673934873045, |
| "grad_norm": 43.25, |
| "learning_rate": 1.9490316004077473e-05, |
| "loss": 0.5878, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.6174150050208005, |
| "grad_norm": 12.875, |
| "learning_rate": 1.9485219164118247e-05, |
| "loss": 0.4961, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.6185626165542963, |
| "grad_norm": 77.0, |
| "learning_rate": 1.9480122324159025e-05, |
| "loss": 0.9027, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.6197102280877923, |
| "grad_norm": 46.5, |
| "learning_rate": 1.94750254841998e-05, |
| "loss": 0.7113, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.6208578396212882, |
| "grad_norm": 77.5, |
| "learning_rate": 1.9469928644240572e-05, |
| "loss": 0.7001, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.6220054511547841, |
| "grad_norm": 68.5, |
| "learning_rate": 1.9464831804281346e-05, |
| "loss": 0.6916, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.62315306268828, |
| "grad_norm": 77.0, |
| "learning_rate": 1.945973496432212e-05, |
| "loss": 0.7548, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.6243006742217759, |
| "grad_norm": 85.0, |
| "learning_rate": 1.9454638124362898e-05, |
| "loss": 0.8164, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.6254482857552719, |
| "grad_norm": 15.0625, |
| "learning_rate": 1.944954128440367e-05, |
| "loss": 0.603, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.6265958972887677, |
| "grad_norm": 23.25, |
| "learning_rate": 1.9444444444444445e-05, |
| "loss": 0.4311, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.6277435088222637, |
| "grad_norm": 10.1875, |
| "learning_rate": 1.9439347604485223e-05, |
| "loss": 0.3436, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.6288911203557596, |
| "grad_norm": 75.0, |
| "learning_rate": 1.9434250764525993e-05, |
| "loss": 0.8814, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.6300387318892555, |
| "grad_norm": 62.0, |
| "learning_rate": 1.942915392456677e-05, |
| "loss": 0.939, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.6311863434227514, |
| "grad_norm": 52.0, |
| "learning_rate": 1.9424057084607544e-05, |
| "loss": 0.411, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.6323339549562473, |
| "grad_norm": 127.5, |
| "learning_rate": 1.9418960244648318e-05, |
| "loss": 1.655, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.6334815664897432, |
| "grad_norm": 96.0, |
| "learning_rate": 1.9413863404689096e-05, |
| "loss": 1.4065, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.6346291780232391, |
| "grad_norm": 52.5, |
| "learning_rate": 1.940876656472987e-05, |
| "loss": 0.7391, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.635776789556735, |
| "grad_norm": 78.0, |
| "learning_rate": 1.9403669724770643e-05, |
| "loss": 0.9576, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.636924401090231, |
| "grad_norm": 91.0, |
| "learning_rate": 1.9398572884811417e-05, |
| "loss": 1.0132, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.6380720126237268, |
| "grad_norm": 14.8125, |
| "learning_rate": 1.9393476044852195e-05, |
| "loss": 0.734, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.6392196241572228, |
| "grad_norm": 63.75, |
| "learning_rate": 1.938837920489297e-05, |
| "loss": 0.6127, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.6403672356907187, |
| "grad_norm": 19.625, |
| "learning_rate": 1.9383282364933742e-05, |
| "loss": 0.5999, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.6415148472242146, |
| "grad_norm": 21.625, |
| "learning_rate": 1.9378185524974516e-05, |
| "loss": 0.7446, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.6426624587577106, |
| "grad_norm": 26.375, |
| "learning_rate": 1.937308868501529e-05, |
| "loss": 0.6067, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.6438100702912064, |
| "grad_norm": 130.0, |
| "learning_rate": 1.9367991845056068e-05, |
| "loss": 1.0849, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.6449576818247024, |
| "grad_norm": 26.875, |
| "learning_rate": 1.936289500509684e-05, |
| "loss": 0.4882, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.6461052933581982, |
| "grad_norm": 13.375, |
| "learning_rate": 1.9357798165137615e-05, |
| "loss": 0.6071, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.6472529048916942, |
| "grad_norm": 23.625, |
| "learning_rate": 1.9352701325178393e-05, |
| "loss": 0.7541, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.64840051642519, |
| "grad_norm": 22.5, |
| "learning_rate": 1.9347604485219163e-05, |
| "loss": 0.6343, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.649548127958686, |
| "grad_norm": 37.75, |
| "learning_rate": 1.934250764525994e-05, |
| "loss": 0.629, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.6506957394921818, |
| "grad_norm": 33.25, |
| "learning_rate": 1.9337410805300714e-05, |
| "loss": 0.6112, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.6518433510256778, |
| "grad_norm": 23.625, |
| "learning_rate": 1.9332313965341488e-05, |
| "loss": 0.6854, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.6529909625591738, |
| "grad_norm": 25.125, |
| "learning_rate": 1.9327217125382266e-05, |
| "loss": 0.5574, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.6541385740926696, |
| "grad_norm": 22.125, |
| "learning_rate": 1.932212028542304e-05, |
| "loss": 0.4604, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.6552861856261656, |
| "grad_norm": 10.625, |
| "learning_rate": 1.9317023445463813e-05, |
| "loss": 0.409, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.6564337971596614, |
| "grad_norm": 11.375, |
| "learning_rate": 1.931192660550459e-05, |
| "loss": 0.4102, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.6575814086931574, |
| "grad_norm": 74.5, |
| "learning_rate": 1.9306829765545365e-05, |
| "loss": 0.5481, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.6587290202266533, |
| "grad_norm": 109.5, |
| "learning_rate": 1.930173292558614e-05, |
| "loss": 0.886, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.6598766317601492, |
| "grad_norm": 48.75, |
| "learning_rate": 1.9296636085626912e-05, |
| "loss": 0.6536, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.6610242432936451, |
| "grad_norm": 57.75, |
| "learning_rate": 1.9291539245667686e-05, |
| "loss": 0.902, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.662171854827141, |
| "grad_norm": 61.5, |
| "learning_rate": 1.9286442405708464e-05, |
| "loss": 0.7151, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.6633194663606369, |
| "grad_norm": 36.25, |
| "learning_rate": 1.9281345565749237e-05, |
| "loss": 0.6232, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.6644670778941328, |
| "grad_norm": 11.6875, |
| "learning_rate": 1.927624872579001e-05, |
| "loss": 0.3918, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.6656146894276288, |
| "grad_norm": 13.9375, |
| "learning_rate": 1.9271151885830785e-05, |
| "loss": 0.7175, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.6667623009611247, |
| "grad_norm": 17.875, |
| "learning_rate": 1.9266055045871563e-05, |
| "loss": 0.7939, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.6679099124946206, |
| "grad_norm": 13.9375, |
| "learning_rate": 1.9260958205912336e-05, |
| "loss": 0.6663, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.6690575240281165, |
| "grad_norm": 17.75, |
| "learning_rate": 1.925586136595311e-05, |
| "loss": 0.512, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.6702051355616124, |
| "grad_norm": 24.625, |
| "learning_rate": 1.9250764525993884e-05, |
| "loss": 0.8056, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.6713527470951083, |
| "grad_norm": 46.75, |
| "learning_rate": 1.9245667686034658e-05, |
| "loss": 0.6661, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.6725003586286042, |
| "grad_norm": 23.5, |
| "learning_rate": 1.9240570846075435e-05, |
| "loss": 0.6705, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.6736479701621001, |
| "grad_norm": 55.5, |
| "learning_rate": 1.923547400611621e-05, |
| "loss": 0.7411, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.6747955816955961, |
| "grad_norm": 34.25, |
| "learning_rate": 1.9230377166156983e-05, |
| "loss": 0.6056, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.6759431932290919, |
| "grad_norm": 107.5, |
| "learning_rate": 1.922528032619776e-05, |
| "loss": 0.9975, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.6770908047625879, |
| "grad_norm": 14.1875, |
| "learning_rate": 1.9220183486238534e-05, |
| "loss": 0.5988, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.6782384162960837, |
| "grad_norm": 132.0, |
| "learning_rate": 1.921508664627931e-05, |
| "loss": 1.1598, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.6793860278295797, |
| "grad_norm": 15.6875, |
| "learning_rate": 1.9209989806320086e-05, |
| "loss": 0.4172, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.6805336393630756, |
| "grad_norm": 38.25, |
| "learning_rate": 1.9204892966360856e-05, |
| "loss": 0.5714, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.6816812508965715, |
| "grad_norm": 14.0, |
| "learning_rate": 1.9199796126401633e-05, |
| "loss": 0.4887, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.6828288624300675, |
| "grad_norm": 41.5, |
| "learning_rate": 1.9194699286442407e-05, |
| "loss": 0.6648, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.6839764739635633, |
| "grad_norm": 13.875, |
| "learning_rate": 1.918960244648318e-05, |
| "loss": 0.6589, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.6851240854970593, |
| "grad_norm": 35.0, |
| "learning_rate": 1.918450560652396e-05, |
| "loss": 1.0174, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.6862716970305551, |
| "grad_norm": 27.875, |
| "learning_rate": 1.9179408766564732e-05, |
| "loss": 0.7711, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.6874193085640511, |
| "grad_norm": 48.25, |
| "learning_rate": 1.9174311926605506e-05, |
| "loss": 0.4402, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.6885669200975469, |
| "grad_norm": 49.0, |
| "learning_rate": 1.916921508664628e-05, |
| "loss": 0.4483, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.6885669200975469, |
| "eval_accuracy": 0.63, |
| "eval_loss": 0.6301568150520325, |
| "eval_runtime": 49.333, |
| "eval_samples_per_second": 2.027, |
| "eval_steps_per_second": 2.027, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.6897145316310429, |
| "grad_norm": 30.25, |
| "learning_rate": 1.9164118246687054e-05, |
| "loss": 0.4694, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.6908621431645388, |
| "grad_norm": 29.875, |
| "learning_rate": 1.915902140672783e-05, |
| "loss": 0.374, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.6920097546980347, |
| "grad_norm": 70.5, |
| "learning_rate": 1.9153924566768605e-05, |
| "loss": 0.941, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.6931573662315307, |
| "grad_norm": 87.0, |
| "learning_rate": 1.914882772680938e-05, |
| "loss": 0.906, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.6943049777650265, |
| "grad_norm": 14.0625, |
| "learning_rate": 1.9143730886850153e-05, |
| "loss": 0.406, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.6954525892985225, |
| "grad_norm": 55.25, |
| "learning_rate": 1.913863404689093e-05, |
| "loss": 0.779, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.6966002008320183, |
| "grad_norm": 36.5, |
| "learning_rate": 1.9133537206931704e-05, |
| "loss": 0.5805, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.6977478123655143, |
| "grad_norm": 23.75, |
| "learning_rate": 1.912844036697248e-05, |
| "loss": 0.6406, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.6988954238990102, |
| "grad_norm": 39.25, |
| "learning_rate": 1.9123343527013256e-05, |
| "loss": 0.4615, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.7000430354325061, |
| "grad_norm": 67.5, |
| "learning_rate": 1.9118246687054026e-05, |
| "loss": 0.8637, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.701190646966002, |
| "grad_norm": 22.5, |
| "learning_rate": 1.9113149847094803e-05, |
| "loss": 0.4727, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.7023382584994979, |
| "grad_norm": 27.125, |
| "learning_rate": 1.9108053007135577e-05, |
| "loss": 0.6226, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.7034858700329938, |
| "grad_norm": 62.5, |
| "learning_rate": 1.910295616717635e-05, |
| "loss": 0.6596, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.7046334815664897, |
| "grad_norm": 40.0, |
| "learning_rate": 1.909785932721713e-05, |
| "loss": 0.5424, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.7057810930999856, |
| "grad_norm": 62.5, |
| "learning_rate": 1.9092762487257902e-05, |
| "loss": 0.6348, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.7069287046334816, |
| "grad_norm": 80.5, |
| "learning_rate": 1.9087665647298676e-05, |
| "loss": 0.9329, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.7080763161669775, |
| "grad_norm": 101.5, |
| "learning_rate": 1.9082568807339454e-05, |
| "loss": 1.0578, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.7092239277004734, |
| "grad_norm": 23.375, |
| "learning_rate": 1.9077471967380224e-05, |
| "loss": 0.6725, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.7103715392339693, |
| "grad_norm": 42.0, |
| "learning_rate": 1.9072375127421e-05, |
| "loss": 0.6087, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.7115191507674652, |
| "grad_norm": 37.0, |
| "learning_rate": 1.9067278287461775e-05, |
| "loss": 0.6237, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.7126667623009612, |
| "grad_norm": 18.5, |
| "learning_rate": 1.906218144750255e-05, |
| "loss": 0.6327, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.713814373834457, |
| "grad_norm": 27.375, |
| "learning_rate": 1.9057084607543327e-05, |
| "loss": 0.7476, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.714961985367953, |
| "grad_norm": 28.25, |
| "learning_rate": 1.90519877675841e-05, |
| "loss": 0.7312, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.7161095969014488, |
| "grad_norm": 57.75, |
| "learning_rate": 1.9046890927624874e-05, |
| "loss": 0.6385, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.7172572084349448, |
| "grad_norm": 61.75, |
| "learning_rate": 1.9041794087665648e-05, |
| "loss": 0.3454, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.7184048199684406, |
| "grad_norm": 39.0, |
| "learning_rate": 1.9036697247706422e-05, |
| "loss": 0.6641, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.7195524315019366, |
| "grad_norm": 62.5, |
| "learning_rate": 1.9031600407747196e-05, |
| "loss": 0.8142, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.7207000430354326, |
| "grad_norm": 16.375, |
| "learning_rate": 1.9026503567787973e-05, |
| "loss": 0.6127, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.7218476545689284, |
| "grad_norm": 57.75, |
| "learning_rate": 1.9021406727828747e-05, |
| "loss": 0.7968, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.7229952661024244, |
| "grad_norm": 25.375, |
| "learning_rate": 1.901630988786952e-05, |
| "loss": 0.4927, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.7241428776359202, |
| "grad_norm": 48.25, |
| "learning_rate": 1.90112130479103e-05, |
| "loss": 0.6833, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.7252904891694162, |
| "grad_norm": 54.75, |
| "learning_rate": 1.9006116207951072e-05, |
| "loss": 0.6494, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.726438100702912, |
| "grad_norm": 54.5, |
| "learning_rate": 1.9001019367991846e-05, |
| "loss": 0.2914, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.727585712236408, |
| "grad_norm": 10.125, |
| "learning_rate": 1.8995922528032624e-05, |
| "loss": 0.745, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.7287333237699039, |
| "grad_norm": 45.75, |
| "learning_rate": 1.8990825688073394e-05, |
| "loss": 0.8078, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.7298809353033998, |
| "grad_norm": 95.0, |
| "learning_rate": 1.898572884811417e-05, |
| "loss": 0.9361, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.7310285468368957, |
| "grad_norm": 64.5, |
| "learning_rate": 1.8980632008154945e-05, |
| "loss": 0.5982, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.7321761583703916, |
| "grad_norm": 22.25, |
| "learning_rate": 1.897553516819572e-05, |
| "loss": 0.5722, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.7333237699038875, |
| "grad_norm": 51.5, |
| "learning_rate": 1.8970438328236496e-05, |
| "loss": 0.7216, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.7344713814373834, |
| "grad_norm": 18.5, |
| "learning_rate": 1.896534148827727e-05, |
| "loss": 0.5961, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.7356189929708794, |
| "grad_norm": 47.75, |
| "learning_rate": 1.8960244648318044e-05, |
| "loss": 0.6, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.7367666045043753, |
| "grad_norm": 67.0, |
| "learning_rate": 1.8955147808358818e-05, |
| "loss": 0.7799, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.7379142160378712, |
| "grad_norm": 90.5, |
| "learning_rate": 1.8950050968399592e-05, |
| "loss": 0.979, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.7390618275713671, |
| "grad_norm": 47.0, |
| "learning_rate": 1.894495412844037e-05, |
| "loss": 0.6637, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.740209439104863, |
| "grad_norm": 28.875, |
| "learning_rate": 1.8939857288481143e-05, |
| "loss": 0.7095, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.7413570506383589, |
| "grad_norm": 70.5, |
| "learning_rate": 1.8934760448521917e-05, |
| "loss": 0.7767, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.7425046621718548, |
| "grad_norm": 67.0, |
| "learning_rate": 1.892966360856269e-05, |
| "loss": 0.8117, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.7436522737053507, |
| "grad_norm": 47.0, |
| "learning_rate": 1.892456676860347e-05, |
| "loss": 0.7253, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.7447998852388467, |
| "grad_norm": 42.25, |
| "learning_rate": 1.8919469928644242e-05, |
| "loss": 0.5712, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.7459474967723425, |
| "grad_norm": 10.3125, |
| "learning_rate": 1.8914373088685016e-05, |
| "loss": 0.4933, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.7470951083058385, |
| "grad_norm": 17.875, |
| "learning_rate": 1.8909276248725793e-05, |
| "loss": 0.468, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.7482427198393344, |
| "grad_norm": 61.75, |
| "learning_rate": 1.8904179408766564e-05, |
| "loss": 0.7518, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.7493903313728303, |
| "grad_norm": 71.0, |
| "learning_rate": 1.889908256880734e-05, |
| "loss": 0.8373, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.7505379429063262, |
| "grad_norm": 26.75, |
| "learning_rate": 1.8893985728848115e-05, |
| "loss": 0.7643, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.7516855544398221, |
| "grad_norm": 28.25, |
| "learning_rate": 1.888888888888889e-05, |
| "loss": 0.5331, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.7528331659733181, |
| "grad_norm": 8.375, |
| "learning_rate": 1.8883792048929666e-05, |
| "loss": 0.4439, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.7539807775068139, |
| "grad_norm": 17.875, |
| "learning_rate": 1.887869520897044e-05, |
| "loss": 0.5831, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.7551283890403099, |
| "grad_norm": 34.25, |
| "learning_rate": 1.8873598369011214e-05, |
| "loss": 0.5412, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.7562760005738057, |
| "grad_norm": 50.75, |
| "learning_rate": 1.886850152905199e-05, |
| "loss": 0.5549, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.7574236121073017, |
| "grad_norm": 56.0, |
| "learning_rate": 1.8863404689092762e-05, |
| "loss": 0.4515, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.7585712236407975, |
| "grad_norm": 76.5, |
| "learning_rate": 1.885830784913354e-05, |
| "loss": 0.8915, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.7597188351742935, |
| "grad_norm": 88.0, |
| "learning_rate": 1.8853211009174313e-05, |
| "loss": 0.7725, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.7608664467077895, |
| "grad_norm": 32.0, |
| "learning_rate": 1.8848114169215087e-05, |
| "loss": 0.674, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.7620140582412853, |
| "grad_norm": 37.0, |
| "learning_rate": 1.8843017329255864e-05, |
| "loss": 0.4771, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.7631616697747813, |
| "grad_norm": 47.5, |
| "learning_rate": 1.883792048929664e-05, |
| "loss": 0.665, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.7643092813082771, |
| "grad_norm": 49.0, |
| "learning_rate": 1.8832823649337412e-05, |
| "loss": 0.5971, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.7654568928417731, |
| "grad_norm": 17.875, |
| "learning_rate": 1.8827726809378186e-05, |
| "loss": 0.507, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.766604504375269, |
| "grad_norm": 17.5, |
| "learning_rate": 1.8822629969418963e-05, |
| "loss": 0.3678, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.7677521159087649, |
| "grad_norm": 39.5, |
| "learning_rate": 1.8817533129459737e-05, |
| "loss": 0.5468, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.7688997274422608, |
| "grad_norm": 26.375, |
| "learning_rate": 1.881243628950051e-05, |
| "loss": 0.4158, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.7700473389757567, |
| "grad_norm": 70.5, |
| "learning_rate": 1.8807339449541285e-05, |
| "loss": 0.8145, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.7711949505092526, |
| "grad_norm": 57.0, |
| "learning_rate": 1.880224260958206e-05, |
| "loss": 0.5283, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.7723425620427485, |
| "grad_norm": 15.875, |
| "learning_rate": 1.8797145769622836e-05, |
| "loss": 0.6116, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.7734901735762444, |
| "grad_norm": 10.6875, |
| "learning_rate": 1.879204892966361e-05, |
| "loss": 0.6081, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.7746377851097404, |
| "grad_norm": 41.0, |
| "learning_rate": 1.8786952089704384e-05, |
| "loss": 0.6481, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.7757853966432363, |
| "grad_norm": 13.375, |
| "learning_rate": 1.878185524974516e-05, |
| "loss": 0.4866, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.7769330081767322, |
| "grad_norm": 32.75, |
| "learning_rate": 1.8776758409785932e-05, |
| "loss": 0.7627, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.7780806197102281, |
| "grad_norm": 83.0, |
| "learning_rate": 1.877166156982671e-05, |
| "loss": 0.8497, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.779228231243724, |
| "grad_norm": 46.75, |
| "learning_rate": 1.8766564729867483e-05, |
| "loss": 0.3555, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.7803758427772199, |
| "grad_norm": 67.0, |
| "learning_rate": 1.8761467889908257e-05, |
| "loss": 0.7386, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.7815234543107158, |
| "grad_norm": 98.5, |
| "learning_rate": 1.8756371049949034e-05, |
| "loss": 0.6743, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.7826710658442118, |
| "grad_norm": 52.25, |
| "learning_rate": 1.8751274209989808e-05, |
| "loss": 0.6193, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.7838186773777076, |
| "grad_norm": 18.0, |
| "learning_rate": 1.8746177370030582e-05, |
| "loss": 0.4905, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.7849662889112036, |
| "grad_norm": 37.75, |
| "learning_rate": 1.874108053007136e-05, |
| "loss": 0.4043, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.7861139004446994, |
| "grad_norm": 22.25, |
| "learning_rate": 1.8735983690112133e-05, |
| "loss": 0.5243, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.7872615119781954, |
| "grad_norm": 21.75, |
| "learning_rate": 1.8730886850152907e-05, |
| "loss": 0.4342, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.7884091235116913, |
| "grad_norm": 31.375, |
| "learning_rate": 1.872579001019368e-05, |
| "loss": 0.5206, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.7895567350451872, |
| "grad_norm": 21.25, |
| "learning_rate": 1.8720693170234455e-05, |
| "loss": 0.6048, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.7907043465786832, |
| "grad_norm": 49.0, |
| "learning_rate": 1.8715596330275232e-05, |
| "loss": 0.5758, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.791851958112179, |
| "grad_norm": 26.75, |
| "learning_rate": 1.8710499490316006e-05, |
| "loss": 0.5446, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.792999569645675, |
| "grad_norm": 28.5, |
| "learning_rate": 1.870540265035678e-05, |
| "loss": 0.4504, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.7941471811791708, |
| "grad_norm": 28.375, |
| "learning_rate": 1.8700305810397554e-05, |
| "loss": 0.7349, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.7952947927126668, |
| "grad_norm": 102.0, |
| "learning_rate": 1.869520897043833e-05, |
| "loss": 0.6082, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.7964424042461626, |
| "grad_norm": 87.0, |
| "learning_rate": 1.8690112130479105e-05, |
| "loss": 0.7548, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.7975900157796586, |
| "grad_norm": 24.875, |
| "learning_rate": 1.868501529051988e-05, |
| "loss": 0.7732, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.7987376273131545, |
| "grad_norm": 44.0, |
| "learning_rate": 1.8679918450560653e-05, |
| "loss": 0.5928, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.7998852388466504, |
| "grad_norm": 24.375, |
| "learning_rate": 1.8674821610601427e-05, |
| "loss": 0.5727, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.8010328503801463, |
| "grad_norm": 72.5, |
| "learning_rate": 1.8669724770642204e-05, |
| "loss": 0.811, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.8021804619136422, |
| "grad_norm": 13.25, |
| "learning_rate": 1.8664627930682978e-05, |
| "loss": 0.2618, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.8033280734471382, |
| "grad_norm": 34.5, |
| "learning_rate": 1.8659531090723752e-05, |
| "loss": 0.8214, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.8033280734471382, |
| "eval_accuracy": 0.61, |
| "eval_loss": 0.5829592347145081, |
| "eval_runtime": 49.9174, |
| "eval_samples_per_second": 2.003, |
| "eval_steps_per_second": 2.003, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.804475684980634, |
| "grad_norm": 25.125, |
| "learning_rate": 1.865443425076453e-05, |
| "loss": 0.5568, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.80562329651413, |
| "grad_norm": 14.9375, |
| "learning_rate": 1.86493374108053e-05, |
| "loss": 0.3704, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.8067709080476259, |
| "grad_norm": 15.8125, |
| "learning_rate": 1.8644240570846077e-05, |
| "loss": 0.4246, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.8079185195811218, |
| "grad_norm": 16.125, |
| "learning_rate": 1.863914373088685e-05, |
| "loss": 0.3896, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.8090661311146177, |
| "grad_norm": 50.5, |
| "learning_rate": 1.8634046890927625e-05, |
| "loss": 0.3966, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.8102137426481136, |
| "grad_norm": 45.25, |
| "learning_rate": 1.8628950050968402e-05, |
| "loss": 0.3742, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.8113613541816095, |
| "grad_norm": 39.75, |
| "learning_rate": 1.8623853211009176e-05, |
| "loss": 0.4672, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.8125089657151054, |
| "grad_norm": 39.25, |
| "learning_rate": 1.861875637104995e-05, |
| "loss": 0.6046, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.8136565772486013, |
| "grad_norm": 38.25, |
| "learning_rate": 1.8613659531090724e-05, |
| "loss": 1.0867, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.8148041887820973, |
| "grad_norm": 26.875, |
| "learning_rate": 1.86085626911315e-05, |
| "loss": 0.3141, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.8159518003155932, |
| "grad_norm": 53.0, |
| "learning_rate": 1.8603465851172275e-05, |
| "loss": 0.8153, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.8170994118490891, |
| "grad_norm": 35.0, |
| "learning_rate": 1.859836901121305e-05, |
| "loss": 0.7676, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.818247023382585, |
| "grad_norm": 55.75, |
| "learning_rate": 1.8593272171253823e-05, |
| "loss": 0.5664, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.8193946349160809, |
| "grad_norm": 14.5, |
| "learning_rate": 1.8588175331294597e-05, |
| "loss": 0.6436, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.8205422464495769, |
| "grad_norm": 29.0, |
| "learning_rate": 1.8583078491335374e-05, |
| "loss": 0.4355, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.8216898579830727, |
| "grad_norm": 82.5, |
| "learning_rate": 1.8577981651376148e-05, |
| "loss": 1.2766, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.8228374695165687, |
| "grad_norm": 37.5, |
| "learning_rate": 1.8572884811416922e-05, |
| "loss": 0.4578, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.8239850810500645, |
| "grad_norm": 16.875, |
| "learning_rate": 1.85677879714577e-05, |
| "loss": 0.5334, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.8251326925835605, |
| "grad_norm": 22.875, |
| "learning_rate": 1.856269113149847e-05, |
| "loss": 0.5546, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.8262803041170563, |
| "grad_norm": 106.0, |
| "learning_rate": 1.8557594291539247e-05, |
| "loss": 0.9589, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.8274279156505523, |
| "grad_norm": 21.75, |
| "learning_rate": 1.855249745158002e-05, |
| "loss": 0.5682, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.8285755271840483, |
| "grad_norm": 52.75, |
| "learning_rate": 1.8547400611620795e-05, |
| "loss": 0.4809, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.8297231387175441, |
| "grad_norm": 19.0, |
| "learning_rate": 1.8542303771661572e-05, |
| "loss": 0.3774, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.8308707502510401, |
| "grad_norm": 25.125, |
| "learning_rate": 1.8537206931702346e-05, |
| "loss": 0.4828, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.8320183617845359, |
| "grad_norm": 34.0, |
| "learning_rate": 1.853211009174312e-05, |
| "loss": 0.4859, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.8331659733180319, |
| "grad_norm": 48.75, |
| "learning_rate": 1.8527013251783897e-05, |
| "loss": 0.5612, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.8343135848515277, |
| "grad_norm": 45.5, |
| "learning_rate": 1.852191641182467e-05, |
| "loss": 0.5976, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.8354611963850237, |
| "grad_norm": 52.0, |
| "learning_rate": 1.8516819571865445e-05, |
| "loss": 0.6551, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.8366088079185195, |
| "grad_norm": 23.75, |
| "learning_rate": 1.851172273190622e-05, |
| "loss": 0.5022, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.8377564194520155, |
| "grad_norm": 71.5, |
| "learning_rate": 1.8506625891946993e-05, |
| "loss": 0.8474, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.8389040309855114, |
| "grad_norm": 67.5, |
| "learning_rate": 1.850152905198777e-05, |
| "loss": 0.6511, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.8400516425190073, |
| "grad_norm": 36.5, |
| "learning_rate": 1.8496432212028544e-05, |
| "loss": 0.5485, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.8411992540525032, |
| "grad_norm": 63.25, |
| "learning_rate": 1.8491335372069318e-05, |
| "loss": 0.7833, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.8423468655859991, |
| "grad_norm": 11.625, |
| "learning_rate": 1.8486238532110092e-05, |
| "loss": 0.5295, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.8434944771194951, |
| "grad_norm": 44.25, |
| "learning_rate": 1.848114169215087e-05, |
| "loss": 0.4733, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.844642088652991, |
| "grad_norm": 6.90625, |
| "learning_rate": 1.8476044852191643e-05, |
| "loss": 0.2207, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.8457897001864869, |
| "grad_norm": 63.0, |
| "learning_rate": 1.8470948012232417e-05, |
| "loss": 0.5543, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.8469373117199828, |
| "grad_norm": 19.5, |
| "learning_rate": 1.846585117227319e-05, |
| "loss": 0.4689, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.8480849232534787, |
| "grad_norm": 14.6875, |
| "learning_rate": 1.8460754332313965e-05, |
| "loss": 0.5446, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.8492325347869746, |
| "grad_norm": 68.5, |
| "learning_rate": 1.8455657492354742e-05, |
| "loss": 0.6652, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.8503801463204705, |
| "grad_norm": 25.0, |
| "learning_rate": 1.8450560652395516e-05, |
| "loss": 0.3413, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.8515277578539664, |
| "grad_norm": 43.5, |
| "learning_rate": 1.844546381243629e-05, |
| "loss": 0.5552, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.8526753693874624, |
| "grad_norm": 50.25, |
| "learning_rate": 1.8440366972477067e-05, |
| "loss": 0.5051, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.8538229809209582, |
| "grad_norm": 30.625, |
| "learning_rate": 1.843527013251784e-05, |
| "loss": 0.5442, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.8549705924544542, |
| "grad_norm": 21.75, |
| "learning_rate": 1.8430173292558615e-05, |
| "loss": 0.724, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.8561182039879501, |
| "grad_norm": 23.0, |
| "learning_rate": 1.8425076452599392e-05, |
| "loss": 0.6982, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.857265815521446, |
| "grad_norm": 34.5, |
| "learning_rate": 1.8419979612640163e-05, |
| "loss": 0.4268, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.858413427054942, |
| "grad_norm": 12.0, |
| "learning_rate": 1.841488277268094e-05, |
| "loss": 0.5611, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.8595610385884378, |
| "grad_norm": 20.5, |
| "learning_rate": 1.8409785932721714e-05, |
| "loss": 0.552, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.8607086501219338, |
| "grad_norm": 35.0, |
| "learning_rate": 1.8404689092762488e-05, |
| "loss": 0.5549, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.8618562616554296, |
| "grad_norm": 43.5, |
| "learning_rate": 1.8399592252803265e-05, |
| "loss": 0.5242, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.8630038731889256, |
| "grad_norm": 13.875, |
| "learning_rate": 1.839449541284404e-05, |
| "loss": 0.3858, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.8641514847224214, |
| "grad_norm": 45.25, |
| "learning_rate": 1.8389398572884813e-05, |
| "loss": 0.6547, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.8652990962559174, |
| "grad_norm": 66.5, |
| "learning_rate": 1.8384301732925587e-05, |
| "loss": 0.5999, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.8664467077894132, |
| "grad_norm": 31.25, |
| "learning_rate": 1.837920489296636e-05, |
| "loss": 0.6402, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.8675943193229092, |
| "grad_norm": 16.125, |
| "learning_rate": 1.8374108053007138e-05, |
| "loss": 0.4183, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.868741930856405, |
| "grad_norm": 34.75, |
| "learning_rate": 1.8369011213047912e-05, |
| "loss": 0.46, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.869889542389901, |
| "grad_norm": 11.6875, |
| "learning_rate": 1.8363914373088686e-05, |
| "loss": 0.5179, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.871037153923397, |
| "grad_norm": 60.0, |
| "learning_rate": 1.835881753312946e-05, |
| "loss": 0.5217, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.8721847654568928, |
| "grad_norm": 9.75, |
| "learning_rate": 1.8353720693170237e-05, |
| "loss": 0.3772, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.8733323769903888, |
| "grad_norm": 11.75, |
| "learning_rate": 1.834862385321101e-05, |
| "loss": 0.5367, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.8744799885238846, |
| "grad_norm": 41.5, |
| "learning_rate": 1.8343527013251785e-05, |
| "loss": 0.573, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.8756276000573806, |
| "grad_norm": 34.25, |
| "learning_rate": 1.8338430173292562e-05, |
| "loss": 0.1937, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.8767752115908765, |
| "grad_norm": 23.125, |
| "learning_rate": 1.8333333333333333e-05, |
| "loss": 0.4614, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.8779228231243724, |
| "grad_norm": 46.5, |
| "learning_rate": 1.832823649337411e-05, |
| "loss": 0.442, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.8790704346578683, |
| "grad_norm": 74.5, |
| "learning_rate": 1.8323139653414884e-05, |
| "loss": 0.9426, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.8802180461913642, |
| "grad_norm": 65.5, |
| "learning_rate": 1.8318042813455658e-05, |
| "loss": 0.8281, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.8813656577248601, |
| "grad_norm": 30.375, |
| "learning_rate": 1.8312945973496435e-05, |
| "loss": 0.6035, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.882513269258356, |
| "grad_norm": 89.5, |
| "learning_rate": 1.830784913353721e-05, |
| "loss": 0.8813, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.883660880791852, |
| "grad_norm": 71.5, |
| "learning_rate": 1.8302752293577983e-05, |
| "loss": 0.7809, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.8848084923253479, |
| "grad_norm": 87.5, |
| "learning_rate": 1.829765545361876e-05, |
| "loss": 0.9051, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.8859561038588438, |
| "grad_norm": 78.5, |
| "learning_rate": 1.829255861365953e-05, |
| "loss": 0.8777, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.8871037153923397, |
| "grad_norm": 38.5, |
| "learning_rate": 1.8287461773700308e-05, |
| "loss": 0.3393, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.8882513269258356, |
| "grad_norm": 34.0, |
| "learning_rate": 1.8282364933741082e-05, |
| "loss": 0.4772, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.8893989384593315, |
| "grad_norm": 42.25, |
| "learning_rate": 1.8277268093781856e-05, |
| "loss": 0.5136, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.8905465499928275, |
| "grad_norm": 51.25, |
| "learning_rate": 1.8272171253822633e-05, |
| "loss": 0.4965, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.8916941615263233, |
| "grad_norm": 23.5, |
| "learning_rate": 1.8267074413863407e-05, |
| "loss": 0.6667, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.8928417730598193, |
| "grad_norm": 69.0, |
| "learning_rate": 1.826197757390418e-05, |
| "loss": 0.7309, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.8939893845933151, |
| "grad_norm": 13.5625, |
| "learning_rate": 1.8256880733944955e-05, |
| "loss": 0.5041, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.8951369961268111, |
| "grad_norm": 115.0, |
| "learning_rate": 1.825178389398573e-05, |
| "loss": 0.9974, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.8962846076603069, |
| "grad_norm": 19.125, |
| "learning_rate": 1.8246687054026503e-05, |
| "loss": 0.5556, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.8974322191938029, |
| "grad_norm": 52.5, |
| "learning_rate": 1.824159021406728e-05, |
| "loss": 0.7631, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.8985798307272989, |
| "grad_norm": 10.4375, |
| "learning_rate": 1.8236493374108054e-05, |
| "loss": 0.6217, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.8997274422607947, |
| "grad_norm": 20.625, |
| "learning_rate": 1.8231396534148828e-05, |
| "loss": 0.4404, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.9008750537942907, |
| "grad_norm": 81.0, |
| "learning_rate": 1.8226299694189605e-05, |
| "loss": 0.8382, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.9020226653277865, |
| "grad_norm": 25.125, |
| "learning_rate": 1.822120285423038e-05, |
| "loss": 0.465, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.9031702768612825, |
| "grad_norm": 4.90625, |
| "learning_rate": 1.8216106014271153e-05, |
| "loss": 0.2211, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.9043178883947783, |
| "grad_norm": 18.25, |
| "learning_rate": 1.821100917431193e-05, |
| "loss": 0.5354, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.9054654999282743, |
| "grad_norm": 22.625, |
| "learning_rate": 1.82059123343527e-05, |
| "loss": 0.4656, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.9066131114617701, |
| "grad_norm": 22.125, |
| "learning_rate": 1.8200815494393478e-05, |
| "loss": 0.7412, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.9077607229952661, |
| "grad_norm": 18.125, |
| "learning_rate": 1.8195718654434252e-05, |
| "loss": 0.5085, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.908908334528762, |
| "grad_norm": 17.625, |
| "learning_rate": 1.8190621814475026e-05, |
| "loss": 0.4275, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.9100559460622579, |
| "grad_norm": 74.5, |
| "learning_rate": 1.8185524974515803e-05, |
| "loss": 0.8506, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.9112035575957539, |
| "grad_norm": 59.75, |
| "learning_rate": 1.8180428134556577e-05, |
| "loss": 0.5863, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.9123511691292497, |
| "grad_norm": 17.875, |
| "learning_rate": 1.817533129459735e-05, |
| "loss": 0.5018, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.9134987806627457, |
| "grad_norm": 73.0, |
| "learning_rate": 1.8170234454638125e-05, |
| "loss": 0.7539, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.9146463921962416, |
| "grad_norm": 38.0, |
| "learning_rate": 1.81651376146789e-05, |
| "loss": 0.6021, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.9157940037297375, |
| "grad_norm": 34.75, |
| "learning_rate": 1.8160040774719676e-05, |
| "loss": 0.4989, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.9169416152632334, |
| "grad_norm": 29.25, |
| "learning_rate": 1.815494393476045e-05, |
| "loss": 0.5503, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.9180892267967293, |
| "grad_norm": 113.0, |
| "learning_rate": 1.8149847094801224e-05, |
| "loss": 0.7238, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.9180892267967293, |
| "eval_accuracy": 0.67, |
| "eval_loss": 0.5950115323066711, |
| "eval_runtime": 49.3005, |
| "eval_samples_per_second": 2.028, |
| "eval_steps_per_second": 2.028, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.9192368383302252, |
| "grad_norm": 12.9375, |
| "learning_rate": 1.8144750254841998e-05, |
| "loss": 0.632, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.9203844498637211, |
| "grad_norm": 49.75, |
| "learning_rate": 1.8139653414882775e-05, |
| "loss": 0.6413, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.921532061397217, |
| "grad_norm": 13.125, |
| "learning_rate": 1.813455657492355e-05, |
| "loss": 0.5482, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.922679672930713, |
| "grad_norm": 20.125, |
| "learning_rate": 1.8129459734964323e-05, |
| "loss": 0.5773, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.9238272844642089, |
| "grad_norm": 100.0, |
| "learning_rate": 1.81243628950051e-05, |
| "loss": 1.35, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.9249748959977048, |
| "grad_norm": 25.75, |
| "learning_rate": 1.811926605504587e-05, |
| "loss": 0.5234, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.9261225075312007, |
| "grad_norm": 37.0, |
| "learning_rate": 1.8114169215086648e-05, |
| "loss": 0.473, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.9272701190646966, |
| "grad_norm": 29.0, |
| "learning_rate": 1.8109072375127422e-05, |
| "loss": 0.4716, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.9284177305981925, |
| "grad_norm": 22.0, |
| "learning_rate": 1.8103975535168196e-05, |
| "loss": 0.5146, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.9295653421316884, |
| "grad_norm": 11.75, |
| "learning_rate": 1.8098878695208973e-05, |
| "loss": 0.6532, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.9307129536651844, |
| "grad_norm": 14.375, |
| "learning_rate": 1.8093781855249747e-05, |
| "loss": 0.5441, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.9318605651986802, |
| "grad_norm": 42.0, |
| "learning_rate": 1.808868501529052e-05, |
| "loss": 0.4905, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.9330081767321762, |
| "grad_norm": 64.0, |
| "learning_rate": 1.8083588175331298e-05, |
| "loss": 0.8364, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.934155788265672, |
| "grad_norm": 28.875, |
| "learning_rate": 1.807849133537207e-05, |
| "loss": 0.414, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.935303399799168, |
| "grad_norm": 7.6875, |
| "learning_rate": 1.8073394495412846e-05, |
| "loss": 0.3923, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.9364510113326638, |
| "grad_norm": 32.25, |
| "learning_rate": 1.806829765545362e-05, |
| "loss": 0.5358, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.9375986228661598, |
| "grad_norm": 57.5, |
| "learning_rate": 1.8063200815494394e-05, |
| "loss": 0.4813, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.9387462343996558, |
| "grad_norm": 33.5, |
| "learning_rate": 1.805810397553517e-05, |
| "loss": 0.4693, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.9398938459331516, |
| "grad_norm": 35.0, |
| "learning_rate": 1.8053007135575945e-05, |
| "loss": 0.3321, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.9410414574666476, |
| "grad_norm": 19.75, |
| "learning_rate": 1.804791029561672e-05, |
| "loss": 0.5709, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.9421890690001434, |
| "grad_norm": 133.0, |
| "learning_rate": 1.8042813455657493e-05, |
| "loss": 1.0803, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.9433366805336394, |
| "grad_norm": 7.90625, |
| "learning_rate": 1.803771661569827e-05, |
| "loss": 0.5182, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.9444842920671352, |
| "grad_norm": 25.375, |
| "learning_rate": 1.8032619775739044e-05, |
| "loss": 0.6489, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.9456319036006312, |
| "grad_norm": 24.125, |
| "learning_rate": 1.8027522935779818e-05, |
| "loss": 0.5298, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.9467795151341271, |
| "grad_norm": 26.5, |
| "learning_rate": 1.8022426095820592e-05, |
| "loss": 0.4967, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.947927126667623, |
| "grad_norm": 55.5, |
| "learning_rate": 1.8017329255861366e-05, |
| "loss": 0.7623, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.9490747382011189, |
| "grad_norm": 38.75, |
| "learning_rate": 1.8012232415902143e-05, |
| "loss": 0.3873, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.9502223497346148, |
| "grad_norm": 36.5, |
| "learning_rate": 1.8007135575942917e-05, |
| "loss": 0.4885, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.9513699612681108, |
| "grad_norm": 36.75, |
| "learning_rate": 1.800203873598369e-05, |
| "loss": 0.4786, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.9525175728016066, |
| "grad_norm": 15.9375, |
| "learning_rate": 1.7996941896024468e-05, |
| "loss": 0.7344, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.9536651843351026, |
| "grad_norm": 35.75, |
| "learning_rate": 1.799184505606524e-05, |
| "loss": 0.4364, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.9548127958685985, |
| "grad_norm": 60.5, |
| "learning_rate": 1.7986748216106016e-05, |
| "loss": 0.4018, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.9559604074020944, |
| "grad_norm": 29.5, |
| "learning_rate": 1.798165137614679e-05, |
| "loss": 0.5492, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.9571080189355903, |
| "grad_norm": 51.75, |
| "learning_rate": 1.7976554536187564e-05, |
| "loss": 0.5751, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.9582556304690862, |
| "grad_norm": 27.0, |
| "learning_rate": 1.797145769622834e-05, |
| "loss": 0.4943, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.9594032420025821, |
| "grad_norm": 40.75, |
| "learning_rate": 1.7966360856269115e-05, |
| "loss": 0.6622, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.960550853536078, |
| "grad_norm": 82.0, |
| "learning_rate": 1.796126401630989e-05, |
| "loss": 0.6737, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.9616984650695739, |
| "grad_norm": 11.125, |
| "learning_rate": 1.7956167176350666e-05, |
| "loss": 0.4544, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.9628460766030699, |
| "grad_norm": 18.125, |
| "learning_rate": 1.795107033639144e-05, |
| "loss": 0.5389, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.9639936881365657, |
| "grad_norm": 15.9375, |
| "learning_rate": 1.7945973496432214e-05, |
| "loss": 0.1783, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.9651412996700617, |
| "grad_norm": 53.5, |
| "learning_rate": 1.7940876656472988e-05, |
| "loss": 0.3035, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.9662889112035576, |
| "grad_norm": 52.75, |
| "learning_rate": 1.7935779816513762e-05, |
| "loss": 0.6946, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.9674365227370535, |
| "grad_norm": 34.75, |
| "learning_rate": 1.793068297655454e-05, |
| "loss": 0.5466, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.9685841342705495, |
| "grad_norm": 21.875, |
| "learning_rate": 1.7925586136595313e-05, |
| "loss": 0.4619, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.9697317458040453, |
| "grad_norm": 50.5, |
| "learning_rate": 1.7920489296636087e-05, |
| "loss": 0.6513, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.9708793573375413, |
| "grad_norm": 15.125, |
| "learning_rate": 1.791539245667686e-05, |
| "loss": 0.379, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.9720269688710371, |
| "grad_norm": 10.625, |
| "learning_rate": 1.7910295616717638e-05, |
| "loss": 0.5085, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.9731745804045331, |
| "grad_norm": 12.6875, |
| "learning_rate": 1.7905198776758412e-05, |
| "loss": 0.5272, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.9743221919380289, |
| "grad_norm": 27.5, |
| "learning_rate": 1.7900101936799186e-05, |
| "loss": 1.0062, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.9754698034715249, |
| "grad_norm": 127.5, |
| "learning_rate": 1.789500509683996e-05, |
| "loss": 1.0798, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.9766174150050208, |
| "grad_norm": 103.5, |
| "learning_rate": 1.7889908256880734e-05, |
| "loss": 1.1638, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.9777650265385167, |
| "grad_norm": 69.0, |
| "learning_rate": 1.788481141692151e-05, |
| "loss": 0.9011, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.9789126380720127, |
| "grad_norm": 86.5, |
| "learning_rate": 1.7879714576962285e-05, |
| "loss": 0.8197, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.9800602496055085, |
| "grad_norm": 15.5, |
| "learning_rate": 1.787461773700306e-05, |
| "loss": 0.6134, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.9812078611390045, |
| "grad_norm": 110.5, |
| "learning_rate": 1.7869520897043836e-05, |
| "loss": 0.919, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.9823554726725003, |
| "grad_norm": 30.125, |
| "learning_rate": 1.786442405708461e-05, |
| "loss": 0.5746, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.9835030842059963, |
| "grad_norm": 100.0, |
| "learning_rate": 1.7859327217125384e-05, |
| "loss": 0.3361, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.9846506957394922, |
| "grad_norm": 60.0, |
| "learning_rate": 1.7854230377166158e-05, |
| "loss": 0.6782, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.9857983072729881, |
| "grad_norm": 62.5, |
| "learning_rate": 1.7849133537206932e-05, |
| "loss": 0.8552, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.986945918806484, |
| "grad_norm": 38.75, |
| "learning_rate": 1.784403669724771e-05, |
| "loss": 0.7251, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.9880935303399799, |
| "grad_norm": 84.5, |
| "learning_rate": 1.7838939857288483e-05, |
| "loss": 0.9825, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.9892411418734758, |
| "grad_norm": 32.75, |
| "learning_rate": 1.7833843017329257e-05, |
| "loss": 0.2631, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.9903887534069717, |
| "grad_norm": 101.5, |
| "learning_rate": 1.782874617737003e-05, |
| "loss": 1.0281, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.9915363649404677, |
| "grad_norm": 36.75, |
| "learning_rate": 1.7823649337410808e-05, |
| "loss": 0.6591, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.9926839764739636, |
| "grad_norm": 23.75, |
| "learning_rate": 1.7818552497451582e-05, |
| "loss": 0.2017, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.9938315880074595, |
| "grad_norm": 11.0625, |
| "learning_rate": 1.7813455657492356e-05, |
| "loss": 0.6496, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.9949791995409554, |
| "grad_norm": 52.0, |
| "learning_rate": 1.780835881753313e-05, |
| "loss": 0.7726, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.9961268110744513, |
| "grad_norm": 8.9375, |
| "learning_rate": 1.7803261977573904e-05, |
| "loss": 0.2688, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.9972744226079472, |
| "grad_norm": 29.625, |
| "learning_rate": 1.779816513761468e-05, |
| "loss": 0.4991, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.9984220341414431, |
| "grad_norm": 6.59375, |
| "learning_rate": 1.7793068297655455e-05, |
| "loss": 0.3232, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.999569645674939, |
| "grad_norm": 39.25, |
| "learning_rate": 1.778797145769623e-05, |
| "loss": 0.4866, |
| "step": 871 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 31.25, |
| "learning_rate": 1.7782874617737006e-05, |
| "loss": 0.1617, |
| "step": 872 |
| }, |
| { |
| "epoch": 1.0011476115334959, |
| "grad_norm": 54.25, |
| "learning_rate": 1.7777777777777777e-05, |
| "loss": 0.5081, |
| "step": 873 |
| }, |
| { |
| "epoch": 1.002295223066992, |
| "grad_norm": 59.5, |
| "learning_rate": 1.7772680937818554e-05, |
| "loss": 0.6284, |
| "step": 874 |
| }, |
| { |
| "epoch": 1.0034428346004878, |
| "grad_norm": 62.0, |
| "learning_rate": 1.7767584097859328e-05, |
| "loss": 0.6364, |
| "step": 875 |
| }, |
| { |
| "epoch": 1.0045904461339836, |
| "grad_norm": 99.5, |
| "learning_rate": 1.7762487257900102e-05, |
| "loss": 1.521, |
| "step": 876 |
| }, |
| { |
| "epoch": 1.0057380576674795, |
| "grad_norm": 105.5, |
| "learning_rate": 1.775739041794088e-05, |
| "loss": 1.0837, |
| "step": 877 |
| }, |
| { |
| "epoch": 1.0068856692009756, |
| "grad_norm": 117.0, |
| "learning_rate": 1.7752293577981653e-05, |
| "loss": 1.0871, |
| "step": 878 |
| }, |
| { |
| "epoch": 1.0080332807344714, |
| "grad_norm": 91.5, |
| "learning_rate": 1.7747196738022427e-05, |
| "loss": 0.7927, |
| "step": 879 |
| }, |
| { |
| "epoch": 1.0091808922679673, |
| "grad_norm": 68.5, |
| "learning_rate": 1.7742099898063204e-05, |
| "loss": 0.6309, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.010328503801463, |
| "grad_norm": 11.3125, |
| "learning_rate": 1.7737003058103978e-05, |
| "loss": 0.3369, |
| "step": 881 |
| }, |
| { |
| "epoch": 1.0114761153349592, |
| "grad_norm": 11.125, |
| "learning_rate": 1.7731906218144752e-05, |
| "loss": 0.2181, |
| "step": 882 |
| }, |
| { |
| "epoch": 1.012623726868455, |
| "grad_norm": 23.75, |
| "learning_rate": 1.7726809378185526e-05, |
| "loss": 0.4936, |
| "step": 883 |
| }, |
| { |
| "epoch": 1.0137713384019509, |
| "grad_norm": 26.25, |
| "learning_rate": 1.77217125382263e-05, |
| "loss": 0.5372, |
| "step": 884 |
| }, |
| { |
| "epoch": 1.014918949935447, |
| "grad_norm": 23.375, |
| "learning_rate": 1.7716615698267077e-05, |
| "loss": 0.2898, |
| "step": 885 |
| }, |
| { |
| "epoch": 1.0160665614689428, |
| "grad_norm": 61.25, |
| "learning_rate": 1.771151885830785e-05, |
| "loss": 1.0463, |
| "step": 886 |
| }, |
| { |
| "epoch": 1.0172141730024387, |
| "grad_norm": 16.125, |
| "learning_rate": 1.7706422018348625e-05, |
| "loss": 0.3061, |
| "step": 887 |
| }, |
| { |
| "epoch": 1.0183617845359345, |
| "grad_norm": 83.5, |
| "learning_rate": 1.77013251783894e-05, |
| "loss": 0.7545, |
| "step": 888 |
| }, |
| { |
| "epoch": 1.0195093960694306, |
| "grad_norm": 57.75, |
| "learning_rate": 1.7696228338430176e-05, |
| "loss": 0.7643, |
| "step": 889 |
| }, |
| { |
| "epoch": 1.0206570076029264, |
| "grad_norm": 19.125, |
| "learning_rate": 1.769113149847095e-05, |
| "loss": 0.6013, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.0218046191364223, |
| "grad_norm": 12.25, |
| "learning_rate": 1.7686034658511724e-05, |
| "loss": 0.4579, |
| "step": 891 |
| }, |
| { |
| "epoch": 1.0229522306699181, |
| "grad_norm": 38.25, |
| "learning_rate": 1.7680937818552498e-05, |
| "loss": 0.4669, |
| "step": 892 |
| }, |
| { |
| "epoch": 1.0240998422034142, |
| "grad_norm": 68.0, |
| "learning_rate": 1.767584097859327e-05, |
| "loss": 0.4824, |
| "step": 893 |
| }, |
| { |
| "epoch": 1.02524745373691, |
| "grad_norm": 10.5625, |
| "learning_rate": 1.767074413863405e-05, |
| "loss": 0.5689, |
| "step": 894 |
| }, |
| { |
| "epoch": 1.026395065270406, |
| "grad_norm": 8.875, |
| "learning_rate": 1.7665647298674823e-05, |
| "loss": 0.3161, |
| "step": 895 |
| }, |
| { |
| "epoch": 1.0275426768039018, |
| "grad_norm": 23.625, |
| "learning_rate": 1.7660550458715597e-05, |
| "loss": 0.4443, |
| "step": 896 |
| }, |
| { |
| "epoch": 1.0286902883373978, |
| "grad_norm": 15.75, |
| "learning_rate": 1.7655453618756374e-05, |
| "loss": 0.2331, |
| "step": 897 |
| }, |
| { |
| "epoch": 1.0298378998708937, |
| "grad_norm": 8.25, |
| "learning_rate": 1.7650356778797148e-05, |
| "loss": 0.3554, |
| "step": 898 |
| }, |
| { |
| "epoch": 1.0309855114043895, |
| "grad_norm": 14.5, |
| "learning_rate": 1.7645259938837922e-05, |
| "loss": 0.6107, |
| "step": 899 |
| }, |
| { |
| "epoch": 1.0321331229378856, |
| "grad_norm": 42.25, |
| "learning_rate": 1.76401630988787e-05, |
| "loss": 0.3624, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.0321331229378856, |
| "eval_accuracy": 0.64, |
| "eval_loss": 0.6176496744155884, |
| "eval_runtime": 49.5336, |
| "eval_samples_per_second": 2.019, |
| "eval_steps_per_second": 2.019, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.0332807344713815, |
| "grad_norm": 23.0, |
| "learning_rate": 1.763506625891947e-05, |
| "loss": 0.4606, |
| "step": 901 |
| }, |
| { |
| "epoch": 1.0344283460048773, |
| "grad_norm": 50.75, |
| "learning_rate": 1.7629969418960247e-05, |
| "loss": 0.5176, |
| "step": 902 |
| }, |
| { |
| "epoch": 1.0355759575383732, |
| "grad_norm": 58.0, |
| "learning_rate": 1.762487257900102e-05, |
| "loss": 0.3688, |
| "step": 903 |
| }, |
| { |
| "epoch": 1.0367235690718692, |
| "grad_norm": 36.0, |
| "learning_rate": 1.7619775739041795e-05, |
| "loss": 0.7414, |
| "step": 904 |
| }, |
| { |
| "epoch": 1.037871180605365, |
| "grad_norm": 28.5, |
| "learning_rate": 1.7614678899082572e-05, |
| "loss": 0.8468, |
| "step": 905 |
| }, |
| { |
| "epoch": 1.039018792138861, |
| "grad_norm": 26.25, |
| "learning_rate": 1.7609582059123346e-05, |
| "loss": 0.4338, |
| "step": 906 |
| }, |
| { |
| "epoch": 1.040166403672357, |
| "grad_norm": 122.5, |
| "learning_rate": 1.760448521916412e-05, |
| "loss": 0.9431, |
| "step": 907 |
| }, |
| { |
| "epoch": 1.0413140152058529, |
| "grad_norm": 15.375, |
| "learning_rate": 1.7599388379204894e-05, |
| "loss": 0.5602, |
| "step": 908 |
| }, |
| { |
| "epoch": 1.0424616267393487, |
| "grad_norm": 96.5, |
| "learning_rate": 1.7594291539245668e-05, |
| "loss": 0.6268, |
| "step": 909 |
| }, |
| { |
| "epoch": 1.0436092382728446, |
| "grad_norm": 59.25, |
| "learning_rate": 1.7589194699286445e-05, |
| "loss": 0.404, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.0447568498063406, |
| "grad_norm": 30.5, |
| "learning_rate": 1.758409785932722e-05, |
| "loss": 0.5772, |
| "step": 911 |
| }, |
| { |
| "epoch": 1.0459044613398365, |
| "grad_norm": 15.875, |
| "learning_rate": 1.7579001019367993e-05, |
| "loss": 0.4666, |
| "step": 912 |
| }, |
| { |
| "epoch": 1.0470520728733324, |
| "grad_norm": 20.25, |
| "learning_rate": 1.7573904179408767e-05, |
| "loss": 0.4576, |
| "step": 913 |
| }, |
| { |
| "epoch": 1.0481996844068282, |
| "grad_norm": 33.5, |
| "learning_rate": 1.7568807339449544e-05, |
| "loss": 0.4427, |
| "step": 914 |
| }, |
| { |
| "epoch": 1.0493472959403243, |
| "grad_norm": 6.59375, |
| "learning_rate": 1.7563710499490318e-05, |
| "loss": 0.1466, |
| "step": 915 |
| }, |
| { |
| "epoch": 1.0504949074738201, |
| "grad_norm": 44.0, |
| "learning_rate": 1.7558613659531092e-05, |
| "loss": 0.3564, |
| "step": 916 |
| }, |
| { |
| "epoch": 1.051642519007316, |
| "grad_norm": 26.75, |
| "learning_rate": 1.755351681957187e-05, |
| "loss": 0.6648, |
| "step": 917 |
| }, |
| { |
| "epoch": 1.0527901305408118, |
| "grad_norm": 53.5, |
| "learning_rate": 1.754841997961264e-05, |
| "loss": 0.5389, |
| "step": 918 |
| }, |
| { |
| "epoch": 1.053937742074308, |
| "grad_norm": 23.875, |
| "learning_rate": 1.7543323139653417e-05, |
| "loss": 0.4424, |
| "step": 919 |
| }, |
| { |
| "epoch": 1.0550853536078038, |
| "grad_norm": 34.75, |
| "learning_rate": 1.753822629969419e-05, |
| "loss": 0.4035, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.0562329651412996, |
| "grad_norm": 25.125, |
| "learning_rate": 1.7533129459734965e-05, |
| "loss": 0.5704, |
| "step": 921 |
| }, |
| { |
| "epoch": 1.0573805766747957, |
| "grad_norm": 25.625, |
| "learning_rate": 1.7528032619775742e-05, |
| "loss": 0.7077, |
| "step": 922 |
| }, |
| { |
| "epoch": 1.0585281882082915, |
| "grad_norm": 11.25, |
| "learning_rate": 1.7522935779816516e-05, |
| "loss": 0.228, |
| "step": 923 |
| }, |
| { |
| "epoch": 1.0596757997417874, |
| "grad_norm": 31.125, |
| "learning_rate": 1.751783893985729e-05, |
| "loss": 0.5716, |
| "step": 924 |
| }, |
| { |
| "epoch": 1.0608234112752832, |
| "grad_norm": 21.25, |
| "learning_rate": 1.7512742099898067e-05, |
| "loss": 0.2658, |
| "step": 925 |
| }, |
| { |
| "epoch": 1.0619710228087793, |
| "grad_norm": 12.875, |
| "learning_rate": 1.7507645259938838e-05, |
| "loss": 0.2415, |
| "step": 926 |
| }, |
| { |
| "epoch": 1.0631186343422752, |
| "grad_norm": 100.5, |
| "learning_rate": 1.7502548419979615e-05, |
| "loss": 1.0011, |
| "step": 927 |
| }, |
| { |
| "epoch": 1.064266245875771, |
| "grad_norm": 67.0, |
| "learning_rate": 1.749745158002039e-05, |
| "loss": 0.6753, |
| "step": 928 |
| }, |
| { |
| "epoch": 1.0654138574092669, |
| "grad_norm": 109.0, |
| "learning_rate": 1.7492354740061163e-05, |
| "loss": 0.8631, |
| "step": 929 |
| }, |
| { |
| "epoch": 1.066561468942763, |
| "grad_norm": 68.5, |
| "learning_rate": 1.7487257900101937e-05, |
| "loss": 1.0799, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.0677090804762588, |
| "grad_norm": 74.0, |
| "learning_rate": 1.7482161060142714e-05, |
| "loss": 0.5419, |
| "step": 931 |
| }, |
| { |
| "epoch": 1.0688566920097546, |
| "grad_norm": 61.25, |
| "learning_rate": 1.7477064220183488e-05, |
| "loss": 0.6041, |
| "step": 932 |
| }, |
| { |
| "epoch": 1.0700043035432507, |
| "grad_norm": 32.25, |
| "learning_rate": 1.7471967380224262e-05, |
| "loss": 0.8215, |
| "step": 933 |
| }, |
| { |
| "epoch": 1.0711519150767466, |
| "grad_norm": 45.25, |
| "learning_rate": 1.746687054026504e-05, |
| "loss": 0.5843, |
| "step": 934 |
| }, |
| { |
| "epoch": 1.0722995266102424, |
| "grad_norm": 104.5, |
| "learning_rate": 1.746177370030581e-05, |
| "loss": 1.3222, |
| "step": 935 |
| }, |
| { |
| "epoch": 1.0734471381437383, |
| "grad_norm": 56.0, |
| "learning_rate": 1.7456676860346587e-05, |
| "loss": 0.5504, |
| "step": 936 |
| }, |
| { |
| "epoch": 1.0745947496772343, |
| "grad_norm": 54.0, |
| "learning_rate": 1.745158002038736e-05, |
| "loss": 0.8466, |
| "step": 937 |
| }, |
| { |
| "epoch": 1.0757423612107302, |
| "grad_norm": 27.375, |
| "learning_rate": 1.7446483180428135e-05, |
| "loss": 0.9508, |
| "step": 938 |
| }, |
| { |
| "epoch": 1.076889972744226, |
| "grad_norm": 14.625, |
| "learning_rate": 1.7441386340468912e-05, |
| "loss": 0.3969, |
| "step": 939 |
| }, |
| { |
| "epoch": 1.078037584277722, |
| "grad_norm": 75.0, |
| "learning_rate": 1.7436289500509686e-05, |
| "loss": 0.9936, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.079185195811218, |
| "grad_norm": 51.75, |
| "learning_rate": 1.743119266055046e-05, |
| "loss": 0.5978, |
| "step": 941 |
| }, |
| { |
| "epoch": 1.0803328073447138, |
| "grad_norm": 57.5, |
| "learning_rate": 1.7426095820591237e-05, |
| "loss": 0.6549, |
| "step": 942 |
| }, |
| { |
| "epoch": 1.0814804188782097, |
| "grad_norm": 19.625, |
| "learning_rate": 1.7420998980632008e-05, |
| "loss": 0.4942, |
| "step": 943 |
| }, |
| { |
| "epoch": 1.0826280304117057, |
| "grad_norm": 83.0, |
| "learning_rate": 1.7415902140672785e-05, |
| "loss": 0.6702, |
| "step": 944 |
| }, |
| { |
| "epoch": 1.0837756419452016, |
| "grad_norm": 42.5, |
| "learning_rate": 1.741080530071356e-05, |
| "loss": 0.6299, |
| "step": 945 |
| }, |
| { |
| "epoch": 1.0849232534786974, |
| "grad_norm": 19.25, |
| "learning_rate": 1.7405708460754333e-05, |
| "loss": 0.5421, |
| "step": 946 |
| }, |
| { |
| "epoch": 1.0860708650121933, |
| "grad_norm": 34.0, |
| "learning_rate": 1.740061162079511e-05, |
| "loss": 0.7019, |
| "step": 947 |
| }, |
| { |
| "epoch": 1.0872184765456894, |
| "grad_norm": 34.0, |
| "learning_rate": 1.7395514780835884e-05, |
| "loss": 0.5919, |
| "step": 948 |
| }, |
| { |
| "epoch": 1.0883660880791852, |
| "grad_norm": 17.875, |
| "learning_rate": 1.7390417940876658e-05, |
| "loss": 0.2788, |
| "step": 949 |
| }, |
| { |
| "epoch": 1.089513699612681, |
| "grad_norm": 16.0, |
| "learning_rate": 1.738532110091743e-05, |
| "loss": 0.7744, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.090661311146177, |
| "grad_norm": 61.5, |
| "learning_rate": 1.7380224260958206e-05, |
| "loss": 0.6198, |
| "step": 951 |
| }, |
| { |
| "epoch": 1.091808922679673, |
| "grad_norm": 17.5, |
| "learning_rate": 1.7375127420998983e-05, |
| "loss": 0.5995, |
| "step": 952 |
| }, |
| { |
| "epoch": 1.0929565342131689, |
| "grad_norm": 15.0, |
| "learning_rate": 1.7370030581039757e-05, |
| "loss": 0.4392, |
| "step": 953 |
| }, |
| { |
| "epoch": 1.0941041457466647, |
| "grad_norm": 54.75, |
| "learning_rate": 1.736493374108053e-05, |
| "loss": 0.4673, |
| "step": 954 |
| }, |
| { |
| "epoch": 1.0952517572801606, |
| "grad_norm": 31.5, |
| "learning_rate": 1.7359836901121305e-05, |
| "loss": 0.5318, |
| "step": 955 |
| }, |
| { |
| "epoch": 1.0963993688136566, |
| "grad_norm": 35.0, |
| "learning_rate": 1.7354740061162082e-05, |
| "loss": 0.5184, |
| "step": 956 |
| }, |
| { |
| "epoch": 1.0975469803471525, |
| "grad_norm": 23.75, |
| "learning_rate": 1.7349643221202856e-05, |
| "loss": 0.5015, |
| "step": 957 |
| }, |
| { |
| "epoch": 1.0986945918806483, |
| "grad_norm": 54.0, |
| "learning_rate": 1.734454638124363e-05, |
| "loss": 0.5254, |
| "step": 958 |
| }, |
| { |
| "epoch": 1.0998422034141444, |
| "grad_norm": 10.375, |
| "learning_rate": 1.7339449541284407e-05, |
| "loss": 0.4739, |
| "step": 959 |
| }, |
| { |
| "epoch": 1.1009898149476403, |
| "grad_norm": 23.5, |
| "learning_rate": 1.7334352701325177e-05, |
| "loss": 0.5565, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.102137426481136, |
| "grad_norm": 11.875, |
| "learning_rate": 1.7329255861365955e-05, |
| "loss": 0.3887, |
| "step": 961 |
| }, |
| { |
| "epoch": 1.103285038014632, |
| "grad_norm": 10.875, |
| "learning_rate": 1.732415902140673e-05, |
| "loss": 0.6166, |
| "step": 962 |
| }, |
| { |
| "epoch": 1.104432649548128, |
| "grad_norm": 43.75, |
| "learning_rate": 1.7319062181447503e-05, |
| "loss": 0.9438, |
| "step": 963 |
| }, |
| { |
| "epoch": 1.1055802610816239, |
| "grad_norm": 17.375, |
| "learning_rate": 1.731396534148828e-05, |
| "loss": 0.6131, |
| "step": 964 |
| }, |
| { |
| "epoch": 1.1067278726151197, |
| "grad_norm": 36.5, |
| "learning_rate": 1.7308868501529054e-05, |
| "loss": 0.5897, |
| "step": 965 |
| }, |
| { |
| "epoch": 1.1078754841486158, |
| "grad_norm": 34.25, |
| "learning_rate": 1.7303771661569828e-05, |
| "loss": 0.473, |
| "step": 966 |
| }, |
| { |
| "epoch": 1.1090230956821117, |
| "grad_norm": 23.75, |
| "learning_rate": 1.7298674821610605e-05, |
| "loss": 0.6736, |
| "step": 967 |
| }, |
| { |
| "epoch": 1.1101707072156075, |
| "grad_norm": 47.25, |
| "learning_rate": 1.7293577981651376e-05, |
| "loss": 0.4113, |
| "step": 968 |
| }, |
| { |
| "epoch": 1.1113183187491034, |
| "grad_norm": 13.6875, |
| "learning_rate": 1.7288481141692153e-05, |
| "loss": 0.2634, |
| "step": 969 |
| }, |
| { |
| "epoch": 1.1124659302825994, |
| "grad_norm": 36.75, |
| "learning_rate": 1.7283384301732927e-05, |
| "loss": 0.5289, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.1136135418160953, |
| "grad_norm": 28.625, |
| "learning_rate": 1.72782874617737e-05, |
| "loss": 0.5775, |
| "step": 971 |
| }, |
| { |
| "epoch": 1.1147611533495911, |
| "grad_norm": 42.25, |
| "learning_rate": 1.7273190621814478e-05, |
| "loss": 0.7163, |
| "step": 972 |
| }, |
| { |
| "epoch": 1.115908764883087, |
| "grad_norm": 57.0, |
| "learning_rate": 1.7268093781855252e-05, |
| "loss": 0.5009, |
| "step": 973 |
| }, |
| { |
| "epoch": 1.117056376416583, |
| "grad_norm": 22.375, |
| "learning_rate": 1.7262996941896026e-05, |
| "loss": 0.4101, |
| "step": 974 |
| }, |
| { |
| "epoch": 1.118203987950079, |
| "grad_norm": 41.25, |
| "learning_rate": 1.72579001019368e-05, |
| "loss": 0.4195, |
| "step": 975 |
| }, |
| { |
| "epoch": 1.1193515994835748, |
| "grad_norm": 17.625, |
| "learning_rate": 1.7252803261977577e-05, |
| "loss": 0.4409, |
| "step": 976 |
| }, |
| { |
| "epoch": 1.1204992110170706, |
| "grad_norm": 18.375, |
| "learning_rate": 1.724770642201835e-05, |
| "loss": 0.4041, |
| "step": 977 |
| }, |
| { |
| "epoch": 1.1216468225505667, |
| "grad_norm": 39.0, |
| "learning_rate": 1.7242609582059125e-05, |
| "loss": 0.6333, |
| "step": 978 |
| }, |
| { |
| "epoch": 1.1227944340840625, |
| "grad_norm": 62.25, |
| "learning_rate": 1.72375127420999e-05, |
| "loss": 0.648, |
| "step": 979 |
| }, |
| { |
| "epoch": 1.1239420456175584, |
| "grad_norm": 57.0, |
| "learning_rate": 1.7232415902140673e-05, |
| "loss": 0.5549, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.1250896571510545, |
| "grad_norm": 17.875, |
| "learning_rate": 1.722731906218145e-05, |
| "loss": 0.3829, |
| "step": 981 |
| }, |
| { |
| "epoch": 1.1262372686845503, |
| "grad_norm": 23.5, |
| "learning_rate": 1.7222222222222224e-05, |
| "loss": 0.3594, |
| "step": 982 |
| }, |
| { |
| "epoch": 1.1273848802180462, |
| "grad_norm": 53.0, |
| "learning_rate": 1.7217125382262998e-05, |
| "loss": 0.6625, |
| "step": 983 |
| }, |
| { |
| "epoch": 1.128532491751542, |
| "grad_norm": 49.75, |
| "learning_rate": 1.7212028542303775e-05, |
| "loss": 0.4887, |
| "step": 984 |
| }, |
| { |
| "epoch": 1.129680103285038, |
| "grad_norm": 15.0, |
| "learning_rate": 1.7206931702344545e-05, |
| "loss": 0.5548, |
| "step": 985 |
| }, |
| { |
| "epoch": 1.130827714818534, |
| "grad_norm": 40.5, |
| "learning_rate": 1.7201834862385323e-05, |
| "loss": 0.7024, |
| "step": 986 |
| }, |
| { |
| "epoch": 1.1319753263520298, |
| "grad_norm": 58.25, |
| "learning_rate": 1.7196738022426097e-05, |
| "loss": 0.4027, |
| "step": 987 |
| }, |
| { |
| "epoch": 1.1331229378855259, |
| "grad_norm": 70.5, |
| "learning_rate": 1.719164118246687e-05, |
| "loss": 0.6295, |
| "step": 988 |
| }, |
| { |
| "epoch": 1.1342705494190217, |
| "grad_norm": 34.25, |
| "learning_rate": 1.7186544342507648e-05, |
| "loss": 0.3274, |
| "step": 989 |
| }, |
| { |
| "epoch": 1.1354181609525176, |
| "grad_norm": 20.0, |
| "learning_rate": 1.7181447502548422e-05, |
| "loss": 0.1818, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.1365657724860134, |
| "grad_norm": 46.5, |
| "learning_rate": 1.7176350662589196e-05, |
| "loss": 0.4344, |
| "step": 991 |
| }, |
| { |
| "epoch": 1.1377133840195093, |
| "grad_norm": 60.0, |
| "learning_rate": 1.7171253822629973e-05, |
| "loss": 0.3682, |
| "step": 992 |
| }, |
| { |
| "epoch": 1.1388609955530054, |
| "grad_norm": 30.25, |
| "learning_rate": 1.7166156982670747e-05, |
| "loss": 0.4771, |
| "step": 993 |
| }, |
| { |
| "epoch": 1.1400086070865012, |
| "grad_norm": 23.375, |
| "learning_rate": 1.716106014271152e-05, |
| "loss": 0.4939, |
| "step": 994 |
| }, |
| { |
| "epoch": 1.141156218619997, |
| "grad_norm": 17.625, |
| "learning_rate": 1.7155963302752295e-05, |
| "loss": 0.6885, |
| "step": 995 |
| }, |
| { |
| "epoch": 1.1423038301534931, |
| "grad_norm": 64.5, |
| "learning_rate": 1.715086646279307e-05, |
| "loss": 0.8163, |
| "step": 996 |
| }, |
| { |
| "epoch": 1.143451441686989, |
| "grad_norm": 39.5, |
| "learning_rate": 1.7145769622833846e-05, |
| "loss": 0.3577, |
| "step": 997 |
| }, |
| { |
| "epoch": 1.1445990532204848, |
| "grad_norm": 8.6875, |
| "learning_rate": 1.714067278287462e-05, |
| "loss": 0.202, |
| "step": 998 |
| }, |
| { |
| "epoch": 1.1457466647539807, |
| "grad_norm": 52.0, |
| "learning_rate": 1.7135575942915394e-05, |
| "loss": 0.5541, |
| "step": 999 |
| }, |
| { |
| "epoch": 1.1468942762874768, |
| "grad_norm": 22.125, |
| "learning_rate": 1.7130479102956168e-05, |
| "loss": 0.2125, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.1468942762874768, |
| "eval_accuracy": 0.6, |
| "eval_loss": 0.5487725734710693, |
| "eval_runtime": 50.2711, |
| "eval_samples_per_second": 1.989, |
| "eval_steps_per_second": 1.989, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.1480418878209726, |
| "grad_norm": 21.25, |
| "learning_rate": 1.7125382262996945e-05, |
| "loss": 0.3415, |
| "step": 1001 |
| }, |
| { |
| "epoch": 1.1491894993544685, |
| "grad_norm": 39.5, |
| "learning_rate": 1.712028542303772e-05, |
| "loss": 0.6746, |
| "step": 1002 |
| }, |
| { |
| "epoch": 1.1503371108879645, |
| "grad_norm": 16.875, |
| "learning_rate": 1.7115188583078493e-05, |
| "loss": 0.7315, |
| "step": 1003 |
| }, |
| { |
| "epoch": 1.1514847224214604, |
| "grad_norm": 13.6875, |
| "learning_rate": 1.7110091743119267e-05, |
| "loss": 0.5293, |
| "step": 1004 |
| }, |
| { |
| "epoch": 1.1526323339549562, |
| "grad_norm": 10.4375, |
| "learning_rate": 1.710499490316004e-05, |
| "loss": 0.4509, |
| "step": 1005 |
| }, |
| { |
| "epoch": 1.153779945488452, |
| "grad_norm": 18.375, |
| "learning_rate": 1.7099898063200818e-05, |
| "loss": 0.3469, |
| "step": 1006 |
| }, |
| { |
| "epoch": 1.1549275570219482, |
| "grad_norm": 12.375, |
| "learning_rate": 1.709480122324159e-05, |
| "loss": 0.4868, |
| "step": 1007 |
| }, |
| { |
| "epoch": 1.156075168555444, |
| "grad_norm": 57.5, |
| "learning_rate": 1.7089704383282366e-05, |
| "loss": 0.5211, |
| "step": 1008 |
| }, |
| { |
| "epoch": 1.1572227800889399, |
| "grad_norm": 13.875, |
| "learning_rate": 1.7084607543323143e-05, |
| "loss": 0.3623, |
| "step": 1009 |
| }, |
| { |
| "epoch": 1.1583703916224357, |
| "grad_norm": 69.0, |
| "learning_rate": 1.7079510703363917e-05, |
| "loss": 0.274, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.1595180031559318, |
| "grad_norm": 18.5, |
| "learning_rate": 1.707441386340469e-05, |
| "loss": 0.2365, |
| "step": 1011 |
| }, |
| { |
| "epoch": 1.1606656146894276, |
| "grad_norm": 40.25, |
| "learning_rate": 1.7069317023445465e-05, |
| "loss": 0.2999, |
| "step": 1012 |
| }, |
| { |
| "epoch": 1.1618132262229235, |
| "grad_norm": 57.5, |
| "learning_rate": 1.706422018348624e-05, |
| "loss": 0.5137, |
| "step": 1013 |
| }, |
| { |
| "epoch": 1.1629608377564193, |
| "grad_norm": 20.875, |
| "learning_rate": 1.7059123343527016e-05, |
| "loss": 0.6691, |
| "step": 1014 |
| }, |
| { |
| "epoch": 1.1641084492899154, |
| "grad_norm": 30.875, |
| "learning_rate": 1.705402650356779e-05, |
| "loss": 0.6642, |
| "step": 1015 |
| }, |
| { |
| "epoch": 1.1652560608234113, |
| "grad_norm": 15.9375, |
| "learning_rate": 1.7048929663608564e-05, |
| "loss": 0.2695, |
| "step": 1016 |
| }, |
| { |
| "epoch": 1.1664036723569071, |
| "grad_norm": 88.5, |
| "learning_rate": 1.7043832823649338e-05, |
| "loss": 0.8211, |
| "step": 1017 |
| }, |
| { |
| "epoch": 1.1675512838904032, |
| "grad_norm": 45.5, |
| "learning_rate": 1.7038735983690115e-05, |
| "loss": 0.7956, |
| "step": 1018 |
| }, |
| { |
| "epoch": 1.168698895423899, |
| "grad_norm": 80.0, |
| "learning_rate": 1.703363914373089e-05, |
| "loss": 0.8805, |
| "step": 1019 |
| }, |
| { |
| "epoch": 1.169846506957395, |
| "grad_norm": 15.1875, |
| "learning_rate": 1.7028542303771663e-05, |
| "loss": 0.5262, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.1709941184908907, |
| "grad_norm": 60.75, |
| "learning_rate": 1.7023445463812437e-05, |
| "loss": 1.1968, |
| "step": 1021 |
| }, |
| { |
| "epoch": 1.1721417300243868, |
| "grad_norm": 31.375, |
| "learning_rate": 1.701834862385321e-05, |
| "loss": 0.744, |
| "step": 1022 |
| }, |
| { |
| "epoch": 1.1732893415578827, |
| "grad_norm": 20.625, |
| "learning_rate": 1.7013251783893988e-05, |
| "loss": 0.3321, |
| "step": 1023 |
| }, |
| { |
| "epoch": 1.1744369530913785, |
| "grad_norm": 19.875, |
| "learning_rate": 1.700815494393476e-05, |
| "loss": 0.4447, |
| "step": 1024 |
| }, |
| { |
| "epoch": 1.1755845646248746, |
| "grad_norm": 23.625, |
| "learning_rate": 1.7003058103975536e-05, |
| "loss": 0.3697, |
| "step": 1025 |
| }, |
| { |
| "epoch": 1.1767321761583704, |
| "grad_norm": 20.625, |
| "learning_rate": 1.6997961264016313e-05, |
| "loss": 0.2759, |
| "step": 1026 |
| }, |
| { |
| "epoch": 1.1778797876918663, |
| "grad_norm": 58.75, |
| "learning_rate": 1.6992864424057087e-05, |
| "loss": 0.7182, |
| "step": 1027 |
| }, |
| { |
| "epoch": 1.1790273992253621, |
| "grad_norm": 6.96875, |
| "learning_rate": 1.698776758409786e-05, |
| "loss": 0.1403, |
| "step": 1028 |
| }, |
| { |
| "epoch": 1.1801750107588582, |
| "grad_norm": 24.875, |
| "learning_rate": 1.6982670744138638e-05, |
| "loss": 0.513, |
| "step": 1029 |
| }, |
| { |
| "epoch": 1.181322622292354, |
| "grad_norm": 15.8125, |
| "learning_rate": 1.697757390417941e-05, |
| "loss": 0.5238, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.18247023382585, |
| "grad_norm": 22.0, |
| "learning_rate": 1.6972477064220186e-05, |
| "loss": 0.445, |
| "step": 1031 |
| }, |
| { |
| "epoch": 1.1836178453593458, |
| "grad_norm": 86.5, |
| "learning_rate": 1.696738022426096e-05, |
| "loss": 0.7085, |
| "step": 1032 |
| }, |
| { |
| "epoch": 1.1847654568928419, |
| "grad_norm": 63.75, |
| "learning_rate": 1.6962283384301734e-05, |
| "loss": 1.0473, |
| "step": 1033 |
| }, |
| { |
| "epoch": 1.1859130684263377, |
| "grad_norm": 63.5, |
| "learning_rate": 1.695718654434251e-05, |
| "loss": 0.3947, |
| "step": 1034 |
| }, |
| { |
| "epoch": 1.1870606799598336, |
| "grad_norm": 12.375, |
| "learning_rate": 1.6952089704383285e-05, |
| "loss": 0.3453, |
| "step": 1035 |
| }, |
| { |
| "epoch": 1.1882082914933294, |
| "grad_norm": 9.1875, |
| "learning_rate": 1.694699286442406e-05, |
| "loss": 0.2802, |
| "step": 1036 |
| }, |
| { |
| "epoch": 1.1893559030268255, |
| "grad_norm": 29.75, |
| "learning_rate": 1.6941896024464833e-05, |
| "loss": 0.543, |
| "step": 1037 |
| }, |
| { |
| "epoch": 1.1905035145603213, |
| "grad_norm": 60.75, |
| "learning_rate": 1.6936799184505606e-05, |
| "loss": 0.7737, |
| "step": 1038 |
| }, |
| { |
| "epoch": 1.1916511260938172, |
| "grad_norm": 38.0, |
| "learning_rate": 1.6931702344546384e-05, |
| "loss": 0.7725, |
| "step": 1039 |
| }, |
| { |
| "epoch": 1.1927987376273133, |
| "grad_norm": 9.9375, |
| "learning_rate": 1.6926605504587158e-05, |
| "loss": 0.2875, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.193946349160809, |
| "grad_norm": 53.5, |
| "learning_rate": 1.692150866462793e-05, |
| "loss": 0.6683, |
| "step": 1041 |
| }, |
| { |
| "epoch": 1.195093960694305, |
| "grad_norm": 58.25, |
| "learning_rate": 1.6916411824668705e-05, |
| "loss": 0.4406, |
| "step": 1042 |
| }, |
| { |
| "epoch": 1.1962415722278008, |
| "grad_norm": 46.0, |
| "learning_rate": 1.6911314984709483e-05, |
| "loss": 0.6739, |
| "step": 1043 |
| }, |
| { |
| "epoch": 1.1973891837612969, |
| "grad_norm": 47.25, |
| "learning_rate": 1.6906218144750257e-05, |
| "loss": 0.278, |
| "step": 1044 |
| }, |
| { |
| "epoch": 1.1985367952947927, |
| "grad_norm": 42.5, |
| "learning_rate": 1.690112130479103e-05, |
| "loss": 0.4348, |
| "step": 1045 |
| }, |
| { |
| "epoch": 1.1996844068282886, |
| "grad_norm": 20.0, |
| "learning_rate": 1.6896024464831804e-05, |
| "loss": 0.1145, |
| "step": 1046 |
| }, |
| { |
| "epoch": 1.2008320183617847, |
| "grad_norm": 36.0, |
| "learning_rate": 1.689092762487258e-05, |
| "loss": 0.7542, |
| "step": 1047 |
| }, |
| { |
| "epoch": 1.2019796298952805, |
| "grad_norm": 7.5625, |
| "learning_rate": 1.6885830784913356e-05, |
| "loss": 0.1475, |
| "step": 1048 |
| }, |
| { |
| "epoch": 1.2031272414287764, |
| "grad_norm": 54.75, |
| "learning_rate": 1.688073394495413e-05, |
| "loss": 0.3082, |
| "step": 1049 |
| }, |
| { |
| "epoch": 1.2042748529622722, |
| "grad_norm": 24.5, |
| "learning_rate": 1.6875637104994903e-05, |
| "loss": 0.8289, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.205422464495768, |
| "grad_norm": 69.5, |
| "learning_rate": 1.687054026503568e-05, |
| "loss": 0.7198, |
| "step": 1051 |
| }, |
| { |
| "epoch": 1.2065700760292641, |
| "grad_norm": 57.25, |
| "learning_rate": 1.6865443425076455e-05, |
| "loss": 0.3022, |
| "step": 1052 |
| }, |
| { |
| "epoch": 1.20771768756276, |
| "grad_norm": 43.5, |
| "learning_rate": 1.686034658511723e-05, |
| "loss": 0.5085, |
| "step": 1053 |
| }, |
| { |
| "epoch": 1.2088652990962558, |
| "grad_norm": 16.75, |
| "learning_rate": 1.6855249745158006e-05, |
| "loss": 0.4784, |
| "step": 1054 |
| }, |
| { |
| "epoch": 1.210012910629752, |
| "grad_norm": 14.3125, |
| "learning_rate": 1.6850152905198776e-05, |
| "loss": 0.3837, |
| "step": 1055 |
| }, |
| { |
| "epoch": 1.2111605221632478, |
| "grad_norm": 9.625, |
| "learning_rate": 1.6845056065239554e-05, |
| "loss": 0.2057, |
| "step": 1056 |
| }, |
| { |
| "epoch": 1.2123081336967436, |
| "grad_norm": 23.625, |
| "learning_rate": 1.6839959225280328e-05, |
| "loss": 0.9273, |
| "step": 1057 |
| }, |
| { |
| "epoch": 1.2134557452302395, |
| "grad_norm": 27.125, |
| "learning_rate": 1.68348623853211e-05, |
| "loss": 0.5371, |
| "step": 1058 |
| }, |
| { |
| "epoch": 1.2146033567637355, |
| "grad_norm": 55.75, |
| "learning_rate": 1.682976554536188e-05, |
| "loss": 0.5682, |
| "step": 1059 |
| }, |
| { |
| "epoch": 1.2157509682972314, |
| "grad_norm": 55.25, |
| "learning_rate": 1.6824668705402653e-05, |
| "loss": 0.4674, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.2168985798307272, |
| "grad_norm": 18.625, |
| "learning_rate": 1.6819571865443427e-05, |
| "loss": 0.515, |
| "step": 1061 |
| }, |
| { |
| "epoch": 1.2180461913642233, |
| "grad_norm": 118.5, |
| "learning_rate": 1.68144750254842e-05, |
| "loss": 1.1109, |
| "step": 1062 |
| }, |
| { |
| "epoch": 1.2191938028977192, |
| "grad_norm": 45.5, |
| "learning_rate": 1.6809378185524974e-05, |
| "loss": 0.2941, |
| "step": 1063 |
| }, |
| { |
| "epoch": 1.220341414431215, |
| "grad_norm": 21.0, |
| "learning_rate": 1.6804281345565752e-05, |
| "loss": 0.4562, |
| "step": 1064 |
| }, |
| { |
| "epoch": 1.2214890259647109, |
| "grad_norm": 9.375, |
| "learning_rate": 1.6799184505606526e-05, |
| "loss": 0.1801, |
| "step": 1065 |
| }, |
| { |
| "epoch": 1.222636637498207, |
| "grad_norm": 12.5, |
| "learning_rate": 1.67940876656473e-05, |
| "loss": 0.2672, |
| "step": 1066 |
| }, |
| { |
| "epoch": 1.2237842490317028, |
| "grad_norm": 77.5, |
| "learning_rate": 1.6788990825688073e-05, |
| "loss": 0.6601, |
| "step": 1067 |
| }, |
| { |
| "epoch": 1.2249318605651986, |
| "grad_norm": 12.0625, |
| "learning_rate": 1.678389398572885e-05, |
| "loss": 0.1519, |
| "step": 1068 |
| }, |
| { |
| "epoch": 1.2260794720986945, |
| "grad_norm": 24.75, |
| "learning_rate": 1.6778797145769625e-05, |
| "loss": 0.5777, |
| "step": 1069 |
| }, |
| { |
| "epoch": 1.2272270836321906, |
| "grad_norm": 31.125, |
| "learning_rate": 1.67737003058104e-05, |
| "loss": 0.7135, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.2283746951656864, |
| "grad_norm": 63.5, |
| "learning_rate": 1.6768603465851176e-05, |
| "loss": 0.8834, |
| "step": 1071 |
| }, |
| { |
| "epoch": 1.2295223066991823, |
| "grad_norm": 64.0, |
| "learning_rate": 1.6763506625891946e-05, |
| "loss": 0.8991, |
| "step": 1072 |
| }, |
| { |
| "epoch": 1.2306699182326781, |
| "grad_norm": 21.375, |
| "learning_rate": 1.6758409785932724e-05, |
| "loss": 0.2697, |
| "step": 1073 |
| }, |
| { |
| "epoch": 1.2318175297661742, |
| "grad_norm": 44.0, |
| "learning_rate": 1.6753312945973498e-05, |
| "loss": 0.4755, |
| "step": 1074 |
| }, |
| { |
| "epoch": 1.23296514129967, |
| "grad_norm": 28.875, |
| "learning_rate": 1.674821610601427e-05, |
| "loss": 0.3531, |
| "step": 1075 |
| }, |
| { |
| "epoch": 1.234112752833166, |
| "grad_norm": 33.0, |
| "learning_rate": 1.674311926605505e-05, |
| "loss": 0.1501, |
| "step": 1076 |
| }, |
| { |
| "epoch": 1.235260364366662, |
| "grad_norm": 23.0, |
| "learning_rate": 1.6738022426095823e-05, |
| "loss": 0.7386, |
| "step": 1077 |
| }, |
| { |
| "epoch": 1.2364079759001578, |
| "grad_norm": 18.75, |
| "learning_rate": 1.6732925586136597e-05, |
| "loss": 0.1371, |
| "step": 1078 |
| }, |
| { |
| "epoch": 1.2375555874336537, |
| "grad_norm": 32.25, |
| "learning_rate": 1.672782874617737e-05, |
| "loss": 0.4283, |
| "step": 1079 |
| }, |
| { |
| "epoch": 1.2387031989671495, |
| "grad_norm": 15.25, |
| "learning_rate": 1.6722731906218144e-05, |
| "loss": 0.221, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.2398508105006456, |
| "grad_norm": 30.625, |
| "learning_rate": 1.671763506625892e-05, |
| "loss": 0.4018, |
| "step": 1081 |
| }, |
| { |
| "epoch": 1.2409984220341415, |
| "grad_norm": 67.0, |
| "learning_rate": 1.6712538226299696e-05, |
| "loss": 0.9727, |
| "step": 1082 |
| }, |
| { |
| "epoch": 1.2421460335676373, |
| "grad_norm": 31.375, |
| "learning_rate": 1.670744138634047e-05, |
| "loss": 0.4461, |
| "step": 1083 |
| }, |
| { |
| "epoch": 1.2432936451011334, |
| "grad_norm": 15.9375, |
| "learning_rate": 1.6702344546381243e-05, |
| "loss": 0.4182, |
| "step": 1084 |
| }, |
| { |
| "epoch": 1.2444412566346292, |
| "grad_norm": 58.25, |
| "learning_rate": 1.669724770642202e-05, |
| "loss": 0.6867, |
| "step": 1085 |
| }, |
| { |
| "epoch": 1.245588868168125, |
| "grad_norm": 30.75, |
| "learning_rate": 1.6692150866462795e-05, |
| "loss": 0.3318, |
| "step": 1086 |
| }, |
| { |
| "epoch": 1.246736479701621, |
| "grad_norm": 52.5, |
| "learning_rate": 1.668705402650357e-05, |
| "loss": 0.4148, |
| "step": 1087 |
| }, |
| { |
| "epoch": 1.247884091235117, |
| "grad_norm": 22.125, |
| "learning_rate": 1.6681957186544346e-05, |
| "loss": 0.5934, |
| "step": 1088 |
| }, |
| { |
| "epoch": 1.2490317027686129, |
| "grad_norm": 33.5, |
| "learning_rate": 1.6676860346585116e-05, |
| "loss": 0.2049, |
| "step": 1089 |
| }, |
| { |
| "epoch": 1.2501793143021087, |
| "grad_norm": 37.5, |
| "learning_rate": 1.6671763506625894e-05, |
| "loss": 0.3963, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.2513269258356048, |
| "grad_norm": 18.5, |
| "learning_rate": 1.6666666666666667e-05, |
| "loss": 0.2542, |
| "step": 1091 |
| }, |
| { |
| "epoch": 1.2524745373691006, |
| "grad_norm": 22.75, |
| "learning_rate": 1.666156982670744e-05, |
| "loss": 0.608, |
| "step": 1092 |
| }, |
| { |
| "epoch": 1.2536221489025965, |
| "grad_norm": 29.625, |
| "learning_rate": 1.665647298674822e-05, |
| "loss": 0.6878, |
| "step": 1093 |
| }, |
| { |
| "epoch": 1.2547697604360923, |
| "grad_norm": 30.125, |
| "learning_rate": 1.6651376146788993e-05, |
| "loss": 0.172, |
| "step": 1094 |
| }, |
| { |
| "epoch": 1.2559173719695882, |
| "grad_norm": 32.75, |
| "learning_rate": 1.6646279306829766e-05, |
| "loss": 0.535, |
| "step": 1095 |
| }, |
| { |
| "epoch": 1.2570649835030843, |
| "grad_norm": 53.25, |
| "learning_rate": 1.6641182466870544e-05, |
| "loss": 0.5492, |
| "step": 1096 |
| }, |
| { |
| "epoch": 1.2582125950365801, |
| "grad_norm": 29.0, |
| "learning_rate": 1.6636085626911314e-05, |
| "loss": 0.3528, |
| "step": 1097 |
| }, |
| { |
| "epoch": 1.259360206570076, |
| "grad_norm": 72.5, |
| "learning_rate": 1.663098878695209e-05, |
| "loss": 0.5466, |
| "step": 1098 |
| }, |
| { |
| "epoch": 1.260507818103572, |
| "grad_norm": 33.25, |
| "learning_rate": 1.6625891946992865e-05, |
| "loss": 0.2994, |
| "step": 1099 |
| }, |
| { |
| "epoch": 1.261655429637068, |
| "grad_norm": 44.5, |
| "learning_rate": 1.662079510703364e-05, |
| "loss": 0.5398, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.261655429637068, |
| "eval_accuracy": 0.56, |
| "eval_loss": 0.5154783129692078, |
| "eval_runtime": 49.6732, |
| "eval_samples_per_second": 2.013, |
| "eval_steps_per_second": 2.013, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.2628030411705637, |
| "grad_norm": 23.5, |
| "learning_rate": 1.6615698267074417e-05, |
| "loss": 0.6216, |
| "step": 1101 |
| }, |
| { |
| "epoch": 1.2639506527040596, |
| "grad_norm": 42.0, |
| "learning_rate": 1.661060142711519e-05, |
| "loss": 0.2358, |
| "step": 1102 |
| }, |
| { |
| "epoch": 1.2650982642375557, |
| "grad_norm": 32.75, |
| "learning_rate": 1.6605504587155964e-05, |
| "loss": 0.2636, |
| "step": 1103 |
| }, |
| { |
| "epoch": 1.2662458757710515, |
| "grad_norm": 19.875, |
| "learning_rate": 1.660040774719674e-05, |
| "loss": 0.2472, |
| "step": 1104 |
| }, |
| { |
| "epoch": 1.2673934873045474, |
| "grad_norm": 11.8125, |
| "learning_rate": 1.6595310907237516e-05, |
| "loss": 0.1907, |
| "step": 1105 |
| }, |
| { |
| "epoch": 1.2685410988380434, |
| "grad_norm": 74.0, |
| "learning_rate": 1.659021406727829e-05, |
| "loss": 0.8738, |
| "step": 1106 |
| }, |
| { |
| "epoch": 1.2696887103715393, |
| "grad_norm": 39.5, |
| "learning_rate": 1.6585117227319063e-05, |
| "loss": 0.4113, |
| "step": 1107 |
| }, |
| { |
| "epoch": 1.2708363219050351, |
| "grad_norm": 34.25, |
| "learning_rate": 1.6580020387359837e-05, |
| "loss": 0.7458, |
| "step": 1108 |
| }, |
| { |
| "epoch": 1.271983933438531, |
| "grad_norm": 130.0, |
| "learning_rate": 1.657492354740061e-05, |
| "loss": 1.2238, |
| "step": 1109 |
| }, |
| { |
| "epoch": 1.2731315449720269, |
| "grad_norm": 37.5, |
| "learning_rate": 1.656982670744139e-05, |
| "loss": 0.6994, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.274279156505523, |
| "grad_norm": 83.5, |
| "learning_rate": 1.6564729867482163e-05, |
| "loss": 1.1055, |
| "step": 1111 |
| }, |
| { |
| "epoch": 1.2754267680390188, |
| "grad_norm": 7.15625, |
| "learning_rate": 1.6559633027522936e-05, |
| "loss": 0.1926, |
| "step": 1112 |
| }, |
| { |
| "epoch": 1.2765743795725146, |
| "grad_norm": 151.0, |
| "learning_rate": 1.6554536187563714e-05, |
| "loss": 0.4989, |
| "step": 1113 |
| }, |
| { |
| "epoch": 1.2777219911060107, |
| "grad_norm": 43.75, |
| "learning_rate": 1.6549439347604484e-05, |
| "loss": 0.4593, |
| "step": 1114 |
| }, |
| { |
| "epoch": 1.2788696026395066, |
| "grad_norm": 23.5, |
| "learning_rate": 1.654434250764526e-05, |
| "loss": 0.2898, |
| "step": 1115 |
| }, |
| { |
| "epoch": 1.2800172141730024, |
| "grad_norm": 37.5, |
| "learning_rate": 1.6539245667686035e-05, |
| "loss": 0.3342, |
| "step": 1116 |
| }, |
| { |
| "epoch": 1.2811648257064983, |
| "grad_norm": 35.25, |
| "learning_rate": 1.653414882772681e-05, |
| "loss": 0.4059, |
| "step": 1117 |
| }, |
| { |
| "epoch": 1.2823124372399943, |
| "grad_norm": 17.875, |
| "learning_rate": 1.6529051987767587e-05, |
| "loss": 0.3272, |
| "step": 1118 |
| }, |
| { |
| "epoch": 1.2834600487734902, |
| "grad_norm": 59.25, |
| "learning_rate": 1.652395514780836e-05, |
| "loss": 0.5725, |
| "step": 1119 |
| }, |
| { |
| "epoch": 1.284607660306986, |
| "grad_norm": 66.0, |
| "learning_rate": 1.6518858307849134e-05, |
| "loss": 0.8477, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.285755271840482, |
| "grad_norm": 67.0, |
| "learning_rate": 1.6513761467889912e-05, |
| "loss": 0.5421, |
| "step": 1121 |
| }, |
| { |
| "epoch": 1.286902883373978, |
| "grad_norm": 23.75, |
| "learning_rate": 1.6508664627930682e-05, |
| "loss": 0.457, |
| "step": 1122 |
| }, |
| { |
| "epoch": 1.2880504949074738, |
| "grad_norm": 22.875, |
| "learning_rate": 1.650356778797146e-05, |
| "loss": 0.5799, |
| "step": 1123 |
| }, |
| { |
| "epoch": 1.2891981064409697, |
| "grad_norm": 48.25, |
| "learning_rate": 1.6498470948012233e-05, |
| "loss": 0.5672, |
| "step": 1124 |
| }, |
| { |
| "epoch": 1.2903457179744655, |
| "grad_norm": 31.625, |
| "learning_rate": 1.6493374108053007e-05, |
| "loss": 0.6196, |
| "step": 1125 |
| }, |
| { |
| "epoch": 1.2914933295079616, |
| "grad_norm": 79.5, |
| "learning_rate": 1.6488277268093785e-05, |
| "loss": 0.6727, |
| "step": 1126 |
| }, |
| { |
| "epoch": 1.2926409410414574, |
| "grad_norm": 55.25, |
| "learning_rate": 1.648318042813456e-05, |
| "loss": 0.6848, |
| "step": 1127 |
| }, |
| { |
| "epoch": 1.2937885525749535, |
| "grad_norm": 49.5, |
| "learning_rate": 1.6478083588175332e-05, |
| "loss": 1.015, |
| "step": 1128 |
| }, |
| { |
| "epoch": 1.2949361641084494, |
| "grad_norm": 30.375, |
| "learning_rate": 1.6472986748216106e-05, |
| "loss": 0.9048, |
| "step": 1129 |
| }, |
| { |
| "epoch": 1.2960837756419452, |
| "grad_norm": 65.5, |
| "learning_rate": 1.6467889908256884e-05, |
| "loss": 0.7712, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.297231387175441, |
| "grad_norm": 14.8125, |
| "learning_rate": 1.6462793068297658e-05, |
| "loss": 0.1942, |
| "step": 1131 |
| }, |
| { |
| "epoch": 1.298378998708937, |
| "grad_norm": 57.0, |
| "learning_rate": 1.645769622833843e-05, |
| "loss": 0.5278, |
| "step": 1132 |
| }, |
| { |
| "epoch": 1.299526610242433, |
| "grad_norm": 20.125, |
| "learning_rate": 1.6452599388379205e-05, |
| "loss": 0.3787, |
| "step": 1133 |
| }, |
| { |
| "epoch": 1.3006742217759288, |
| "grad_norm": 20.875, |
| "learning_rate": 1.644750254841998e-05, |
| "loss": 0.347, |
| "step": 1134 |
| }, |
| { |
| "epoch": 1.3018218333094247, |
| "grad_norm": 36.25, |
| "learning_rate": 1.6442405708460757e-05, |
| "loss": 0.613, |
| "step": 1135 |
| }, |
| { |
| "epoch": 1.3029694448429208, |
| "grad_norm": 46.75, |
| "learning_rate": 1.643730886850153e-05, |
| "loss": 0.3531, |
| "step": 1136 |
| }, |
| { |
| "epoch": 1.3041170563764166, |
| "grad_norm": 51.5, |
| "learning_rate": 1.6432212028542304e-05, |
| "loss": 0.4654, |
| "step": 1137 |
| }, |
| { |
| "epoch": 1.3052646679099125, |
| "grad_norm": 59.5, |
| "learning_rate": 1.642711518858308e-05, |
| "loss": 0.6825, |
| "step": 1138 |
| }, |
| { |
| "epoch": 1.3064122794434083, |
| "grad_norm": 20.125, |
| "learning_rate": 1.6422018348623852e-05, |
| "loss": 0.5258, |
| "step": 1139 |
| }, |
| { |
| "epoch": 1.3075598909769044, |
| "grad_norm": 21.375, |
| "learning_rate": 1.641692150866463e-05, |
| "loss": 0.2334, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.3087075025104002, |
| "grad_norm": 57.5, |
| "learning_rate": 1.6411824668705403e-05, |
| "loss": 0.9003, |
| "step": 1141 |
| }, |
| { |
| "epoch": 1.309855114043896, |
| "grad_norm": 61.5, |
| "learning_rate": 1.6406727828746177e-05, |
| "loss": 0.6237, |
| "step": 1142 |
| }, |
| { |
| "epoch": 1.3110027255773922, |
| "grad_norm": 20.875, |
| "learning_rate": 1.6401630988786955e-05, |
| "loss": 0.3164, |
| "step": 1143 |
| }, |
| { |
| "epoch": 1.312150337110888, |
| "grad_norm": 48.5, |
| "learning_rate": 1.639653414882773e-05, |
| "loss": 0.4018, |
| "step": 1144 |
| }, |
| { |
| "epoch": 1.3132979486443839, |
| "grad_norm": 56.0, |
| "learning_rate": 1.6391437308868502e-05, |
| "loss": 0.7092, |
| "step": 1145 |
| }, |
| { |
| "epoch": 1.3144455601778797, |
| "grad_norm": 38.75, |
| "learning_rate": 1.638634046890928e-05, |
| "loss": 0.5181, |
| "step": 1146 |
| }, |
| { |
| "epoch": 1.3155931717113756, |
| "grad_norm": 40.5, |
| "learning_rate": 1.6381243628950054e-05, |
| "loss": 0.3165, |
| "step": 1147 |
| }, |
| { |
| "epoch": 1.3167407832448716, |
| "grad_norm": 32.25, |
| "learning_rate": 1.6376146788990827e-05, |
| "loss": 0.7836, |
| "step": 1148 |
| }, |
| { |
| "epoch": 1.3178883947783675, |
| "grad_norm": 61.5, |
| "learning_rate": 1.63710499490316e-05, |
| "loss": 0.7627, |
| "step": 1149 |
| }, |
| { |
| "epoch": 1.3190360063118634, |
| "grad_norm": 79.5, |
| "learning_rate": 1.6365953109072375e-05, |
| "loss": 0.9356, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.3201836178453594, |
| "grad_norm": 189.0, |
| "learning_rate": 1.6360856269113153e-05, |
| "loss": 1.0391, |
| "step": 1151 |
| }, |
| { |
| "epoch": 1.3213312293788553, |
| "grad_norm": 20.25, |
| "learning_rate": 1.6355759429153926e-05, |
| "loss": 0.6406, |
| "step": 1152 |
| }, |
| { |
| "epoch": 1.3224788409123511, |
| "grad_norm": 26.375, |
| "learning_rate": 1.63506625891947e-05, |
| "loss": 0.3832, |
| "step": 1153 |
| }, |
| { |
| "epoch": 1.323626452445847, |
| "grad_norm": 33.0, |
| "learning_rate": 1.6345565749235474e-05, |
| "loss": 0.4041, |
| "step": 1154 |
| }, |
| { |
| "epoch": 1.324774063979343, |
| "grad_norm": 23.625, |
| "learning_rate": 1.634046890927625e-05, |
| "loss": 0.3527, |
| "step": 1155 |
| }, |
| { |
| "epoch": 1.325921675512839, |
| "grad_norm": 99.5, |
| "learning_rate": 1.6335372069317022e-05, |
| "loss": 0.9746, |
| "step": 1156 |
| }, |
| { |
| "epoch": 1.3270692870463348, |
| "grad_norm": 45.0, |
| "learning_rate": 1.63302752293578e-05, |
| "loss": 0.2891, |
| "step": 1157 |
| }, |
| { |
| "epoch": 1.3282168985798308, |
| "grad_norm": 56.5, |
| "learning_rate": 1.6325178389398573e-05, |
| "loss": 0.8078, |
| "step": 1158 |
| }, |
| { |
| "epoch": 1.3293645101133267, |
| "grad_norm": 16.125, |
| "learning_rate": 1.6320081549439347e-05, |
| "loss": 0.6181, |
| "step": 1159 |
| }, |
| { |
| "epoch": 1.3305121216468225, |
| "grad_norm": 31.5, |
| "learning_rate": 1.6314984709480125e-05, |
| "loss": 0.3313, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.3316597331803184, |
| "grad_norm": 11.75, |
| "learning_rate": 1.63098878695209e-05, |
| "loss": 0.2764, |
| "step": 1161 |
| }, |
| { |
| "epoch": 1.3328073447138145, |
| "grad_norm": 32.0, |
| "learning_rate": 1.6304791029561672e-05, |
| "loss": 0.6992, |
| "step": 1162 |
| }, |
| { |
| "epoch": 1.3339549562473103, |
| "grad_norm": 40.25, |
| "learning_rate": 1.629969418960245e-05, |
| "loss": 0.4695, |
| "step": 1163 |
| }, |
| { |
| "epoch": 1.3351025677808062, |
| "grad_norm": 60.75, |
| "learning_rate": 1.6294597349643224e-05, |
| "loss": 0.5952, |
| "step": 1164 |
| }, |
| { |
| "epoch": 1.3362501793143022, |
| "grad_norm": 52.75, |
| "learning_rate": 1.6289500509683997e-05, |
| "loss": 0.4987, |
| "step": 1165 |
| }, |
| { |
| "epoch": 1.337397790847798, |
| "grad_norm": 28.25, |
| "learning_rate": 1.628440366972477e-05, |
| "loss": 0.31, |
| "step": 1166 |
| }, |
| { |
| "epoch": 1.338545402381294, |
| "grad_norm": 21.25, |
| "learning_rate": 1.6279306829765545e-05, |
| "loss": 0.4244, |
| "step": 1167 |
| }, |
| { |
| "epoch": 1.3396930139147898, |
| "grad_norm": 30.75, |
| "learning_rate": 1.6274209989806323e-05, |
| "loss": 0.5522, |
| "step": 1168 |
| }, |
| { |
| "epoch": 1.3408406254482856, |
| "grad_norm": 18.5, |
| "learning_rate": 1.6269113149847096e-05, |
| "loss": 0.3786, |
| "step": 1169 |
| }, |
| { |
| "epoch": 1.3419882369817817, |
| "grad_norm": 14.6875, |
| "learning_rate": 1.626401630988787e-05, |
| "loss": 0.0966, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.3431358485152776, |
| "grad_norm": 50.0, |
| "learning_rate": 1.6258919469928644e-05, |
| "loss": 0.4607, |
| "step": 1171 |
| }, |
| { |
| "epoch": 1.3442834600487734, |
| "grad_norm": 27.375, |
| "learning_rate": 1.625382262996942e-05, |
| "loss": 0.847, |
| "step": 1172 |
| }, |
| { |
| "epoch": 1.3454310715822695, |
| "grad_norm": 13.0625, |
| "learning_rate": 1.6248725790010195e-05, |
| "loss": 0.4091, |
| "step": 1173 |
| }, |
| { |
| "epoch": 1.3465786831157653, |
| "grad_norm": 16.0, |
| "learning_rate": 1.624362895005097e-05, |
| "loss": 0.2403, |
| "step": 1174 |
| }, |
| { |
| "epoch": 1.3477262946492612, |
| "grad_norm": 37.25, |
| "learning_rate": 1.6238532110091743e-05, |
| "loss": 0.421, |
| "step": 1175 |
| }, |
| { |
| "epoch": 1.348873906182757, |
| "grad_norm": 55.25, |
| "learning_rate": 1.6233435270132517e-05, |
| "loss": 0.662, |
| "step": 1176 |
| }, |
| { |
| "epoch": 1.3500215177162531, |
| "grad_norm": 40.5, |
| "learning_rate": 1.6228338430173294e-05, |
| "loss": 0.4565, |
| "step": 1177 |
| }, |
| { |
| "epoch": 1.351169129249749, |
| "grad_norm": 14.3125, |
| "learning_rate": 1.622324159021407e-05, |
| "loss": 0.4465, |
| "step": 1178 |
| }, |
| { |
| "epoch": 1.3523167407832448, |
| "grad_norm": 28.625, |
| "learning_rate": 1.6218144750254842e-05, |
| "loss": 0.3729, |
| "step": 1179 |
| }, |
| { |
| "epoch": 1.353464352316741, |
| "grad_norm": 55.25, |
| "learning_rate": 1.621304791029562e-05, |
| "loss": 0.3222, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.3546119638502367, |
| "grad_norm": 22.875, |
| "learning_rate": 1.6207951070336393e-05, |
| "loss": 0.437, |
| "step": 1181 |
| }, |
| { |
| "epoch": 1.3557595753837326, |
| "grad_norm": 38.0, |
| "learning_rate": 1.6202854230377167e-05, |
| "loss": 0.651, |
| "step": 1182 |
| }, |
| { |
| "epoch": 1.3569071869172284, |
| "grad_norm": 21.625, |
| "learning_rate": 1.6197757390417945e-05, |
| "loss": 0.4508, |
| "step": 1183 |
| }, |
| { |
| "epoch": 1.3580547984507243, |
| "grad_norm": 27.875, |
| "learning_rate": 1.6192660550458715e-05, |
| "loss": 0.3005, |
| "step": 1184 |
| }, |
| { |
| "epoch": 1.3592024099842204, |
| "grad_norm": 42.0, |
| "learning_rate": 1.6187563710499492e-05, |
| "loss": 0.2964, |
| "step": 1185 |
| }, |
| { |
| "epoch": 1.3603500215177162, |
| "grad_norm": 26.125, |
| "learning_rate": 1.6182466870540266e-05, |
| "loss": 0.6497, |
| "step": 1186 |
| }, |
| { |
| "epoch": 1.3614976330512123, |
| "grad_norm": 20.375, |
| "learning_rate": 1.617737003058104e-05, |
| "loss": 0.3097, |
| "step": 1187 |
| }, |
| { |
| "epoch": 1.3626452445847081, |
| "grad_norm": 56.5, |
| "learning_rate": 1.6172273190621818e-05, |
| "loss": 0.437, |
| "step": 1188 |
| }, |
| { |
| "epoch": 1.363792856118204, |
| "grad_norm": 69.5, |
| "learning_rate": 1.616717635066259e-05, |
| "loss": 0.4491, |
| "step": 1189 |
| }, |
| { |
| "epoch": 1.3649404676516999, |
| "grad_norm": 58.75, |
| "learning_rate": 1.6162079510703365e-05, |
| "loss": 0.4697, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.3660880791851957, |
| "grad_norm": 15.0, |
| "learning_rate": 1.615698267074414e-05, |
| "loss": 0.2935, |
| "step": 1191 |
| }, |
| { |
| "epoch": 1.3672356907186918, |
| "grad_norm": 69.0, |
| "learning_rate": 1.6151885830784913e-05, |
| "loss": 0.8532, |
| "step": 1192 |
| }, |
| { |
| "epoch": 1.3683833022521876, |
| "grad_norm": 27.5, |
| "learning_rate": 1.614678899082569e-05, |
| "loss": 0.3305, |
| "step": 1193 |
| }, |
| { |
| "epoch": 1.3695309137856835, |
| "grad_norm": 65.5, |
| "learning_rate": 1.6141692150866464e-05, |
| "loss": 0.6747, |
| "step": 1194 |
| }, |
| { |
| "epoch": 1.3706785253191796, |
| "grad_norm": 39.5, |
| "learning_rate": 1.6136595310907238e-05, |
| "loss": 0.4378, |
| "step": 1195 |
| }, |
| { |
| "epoch": 1.3718261368526754, |
| "grad_norm": 36.25, |
| "learning_rate": 1.6131498470948012e-05, |
| "loss": 0.4756, |
| "step": 1196 |
| }, |
| { |
| "epoch": 1.3729737483861713, |
| "grad_norm": 25.75, |
| "learning_rate": 1.612640163098879e-05, |
| "loss": 0.2116, |
| "step": 1197 |
| }, |
| { |
| "epoch": 1.374121359919667, |
| "grad_norm": 51.5, |
| "learning_rate": 1.6121304791029563e-05, |
| "loss": 0.6976, |
| "step": 1198 |
| }, |
| { |
| "epoch": 1.3752689714531632, |
| "grad_norm": 28.25, |
| "learning_rate": 1.6116207951070337e-05, |
| "loss": 0.3644, |
| "step": 1199 |
| }, |
| { |
| "epoch": 1.376416582986659, |
| "grad_norm": 21.25, |
| "learning_rate": 1.6111111111111115e-05, |
| "loss": 0.4288, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.376416582986659, |
| "eval_accuracy": 0.61, |
| "eval_loss": 0.5443911552429199, |
| "eval_runtime": 49.3817, |
| "eval_samples_per_second": 2.025, |
| "eval_steps_per_second": 2.025, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.3775641945201549, |
| "grad_norm": 16.75, |
| "learning_rate": 1.6106014271151885e-05, |
| "loss": 0.5041, |
| "step": 1201 |
| }, |
| { |
| "epoch": 1.378711806053651, |
| "grad_norm": 50.25, |
| "learning_rate": 1.6100917431192662e-05, |
| "loss": 0.5077, |
| "step": 1202 |
| }, |
| { |
| "epoch": 1.3798594175871468, |
| "grad_norm": 15.875, |
| "learning_rate": 1.6095820591233436e-05, |
| "loss": 0.2509, |
| "step": 1203 |
| }, |
| { |
| "epoch": 1.3810070291206427, |
| "grad_norm": 52.5, |
| "learning_rate": 1.609072375127421e-05, |
| "loss": 0.6619, |
| "step": 1204 |
| }, |
| { |
| "epoch": 1.3821546406541385, |
| "grad_norm": 27.0, |
| "learning_rate": 1.6085626911314988e-05, |
| "loss": 0.3906, |
| "step": 1205 |
| }, |
| { |
| "epoch": 1.3833022521876344, |
| "grad_norm": 28.5, |
| "learning_rate": 1.608053007135576e-05, |
| "loss": 0.506, |
| "step": 1206 |
| }, |
| { |
| "epoch": 1.3844498637211304, |
| "grad_norm": 34.25, |
| "learning_rate": 1.6075433231396535e-05, |
| "loss": 0.3932, |
| "step": 1207 |
| }, |
| { |
| "epoch": 1.3855974752546263, |
| "grad_norm": 36.75, |
| "learning_rate": 1.6070336391437313e-05, |
| "loss": 0.5362, |
| "step": 1208 |
| }, |
| { |
| "epoch": 1.3867450867881221, |
| "grad_norm": 52.5, |
| "learning_rate": 1.6065239551478083e-05, |
| "loss": 0.5699, |
| "step": 1209 |
| }, |
| { |
| "epoch": 1.3878926983216182, |
| "grad_norm": 45.5, |
| "learning_rate": 1.606014271151886e-05, |
| "loss": 0.5685, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.389040309855114, |
| "grad_norm": 60.25, |
| "learning_rate": 1.6055045871559634e-05, |
| "loss": 0.9313, |
| "step": 1211 |
| }, |
| { |
| "epoch": 1.39018792138861, |
| "grad_norm": 38.0, |
| "learning_rate": 1.6049949031600408e-05, |
| "loss": 0.5542, |
| "step": 1212 |
| }, |
| { |
| "epoch": 1.3913355329221058, |
| "grad_norm": 40.75, |
| "learning_rate": 1.6044852191641186e-05, |
| "loss": 0.8328, |
| "step": 1213 |
| }, |
| { |
| "epoch": 1.3924831444556018, |
| "grad_norm": 42.25, |
| "learning_rate": 1.603975535168196e-05, |
| "loss": 0.2783, |
| "step": 1214 |
| }, |
| { |
| "epoch": 1.3936307559890977, |
| "grad_norm": 39.5, |
| "learning_rate": 1.6034658511722733e-05, |
| "loss": 0.5385, |
| "step": 1215 |
| }, |
| { |
| "epoch": 1.3947783675225935, |
| "grad_norm": 42.75, |
| "learning_rate": 1.6029561671763507e-05, |
| "loss": 0.5375, |
| "step": 1216 |
| }, |
| { |
| "epoch": 1.3959259790560896, |
| "grad_norm": 22.25, |
| "learning_rate": 1.602446483180428e-05, |
| "loss": 0.6028, |
| "step": 1217 |
| }, |
| { |
| "epoch": 1.3970735905895855, |
| "grad_norm": 34.0, |
| "learning_rate": 1.601936799184506e-05, |
| "loss": 0.5153, |
| "step": 1218 |
| }, |
| { |
| "epoch": 1.3982212021230813, |
| "grad_norm": 79.5, |
| "learning_rate": 1.6014271151885832e-05, |
| "loss": 0.7959, |
| "step": 1219 |
| }, |
| { |
| "epoch": 1.3993688136565772, |
| "grad_norm": 28.0, |
| "learning_rate": 1.6009174311926606e-05, |
| "loss": 0.271, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.400516425190073, |
| "grad_norm": 76.5, |
| "learning_rate": 1.600407747196738e-05, |
| "loss": 0.6952, |
| "step": 1221 |
| }, |
| { |
| "epoch": 1.401664036723569, |
| "grad_norm": 21.875, |
| "learning_rate": 1.5998980632008157e-05, |
| "loss": 0.452, |
| "step": 1222 |
| }, |
| { |
| "epoch": 1.402811648257065, |
| "grad_norm": 70.5, |
| "learning_rate": 1.599388379204893e-05, |
| "loss": 0.5592, |
| "step": 1223 |
| }, |
| { |
| "epoch": 1.403959259790561, |
| "grad_norm": 17.875, |
| "learning_rate": 1.5988786952089705e-05, |
| "loss": 0.451, |
| "step": 1224 |
| }, |
| { |
| "epoch": 1.4051068713240569, |
| "grad_norm": 30.125, |
| "learning_rate": 1.5983690112130483e-05, |
| "loss": 0.4143, |
| "step": 1225 |
| }, |
| { |
| "epoch": 1.4062544828575527, |
| "grad_norm": 25.625, |
| "learning_rate": 1.5978593272171253e-05, |
| "loss": 0.454, |
| "step": 1226 |
| }, |
| { |
| "epoch": 1.4074020943910486, |
| "grad_norm": 24.625, |
| "learning_rate": 1.597349643221203e-05, |
| "loss": 0.4827, |
| "step": 1227 |
| }, |
| { |
| "epoch": 1.4085497059245444, |
| "grad_norm": 14.6875, |
| "learning_rate": 1.5968399592252804e-05, |
| "loss": 0.1517, |
| "step": 1228 |
| }, |
| { |
| "epoch": 1.4096973174580405, |
| "grad_norm": 12.3125, |
| "learning_rate": 1.5963302752293578e-05, |
| "loss": 0.4015, |
| "step": 1229 |
| }, |
| { |
| "epoch": 1.4108449289915364, |
| "grad_norm": 59.0, |
| "learning_rate": 1.5958205912334355e-05, |
| "loss": 0.5366, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.4119925405250322, |
| "grad_norm": 11.1875, |
| "learning_rate": 1.595310907237513e-05, |
| "loss": 0.3743, |
| "step": 1231 |
| }, |
| { |
| "epoch": 1.4131401520585283, |
| "grad_norm": 18.75, |
| "learning_rate": 1.5948012232415903e-05, |
| "loss": 0.4668, |
| "step": 1232 |
| }, |
| { |
| "epoch": 1.4142877635920241, |
| "grad_norm": 50.75, |
| "learning_rate": 1.5942915392456677e-05, |
| "loss": 0.3211, |
| "step": 1233 |
| }, |
| { |
| "epoch": 1.41543537512552, |
| "grad_norm": 41.5, |
| "learning_rate": 1.593781855249745e-05, |
| "loss": 0.5208, |
| "step": 1234 |
| }, |
| { |
| "epoch": 1.4165829866590158, |
| "grad_norm": 16.5, |
| "learning_rate": 1.593272171253823e-05, |
| "loss": 0.2334, |
| "step": 1235 |
| }, |
| { |
| "epoch": 1.417730598192512, |
| "grad_norm": 72.0, |
| "learning_rate": 1.5927624872579002e-05, |
| "loss": 0.4065, |
| "step": 1236 |
| }, |
| { |
| "epoch": 1.4188782097260078, |
| "grad_norm": 21.0, |
| "learning_rate": 1.5922528032619776e-05, |
| "loss": 0.4257, |
| "step": 1237 |
| }, |
| { |
| "epoch": 1.4200258212595036, |
| "grad_norm": 18.75, |
| "learning_rate": 1.591743119266055e-05, |
| "loss": 0.3615, |
| "step": 1238 |
| }, |
| { |
| "epoch": 1.4211734327929997, |
| "grad_norm": 54.5, |
| "learning_rate": 1.5912334352701327e-05, |
| "loss": 0.2902, |
| "step": 1239 |
| }, |
| { |
| "epoch": 1.4223210443264955, |
| "grad_norm": 8.3125, |
| "learning_rate": 1.59072375127421e-05, |
| "loss": 0.1653, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.4234686558599914, |
| "grad_norm": 18.125, |
| "learning_rate": 1.5902140672782875e-05, |
| "loss": 0.3842, |
| "step": 1241 |
| }, |
| { |
| "epoch": 1.4246162673934872, |
| "grad_norm": 85.0, |
| "learning_rate": 1.5897043832823652e-05, |
| "loss": 0.7718, |
| "step": 1242 |
| }, |
| { |
| "epoch": 1.425763878926983, |
| "grad_norm": 27.125, |
| "learning_rate": 1.5891946992864423e-05, |
| "loss": 0.195, |
| "step": 1243 |
| }, |
| { |
| "epoch": 1.4269114904604792, |
| "grad_norm": 31.125, |
| "learning_rate": 1.58868501529052e-05, |
| "loss": 0.5963, |
| "step": 1244 |
| }, |
| { |
| "epoch": 1.428059101993975, |
| "grad_norm": 67.0, |
| "learning_rate": 1.5881753312945974e-05, |
| "loss": 0.709, |
| "step": 1245 |
| }, |
| { |
| "epoch": 1.429206713527471, |
| "grad_norm": 20.25, |
| "learning_rate": 1.5876656472986748e-05, |
| "loss": 0.3003, |
| "step": 1246 |
| }, |
| { |
| "epoch": 1.430354325060967, |
| "grad_norm": 40.25, |
| "learning_rate": 1.5871559633027525e-05, |
| "loss": 0.7344, |
| "step": 1247 |
| }, |
| { |
| "epoch": 1.4315019365944628, |
| "grad_norm": 26.75, |
| "learning_rate": 1.58664627930683e-05, |
| "loss": 1.0281, |
| "step": 1248 |
| }, |
| { |
| "epoch": 1.4326495481279586, |
| "grad_norm": 49.25, |
| "learning_rate": 1.5861365953109073e-05, |
| "loss": 0.3, |
| "step": 1249 |
| }, |
| { |
| "epoch": 1.4337971596614545, |
| "grad_norm": 27.125, |
| "learning_rate": 1.585626911314985e-05, |
| "loss": 0.5945, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.4349447711949506, |
| "grad_norm": 41.5, |
| "learning_rate": 1.585117227319062e-05, |
| "loss": 0.6692, |
| "step": 1251 |
| }, |
| { |
| "epoch": 1.4360923827284464, |
| "grad_norm": 14.375, |
| "learning_rate": 1.58460754332314e-05, |
| "loss": 0.3908, |
| "step": 1252 |
| }, |
| { |
| "epoch": 1.4372399942619423, |
| "grad_norm": 77.5, |
| "learning_rate": 1.5840978593272172e-05, |
| "loss": 0.7376, |
| "step": 1253 |
| }, |
| { |
| "epoch": 1.4383876057954383, |
| "grad_norm": 29.75, |
| "learning_rate": 1.5835881753312946e-05, |
| "loss": 0.355, |
| "step": 1254 |
| }, |
| { |
| "epoch": 1.4395352173289342, |
| "grad_norm": 32.0, |
| "learning_rate": 1.5830784913353723e-05, |
| "loss": 0.7525, |
| "step": 1255 |
| }, |
| { |
| "epoch": 1.44068282886243, |
| "grad_norm": 42.75, |
| "learning_rate": 1.5825688073394497e-05, |
| "loss": 0.2832, |
| "step": 1256 |
| }, |
| { |
| "epoch": 1.441830440395926, |
| "grad_norm": 21.125, |
| "learning_rate": 1.582059123343527e-05, |
| "loss": 0.3375, |
| "step": 1257 |
| }, |
| { |
| "epoch": 1.442978051929422, |
| "grad_norm": 33.25, |
| "learning_rate": 1.5815494393476045e-05, |
| "loss": 0.3517, |
| "step": 1258 |
| }, |
| { |
| "epoch": 1.4441256634629178, |
| "grad_norm": 35.0, |
| "learning_rate": 1.5810397553516822e-05, |
| "loss": 0.382, |
| "step": 1259 |
| }, |
| { |
| "epoch": 1.4452732749964137, |
| "grad_norm": 53.75, |
| "learning_rate": 1.5805300713557596e-05, |
| "loss": 0.3113, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.4464208865299097, |
| "grad_norm": 43.75, |
| "learning_rate": 1.580020387359837e-05, |
| "loss": 0.3177, |
| "step": 1261 |
| }, |
| { |
| "epoch": 1.4475684980634056, |
| "grad_norm": 35.0, |
| "learning_rate": 1.5795107033639144e-05, |
| "loss": 0.3791, |
| "step": 1262 |
| }, |
| { |
| "epoch": 1.4487161095969014, |
| "grad_norm": 45.0, |
| "learning_rate": 1.5790010193679918e-05, |
| "loss": 0.4492, |
| "step": 1263 |
| }, |
| { |
| "epoch": 1.4498637211303973, |
| "grad_norm": 27.875, |
| "learning_rate": 1.5784913353720695e-05, |
| "loss": 0.3343, |
| "step": 1264 |
| }, |
| { |
| "epoch": 1.4510113326638931, |
| "grad_norm": 19.125, |
| "learning_rate": 1.577981651376147e-05, |
| "loss": 0.8559, |
| "step": 1265 |
| }, |
| { |
| "epoch": 1.4521589441973892, |
| "grad_norm": 8.0, |
| "learning_rate": 1.5774719673802243e-05, |
| "loss": 0.1379, |
| "step": 1266 |
| }, |
| { |
| "epoch": 1.453306555730885, |
| "grad_norm": 49.5, |
| "learning_rate": 1.576962283384302e-05, |
| "loss": 0.4941, |
| "step": 1267 |
| }, |
| { |
| "epoch": 1.454454167264381, |
| "grad_norm": 84.5, |
| "learning_rate": 1.576452599388379e-05, |
| "loss": 1.4308, |
| "step": 1268 |
| }, |
| { |
| "epoch": 1.455601778797877, |
| "grad_norm": 92.0, |
| "learning_rate": 1.5759429153924568e-05, |
| "loss": 0.9692, |
| "step": 1269 |
| }, |
| { |
| "epoch": 1.4567493903313729, |
| "grad_norm": 88.0, |
| "learning_rate": 1.5754332313965342e-05, |
| "loss": 0.9589, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.4578970018648687, |
| "grad_norm": 50.25, |
| "learning_rate": 1.5749235474006116e-05, |
| "loss": 0.5352, |
| "step": 1271 |
| }, |
| { |
| "epoch": 1.4590446133983646, |
| "grad_norm": 42.0, |
| "learning_rate": 1.5744138634046893e-05, |
| "loss": 0.3708, |
| "step": 1272 |
| }, |
| { |
| "epoch": 1.4601922249318606, |
| "grad_norm": 35.0, |
| "learning_rate": 1.5739041794087667e-05, |
| "loss": 0.7022, |
| "step": 1273 |
| }, |
| { |
| "epoch": 1.4613398364653565, |
| "grad_norm": 13.375, |
| "learning_rate": 1.573394495412844e-05, |
| "loss": 0.3201, |
| "step": 1274 |
| }, |
| { |
| "epoch": 1.4624874479988523, |
| "grad_norm": 87.5, |
| "learning_rate": 1.572884811416922e-05, |
| "loss": 0.576, |
| "step": 1275 |
| }, |
| { |
| "epoch": 1.4636350595323484, |
| "grad_norm": 68.5, |
| "learning_rate": 1.5723751274209992e-05, |
| "loss": 0.5697, |
| "step": 1276 |
| }, |
| { |
| "epoch": 1.4647826710658443, |
| "grad_norm": 31.75, |
| "learning_rate": 1.5718654434250766e-05, |
| "loss": 0.4631, |
| "step": 1277 |
| }, |
| { |
| "epoch": 1.46593028259934, |
| "grad_norm": 19.5, |
| "learning_rate": 1.571355759429154e-05, |
| "loss": 0.4516, |
| "step": 1278 |
| }, |
| { |
| "epoch": 1.467077894132836, |
| "grad_norm": 52.0, |
| "learning_rate": 1.5708460754332314e-05, |
| "loss": 0.6808, |
| "step": 1279 |
| }, |
| { |
| "epoch": 1.4682255056663318, |
| "grad_norm": 17.875, |
| "learning_rate": 1.570336391437309e-05, |
| "loss": 0.3936, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.4693731171998279, |
| "grad_norm": 24.25, |
| "learning_rate": 1.5698267074413865e-05, |
| "loss": 0.4196, |
| "step": 1281 |
| }, |
| { |
| "epoch": 1.4705207287333237, |
| "grad_norm": 111.0, |
| "learning_rate": 1.569317023445464e-05, |
| "loss": 0.8228, |
| "step": 1282 |
| }, |
| { |
| "epoch": 1.4716683402668198, |
| "grad_norm": 36.5, |
| "learning_rate": 1.5688073394495413e-05, |
| "loss": 0.5546, |
| "step": 1283 |
| }, |
| { |
| "epoch": 1.4728159518003157, |
| "grad_norm": 40.5, |
| "learning_rate": 1.568297655453619e-05, |
| "loss": 0.4347, |
| "step": 1284 |
| }, |
| { |
| "epoch": 1.4739635633338115, |
| "grad_norm": 59.75, |
| "learning_rate": 1.5677879714576964e-05, |
| "loss": 0.8506, |
| "step": 1285 |
| }, |
| { |
| "epoch": 1.4751111748673074, |
| "grad_norm": 58.25, |
| "learning_rate": 1.5672782874617738e-05, |
| "loss": 0.4958, |
| "step": 1286 |
| }, |
| { |
| "epoch": 1.4762587864008032, |
| "grad_norm": 41.5, |
| "learning_rate": 1.5667686034658512e-05, |
| "loss": 0.6571, |
| "step": 1287 |
| }, |
| { |
| "epoch": 1.4774063979342993, |
| "grad_norm": 20.75, |
| "learning_rate": 1.5662589194699286e-05, |
| "loss": 0.2749, |
| "step": 1288 |
| }, |
| { |
| "epoch": 1.4785540094677951, |
| "grad_norm": 24.875, |
| "learning_rate": 1.5657492354740063e-05, |
| "loss": 0.552, |
| "step": 1289 |
| }, |
| { |
| "epoch": 1.479701621001291, |
| "grad_norm": 24.625, |
| "learning_rate": 1.5652395514780837e-05, |
| "loss": 0.5655, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.480849232534787, |
| "grad_norm": 71.0, |
| "learning_rate": 1.564729867482161e-05, |
| "loss": 1.1072, |
| "step": 1291 |
| }, |
| { |
| "epoch": 1.481996844068283, |
| "grad_norm": 56.5, |
| "learning_rate": 1.564220183486239e-05, |
| "loss": 0.9029, |
| "step": 1292 |
| }, |
| { |
| "epoch": 1.4831444556017788, |
| "grad_norm": 75.0, |
| "learning_rate": 1.563710499490316e-05, |
| "loss": 0.8671, |
| "step": 1293 |
| }, |
| { |
| "epoch": 1.4842920671352746, |
| "grad_norm": 68.5, |
| "learning_rate": 1.5632008154943936e-05, |
| "loss": 0.6165, |
| "step": 1294 |
| }, |
| { |
| "epoch": 1.4854396786687707, |
| "grad_norm": 57.5, |
| "learning_rate": 1.5626911314984713e-05, |
| "loss": 0.4413, |
| "step": 1295 |
| }, |
| { |
| "epoch": 1.4865872902022665, |
| "grad_norm": 38.25, |
| "learning_rate": 1.5621814475025484e-05, |
| "loss": 0.4508, |
| "step": 1296 |
| }, |
| { |
| "epoch": 1.4877349017357624, |
| "grad_norm": 22.375, |
| "learning_rate": 1.561671763506626e-05, |
| "loss": 0.4694, |
| "step": 1297 |
| }, |
| { |
| "epoch": 1.4888825132692585, |
| "grad_norm": 19.625, |
| "learning_rate": 1.5611620795107035e-05, |
| "loss": 0.4833, |
| "step": 1298 |
| }, |
| { |
| "epoch": 1.4900301248027543, |
| "grad_norm": 74.0, |
| "learning_rate": 1.560652395514781e-05, |
| "loss": 0.6443, |
| "step": 1299 |
| }, |
| { |
| "epoch": 1.4911777363362502, |
| "grad_norm": 30.25, |
| "learning_rate": 1.5601427115188586e-05, |
| "loss": 0.5003, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.4911777363362502, |
| "eval_accuracy": 0.64, |
| "eval_loss": 0.5184877514839172, |
| "eval_runtime": 49.6613, |
| "eval_samples_per_second": 2.014, |
| "eval_steps_per_second": 2.014, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.492325347869746, |
| "grad_norm": 26.5, |
| "learning_rate": 1.559633027522936e-05, |
| "loss": 0.4356, |
| "step": 1301 |
| }, |
| { |
| "epoch": 1.4934729594032419, |
| "grad_norm": 93.0, |
| "learning_rate": 1.5591233435270134e-05, |
| "loss": 0.6945, |
| "step": 1302 |
| }, |
| { |
| "epoch": 1.494620570936738, |
| "grad_norm": 84.0, |
| "learning_rate": 1.5586136595310908e-05, |
| "loss": 0.7059, |
| "step": 1303 |
| }, |
| { |
| "epoch": 1.4957681824702338, |
| "grad_norm": 84.5, |
| "learning_rate": 1.5581039755351682e-05, |
| "loss": 0.8654, |
| "step": 1304 |
| }, |
| { |
| "epoch": 1.4969157940037299, |
| "grad_norm": 79.5, |
| "learning_rate": 1.5575942915392456e-05, |
| "loss": 0.8112, |
| "step": 1305 |
| }, |
| { |
| "epoch": 1.4980634055372257, |
| "grad_norm": 41.75, |
| "learning_rate": 1.5570846075433233e-05, |
| "loss": 1.0995, |
| "step": 1306 |
| }, |
| { |
| "epoch": 1.4992110170707216, |
| "grad_norm": 28.625, |
| "learning_rate": 1.5565749235474007e-05, |
| "loss": 0.8355, |
| "step": 1307 |
| }, |
| { |
| "epoch": 1.5003586286042174, |
| "grad_norm": 67.5, |
| "learning_rate": 1.556065239551478e-05, |
| "loss": 0.7727, |
| "step": 1308 |
| }, |
| { |
| "epoch": 1.5015062401377133, |
| "grad_norm": 17.5, |
| "learning_rate": 1.555555555555556e-05, |
| "loss": 0.4682, |
| "step": 1309 |
| }, |
| { |
| "epoch": 1.5026538516712094, |
| "grad_norm": 18.625, |
| "learning_rate": 1.555045871559633e-05, |
| "loss": 0.2126, |
| "step": 1310 |
| }, |
| { |
| "epoch": 1.5038014632047052, |
| "grad_norm": 15.75, |
| "learning_rate": 1.5545361875637106e-05, |
| "loss": 0.4916, |
| "step": 1311 |
| }, |
| { |
| "epoch": 1.5049490747382013, |
| "grad_norm": 31.625, |
| "learning_rate": 1.554026503567788e-05, |
| "loss": 0.2308, |
| "step": 1312 |
| }, |
| { |
| "epoch": 1.5060966862716971, |
| "grad_norm": 51.75, |
| "learning_rate": 1.5535168195718654e-05, |
| "loss": 1.0898, |
| "step": 1313 |
| }, |
| { |
| "epoch": 1.507244297805193, |
| "grad_norm": 31.75, |
| "learning_rate": 1.553007135575943e-05, |
| "loss": 0.4099, |
| "step": 1314 |
| }, |
| { |
| "epoch": 1.5083919093386888, |
| "grad_norm": 88.0, |
| "learning_rate": 1.5524974515800205e-05, |
| "loss": 0.9649, |
| "step": 1315 |
| }, |
| { |
| "epoch": 1.5095395208721847, |
| "grad_norm": 24.75, |
| "learning_rate": 1.551987767584098e-05, |
| "loss": 1.0352, |
| "step": 1316 |
| }, |
| { |
| "epoch": 1.5106871324056805, |
| "grad_norm": 13.625, |
| "learning_rate": 1.5514780835881756e-05, |
| "loss": 0.3537, |
| "step": 1317 |
| }, |
| { |
| "epoch": 1.5118347439391766, |
| "grad_norm": 94.0, |
| "learning_rate": 1.550968399592253e-05, |
| "loss": 0.9038, |
| "step": 1318 |
| }, |
| { |
| "epoch": 1.5129823554726725, |
| "grad_norm": 26.0, |
| "learning_rate": 1.5504587155963304e-05, |
| "loss": 0.346, |
| "step": 1319 |
| }, |
| { |
| "epoch": 1.5141299670061685, |
| "grad_norm": 44.25, |
| "learning_rate": 1.5499490316004078e-05, |
| "loss": 0.7941, |
| "step": 1320 |
| }, |
| { |
| "epoch": 1.5152775785396644, |
| "grad_norm": 27.75, |
| "learning_rate": 1.5494393476044852e-05, |
| "loss": 0.3747, |
| "step": 1321 |
| }, |
| { |
| "epoch": 1.5164251900731602, |
| "grad_norm": 97.5, |
| "learning_rate": 1.548929663608563e-05, |
| "loss": 0.9651, |
| "step": 1322 |
| }, |
| { |
| "epoch": 1.517572801606656, |
| "grad_norm": 10.6875, |
| "learning_rate": 1.5484199796126403e-05, |
| "loss": 0.2523, |
| "step": 1323 |
| }, |
| { |
| "epoch": 1.518720413140152, |
| "grad_norm": 23.25, |
| "learning_rate": 1.5479102956167177e-05, |
| "loss": 0.5667, |
| "step": 1324 |
| }, |
| { |
| "epoch": 1.519868024673648, |
| "grad_norm": 22.25, |
| "learning_rate": 1.547400611620795e-05, |
| "loss": 0.3108, |
| "step": 1325 |
| }, |
| { |
| "epoch": 1.5210156362071439, |
| "grad_norm": 29.125, |
| "learning_rate": 1.5468909276248728e-05, |
| "loss": 0.5994, |
| "step": 1326 |
| }, |
| { |
| "epoch": 1.52216324774064, |
| "grad_norm": 37.0, |
| "learning_rate": 1.5463812436289502e-05, |
| "loss": 0.6835, |
| "step": 1327 |
| }, |
| { |
| "epoch": 1.5233108592741358, |
| "grad_norm": 35.0, |
| "learning_rate": 1.5458715596330276e-05, |
| "loss": 0.3934, |
| "step": 1328 |
| }, |
| { |
| "epoch": 1.5244584708076316, |
| "grad_norm": 42.5, |
| "learning_rate": 1.545361875637105e-05, |
| "loss": 0.4904, |
| "step": 1329 |
| }, |
| { |
| "epoch": 1.5256060823411275, |
| "grad_norm": 79.5, |
| "learning_rate": 1.5448521916411824e-05, |
| "loss": 0.8999, |
| "step": 1330 |
| }, |
| { |
| "epoch": 1.5267536938746233, |
| "grad_norm": 51.25, |
| "learning_rate": 1.54434250764526e-05, |
| "loss": 0.5231, |
| "step": 1331 |
| }, |
| { |
| "epoch": 1.5279013054081192, |
| "grad_norm": 53.5, |
| "learning_rate": 1.5438328236493375e-05, |
| "loss": 0.6297, |
| "step": 1332 |
| }, |
| { |
| "epoch": 1.5290489169416153, |
| "grad_norm": 65.5, |
| "learning_rate": 1.543323139653415e-05, |
| "loss": 0.5863, |
| "step": 1333 |
| }, |
| { |
| "epoch": 1.5301965284751113, |
| "grad_norm": 44.0, |
| "learning_rate": 1.5428134556574926e-05, |
| "loss": 0.402, |
| "step": 1334 |
| }, |
| { |
| "epoch": 1.5313441400086072, |
| "grad_norm": 54.0, |
| "learning_rate": 1.54230377166157e-05, |
| "loss": 0.5476, |
| "step": 1335 |
| }, |
| { |
| "epoch": 1.532491751542103, |
| "grad_norm": 40.5, |
| "learning_rate": 1.5417940876656474e-05, |
| "loss": 0.4921, |
| "step": 1336 |
| }, |
| { |
| "epoch": 1.533639363075599, |
| "grad_norm": 15.125, |
| "learning_rate": 1.541284403669725e-05, |
| "loss": 0.4748, |
| "step": 1337 |
| }, |
| { |
| "epoch": 1.5347869746090947, |
| "grad_norm": 35.25, |
| "learning_rate": 1.5407747196738022e-05, |
| "loss": 0.5071, |
| "step": 1338 |
| }, |
| { |
| "epoch": 1.5359345861425906, |
| "grad_norm": 26.5, |
| "learning_rate": 1.54026503567788e-05, |
| "loss": 0.2151, |
| "step": 1339 |
| }, |
| { |
| "epoch": 1.5370821976760867, |
| "grad_norm": 32.5, |
| "learning_rate": 1.5397553516819573e-05, |
| "loss": 0.4312, |
| "step": 1340 |
| }, |
| { |
| "epoch": 1.5382298092095825, |
| "grad_norm": 80.5, |
| "learning_rate": 1.5392456676860347e-05, |
| "loss": 0.6625, |
| "step": 1341 |
| }, |
| { |
| "epoch": 1.5393774207430786, |
| "grad_norm": 46.25, |
| "learning_rate": 1.5387359836901124e-05, |
| "loss": 0.3488, |
| "step": 1342 |
| }, |
| { |
| "epoch": 1.5405250322765744, |
| "grad_norm": 41.75, |
| "learning_rate": 1.5382262996941898e-05, |
| "loss": 0.5342, |
| "step": 1343 |
| }, |
| { |
| "epoch": 1.5416726438100703, |
| "grad_norm": 44.0, |
| "learning_rate": 1.5377166156982672e-05, |
| "loss": 0.4736, |
| "step": 1344 |
| }, |
| { |
| "epoch": 1.5428202553435661, |
| "grad_norm": 11.625, |
| "learning_rate": 1.5372069317023446e-05, |
| "loss": 0.3527, |
| "step": 1345 |
| }, |
| { |
| "epoch": 1.543967866877062, |
| "grad_norm": 31.75, |
| "learning_rate": 1.536697247706422e-05, |
| "loss": 0.6221, |
| "step": 1346 |
| }, |
| { |
| "epoch": 1.545115478410558, |
| "grad_norm": 47.0, |
| "learning_rate": 1.5361875637104997e-05, |
| "loss": 0.6081, |
| "step": 1347 |
| }, |
| { |
| "epoch": 1.546263089944054, |
| "grad_norm": 22.5, |
| "learning_rate": 1.535677879714577e-05, |
| "loss": 0.4948, |
| "step": 1348 |
| }, |
| { |
| "epoch": 1.54741070147755, |
| "grad_norm": 82.5, |
| "learning_rate": 1.5351681957186545e-05, |
| "loss": 0.7993, |
| "step": 1349 |
| }, |
| { |
| "epoch": 1.5485583130110459, |
| "grad_norm": 45.5, |
| "learning_rate": 1.534658511722732e-05, |
| "loss": 0.6413, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.5497059245445417, |
| "grad_norm": 14.875, |
| "learning_rate": 1.5341488277268096e-05, |
| "loss": 0.4485, |
| "step": 1351 |
| }, |
| { |
| "epoch": 1.5508535360780376, |
| "grad_norm": 50.0, |
| "learning_rate": 1.533639143730887e-05, |
| "loss": 1.0687, |
| "step": 1352 |
| }, |
| { |
| "epoch": 1.5520011476115334, |
| "grad_norm": 58.75, |
| "learning_rate": 1.5331294597349644e-05, |
| "loss": 1.0185, |
| "step": 1353 |
| }, |
| { |
| "epoch": 1.5531487591450293, |
| "grad_norm": 43.0, |
| "learning_rate": 1.532619775739042e-05, |
| "loss": 0.5074, |
| "step": 1354 |
| }, |
| { |
| "epoch": 1.5542963706785253, |
| "grad_norm": 47.25, |
| "learning_rate": 1.5321100917431192e-05, |
| "loss": 0.3446, |
| "step": 1355 |
| }, |
| { |
| "epoch": 1.5554439822120212, |
| "grad_norm": 37.25, |
| "learning_rate": 1.531600407747197e-05, |
| "loss": 0.3883, |
| "step": 1356 |
| }, |
| { |
| "epoch": 1.5565915937455173, |
| "grad_norm": 79.0, |
| "learning_rate": 1.5310907237512743e-05, |
| "loss": 0.8577, |
| "step": 1357 |
| }, |
| { |
| "epoch": 1.557739205279013, |
| "grad_norm": 45.0, |
| "learning_rate": 1.5305810397553517e-05, |
| "loss": 0.5937, |
| "step": 1358 |
| }, |
| { |
| "epoch": 1.558886816812509, |
| "grad_norm": 56.25, |
| "learning_rate": 1.5300713557594294e-05, |
| "loss": 0.8568, |
| "step": 1359 |
| }, |
| { |
| "epoch": 1.5600344283460048, |
| "grad_norm": 33.25, |
| "learning_rate": 1.5295616717635068e-05, |
| "loss": 0.4064, |
| "step": 1360 |
| }, |
| { |
| "epoch": 1.5611820398795007, |
| "grad_norm": 59.0, |
| "learning_rate": 1.5290519877675842e-05, |
| "loss": 0.6067, |
| "step": 1361 |
| }, |
| { |
| "epoch": 1.5623296514129967, |
| "grad_norm": 11.75, |
| "learning_rate": 1.528542303771662e-05, |
| "loss": 0.3149, |
| "step": 1362 |
| }, |
| { |
| "epoch": 1.5634772629464926, |
| "grad_norm": 38.0, |
| "learning_rate": 1.528032619775739e-05, |
| "loss": 0.5482, |
| "step": 1363 |
| }, |
| { |
| "epoch": 1.5646248744799887, |
| "grad_norm": 43.0, |
| "learning_rate": 1.5275229357798167e-05, |
| "loss": 0.3758, |
| "step": 1364 |
| }, |
| { |
| "epoch": 1.5657724860134845, |
| "grad_norm": 17.625, |
| "learning_rate": 1.527013251783894e-05, |
| "loss": 0.0865, |
| "step": 1365 |
| }, |
| { |
| "epoch": 1.5669200975469804, |
| "grad_norm": 79.0, |
| "learning_rate": 1.5265035677879715e-05, |
| "loss": 1.07, |
| "step": 1366 |
| }, |
| { |
| "epoch": 1.5680677090804762, |
| "grad_norm": 86.5, |
| "learning_rate": 1.5259938837920492e-05, |
| "loss": 1.2776, |
| "step": 1367 |
| }, |
| { |
| "epoch": 1.569215320613972, |
| "grad_norm": 65.0, |
| "learning_rate": 1.5254841997961264e-05, |
| "loss": 1.0829, |
| "step": 1368 |
| }, |
| { |
| "epoch": 1.570362932147468, |
| "grad_norm": 11.25, |
| "learning_rate": 1.524974515800204e-05, |
| "loss": 0.1616, |
| "step": 1369 |
| }, |
| { |
| "epoch": 1.571510543680964, |
| "grad_norm": 175.0, |
| "learning_rate": 1.5244648318042814e-05, |
| "loss": 0.788, |
| "step": 1370 |
| }, |
| { |
| "epoch": 1.57265815521446, |
| "grad_norm": 52.25, |
| "learning_rate": 1.523955147808359e-05, |
| "loss": 0.6801, |
| "step": 1371 |
| }, |
| { |
| "epoch": 1.573805766747956, |
| "grad_norm": 90.0, |
| "learning_rate": 1.5234454638124365e-05, |
| "loss": 1.1125, |
| "step": 1372 |
| }, |
| { |
| "epoch": 1.5749533782814518, |
| "grad_norm": 69.5, |
| "learning_rate": 1.5229357798165139e-05, |
| "loss": 0.7275, |
| "step": 1373 |
| }, |
| { |
| "epoch": 1.5761009898149476, |
| "grad_norm": 21.625, |
| "learning_rate": 1.5224260958205915e-05, |
| "loss": 0.2809, |
| "step": 1374 |
| }, |
| { |
| "epoch": 1.5772486013484435, |
| "grad_norm": 41.5, |
| "learning_rate": 1.5219164118246687e-05, |
| "loss": 1.0073, |
| "step": 1375 |
| }, |
| { |
| "epoch": 1.5783962128819393, |
| "grad_norm": 65.5, |
| "learning_rate": 1.5214067278287462e-05, |
| "loss": 0.6342, |
| "step": 1376 |
| }, |
| { |
| "epoch": 1.5795438244154354, |
| "grad_norm": 19.5, |
| "learning_rate": 1.5208970438328238e-05, |
| "loss": 0.2352, |
| "step": 1377 |
| }, |
| { |
| "epoch": 1.5806914359489312, |
| "grad_norm": 17.125, |
| "learning_rate": 1.5203873598369012e-05, |
| "loss": 0.5829, |
| "step": 1378 |
| }, |
| { |
| "epoch": 1.5818390474824273, |
| "grad_norm": 40.25, |
| "learning_rate": 1.5198776758409788e-05, |
| "loss": 0.5567, |
| "step": 1379 |
| }, |
| { |
| "epoch": 1.5829866590159232, |
| "grad_norm": 20.0, |
| "learning_rate": 1.5193679918450561e-05, |
| "loss": 0.6663, |
| "step": 1380 |
| }, |
| { |
| "epoch": 1.584134270549419, |
| "grad_norm": 84.0, |
| "learning_rate": 1.5188583078491337e-05, |
| "loss": 0.6399, |
| "step": 1381 |
| }, |
| { |
| "epoch": 1.5852818820829149, |
| "grad_norm": 13.5, |
| "learning_rate": 1.5183486238532111e-05, |
| "loss": 0.3913, |
| "step": 1382 |
| }, |
| { |
| "epoch": 1.5864294936164107, |
| "grad_norm": 39.75, |
| "learning_rate": 1.5178389398572887e-05, |
| "loss": 0.4537, |
| "step": 1383 |
| }, |
| { |
| "epoch": 1.5875771051499068, |
| "grad_norm": 10.3125, |
| "learning_rate": 1.5173292558613662e-05, |
| "loss": 0.5198, |
| "step": 1384 |
| }, |
| { |
| "epoch": 1.5887247166834026, |
| "grad_norm": 26.75, |
| "learning_rate": 1.5168195718654434e-05, |
| "loss": 0.4686, |
| "step": 1385 |
| }, |
| { |
| "epoch": 1.5898723282168987, |
| "grad_norm": 57.25, |
| "learning_rate": 1.516309887869521e-05, |
| "loss": 0.5172, |
| "step": 1386 |
| }, |
| { |
| "epoch": 1.5910199397503946, |
| "grad_norm": 82.0, |
| "learning_rate": 1.5158002038735984e-05, |
| "loss": 0.9411, |
| "step": 1387 |
| }, |
| { |
| "epoch": 1.5921675512838904, |
| "grad_norm": 31.0, |
| "learning_rate": 1.515290519877676e-05, |
| "loss": 0.3182, |
| "step": 1388 |
| }, |
| { |
| "epoch": 1.5933151628173863, |
| "grad_norm": 79.5, |
| "learning_rate": 1.5147808358817535e-05, |
| "loss": 0.7013, |
| "step": 1389 |
| }, |
| { |
| "epoch": 1.5944627743508821, |
| "grad_norm": 17.875, |
| "learning_rate": 1.5142711518858309e-05, |
| "loss": 0.5569, |
| "step": 1390 |
| }, |
| { |
| "epoch": 1.595610385884378, |
| "grad_norm": 23.375, |
| "learning_rate": 1.5137614678899085e-05, |
| "loss": 0.5306, |
| "step": 1391 |
| }, |
| { |
| "epoch": 1.596757997417874, |
| "grad_norm": 11.5, |
| "learning_rate": 1.5132517838939857e-05, |
| "loss": 0.2887, |
| "step": 1392 |
| }, |
| { |
| "epoch": 1.5979056089513701, |
| "grad_norm": 22.5, |
| "learning_rate": 1.5127420998980632e-05, |
| "loss": 0.5286, |
| "step": 1393 |
| }, |
| { |
| "epoch": 1.599053220484866, |
| "grad_norm": 21.0, |
| "learning_rate": 1.5122324159021408e-05, |
| "loss": 0.3716, |
| "step": 1394 |
| }, |
| { |
| "epoch": 1.6002008320183618, |
| "grad_norm": 63.25, |
| "learning_rate": 1.5117227319062182e-05, |
| "loss": 0.7257, |
| "step": 1395 |
| }, |
| { |
| "epoch": 1.6013484435518577, |
| "grad_norm": 7.6875, |
| "learning_rate": 1.5112130479102958e-05, |
| "loss": 0.127, |
| "step": 1396 |
| }, |
| { |
| "epoch": 1.6024960550853535, |
| "grad_norm": 17.0, |
| "learning_rate": 1.5107033639143731e-05, |
| "loss": 0.2272, |
| "step": 1397 |
| }, |
| { |
| "epoch": 1.6036436666188494, |
| "grad_norm": 30.875, |
| "learning_rate": 1.5101936799184507e-05, |
| "loss": 0.4778, |
| "step": 1398 |
| }, |
| { |
| "epoch": 1.6047912781523455, |
| "grad_norm": 19.5, |
| "learning_rate": 1.5096839959225283e-05, |
| "loss": 0.5537, |
| "step": 1399 |
| }, |
| { |
| "epoch": 1.6059388896858413, |
| "grad_norm": 57.5, |
| "learning_rate": 1.5091743119266057e-05, |
| "loss": 0.6817, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.6059388896858413, |
| "eval_accuracy": 0.63, |
| "eval_loss": 0.49080872535705566, |
| "eval_runtime": 49.7511, |
| "eval_samples_per_second": 2.01, |
| "eval_steps_per_second": 2.01, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.6070865012193374, |
| "grad_norm": 14.125, |
| "learning_rate": 1.5086646279306832e-05, |
| "loss": 0.6062, |
| "step": 1401 |
| }, |
| { |
| "epoch": 1.6082341127528332, |
| "grad_norm": 9.625, |
| "learning_rate": 1.5081549439347604e-05, |
| "loss": 0.2577, |
| "step": 1402 |
| }, |
| { |
| "epoch": 1.609381724286329, |
| "grad_norm": 19.25, |
| "learning_rate": 1.507645259938838e-05, |
| "loss": 0.4531, |
| "step": 1403 |
| }, |
| { |
| "epoch": 1.610529335819825, |
| "grad_norm": 41.0, |
| "learning_rate": 1.5071355759429156e-05, |
| "loss": 0.6092, |
| "step": 1404 |
| }, |
| { |
| "epoch": 1.6116769473533208, |
| "grad_norm": 34.0, |
| "learning_rate": 1.506625891946993e-05, |
| "loss": 0.4515, |
| "step": 1405 |
| }, |
| { |
| "epoch": 1.6128245588868169, |
| "grad_norm": 23.75, |
| "learning_rate": 1.5061162079510705e-05, |
| "loss": 0.5269, |
| "step": 1406 |
| }, |
| { |
| "epoch": 1.6139721704203127, |
| "grad_norm": 31.75, |
| "learning_rate": 1.5056065239551479e-05, |
| "loss": 0.4641, |
| "step": 1407 |
| }, |
| { |
| "epoch": 1.6151197819538088, |
| "grad_norm": 33.75, |
| "learning_rate": 1.5050968399592255e-05, |
| "loss": 0.3172, |
| "step": 1408 |
| }, |
| { |
| "epoch": 1.6162673934873046, |
| "grad_norm": 9.9375, |
| "learning_rate": 1.504587155963303e-05, |
| "loss": 0.231, |
| "step": 1409 |
| }, |
| { |
| "epoch": 1.6174150050208005, |
| "grad_norm": 10.5, |
| "learning_rate": 1.5040774719673802e-05, |
| "loss": 0.19, |
| "step": 1410 |
| }, |
| { |
| "epoch": 1.6185626165542963, |
| "grad_norm": 26.375, |
| "learning_rate": 1.5035677879714578e-05, |
| "loss": 0.6969, |
| "step": 1411 |
| }, |
| { |
| "epoch": 1.6197102280877922, |
| "grad_norm": 11.5625, |
| "learning_rate": 1.5030581039755352e-05, |
| "loss": 0.3084, |
| "step": 1412 |
| }, |
| { |
| "epoch": 1.620857839621288, |
| "grad_norm": 33.5, |
| "learning_rate": 1.5025484199796127e-05, |
| "loss": 0.9029, |
| "step": 1413 |
| }, |
| { |
| "epoch": 1.6220054511547841, |
| "grad_norm": 26.75, |
| "learning_rate": 1.5020387359836903e-05, |
| "loss": 0.7869, |
| "step": 1414 |
| }, |
| { |
| "epoch": 1.62315306268828, |
| "grad_norm": 18.5, |
| "learning_rate": 1.5015290519877677e-05, |
| "loss": 0.555, |
| "step": 1415 |
| }, |
| { |
| "epoch": 1.624300674221776, |
| "grad_norm": 26.25, |
| "learning_rate": 1.5010193679918453e-05, |
| "loss": 0.8343, |
| "step": 1416 |
| }, |
| { |
| "epoch": 1.625448285755272, |
| "grad_norm": 18.625, |
| "learning_rate": 1.5005096839959225e-05, |
| "loss": 0.4117, |
| "step": 1417 |
| }, |
| { |
| "epoch": 1.6265958972887677, |
| "grad_norm": 30.75, |
| "learning_rate": 1.5000000000000002e-05, |
| "loss": 0.3806, |
| "step": 1418 |
| }, |
| { |
| "epoch": 1.6277435088222636, |
| "grad_norm": 24.375, |
| "learning_rate": 1.4994903160040778e-05, |
| "loss": 0.4463, |
| "step": 1419 |
| }, |
| { |
| "epoch": 1.6288911203557594, |
| "grad_norm": 14.5, |
| "learning_rate": 1.498980632008155e-05, |
| "loss": 0.1973, |
| "step": 1420 |
| }, |
| { |
| "epoch": 1.6300387318892555, |
| "grad_norm": 19.25, |
| "learning_rate": 1.4984709480122325e-05, |
| "loss": 0.689, |
| "step": 1421 |
| }, |
| { |
| "epoch": 1.6311863434227514, |
| "grad_norm": 21.125, |
| "learning_rate": 1.49796126401631e-05, |
| "loss": 0.5135, |
| "step": 1422 |
| }, |
| { |
| "epoch": 1.6323339549562474, |
| "grad_norm": 29.5, |
| "learning_rate": 1.4974515800203875e-05, |
| "loss": 0.3149, |
| "step": 1423 |
| }, |
| { |
| "epoch": 1.6334815664897433, |
| "grad_norm": 13.1875, |
| "learning_rate": 1.496941896024465e-05, |
| "loss": 0.2246, |
| "step": 1424 |
| }, |
| { |
| "epoch": 1.6346291780232391, |
| "grad_norm": 76.0, |
| "learning_rate": 1.4964322120285424e-05, |
| "loss": 0.7469, |
| "step": 1425 |
| }, |
| { |
| "epoch": 1.635776789556735, |
| "grad_norm": 90.5, |
| "learning_rate": 1.49592252803262e-05, |
| "loss": 0.8995, |
| "step": 1426 |
| }, |
| { |
| "epoch": 1.6369244010902309, |
| "grad_norm": 23.25, |
| "learning_rate": 1.4954128440366972e-05, |
| "loss": 0.7444, |
| "step": 1427 |
| }, |
| { |
| "epoch": 1.6380720126237267, |
| "grad_norm": 18.0, |
| "learning_rate": 1.4949031600407748e-05, |
| "loss": 0.4238, |
| "step": 1428 |
| }, |
| { |
| "epoch": 1.6392196241572228, |
| "grad_norm": 31.25, |
| "learning_rate": 1.4943934760448523e-05, |
| "loss": 0.4251, |
| "step": 1429 |
| }, |
| { |
| "epoch": 1.6403672356907189, |
| "grad_norm": 20.0, |
| "learning_rate": 1.4938837920489297e-05, |
| "loss": 0.6363, |
| "step": 1430 |
| }, |
| { |
| "epoch": 1.6415148472242147, |
| "grad_norm": 42.0, |
| "learning_rate": 1.4933741080530073e-05, |
| "loss": 0.6076, |
| "step": 1431 |
| }, |
| { |
| "epoch": 1.6426624587577106, |
| "grad_norm": 62.0, |
| "learning_rate": 1.4928644240570847e-05, |
| "loss": 0.5137, |
| "step": 1432 |
| }, |
| { |
| "epoch": 1.6438100702912064, |
| "grad_norm": 26.125, |
| "learning_rate": 1.4923547400611623e-05, |
| "loss": 0.4482, |
| "step": 1433 |
| }, |
| { |
| "epoch": 1.6449576818247023, |
| "grad_norm": 66.0, |
| "learning_rate": 1.4918450560652398e-05, |
| "loss": 0.9944, |
| "step": 1434 |
| }, |
| { |
| "epoch": 1.646105293358198, |
| "grad_norm": 21.25, |
| "learning_rate": 1.491335372069317e-05, |
| "loss": 0.2451, |
| "step": 1435 |
| }, |
| { |
| "epoch": 1.6472529048916942, |
| "grad_norm": 18.0, |
| "learning_rate": 1.4908256880733946e-05, |
| "loss": 0.699, |
| "step": 1436 |
| }, |
| { |
| "epoch": 1.64840051642519, |
| "grad_norm": 17.125, |
| "learning_rate": 1.490316004077472e-05, |
| "loss": 0.4074, |
| "step": 1437 |
| }, |
| { |
| "epoch": 1.649548127958686, |
| "grad_norm": 59.25, |
| "learning_rate": 1.4898063200815495e-05, |
| "loss": 0.4132, |
| "step": 1438 |
| }, |
| { |
| "epoch": 1.650695739492182, |
| "grad_norm": 20.5, |
| "learning_rate": 1.4892966360856271e-05, |
| "loss": 0.339, |
| "step": 1439 |
| }, |
| { |
| "epoch": 1.6518433510256778, |
| "grad_norm": 27.0, |
| "learning_rate": 1.4887869520897045e-05, |
| "loss": 0.3662, |
| "step": 1440 |
| }, |
| { |
| "epoch": 1.6529909625591737, |
| "grad_norm": 9.5625, |
| "learning_rate": 1.488277268093782e-05, |
| "loss": 0.3819, |
| "step": 1441 |
| }, |
| { |
| "epoch": 1.6541385740926695, |
| "grad_norm": 130.0, |
| "learning_rate": 1.4877675840978594e-05, |
| "loss": 0.4787, |
| "step": 1442 |
| }, |
| { |
| "epoch": 1.6552861856261656, |
| "grad_norm": 29.375, |
| "learning_rate": 1.487257900101937e-05, |
| "loss": 0.4502, |
| "step": 1443 |
| }, |
| { |
| "epoch": 1.6564337971596614, |
| "grad_norm": 28.125, |
| "learning_rate": 1.4867482161060146e-05, |
| "loss": 0.4953, |
| "step": 1444 |
| }, |
| { |
| "epoch": 1.6575814086931575, |
| "grad_norm": 36.0, |
| "learning_rate": 1.4862385321100918e-05, |
| "loss": 0.9421, |
| "step": 1445 |
| }, |
| { |
| "epoch": 1.6587290202266534, |
| "grad_norm": 35.0, |
| "learning_rate": 1.4857288481141693e-05, |
| "loss": 0.3018, |
| "step": 1446 |
| }, |
| { |
| "epoch": 1.6598766317601492, |
| "grad_norm": 18.625, |
| "learning_rate": 1.4852191641182467e-05, |
| "loss": 0.2527, |
| "step": 1447 |
| }, |
| { |
| "epoch": 1.661024243293645, |
| "grad_norm": 12.5, |
| "learning_rate": 1.4847094801223243e-05, |
| "loss": 0.3482, |
| "step": 1448 |
| }, |
| { |
| "epoch": 1.662171854827141, |
| "grad_norm": 18.75, |
| "learning_rate": 1.4841997961264019e-05, |
| "loss": 0.1798, |
| "step": 1449 |
| }, |
| { |
| "epoch": 1.6633194663606368, |
| "grad_norm": 6.34375, |
| "learning_rate": 1.4836901121304792e-05, |
| "loss": 0.1118, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.6644670778941328, |
| "grad_norm": 21.375, |
| "learning_rate": 1.4831804281345568e-05, |
| "loss": 0.5154, |
| "step": 1451 |
| }, |
| { |
| "epoch": 1.665614689427629, |
| "grad_norm": 57.75, |
| "learning_rate": 1.482670744138634e-05, |
| "loss": 0.845, |
| "step": 1452 |
| }, |
| { |
| "epoch": 1.6667623009611248, |
| "grad_norm": 31.875, |
| "learning_rate": 1.4821610601427116e-05, |
| "loss": 0.6743, |
| "step": 1453 |
| }, |
| { |
| "epoch": 1.6679099124946206, |
| "grad_norm": 30.5, |
| "learning_rate": 1.4816513761467891e-05, |
| "loss": 0.6286, |
| "step": 1454 |
| }, |
| { |
| "epoch": 1.6690575240281165, |
| "grad_norm": 26.25, |
| "learning_rate": 1.4811416921508665e-05, |
| "loss": 0.2807, |
| "step": 1455 |
| }, |
| { |
| "epoch": 1.6702051355616123, |
| "grad_norm": 21.25, |
| "learning_rate": 1.4806320081549441e-05, |
| "loss": 0.5438, |
| "step": 1456 |
| }, |
| { |
| "epoch": 1.6713527470951082, |
| "grad_norm": 15.875, |
| "learning_rate": 1.4801223241590215e-05, |
| "loss": 0.4873, |
| "step": 1457 |
| }, |
| { |
| "epoch": 1.6725003586286042, |
| "grad_norm": 12.3125, |
| "learning_rate": 1.479612640163099e-05, |
| "loss": 0.2455, |
| "step": 1458 |
| }, |
| { |
| "epoch": 1.6736479701621, |
| "grad_norm": 36.25, |
| "learning_rate": 1.4791029561671764e-05, |
| "loss": 0.6741, |
| "step": 1459 |
| }, |
| { |
| "epoch": 1.6747955816955962, |
| "grad_norm": 36.25, |
| "learning_rate": 1.478593272171254e-05, |
| "loss": 0.2113, |
| "step": 1460 |
| }, |
| { |
| "epoch": 1.675943193229092, |
| "grad_norm": 15.0625, |
| "learning_rate": 1.4780835881753316e-05, |
| "loss": 0.3223, |
| "step": 1461 |
| }, |
| { |
| "epoch": 1.6770908047625879, |
| "grad_norm": 70.5, |
| "learning_rate": 1.4775739041794088e-05, |
| "loss": 0.7413, |
| "step": 1462 |
| }, |
| { |
| "epoch": 1.6782384162960837, |
| "grad_norm": 50.25, |
| "learning_rate": 1.4770642201834863e-05, |
| "loss": 0.5802, |
| "step": 1463 |
| }, |
| { |
| "epoch": 1.6793860278295796, |
| "grad_norm": 13.3125, |
| "learning_rate": 1.4765545361875637e-05, |
| "loss": 0.3988, |
| "step": 1464 |
| }, |
| { |
| "epoch": 1.6805336393630756, |
| "grad_norm": 22.125, |
| "learning_rate": 1.4760448521916413e-05, |
| "loss": 0.2763, |
| "step": 1465 |
| }, |
| { |
| "epoch": 1.6816812508965715, |
| "grad_norm": 63.5, |
| "learning_rate": 1.4755351681957188e-05, |
| "loss": 0.5855, |
| "step": 1466 |
| }, |
| { |
| "epoch": 1.6828288624300676, |
| "grad_norm": 41.75, |
| "learning_rate": 1.4750254841997962e-05, |
| "loss": 0.413, |
| "step": 1467 |
| }, |
| { |
| "epoch": 1.6839764739635634, |
| "grad_norm": 75.0, |
| "learning_rate": 1.4745158002038738e-05, |
| "loss": 1.2905, |
| "step": 1468 |
| }, |
| { |
| "epoch": 1.6851240854970593, |
| "grad_norm": 35.0, |
| "learning_rate": 1.474006116207951e-05, |
| "loss": 0.5774, |
| "step": 1469 |
| }, |
| { |
| "epoch": 1.6862716970305551, |
| "grad_norm": 28.25, |
| "learning_rate": 1.4734964322120286e-05, |
| "loss": 0.8901, |
| "step": 1470 |
| }, |
| { |
| "epoch": 1.687419308564051, |
| "grad_norm": 35.0, |
| "learning_rate": 1.4729867482161061e-05, |
| "loss": 0.457, |
| "step": 1471 |
| }, |
| { |
| "epoch": 1.6885669200975468, |
| "grad_norm": 34.0, |
| "learning_rate": 1.4724770642201835e-05, |
| "loss": 0.4638, |
| "step": 1472 |
| }, |
| { |
| "epoch": 1.689714531631043, |
| "grad_norm": 37.5, |
| "learning_rate": 1.4719673802242611e-05, |
| "loss": 0.5084, |
| "step": 1473 |
| }, |
| { |
| "epoch": 1.6908621431645388, |
| "grad_norm": 26.625, |
| "learning_rate": 1.4714576962283385e-05, |
| "loss": 0.2821, |
| "step": 1474 |
| }, |
| { |
| "epoch": 1.6920097546980348, |
| "grad_norm": 34.25, |
| "learning_rate": 1.470948012232416e-05, |
| "loss": 0.3812, |
| "step": 1475 |
| }, |
| { |
| "epoch": 1.6931573662315307, |
| "grad_norm": 59.0, |
| "learning_rate": 1.4704383282364936e-05, |
| "loss": 0.5477, |
| "step": 1476 |
| }, |
| { |
| "epoch": 1.6943049777650265, |
| "grad_norm": 28.25, |
| "learning_rate": 1.469928644240571e-05, |
| "loss": 0.6984, |
| "step": 1477 |
| }, |
| { |
| "epoch": 1.6954525892985224, |
| "grad_norm": 69.5, |
| "learning_rate": 1.4694189602446486e-05, |
| "loss": 0.7855, |
| "step": 1478 |
| }, |
| { |
| "epoch": 1.6966002008320182, |
| "grad_norm": 49.0, |
| "learning_rate": 1.4689092762487258e-05, |
| "loss": 0.984, |
| "step": 1479 |
| }, |
| { |
| "epoch": 1.6977478123655143, |
| "grad_norm": 22.875, |
| "learning_rate": 1.4683995922528033e-05, |
| "loss": 0.6088, |
| "step": 1480 |
| }, |
| { |
| "epoch": 1.6988954238990102, |
| "grad_norm": 17.875, |
| "learning_rate": 1.4678899082568809e-05, |
| "loss": 0.1793, |
| "step": 1481 |
| }, |
| { |
| "epoch": 1.7000430354325062, |
| "grad_norm": 22.375, |
| "learning_rate": 1.4673802242609583e-05, |
| "loss": 0.4399, |
| "step": 1482 |
| }, |
| { |
| "epoch": 1.701190646966002, |
| "grad_norm": 44.75, |
| "learning_rate": 1.4668705402650358e-05, |
| "loss": 0.8196, |
| "step": 1483 |
| }, |
| { |
| "epoch": 1.702338258499498, |
| "grad_norm": 17.75, |
| "learning_rate": 1.4663608562691132e-05, |
| "loss": 0.3481, |
| "step": 1484 |
| }, |
| { |
| "epoch": 1.7034858700329938, |
| "grad_norm": 30.75, |
| "learning_rate": 1.4658511722731908e-05, |
| "loss": 0.5881, |
| "step": 1485 |
| }, |
| { |
| "epoch": 1.7046334815664896, |
| "grad_norm": 54.5, |
| "learning_rate": 1.4653414882772684e-05, |
| "loss": 0.9103, |
| "step": 1486 |
| }, |
| { |
| "epoch": 1.7057810930999855, |
| "grad_norm": 22.0, |
| "learning_rate": 1.4648318042813456e-05, |
| "loss": 0.9757, |
| "step": 1487 |
| }, |
| { |
| "epoch": 1.7069287046334816, |
| "grad_norm": 41.25, |
| "learning_rate": 1.4643221202854231e-05, |
| "loss": 0.2791, |
| "step": 1488 |
| }, |
| { |
| "epoch": 1.7080763161669776, |
| "grad_norm": 72.5, |
| "learning_rate": 1.4638124362895005e-05, |
| "loss": 0.6413, |
| "step": 1489 |
| }, |
| { |
| "epoch": 1.7092239277004735, |
| "grad_norm": 31.25, |
| "learning_rate": 1.463302752293578e-05, |
| "loss": 0.6097, |
| "step": 1490 |
| }, |
| { |
| "epoch": 1.7103715392339693, |
| "grad_norm": 31.625, |
| "learning_rate": 1.4627930682976556e-05, |
| "loss": 0.6532, |
| "step": 1491 |
| }, |
| { |
| "epoch": 1.7115191507674652, |
| "grad_norm": 23.75, |
| "learning_rate": 1.462283384301733e-05, |
| "loss": 0.5511, |
| "step": 1492 |
| }, |
| { |
| "epoch": 1.712666762300961, |
| "grad_norm": 44.25, |
| "learning_rate": 1.4617737003058106e-05, |
| "loss": 0.5933, |
| "step": 1493 |
| }, |
| { |
| "epoch": 1.713814373834457, |
| "grad_norm": 175.0, |
| "learning_rate": 1.461264016309888e-05, |
| "loss": 0.8476, |
| "step": 1494 |
| }, |
| { |
| "epoch": 1.714961985367953, |
| "grad_norm": 12.0625, |
| "learning_rate": 1.4607543323139655e-05, |
| "loss": 0.2916, |
| "step": 1495 |
| }, |
| { |
| "epoch": 1.7161095969014488, |
| "grad_norm": 40.0, |
| "learning_rate": 1.4602446483180431e-05, |
| "loss": 0.4779, |
| "step": 1496 |
| }, |
| { |
| "epoch": 1.717257208434945, |
| "grad_norm": 19.25, |
| "learning_rate": 1.4597349643221203e-05, |
| "loss": 0.3403, |
| "step": 1497 |
| }, |
| { |
| "epoch": 1.7184048199684407, |
| "grad_norm": 20.125, |
| "learning_rate": 1.4592252803261979e-05, |
| "loss": 0.4278, |
| "step": 1498 |
| }, |
| { |
| "epoch": 1.7195524315019366, |
| "grad_norm": 11.125, |
| "learning_rate": 1.4587155963302753e-05, |
| "loss": 0.4435, |
| "step": 1499 |
| }, |
| { |
| "epoch": 1.7207000430354324, |
| "grad_norm": 47.75, |
| "learning_rate": 1.4582059123343528e-05, |
| "loss": 0.6405, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.7207000430354324, |
| "eval_accuracy": 0.64, |
| "eval_loss": 0.4719592034816742, |
| "eval_runtime": 49.6324, |
| "eval_samples_per_second": 2.015, |
| "eval_steps_per_second": 2.015, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.7218476545689283, |
| "grad_norm": 11.5, |
| "learning_rate": 1.4576962283384304e-05, |
| "loss": 0.3975, |
| "step": 1501 |
| }, |
| { |
| "epoch": 1.7229952661024244, |
| "grad_norm": 20.875, |
| "learning_rate": 1.4571865443425078e-05, |
| "loss": 0.3939, |
| "step": 1502 |
| }, |
| { |
| "epoch": 1.7241428776359202, |
| "grad_norm": 44.25, |
| "learning_rate": 1.4566768603465853e-05, |
| "loss": 0.7124, |
| "step": 1503 |
| }, |
| { |
| "epoch": 1.7252904891694163, |
| "grad_norm": 33.0, |
| "learning_rate": 1.4561671763506626e-05, |
| "loss": 0.5179, |
| "step": 1504 |
| }, |
| { |
| "epoch": 1.7264381007029121, |
| "grad_norm": 13.9375, |
| "learning_rate": 1.4556574923547401e-05, |
| "loss": 0.6342, |
| "step": 1505 |
| }, |
| { |
| "epoch": 1.727585712236408, |
| "grad_norm": 20.75, |
| "learning_rate": 1.4551478083588177e-05, |
| "loss": 0.397, |
| "step": 1506 |
| }, |
| { |
| "epoch": 1.7287333237699039, |
| "grad_norm": 12.375, |
| "learning_rate": 1.454638124362895e-05, |
| "loss": 0.3495, |
| "step": 1507 |
| }, |
| { |
| "epoch": 1.7298809353033997, |
| "grad_norm": 53.75, |
| "learning_rate": 1.4541284403669726e-05, |
| "loss": 0.5092, |
| "step": 1508 |
| }, |
| { |
| "epoch": 1.7310285468368956, |
| "grad_norm": 14.25, |
| "learning_rate": 1.45361875637105e-05, |
| "loss": 0.1927, |
| "step": 1509 |
| }, |
| { |
| "epoch": 1.7321761583703916, |
| "grad_norm": 20.875, |
| "learning_rate": 1.4531090723751276e-05, |
| "loss": 0.7156, |
| "step": 1510 |
| }, |
| { |
| "epoch": 1.7333237699038875, |
| "grad_norm": 8.6875, |
| "learning_rate": 1.4525993883792051e-05, |
| "loss": 0.3399, |
| "step": 1511 |
| }, |
| { |
| "epoch": 1.7344713814373836, |
| "grad_norm": 16.125, |
| "learning_rate": 1.4520897043832824e-05, |
| "loss": 0.5978, |
| "step": 1512 |
| }, |
| { |
| "epoch": 1.7356189929708794, |
| "grad_norm": 42.0, |
| "learning_rate": 1.45158002038736e-05, |
| "loss": 0.9311, |
| "step": 1513 |
| }, |
| { |
| "epoch": 1.7367666045043753, |
| "grad_norm": 70.5, |
| "learning_rate": 1.4510703363914373e-05, |
| "loss": 0.7334, |
| "step": 1514 |
| }, |
| { |
| "epoch": 1.737914216037871, |
| "grad_norm": 16.625, |
| "learning_rate": 1.4505606523955149e-05, |
| "loss": 0.4106, |
| "step": 1515 |
| }, |
| { |
| "epoch": 1.739061827571367, |
| "grad_norm": 12.0, |
| "learning_rate": 1.4500509683995924e-05, |
| "loss": 0.2984, |
| "step": 1516 |
| }, |
| { |
| "epoch": 1.740209439104863, |
| "grad_norm": 27.125, |
| "learning_rate": 1.4495412844036698e-05, |
| "loss": 0.3245, |
| "step": 1517 |
| }, |
| { |
| "epoch": 1.7413570506383589, |
| "grad_norm": 40.25, |
| "learning_rate": 1.4490316004077474e-05, |
| "loss": 0.5248, |
| "step": 1518 |
| }, |
| { |
| "epoch": 1.742504662171855, |
| "grad_norm": 15.5, |
| "learning_rate": 1.4485219164118248e-05, |
| "loss": 0.3244, |
| "step": 1519 |
| }, |
| { |
| "epoch": 1.7436522737053508, |
| "grad_norm": 70.5, |
| "learning_rate": 1.4480122324159023e-05, |
| "loss": 0.9236, |
| "step": 1520 |
| }, |
| { |
| "epoch": 1.7447998852388467, |
| "grad_norm": 30.625, |
| "learning_rate": 1.4475025484199799e-05, |
| "loss": 0.8874, |
| "step": 1521 |
| }, |
| { |
| "epoch": 1.7459474967723425, |
| "grad_norm": 11.6875, |
| "learning_rate": 1.4469928644240571e-05, |
| "loss": 0.3286, |
| "step": 1522 |
| }, |
| { |
| "epoch": 1.7470951083058384, |
| "grad_norm": 26.875, |
| "learning_rate": 1.4464831804281347e-05, |
| "loss": 0.3404, |
| "step": 1523 |
| }, |
| { |
| "epoch": 1.7482427198393344, |
| "grad_norm": 15.375, |
| "learning_rate": 1.445973496432212e-05, |
| "loss": 0.4482, |
| "step": 1524 |
| }, |
| { |
| "epoch": 1.7493903313728303, |
| "grad_norm": 27.0, |
| "learning_rate": 1.4454638124362896e-05, |
| "loss": 0.476, |
| "step": 1525 |
| }, |
| { |
| "epoch": 1.7505379429063264, |
| "grad_norm": 20.5, |
| "learning_rate": 1.4449541284403672e-05, |
| "loss": 0.3796, |
| "step": 1526 |
| }, |
| { |
| "epoch": 1.7516855544398222, |
| "grad_norm": 47.75, |
| "learning_rate": 1.4444444444444446e-05, |
| "loss": 0.5618, |
| "step": 1527 |
| }, |
| { |
| "epoch": 1.752833165973318, |
| "grad_norm": 29.5, |
| "learning_rate": 1.4439347604485221e-05, |
| "loss": 0.4359, |
| "step": 1528 |
| }, |
| { |
| "epoch": 1.753980777506814, |
| "grad_norm": 52.25, |
| "learning_rate": 1.4434250764525994e-05, |
| "loss": 0.6163, |
| "step": 1529 |
| }, |
| { |
| "epoch": 1.7551283890403098, |
| "grad_norm": 19.125, |
| "learning_rate": 1.442915392456677e-05, |
| "loss": 0.5202, |
| "step": 1530 |
| }, |
| { |
| "epoch": 1.7562760005738056, |
| "grad_norm": 14.0, |
| "learning_rate": 1.4424057084607545e-05, |
| "loss": 0.3921, |
| "step": 1531 |
| }, |
| { |
| "epoch": 1.7574236121073017, |
| "grad_norm": 64.0, |
| "learning_rate": 1.4418960244648319e-05, |
| "loss": 0.7896, |
| "step": 1532 |
| }, |
| { |
| "epoch": 1.7585712236407975, |
| "grad_norm": 23.5, |
| "learning_rate": 1.4413863404689094e-05, |
| "loss": 0.4141, |
| "step": 1533 |
| }, |
| { |
| "epoch": 1.7597188351742936, |
| "grad_norm": 39.75, |
| "learning_rate": 1.4408766564729868e-05, |
| "loss": 0.8279, |
| "step": 1534 |
| }, |
| { |
| "epoch": 1.7608664467077895, |
| "grad_norm": 60.5, |
| "learning_rate": 1.4403669724770644e-05, |
| "loss": 0.6541, |
| "step": 1535 |
| }, |
| { |
| "epoch": 1.7620140582412853, |
| "grad_norm": 22.375, |
| "learning_rate": 1.4398572884811418e-05, |
| "loss": 0.4579, |
| "step": 1536 |
| }, |
| { |
| "epoch": 1.7631616697747812, |
| "grad_norm": 34.5, |
| "learning_rate": 1.4393476044852193e-05, |
| "loss": 0.3177, |
| "step": 1537 |
| }, |
| { |
| "epoch": 1.764309281308277, |
| "grad_norm": 13.9375, |
| "learning_rate": 1.4388379204892969e-05, |
| "loss": 0.405, |
| "step": 1538 |
| }, |
| { |
| "epoch": 1.765456892841773, |
| "grad_norm": 45.25, |
| "learning_rate": 1.4383282364933741e-05, |
| "loss": 0.4536, |
| "step": 1539 |
| }, |
| { |
| "epoch": 1.766604504375269, |
| "grad_norm": 15.0, |
| "learning_rate": 1.4378185524974517e-05, |
| "loss": 0.658, |
| "step": 1540 |
| }, |
| { |
| "epoch": 1.767752115908765, |
| "grad_norm": 23.125, |
| "learning_rate": 1.437308868501529e-05, |
| "loss": 0.5647, |
| "step": 1541 |
| }, |
| { |
| "epoch": 1.7688997274422609, |
| "grad_norm": 49.5, |
| "learning_rate": 1.4367991845056066e-05, |
| "loss": 0.6544, |
| "step": 1542 |
| }, |
| { |
| "epoch": 1.7700473389757567, |
| "grad_norm": 14.625, |
| "learning_rate": 1.4362895005096842e-05, |
| "loss": 0.3288, |
| "step": 1543 |
| }, |
| { |
| "epoch": 1.7711949505092526, |
| "grad_norm": 14.875, |
| "learning_rate": 1.4357798165137616e-05, |
| "loss": 0.5407, |
| "step": 1544 |
| }, |
| { |
| "epoch": 1.7723425620427484, |
| "grad_norm": 69.0, |
| "learning_rate": 1.4352701325178391e-05, |
| "loss": 0.4395, |
| "step": 1545 |
| }, |
| { |
| "epoch": 1.7734901735762443, |
| "grad_norm": 32.5, |
| "learning_rate": 1.4347604485219164e-05, |
| "loss": 0.4165, |
| "step": 1546 |
| }, |
| { |
| "epoch": 1.7746377851097404, |
| "grad_norm": 52.25, |
| "learning_rate": 1.434250764525994e-05, |
| "loss": 0.455, |
| "step": 1547 |
| }, |
| { |
| "epoch": 1.7757853966432364, |
| "grad_norm": 26.875, |
| "learning_rate": 1.4337410805300715e-05, |
| "loss": 0.5133, |
| "step": 1548 |
| }, |
| { |
| "epoch": 1.7769330081767323, |
| "grad_norm": 63.75, |
| "learning_rate": 1.4332313965341489e-05, |
| "loss": 0.8173, |
| "step": 1549 |
| }, |
| { |
| "epoch": 1.7780806197102281, |
| "grad_norm": 69.5, |
| "learning_rate": 1.4327217125382264e-05, |
| "loss": 0.7585, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.779228231243724, |
| "grad_norm": 12.25, |
| "learning_rate": 1.4322120285423038e-05, |
| "loss": 0.4586, |
| "step": 1551 |
| }, |
| { |
| "epoch": 1.7803758427772198, |
| "grad_norm": 76.0, |
| "learning_rate": 1.4317023445463814e-05, |
| "loss": 0.6924, |
| "step": 1552 |
| }, |
| { |
| "epoch": 1.7815234543107157, |
| "grad_norm": 12.4375, |
| "learning_rate": 1.431192660550459e-05, |
| "loss": 0.3333, |
| "step": 1553 |
| }, |
| { |
| "epoch": 1.7826710658442118, |
| "grad_norm": 23.5, |
| "learning_rate": 1.4306829765545363e-05, |
| "loss": 0.7329, |
| "step": 1554 |
| }, |
| { |
| "epoch": 1.7838186773777076, |
| "grad_norm": 22.875, |
| "learning_rate": 1.4301732925586139e-05, |
| "loss": 0.2949, |
| "step": 1555 |
| }, |
| { |
| "epoch": 1.7849662889112037, |
| "grad_norm": 52.0, |
| "learning_rate": 1.4296636085626911e-05, |
| "loss": 0.6708, |
| "step": 1556 |
| }, |
| { |
| "epoch": 1.7861139004446995, |
| "grad_norm": 75.0, |
| "learning_rate": 1.4291539245667687e-05, |
| "loss": 0.6416, |
| "step": 1557 |
| }, |
| { |
| "epoch": 1.7872615119781954, |
| "grad_norm": 16.0, |
| "learning_rate": 1.4286442405708462e-05, |
| "loss": 0.1615, |
| "step": 1558 |
| }, |
| { |
| "epoch": 1.7884091235116912, |
| "grad_norm": 13.8125, |
| "learning_rate": 1.4281345565749236e-05, |
| "loss": 0.2567, |
| "step": 1559 |
| }, |
| { |
| "epoch": 1.789556735045187, |
| "grad_norm": 27.125, |
| "learning_rate": 1.4276248725790012e-05, |
| "loss": 0.3011, |
| "step": 1560 |
| }, |
| { |
| "epoch": 1.7907043465786832, |
| "grad_norm": 37.5, |
| "learning_rate": 1.4271151885830786e-05, |
| "loss": 0.4136, |
| "step": 1561 |
| }, |
| { |
| "epoch": 1.791851958112179, |
| "grad_norm": 64.0, |
| "learning_rate": 1.4266055045871561e-05, |
| "loss": 0.5132, |
| "step": 1562 |
| }, |
| { |
| "epoch": 1.792999569645675, |
| "grad_norm": 23.5, |
| "learning_rate": 1.4260958205912337e-05, |
| "loss": 0.8581, |
| "step": 1563 |
| }, |
| { |
| "epoch": 1.794147181179171, |
| "grad_norm": 35.75, |
| "learning_rate": 1.4255861365953109e-05, |
| "loss": 0.4336, |
| "step": 1564 |
| }, |
| { |
| "epoch": 1.7952947927126668, |
| "grad_norm": 34.5, |
| "learning_rate": 1.4250764525993885e-05, |
| "loss": 0.7922, |
| "step": 1565 |
| }, |
| { |
| "epoch": 1.7964424042461626, |
| "grad_norm": 12.375, |
| "learning_rate": 1.4245667686034659e-05, |
| "loss": 0.385, |
| "step": 1566 |
| }, |
| { |
| "epoch": 1.7975900157796585, |
| "grad_norm": 22.125, |
| "learning_rate": 1.4240570846075434e-05, |
| "loss": 0.1375, |
| "step": 1567 |
| }, |
| { |
| "epoch": 1.7987376273131543, |
| "grad_norm": 49.25, |
| "learning_rate": 1.423547400611621e-05, |
| "loss": 0.2854, |
| "step": 1568 |
| }, |
| { |
| "epoch": 1.7998852388466504, |
| "grad_norm": 74.5, |
| "learning_rate": 1.4230377166156984e-05, |
| "loss": 0.8727, |
| "step": 1569 |
| }, |
| { |
| "epoch": 1.8010328503801463, |
| "grad_norm": 5.875, |
| "learning_rate": 1.422528032619776e-05, |
| "loss": 0.0896, |
| "step": 1570 |
| }, |
| { |
| "epoch": 1.8021804619136423, |
| "grad_norm": 23.0, |
| "learning_rate": 1.4220183486238533e-05, |
| "loss": 0.709, |
| "step": 1571 |
| }, |
| { |
| "epoch": 1.8033280734471382, |
| "grad_norm": 8.5, |
| "learning_rate": 1.4215086646279309e-05, |
| "loss": 0.1583, |
| "step": 1572 |
| }, |
| { |
| "epoch": 1.804475684980634, |
| "grad_norm": 45.25, |
| "learning_rate": 1.4209989806320084e-05, |
| "loss": 0.3676, |
| "step": 1573 |
| }, |
| { |
| "epoch": 1.80562329651413, |
| "grad_norm": 14.125, |
| "learning_rate": 1.4204892966360857e-05, |
| "loss": 0.2197, |
| "step": 1574 |
| }, |
| { |
| "epoch": 1.8067709080476257, |
| "grad_norm": 8.6875, |
| "learning_rate": 1.4199796126401632e-05, |
| "loss": 0.2591, |
| "step": 1575 |
| }, |
| { |
| "epoch": 1.8079185195811218, |
| "grad_norm": 43.25, |
| "learning_rate": 1.4194699286442406e-05, |
| "loss": 0.4846, |
| "step": 1576 |
| }, |
| { |
| "epoch": 1.8090661311146177, |
| "grad_norm": 32.0, |
| "learning_rate": 1.4189602446483182e-05, |
| "loss": 0.2703, |
| "step": 1577 |
| }, |
| { |
| "epoch": 1.8102137426481137, |
| "grad_norm": 46.25, |
| "learning_rate": 1.4184505606523957e-05, |
| "loss": 0.6256, |
| "step": 1578 |
| }, |
| { |
| "epoch": 1.8113613541816096, |
| "grad_norm": 38.25, |
| "learning_rate": 1.4179408766564731e-05, |
| "loss": 1.0764, |
| "step": 1579 |
| }, |
| { |
| "epoch": 1.8125089657151054, |
| "grad_norm": 21.625, |
| "learning_rate": 1.4174311926605507e-05, |
| "loss": 0.1879, |
| "step": 1580 |
| }, |
| { |
| "epoch": 1.8136565772486013, |
| "grad_norm": 25.375, |
| "learning_rate": 1.4169215086646279e-05, |
| "loss": 0.8602, |
| "step": 1581 |
| }, |
| { |
| "epoch": 1.8148041887820971, |
| "grad_norm": 73.0, |
| "learning_rate": 1.4164118246687055e-05, |
| "loss": 0.6298, |
| "step": 1582 |
| }, |
| { |
| "epoch": 1.8159518003155932, |
| "grad_norm": 33.0, |
| "learning_rate": 1.415902140672783e-05, |
| "loss": 0.3714, |
| "step": 1583 |
| }, |
| { |
| "epoch": 1.817099411849089, |
| "grad_norm": 13.9375, |
| "learning_rate": 1.4153924566768604e-05, |
| "loss": 0.2252, |
| "step": 1584 |
| }, |
| { |
| "epoch": 1.8182470233825851, |
| "grad_norm": 42.5, |
| "learning_rate": 1.414882772680938e-05, |
| "loss": 0.577, |
| "step": 1585 |
| }, |
| { |
| "epoch": 1.819394634916081, |
| "grad_norm": 28.375, |
| "learning_rate": 1.4143730886850154e-05, |
| "loss": 0.5294, |
| "step": 1586 |
| }, |
| { |
| "epoch": 1.8205422464495769, |
| "grad_norm": 29.25, |
| "learning_rate": 1.413863404689093e-05, |
| "loss": 0.4661, |
| "step": 1587 |
| }, |
| { |
| "epoch": 1.8216898579830727, |
| "grad_norm": 15.6875, |
| "learning_rate": 1.4133537206931705e-05, |
| "loss": 0.358, |
| "step": 1588 |
| }, |
| { |
| "epoch": 1.8228374695165686, |
| "grad_norm": 42.0, |
| "learning_rate": 1.4128440366972477e-05, |
| "loss": 0.5276, |
| "step": 1589 |
| }, |
| { |
| "epoch": 1.8239850810500644, |
| "grad_norm": 98.5, |
| "learning_rate": 1.4123343527013254e-05, |
| "loss": 0.6566, |
| "step": 1590 |
| }, |
| { |
| "epoch": 1.8251326925835605, |
| "grad_norm": 37.0, |
| "learning_rate": 1.4118246687054027e-05, |
| "loss": 0.2234, |
| "step": 1591 |
| }, |
| { |
| "epoch": 1.8262803041170563, |
| "grad_norm": 49.5, |
| "learning_rate": 1.4113149847094802e-05, |
| "loss": 0.5727, |
| "step": 1592 |
| }, |
| { |
| "epoch": 1.8274279156505524, |
| "grad_norm": 31.75, |
| "learning_rate": 1.4108053007135578e-05, |
| "loss": 0.7391, |
| "step": 1593 |
| }, |
| { |
| "epoch": 1.8285755271840483, |
| "grad_norm": 81.0, |
| "learning_rate": 1.4102956167176352e-05, |
| "loss": 0.762, |
| "step": 1594 |
| }, |
| { |
| "epoch": 1.829723138717544, |
| "grad_norm": 56.0, |
| "learning_rate": 1.4097859327217127e-05, |
| "loss": 0.371, |
| "step": 1595 |
| }, |
| { |
| "epoch": 1.83087075025104, |
| "grad_norm": 33.75, |
| "learning_rate": 1.4092762487257901e-05, |
| "loss": 0.5857, |
| "step": 1596 |
| }, |
| { |
| "epoch": 1.8320183617845358, |
| "grad_norm": 15.0625, |
| "learning_rate": 1.4087665647298677e-05, |
| "loss": 0.2163, |
| "step": 1597 |
| }, |
| { |
| "epoch": 1.8331659733180319, |
| "grad_norm": 21.25, |
| "learning_rate": 1.4082568807339452e-05, |
| "loss": 0.4766, |
| "step": 1598 |
| }, |
| { |
| "epoch": 1.8343135848515277, |
| "grad_norm": 49.75, |
| "learning_rate": 1.4077471967380225e-05, |
| "loss": 0.3923, |
| "step": 1599 |
| }, |
| { |
| "epoch": 1.8354611963850238, |
| "grad_norm": 38.25, |
| "learning_rate": 1.4072375127421e-05, |
| "loss": 0.445, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.8354611963850238, |
| "eval_accuracy": 0.69, |
| "eval_loss": 0.5018435120582581, |
| "eval_runtime": 49.4827, |
| "eval_samples_per_second": 2.021, |
| "eval_steps_per_second": 2.021, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.8366088079185197, |
| "grad_norm": 36.25, |
| "learning_rate": 1.4067278287461774e-05, |
| "loss": 0.7721, |
| "step": 1601 |
| }, |
| { |
| "epoch": 1.8377564194520155, |
| "grad_norm": 26.875, |
| "learning_rate": 1.406218144750255e-05, |
| "loss": 0.9496, |
| "step": 1602 |
| }, |
| { |
| "epoch": 1.8389040309855114, |
| "grad_norm": 47.5, |
| "learning_rate": 1.4057084607543325e-05, |
| "loss": 0.5079, |
| "step": 1603 |
| }, |
| { |
| "epoch": 1.8400516425190072, |
| "grad_norm": 14.0625, |
| "learning_rate": 1.40519877675841e-05, |
| "loss": 0.2523, |
| "step": 1604 |
| }, |
| { |
| "epoch": 1.841199254052503, |
| "grad_norm": 36.5, |
| "learning_rate": 1.4046890927624875e-05, |
| "loss": 0.6013, |
| "step": 1605 |
| }, |
| { |
| "epoch": 1.8423468655859991, |
| "grad_norm": 33.25, |
| "learning_rate": 1.4041794087665647e-05, |
| "loss": 0.4822, |
| "step": 1606 |
| }, |
| { |
| "epoch": 1.8434944771194952, |
| "grad_norm": 12.6875, |
| "learning_rate": 1.4036697247706423e-05, |
| "loss": 0.4923, |
| "step": 1607 |
| }, |
| { |
| "epoch": 1.844642088652991, |
| "grad_norm": 15.0625, |
| "learning_rate": 1.4031600407747196e-05, |
| "loss": 0.2454, |
| "step": 1608 |
| }, |
| { |
| "epoch": 1.845789700186487, |
| "grad_norm": 20.5, |
| "learning_rate": 1.4026503567787972e-05, |
| "loss": 0.2125, |
| "step": 1609 |
| }, |
| { |
| "epoch": 1.8469373117199828, |
| "grad_norm": 18.875, |
| "learning_rate": 1.4021406727828748e-05, |
| "loss": 0.6114, |
| "step": 1610 |
| }, |
| { |
| "epoch": 1.8480849232534786, |
| "grad_norm": 37.25, |
| "learning_rate": 1.4016309887869522e-05, |
| "loss": 0.4515, |
| "step": 1611 |
| }, |
| { |
| "epoch": 1.8492325347869745, |
| "grad_norm": 10.5, |
| "learning_rate": 1.4011213047910297e-05, |
| "loss": 0.2101, |
| "step": 1612 |
| }, |
| { |
| "epoch": 1.8503801463204705, |
| "grad_norm": 33.5, |
| "learning_rate": 1.4006116207951071e-05, |
| "loss": 0.5889, |
| "step": 1613 |
| }, |
| { |
| "epoch": 1.8515277578539664, |
| "grad_norm": 19.875, |
| "learning_rate": 1.4001019367991847e-05, |
| "loss": 0.4676, |
| "step": 1614 |
| }, |
| { |
| "epoch": 1.8526753693874625, |
| "grad_norm": 51.0, |
| "learning_rate": 1.3995922528032622e-05, |
| "loss": 0.5021, |
| "step": 1615 |
| }, |
| { |
| "epoch": 1.8538229809209583, |
| "grad_norm": 38.0, |
| "learning_rate": 1.3990825688073395e-05, |
| "loss": 0.6099, |
| "step": 1616 |
| }, |
| { |
| "epoch": 1.8549705924544542, |
| "grad_norm": 49.75, |
| "learning_rate": 1.398572884811417e-05, |
| "loss": 0.6493, |
| "step": 1617 |
| }, |
| { |
| "epoch": 1.85611820398795, |
| "grad_norm": 14.5625, |
| "learning_rate": 1.3980632008154944e-05, |
| "loss": 0.1851, |
| "step": 1618 |
| }, |
| { |
| "epoch": 1.8572658155214459, |
| "grad_norm": 74.5, |
| "learning_rate": 1.397553516819572e-05, |
| "loss": 0.625, |
| "step": 1619 |
| }, |
| { |
| "epoch": 1.858413427054942, |
| "grad_norm": 49.25, |
| "learning_rate": 1.3970438328236495e-05, |
| "loss": 0.4501, |
| "step": 1620 |
| }, |
| { |
| "epoch": 1.8595610385884378, |
| "grad_norm": 36.0, |
| "learning_rate": 1.3965341488277269e-05, |
| "loss": 0.4769, |
| "step": 1621 |
| }, |
| { |
| "epoch": 1.8607086501219339, |
| "grad_norm": 72.5, |
| "learning_rate": 1.3960244648318045e-05, |
| "loss": 0.6018, |
| "step": 1622 |
| }, |
| { |
| "epoch": 1.8618562616554297, |
| "grad_norm": 28.75, |
| "learning_rate": 1.3955147808358817e-05, |
| "loss": 0.4446, |
| "step": 1623 |
| }, |
| { |
| "epoch": 1.8630038731889256, |
| "grad_norm": 58.5, |
| "learning_rate": 1.3950050968399593e-05, |
| "loss": 0.7133, |
| "step": 1624 |
| }, |
| { |
| "epoch": 1.8641514847224214, |
| "grad_norm": 14.6875, |
| "learning_rate": 1.3944954128440368e-05, |
| "loss": 0.2074, |
| "step": 1625 |
| }, |
| { |
| "epoch": 1.8652990962559173, |
| "grad_norm": 54.25, |
| "learning_rate": 1.3939857288481142e-05, |
| "loss": 0.3376, |
| "step": 1626 |
| }, |
| { |
| "epoch": 1.8664467077894131, |
| "grad_norm": 23.0, |
| "learning_rate": 1.3934760448521918e-05, |
| "loss": 0.5169, |
| "step": 1627 |
| }, |
| { |
| "epoch": 1.8675943193229092, |
| "grad_norm": 55.5, |
| "learning_rate": 1.3929663608562692e-05, |
| "loss": 0.3699, |
| "step": 1628 |
| }, |
| { |
| "epoch": 1.868741930856405, |
| "grad_norm": 18.125, |
| "learning_rate": 1.3924566768603467e-05, |
| "loss": 0.207, |
| "step": 1629 |
| }, |
| { |
| "epoch": 1.8698895423899011, |
| "grad_norm": 26.5, |
| "learning_rate": 1.3919469928644243e-05, |
| "loss": 0.4332, |
| "step": 1630 |
| }, |
| { |
| "epoch": 1.871037153923397, |
| "grad_norm": 22.75, |
| "learning_rate": 1.3914373088685017e-05, |
| "loss": 0.4837, |
| "step": 1631 |
| }, |
| { |
| "epoch": 1.8721847654568928, |
| "grad_norm": 69.5, |
| "learning_rate": 1.3909276248725792e-05, |
| "loss": 0.5754, |
| "step": 1632 |
| }, |
| { |
| "epoch": 1.8733323769903887, |
| "grad_norm": 16.25, |
| "learning_rate": 1.3904179408766564e-05, |
| "loss": 0.2141, |
| "step": 1633 |
| }, |
| { |
| "epoch": 1.8744799885238845, |
| "grad_norm": 35.75, |
| "learning_rate": 1.389908256880734e-05, |
| "loss": 0.3208, |
| "step": 1634 |
| }, |
| { |
| "epoch": 1.8756276000573806, |
| "grad_norm": 29.75, |
| "learning_rate": 1.3893985728848116e-05, |
| "loss": 0.6767, |
| "step": 1635 |
| }, |
| { |
| "epoch": 1.8767752115908765, |
| "grad_norm": 19.375, |
| "learning_rate": 1.388888888888889e-05, |
| "loss": 0.1118, |
| "step": 1636 |
| }, |
| { |
| "epoch": 1.8779228231243725, |
| "grad_norm": 15.8125, |
| "learning_rate": 1.3883792048929665e-05, |
| "loss": 0.1238, |
| "step": 1637 |
| }, |
| { |
| "epoch": 1.8790704346578684, |
| "grad_norm": 31.875, |
| "learning_rate": 1.3878695208970439e-05, |
| "loss": 0.5031, |
| "step": 1638 |
| }, |
| { |
| "epoch": 1.8802180461913642, |
| "grad_norm": 40.25, |
| "learning_rate": 1.3873598369011215e-05, |
| "loss": 0.8107, |
| "step": 1639 |
| }, |
| { |
| "epoch": 1.88136565772486, |
| "grad_norm": 25.0, |
| "learning_rate": 1.386850152905199e-05, |
| "loss": 0.3873, |
| "step": 1640 |
| }, |
| { |
| "epoch": 1.882513269258356, |
| "grad_norm": 78.0, |
| "learning_rate": 1.3863404689092762e-05, |
| "loss": 1.1926, |
| "step": 1641 |
| }, |
| { |
| "epoch": 1.883660880791852, |
| "grad_norm": 34.25, |
| "learning_rate": 1.3858307849133538e-05, |
| "loss": 0.5274, |
| "step": 1642 |
| }, |
| { |
| "epoch": 1.8848084923253479, |
| "grad_norm": 15.125, |
| "learning_rate": 1.3853211009174312e-05, |
| "loss": 0.4215, |
| "step": 1643 |
| }, |
| { |
| "epoch": 1.885956103858844, |
| "grad_norm": 28.0, |
| "learning_rate": 1.3848114169215088e-05, |
| "loss": 0.2697, |
| "step": 1644 |
| }, |
| { |
| "epoch": 1.8871037153923398, |
| "grad_norm": 34.25, |
| "learning_rate": 1.3843017329255863e-05, |
| "loss": 0.2025, |
| "step": 1645 |
| }, |
| { |
| "epoch": 1.8882513269258356, |
| "grad_norm": 91.5, |
| "learning_rate": 1.3837920489296637e-05, |
| "loss": 0.7438, |
| "step": 1646 |
| }, |
| { |
| "epoch": 1.8893989384593315, |
| "grad_norm": 88.0, |
| "learning_rate": 1.3832823649337413e-05, |
| "loss": 0.9659, |
| "step": 1647 |
| }, |
| { |
| "epoch": 1.8905465499928273, |
| "grad_norm": 26.875, |
| "learning_rate": 1.3827726809378187e-05, |
| "loss": 0.2307, |
| "step": 1648 |
| }, |
| { |
| "epoch": 1.8916941615263232, |
| "grad_norm": 13.375, |
| "learning_rate": 1.3822629969418962e-05, |
| "loss": 0.3359, |
| "step": 1649 |
| }, |
| { |
| "epoch": 1.8928417730598193, |
| "grad_norm": 72.0, |
| "learning_rate": 1.3817533129459738e-05, |
| "loss": 0.5043, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.8939893845933151, |
| "grad_norm": 46.75, |
| "learning_rate": 1.381243628950051e-05, |
| "loss": 0.4365, |
| "step": 1651 |
| }, |
| { |
| "epoch": 1.8951369961268112, |
| "grad_norm": 27.0, |
| "learning_rate": 1.3807339449541286e-05, |
| "loss": 0.4578, |
| "step": 1652 |
| }, |
| { |
| "epoch": 1.896284607660307, |
| "grad_norm": 49.75, |
| "learning_rate": 1.380224260958206e-05, |
| "loss": 0.645, |
| "step": 1653 |
| }, |
| { |
| "epoch": 1.897432219193803, |
| "grad_norm": 58.25, |
| "learning_rate": 1.3797145769622835e-05, |
| "loss": 0.7014, |
| "step": 1654 |
| }, |
| { |
| "epoch": 1.8985798307272987, |
| "grad_norm": 41.75, |
| "learning_rate": 1.379204892966361e-05, |
| "loss": 0.6419, |
| "step": 1655 |
| }, |
| { |
| "epoch": 1.8997274422607946, |
| "grad_norm": 49.5, |
| "learning_rate": 1.3786952089704385e-05, |
| "loss": 0.6695, |
| "step": 1656 |
| }, |
| { |
| "epoch": 1.9008750537942907, |
| "grad_norm": 34.5, |
| "learning_rate": 1.378185524974516e-05, |
| "loss": 0.324, |
| "step": 1657 |
| }, |
| { |
| "epoch": 1.9020226653277865, |
| "grad_norm": 14.25, |
| "learning_rate": 1.3776758409785932e-05, |
| "loss": 0.289, |
| "step": 1658 |
| }, |
| { |
| "epoch": 1.9031702768612826, |
| "grad_norm": 20.875, |
| "learning_rate": 1.3771661569826708e-05, |
| "loss": 0.2563, |
| "step": 1659 |
| }, |
| { |
| "epoch": 1.9043178883947784, |
| "grad_norm": 26.0, |
| "learning_rate": 1.3766564729867484e-05, |
| "loss": 0.7482, |
| "step": 1660 |
| }, |
| { |
| "epoch": 1.9054654999282743, |
| "grad_norm": 14.4375, |
| "learning_rate": 1.3761467889908258e-05, |
| "loss": 0.4048, |
| "step": 1661 |
| }, |
| { |
| "epoch": 1.9066131114617701, |
| "grad_norm": 51.25, |
| "learning_rate": 1.3756371049949033e-05, |
| "loss": 0.6209, |
| "step": 1662 |
| }, |
| { |
| "epoch": 1.907760722995266, |
| "grad_norm": 30.875, |
| "learning_rate": 1.3751274209989807e-05, |
| "loss": 0.7158, |
| "step": 1663 |
| }, |
| { |
| "epoch": 1.9089083345287619, |
| "grad_norm": 9.4375, |
| "learning_rate": 1.3746177370030583e-05, |
| "loss": 0.1511, |
| "step": 1664 |
| }, |
| { |
| "epoch": 1.910055946062258, |
| "grad_norm": 19.125, |
| "learning_rate": 1.3741080530071358e-05, |
| "loss": 0.3676, |
| "step": 1665 |
| }, |
| { |
| "epoch": 1.911203557595754, |
| "grad_norm": 19.875, |
| "learning_rate": 1.3735983690112132e-05, |
| "loss": 0.3149, |
| "step": 1666 |
| }, |
| { |
| "epoch": 1.9123511691292499, |
| "grad_norm": 22.25, |
| "learning_rate": 1.3730886850152908e-05, |
| "loss": 0.2507, |
| "step": 1667 |
| }, |
| { |
| "epoch": 1.9134987806627457, |
| "grad_norm": 13.0, |
| "learning_rate": 1.372579001019368e-05, |
| "loss": 0.5281, |
| "step": 1668 |
| }, |
| { |
| "epoch": 1.9146463921962416, |
| "grad_norm": 22.625, |
| "learning_rate": 1.3720693170234456e-05, |
| "loss": 0.3352, |
| "step": 1669 |
| }, |
| { |
| "epoch": 1.9157940037297374, |
| "grad_norm": 25.625, |
| "learning_rate": 1.3715596330275231e-05, |
| "loss": 0.3003, |
| "step": 1670 |
| }, |
| { |
| "epoch": 1.9169416152632333, |
| "grad_norm": 37.5, |
| "learning_rate": 1.3710499490316005e-05, |
| "loss": 0.2462, |
| "step": 1671 |
| }, |
| { |
| "epoch": 1.9180892267967293, |
| "grad_norm": 20.75, |
| "learning_rate": 1.370540265035678e-05, |
| "loss": 0.6685, |
| "step": 1672 |
| }, |
| { |
| "epoch": 1.9192368383302252, |
| "grad_norm": 30.0, |
| "learning_rate": 1.3700305810397555e-05, |
| "loss": 0.5793, |
| "step": 1673 |
| }, |
| { |
| "epoch": 1.9203844498637213, |
| "grad_norm": 67.5, |
| "learning_rate": 1.369520897043833e-05, |
| "loss": 0.5628, |
| "step": 1674 |
| }, |
| { |
| "epoch": 1.921532061397217, |
| "grad_norm": 68.0, |
| "learning_rate": 1.3690112130479106e-05, |
| "loss": 0.3445, |
| "step": 1675 |
| }, |
| { |
| "epoch": 1.922679672930713, |
| "grad_norm": 18.375, |
| "learning_rate": 1.3685015290519878e-05, |
| "loss": 0.3626, |
| "step": 1676 |
| }, |
| { |
| "epoch": 1.9238272844642088, |
| "grad_norm": 26.875, |
| "learning_rate": 1.3679918450560654e-05, |
| "loss": 0.8984, |
| "step": 1677 |
| }, |
| { |
| "epoch": 1.9249748959977047, |
| "grad_norm": 27.125, |
| "learning_rate": 1.3674821610601427e-05, |
| "loss": 0.4586, |
| "step": 1678 |
| }, |
| { |
| "epoch": 1.9261225075312007, |
| "grad_norm": 62.0, |
| "learning_rate": 1.3669724770642203e-05, |
| "loss": 0.7513, |
| "step": 1679 |
| }, |
| { |
| "epoch": 1.9272701190646966, |
| "grad_norm": 38.25, |
| "learning_rate": 1.3664627930682979e-05, |
| "loss": 0.4712, |
| "step": 1680 |
| }, |
| { |
| "epoch": 1.9284177305981927, |
| "grad_norm": 19.125, |
| "learning_rate": 1.3659531090723753e-05, |
| "loss": 0.2701, |
| "step": 1681 |
| }, |
| { |
| "epoch": 1.9295653421316885, |
| "grad_norm": 16.625, |
| "learning_rate": 1.3654434250764528e-05, |
| "loss": 0.3847, |
| "step": 1682 |
| }, |
| { |
| "epoch": 1.9307129536651844, |
| "grad_norm": 52.0, |
| "learning_rate": 1.36493374108053e-05, |
| "loss": 0.4352, |
| "step": 1683 |
| }, |
| { |
| "epoch": 1.9318605651986802, |
| "grad_norm": 100.0, |
| "learning_rate": 1.3644240570846076e-05, |
| "loss": 1.0839, |
| "step": 1684 |
| }, |
| { |
| "epoch": 1.933008176732176, |
| "grad_norm": 53.25, |
| "learning_rate": 1.363914373088685e-05, |
| "loss": 0.5791, |
| "step": 1685 |
| }, |
| { |
| "epoch": 1.934155788265672, |
| "grad_norm": 55.5, |
| "learning_rate": 1.3634046890927625e-05, |
| "loss": 0.7248, |
| "step": 1686 |
| }, |
| { |
| "epoch": 1.935303399799168, |
| "grad_norm": 16.5, |
| "learning_rate": 1.3628950050968401e-05, |
| "loss": 0.3914, |
| "step": 1687 |
| }, |
| { |
| "epoch": 1.9364510113326638, |
| "grad_norm": 43.5, |
| "learning_rate": 1.3623853211009175e-05, |
| "loss": 0.348, |
| "step": 1688 |
| }, |
| { |
| "epoch": 1.93759862286616, |
| "grad_norm": 34.25, |
| "learning_rate": 1.361875637104995e-05, |
| "loss": 0.4504, |
| "step": 1689 |
| }, |
| { |
| "epoch": 1.9387462343996558, |
| "grad_norm": 32.5, |
| "learning_rate": 1.3613659531090724e-05, |
| "loss": 0.4256, |
| "step": 1690 |
| }, |
| { |
| "epoch": 1.9398938459331516, |
| "grad_norm": 17.125, |
| "learning_rate": 1.36085626911315e-05, |
| "loss": 0.2441, |
| "step": 1691 |
| }, |
| { |
| "epoch": 1.9410414574666475, |
| "grad_norm": 31.625, |
| "learning_rate": 1.3603465851172276e-05, |
| "loss": 0.5579, |
| "step": 1692 |
| }, |
| { |
| "epoch": 1.9421890690001433, |
| "grad_norm": 29.75, |
| "learning_rate": 1.3598369011213048e-05, |
| "loss": 0.8088, |
| "step": 1693 |
| }, |
| { |
| "epoch": 1.9433366805336394, |
| "grad_norm": 51.5, |
| "learning_rate": 1.3593272171253823e-05, |
| "loss": 0.6118, |
| "step": 1694 |
| }, |
| { |
| "epoch": 1.9444842920671352, |
| "grad_norm": 27.875, |
| "learning_rate": 1.3588175331294597e-05, |
| "loss": 0.2742, |
| "step": 1695 |
| }, |
| { |
| "epoch": 1.9456319036006313, |
| "grad_norm": 11.8125, |
| "learning_rate": 1.3583078491335373e-05, |
| "loss": 0.2417, |
| "step": 1696 |
| }, |
| { |
| "epoch": 1.9467795151341272, |
| "grad_norm": 30.75, |
| "learning_rate": 1.3577981651376149e-05, |
| "loss": 0.236, |
| "step": 1697 |
| }, |
| { |
| "epoch": 1.947927126667623, |
| "grad_norm": 28.5, |
| "learning_rate": 1.3572884811416922e-05, |
| "loss": 0.299, |
| "step": 1698 |
| }, |
| { |
| "epoch": 1.9490747382011189, |
| "grad_norm": 15.125, |
| "learning_rate": 1.3567787971457698e-05, |
| "loss": 0.1027, |
| "step": 1699 |
| }, |
| { |
| "epoch": 1.9502223497346147, |
| "grad_norm": 35.25, |
| "learning_rate": 1.356269113149847e-05, |
| "loss": 0.2869, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.9502223497346147, |
| "eval_accuracy": 0.72, |
| "eval_loss": 0.4552258551120758, |
| "eval_runtime": 49.3148, |
| "eval_samples_per_second": 2.028, |
| "eval_steps_per_second": 2.028, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.9513699612681108, |
| "grad_norm": 27.875, |
| "learning_rate": 1.3557594291539246e-05, |
| "loss": 0.9343, |
| "step": 1701 |
| }, |
| { |
| "epoch": 1.9525175728016066, |
| "grad_norm": 31.25, |
| "learning_rate": 1.3552497451580021e-05, |
| "loss": 0.5365, |
| "step": 1702 |
| }, |
| { |
| "epoch": 1.9536651843351027, |
| "grad_norm": 56.25, |
| "learning_rate": 1.3547400611620795e-05, |
| "loss": 0.6064, |
| "step": 1703 |
| }, |
| { |
| "epoch": 1.9548127958685986, |
| "grad_norm": 8.875, |
| "learning_rate": 1.3542303771661571e-05, |
| "loss": 0.2503, |
| "step": 1704 |
| }, |
| { |
| "epoch": 1.9559604074020944, |
| "grad_norm": 23.25, |
| "learning_rate": 1.3537206931702345e-05, |
| "loss": 0.6551, |
| "step": 1705 |
| }, |
| { |
| "epoch": 1.9571080189355903, |
| "grad_norm": 26.75, |
| "learning_rate": 1.353211009174312e-05, |
| "loss": 0.4402, |
| "step": 1706 |
| }, |
| { |
| "epoch": 1.9582556304690861, |
| "grad_norm": 19.75, |
| "learning_rate": 1.3527013251783896e-05, |
| "loss": 0.5219, |
| "step": 1707 |
| }, |
| { |
| "epoch": 1.959403242002582, |
| "grad_norm": 109.0, |
| "learning_rate": 1.352191641182467e-05, |
| "loss": 0.698, |
| "step": 1708 |
| }, |
| { |
| "epoch": 1.960550853536078, |
| "grad_norm": 81.5, |
| "learning_rate": 1.3516819571865446e-05, |
| "loss": 0.5249, |
| "step": 1709 |
| }, |
| { |
| "epoch": 1.961698465069574, |
| "grad_norm": 29.125, |
| "learning_rate": 1.3511722731906218e-05, |
| "loss": 0.5226, |
| "step": 1710 |
| }, |
| { |
| "epoch": 1.96284607660307, |
| "grad_norm": 59.75, |
| "learning_rate": 1.3506625891946993e-05, |
| "loss": 0.6405, |
| "step": 1711 |
| }, |
| { |
| "epoch": 1.9639936881365658, |
| "grad_norm": 21.5, |
| "learning_rate": 1.3501529051987769e-05, |
| "loss": 0.4448, |
| "step": 1712 |
| }, |
| { |
| "epoch": 1.9651412996700617, |
| "grad_norm": 34.75, |
| "learning_rate": 1.3496432212028543e-05, |
| "loss": 0.667, |
| "step": 1713 |
| }, |
| { |
| "epoch": 1.9662889112035575, |
| "grad_norm": 15.125, |
| "learning_rate": 1.3491335372069319e-05, |
| "loss": 0.4765, |
| "step": 1714 |
| }, |
| { |
| "epoch": 1.9674365227370534, |
| "grad_norm": 31.0, |
| "learning_rate": 1.3486238532110092e-05, |
| "loss": 0.2273, |
| "step": 1715 |
| }, |
| { |
| "epoch": 1.9685841342705495, |
| "grad_norm": 20.75, |
| "learning_rate": 1.3481141692150868e-05, |
| "loss": 0.3604, |
| "step": 1716 |
| }, |
| { |
| "epoch": 1.9697317458040453, |
| "grad_norm": 39.5, |
| "learning_rate": 1.3476044852191644e-05, |
| "loss": 0.4167, |
| "step": 1717 |
| }, |
| { |
| "epoch": 1.9708793573375414, |
| "grad_norm": 28.5, |
| "learning_rate": 1.3470948012232416e-05, |
| "loss": 0.4476, |
| "step": 1718 |
| }, |
| { |
| "epoch": 1.9720269688710372, |
| "grad_norm": 19.25, |
| "learning_rate": 1.3465851172273191e-05, |
| "loss": 0.5297, |
| "step": 1719 |
| }, |
| { |
| "epoch": 1.973174580404533, |
| "grad_norm": 65.0, |
| "learning_rate": 1.3460754332313965e-05, |
| "loss": 0.8327, |
| "step": 1720 |
| }, |
| { |
| "epoch": 1.974322191938029, |
| "grad_norm": 23.875, |
| "learning_rate": 1.3455657492354741e-05, |
| "loss": 0.1996, |
| "step": 1721 |
| }, |
| { |
| "epoch": 1.9754698034715248, |
| "grad_norm": 23.0, |
| "learning_rate": 1.3450560652395517e-05, |
| "loss": 0.4416, |
| "step": 1722 |
| }, |
| { |
| "epoch": 1.9766174150050206, |
| "grad_norm": 11.4375, |
| "learning_rate": 1.344546381243629e-05, |
| "loss": 0.2721, |
| "step": 1723 |
| }, |
| { |
| "epoch": 1.9777650265385167, |
| "grad_norm": 35.0, |
| "learning_rate": 1.3440366972477066e-05, |
| "loss": 0.5629, |
| "step": 1724 |
| }, |
| { |
| "epoch": 1.9789126380720128, |
| "grad_norm": 67.5, |
| "learning_rate": 1.343527013251784e-05, |
| "loss": 0.6305, |
| "step": 1725 |
| }, |
| { |
| "epoch": 1.9800602496055086, |
| "grad_norm": 32.75, |
| "learning_rate": 1.3430173292558616e-05, |
| "loss": 0.2927, |
| "step": 1726 |
| }, |
| { |
| "epoch": 1.9812078611390045, |
| "grad_norm": 35.25, |
| "learning_rate": 1.3425076452599391e-05, |
| "loss": 0.238, |
| "step": 1727 |
| }, |
| { |
| "epoch": 1.9823554726725003, |
| "grad_norm": 20.875, |
| "learning_rate": 1.3419979612640163e-05, |
| "loss": 0.4392, |
| "step": 1728 |
| }, |
| { |
| "epoch": 1.9835030842059962, |
| "grad_norm": 44.0, |
| "learning_rate": 1.3414882772680939e-05, |
| "loss": 0.398, |
| "step": 1729 |
| }, |
| { |
| "epoch": 1.984650695739492, |
| "grad_norm": 26.0, |
| "learning_rate": 1.3409785932721713e-05, |
| "loss": 0.7501, |
| "step": 1730 |
| }, |
| { |
| "epoch": 1.9857983072729881, |
| "grad_norm": 20.75, |
| "learning_rate": 1.3404689092762488e-05, |
| "loss": 0.3494, |
| "step": 1731 |
| }, |
| { |
| "epoch": 1.986945918806484, |
| "grad_norm": 30.875, |
| "learning_rate": 1.3399592252803264e-05, |
| "loss": 1.1064, |
| "step": 1732 |
| }, |
| { |
| "epoch": 1.98809353033998, |
| "grad_norm": 30.0, |
| "learning_rate": 1.3394495412844038e-05, |
| "loss": 0.6117, |
| "step": 1733 |
| }, |
| { |
| "epoch": 1.989241141873476, |
| "grad_norm": 16.875, |
| "learning_rate": 1.3389398572884814e-05, |
| "loss": 0.3173, |
| "step": 1734 |
| }, |
| { |
| "epoch": 1.9903887534069717, |
| "grad_norm": 15.75, |
| "learning_rate": 1.3384301732925586e-05, |
| "loss": 0.4467, |
| "step": 1735 |
| }, |
| { |
| "epoch": 1.9915363649404676, |
| "grad_norm": 49.0, |
| "learning_rate": 1.3379204892966361e-05, |
| "loss": 0.7462, |
| "step": 1736 |
| }, |
| { |
| "epoch": 1.9926839764739634, |
| "grad_norm": 22.25, |
| "learning_rate": 1.3374108053007137e-05, |
| "loss": 0.4648, |
| "step": 1737 |
| }, |
| { |
| "epoch": 1.9938315880074595, |
| "grad_norm": 41.25, |
| "learning_rate": 1.3369011213047911e-05, |
| "loss": 0.2781, |
| "step": 1738 |
| }, |
| { |
| "epoch": 1.9949791995409554, |
| "grad_norm": 22.625, |
| "learning_rate": 1.3363914373088686e-05, |
| "loss": 0.6798, |
| "step": 1739 |
| }, |
| { |
| "epoch": 1.9961268110744514, |
| "grad_norm": 61.5, |
| "learning_rate": 1.335881753312946e-05, |
| "loss": 0.4519, |
| "step": 1740 |
| }, |
| { |
| "epoch": 1.9972744226079473, |
| "grad_norm": 22.375, |
| "learning_rate": 1.3353720693170236e-05, |
| "loss": 0.7196, |
| "step": 1741 |
| }, |
| { |
| "epoch": 1.9984220341414431, |
| "grad_norm": 9.9375, |
| "learning_rate": 1.3348623853211012e-05, |
| "loss": 0.2518, |
| "step": 1742 |
| }, |
| { |
| "epoch": 1.999569645674939, |
| "grad_norm": 24.0, |
| "learning_rate": 1.3343527013251785e-05, |
| "loss": 0.5652, |
| "step": 1743 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 43.0, |
| "learning_rate": 1.3338430173292561e-05, |
| "loss": 0.2228, |
| "step": 1744 |
| }, |
| { |
| "epoch": 2.001147611533496, |
| "grad_norm": 12.75, |
| "learning_rate": 1.3333333333333333e-05, |
| "loss": 0.2022, |
| "step": 1745 |
| }, |
| { |
| "epoch": 2.0022952230669917, |
| "grad_norm": 38.5, |
| "learning_rate": 1.3328236493374109e-05, |
| "loss": 0.5685, |
| "step": 1746 |
| }, |
| { |
| "epoch": 2.0034428346004876, |
| "grad_norm": 17.875, |
| "learning_rate": 1.3323139653414884e-05, |
| "loss": 0.4231, |
| "step": 1747 |
| }, |
| { |
| "epoch": 2.004590446133984, |
| "grad_norm": 30.125, |
| "learning_rate": 1.3318042813455658e-05, |
| "loss": 0.4095, |
| "step": 1748 |
| }, |
| { |
| "epoch": 2.0057380576674797, |
| "grad_norm": 9.125, |
| "learning_rate": 1.3312945973496434e-05, |
| "loss": 0.2246, |
| "step": 1749 |
| }, |
| { |
| "epoch": 2.0068856692009756, |
| "grad_norm": 13.3125, |
| "learning_rate": 1.3307849133537208e-05, |
| "loss": 0.3579, |
| "step": 1750 |
| }, |
| { |
| "epoch": 2.0080332807344714, |
| "grad_norm": 43.25, |
| "learning_rate": 1.3302752293577984e-05, |
| "loss": 0.2611, |
| "step": 1751 |
| }, |
| { |
| "epoch": 2.0091808922679673, |
| "grad_norm": 29.125, |
| "learning_rate": 1.3297655453618759e-05, |
| "loss": 0.1953, |
| "step": 1752 |
| }, |
| { |
| "epoch": 2.010328503801463, |
| "grad_norm": 25.625, |
| "learning_rate": 1.3292558613659531e-05, |
| "loss": 0.3513, |
| "step": 1753 |
| }, |
| { |
| "epoch": 2.011476115334959, |
| "grad_norm": 16.5, |
| "learning_rate": 1.3287461773700307e-05, |
| "loss": 0.2294, |
| "step": 1754 |
| }, |
| { |
| "epoch": 2.012623726868455, |
| "grad_norm": 7.34375, |
| "learning_rate": 1.328236493374108e-05, |
| "loss": 0.1434, |
| "step": 1755 |
| }, |
| { |
| "epoch": 2.013771338401951, |
| "grad_norm": 18.625, |
| "learning_rate": 1.3277268093781856e-05, |
| "loss": 0.5704, |
| "step": 1756 |
| }, |
| { |
| "epoch": 2.014918949935447, |
| "grad_norm": 47.25, |
| "learning_rate": 1.3272171253822632e-05, |
| "loss": 0.4283, |
| "step": 1757 |
| }, |
| { |
| "epoch": 2.016066561468943, |
| "grad_norm": 9.6875, |
| "learning_rate": 1.3267074413863406e-05, |
| "loss": 0.2391, |
| "step": 1758 |
| }, |
| { |
| "epoch": 2.0172141730024387, |
| "grad_norm": 34.5, |
| "learning_rate": 1.3261977573904182e-05, |
| "loss": 0.248, |
| "step": 1759 |
| }, |
| { |
| "epoch": 2.0183617845359345, |
| "grad_norm": 11.9375, |
| "learning_rate": 1.3256880733944954e-05, |
| "loss": 0.2023, |
| "step": 1760 |
| }, |
| { |
| "epoch": 2.0195093960694304, |
| "grad_norm": 27.375, |
| "learning_rate": 1.325178389398573e-05, |
| "loss": 0.261, |
| "step": 1761 |
| }, |
| { |
| "epoch": 2.020657007602926, |
| "grad_norm": 10.875, |
| "learning_rate": 1.3246687054026503e-05, |
| "loss": 0.224, |
| "step": 1762 |
| }, |
| { |
| "epoch": 2.0218046191364225, |
| "grad_norm": 24.375, |
| "learning_rate": 1.3241590214067279e-05, |
| "loss": 0.2463, |
| "step": 1763 |
| }, |
| { |
| "epoch": 2.0229522306699184, |
| "grad_norm": 18.5, |
| "learning_rate": 1.3236493374108054e-05, |
| "loss": 0.2944, |
| "step": 1764 |
| }, |
| { |
| "epoch": 2.024099842203414, |
| "grad_norm": 24.0, |
| "learning_rate": 1.3231396534148828e-05, |
| "loss": 0.315, |
| "step": 1765 |
| }, |
| { |
| "epoch": 2.02524745373691, |
| "grad_norm": 54.0, |
| "learning_rate": 1.3226299694189604e-05, |
| "loss": 0.3818, |
| "step": 1766 |
| }, |
| { |
| "epoch": 2.026395065270406, |
| "grad_norm": 18.5, |
| "learning_rate": 1.3221202854230378e-05, |
| "loss": 0.3524, |
| "step": 1767 |
| }, |
| { |
| "epoch": 2.0275426768039018, |
| "grad_norm": 19.75, |
| "learning_rate": 1.3216106014271153e-05, |
| "loss": 0.3522, |
| "step": 1768 |
| }, |
| { |
| "epoch": 2.0286902883373976, |
| "grad_norm": 35.0, |
| "learning_rate": 1.3211009174311929e-05, |
| "loss": 0.3279, |
| "step": 1769 |
| }, |
| { |
| "epoch": 2.029837899870894, |
| "grad_norm": 56.75, |
| "learning_rate": 1.3205912334352701e-05, |
| "loss": 1.3613, |
| "step": 1770 |
| }, |
| { |
| "epoch": 2.0309855114043898, |
| "grad_norm": 28.0, |
| "learning_rate": 1.3200815494393477e-05, |
| "loss": 0.2122, |
| "step": 1771 |
| }, |
| { |
| "epoch": 2.0321331229378856, |
| "grad_norm": 37.0, |
| "learning_rate": 1.319571865443425e-05, |
| "loss": 0.1997, |
| "step": 1772 |
| }, |
| { |
| "epoch": 2.0332807344713815, |
| "grad_norm": 6.0, |
| "learning_rate": 1.3190621814475026e-05, |
| "loss": 0.0679, |
| "step": 1773 |
| }, |
| { |
| "epoch": 2.0344283460048773, |
| "grad_norm": 16.75, |
| "learning_rate": 1.3185524974515802e-05, |
| "loss": 0.3065, |
| "step": 1774 |
| }, |
| { |
| "epoch": 2.035575957538373, |
| "grad_norm": 33.5, |
| "learning_rate": 1.3180428134556576e-05, |
| "loss": 0.2069, |
| "step": 1775 |
| }, |
| { |
| "epoch": 2.036723569071869, |
| "grad_norm": 27.25, |
| "learning_rate": 1.3175331294597351e-05, |
| "loss": 0.2496, |
| "step": 1776 |
| }, |
| { |
| "epoch": 2.037871180605365, |
| "grad_norm": 29.25, |
| "learning_rate": 1.3170234454638124e-05, |
| "loss": 0.3496, |
| "step": 1777 |
| }, |
| { |
| "epoch": 2.039018792138861, |
| "grad_norm": 22.375, |
| "learning_rate": 1.31651376146789e-05, |
| "loss": 0.302, |
| "step": 1778 |
| }, |
| { |
| "epoch": 2.040166403672357, |
| "grad_norm": 30.375, |
| "learning_rate": 1.3160040774719675e-05, |
| "loss": 0.5632, |
| "step": 1779 |
| }, |
| { |
| "epoch": 2.041314015205853, |
| "grad_norm": 61.0, |
| "learning_rate": 1.3154943934760449e-05, |
| "loss": 0.3705, |
| "step": 1780 |
| }, |
| { |
| "epoch": 2.0424616267393487, |
| "grad_norm": 9.1875, |
| "learning_rate": 1.3149847094801224e-05, |
| "loss": 0.1027, |
| "step": 1781 |
| }, |
| { |
| "epoch": 2.0436092382728446, |
| "grad_norm": 13.75, |
| "learning_rate": 1.3144750254841998e-05, |
| "loss": 0.0892, |
| "step": 1782 |
| }, |
| { |
| "epoch": 2.0447568498063404, |
| "grad_norm": 25.0, |
| "learning_rate": 1.3139653414882774e-05, |
| "loss": 0.6006, |
| "step": 1783 |
| }, |
| { |
| "epoch": 2.0459044613398363, |
| "grad_norm": 11.0625, |
| "learning_rate": 1.313455657492355e-05, |
| "loss": 0.1804, |
| "step": 1784 |
| }, |
| { |
| "epoch": 2.0470520728733326, |
| "grad_norm": 32.75, |
| "learning_rate": 1.3129459734964323e-05, |
| "loss": 0.3527, |
| "step": 1785 |
| }, |
| { |
| "epoch": 2.0481996844068284, |
| "grad_norm": 98.5, |
| "learning_rate": 1.3124362895005099e-05, |
| "loss": 0.8699, |
| "step": 1786 |
| }, |
| { |
| "epoch": 2.0493472959403243, |
| "grad_norm": 40.5, |
| "learning_rate": 1.3119266055045871e-05, |
| "loss": 0.193, |
| "step": 1787 |
| }, |
| { |
| "epoch": 2.05049490747382, |
| "grad_norm": 27.75, |
| "learning_rate": 1.3114169215086647e-05, |
| "loss": 0.3553, |
| "step": 1788 |
| }, |
| { |
| "epoch": 2.051642519007316, |
| "grad_norm": 14.4375, |
| "learning_rate": 1.3109072375127422e-05, |
| "loss": 0.1813, |
| "step": 1789 |
| }, |
| { |
| "epoch": 2.052790130540812, |
| "grad_norm": 17.75, |
| "learning_rate": 1.3103975535168196e-05, |
| "loss": 0.0898, |
| "step": 1790 |
| }, |
| { |
| "epoch": 2.0539377420743077, |
| "grad_norm": 9.25, |
| "learning_rate": 1.3098878695208972e-05, |
| "loss": 0.1149, |
| "step": 1791 |
| }, |
| { |
| "epoch": 2.0550853536078035, |
| "grad_norm": 46.0, |
| "learning_rate": 1.3093781855249746e-05, |
| "loss": 0.2937, |
| "step": 1792 |
| }, |
| { |
| "epoch": 2.0562329651413, |
| "grad_norm": 17.125, |
| "learning_rate": 1.3088685015290521e-05, |
| "loss": 0.3886, |
| "step": 1793 |
| }, |
| { |
| "epoch": 2.0573805766747957, |
| "grad_norm": 25.875, |
| "learning_rate": 1.3083588175331297e-05, |
| "loss": 0.2858, |
| "step": 1794 |
| }, |
| { |
| "epoch": 2.0585281882082915, |
| "grad_norm": 42.5, |
| "learning_rate": 1.307849133537207e-05, |
| "loss": 0.6463, |
| "step": 1795 |
| }, |
| { |
| "epoch": 2.0596757997417874, |
| "grad_norm": 97.5, |
| "learning_rate": 1.3073394495412845e-05, |
| "loss": 0.9309, |
| "step": 1796 |
| }, |
| { |
| "epoch": 2.0608234112752832, |
| "grad_norm": 27.25, |
| "learning_rate": 1.3068297655453619e-05, |
| "loss": 0.3763, |
| "step": 1797 |
| }, |
| { |
| "epoch": 2.061971022808779, |
| "grad_norm": 137.0, |
| "learning_rate": 1.3063200815494394e-05, |
| "loss": 1.1044, |
| "step": 1798 |
| }, |
| { |
| "epoch": 2.063118634342275, |
| "grad_norm": 12.1875, |
| "learning_rate": 1.305810397553517e-05, |
| "loss": 0.1574, |
| "step": 1799 |
| }, |
| { |
| "epoch": 2.0642662458757712, |
| "grad_norm": 22.875, |
| "learning_rate": 1.3053007135575944e-05, |
| "loss": 0.1174, |
| "step": 1800 |
| }, |
| { |
| "epoch": 2.0642662458757712, |
| "eval_accuracy": 0.74, |
| "eval_loss": 0.4835154712200165, |
| "eval_runtime": 49.2987, |
| "eval_samples_per_second": 2.028, |
| "eval_steps_per_second": 2.028, |
| "step": 1800 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 4360, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.358825065150048e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |