| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 4.998016660055534, |
| "eval_steps": 500, |
| "global_step": 1575, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0031733439111463705, |
| "grad_norm": 6.348144432543206, |
| "learning_rate": 5.063291139240507e-07, |
| "loss": 0.8982, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.006346687822292741, |
| "grad_norm": 6.383645545622549, |
| "learning_rate": 1.0126582278481013e-06, |
| "loss": 0.9021, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.009520031733439112, |
| "grad_norm": 6.2539360712589085, |
| "learning_rate": 1.518987341772152e-06, |
| "loss": 0.8937, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.012693375644585482, |
| "grad_norm": 6.225921771088206, |
| "learning_rate": 2.0253164556962026e-06, |
| "loss": 0.8949, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.01586671955573185, |
| "grad_norm": 5.8423675974230775, |
| "learning_rate": 2.5316455696202535e-06, |
| "loss": 0.8753, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.019040063466878223, |
| "grad_norm": 4.757379141562461, |
| "learning_rate": 3.037974683544304e-06, |
| "loss": 0.8416, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.022213407378024592, |
| "grad_norm": 4.272314367321253, |
| "learning_rate": 3.544303797468355e-06, |
| "loss": 0.8311, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.025386751289170964, |
| "grad_norm": 2.4318992207766845, |
| "learning_rate": 4.050632911392405e-06, |
| "loss": 0.7784, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.028560095200317333, |
| "grad_norm": 2.248095466527857, |
| "learning_rate": 4.556962025316456e-06, |
| "loss": 0.7769, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.0317334391114637, |
| "grad_norm": 4.2093273786875836, |
| "learning_rate": 5.063291139240507e-06, |
| "loss": 0.7732, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.03490678302261008, |
| "grad_norm": 4.309584499183112, |
| "learning_rate": 5.569620253164557e-06, |
| "loss": 0.7619, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.038080126933756446, |
| "grad_norm": 4.357671892488039, |
| "learning_rate": 6.075949367088608e-06, |
| "loss": 0.7564, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.041253470844902815, |
| "grad_norm": 3.6078600546011184, |
| "learning_rate": 6.582278481012659e-06, |
| "loss": 0.7009, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.044426814756049184, |
| "grad_norm": 3.651249913873882, |
| "learning_rate": 7.08860759493671e-06, |
| "loss": 0.699, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.04760015866719556, |
| "grad_norm": 2.9003418519249107, |
| "learning_rate": 7.5949367088607605e-06, |
| "loss": 0.6807, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.05077350257834193, |
| "grad_norm": 2.0070454552304744, |
| "learning_rate": 8.10126582278481e-06, |
| "loss": 0.6622, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.0539468464894883, |
| "grad_norm": 1.6956361687151162, |
| "learning_rate": 8.607594936708861e-06, |
| "loss": 0.6405, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.057120190400634666, |
| "grad_norm": 2.212305308576385, |
| "learning_rate": 9.113924050632912e-06, |
| "loss": 0.6294, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.06029353431178104, |
| "grad_norm": 2.2094016296877634, |
| "learning_rate": 9.620253164556963e-06, |
| "loss": 0.6225, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.0634668782229274, |
| "grad_norm": 1.6141377735329752, |
| "learning_rate": 1.0126582278481014e-05, |
| "loss": 0.611, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.06664022213407378, |
| "grad_norm": 1.204912005316192, |
| "learning_rate": 1.0632911392405063e-05, |
| "loss": 0.6073, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.06981356604522015, |
| "grad_norm": 1.3322615853509538, |
| "learning_rate": 1.1139240506329114e-05, |
| "loss": 0.5935, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.07298690995636652, |
| "grad_norm": 1.3306388942331462, |
| "learning_rate": 1.1645569620253165e-05, |
| "loss": 0.5827, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.07616025386751289, |
| "grad_norm": 0.8751536661021161, |
| "learning_rate": 1.2151898734177216e-05, |
| "loss": 0.5805, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.07933359777865927, |
| "grad_norm": 0.9015772829079778, |
| "learning_rate": 1.2658227848101268e-05, |
| "loss": 0.5726, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.08250694168980563, |
| "grad_norm": 0.9818736669411537, |
| "learning_rate": 1.3164556962025317e-05, |
| "loss": 0.5683, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.085680285600952, |
| "grad_norm": 0.6758237512246137, |
| "learning_rate": 1.3670886075949368e-05, |
| "loss": 0.5554, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.08885362951209837, |
| "grad_norm": 0.7798065896044373, |
| "learning_rate": 1.417721518987342e-05, |
| "loss": 0.562, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.09202697342324474, |
| "grad_norm": 0.5943455450936407, |
| "learning_rate": 1.468354430379747e-05, |
| "loss": 0.5516, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.09520031733439112, |
| "grad_norm": 0.5360484272133458, |
| "learning_rate": 1.5189873417721521e-05, |
| "loss": 0.5466, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.09837366124553748, |
| "grad_norm": 0.6427312239326713, |
| "learning_rate": 1.5696202531645572e-05, |
| "loss": 0.5282, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.10154700515668386, |
| "grad_norm": 0.52852465695618, |
| "learning_rate": 1.620253164556962e-05, |
| "loss": 0.5358, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.10472034906783023, |
| "grad_norm": 0.5252420797220524, |
| "learning_rate": 1.6708860759493674e-05, |
| "loss": 0.5292, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.1078936929789766, |
| "grad_norm": 0.6243715389867477, |
| "learning_rate": 1.7215189873417723e-05, |
| "loss": 0.5371, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.11106703689012297, |
| "grad_norm": 0.4003457981794737, |
| "learning_rate": 1.7721518987341772e-05, |
| "loss": 0.5186, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.11424038080126933, |
| "grad_norm": 0.5457989224110974, |
| "learning_rate": 1.8227848101265824e-05, |
| "loss": 0.5223, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.1174137247124157, |
| "grad_norm": 0.3580435227349059, |
| "learning_rate": 1.8734177215189874e-05, |
| "loss": 0.5158, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.12058706862356208, |
| "grad_norm": 0.4663870760426878, |
| "learning_rate": 1.9240506329113926e-05, |
| "loss": 0.518, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.12376041253470844, |
| "grad_norm": 0.4046971344951154, |
| "learning_rate": 1.974683544303798e-05, |
| "loss": 0.5107, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.1269337564458548, |
| "grad_norm": 0.41758513906570793, |
| "learning_rate": 2.0253164556962028e-05, |
| "loss": 0.5124, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.13010710035700118, |
| "grad_norm": 0.4062531142420596, |
| "learning_rate": 2.0759493670886077e-05, |
| "loss": 0.504, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.13328044426814756, |
| "grad_norm": 0.3998294866521708, |
| "learning_rate": 2.1265822784810126e-05, |
| "loss": 0.5025, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.13645378817929393, |
| "grad_norm": 0.3867028742210112, |
| "learning_rate": 2.177215189873418e-05, |
| "loss": 0.5043, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.1396271320904403, |
| "grad_norm": 0.43575380555266197, |
| "learning_rate": 2.2278481012658228e-05, |
| "loss": 0.4972, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.14280047600158668, |
| "grad_norm": 0.6078264125881937, |
| "learning_rate": 2.278481012658228e-05, |
| "loss": 0.4975, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.14597381991273303, |
| "grad_norm": 0.973098853907099, |
| "learning_rate": 2.329113924050633e-05, |
| "loss": 0.4998, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.1491471638238794, |
| "grad_norm": 1.2032473307606681, |
| "learning_rate": 2.379746835443038e-05, |
| "loss": 0.5023, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.15232050773502578, |
| "grad_norm": 0.517082628902644, |
| "learning_rate": 2.430379746835443e-05, |
| "loss": 0.4926, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.15549385164617216, |
| "grad_norm": 0.8487019755795518, |
| "learning_rate": 2.481012658227848e-05, |
| "loss": 0.4981, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.15866719555731854, |
| "grad_norm": 1.0951673394024417, |
| "learning_rate": 2.5316455696202537e-05, |
| "loss": 0.4962, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.16184053946846488, |
| "grad_norm": 0.755728182616416, |
| "learning_rate": 2.5822784810126586e-05, |
| "loss": 0.4962, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.16501388337961126, |
| "grad_norm": 1.3057901440899915, |
| "learning_rate": 2.6329113924050635e-05, |
| "loss": 0.4865, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.16818722729075763, |
| "grad_norm": 0.6650026611670792, |
| "learning_rate": 2.6835443037974687e-05, |
| "loss": 0.4895, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.171360571201904, |
| "grad_norm": 0.7738295996967581, |
| "learning_rate": 2.7341772151898737e-05, |
| "loss": 0.4968, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.1745339151130504, |
| "grad_norm": 0.9000555872432531, |
| "learning_rate": 2.784810126582279e-05, |
| "loss": 0.4909, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.17770725902419673, |
| "grad_norm": 0.8162914872438413, |
| "learning_rate": 2.835443037974684e-05, |
| "loss": 0.4834, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.1808806029353431, |
| "grad_norm": 0.7906164914396787, |
| "learning_rate": 2.8860759493670888e-05, |
| "loss": 0.4865, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.18405394684648949, |
| "grad_norm": 0.9490020845109697, |
| "learning_rate": 2.936708860759494e-05, |
| "loss": 0.4908, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.18722729075763586, |
| "grad_norm": 0.7752131353512624, |
| "learning_rate": 2.987341772151899e-05, |
| "loss": 0.4782, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.19040063466878224, |
| "grad_norm": 0.84561035331472, |
| "learning_rate": 3.0379746835443042e-05, |
| "loss": 0.4703, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.1935739785799286, |
| "grad_norm": 0.5602877470932804, |
| "learning_rate": 3.088607594936709e-05, |
| "loss": 0.4775, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.19674732249107496, |
| "grad_norm": 0.6821307536004709, |
| "learning_rate": 3.1392405063291144e-05, |
| "loss": 0.4773, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.19992066640222134, |
| "grad_norm": 0.717968030563759, |
| "learning_rate": 3.1898734177215196e-05, |
| "loss": 0.4742, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.2030940103133677, |
| "grad_norm": 0.8033651165413181, |
| "learning_rate": 3.240506329113924e-05, |
| "loss": 0.4721, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.2062673542245141, |
| "grad_norm": 0.8192446128586057, |
| "learning_rate": 3.2911392405063295e-05, |
| "loss": 0.4772, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.20944069813566046, |
| "grad_norm": 1.431814362557968, |
| "learning_rate": 3.341772151898735e-05, |
| "loss": 0.4833, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.2126140420468068, |
| "grad_norm": 0.8878416063140385, |
| "learning_rate": 3.392405063291139e-05, |
| "loss": 0.4769, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.2157873859579532, |
| "grad_norm": 0.8851153809304894, |
| "learning_rate": 3.4430379746835445e-05, |
| "loss": 0.4702, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.21896072986909956, |
| "grad_norm": 0.8786193628266902, |
| "learning_rate": 3.49367088607595e-05, |
| "loss": 0.4685, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.22213407378024594, |
| "grad_norm": 0.7405283080781369, |
| "learning_rate": 3.5443037974683544e-05, |
| "loss": 0.4717, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.22530741769139231, |
| "grad_norm": 1.043777209641996, |
| "learning_rate": 3.5949367088607596e-05, |
| "loss": 0.4739, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.22848076160253866, |
| "grad_norm": 1.3637808792967987, |
| "learning_rate": 3.645569620253165e-05, |
| "loss": 0.4609, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.23165410551368504, |
| "grad_norm": 0.5922364571167895, |
| "learning_rate": 3.69620253164557e-05, |
| "loss": 0.4633, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.2348274494248314, |
| "grad_norm": 1.092592419482634, |
| "learning_rate": 3.746835443037975e-05, |
| "loss": 0.4709, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.2380007933359778, |
| "grad_norm": 1.3150716961544304, |
| "learning_rate": 3.79746835443038e-05, |
| "loss": 0.4645, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.24117413724712417, |
| "grad_norm": 0.8868277668057346, |
| "learning_rate": 3.848101265822785e-05, |
| "loss": 0.4701, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.24434748115827054, |
| "grad_norm": 1.0869800415170585, |
| "learning_rate": 3.89873417721519e-05, |
| "loss": 0.4646, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.2475208250694169, |
| "grad_norm": 1.0125334083010975, |
| "learning_rate": 3.949367088607596e-05, |
| "loss": 0.4678, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.2506941689805633, |
| "grad_norm": 1.312025871541637, |
| "learning_rate": 4e-05, |
| "loss": 0.4689, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.2538675128917096, |
| "grad_norm": 0.7660489575183855, |
| "learning_rate": 4.0506329113924056e-05, |
| "loss": 0.4627, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.257040856802856, |
| "grad_norm": 1.0627032211180467, |
| "learning_rate": 4.10126582278481e-05, |
| "loss": 0.4665, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.26021420071400236, |
| "grad_norm": 1.2004403182979329, |
| "learning_rate": 4.1518987341772154e-05, |
| "loss": 0.463, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.26338754462514874, |
| "grad_norm": 0.8697213717507531, |
| "learning_rate": 4.202531645569621e-05, |
| "loss": 0.4639, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.2665608885362951, |
| "grad_norm": 0.7770576209264962, |
| "learning_rate": 4.253164556962025e-05, |
| "loss": 0.4636, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.2697342324474415, |
| "grad_norm": 1.006934185356337, |
| "learning_rate": 4.3037974683544305e-05, |
| "loss": 0.4687, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.27290757635858787, |
| "grad_norm": 1.7918476049624474, |
| "learning_rate": 4.354430379746836e-05, |
| "loss": 0.4775, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.27608092026973424, |
| "grad_norm": 1.0471693596288953, |
| "learning_rate": 4.405063291139241e-05, |
| "loss": 0.4736, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.2792542641808806, |
| "grad_norm": 1.4597496826948222, |
| "learning_rate": 4.4556962025316456e-05, |
| "loss": 0.4786, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.282427608092027, |
| "grad_norm": 1.400079131837375, |
| "learning_rate": 4.506329113924051e-05, |
| "loss": 0.4699, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.28560095200317337, |
| "grad_norm": 1.1646233119330298, |
| "learning_rate": 4.556962025316456e-05, |
| "loss": 0.4721, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.2887742959143197, |
| "grad_norm": 1.5770765835982208, |
| "learning_rate": 4.607594936708861e-05, |
| "loss": 0.4741, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.29194763982546607, |
| "grad_norm": 1.2163067580443867, |
| "learning_rate": 4.658227848101266e-05, |
| "loss": 0.466, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.29512098373661244, |
| "grad_norm": 1.2720825044452424, |
| "learning_rate": 4.708860759493671e-05, |
| "loss": 0.4641, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.2982943276477588, |
| "grad_norm": 1.1086102734888972, |
| "learning_rate": 4.759493670886076e-05, |
| "loss": 0.4689, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.3014676715589052, |
| "grad_norm": 1.1621389294628086, |
| "learning_rate": 4.810126582278481e-05, |
| "loss": 0.46, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.30464101547005157, |
| "grad_norm": 1.023227048678522, |
| "learning_rate": 4.860759493670886e-05, |
| "loss": 0.4664, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.30781435938119794, |
| "grad_norm": 0.6681644577920394, |
| "learning_rate": 4.911392405063292e-05, |
| "loss": 0.46, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.3109877032923443, |
| "grad_norm": 0.8942148240973403, |
| "learning_rate": 4.962025316455696e-05, |
| "loss": 0.4578, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.3141610472034907, |
| "grad_norm": 0.8129981254381207, |
| "learning_rate": 5.012658227848102e-05, |
| "loss": 0.4588, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.31733439111463707, |
| "grad_norm": 0.7399247303363102, |
| "learning_rate": 5.063291139240507e-05, |
| "loss": 0.4684, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.32050773502578345, |
| "grad_norm": 0.9208344037352115, |
| "learning_rate": 5.113924050632911e-05, |
| "loss": 0.4537, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.32368107893692977, |
| "grad_norm": 0.7718334071956167, |
| "learning_rate": 5.164556962025317e-05, |
| "loss": 0.4541, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.32685442284807614, |
| "grad_norm": 0.9193553200855868, |
| "learning_rate": 5.2151898734177224e-05, |
| "loss": 0.4639, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.3300277667592225, |
| "grad_norm": 1.0893805228839906, |
| "learning_rate": 5.265822784810127e-05, |
| "loss": 0.4591, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.3332011106703689, |
| "grad_norm": 1.3296951795184973, |
| "learning_rate": 5.316455696202532e-05, |
| "loss": 0.4609, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.33637445458151527, |
| "grad_norm": 0.749655425862983, |
| "learning_rate": 5.3670886075949375e-05, |
| "loss": 0.4552, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.33954779849266165, |
| "grad_norm": 1.0304107904402815, |
| "learning_rate": 5.417721518987342e-05, |
| "loss": 0.455, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.342721142403808, |
| "grad_norm": 1.259806934599001, |
| "learning_rate": 5.468354430379747e-05, |
| "loss": 0.456, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.3458944863149544, |
| "grad_norm": 0.7667285389018935, |
| "learning_rate": 5.5189873417721526e-05, |
| "loss": 0.446, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.3490678302261008, |
| "grad_norm": 0.8396599205938527, |
| "learning_rate": 5.569620253164558e-05, |
| "loss": 0.444, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.35224117413724715, |
| "grad_norm": 1.0519977391007944, |
| "learning_rate": 5.6202531645569624e-05, |
| "loss": 0.4509, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.35541451804839347, |
| "grad_norm": 1.0013235222472492, |
| "learning_rate": 5.670886075949368e-05, |
| "loss": 0.4486, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.35858786195953984, |
| "grad_norm": 1.0607291724487944, |
| "learning_rate": 5.721518987341773e-05, |
| "loss": 0.4469, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.3617612058706862, |
| "grad_norm": 0.9739072199836216, |
| "learning_rate": 5.7721518987341775e-05, |
| "loss": 0.453, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.3649345497818326, |
| "grad_norm": 0.9878026910095292, |
| "learning_rate": 5.822784810126583e-05, |
| "loss": 0.4504, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.36810789369297897, |
| "grad_norm": 1.0083153796649502, |
| "learning_rate": 5.873417721518988e-05, |
| "loss": 0.4514, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.37128123760412535, |
| "grad_norm": 1.230069181330947, |
| "learning_rate": 5.9240506329113926e-05, |
| "loss": 0.4547, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.3744545815152717, |
| "grad_norm": 0.8910651737656846, |
| "learning_rate": 5.974683544303798e-05, |
| "loss": 0.447, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.3776279254264181, |
| "grad_norm": 1.0595908240339857, |
| "learning_rate": 6.025316455696203e-05, |
| "loss": 0.4466, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.3808012693375645, |
| "grad_norm": 0.8363383114472455, |
| "learning_rate": 6.0759493670886084e-05, |
| "loss": 0.4463, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.38397461324871085, |
| "grad_norm": 0.907469468063755, |
| "learning_rate": 6.126582278481012e-05, |
| "loss": 0.4492, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.3871479571598572, |
| "grad_norm": 1.3417514038396015, |
| "learning_rate": 6.177215189873418e-05, |
| "loss": 0.4572, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.39032130107100355, |
| "grad_norm": 0.6834706943125461, |
| "learning_rate": 6.227848101265824e-05, |
| "loss": 0.4452, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.3934946449821499, |
| "grad_norm": 1.1807244230068368, |
| "learning_rate": 6.278481012658229e-05, |
| "loss": 0.4486, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.3966679888932963, |
| "grad_norm": 0.9150568429572349, |
| "learning_rate": 6.329113924050633e-05, |
| "loss": 0.4475, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.3998413328044427, |
| "grad_norm": 0.9711686157986756, |
| "learning_rate": 6.379746835443039e-05, |
| "loss": 0.4548, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.40301467671558905, |
| "grad_norm": 0.8470526923713568, |
| "learning_rate": 6.430379746835444e-05, |
| "loss": 0.4573, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.4061880206267354, |
| "grad_norm": 1.1867353576740691, |
| "learning_rate": 6.481012658227848e-05, |
| "loss": 0.4524, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.4093613645378818, |
| "grad_norm": 0.9924058775384482, |
| "learning_rate": 6.531645569620254e-05, |
| "loss": 0.4445, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.4125347084490282, |
| "grad_norm": 1.0279428635787766, |
| "learning_rate": 6.582278481012659e-05, |
| "loss": 0.4536, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.41570805236017455, |
| "grad_norm": 1.327497625966449, |
| "learning_rate": 6.632911392405063e-05, |
| "loss": 0.4517, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.4188813962713209, |
| "grad_norm": 0.8047709339837646, |
| "learning_rate": 6.68354430379747e-05, |
| "loss": 0.4507, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.42205474018246725, |
| "grad_norm": 0.6624995618693278, |
| "learning_rate": 6.734177215189874e-05, |
| "loss": 0.4416, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.4252280840936136, |
| "grad_norm": 0.8937479460795225, |
| "learning_rate": 6.784810126582279e-05, |
| "loss": 0.4413, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.42840142800476, |
| "grad_norm": 1.2288491348657344, |
| "learning_rate": 6.835443037974685e-05, |
| "loss": 0.4481, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.4315747719159064, |
| "grad_norm": 0.7888292495946069, |
| "learning_rate": 6.886075949367089e-05, |
| "loss": 0.4434, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.43474811582705275, |
| "grad_norm": 0.8552313093639724, |
| "learning_rate": 6.936708860759494e-05, |
| "loss": 0.4415, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.4379214597381991, |
| "grad_norm": 1.1555499174697232, |
| "learning_rate": 6.9873417721519e-05, |
| "loss": 0.4469, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.4410948036493455, |
| "grad_norm": 0.8107672566991376, |
| "learning_rate": 7.037974683544304e-05, |
| "loss": 0.4391, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.4442681475604919, |
| "grad_norm": 0.7123665736327819, |
| "learning_rate": 7.088607594936709e-05, |
| "loss": 0.44, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.44744149147163825, |
| "grad_norm": 0.7484877480349433, |
| "learning_rate": 7.139240506329115e-05, |
| "loss": 0.4444, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.45061483538278463, |
| "grad_norm": 0.8956448687190801, |
| "learning_rate": 7.189873417721519e-05, |
| "loss": 0.4472, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.453788179293931, |
| "grad_norm": 1.1407806224479462, |
| "learning_rate": 7.240506329113925e-05, |
| "loss": 0.4549, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.4569615232050773, |
| "grad_norm": 0.6987492103428099, |
| "learning_rate": 7.29113924050633e-05, |
| "loss": 0.4376, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.4601348671162237, |
| "grad_norm": 1.0417474479567967, |
| "learning_rate": 7.341772151898734e-05, |
| "loss": 0.4496, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.4633082110273701, |
| "grad_norm": 0.9448597075694767, |
| "learning_rate": 7.39240506329114e-05, |
| "loss": 0.4446, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.46648155493851645, |
| "grad_norm": 0.891015819461564, |
| "learning_rate": 7.443037974683545e-05, |
| "loss": 0.445, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.4696548988496628, |
| "grad_norm": 0.6640649337472588, |
| "learning_rate": 7.49367088607595e-05, |
| "loss": 0.4415, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.4728282427608092, |
| "grad_norm": 0.6255801769841464, |
| "learning_rate": 7.544303797468355e-05, |
| "loss": 0.4401, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.4760015866719556, |
| "grad_norm": 0.6462120499489888, |
| "learning_rate": 7.59493670886076e-05, |
| "loss": 0.4443, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.47917493058310195, |
| "grad_norm": 0.6031979187078812, |
| "learning_rate": 7.645569620253165e-05, |
| "loss": 0.4315, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.48234827449424833, |
| "grad_norm": 0.7422409369854028, |
| "learning_rate": 7.69620253164557e-05, |
| "loss": 0.4411, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.4855216184053947, |
| "grad_norm": 0.6026165860234792, |
| "learning_rate": 7.746835443037976e-05, |
| "loss": 0.4372, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.4886949623165411, |
| "grad_norm": 0.4790898172907393, |
| "learning_rate": 7.79746835443038e-05, |
| "loss": 0.4397, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.4918683062276874, |
| "grad_norm": 0.5637573427525947, |
| "learning_rate": 7.848101265822786e-05, |
| "loss": 0.4364, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.4950416501388338, |
| "grad_norm": 0.7151994774739086, |
| "learning_rate": 7.898734177215191e-05, |
| "loss": 0.4418, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.49821499404998015, |
| "grad_norm": 0.8523966891138255, |
| "learning_rate": 7.949367088607595e-05, |
| "loss": 0.4426, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.5013883379611266, |
| "grad_norm": 1.0199664244681628, |
| "learning_rate": 8e-05, |
| "loss": 0.4396, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.5045616818722729, |
| "grad_norm": 0.9730498513447443, |
| "learning_rate": 7.999990169177323e-05, |
| "loss": 0.4524, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.5077350257834192, |
| "grad_norm": 0.8763687649740503, |
| "learning_rate": 7.99996067675761e-05, |
| "loss": 0.4385, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.5109083696945657, |
| "grad_norm": 0.7439185882238727, |
| "learning_rate": 7.99991152288583e-05, |
| "loss": 0.4426, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.514081713605712, |
| "grad_norm": 0.8277849239630293, |
| "learning_rate": 7.999842707803597e-05, |
| "loss": 0.4336, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.5172550575168584, |
| "grad_norm": 0.6740425300459671, |
| "learning_rate": 7.999754231849163e-05, |
| "loss": 0.4371, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.5204284014280047, |
| "grad_norm": 0.6114082854253373, |
| "learning_rate": 7.999646095457422e-05, |
| "loss": 0.4421, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.5236017453391512, |
| "grad_norm": 0.8489305672656007, |
| "learning_rate": 7.999518299159912e-05, |
| "loss": 0.4366, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.5267750892502975, |
| "grad_norm": 0.7585314746590911, |
| "learning_rate": 7.999370843584805e-05, |
| "loss": 0.432, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.5299484331614439, |
| "grad_norm": 0.9398955324495176, |
| "learning_rate": 7.999203729456902e-05, |
| "loss": 0.4361, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.5331217770725902, |
| "grad_norm": 1.7667814883362698, |
| "learning_rate": 7.99901695759764e-05, |
| "loss": 0.4484, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.5362951209837367, |
| "grad_norm": 0.5640015415710142, |
| "learning_rate": 7.99881052892508e-05, |
| "loss": 0.4347, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.539468464894883, |
| "grad_norm": 1.512073308742083, |
| "learning_rate": 7.998584444453901e-05, |
| "loss": 0.4454, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.5426418088060293, |
| "grad_norm": 0.9689824722762622, |
| "learning_rate": 7.998338705295406e-05, |
| "loss": 0.4386, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.5458151527171757, |
| "grad_norm": 0.7646001072380342, |
| "learning_rate": 7.9980733126575e-05, |
| "loss": 0.4358, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.548988496628322, |
| "grad_norm": 0.9621346516255858, |
| "learning_rate": 7.997788267844699e-05, |
| "loss": 0.4438, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.5521618405394685, |
| "grad_norm": 0.8313101918847394, |
| "learning_rate": 7.997483572258112e-05, |
| "loss": 0.4402, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.5553351844506148, |
| "grad_norm": 0.6402057742934584, |
| "learning_rate": 7.997159227395449e-05, |
| "loss": 0.4289, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.5585085283617612, |
| "grad_norm": 0.5861524014663406, |
| "learning_rate": 7.996815234850994e-05, |
| "loss": 0.4258, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.5616818722729076, |
| "grad_norm": 0.49147871141369276, |
| "learning_rate": 7.996451596315613e-05, |
| "loss": 0.4284, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.564855216184054, |
| "grad_norm": 0.5385589588185274, |
| "learning_rate": 7.99606831357674e-05, |
| "loss": 0.4326, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.5680285600952003, |
| "grad_norm": 0.5020636674847232, |
| "learning_rate": 7.995665388518366e-05, |
| "loss": 0.4275, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.5712019040063467, |
| "grad_norm": 0.34677645292080367, |
| "learning_rate": 7.995242823121035e-05, |
| "loss": 0.4313, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.5743752479174931, |
| "grad_norm": 0.41481246440728614, |
| "learning_rate": 7.994800619461826e-05, |
| "loss": 0.428, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.5775485918286394, |
| "grad_norm": 0.4728147459852834, |
| "learning_rate": 7.994338779714356e-05, |
| "loss": 0.429, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.5807219357397858, |
| "grad_norm": 0.34714272709199906, |
| "learning_rate": 7.993857306148757e-05, |
| "loss": 0.42, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.5838952796509321, |
| "grad_norm": 0.33307391540828185, |
| "learning_rate": 7.993356201131667e-05, |
| "loss": 0.4306, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.5870686235620786, |
| "grad_norm": 0.4557580388879463, |
| "learning_rate": 7.992835467126226e-05, |
| "loss": 0.4336, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.5902419674732249, |
| "grad_norm": 0.3413621320410112, |
| "learning_rate": 7.992295106692053e-05, |
| "loss": 0.4244, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.5934153113843713, |
| "grad_norm": 0.30301313621067855, |
| "learning_rate": 7.991735122485244e-05, |
| "loss": 0.4264, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.5965886552955176, |
| "grad_norm": 0.27873012092060434, |
| "learning_rate": 7.991155517258351e-05, |
| "loss": 0.4254, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.5997619992066641, |
| "grad_norm": 0.3234296671716833, |
| "learning_rate": 7.990556293860373e-05, |
| "loss": 0.4226, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.6029353431178104, |
| "grad_norm": 0.26061056621175, |
| "learning_rate": 7.989937455236738e-05, |
| "loss": 0.4212, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.6061086870289568, |
| "grad_norm": 0.32864221465315796, |
| "learning_rate": 7.989299004429294e-05, |
| "loss": 0.4226, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.6092820309401031, |
| "grad_norm": 0.4444198405279817, |
| "learning_rate": 7.988640944576287e-05, |
| "loss": 0.4266, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.6124553748512495, |
| "grad_norm": 0.5842630238421751, |
| "learning_rate": 7.987963278912353e-05, |
| "loss": 0.4239, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.6156287187623959, |
| "grad_norm": 0.8349781810783966, |
| "learning_rate": 7.9872660107685e-05, |
| "loss": 0.4249, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.6188020626735422, |
| "grad_norm": 1.167134998920243, |
| "learning_rate": 7.986549143572085e-05, |
| "loss": 0.4274, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.6219754065846886, |
| "grad_norm": 0.9339211226210348, |
| "learning_rate": 7.985812680846804e-05, |
| "loss": 0.4259, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.625148750495835, |
| "grad_norm": 0.7637681964537499, |
| "learning_rate": 7.985056626212678e-05, |
| "loss": 0.4293, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.6283220944069814, |
| "grad_norm": 0.758578666942809, |
| "learning_rate": 7.984280983386022e-05, |
| "loss": 0.4268, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.6314954383181277, |
| "grad_norm": 0.7219115681675803, |
| "learning_rate": 7.983485756179443e-05, |
| "loss": 0.4284, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.6346687822292741, |
| "grad_norm": 0.7774185389424639, |
| "learning_rate": 7.98267094850181e-05, |
| "loss": 0.4357, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.6378421261404205, |
| "grad_norm": 0.8684313423895228, |
| "learning_rate": 7.981836564358235e-05, |
| "loss": 0.4364, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.6410154700515669, |
| "grad_norm": 0.9490290231535304, |
| "learning_rate": 7.980982607850062e-05, |
| "loss": 0.4316, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.6441888139627132, |
| "grad_norm": 1.1158847171655581, |
| "learning_rate": 7.980109083174838e-05, |
| "loss": 0.4326, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.6473621578738595, |
| "grad_norm": 0.5821381704555008, |
| "learning_rate": 7.979215994626295e-05, |
| "loss": 0.4248, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.650535501785006, |
| "grad_norm": 0.6159942615763982, |
| "learning_rate": 7.97830334659433e-05, |
| "loss": 0.4228, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.6537088456961523, |
| "grad_norm": 0.9854660383074767, |
| "learning_rate": 7.977371143564986e-05, |
| "loss": 0.4319, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.6568821896072987, |
| "grad_norm": 0.6753873782895369, |
| "learning_rate": 7.976419390120422e-05, |
| "loss": 0.4257, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.660055533518445, |
| "grad_norm": 0.4578459840507763, |
| "learning_rate": 7.9754480909389e-05, |
| "loss": 0.4305, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.6632288774295915, |
| "grad_norm": 0.7788354439242892, |
| "learning_rate": 7.974457250794752e-05, |
| "loss": 0.4294, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.6664022213407378, |
| "grad_norm": 0.5659494748709242, |
| "learning_rate": 7.973446874558367e-05, |
| "loss": 0.4244, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.6695755652518842, |
| "grad_norm": 0.37038793237805434, |
| "learning_rate": 7.97241696719616e-05, |
| "loss": 0.4259, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.6727489091630305, |
| "grad_norm": 0.5598930828252042, |
| "learning_rate": 7.971367533770548e-05, |
| "loss": 0.424, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.6759222530741769, |
| "grad_norm": 0.38017398481352066, |
| "learning_rate": 7.97029857943993e-05, |
| "loss": 0.4259, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.6790955969853233, |
| "grad_norm": 0.34969847402690163, |
| "learning_rate": 7.969210109458653e-05, |
| "loss": 0.4224, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.6822689408964696, |
| "grad_norm": 0.4326242238353279, |
| "learning_rate": 7.968102129176998e-05, |
| "loss": 0.4217, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.685442284807616, |
| "grad_norm": 0.3678356901641005, |
| "learning_rate": 7.966974644041142e-05, |
| "loss": 0.4268, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.6886156287187624, |
| "grad_norm": 0.3291254839559168, |
| "learning_rate": 7.965827659593138e-05, |
| "loss": 0.4206, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.6917889726299088, |
| "grad_norm": 0.3567104966136532, |
| "learning_rate": 7.964661181470887e-05, |
| "loss": 0.4191, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.6949623165410551, |
| "grad_norm": 0.3146162751453091, |
| "learning_rate": 7.96347521540811e-05, |
| "loss": 0.4133, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.6981356604522015, |
| "grad_norm": 0.25903345117835813, |
| "learning_rate": 7.962269767234315e-05, |
| "loss": 0.4241, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.7013090043633479, |
| "grad_norm": 0.3697676932101927, |
| "learning_rate": 7.96104484287478e-05, |
| "loss": 0.4248, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.7044823482744943, |
| "grad_norm": 0.4012189220411039, |
| "learning_rate": 7.959800448350507e-05, |
| "loss": 0.4188, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.7076556921856406, |
| "grad_norm": 0.43083222853217606, |
| "learning_rate": 7.95853658977821e-05, |
| "loss": 0.4184, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.7108290360967869, |
| "grad_norm": 0.41949845884539716, |
| "learning_rate": 7.957253273370275e-05, |
| "loss": 0.4168, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.7140023800079334, |
| "grad_norm": 0.460716406317274, |
| "learning_rate": 7.955950505434725e-05, |
| "loss": 0.4193, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.7171757239190797, |
| "grad_norm": 0.6387648521603024, |
| "learning_rate": 7.954628292375207e-05, |
| "loss": 0.428, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.7203490678302261, |
| "grad_norm": 0.7552539307927338, |
| "learning_rate": 7.953286640690936e-05, |
| "loss": 0.4243, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.7235224117413724, |
| "grad_norm": 0.6681939851633835, |
| "learning_rate": 7.951925556976686e-05, |
| "loss": 0.4256, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.7266957556525189, |
| "grad_norm": 0.6068582323945105, |
| "learning_rate": 7.950545047922741e-05, |
| "loss": 0.4231, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.7298690995636652, |
| "grad_norm": 0.5668354270139984, |
| "learning_rate": 7.949145120314871e-05, |
| "loss": 0.4216, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.7330424434748116, |
| "grad_norm": 0.5694091160000379, |
| "learning_rate": 7.947725781034299e-05, |
| "loss": 0.4173, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.7362157873859579, |
| "grad_norm": 0.546717014419698, |
| "learning_rate": 7.946287037057657e-05, |
| "loss": 0.4181, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.7393891312971044, |
| "grad_norm": 0.3630503498245246, |
| "learning_rate": 7.944828895456968e-05, |
| "loss": 0.4137, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.7425624752082507, |
| "grad_norm": 0.43965037772138893, |
| "learning_rate": 7.943351363399593e-05, |
| "loss": 0.423, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.745735819119397, |
| "grad_norm": 0.6391260482000344, |
| "learning_rate": 7.941854448148212e-05, |
| "loss": 0.4161, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.7489091630305434, |
| "grad_norm": 0.5110774940868824, |
| "learning_rate": 7.94033815706078e-05, |
| "loss": 0.4128, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.7520825069416898, |
| "grad_norm": 0.3565529564135717, |
| "learning_rate": 7.938802497590491e-05, |
| "loss": 0.4138, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.7552558508528362, |
| "grad_norm": 0.4657810664776175, |
| "learning_rate": 7.937247477285743e-05, |
| "loss": 0.4161, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.7584291947639825, |
| "grad_norm": 0.4101562014538087, |
| "learning_rate": 7.935673103790101e-05, |
| "loss": 0.4198, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.761602538675129, |
| "grad_norm": 0.3474690454226505, |
| "learning_rate": 7.934079384842255e-05, |
| "loss": 0.4182, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.7647758825862753, |
| "grad_norm": 0.501425607063933, |
| "learning_rate": 7.932466328275994e-05, |
| "loss": 0.4154, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.7679492264974217, |
| "grad_norm": 0.5505691214538797, |
| "learning_rate": 7.93083394202015e-05, |
| "loss": 0.4192, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.771122570408568, |
| "grad_norm": 0.48063376141835473, |
| "learning_rate": 7.929182234098576e-05, |
| "loss": 0.4142, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.7742959143197145, |
| "grad_norm": 0.4777993396793008, |
| "learning_rate": 7.927511212630096e-05, |
| "loss": 0.4166, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.7774692582308608, |
| "grad_norm": 0.7002665548552994, |
| "learning_rate": 7.925820885828468e-05, |
| "loss": 0.4222, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.7806426021420071, |
| "grad_norm": 0.9739111639000991, |
| "learning_rate": 7.924111262002338e-05, |
| "loss": 0.4252, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.7838159460531535, |
| "grad_norm": 1.0430449575283998, |
| "learning_rate": 7.922382349555218e-05, |
| "loss": 0.4252, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.7869892899642998, |
| "grad_norm": 0.7337059616611151, |
| "learning_rate": 7.92063415698542e-05, |
| "loss": 0.4144, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.7901626338754463, |
| "grad_norm": 0.5385536889326978, |
| "learning_rate": 7.918866692886031e-05, |
| "loss": 0.4226, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.7933359777865926, |
| "grad_norm": 0.7647340729347486, |
| "learning_rate": 7.917079965944862e-05, |
| "loss": 0.4201, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.796509321697739, |
| "grad_norm": 0.8450817800646696, |
| "learning_rate": 7.915273984944412e-05, |
| "loss": 0.4224, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.7996826656088853, |
| "grad_norm": 0.4920042288499847, |
| "learning_rate": 7.913448758761821e-05, |
| "loss": 0.4146, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.8028560095200318, |
| "grad_norm": 0.39445337326690816, |
| "learning_rate": 7.911604296368826e-05, |
| "loss": 0.4157, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.8060293534311781, |
| "grad_norm": 0.5853009915458679, |
| "learning_rate": 7.909740606831719e-05, |
| "loss": 0.417, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.8092026973423245, |
| "grad_norm": 0.4974087565357452, |
| "learning_rate": 7.907857699311299e-05, |
| "loss": 0.4158, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.8123760412534708, |
| "grad_norm": 0.35737310825619095, |
| "learning_rate": 7.905955583062833e-05, |
| "loss": 0.4231, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.8155493851646172, |
| "grad_norm": 0.3107479347233733, |
| "learning_rate": 7.904034267436004e-05, |
| "loss": 0.4091, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.8187227290757636, |
| "grad_norm": 0.3938008913327802, |
| "learning_rate": 7.902093761874867e-05, |
| "loss": 0.4184, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.8218960729869099, |
| "grad_norm": 0.36143088089529846, |
| "learning_rate": 7.900134075917807e-05, |
| "loss": 0.4188, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.8250694168980564, |
| "grad_norm": 0.27530043552201605, |
| "learning_rate": 7.898155219197488e-05, |
| "loss": 0.409, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.8282427608092027, |
| "grad_norm": 0.30129724789128026, |
| "learning_rate": 7.896157201440801e-05, |
| "loss": 0.4114, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.8314161047203491, |
| "grad_norm": 0.3380484464752199, |
| "learning_rate": 7.894140032468828e-05, |
| "loss": 0.413, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.8345894486314954, |
| "grad_norm": 0.3928098788344459, |
| "learning_rate": 7.892103722196782e-05, |
| "loss": 0.4138, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.8377627925426419, |
| "grad_norm": 0.36724870276604, |
| "learning_rate": 7.890048280633967e-05, |
| "loss": 0.4084, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.8409361364537882, |
| "grad_norm": 0.35512151600153574, |
| "learning_rate": 7.887973717883725e-05, |
| "loss": 0.4101, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.8441094803649345, |
| "grad_norm": 0.32038340926763376, |
| "learning_rate": 7.885880044143382e-05, |
| "loss": 0.4149, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.8472828242760809, |
| "grad_norm": 0.385102685025381, |
| "learning_rate": 7.883767269704209e-05, |
| "loss": 0.4141, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.8504561681872272, |
| "grad_norm": 0.41759937563314564, |
| "learning_rate": 7.88163540495136e-05, |
| "loss": 0.4168, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.8536295120983737, |
| "grad_norm": 0.47749975244001913, |
| "learning_rate": 7.879484460363825e-05, |
| "loss": 0.4164, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.85680285600952, |
| "grad_norm": 0.49048299669029655, |
| "learning_rate": 7.877314446514385e-05, |
| "loss": 0.4102, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.8599761999206664, |
| "grad_norm": 0.5033680425309713, |
| "learning_rate": 7.87512537406955e-05, |
| "loss": 0.4168, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.8631495438318127, |
| "grad_norm": 0.632095279857067, |
| "learning_rate": 7.87291725378951e-05, |
| "loss": 0.4192, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.8663228877429592, |
| "grad_norm": 0.7890821846662114, |
| "learning_rate": 7.870690096528084e-05, |
| "loss": 0.4104, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.8694962316541055, |
| "grad_norm": 0.8461099097389182, |
| "learning_rate": 7.868443913232669e-05, |
| "loss": 0.4166, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.8726695755652519, |
| "grad_norm": 0.7484663137119127, |
| "learning_rate": 7.866178714944178e-05, |
| "loss": 0.4141, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.8758429194763983, |
| "grad_norm": 0.45570202002824084, |
| "learning_rate": 7.863894512796992e-05, |
| "loss": 0.4123, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.8790162633875446, |
| "grad_norm": 0.4294812192002864, |
| "learning_rate": 7.861591318018904e-05, |
| "loss": 0.4144, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.882189607298691, |
| "grad_norm": 0.58391480443298, |
| "learning_rate": 7.859269141931065e-05, |
| "loss": 0.4131, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.8853629512098373, |
| "grad_norm": 0.47580822422237723, |
| "learning_rate": 7.856927995947925e-05, |
| "loss": 0.4086, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.8885362951209838, |
| "grad_norm": 0.38129055085982533, |
| "learning_rate": 7.854567891577179e-05, |
| "loss": 0.4184, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.8917096390321301, |
| "grad_norm": 0.43052433481163255, |
| "learning_rate": 7.852188840419711e-05, |
| "loss": 0.4096, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.8948829829432765, |
| "grad_norm": 0.4433184262594291, |
| "learning_rate": 7.849790854169536e-05, |
| "loss": 0.4104, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.8980563268544228, |
| "grad_norm": 0.4435382933392679, |
| "learning_rate": 7.847373944613745e-05, |
| "loss": 0.4059, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.9012296707655693, |
| "grad_norm": 0.40447203187517, |
| "learning_rate": 7.844938123632439e-05, |
| "loss": 0.4094, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.9044030146767156, |
| "grad_norm": 0.34449120043205606, |
| "learning_rate": 7.842483403198683e-05, |
| "loss": 0.408, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.907576358587862, |
| "grad_norm": 0.37462807346909405, |
| "learning_rate": 7.840009795378436e-05, |
| "loss": 0.4146, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.9107497024990083, |
| "grad_norm": 0.4704923604854831, |
| "learning_rate": 7.837517312330498e-05, |
| "loss": 0.4122, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.9139230464101546, |
| "grad_norm": 0.5097972249194679, |
| "learning_rate": 7.83500596630645e-05, |
| "loss": 0.416, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.9170963903213011, |
| "grad_norm": 0.5351289342983636, |
| "learning_rate": 7.832475769650588e-05, |
| "loss": 0.415, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.9202697342324474, |
| "grad_norm": 0.4674732233010469, |
| "learning_rate": 7.829926734799872e-05, |
| "loss": 0.4107, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.9234430781435938, |
| "grad_norm": 0.4263783054265977, |
| "learning_rate": 7.827358874283855e-05, |
| "loss": 0.4131, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.9266164220547402, |
| "grad_norm": 0.5102719397399545, |
| "learning_rate": 7.824772200724629e-05, |
| "loss": 0.4096, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.9297897659658866, |
| "grad_norm": 0.5129309020648893, |
| "learning_rate": 7.822166726836758e-05, |
| "loss": 0.4077, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.9329631098770329, |
| "grad_norm": 0.45936698859229286, |
| "learning_rate": 7.819542465427217e-05, |
| "loss": 0.4072, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.9361364537881793, |
| "grad_norm": 0.35187396508942126, |
| "learning_rate": 7.816899429395332e-05, |
| "loss": 0.41, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.9393097976993257, |
| "grad_norm": 0.3773900389073242, |
| "learning_rate": 7.814237631732711e-05, |
| "loss": 0.4097, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.9424831416104721, |
| "grad_norm": 0.48196894071104524, |
| "learning_rate": 7.811557085523187e-05, |
| "loss": 0.4125, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.9456564855216184, |
| "grad_norm": 0.43803354037854536, |
| "learning_rate": 7.808857803942741e-05, |
| "loss": 0.4047, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.9488298294327647, |
| "grad_norm": 0.36020876635054705, |
| "learning_rate": 7.80613980025946e-05, |
| "loss": 0.4005, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.9520031733439112, |
| "grad_norm": 0.2741750720795513, |
| "learning_rate": 7.803403087833444e-05, |
| "loss": 0.4023, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.9551765172550575, |
| "grad_norm": 0.25720288277565834, |
| "learning_rate": 7.800647680116764e-05, |
| "loss": 0.4041, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.9583498611662039, |
| "grad_norm": 0.3359257955372094, |
| "learning_rate": 7.797873590653381e-05, |
| "loss": 0.3991, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.9615232050773502, |
| "grad_norm": 0.37500047533306236, |
| "learning_rate": 7.795080833079084e-05, |
| "loss": 0.4075, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.9646965489884967, |
| "grad_norm": 0.4231851966084708, |
| "learning_rate": 7.792269421121429e-05, |
| "loss": 0.408, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.967869892899643, |
| "grad_norm": 0.46773700878625746, |
| "learning_rate": 7.78943936859966e-05, |
| "loss": 0.406, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.9710432368107894, |
| "grad_norm": 0.48276037296021734, |
| "learning_rate": 7.78659068942465e-05, |
| "loss": 0.4113, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.9742165807219357, |
| "grad_norm": 0.4520209571383191, |
| "learning_rate": 7.783723397598829e-05, |
| "loss": 0.418, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.9773899246330822, |
| "grad_norm": 0.4761928527698079, |
| "learning_rate": 7.780837507216114e-05, |
| "loss": 0.406, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.9805632685442285, |
| "grad_norm": 0.6222583620657315, |
| "learning_rate": 7.777933032461845e-05, |
| "loss": 0.4115, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.9837366124553748, |
| "grad_norm": 0.6260831309463507, |
| "learning_rate": 7.775009987612711e-05, |
| "loss": 0.4079, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.9869099563665212, |
| "grad_norm": 0.44371491044635886, |
| "learning_rate": 7.772068387036677e-05, |
| "loss": 0.4057, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.9900833002776676, |
| "grad_norm": 0.3096918566187467, |
| "learning_rate": 7.769108245192922e-05, |
| "loss": 0.4134, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.993256644188814, |
| "grad_norm": 0.497339716362162, |
| "learning_rate": 7.766129576631759e-05, |
| "loss": 0.41, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.9964299880999603, |
| "grad_norm": 0.5309378071519795, |
| "learning_rate": 7.763132395994572e-05, |
| "loss": 0.4023, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.9996033320111067, |
| "grad_norm": 0.44514647594528395, |
| "learning_rate": 7.760116718013735e-05, |
| "loss": 0.4079, |
| "step": 315 |
| }, |
| { |
| "epoch": 1.0027766759222532, |
| "grad_norm": 0.7972378708928999, |
| "learning_rate": 7.757082557512545e-05, |
| "loss": 0.7527, |
| "step": 316 |
| }, |
| { |
| "epoch": 1.0059500198333995, |
| "grad_norm": 1.9850051036948724, |
| "learning_rate": 7.75402992940515e-05, |
| "loss": 0.4284, |
| "step": 317 |
| }, |
| { |
| "epoch": 1.0091233637445458, |
| "grad_norm": 0.5262986098049574, |
| "learning_rate": 7.750958848696473e-05, |
| "loss": 0.3962, |
| "step": 318 |
| }, |
| { |
| "epoch": 1.0122967076556921, |
| "grad_norm": 1.5250119834033022, |
| "learning_rate": 7.747869330482137e-05, |
| "loss": 0.4258, |
| "step": 319 |
| }, |
| { |
| "epoch": 1.0154700515668384, |
| "grad_norm": 0.5638471189301302, |
| "learning_rate": 7.744761389948397e-05, |
| "loss": 0.4036, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.018643395477985, |
| "grad_norm": 0.9646189002979927, |
| "learning_rate": 7.741635042372059e-05, |
| "loss": 0.418, |
| "step": 321 |
| }, |
| { |
| "epoch": 1.0218167393891313, |
| "grad_norm": 0.9961382446454835, |
| "learning_rate": 7.738490303120407e-05, |
| "loss": 0.4059, |
| "step": 322 |
| }, |
| { |
| "epoch": 1.0249900833002776, |
| "grad_norm": 0.7909414038458441, |
| "learning_rate": 7.735327187651127e-05, |
| "loss": 0.4079, |
| "step": 323 |
| }, |
| { |
| "epoch": 1.028163427211424, |
| "grad_norm": 0.5245790298223422, |
| "learning_rate": 7.732145711512234e-05, |
| "loss": 0.4063, |
| "step": 324 |
| }, |
| { |
| "epoch": 1.0313367711225705, |
| "grad_norm": 0.7541383994484598, |
| "learning_rate": 7.728945890341991e-05, |
| "loss": 0.4016, |
| "step": 325 |
| }, |
| { |
| "epoch": 1.0345101150337168, |
| "grad_norm": 0.484806781405723, |
| "learning_rate": 7.725727739868837e-05, |
| "loss": 0.3989, |
| "step": 326 |
| }, |
| { |
| "epoch": 1.0376834589448631, |
| "grad_norm": 0.5797811834279861, |
| "learning_rate": 7.722491275911302e-05, |
| "loss": 0.4013, |
| "step": 327 |
| }, |
| { |
| "epoch": 1.0408568028560095, |
| "grad_norm": 0.45973779856894975, |
| "learning_rate": 7.71923651437794e-05, |
| "loss": 0.4022, |
| "step": 328 |
| }, |
| { |
| "epoch": 1.044030146767156, |
| "grad_norm": 0.4205253473707581, |
| "learning_rate": 7.715963471267243e-05, |
| "loss": 0.394, |
| "step": 329 |
| }, |
| { |
| "epoch": 1.0472034906783023, |
| "grad_norm": 0.3671719170264264, |
| "learning_rate": 7.712672162667563e-05, |
| "loss": 0.3953, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.0503768345894486, |
| "grad_norm": 0.3923903715008762, |
| "learning_rate": 7.709362604757037e-05, |
| "loss": 0.3941, |
| "step": 331 |
| }, |
| { |
| "epoch": 1.053550178500595, |
| "grad_norm": 0.38611037315155544, |
| "learning_rate": 7.706034813803501e-05, |
| "loss": 0.3967, |
| "step": 332 |
| }, |
| { |
| "epoch": 1.0567235224117413, |
| "grad_norm": 0.3205248790594937, |
| "learning_rate": 7.702688806164419e-05, |
| "loss": 0.3978, |
| "step": 333 |
| }, |
| { |
| "epoch": 1.0598968663228878, |
| "grad_norm": 0.3851041881561772, |
| "learning_rate": 7.699324598286794e-05, |
| "loss": 0.3969, |
| "step": 334 |
| }, |
| { |
| "epoch": 1.0630702102340341, |
| "grad_norm": 0.3048551045247859, |
| "learning_rate": 7.69594220670709e-05, |
| "loss": 0.3981, |
| "step": 335 |
| }, |
| { |
| "epoch": 1.0662435541451805, |
| "grad_norm": 0.30649990794634424, |
| "learning_rate": 7.692541648051156e-05, |
| "loss": 0.3883, |
| "step": 336 |
| }, |
| { |
| "epoch": 1.0694168980563268, |
| "grad_norm": 0.28629978941116185, |
| "learning_rate": 7.689122939034135e-05, |
| "loss": 0.391, |
| "step": 337 |
| }, |
| { |
| "epoch": 1.0725902419674733, |
| "grad_norm": 0.2815013785922316, |
| "learning_rate": 7.685686096460387e-05, |
| "loss": 0.3869, |
| "step": 338 |
| }, |
| { |
| "epoch": 1.0757635858786196, |
| "grad_norm": 0.2953463475399726, |
| "learning_rate": 7.682231137223409e-05, |
| "loss": 0.3913, |
| "step": 339 |
| }, |
| { |
| "epoch": 1.078936929789766, |
| "grad_norm": 0.2366855254680076, |
| "learning_rate": 7.678758078305745e-05, |
| "loss": 0.3883, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.0821102737009123, |
| "grad_norm": 0.22104183899331858, |
| "learning_rate": 7.67526693677891e-05, |
| "loss": 0.3926, |
| "step": 341 |
| }, |
| { |
| "epoch": 1.0852836176120586, |
| "grad_norm": 0.22652465216086823, |
| "learning_rate": 7.671757729803299e-05, |
| "loss": 0.3831, |
| "step": 342 |
| }, |
| { |
| "epoch": 1.0884569615232051, |
| "grad_norm": 0.2160165389313366, |
| "learning_rate": 7.668230474628108e-05, |
| "loss": 0.3921, |
| "step": 343 |
| }, |
| { |
| "epoch": 1.0916303054343515, |
| "grad_norm": 0.2107397076821849, |
| "learning_rate": 7.664685188591246e-05, |
| "loss": 0.3855, |
| "step": 344 |
| }, |
| { |
| "epoch": 1.0948036493454978, |
| "grad_norm": 0.24162805284413108, |
| "learning_rate": 7.661121889119257e-05, |
| "loss": 0.3864, |
| "step": 345 |
| }, |
| { |
| "epoch": 1.097976993256644, |
| "grad_norm": 0.20058743270523813, |
| "learning_rate": 7.657540593727218e-05, |
| "loss": 0.3898, |
| "step": 346 |
| }, |
| { |
| "epoch": 1.1011503371677906, |
| "grad_norm": 0.19024320800558892, |
| "learning_rate": 7.653941320018672e-05, |
| "loss": 0.3881, |
| "step": 347 |
| }, |
| { |
| "epoch": 1.104323681078937, |
| "grad_norm": 0.19348467821715107, |
| "learning_rate": 7.650324085685528e-05, |
| "loss": 0.3861, |
| "step": 348 |
| }, |
| { |
| "epoch": 1.1074970249900833, |
| "grad_norm": 0.17855591849282623, |
| "learning_rate": 7.646688908507983e-05, |
| "loss": 0.3879, |
| "step": 349 |
| }, |
| { |
| "epoch": 1.1106703689012296, |
| "grad_norm": 0.17423718420205644, |
| "learning_rate": 7.643035806354427e-05, |
| "loss": 0.393, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.113843712812376, |
| "grad_norm": 0.17435999115090034, |
| "learning_rate": 7.639364797181359e-05, |
| "loss": 0.3923, |
| "step": 351 |
| }, |
| { |
| "epoch": 1.1170170567235225, |
| "grad_norm": 0.1709673536337813, |
| "learning_rate": 7.6356758990333e-05, |
| "loss": 0.3906, |
| "step": 352 |
| }, |
| { |
| "epoch": 1.1201904006346688, |
| "grad_norm": 0.17317855376977867, |
| "learning_rate": 7.6319691300427e-05, |
| "loss": 0.3888, |
| "step": 353 |
| }, |
| { |
| "epoch": 1.1233637445458151, |
| "grad_norm": 0.18174880626145437, |
| "learning_rate": 7.628244508429856e-05, |
| "loss": 0.3865, |
| "step": 354 |
| }, |
| { |
| "epoch": 1.1265370884569614, |
| "grad_norm": 0.20331689410780052, |
| "learning_rate": 7.624502052502814e-05, |
| "loss": 0.3874, |
| "step": 355 |
| }, |
| { |
| "epoch": 1.129710432368108, |
| "grad_norm": 0.23136698366970124, |
| "learning_rate": 7.620741780657284e-05, |
| "loss": 0.3862, |
| "step": 356 |
| }, |
| { |
| "epoch": 1.1328837762792543, |
| "grad_norm": 0.2578608983871263, |
| "learning_rate": 7.61696371137655e-05, |
| "loss": 0.3885, |
| "step": 357 |
| }, |
| { |
| "epoch": 1.1360571201904006, |
| "grad_norm": 0.31713285899714155, |
| "learning_rate": 7.613167863231376e-05, |
| "loss": 0.3858, |
| "step": 358 |
| }, |
| { |
| "epoch": 1.139230464101547, |
| "grad_norm": 0.4030624344185451, |
| "learning_rate": 7.609354254879916e-05, |
| "loss": 0.3894, |
| "step": 359 |
| }, |
| { |
| "epoch": 1.1424038080126935, |
| "grad_norm": 0.5246514767856207, |
| "learning_rate": 7.605522905067626e-05, |
| "loss": 0.3852, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.1455771519238398, |
| "grad_norm": 0.5257615152434831, |
| "learning_rate": 7.601673832627162e-05, |
| "loss": 0.3892, |
| "step": 361 |
| }, |
| { |
| "epoch": 1.1487504958349861, |
| "grad_norm": 0.47164366725819173, |
| "learning_rate": 7.597807056478304e-05, |
| "loss": 0.3941, |
| "step": 362 |
| }, |
| { |
| "epoch": 1.1519238397461324, |
| "grad_norm": 0.40276010096670556, |
| "learning_rate": 7.593922595627843e-05, |
| "loss": 0.3895, |
| "step": 363 |
| }, |
| { |
| "epoch": 1.1550971836572788, |
| "grad_norm": 0.3335379837317923, |
| "learning_rate": 7.590020469169505e-05, |
| "loss": 0.3868, |
| "step": 364 |
| }, |
| { |
| "epoch": 1.1582705275684253, |
| "grad_norm": 0.389480610213482, |
| "learning_rate": 7.586100696283845e-05, |
| "loss": 0.3888, |
| "step": 365 |
| }, |
| { |
| "epoch": 1.1614438714795716, |
| "grad_norm": 0.40641502830320464, |
| "learning_rate": 7.582163296238158e-05, |
| "loss": 0.3886, |
| "step": 366 |
| }, |
| { |
| "epoch": 1.164617215390718, |
| "grad_norm": 0.34031545798126167, |
| "learning_rate": 7.578208288386386e-05, |
| "loss": 0.3888, |
| "step": 367 |
| }, |
| { |
| "epoch": 1.1677905593018643, |
| "grad_norm": 0.3375442621679508, |
| "learning_rate": 7.574235692169021e-05, |
| "loss": 0.3901, |
| "step": 368 |
| }, |
| { |
| "epoch": 1.1709639032130108, |
| "grad_norm": 0.34505650189241893, |
| "learning_rate": 7.570245527113004e-05, |
| "loss": 0.3875, |
| "step": 369 |
| }, |
| { |
| "epoch": 1.1741372471241571, |
| "grad_norm": 0.32855600104681076, |
| "learning_rate": 7.566237812831641e-05, |
| "loss": 0.3808, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.1773105910353034, |
| "grad_norm": 0.33479682880517575, |
| "learning_rate": 7.562212569024494e-05, |
| "loss": 0.3831, |
| "step": 371 |
| }, |
| { |
| "epoch": 1.1804839349464498, |
| "grad_norm": 0.3323687537285638, |
| "learning_rate": 7.558169815477293e-05, |
| "loss": 0.3763, |
| "step": 372 |
| }, |
| { |
| "epoch": 1.1836572788575963, |
| "grad_norm": 0.29597912982148444, |
| "learning_rate": 7.554109572061835e-05, |
| "loss": 0.384, |
| "step": 373 |
| }, |
| { |
| "epoch": 1.1868306227687426, |
| "grad_norm": 0.2608818292389468, |
| "learning_rate": 7.550031858735885e-05, |
| "loss": 0.3844, |
| "step": 374 |
| }, |
| { |
| "epoch": 1.190003966679889, |
| "grad_norm": 0.2362089436062283, |
| "learning_rate": 7.545936695543084e-05, |
| "loss": 0.3847, |
| "step": 375 |
| }, |
| { |
| "epoch": 1.1931773105910353, |
| "grad_norm": 0.27249864452918143, |
| "learning_rate": 7.541824102612839e-05, |
| "loss": 0.3843, |
| "step": 376 |
| }, |
| { |
| "epoch": 1.1963506545021816, |
| "grad_norm": 0.291889265790341, |
| "learning_rate": 7.537694100160242e-05, |
| "loss": 0.385, |
| "step": 377 |
| }, |
| { |
| "epoch": 1.1995239984133281, |
| "grad_norm": 0.26769841877142225, |
| "learning_rate": 7.533546708485949e-05, |
| "loss": 0.3857, |
| "step": 378 |
| }, |
| { |
| "epoch": 1.2026973423244744, |
| "grad_norm": 0.25310229652456356, |
| "learning_rate": 7.529381947976097e-05, |
| "loss": 0.3842, |
| "step": 379 |
| }, |
| { |
| "epoch": 1.2058706862356208, |
| "grad_norm": 0.3794658972915025, |
| "learning_rate": 7.525199839102198e-05, |
| "loss": 0.3853, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.209044030146767, |
| "grad_norm": 0.4433997626787792, |
| "learning_rate": 7.521000402421039e-05, |
| "loss": 0.3871, |
| "step": 381 |
| }, |
| { |
| "epoch": 1.2122173740579134, |
| "grad_norm": 0.44670046763987153, |
| "learning_rate": 7.516783658574575e-05, |
| "loss": 0.38, |
| "step": 382 |
| }, |
| { |
| "epoch": 1.21539071796906, |
| "grad_norm": 0.4470855938681001, |
| "learning_rate": 7.51254962828984e-05, |
| "loss": 0.3883, |
| "step": 383 |
| }, |
| { |
| "epoch": 1.2185640618802063, |
| "grad_norm": 0.37472305499391584, |
| "learning_rate": 7.508298332378832e-05, |
| "loss": 0.3874, |
| "step": 384 |
| }, |
| { |
| "epoch": 1.2217374057913526, |
| "grad_norm": 0.2675107029023701, |
| "learning_rate": 7.504029791738419e-05, |
| "loss": 0.3797, |
| "step": 385 |
| }, |
| { |
| "epoch": 1.224910749702499, |
| "grad_norm": 0.2692926766700756, |
| "learning_rate": 7.499744027350236e-05, |
| "loss": 0.3877, |
| "step": 386 |
| }, |
| { |
| "epoch": 1.2280840936136455, |
| "grad_norm": 0.35704547610000964, |
| "learning_rate": 7.495441060280577e-05, |
| "loss": 0.3847, |
| "step": 387 |
| }, |
| { |
| "epoch": 1.2312574375247918, |
| "grad_norm": 0.393025585260643, |
| "learning_rate": 7.491120911680295e-05, |
| "loss": 0.3833, |
| "step": 388 |
| }, |
| { |
| "epoch": 1.234430781435938, |
| "grad_norm": 0.34992195520115954, |
| "learning_rate": 7.486783602784697e-05, |
| "loss": 0.3804, |
| "step": 389 |
| }, |
| { |
| "epoch": 1.2376041253470844, |
| "grad_norm": 0.26770410504282594, |
| "learning_rate": 7.48242915491344e-05, |
| "loss": 0.3792, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.240777469258231, |
| "grad_norm": 0.21538249724047004, |
| "learning_rate": 7.478057589470429e-05, |
| "loss": 0.3823, |
| "step": 391 |
| }, |
| { |
| "epoch": 1.2439508131693773, |
| "grad_norm": 0.24012637317741398, |
| "learning_rate": 7.473668927943703e-05, |
| "loss": 0.3898, |
| "step": 392 |
| }, |
| { |
| "epoch": 1.2471241570805236, |
| "grad_norm": 0.21927612950882752, |
| "learning_rate": 7.469263191905342e-05, |
| "loss": 0.3832, |
| "step": 393 |
| }, |
| { |
| "epoch": 1.25029750099167, |
| "grad_norm": 0.22562831710853817, |
| "learning_rate": 7.464840403011348e-05, |
| "loss": 0.3781, |
| "step": 394 |
| }, |
| { |
| "epoch": 1.2534708449028162, |
| "grad_norm": 0.25428834981926673, |
| "learning_rate": 7.460400583001549e-05, |
| "loss": 0.3812, |
| "step": 395 |
| }, |
| { |
| "epoch": 1.2566441888139628, |
| "grad_norm": 0.2702984486835505, |
| "learning_rate": 7.455943753699485e-05, |
| "loss": 0.3866, |
| "step": 396 |
| }, |
| { |
| "epoch": 1.259817532725109, |
| "grad_norm": 0.24121323703671108, |
| "learning_rate": 7.451469937012308e-05, |
| "loss": 0.3873, |
| "step": 397 |
| }, |
| { |
| "epoch": 1.2629908766362554, |
| "grad_norm": 0.18653342749296287, |
| "learning_rate": 7.446979154930664e-05, |
| "loss": 0.3825, |
| "step": 398 |
| }, |
| { |
| "epoch": 1.2661642205474017, |
| "grad_norm": 0.16332575184703055, |
| "learning_rate": 7.4424714295286e-05, |
| "loss": 0.3815, |
| "step": 399 |
| }, |
| { |
| "epoch": 1.269337564458548, |
| "grad_norm": 0.25403145188276544, |
| "learning_rate": 7.437946782963434e-05, |
| "loss": 0.383, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.2725109083696946, |
| "grad_norm": 0.3444161535033614, |
| "learning_rate": 7.433405237475668e-05, |
| "loss": 0.3813, |
| "step": 401 |
| }, |
| { |
| "epoch": 1.275684252280841, |
| "grad_norm": 0.3519022102988297, |
| "learning_rate": 7.428846815388867e-05, |
| "loss": 0.3855, |
| "step": 402 |
| }, |
| { |
| "epoch": 1.2788575961919872, |
| "grad_norm": 0.33077524829421917, |
| "learning_rate": 7.424271539109548e-05, |
| "loss": 0.3821, |
| "step": 403 |
| }, |
| { |
| "epoch": 1.2820309401031338, |
| "grad_norm": 0.3162039421386741, |
| "learning_rate": 7.419679431127078e-05, |
| "loss": 0.3851, |
| "step": 404 |
| }, |
| { |
| "epoch": 1.28520428401428, |
| "grad_norm": 0.32717321840623165, |
| "learning_rate": 7.415070514013554e-05, |
| "loss": 0.3835, |
| "step": 405 |
| }, |
| { |
| "epoch": 1.2883776279254264, |
| "grad_norm": 0.3260212879527716, |
| "learning_rate": 7.410444810423703e-05, |
| "loss": 0.3851, |
| "step": 406 |
| }, |
| { |
| "epoch": 1.2915509718365727, |
| "grad_norm": 0.28141296423668416, |
| "learning_rate": 7.405802343094761e-05, |
| "loss": 0.384, |
| "step": 407 |
| }, |
| { |
| "epoch": 1.294724315747719, |
| "grad_norm": 0.29242681028873885, |
| "learning_rate": 7.401143134846361e-05, |
| "loss": 0.3845, |
| "step": 408 |
| }, |
| { |
| "epoch": 1.2978976596588656, |
| "grad_norm": 0.3089571656384374, |
| "learning_rate": 7.396467208580431e-05, |
| "loss": 0.3876, |
| "step": 409 |
| }, |
| { |
| "epoch": 1.301071003570012, |
| "grad_norm": 0.2727831374787424, |
| "learning_rate": 7.39177458728107e-05, |
| "loss": 0.3853, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.3042443474811583, |
| "grad_norm": 0.24218021592794953, |
| "learning_rate": 7.387065294014444e-05, |
| "loss": 0.3854, |
| "step": 411 |
| }, |
| { |
| "epoch": 1.3074176913923046, |
| "grad_norm": 0.26309492335546847, |
| "learning_rate": 7.382339351928664e-05, |
| "loss": 0.3831, |
| "step": 412 |
| }, |
| { |
| "epoch": 1.310591035303451, |
| "grad_norm": 0.2590023202596478, |
| "learning_rate": 7.377596784253682e-05, |
| "loss": 0.3874, |
| "step": 413 |
| }, |
| { |
| "epoch": 1.3137643792145974, |
| "grad_norm": 0.22355363656378588, |
| "learning_rate": 7.372837614301167e-05, |
| "loss": 0.3794, |
| "step": 414 |
| }, |
| { |
| "epoch": 1.3169377231257438, |
| "grad_norm": 0.23277360549451911, |
| "learning_rate": 7.368061865464398e-05, |
| "loss": 0.3823, |
| "step": 415 |
| }, |
| { |
| "epoch": 1.32011106703689, |
| "grad_norm": 0.29759664882744374, |
| "learning_rate": 7.363269561218144e-05, |
| "loss": 0.3854, |
| "step": 416 |
| }, |
| { |
| "epoch": 1.3232844109480366, |
| "grad_norm": 0.37555739230090074, |
| "learning_rate": 7.358460725118553e-05, |
| "loss": 0.3796, |
| "step": 417 |
| }, |
| { |
| "epoch": 1.326457754859183, |
| "grad_norm": 0.4302124224232923, |
| "learning_rate": 7.353635380803031e-05, |
| "loss": 0.3887, |
| "step": 418 |
| }, |
| { |
| "epoch": 1.3296310987703293, |
| "grad_norm": 0.4773944462131208, |
| "learning_rate": 7.348793551990132e-05, |
| "loss": 0.3814, |
| "step": 419 |
| }, |
| { |
| "epoch": 1.3328044426814756, |
| "grad_norm": 0.5403161096787458, |
| "learning_rate": 7.343935262479433e-05, |
| "loss": 0.3823, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.335977786592622, |
| "grad_norm": 0.5311329635820989, |
| "learning_rate": 7.33906053615143e-05, |
| "loss": 0.3863, |
| "step": 421 |
| }, |
| { |
| "epoch": 1.3391511305037684, |
| "grad_norm": 0.44347216349851915, |
| "learning_rate": 7.334169396967403e-05, |
| "loss": 0.3811, |
| "step": 422 |
| }, |
| { |
| "epoch": 1.3423244744149148, |
| "grad_norm": 0.4038867029004121, |
| "learning_rate": 7.329261868969318e-05, |
| "loss": 0.3786, |
| "step": 423 |
| }, |
| { |
| "epoch": 1.345497818326061, |
| "grad_norm": 0.42166277469573826, |
| "learning_rate": 7.324337976279688e-05, |
| "loss": 0.386, |
| "step": 424 |
| }, |
| { |
| "epoch": 1.3486711622372074, |
| "grad_norm": 0.4142405224624998, |
| "learning_rate": 7.319397743101478e-05, |
| "loss": 0.3824, |
| "step": 425 |
| }, |
| { |
| "epoch": 1.3518445061483537, |
| "grad_norm": 0.36700141049097496, |
| "learning_rate": 7.31444119371796e-05, |
| "loss": 0.3829, |
| "step": 426 |
| }, |
| { |
| "epoch": 1.3550178500595003, |
| "grad_norm": 0.3135329163503587, |
| "learning_rate": 7.309468352492616e-05, |
| "loss": 0.3824, |
| "step": 427 |
| }, |
| { |
| "epoch": 1.3581911939706466, |
| "grad_norm": 0.2823327229949492, |
| "learning_rate": 7.304479243869007e-05, |
| "loss": 0.376, |
| "step": 428 |
| }, |
| { |
| "epoch": 1.361364537881793, |
| "grad_norm": 0.30150602495208256, |
| "learning_rate": 7.299473892370651e-05, |
| "loss": 0.3792, |
| "step": 429 |
| }, |
| { |
| "epoch": 1.3645378817929394, |
| "grad_norm": 0.2849241799804419, |
| "learning_rate": 7.294452322600912e-05, |
| "loss": 0.374, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.3677112257040855, |
| "grad_norm": 0.23906164695179707, |
| "learning_rate": 7.289414559242871e-05, |
| "loss": 0.3841, |
| "step": 431 |
| }, |
| { |
| "epoch": 1.370884569615232, |
| "grad_norm": 0.27790217133033873, |
| "learning_rate": 7.284360627059205e-05, |
| "loss": 0.3894, |
| "step": 432 |
| }, |
| { |
| "epoch": 1.3740579135263784, |
| "grad_norm": 0.27965618860067953, |
| "learning_rate": 7.279290550892071e-05, |
| "loss": 0.3856, |
| "step": 433 |
| }, |
| { |
| "epoch": 1.3772312574375247, |
| "grad_norm": 0.2596553182069096, |
| "learning_rate": 7.274204355662981e-05, |
| "loss": 0.3835, |
| "step": 434 |
| }, |
| { |
| "epoch": 1.3804046013486713, |
| "grad_norm": 0.22914836462750776, |
| "learning_rate": 7.269102066372672e-05, |
| "loss": 0.3801, |
| "step": 435 |
| }, |
| { |
| "epoch": 1.3835779452598176, |
| "grad_norm": 0.24961150632312626, |
| "learning_rate": 7.263983708100998e-05, |
| "loss": 0.3792, |
| "step": 436 |
| }, |
| { |
| "epoch": 1.386751289170964, |
| "grad_norm": 0.2799354977533222, |
| "learning_rate": 7.258849306006796e-05, |
| "loss": 0.3785, |
| "step": 437 |
| }, |
| { |
| "epoch": 1.3899246330821102, |
| "grad_norm": 0.2609555149345694, |
| "learning_rate": 7.253698885327761e-05, |
| "loss": 0.3877, |
| "step": 438 |
| }, |
| { |
| "epoch": 1.3930979769932565, |
| "grad_norm": 0.25476756664542266, |
| "learning_rate": 7.24853247138033e-05, |
| "loss": 0.3865, |
| "step": 439 |
| }, |
| { |
| "epoch": 1.396271320904403, |
| "grad_norm": 0.2853597140584723, |
| "learning_rate": 7.243350089559555e-05, |
| "loss": 0.3835, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.3994446648155494, |
| "grad_norm": 0.3037545404410833, |
| "learning_rate": 7.238151765338974e-05, |
| "loss": 0.3755, |
| "step": 441 |
| }, |
| { |
| "epoch": 1.4026180087266957, |
| "grad_norm": 0.32448084195253646, |
| "learning_rate": 7.232937524270486e-05, |
| "loss": 0.3824, |
| "step": 442 |
| }, |
| { |
| "epoch": 1.405791352637842, |
| "grad_norm": 0.3389903691526235, |
| "learning_rate": 7.227707391984233e-05, |
| "loss": 0.3851, |
| "step": 443 |
| }, |
| { |
| "epoch": 1.4089646965489884, |
| "grad_norm": 0.32339033942705137, |
| "learning_rate": 7.222461394188467e-05, |
| "loss": 0.3809, |
| "step": 444 |
| }, |
| { |
| "epoch": 1.412138040460135, |
| "grad_norm": 0.3004560485468966, |
| "learning_rate": 7.217199556669423e-05, |
| "loss": 0.3753, |
| "step": 445 |
| }, |
| { |
| "epoch": 1.4153113843712812, |
| "grad_norm": 0.2963352362165301, |
| "learning_rate": 7.211921905291198e-05, |
| "loss": 0.3845, |
| "step": 446 |
| }, |
| { |
| "epoch": 1.4184847282824276, |
| "grad_norm": 0.3394987023293836, |
| "learning_rate": 7.20662846599562e-05, |
| "loss": 0.3829, |
| "step": 447 |
| }, |
| { |
| "epoch": 1.421658072193574, |
| "grad_norm": 0.3691233795245846, |
| "learning_rate": 7.201319264802118e-05, |
| "loss": 0.3746, |
| "step": 448 |
| }, |
| { |
| "epoch": 1.4248314161047204, |
| "grad_norm": 0.419112740006911, |
| "learning_rate": 7.195994327807603e-05, |
| "loss": 0.3889, |
| "step": 449 |
| }, |
| { |
| "epoch": 1.4280047600158667, |
| "grad_norm": 0.4313621841160134, |
| "learning_rate": 7.19065368118633e-05, |
| "loss": 0.3821, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.431178103927013, |
| "grad_norm": 0.3566308145795978, |
| "learning_rate": 7.185297351189771e-05, |
| "loss": 0.3844, |
| "step": 451 |
| }, |
| { |
| "epoch": 1.4343514478381594, |
| "grad_norm": 0.3637230498145468, |
| "learning_rate": 7.179925364146496e-05, |
| "loss": 0.3765, |
| "step": 452 |
| }, |
| { |
| "epoch": 1.437524791749306, |
| "grad_norm": 0.3468382349782321, |
| "learning_rate": 7.174537746462027e-05, |
| "loss": 0.3744, |
| "step": 453 |
| }, |
| { |
| "epoch": 1.4406981356604522, |
| "grad_norm": 0.3131084073191254, |
| "learning_rate": 7.169134524618723e-05, |
| "loss": 0.3814, |
| "step": 454 |
| }, |
| { |
| "epoch": 1.4438714795715986, |
| "grad_norm": 0.23863373901146623, |
| "learning_rate": 7.163715725175641e-05, |
| "loss": 0.3848, |
| "step": 455 |
| }, |
| { |
| "epoch": 1.4470448234827449, |
| "grad_norm": 0.25976033784025593, |
| "learning_rate": 7.15828137476841e-05, |
| "loss": 0.3836, |
| "step": 456 |
| }, |
| { |
| "epoch": 1.4502181673938912, |
| "grad_norm": 0.3648800662769983, |
| "learning_rate": 7.152831500109096e-05, |
| "loss": 0.3848, |
| "step": 457 |
| }, |
| { |
| "epoch": 1.4533915113050377, |
| "grad_norm": 0.3566172184643027, |
| "learning_rate": 7.14736612798608e-05, |
| "loss": 0.382, |
| "step": 458 |
| }, |
| { |
| "epoch": 1.456564855216184, |
| "grad_norm": 0.3135261324431755, |
| "learning_rate": 7.141885285263906e-05, |
| "loss": 0.3741, |
| "step": 459 |
| }, |
| { |
| "epoch": 1.4597381991273304, |
| "grad_norm": 0.3588285727185198, |
| "learning_rate": 7.136388998883176e-05, |
| "loss": 0.3836, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.462911543038477, |
| "grad_norm": 0.4144697912185411, |
| "learning_rate": 7.130877295860396e-05, |
| "loss": 0.3814, |
| "step": 461 |
| }, |
| { |
| "epoch": 1.4660848869496232, |
| "grad_norm": 0.27886664253240845, |
| "learning_rate": 7.125350203287856e-05, |
| "loss": 0.3793, |
| "step": 462 |
| }, |
| { |
| "epoch": 1.4692582308607696, |
| "grad_norm": 0.3160540382900367, |
| "learning_rate": 7.119807748333488e-05, |
| "loss": 0.382, |
| "step": 463 |
| }, |
| { |
| "epoch": 1.4724315747719159, |
| "grad_norm": 0.43245832112598737, |
| "learning_rate": 7.114249958240736e-05, |
| "loss": 0.3792, |
| "step": 464 |
| }, |
| { |
| "epoch": 1.4756049186830622, |
| "grad_norm": 0.3929381559573092, |
| "learning_rate": 7.108676860328429e-05, |
| "loss": 0.3813, |
| "step": 465 |
| }, |
| { |
| "epoch": 1.4787782625942087, |
| "grad_norm": 0.36518334446855616, |
| "learning_rate": 7.103088481990631e-05, |
| "loss": 0.3794, |
| "step": 466 |
| }, |
| { |
| "epoch": 1.481951606505355, |
| "grad_norm": 0.4062717060922552, |
| "learning_rate": 7.097484850696523e-05, |
| "loss": 0.3788, |
| "step": 467 |
| }, |
| { |
| "epoch": 1.4851249504165014, |
| "grad_norm": 0.3985440491593577, |
| "learning_rate": 7.091865993990257e-05, |
| "loss": 0.3839, |
| "step": 468 |
| }, |
| { |
| "epoch": 1.4882982943276477, |
| "grad_norm": 0.2478605758310385, |
| "learning_rate": 7.086231939490825e-05, |
| "loss": 0.3822, |
| "step": 469 |
| }, |
| { |
| "epoch": 1.491471638238794, |
| "grad_norm": 0.2198881914089004, |
| "learning_rate": 7.080582714891922e-05, |
| "loss": 0.3844, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.4946449821499406, |
| "grad_norm": 0.30913344433263185, |
| "learning_rate": 7.074918347961812e-05, |
| "loss": 0.383, |
| "step": 471 |
| }, |
| { |
| "epoch": 1.497818326061087, |
| "grad_norm": 0.3416286926665111, |
| "learning_rate": 7.069238866543186e-05, |
| "loss": 0.3836, |
| "step": 472 |
| }, |
| { |
| "epoch": 1.5009916699722332, |
| "grad_norm": 0.26203884165321145, |
| "learning_rate": 7.063544298553036e-05, |
| "loss": 0.3857, |
| "step": 473 |
| }, |
| { |
| "epoch": 1.5041650138833798, |
| "grad_norm": 0.2277269534373725, |
| "learning_rate": 7.0578346719825e-05, |
| "loss": 0.3768, |
| "step": 474 |
| }, |
| { |
| "epoch": 1.5073383577945259, |
| "grad_norm": 0.29105785846246257, |
| "learning_rate": 7.052110014896745e-05, |
| "loss": 0.383, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.5105117017056724, |
| "grad_norm": 0.2943191343272462, |
| "learning_rate": 7.046370355434814e-05, |
| "loss": 0.3824, |
| "step": 476 |
| }, |
| { |
| "epoch": 1.5136850456168187, |
| "grad_norm": 0.25257147532228097, |
| "learning_rate": 7.040615721809495e-05, |
| "loss": 0.3832, |
| "step": 477 |
| }, |
| { |
| "epoch": 1.516858389527965, |
| "grad_norm": 0.227234673966253, |
| "learning_rate": 7.03484614230718e-05, |
| "loss": 0.3744, |
| "step": 478 |
| }, |
| { |
| "epoch": 1.5200317334391116, |
| "grad_norm": 0.24823996008997273, |
| "learning_rate": 7.029061645287724e-05, |
| "loss": 0.3796, |
| "step": 479 |
| }, |
| { |
| "epoch": 1.5232050773502577, |
| "grad_norm": 0.2871638979837815, |
| "learning_rate": 7.023262259184309e-05, |
| "loss": 0.381, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.5263784212614042, |
| "grad_norm": 0.3204999675625282, |
| "learning_rate": 7.017448012503306e-05, |
| "loss": 0.3798, |
| "step": 481 |
| }, |
| { |
| "epoch": 1.5295517651725505, |
| "grad_norm": 0.3133074262497558, |
| "learning_rate": 7.011618933824124e-05, |
| "loss": 0.3811, |
| "step": 482 |
| }, |
| { |
| "epoch": 1.5327251090836969, |
| "grad_norm": 0.2699740914992895, |
| "learning_rate": 7.005775051799088e-05, |
| "loss": 0.3764, |
| "step": 483 |
| }, |
| { |
| "epoch": 1.5358984529948434, |
| "grad_norm": 0.26103326951221123, |
| "learning_rate": 6.999916395153279e-05, |
| "loss": 0.3839, |
| "step": 484 |
| }, |
| { |
| "epoch": 1.5390717969059897, |
| "grad_norm": 0.319889184006099, |
| "learning_rate": 6.994042992684406e-05, |
| "loss": 0.3807, |
| "step": 485 |
| }, |
| { |
| "epoch": 1.542245140817136, |
| "grad_norm": 0.3366203795427369, |
| "learning_rate": 6.988154873262655e-05, |
| "loss": 0.3859, |
| "step": 486 |
| }, |
| { |
| "epoch": 1.5454184847282826, |
| "grad_norm": 0.31334787713113593, |
| "learning_rate": 6.982252065830557e-05, |
| "loss": 0.3801, |
| "step": 487 |
| }, |
| { |
| "epoch": 1.5485918286394287, |
| "grad_norm": 0.2612084722641158, |
| "learning_rate": 6.976334599402838e-05, |
| "loss": 0.3818, |
| "step": 488 |
| }, |
| { |
| "epoch": 1.5517651725505752, |
| "grad_norm": 0.23800865263666302, |
| "learning_rate": 6.970402503066281e-05, |
| "loss": 0.3796, |
| "step": 489 |
| }, |
| { |
| "epoch": 1.5549385164617215, |
| "grad_norm": 0.20619756878025533, |
| "learning_rate": 6.96445580597958e-05, |
| "loss": 0.3784, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.5581118603728679, |
| "grad_norm": 0.22823026276130054, |
| "learning_rate": 6.958494537373194e-05, |
| "loss": 0.3898, |
| "step": 491 |
| }, |
| { |
| "epoch": 1.5612852042840144, |
| "grad_norm": 0.2320286176349148, |
| "learning_rate": 6.952518726549212e-05, |
| "loss": 0.3776, |
| "step": 492 |
| }, |
| { |
| "epoch": 1.5644585481951605, |
| "grad_norm": 0.18333626484330504, |
| "learning_rate": 6.946528402881204e-05, |
| "loss": 0.3768, |
| "step": 493 |
| }, |
| { |
| "epoch": 1.567631892106307, |
| "grad_norm": 0.23594130345003322, |
| "learning_rate": 6.940523595814073e-05, |
| "loss": 0.3813, |
| "step": 494 |
| }, |
| { |
| "epoch": 1.5708052360174534, |
| "grad_norm": 0.28075187874529217, |
| "learning_rate": 6.934504334863915e-05, |
| "loss": 0.3831, |
| "step": 495 |
| }, |
| { |
| "epoch": 1.5739785799285997, |
| "grad_norm": 0.3164356465842641, |
| "learning_rate": 6.928470649617876e-05, |
| "loss": 0.3828, |
| "step": 496 |
| }, |
| { |
| "epoch": 1.5771519238397462, |
| "grad_norm": 0.29758964129105847, |
| "learning_rate": 6.922422569733998e-05, |
| "loss": 0.3784, |
| "step": 497 |
| }, |
| { |
| "epoch": 1.5803252677508925, |
| "grad_norm": 0.2444339210595453, |
| "learning_rate": 6.916360124941084e-05, |
| "loss": 0.3834, |
| "step": 498 |
| }, |
| { |
| "epoch": 1.5834986116620389, |
| "grad_norm": 0.20948868463222103, |
| "learning_rate": 6.910283345038542e-05, |
| "loss": 0.3717, |
| "step": 499 |
| }, |
| { |
| "epoch": 1.5866719555731852, |
| "grad_norm": 0.2172132590543899, |
| "learning_rate": 6.904192259896247e-05, |
| "loss": 0.3725, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.5898452994843315, |
| "grad_norm": 0.21384474550035673, |
| "learning_rate": 6.898086899454387e-05, |
| "loss": 0.3813, |
| "step": 501 |
| }, |
| { |
| "epoch": 1.593018643395478, |
| "grad_norm": 0.1765294609272397, |
| "learning_rate": 6.891967293723318e-05, |
| "loss": 0.3754, |
| "step": 502 |
| }, |
| { |
| "epoch": 1.5961919873066244, |
| "grad_norm": 0.19836511713366814, |
| "learning_rate": 6.885833472783422e-05, |
| "loss": 0.3789, |
| "step": 503 |
| }, |
| { |
| "epoch": 1.5993653312177707, |
| "grad_norm": 0.24256580925484045, |
| "learning_rate": 6.879685466784951e-05, |
| "loss": 0.3786, |
| "step": 504 |
| }, |
| { |
| "epoch": 1.6025386751289172, |
| "grad_norm": 0.2332208270352636, |
| "learning_rate": 6.873523305947883e-05, |
| "loss": 0.378, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.6057120190400633, |
| "grad_norm": 0.2380059856826742, |
| "learning_rate": 6.867347020561774e-05, |
| "loss": 0.382, |
| "step": 506 |
| }, |
| { |
| "epoch": 1.6088853629512099, |
| "grad_norm": 0.24371122406892495, |
| "learning_rate": 6.861156640985607e-05, |
| "loss": 0.3813, |
| "step": 507 |
| }, |
| { |
| "epoch": 1.6120587068623562, |
| "grad_norm": 0.22447844663148647, |
| "learning_rate": 6.854952197647643e-05, |
| "loss": 0.3762, |
| "step": 508 |
| }, |
| { |
| "epoch": 1.6152320507735025, |
| "grad_norm": 0.19727536404273158, |
| "learning_rate": 6.848733721045275e-05, |
| "loss": 0.3732, |
| "step": 509 |
| }, |
| { |
| "epoch": 1.618405394684649, |
| "grad_norm": 0.21660640742566128, |
| "learning_rate": 6.842501241744873e-05, |
| "loss": 0.3786, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.6215787385957952, |
| "grad_norm": 0.2567200817107154, |
| "learning_rate": 6.836254790381635e-05, |
| "loss": 0.3763, |
| "step": 511 |
| }, |
| { |
| "epoch": 1.6247520825069417, |
| "grad_norm": 0.2867034859585703, |
| "learning_rate": 6.829994397659439e-05, |
| "loss": 0.3747, |
| "step": 512 |
| }, |
| { |
| "epoch": 1.627925426418088, |
| "grad_norm": 0.3529920349888837, |
| "learning_rate": 6.823720094350691e-05, |
| "loss": 0.3801, |
| "step": 513 |
| }, |
| { |
| "epoch": 1.6310987703292343, |
| "grad_norm": 0.393616670612644, |
| "learning_rate": 6.817431911296174e-05, |
| "loss": 0.3763, |
| "step": 514 |
| }, |
| { |
| "epoch": 1.6342721142403809, |
| "grad_norm": 0.37774302228207385, |
| "learning_rate": 6.811129879404892e-05, |
| "loss": 0.374, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.6374454581515272, |
| "grad_norm": 0.3864964477468986, |
| "learning_rate": 6.804814029653926e-05, |
| "loss": 0.3791, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.6406188020626735, |
| "grad_norm": 0.39159495583279336, |
| "learning_rate": 6.798484393088273e-05, |
| "loss": 0.3829, |
| "step": 517 |
| }, |
| { |
| "epoch": 1.64379214597382, |
| "grad_norm": 0.39302151980304456, |
| "learning_rate": 6.792141000820703e-05, |
| "loss": 0.3824, |
| "step": 518 |
| }, |
| { |
| "epoch": 1.6469654898849662, |
| "grad_norm": 0.36360896642596874, |
| "learning_rate": 6.785783884031596e-05, |
| "loss": 0.3761, |
| "step": 519 |
| }, |
| { |
| "epoch": 1.6501388337961127, |
| "grad_norm": 0.3241622597538385, |
| "learning_rate": 6.779413073968798e-05, |
| "loss": 0.3781, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.653312177707259, |
| "grad_norm": 0.29559003085786345, |
| "learning_rate": 6.77302860194746e-05, |
| "loss": 0.3767, |
| "step": 521 |
| }, |
| { |
| "epoch": 1.6564855216184053, |
| "grad_norm": 0.30944837922176927, |
| "learning_rate": 6.766630499349888e-05, |
| "loss": 0.3747, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.6596588655295519, |
| "grad_norm": 0.3338849982759374, |
| "learning_rate": 6.760218797625389e-05, |
| "loss": 0.3778, |
| "step": 523 |
| }, |
| { |
| "epoch": 1.662832209440698, |
| "grad_norm": 0.305222030262685, |
| "learning_rate": 6.753793528290112e-05, |
| "loss": 0.3861, |
| "step": 524 |
| }, |
| { |
| "epoch": 1.6660055533518445, |
| "grad_norm": 0.23959583080697613, |
| "learning_rate": 6.747354722926903e-05, |
| "loss": 0.3845, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.6691788972629908, |
| "grad_norm": 0.28220363781779934, |
| "learning_rate": 6.740902413185133e-05, |
| "loss": 0.3788, |
| "step": 526 |
| }, |
| { |
| "epoch": 1.6723522411741372, |
| "grad_norm": 0.2787836540873191, |
| "learning_rate": 6.734436630780565e-05, |
| "loss": 0.379, |
| "step": 527 |
| }, |
| { |
| "epoch": 1.6755255850852837, |
| "grad_norm": 0.24464321106241743, |
| "learning_rate": 6.727957407495174e-05, |
| "loss": 0.3745, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.67869892899643, |
| "grad_norm": 0.2605023337214791, |
| "learning_rate": 6.721464775177009e-05, |
| "loss": 0.3742, |
| "step": 529 |
| }, |
| { |
| "epoch": 1.6818722729075763, |
| "grad_norm": 0.257073037128768, |
| "learning_rate": 6.71495876574003e-05, |
| "loss": 0.3696, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.685045616818723, |
| "grad_norm": 0.22905434546418404, |
| "learning_rate": 6.708439411163948e-05, |
| "loss": 0.3744, |
| "step": 531 |
| }, |
| { |
| "epoch": 1.688218960729869, |
| "grad_norm": 0.18138198252013102, |
| "learning_rate": 6.701906743494075e-05, |
| "loss": 0.3822, |
| "step": 532 |
| }, |
| { |
| "epoch": 1.6913923046410155, |
| "grad_norm": 0.21324403956364202, |
| "learning_rate": 6.695360794841156e-05, |
| "loss": 0.374, |
| "step": 533 |
| }, |
| { |
| "epoch": 1.6945656485521619, |
| "grad_norm": 0.2589520378305425, |
| "learning_rate": 6.688801597381223e-05, |
| "loss": 0.3766, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.6977389924633082, |
| "grad_norm": 0.2739815290314333, |
| "learning_rate": 6.68222918335543e-05, |
| "loss": 0.3787, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.7009123363744547, |
| "grad_norm": 0.2622879792779608, |
| "learning_rate": 6.675643585069894e-05, |
| "loss": 0.3762, |
| "step": 536 |
| }, |
| { |
| "epoch": 1.7040856802856008, |
| "grad_norm": 0.3267913358280892, |
| "learning_rate": 6.669044834895541e-05, |
| "loss": 0.3785, |
| "step": 537 |
| }, |
| { |
| "epoch": 1.7072590241967474, |
| "grad_norm": 0.32086484201627036, |
| "learning_rate": 6.662432965267944e-05, |
| "loss": 0.3761, |
| "step": 538 |
| }, |
| { |
| "epoch": 1.7104323681078937, |
| "grad_norm": 0.23790201809058262, |
| "learning_rate": 6.655808008687156e-05, |
| "loss": 0.3779, |
| "step": 539 |
| }, |
| { |
| "epoch": 1.71360571201904, |
| "grad_norm": 0.20836977293389647, |
| "learning_rate": 6.649169997717571e-05, |
| "loss": 0.3776, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.7167790559301865, |
| "grad_norm": 0.2053799652061652, |
| "learning_rate": 6.642518964987739e-05, |
| "loss": 0.3846, |
| "step": 541 |
| }, |
| { |
| "epoch": 1.7199523998413329, |
| "grad_norm": 0.21743619572422304, |
| "learning_rate": 6.635854943190221e-05, |
| "loss": 0.3818, |
| "step": 542 |
| }, |
| { |
| "epoch": 1.7231257437524792, |
| "grad_norm": 0.2330911970783851, |
| "learning_rate": 6.629177965081428e-05, |
| "loss": 0.3728, |
| "step": 543 |
| }, |
| { |
| "epoch": 1.7262990876636255, |
| "grad_norm": 0.2971530159306548, |
| "learning_rate": 6.622488063481454e-05, |
| "loss": 0.385, |
| "step": 544 |
| }, |
| { |
| "epoch": 1.7294724315747718, |
| "grad_norm": 0.34910792893542064, |
| "learning_rate": 6.615785271273913e-05, |
| "loss": 0.375, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.7326457754859184, |
| "grad_norm": 0.3668253793899281, |
| "learning_rate": 6.609069621405791e-05, |
| "loss": 0.3785, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.7358191193970647, |
| "grad_norm": 0.2748830512102228, |
| "learning_rate": 6.602341146887267e-05, |
| "loss": 0.3825, |
| "step": 547 |
| }, |
| { |
| "epoch": 1.738992463308211, |
| "grad_norm": 0.20904532284444974, |
| "learning_rate": 6.595599880791562e-05, |
| "loss": 0.3825, |
| "step": 548 |
| }, |
| { |
| "epoch": 1.7421658072193575, |
| "grad_norm": 0.23319626735985258, |
| "learning_rate": 6.58884585625477e-05, |
| "loss": 0.3817, |
| "step": 549 |
| }, |
| { |
| "epoch": 1.7453391511305036, |
| "grad_norm": 0.3101304312167264, |
| "learning_rate": 6.582079106475702e-05, |
| "loss": 0.3783, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.7485124950416502, |
| "grad_norm": 0.40115725345017383, |
| "learning_rate": 6.575299664715714e-05, |
| "loss": 0.3753, |
| "step": 551 |
| }, |
| { |
| "epoch": 1.7516858389527965, |
| "grad_norm": 0.3987859292934531, |
| "learning_rate": 6.568507564298553e-05, |
| "loss": 0.3855, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.7548591828639428, |
| "grad_norm": 0.3456599898982391, |
| "learning_rate": 6.561702838610186e-05, |
| "loss": 0.376, |
| "step": 553 |
| }, |
| { |
| "epoch": 1.7580325267750894, |
| "grad_norm": 0.2417290147953845, |
| "learning_rate": 6.55488552109864e-05, |
| "loss": 0.3776, |
| "step": 554 |
| }, |
| { |
| "epoch": 1.7612058706862355, |
| "grad_norm": 0.20640270006206152, |
| "learning_rate": 6.548055645273831e-05, |
| "loss": 0.3803, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.764379214597382, |
| "grad_norm": 0.2232956579416985, |
| "learning_rate": 6.541213244707412e-05, |
| "loss": 0.3752, |
| "step": 556 |
| }, |
| { |
| "epoch": 1.7675525585085283, |
| "grad_norm": 0.2615817344479701, |
| "learning_rate": 6.534358353032593e-05, |
| "loss": 0.375, |
| "step": 557 |
| }, |
| { |
| "epoch": 1.7707259024196746, |
| "grad_norm": 0.254734079264447, |
| "learning_rate": 6.52749100394399e-05, |
| "loss": 0.3751, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.7738992463308212, |
| "grad_norm": 0.21642500358045758, |
| "learning_rate": 6.520611231197446e-05, |
| "loss": 0.3845, |
| "step": 559 |
| }, |
| { |
| "epoch": 1.7770725902419675, |
| "grad_norm": 0.19329608647091265, |
| "learning_rate": 6.513719068609874e-05, |
| "loss": 0.3789, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.7802459341531138, |
| "grad_norm": 0.2530286985366438, |
| "learning_rate": 6.506814550059091e-05, |
| "loss": 0.377, |
| "step": 561 |
| }, |
| { |
| "epoch": 1.7834192780642604, |
| "grad_norm": 0.2896046782801796, |
| "learning_rate": 6.499897709483641e-05, |
| "loss": 0.375, |
| "step": 562 |
| }, |
| { |
| "epoch": 1.7865926219754065, |
| "grad_norm": 0.2712317131665261, |
| "learning_rate": 6.492968580882644e-05, |
| "loss": 0.3776, |
| "step": 563 |
| }, |
| { |
| "epoch": 1.789765965886553, |
| "grad_norm": 0.2314286827879497, |
| "learning_rate": 6.486027198315617e-05, |
| "loss": 0.3794, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.7929393097976993, |
| "grad_norm": 0.21626322109134916, |
| "learning_rate": 6.479073595902309e-05, |
| "loss": 0.3774, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.7961126537088457, |
| "grad_norm": 0.19733332972369577, |
| "learning_rate": 6.472107807822538e-05, |
| "loss": 0.3708, |
| "step": 566 |
| }, |
| { |
| "epoch": 1.7992859976199922, |
| "grad_norm": 0.18353210245394538, |
| "learning_rate": 6.465129868316016e-05, |
| "loss": 0.3743, |
| "step": 567 |
| }, |
| { |
| "epoch": 1.8024593415311383, |
| "grad_norm": 0.21893010037488972, |
| "learning_rate": 6.458139811682188e-05, |
| "loss": 0.3752, |
| "step": 568 |
| }, |
| { |
| "epoch": 1.8056326854422848, |
| "grad_norm": 0.2628816377905819, |
| "learning_rate": 6.451137672280056e-05, |
| "loss": 0.3731, |
| "step": 569 |
| }, |
| { |
| "epoch": 1.8088060293534312, |
| "grad_norm": 0.2548108269109919, |
| "learning_rate": 6.444123484528015e-05, |
| "loss": 0.3755, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.8119793732645775, |
| "grad_norm": 0.2260163520548028, |
| "learning_rate": 6.437097282903685e-05, |
| "loss": 0.3773, |
| "step": 571 |
| }, |
| { |
| "epoch": 1.815152717175724, |
| "grad_norm": 0.19255377161005122, |
| "learning_rate": 6.430059101943736e-05, |
| "loss": 0.3788, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.8183260610868703, |
| "grad_norm": 0.16745873690738822, |
| "learning_rate": 6.423008976243722e-05, |
| "loss": 0.3742, |
| "step": 573 |
| }, |
| { |
| "epoch": 1.8214994049980167, |
| "grad_norm": 0.24311299014307378, |
| "learning_rate": 6.415946940457911e-05, |
| "loss": 0.3781, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.824672748909163, |
| "grad_norm": 0.2692540724073406, |
| "learning_rate": 6.408873029299115e-05, |
| "loss": 0.3758, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.8278460928203093, |
| "grad_norm": 0.25293407337643353, |
| "learning_rate": 6.401787277538515e-05, |
| "loss": 0.3745, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.8310194367314558, |
| "grad_norm": 0.21866660096950907, |
| "learning_rate": 6.394689720005499e-05, |
| "loss": 0.3692, |
| "step": 577 |
| }, |
| { |
| "epoch": 1.8341927806426022, |
| "grad_norm": 0.21682910277524756, |
| "learning_rate": 6.387580391587477e-05, |
| "loss": 0.3758, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.8373661245537485, |
| "grad_norm": 0.25230011318318546, |
| "learning_rate": 6.380459327229727e-05, |
| "loss": 0.3783, |
| "step": 579 |
| }, |
| { |
| "epoch": 1.840539468464895, |
| "grad_norm": 0.26756389255355956, |
| "learning_rate": 6.373326561935207e-05, |
| "loss": 0.3723, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.8437128123760411, |
| "grad_norm": 0.27082347370882653, |
| "learning_rate": 6.366182130764392e-05, |
| "loss": 0.3695, |
| "step": 581 |
| }, |
| { |
| "epoch": 1.8468861562871877, |
| "grad_norm": 0.2981854551164244, |
| "learning_rate": 6.359026068835101e-05, |
| "loss": 0.3725, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.850059500198334, |
| "grad_norm": 0.30431911893180386, |
| "learning_rate": 6.351858411322324e-05, |
| "loss": 0.3754, |
| "step": 583 |
| }, |
| { |
| "epoch": 1.8532328441094803, |
| "grad_norm": 0.29505742082933084, |
| "learning_rate": 6.344679193458043e-05, |
| "loss": 0.374, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.8564061880206268, |
| "grad_norm": 0.2999630911197104, |
| "learning_rate": 6.337488450531068e-05, |
| "loss": 0.371, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.859579531931773, |
| "grad_norm": 0.35422358202016324, |
| "learning_rate": 6.330286217886857e-05, |
| "loss": 0.3689, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.8627528758429195, |
| "grad_norm": 0.31054325154643925, |
| "learning_rate": 6.323072530927349e-05, |
| "loss": 0.3803, |
| "step": 587 |
| }, |
| { |
| "epoch": 1.8659262197540658, |
| "grad_norm": 0.27551184384415855, |
| "learning_rate": 6.31584742511078e-05, |
| "loss": 0.3731, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.8690995636652121, |
| "grad_norm": 0.31879930611418805, |
| "learning_rate": 6.308610935951516e-05, |
| "loss": 0.3767, |
| "step": 589 |
| }, |
| { |
| "epoch": 1.8722729075763587, |
| "grad_norm": 0.33037474746040296, |
| "learning_rate": 6.301363099019881e-05, |
| "loss": 0.3751, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.875446251487505, |
| "grad_norm": 0.2595876949175527, |
| "learning_rate": 6.294103949941975e-05, |
| "loss": 0.3722, |
| "step": 591 |
| }, |
| { |
| "epoch": 1.8786195953986513, |
| "grad_norm": 0.26859094455834825, |
| "learning_rate": 6.2868335243995e-05, |
| "loss": 0.3727, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.8817929393097979, |
| "grad_norm": 0.3602873489651573, |
| "learning_rate": 6.279551858129588e-05, |
| "loss": 0.371, |
| "step": 593 |
| }, |
| { |
| "epoch": 1.884966283220944, |
| "grad_norm": 0.4148929616678669, |
| "learning_rate": 6.272258986924624e-05, |
| "loss": 0.3726, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.8881396271320905, |
| "grad_norm": 0.3239892420647555, |
| "learning_rate": 6.26495494663207e-05, |
| "loss": 0.3754, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.8913129710432368, |
| "grad_norm": 0.2219707121481662, |
| "learning_rate": 6.257639773154288e-05, |
| "loss": 0.3763, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.8944863149543831, |
| "grad_norm": 0.24701125744993022, |
| "learning_rate": 6.250313502448368e-05, |
| "loss": 0.3769, |
| "step": 597 |
| }, |
| { |
| "epoch": 1.8976596588655297, |
| "grad_norm": 0.2508614116325903, |
| "learning_rate": 6.24297617052594e-05, |
| "loss": 0.3797, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.9008330027766758, |
| "grad_norm": 0.24307046170154817, |
| "learning_rate": 6.23562781345301e-05, |
| "loss": 0.3768, |
| "step": 599 |
| }, |
| { |
| "epoch": 1.9040063466878223, |
| "grad_norm": 0.23596073863704914, |
| "learning_rate": 6.228268467349776e-05, |
| "loss": 0.3764, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.9071796905989686, |
| "grad_norm": 0.287641306711991, |
| "learning_rate": 6.22089816839045e-05, |
| "loss": 0.3679, |
| "step": 601 |
| }, |
| { |
| "epoch": 1.910353034510115, |
| "grad_norm": 0.3350000639906152, |
| "learning_rate": 6.213516952803084e-05, |
| "loss": 0.3749, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.9135263784212615, |
| "grad_norm": 0.2953436732358185, |
| "learning_rate": 6.20612485686939e-05, |
| "loss": 0.3731, |
| "step": 603 |
| }, |
| { |
| "epoch": 1.9166997223324078, |
| "grad_norm": 0.26307310235068826, |
| "learning_rate": 6.198721916924559e-05, |
| "loss": 0.3744, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.9198730662435541, |
| "grad_norm": 0.21992342429818196, |
| "learning_rate": 6.191308169357084e-05, |
| "loss": 0.3776, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.9230464101547007, |
| "grad_norm": 0.2096604115734754, |
| "learning_rate": 6.183883650608588e-05, |
| "loss": 0.3772, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.9262197540658468, |
| "grad_norm": 0.20221136579674542, |
| "learning_rate": 6.176448397173632e-05, |
| "loss": 0.3725, |
| "step": 607 |
| }, |
| { |
| "epoch": 1.9293930979769933, |
| "grad_norm": 0.2152338983820113, |
| "learning_rate": 6.169002445599544e-05, |
| "loss": 0.3677, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.9325664418881396, |
| "grad_norm": 0.22320752106286315, |
| "learning_rate": 6.161545832486242e-05, |
| "loss": 0.3756, |
| "step": 609 |
| }, |
| { |
| "epoch": 1.935739785799286, |
| "grad_norm": 0.2253156227252389, |
| "learning_rate": 6.154078594486045e-05, |
| "loss": 0.3714, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.9389131297104325, |
| "grad_norm": 0.2060476121512058, |
| "learning_rate": 6.146600768303498e-05, |
| "loss": 0.3765, |
| "step": 611 |
| }, |
| { |
| "epoch": 1.9420864736215786, |
| "grad_norm": 0.25181408887727724, |
| "learning_rate": 6.139112390695195e-05, |
| "loss": 0.3736, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.9452598175327251, |
| "grad_norm": 0.28050692887038614, |
| "learning_rate": 6.13161349846959e-05, |
| "loss": 0.3711, |
| "step": 613 |
| }, |
| { |
| "epoch": 1.9484331614438715, |
| "grad_norm": 0.2323287898084542, |
| "learning_rate": 6.124104128486824e-05, |
| "loss": 0.3787, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.9516065053550178, |
| "grad_norm": 0.22252533467543772, |
| "learning_rate": 6.11658431765854e-05, |
| "loss": 0.3761, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.9547798492661643, |
| "grad_norm": 0.23979064807450512, |
| "learning_rate": 6.109054102947701e-05, |
| "loss": 0.3768, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.9579531931773104, |
| "grad_norm": 0.20369908867353018, |
| "learning_rate": 6.101513521368409e-05, |
| "loss": 0.3717, |
| "step": 617 |
| }, |
| { |
| "epoch": 1.961126537088457, |
| "grad_norm": 0.21979116733171586, |
| "learning_rate": 6.0939626099857256e-05, |
| "loss": 0.3719, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.9642998809996033, |
| "grad_norm": 0.2359782325683911, |
| "learning_rate": 6.086401405915485e-05, |
| "loss": 0.3697, |
| "step": 619 |
| }, |
| { |
| "epoch": 1.9674732249107496, |
| "grad_norm": 0.22186897194718339, |
| "learning_rate": 6.0788299463241146e-05, |
| "loss": 0.3714, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.9706465688218961, |
| "grad_norm": 0.19286916683029354, |
| "learning_rate": 6.071248268428455e-05, |
| "loss": 0.3694, |
| "step": 621 |
| }, |
| { |
| "epoch": 1.9738199127330425, |
| "grad_norm": 0.19038716732119104, |
| "learning_rate": 6.06365640949557e-05, |
| "loss": 0.3689, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.9769932566441888, |
| "grad_norm": 0.21853406474562687, |
| "learning_rate": 6.0560544068425704e-05, |
| "loss": 0.3713, |
| "step": 623 |
| }, |
| { |
| "epoch": 1.9801666005553353, |
| "grad_norm": 0.2519707220662586, |
| "learning_rate": 6.048442297836424e-05, |
| "loss": 0.3745, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.9833399444664814, |
| "grad_norm": 0.27348429383893025, |
| "learning_rate": 6.040820119893781e-05, |
| "loss": 0.3726, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.986513288377628, |
| "grad_norm": 0.25727914228364634, |
| "learning_rate": 6.033187910480779e-05, |
| "loss": 0.3737, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.9896866322887743, |
| "grad_norm": 0.19830437617712135, |
| "learning_rate": 6.025545707112868e-05, |
| "loss": 0.3714, |
| "step": 627 |
| }, |
| { |
| "epoch": 1.9928599761999206, |
| "grad_norm": 0.17578894688384492, |
| "learning_rate": 6.017893547354618e-05, |
| "loss": 0.3701, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.9960333201110672, |
| "grad_norm": 0.28565651904457573, |
| "learning_rate": 6.0102314688195466e-05, |
| "loss": 0.3707, |
| "step": 629 |
| }, |
| { |
| "epoch": 1.9992066640222133, |
| "grad_norm": 0.3512726249295466, |
| "learning_rate": 6.002559509169917e-05, |
| "loss": 0.374, |
| "step": 630 |
| }, |
| { |
| "epoch": 2.00238000793336, |
| "grad_norm": 0.5082104882029274, |
| "learning_rate": 5.994877706116571e-05, |
| "loss": 0.6682, |
| "step": 631 |
| }, |
| { |
| "epoch": 2.0055533518445063, |
| "grad_norm": 0.7958584718546614, |
| "learning_rate": 5.9871860974187266e-05, |
| "loss": 0.3515, |
| "step": 632 |
| }, |
| { |
| "epoch": 2.0087266957556524, |
| "grad_norm": 1.2309453398579198, |
| "learning_rate": 5.979484720883806e-05, |
| "loss": 0.3691, |
| "step": 633 |
| }, |
| { |
| "epoch": 2.011900039666799, |
| "grad_norm": 0.6258361012105962, |
| "learning_rate": 5.971773614367244e-05, |
| "loss": 0.3592, |
| "step": 634 |
| }, |
| { |
| "epoch": 2.015073383577945, |
| "grad_norm": 0.7180639249872159, |
| "learning_rate": 5.964052815772298e-05, |
| "loss": 0.3567, |
| "step": 635 |
| }, |
| { |
| "epoch": 2.0182467274890916, |
| "grad_norm": 0.9313882834628479, |
| "learning_rate": 5.9563223630498714e-05, |
| "loss": 0.3601, |
| "step": 636 |
| }, |
| { |
| "epoch": 2.021420071400238, |
| "grad_norm": 0.7992267279088702, |
| "learning_rate": 5.9485822941983185e-05, |
| "loss": 0.3532, |
| "step": 637 |
| }, |
| { |
| "epoch": 2.0245934153113843, |
| "grad_norm": 0.507785688644202, |
| "learning_rate": 5.940832647263262e-05, |
| "loss": 0.3522, |
| "step": 638 |
| }, |
| { |
| "epoch": 2.027766759222531, |
| "grad_norm": 0.578349620722966, |
| "learning_rate": 5.933073460337404e-05, |
| "loss": 0.347, |
| "step": 639 |
| }, |
| { |
| "epoch": 2.030940103133677, |
| "grad_norm": 0.5232367250977141, |
| "learning_rate": 5.9253047715603384e-05, |
| "loss": 0.36, |
| "step": 640 |
| }, |
| { |
| "epoch": 2.0341134470448234, |
| "grad_norm": 0.48100594573201455, |
| "learning_rate": 5.917526619118368e-05, |
| "loss": 0.3458, |
| "step": 641 |
| }, |
| { |
| "epoch": 2.03728679095597, |
| "grad_norm": 0.3770908533781567, |
| "learning_rate": 5.909739041244311e-05, |
| "loss": 0.3491, |
| "step": 642 |
| }, |
| { |
| "epoch": 2.040460134867116, |
| "grad_norm": 0.43489851021818743, |
| "learning_rate": 5.9019420762173156e-05, |
| "loss": 0.35, |
| "step": 643 |
| }, |
| { |
| "epoch": 2.0436334787782626, |
| "grad_norm": 0.34486032651494825, |
| "learning_rate": 5.894135762362673e-05, |
| "loss": 0.3504, |
| "step": 644 |
| }, |
| { |
| "epoch": 2.046806822689409, |
| "grad_norm": 0.3111171335430375, |
| "learning_rate": 5.8863201380516255e-05, |
| "loss": 0.3518, |
| "step": 645 |
| }, |
| { |
| "epoch": 2.0499801666005553, |
| "grad_norm": 0.35964655450159183, |
| "learning_rate": 5.8784952417011826e-05, |
| "loss": 0.3492, |
| "step": 646 |
| }, |
| { |
| "epoch": 2.053153510511702, |
| "grad_norm": 0.2793218850087469, |
| "learning_rate": 5.8706611117739275e-05, |
| "loss": 0.345, |
| "step": 647 |
| }, |
| { |
| "epoch": 2.056326854422848, |
| "grad_norm": 0.2630228702707809, |
| "learning_rate": 5.862817786777832e-05, |
| "loss": 0.3527, |
| "step": 648 |
| }, |
| { |
| "epoch": 2.0595001983339944, |
| "grad_norm": 0.287600094925337, |
| "learning_rate": 5.854965305266065e-05, |
| "loss": 0.3472, |
| "step": 649 |
| }, |
| { |
| "epoch": 2.062673542245141, |
| "grad_norm": 0.23415839473852326, |
| "learning_rate": 5.8471037058368035e-05, |
| "loss": 0.3472, |
| "step": 650 |
| }, |
| { |
| "epoch": 2.065846886156287, |
| "grad_norm": 0.2480609831522893, |
| "learning_rate": 5.839233027133041e-05, |
| "loss": 0.347, |
| "step": 651 |
| }, |
| { |
| "epoch": 2.0690202300674336, |
| "grad_norm": 0.24277577378910167, |
| "learning_rate": 5.8313533078424016e-05, |
| "loss": 0.3502, |
| "step": 652 |
| }, |
| { |
| "epoch": 2.0721935739785797, |
| "grad_norm": 0.22888858417929306, |
| "learning_rate": 5.823464586696947e-05, |
| "loss": 0.3398, |
| "step": 653 |
| }, |
| { |
| "epoch": 2.0753669178897263, |
| "grad_norm": 0.22777692875348907, |
| "learning_rate": 5.8155669024729864e-05, |
| "loss": 0.3502, |
| "step": 654 |
| }, |
| { |
| "epoch": 2.078540261800873, |
| "grad_norm": 0.21302274202843294, |
| "learning_rate": 5.807660293990887e-05, |
| "loss": 0.3467, |
| "step": 655 |
| }, |
| { |
| "epoch": 2.081713605712019, |
| "grad_norm": 0.17698466873204569, |
| "learning_rate": 5.79974480011488e-05, |
| "loss": 0.3468, |
| "step": 656 |
| }, |
| { |
| "epoch": 2.0848869496231655, |
| "grad_norm": 0.18921626118682697, |
| "learning_rate": 5.7918204597528755e-05, |
| "loss": 0.3401, |
| "step": 657 |
| }, |
| { |
| "epoch": 2.088060293534312, |
| "grad_norm": 0.19642637234378701, |
| "learning_rate": 5.7838873118562656e-05, |
| "loss": 0.3467, |
| "step": 658 |
| }, |
| { |
| "epoch": 2.091233637445458, |
| "grad_norm": 0.13768919840127494, |
| "learning_rate": 5.775945395419736e-05, |
| "loss": 0.3461, |
| "step": 659 |
| }, |
| { |
| "epoch": 2.0944069813566046, |
| "grad_norm": 0.18625327191674967, |
| "learning_rate": 5.7679947494810707e-05, |
| "loss": 0.3445, |
| "step": 660 |
| }, |
| { |
| "epoch": 2.0975803252677507, |
| "grad_norm": 0.17171644648932974, |
| "learning_rate": 5.760035413120967e-05, |
| "loss": 0.3481, |
| "step": 661 |
| }, |
| { |
| "epoch": 2.1007536691788973, |
| "grad_norm": 0.181873538605243, |
| "learning_rate": 5.752067425462835e-05, |
| "loss": 0.348, |
| "step": 662 |
| }, |
| { |
| "epoch": 2.103927013090044, |
| "grad_norm": 0.19042134398803523, |
| "learning_rate": 5.744090825672615e-05, |
| "loss": 0.3447, |
| "step": 663 |
| }, |
| { |
| "epoch": 2.10710035700119, |
| "grad_norm": 0.160202051805729, |
| "learning_rate": 5.7361056529585736e-05, |
| "loss": 0.3508, |
| "step": 664 |
| }, |
| { |
| "epoch": 2.1102737009123365, |
| "grad_norm": 0.1568895178074113, |
| "learning_rate": 5.728111946571119e-05, |
| "loss": 0.3517, |
| "step": 665 |
| }, |
| { |
| "epoch": 2.1134470448234826, |
| "grad_norm": 0.16324890485854626, |
| "learning_rate": 5.720109745802607e-05, |
| "loss": 0.3464, |
| "step": 666 |
| }, |
| { |
| "epoch": 2.116620388734629, |
| "grad_norm": 0.1457391254676544, |
| "learning_rate": 5.712099089987146e-05, |
| "loss": 0.3475, |
| "step": 667 |
| }, |
| { |
| "epoch": 2.1197937326457756, |
| "grad_norm": 0.12805660018258297, |
| "learning_rate": 5.704080018500405e-05, |
| "loss": 0.3531, |
| "step": 668 |
| }, |
| { |
| "epoch": 2.1229670765569217, |
| "grad_norm": 0.14439965378280822, |
| "learning_rate": 5.696052570759418e-05, |
| "loss": 0.3395, |
| "step": 669 |
| }, |
| { |
| "epoch": 2.1261404204680683, |
| "grad_norm": 0.14704244500302163, |
| "learning_rate": 5.6880167862223915e-05, |
| "loss": 0.3456, |
| "step": 670 |
| }, |
| { |
| "epoch": 2.129313764379215, |
| "grad_norm": 0.1650010303974902, |
| "learning_rate": 5.6799727043885155e-05, |
| "loss": 0.3472, |
| "step": 671 |
| }, |
| { |
| "epoch": 2.132487108290361, |
| "grad_norm": 0.13673543011389194, |
| "learning_rate": 5.671920364797758e-05, |
| "loss": 0.3446, |
| "step": 672 |
| }, |
| { |
| "epoch": 2.1356604522015075, |
| "grad_norm": 0.1524951210391999, |
| "learning_rate": 5.66385980703068e-05, |
| "loss": 0.3409, |
| "step": 673 |
| }, |
| { |
| "epoch": 2.1388337961126536, |
| "grad_norm": 0.1651899044016252, |
| "learning_rate": 5.655791070708242e-05, |
| "loss": 0.3458, |
| "step": 674 |
| }, |
| { |
| "epoch": 2.1420071400238, |
| "grad_norm": 0.18171434979507048, |
| "learning_rate": 5.647714195491599e-05, |
| "loss": 0.3467, |
| "step": 675 |
| }, |
| { |
| "epoch": 2.1451804839349466, |
| "grad_norm": 0.1895404480414663, |
| "learning_rate": 5.6396292210819154e-05, |
| "loss": 0.3458, |
| "step": 676 |
| }, |
| { |
| "epoch": 2.1483538278460927, |
| "grad_norm": 0.1977999320741365, |
| "learning_rate": 5.6315361872201663e-05, |
| "loss": 0.3439, |
| "step": 677 |
| }, |
| { |
| "epoch": 2.1515271717572393, |
| "grad_norm": 0.1709340652432958, |
| "learning_rate": 5.6234351336869425e-05, |
| "loss": 0.3435, |
| "step": 678 |
| }, |
| { |
| "epoch": 2.1547005156683854, |
| "grad_norm": 0.19031692182789184, |
| "learning_rate": 5.6153261003022556e-05, |
| "loss": 0.3449, |
| "step": 679 |
| }, |
| { |
| "epoch": 2.157873859579532, |
| "grad_norm": 0.16069939440920603, |
| "learning_rate": 5.607209126925337e-05, |
| "loss": 0.3537, |
| "step": 680 |
| }, |
| { |
| "epoch": 2.1610472034906785, |
| "grad_norm": 0.1433199497560158, |
| "learning_rate": 5.599084253454452e-05, |
| "loss": 0.3393, |
| "step": 681 |
| }, |
| { |
| "epoch": 2.1642205474018246, |
| "grad_norm": 0.14956082186311095, |
| "learning_rate": 5.5909515198266965e-05, |
| "loss": 0.342, |
| "step": 682 |
| }, |
| { |
| "epoch": 2.167393891312971, |
| "grad_norm": 0.1544287677040818, |
| "learning_rate": 5.582810966017799e-05, |
| "loss": 0.3503, |
| "step": 683 |
| }, |
| { |
| "epoch": 2.170567235224117, |
| "grad_norm": 0.15319232608750777, |
| "learning_rate": 5.574662632041932e-05, |
| "loss": 0.3467, |
| "step": 684 |
| }, |
| { |
| "epoch": 2.1737405791352638, |
| "grad_norm": 0.14670339038716207, |
| "learning_rate": 5.566506557951508e-05, |
| "loss": 0.3422, |
| "step": 685 |
| }, |
| { |
| "epoch": 2.1769139230464103, |
| "grad_norm": 0.16667697899341136, |
| "learning_rate": 5.558342783836987e-05, |
| "loss": 0.3424, |
| "step": 686 |
| }, |
| { |
| "epoch": 2.1800872669575564, |
| "grad_norm": 0.1975464831082886, |
| "learning_rate": 5.550171349826675e-05, |
| "loss": 0.3461, |
| "step": 687 |
| }, |
| { |
| "epoch": 2.183260610868703, |
| "grad_norm": 0.17888577322944074, |
| "learning_rate": 5.5419922960865334e-05, |
| "loss": 0.3469, |
| "step": 688 |
| }, |
| { |
| "epoch": 2.186433954779849, |
| "grad_norm": 0.17541637626798942, |
| "learning_rate": 5.533805662819975e-05, |
| "loss": 0.3505, |
| "step": 689 |
| }, |
| { |
| "epoch": 2.1896072986909956, |
| "grad_norm": 0.2198583155605273, |
| "learning_rate": 5.5256114902676675e-05, |
| "loss": 0.3447, |
| "step": 690 |
| }, |
| { |
| "epoch": 2.192780642602142, |
| "grad_norm": 0.22694834599020175, |
| "learning_rate": 5.517409818707343e-05, |
| "loss": 0.3504, |
| "step": 691 |
| }, |
| { |
| "epoch": 2.195953986513288, |
| "grad_norm": 0.159759032699931, |
| "learning_rate": 5.50920068845359e-05, |
| "loss": 0.3429, |
| "step": 692 |
| }, |
| { |
| "epoch": 2.1991273304244348, |
| "grad_norm": 0.11626634052913302, |
| "learning_rate": 5.500984139857659e-05, |
| "loss": 0.3465, |
| "step": 693 |
| }, |
| { |
| "epoch": 2.2023006743355813, |
| "grad_norm": 0.16029340678767326, |
| "learning_rate": 5.492760213307268e-05, |
| "loss": 0.3493, |
| "step": 694 |
| }, |
| { |
| "epoch": 2.2054740182467274, |
| "grad_norm": 0.20138781571087522, |
| "learning_rate": 5.484528949226397e-05, |
| "loss": 0.3448, |
| "step": 695 |
| }, |
| { |
| "epoch": 2.208647362157874, |
| "grad_norm": 0.20673832786589078, |
| "learning_rate": 5.4762903880750956e-05, |
| "loss": 0.3472, |
| "step": 696 |
| }, |
| { |
| "epoch": 2.21182070606902, |
| "grad_norm": 0.1945972361781099, |
| "learning_rate": 5.468044570349282e-05, |
| "loss": 0.3452, |
| "step": 697 |
| }, |
| { |
| "epoch": 2.2149940499801666, |
| "grad_norm": 0.15260971345475988, |
| "learning_rate": 5.45979153658054e-05, |
| "loss": 0.3444, |
| "step": 698 |
| }, |
| { |
| "epoch": 2.218167393891313, |
| "grad_norm": 0.16140310676647643, |
| "learning_rate": 5.451531327335927e-05, |
| "loss": 0.3464, |
| "step": 699 |
| }, |
| { |
| "epoch": 2.221340737802459, |
| "grad_norm": 0.1710826897248202, |
| "learning_rate": 5.4432639832177675e-05, |
| "loss": 0.3477, |
| "step": 700 |
| }, |
| { |
| "epoch": 2.2245140817136058, |
| "grad_norm": 0.1907595410644264, |
| "learning_rate": 5.43498954486346e-05, |
| "loss": 0.3399, |
| "step": 701 |
| }, |
| { |
| "epoch": 2.227687425624752, |
| "grad_norm": 0.16532722473360295, |
| "learning_rate": 5.426708052945272e-05, |
| "loss": 0.3505, |
| "step": 702 |
| }, |
| { |
| "epoch": 2.2308607695358984, |
| "grad_norm": 0.1536194473143472, |
| "learning_rate": 5.4184195481701425e-05, |
| "loss": 0.3474, |
| "step": 703 |
| }, |
| { |
| "epoch": 2.234034113447045, |
| "grad_norm": 0.1520117940400569, |
| "learning_rate": 5.4101240712794826e-05, |
| "loss": 0.3437, |
| "step": 704 |
| }, |
| { |
| "epoch": 2.237207457358191, |
| "grad_norm": 0.16336250647219758, |
| "learning_rate": 5.401821663048974e-05, |
| "loss": 0.3484, |
| "step": 705 |
| }, |
| { |
| "epoch": 2.2403808012693376, |
| "grad_norm": 0.18048860842589537, |
| "learning_rate": 5.393512364288366e-05, |
| "loss": 0.3461, |
| "step": 706 |
| }, |
| { |
| "epoch": 2.243554145180484, |
| "grad_norm": 0.2077517477600907, |
| "learning_rate": 5.3851962158412835e-05, |
| "loss": 0.3469, |
| "step": 707 |
| }, |
| { |
| "epoch": 2.2467274890916302, |
| "grad_norm": 0.23694455387298174, |
| "learning_rate": 5.3768732585850135e-05, |
| "loss": 0.3458, |
| "step": 708 |
| }, |
| { |
| "epoch": 2.2499008330027768, |
| "grad_norm": 0.2221303640440427, |
| "learning_rate": 5.3685435334303144e-05, |
| "loss": 0.3475, |
| "step": 709 |
| }, |
| { |
| "epoch": 2.253074176913923, |
| "grad_norm": 0.15740414776377862, |
| "learning_rate": 5.360207081321215e-05, |
| "loss": 0.3514, |
| "step": 710 |
| }, |
| { |
| "epoch": 2.2562475208250694, |
| "grad_norm": 0.14133878333017164, |
| "learning_rate": 5.351863943234803e-05, |
| "loss": 0.3501, |
| "step": 711 |
| }, |
| { |
| "epoch": 2.259420864736216, |
| "grad_norm": 0.23287734656674552, |
| "learning_rate": 5.343514160181037e-05, |
| "loss": 0.3476, |
| "step": 712 |
| }, |
| { |
| "epoch": 2.262594208647362, |
| "grad_norm": 0.2747098009784896, |
| "learning_rate": 5.3351577732025324e-05, |
| "loss": 0.3467, |
| "step": 713 |
| }, |
| { |
| "epoch": 2.2657675525585086, |
| "grad_norm": 0.2544286593636421, |
| "learning_rate": 5.3267948233743705e-05, |
| "loss": 0.3474, |
| "step": 714 |
| }, |
| { |
| "epoch": 2.2689408964696547, |
| "grad_norm": 0.23428301254687212, |
| "learning_rate": 5.318425351803889e-05, |
| "loss": 0.3436, |
| "step": 715 |
| }, |
| { |
| "epoch": 2.2721142403808012, |
| "grad_norm": 0.19972273517158506, |
| "learning_rate": 5.310049399630483e-05, |
| "loss": 0.3428, |
| "step": 716 |
| }, |
| { |
| "epoch": 2.2752875842919478, |
| "grad_norm": 0.17312802850984527, |
| "learning_rate": 5.301667008025404e-05, |
| "loss": 0.3507, |
| "step": 717 |
| }, |
| { |
| "epoch": 2.278460928203094, |
| "grad_norm": 0.1884344961517114, |
| "learning_rate": 5.293278218191553e-05, |
| "loss": 0.344, |
| "step": 718 |
| }, |
| { |
| "epoch": 2.2816342721142404, |
| "grad_norm": 0.16251980995579765, |
| "learning_rate": 5.2848830713632844e-05, |
| "loss": 0.3524, |
| "step": 719 |
| }, |
| { |
| "epoch": 2.284807616025387, |
| "grad_norm": 0.13519307906275285, |
| "learning_rate": 5.2764816088061974e-05, |
| "loss": 0.3518, |
| "step": 720 |
| }, |
| { |
| "epoch": 2.287980959936533, |
| "grad_norm": 0.15371744727582462, |
| "learning_rate": 5.2680738718169355e-05, |
| "loss": 0.3412, |
| "step": 721 |
| }, |
| { |
| "epoch": 2.2911543038476796, |
| "grad_norm": 0.14184577228089562, |
| "learning_rate": 5.2596599017229864e-05, |
| "loss": 0.3436, |
| "step": 722 |
| }, |
| { |
| "epoch": 2.2943276477588257, |
| "grad_norm": 0.1369004603255518, |
| "learning_rate": 5.251239739882472e-05, |
| "loss": 0.3446, |
| "step": 723 |
| }, |
| { |
| "epoch": 2.2975009916699722, |
| "grad_norm": 0.15048696535358383, |
| "learning_rate": 5.2428134276839525e-05, |
| "loss": 0.3442, |
| "step": 724 |
| }, |
| { |
| "epoch": 2.300674335581119, |
| "grad_norm": 0.14200830231004927, |
| "learning_rate": 5.234381006546219e-05, |
| "loss": 0.3446, |
| "step": 725 |
| }, |
| { |
| "epoch": 2.303847679492265, |
| "grad_norm": 0.16320009543918237, |
| "learning_rate": 5.2259425179180873e-05, |
| "loss": 0.3443, |
| "step": 726 |
| }, |
| { |
| "epoch": 2.3070210234034114, |
| "grad_norm": 0.15997774955995675, |
| "learning_rate": 5.217498003278204e-05, |
| "loss": 0.346, |
| "step": 727 |
| }, |
| { |
| "epoch": 2.3101943673145575, |
| "grad_norm": 0.17017241599015379, |
| "learning_rate": 5.209047504134828e-05, |
| "loss": 0.3436, |
| "step": 728 |
| }, |
| { |
| "epoch": 2.313367711225704, |
| "grad_norm": 0.13479061683911572, |
| "learning_rate": 5.200591062025641e-05, |
| "loss": 0.346, |
| "step": 729 |
| }, |
| { |
| "epoch": 2.3165410551368506, |
| "grad_norm": 0.12285044545606259, |
| "learning_rate": 5.192128718517535e-05, |
| "loss": 0.3383, |
| "step": 730 |
| }, |
| { |
| "epoch": 2.3197143990479967, |
| "grad_norm": 0.1359930168100327, |
| "learning_rate": 5.1836605152064076e-05, |
| "loss": 0.3515, |
| "step": 731 |
| }, |
| { |
| "epoch": 2.3228877429591432, |
| "grad_norm": 0.14719463738675423, |
| "learning_rate": 5.175186493716963e-05, |
| "loss": 0.3451, |
| "step": 732 |
| }, |
| { |
| "epoch": 2.32606108687029, |
| "grad_norm": 0.1286663460165603, |
| "learning_rate": 5.1667066957025e-05, |
| "loss": 0.3512, |
| "step": 733 |
| }, |
| { |
| "epoch": 2.329234430781436, |
| "grad_norm": 0.10867634667327339, |
| "learning_rate": 5.158221162844717e-05, |
| "loss": 0.3439, |
| "step": 734 |
| }, |
| { |
| "epoch": 2.3324077746925824, |
| "grad_norm": 0.1591243176843703, |
| "learning_rate": 5.1497299368534965e-05, |
| "loss": 0.3511, |
| "step": 735 |
| }, |
| { |
| "epoch": 2.3355811186037285, |
| "grad_norm": 0.12942608362042907, |
| "learning_rate": 5.1412330594667075e-05, |
| "loss": 0.3434, |
| "step": 736 |
| }, |
| { |
| "epoch": 2.338754462514875, |
| "grad_norm": 0.10948773984311001, |
| "learning_rate": 5.132730572449997e-05, |
| "loss": 0.3524, |
| "step": 737 |
| }, |
| { |
| "epoch": 2.3419278064260216, |
| "grad_norm": 0.11783857681490648, |
| "learning_rate": 5.124222517596586e-05, |
| "loss": 0.349, |
| "step": 738 |
| }, |
| { |
| "epoch": 2.3451011503371677, |
| "grad_norm": 0.13439082971050517, |
| "learning_rate": 5.115708936727065e-05, |
| "loss": 0.3405, |
| "step": 739 |
| }, |
| { |
| "epoch": 2.3482744942483142, |
| "grad_norm": 0.11812799919852607, |
| "learning_rate": 5.1071898716891853e-05, |
| "loss": 0.3461, |
| "step": 740 |
| }, |
| { |
| "epoch": 2.3514478381594603, |
| "grad_norm": 0.11652694906685064, |
| "learning_rate": 5.098665364357656e-05, |
| "loss": 0.3426, |
| "step": 741 |
| }, |
| { |
| "epoch": 2.354621182070607, |
| "grad_norm": 0.139468896494999, |
| "learning_rate": 5.0901354566339355e-05, |
| "loss": 0.3426, |
| "step": 742 |
| }, |
| { |
| "epoch": 2.3577945259817534, |
| "grad_norm": 0.13901937078693805, |
| "learning_rate": 5.0816001904460316e-05, |
| "loss": 0.3453, |
| "step": 743 |
| }, |
| { |
| "epoch": 2.3609678698928995, |
| "grad_norm": 0.1469507185762338, |
| "learning_rate": 5.073059607748287e-05, |
| "loss": 0.3438, |
| "step": 744 |
| }, |
| { |
| "epoch": 2.364141213804046, |
| "grad_norm": 0.12953053633992156, |
| "learning_rate": 5.064513750521179e-05, |
| "loss": 0.348, |
| "step": 745 |
| }, |
| { |
| "epoch": 2.3673145577151926, |
| "grad_norm": 0.15355601770329005, |
| "learning_rate": 5.05596266077111e-05, |
| "loss": 0.3487, |
| "step": 746 |
| }, |
| { |
| "epoch": 2.3704879016263387, |
| "grad_norm": 0.17548971874850716, |
| "learning_rate": 5.047406380530205e-05, |
| "loss": 0.3435, |
| "step": 747 |
| }, |
| { |
| "epoch": 2.3736612455374853, |
| "grad_norm": 0.1336318519267535, |
| "learning_rate": 5.038844951856101e-05, |
| "loss": 0.3472, |
| "step": 748 |
| }, |
| { |
| "epoch": 2.3768345894486314, |
| "grad_norm": 0.15489849858402407, |
| "learning_rate": 5.0302784168317405e-05, |
| "loss": 0.3507, |
| "step": 749 |
| }, |
| { |
| "epoch": 2.380007933359778, |
| "grad_norm": 0.15730467807777396, |
| "learning_rate": 5.021706817565168e-05, |
| "loss": 0.3494, |
| "step": 750 |
| }, |
| { |
| "epoch": 2.383181277270924, |
| "grad_norm": 0.14142472540150533, |
| "learning_rate": 5.013130196189319e-05, |
| "loss": 0.3508, |
| "step": 751 |
| }, |
| { |
| "epoch": 2.3863546211820705, |
| "grad_norm": 0.1309447338438405, |
| "learning_rate": 5.004548594861815e-05, |
| "loss": 0.3414, |
| "step": 752 |
| }, |
| { |
| "epoch": 2.389527965093217, |
| "grad_norm": 0.13689757661127885, |
| "learning_rate": 4.995962055764758e-05, |
| "loss": 0.3462, |
| "step": 753 |
| }, |
| { |
| "epoch": 2.392701309004363, |
| "grad_norm": 0.15155743192346277, |
| "learning_rate": 4.987370621104518e-05, |
| "loss": 0.3502, |
| "step": 754 |
| }, |
| { |
| "epoch": 2.3958746529155097, |
| "grad_norm": 0.1771759302131552, |
| "learning_rate": 4.978774333111532e-05, |
| "loss": 0.3415, |
| "step": 755 |
| }, |
| { |
| "epoch": 2.3990479968266563, |
| "grad_norm": 0.14297561112928286, |
| "learning_rate": 4.97017323404009e-05, |
| "loss": 0.3445, |
| "step": 756 |
| }, |
| { |
| "epoch": 2.4022213407378024, |
| "grad_norm": 0.1359038159105317, |
| "learning_rate": 4.9615673661681314e-05, |
| "loss": 0.3431, |
| "step": 757 |
| }, |
| { |
| "epoch": 2.405394684648949, |
| "grad_norm": 0.14972172604238929, |
| "learning_rate": 4.952956771797039e-05, |
| "loss": 0.3449, |
| "step": 758 |
| }, |
| { |
| "epoch": 2.4085680285600954, |
| "grad_norm": 0.14098278608768156, |
| "learning_rate": 4.9443414932514245e-05, |
| "loss": 0.3446, |
| "step": 759 |
| }, |
| { |
| "epoch": 2.4117413724712415, |
| "grad_norm": 0.1300591686668367, |
| "learning_rate": 4.935721572878927e-05, |
| "loss": 0.3469, |
| "step": 760 |
| }, |
| { |
| "epoch": 2.414914716382388, |
| "grad_norm": 0.13525638291526695, |
| "learning_rate": 4.9270970530499995e-05, |
| "loss": 0.3457, |
| "step": 761 |
| }, |
| { |
| "epoch": 2.418088060293534, |
| "grad_norm": 0.13706454820860725, |
| "learning_rate": 4.918467976157704e-05, |
| "loss": 0.3493, |
| "step": 762 |
| }, |
| { |
| "epoch": 2.4212614042046807, |
| "grad_norm": 0.1396470750888246, |
| "learning_rate": 4.909834384617505e-05, |
| "loss": 0.3466, |
| "step": 763 |
| }, |
| { |
| "epoch": 2.424434748115827, |
| "grad_norm": 0.1579884171451332, |
| "learning_rate": 4.901196320867054e-05, |
| "loss": 0.3434, |
| "step": 764 |
| }, |
| { |
| "epoch": 2.4276080920269734, |
| "grad_norm": 0.14762421037735238, |
| "learning_rate": 4.89255382736599e-05, |
| "loss": 0.3461, |
| "step": 765 |
| }, |
| { |
| "epoch": 2.43078143593812, |
| "grad_norm": 0.1484907586908843, |
| "learning_rate": 4.883906946595721e-05, |
| "loss": 0.3429, |
| "step": 766 |
| }, |
| { |
| "epoch": 2.433954779849266, |
| "grad_norm": 0.15347798353398318, |
| "learning_rate": 4.875255721059223e-05, |
| "loss": 0.3471, |
| "step": 767 |
| }, |
| { |
| "epoch": 2.4371281237604125, |
| "grad_norm": 0.1291098504201812, |
| "learning_rate": 4.8666001932808315e-05, |
| "loss": 0.3492, |
| "step": 768 |
| }, |
| { |
| "epoch": 2.440301467671559, |
| "grad_norm": 0.1277300904749933, |
| "learning_rate": 4.857940405806022e-05, |
| "loss": 0.3431, |
| "step": 769 |
| }, |
| { |
| "epoch": 2.443474811582705, |
| "grad_norm": 0.12939888924222556, |
| "learning_rate": 4.8492764012012146e-05, |
| "loss": 0.3467, |
| "step": 770 |
| }, |
| { |
| "epoch": 2.4466481554938517, |
| "grad_norm": 0.1224426894168111, |
| "learning_rate": 4.840608222053553e-05, |
| "loss": 0.3417, |
| "step": 771 |
| }, |
| { |
| "epoch": 2.449821499404998, |
| "grad_norm": 0.1497720241182753, |
| "learning_rate": 4.831935910970706e-05, |
| "loss": 0.3444, |
| "step": 772 |
| }, |
| { |
| "epoch": 2.4529948433161444, |
| "grad_norm": 0.11319605561124714, |
| "learning_rate": 4.8232595105806486e-05, |
| "loss": 0.3445, |
| "step": 773 |
| }, |
| { |
| "epoch": 2.456168187227291, |
| "grad_norm": 0.1345350003328084, |
| "learning_rate": 4.814579063531458e-05, |
| "loss": 0.3453, |
| "step": 774 |
| }, |
| { |
| "epoch": 2.459341531138437, |
| "grad_norm": 0.14108407549438753, |
| "learning_rate": 4.8058946124911014e-05, |
| "loss": 0.3456, |
| "step": 775 |
| }, |
| { |
| "epoch": 2.4625148750495836, |
| "grad_norm": 0.13444987730516147, |
| "learning_rate": 4.797206200147229e-05, |
| "loss": 0.3444, |
| "step": 776 |
| }, |
| { |
| "epoch": 2.4656882189607296, |
| "grad_norm": 0.1375640631845117, |
| "learning_rate": 4.78851386920696e-05, |
| "loss": 0.3446, |
| "step": 777 |
| }, |
| { |
| "epoch": 2.468861562871876, |
| "grad_norm": 0.14096157244272645, |
| "learning_rate": 4.779817662396679e-05, |
| "loss": 0.3478, |
| "step": 778 |
| }, |
| { |
| "epoch": 2.4720349067830227, |
| "grad_norm": 0.12488925047337064, |
| "learning_rate": 4.771117622461816e-05, |
| "loss": 0.3413, |
| "step": 779 |
| }, |
| { |
| "epoch": 2.475208250694169, |
| "grad_norm": 0.13287851067977652, |
| "learning_rate": 4.7624137921666475e-05, |
| "loss": 0.3489, |
| "step": 780 |
| }, |
| { |
| "epoch": 2.4783815946053154, |
| "grad_norm": 0.13197704125308232, |
| "learning_rate": 4.753706214294082e-05, |
| "loss": 0.3455, |
| "step": 781 |
| }, |
| { |
| "epoch": 2.481554938516462, |
| "grad_norm": 0.13265503243246032, |
| "learning_rate": 4.7449949316454425e-05, |
| "loss": 0.3439, |
| "step": 782 |
| }, |
| { |
| "epoch": 2.484728282427608, |
| "grad_norm": 0.130704370235696, |
| "learning_rate": 4.736279987040269e-05, |
| "loss": 0.3457, |
| "step": 783 |
| }, |
| { |
| "epoch": 2.4879016263387546, |
| "grad_norm": 0.14148325746554727, |
| "learning_rate": 4.727561423316099e-05, |
| "loss": 0.3463, |
| "step": 784 |
| }, |
| { |
| "epoch": 2.4910749702499007, |
| "grad_norm": 0.13608094038136562, |
| "learning_rate": 4.7188392833282575e-05, |
| "loss": 0.3484, |
| "step": 785 |
| }, |
| { |
| "epoch": 2.494248314161047, |
| "grad_norm": 0.13874721828210285, |
| "learning_rate": 4.710113609949653e-05, |
| "loss": 0.346, |
| "step": 786 |
| }, |
| { |
| "epoch": 2.4974216580721937, |
| "grad_norm": 0.14977483842518804, |
| "learning_rate": 4.701384446070557e-05, |
| "loss": 0.3424, |
| "step": 787 |
| }, |
| { |
| "epoch": 2.50059500198334, |
| "grad_norm": 0.12967959257083173, |
| "learning_rate": 4.6926518345984026e-05, |
| "loss": 0.3406, |
| "step": 788 |
| }, |
| { |
| "epoch": 2.5037683458944864, |
| "grad_norm": 0.12801141105870723, |
| "learning_rate": 4.683915818457566e-05, |
| "loss": 0.3396, |
| "step": 789 |
| }, |
| { |
| "epoch": 2.5069416898056325, |
| "grad_norm": 0.13327811790487024, |
| "learning_rate": 4.67517644058916e-05, |
| "loss": 0.3403, |
| "step": 790 |
| }, |
| { |
| "epoch": 2.510115033716779, |
| "grad_norm": 0.16238667666380158, |
| "learning_rate": 4.6664337439508226e-05, |
| "loss": 0.3411, |
| "step": 791 |
| }, |
| { |
| "epoch": 2.5132883776279256, |
| "grad_norm": 0.16494439303835634, |
| "learning_rate": 4.657687771516503e-05, |
| "loss": 0.3409, |
| "step": 792 |
| }, |
| { |
| "epoch": 2.5164617215390717, |
| "grad_norm": 0.14403868840938389, |
| "learning_rate": 4.6489385662762544e-05, |
| "loss": 0.3439, |
| "step": 793 |
| }, |
| { |
| "epoch": 2.519635065450218, |
| "grad_norm": 0.13948670219193632, |
| "learning_rate": 4.640186171236018e-05, |
| "loss": 0.3455, |
| "step": 794 |
| }, |
| { |
| "epoch": 2.5228084093613647, |
| "grad_norm": 0.16277210257572064, |
| "learning_rate": 4.6314306294174164e-05, |
| "loss": 0.3467, |
| "step": 795 |
| }, |
| { |
| "epoch": 2.525981753272511, |
| "grad_norm": 0.17170615713941978, |
| "learning_rate": 4.622671983857539e-05, |
| "loss": 0.3447, |
| "step": 796 |
| }, |
| { |
| "epoch": 2.5291550971836574, |
| "grad_norm": 0.15234975751861424, |
| "learning_rate": 4.6139102776087316e-05, |
| "loss": 0.3468, |
| "step": 797 |
| }, |
| { |
| "epoch": 2.5323284410948035, |
| "grad_norm": 0.11279769192482242, |
| "learning_rate": 4.605145553738385e-05, |
| "loss": 0.3452, |
| "step": 798 |
| }, |
| { |
| "epoch": 2.53550178500595, |
| "grad_norm": 0.13961162984914652, |
| "learning_rate": 4.5963778553287215e-05, |
| "loss": 0.3396, |
| "step": 799 |
| }, |
| { |
| "epoch": 2.538675128917096, |
| "grad_norm": 0.15754039363591266, |
| "learning_rate": 4.587607225476585e-05, |
| "loss": 0.3464, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.5418484728282427, |
| "grad_norm": 0.15400642858424615, |
| "learning_rate": 4.57883370729323e-05, |
| "loss": 0.3429, |
| "step": 801 |
| }, |
| { |
| "epoch": 2.545021816739389, |
| "grad_norm": 0.13587095295609775, |
| "learning_rate": 4.570057343904107e-05, |
| "loss": 0.3444, |
| "step": 802 |
| }, |
| { |
| "epoch": 2.5481951606505353, |
| "grad_norm": 0.1661934512206222, |
| "learning_rate": 4.5612781784486516e-05, |
| "loss": 0.3456, |
| "step": 803 |
| }, |
| { |
| "epoch": 2.551368504561682, |
| "grad_norm": 0.1818284189001732, |
| "learning_rate": 4.5524962540800726e-05, |
| "loss": 0.3454, |
| "step": 804 |
| }, |
| { |
| "epoch": 2.5545418484728284, |
| "grad_norm": 0.134272995751436, |
| "learning_rate": 4.5437116139651416e-05, |
| "loss": 0.3458, |
| "step": 805 |
| }, |
| { |
| "epoch": 2.5577151923839745, |
| "grad_norm": 0.11098594445466681, |
| "learning_rate": 4.5349243012839787e-05, |
| "loss": 0.3428, |
| "step": 806 |
| }, |
| { |
| "epoch": 2.560888536295121, |
| "grad_norm": 0.1383868261385255, |
| "learning_rate": 4.52613435922984e-05, |
| "loss": 0.3433, |
| "step": 807 |
| }, |
| { |
| "epoch": 2.5640618802062676, |
| "grad_norm": 0.13958525369734295, |
| "learning_rate": 4.517341831008906e-05, |
| "loss": 0.343, |
| "step": 808 |
| }, |
| { |
| "epoch": 2.5672352241174137, |
| "grad_norm": 0.14299669733617631, |
| "learning_rate": 4.5085467598400687e-05, |
| "loss": 0.3481, |
| "step": 809 |
| }, |
| { |
| "epoch": 2.57040856802856, |
| "grad_norm": 0.16300391548684726, |
| "learning_rate": 4.499749188954721e-05, |
| "loss": 0.3442, |
| "step": 810 |
| }, |
| { |
| "epoch": 2.5735819119397063, |
| "grad_norm": 0.15939899931970958, |
| "learning_rate": 4.490949161596545e-05, |
| "loss": 0.346, |
| "step": 811 |
| }, |
| { |
| "epoch": 2.576755255850853, |
| "grad_norm": 0.11711293584718259, |
| "learning_rate": 4.4821467210212924e-05, |
| "loss": 0.3394, |
| "step": 812 |
| }, |
| { |
| "epoch": 2.579928599761999, |
| "grad_norm": 0.12318146826993423, |
| "learning_rate": 4.473341910496579e-05, |
| "loss": 0.3391, |
| "step": 813 |
| }, |
| { |
| "epoch": 2.5831019436731455, |
| "grad_norm": 0.12321952201871014, |
| "learning_rate": 4.464534773301674e-05, |
| "loss": 0.3424, |
| "step": 814 |
| }, |
| { |
| "epoch": 2.586275287584292, |
| "grad_norm": 0.12259038741152185, |
| "learning_rate": 4.455725352727276e-05, |
| "loss": 0.346, |
| "step": 815 |
| }, |
| { |
| "epoch": 2.589448631495438, |
| "grad_norm": 0.14443376513578737, |
| "learning_rate": 4.446913692075311e-05, |
| "loss": 0.3436, |
| "step": 816 |
| }, |
| { |
| "epoch": 2.5926219754065847, |
| "grad_norm": 0.1381230568223312, |
| "learning_rate": 4.438099834658716e-05, |
| "loss": 0.3444, |
| "step": 817 |
| }, |
| { |
| "epoch": 2.595795319317731, |
| "grad_norm": 0.12800321400776432, |
| "learning_rate": 4.429283823801227e-05, |
| "loss": 0.3423, |
| "step": 818 |
| }, |
| { |
| "epoch": 2.5989686632288773, |
| "grad_norm": 0.14739193433103212, |
| "learning_rate": 4.420465702837162e-05, |
| "loss": 0.346, |
| "step": 819 |
| }, |
| { |
| "epoch": 2.602142007140024, |
| "grad_norm": 0.14778467346501864, |
| "learning_rate": 4.4116455151112135e-05, |
| "loss": 0.3481, |
| "step": 820 |
| }, |
| { |
| "epoch": 2.6053153510511704, |
| "grad_norm": 0.11699717837600367, |
| "learning_rate": 4.4028233039782336e-05, |
| "loss": 0.3468, |
| "step": 821 |
| }, |
| { |
| "epoch": 2.6084886949623165, |
| "grad_norm": 0.12377912947544831, |
| "learning_rate": 4.393999112803017e-05, |
| "loss": 0.3422, |
| "step": 822 |
| }, |
| { |
| "epoch": 2.611662038873463, |
| "grad_norm": 0.11993635420501132, |
| "learning_rate": 4.385172984960093e-05, |
| "loss": 0.3479, |
| "step": 823 |
| }, |
| { |
| "epoch": 2.614835382784609, |
| "grad_norm": 0.1198156205225748, |
| "learning_rate": 4.3763449638335124e-05, |
| "loss": 0.3486, |
| "step": 824 |
| }, |
| { |
| "epoch": 2.6180087266957557, |
| "grad_norm": 0.14812630286596426, |
| "learning_rate": 4.367515092816628e-05, |
| "loss": 0.3494, |
| "step": 825 |
| }, |
| { |
| "epoch": 2.621182070606902, |
| "grad_norm": 0.11969830749952089, |
| "learning_rate": 4.3586834153118905e-05, |
| "loss": 0.343, |
| "step": 826 |
| }, |
| { |
| "epoch": 2.6243554145180483, |
| "grad_norm": 0.1330251306537408, |
| "learning_rate": 4.3498499747306243e-05, |
| "loss": 0.342, |
| "step": 827 |
| }, |
| { |
| "epoch": 2.627528758429195, |
| "grad_norm": 0.12546162647602022, |
| "learning_rate": 4.3410148144928256e-05, |
| "loss": 0.3425, |
| "step": 828 |
| }, |
| { |
| "epoch": 2.630702102340341, |
| "grad_norm": 0.13410489232741177, |
| "learning_rate": 4.332177978026943e-05, |
| "loss": 0.3483, |
| "step": 829 |
| }, |
| { |
| "epoch": 2.6338754462514875, |
| "grad_norm": 0.1232508805552152, |
| "learning_rate": 4.3233395087696585e-05, |
| "loss": 0.346, |
| "step": 830 |
| }, |
| { |
| "epoch": 2.637048790162634, |
| "grad_norm": 0.12167537812092089, |
| "learning_rate": 4.314499450165688e-05, |
| "loss": 0.3409, |
| "step": 831 |
| }, |
| { |
| "epoch": 2.64022213407378, |
| "grad_norm": 0.13645509206436196, |
| "learning_rate": 4.305657845667553e-05, |
| "loss": 0.3462, |
| "step": 832 |
| }, |
| { |
| "epoch": 2.6433954779849267, |
| "grad_norm": 0.10599165500884121, |
| "learning_rate": 4.296814738735376e-05, |
| "loss": 0.3464, |
| "step": 833 |
| }, |
| { |
| "epoch": 2.6465688218960732, |
| "grad_norm": 0.13223375794191417, |
| "learning_rate": 4.2879701728366686e-05, |
| "loss": 0.3438, |
| "step": 834 |
| }, |
| { |
| "epoch": 2.6497421658072193, |
| "grad_norm": 0.12369661834770018, |
| "learning_rate": 4.2791241914461076e-05, |
| "loss": 0.348, |
| "step": 835 |
| }, |
| { |
| "epoch": 2.652915509718366, |
| "grad_norm": 0.1225955125555118, |
| "learning_rate": 4.270276838045331e-05, |
| "loss": 0.3396, |
| "step": 836 |
| }, |
| { |
| "epoch": 2.656088853629512, |
| "grad_norm": 0.11973226274198374, |
| "learning_rate": 4.26142815612272e-05, |
| "loss": 0.3473, |
| "step": 837 |
| }, |
| { |
| "epoch": 2.6592621975406585, |
| "grad_norm": 0.1302252203210119, |
| "learning_rate": 4.252578189173186e-05, |
| "loss": 0.3421, |
| "step": 838 |
| }, |
| { |
| "epoch": 2.6624355414518046, |
| "grad_norm": 0.13620263914799938, |
| "learning_rate": 4.2437269806979574e-05, |
| "loss": 0.3447, |
| "step": 839 |
| }, |
| { |
| "epoch": 2.665608885362951, |
| "grad_norm": 0.12216233976282596, |
| "learning_rate": 4.234874574204364e-05, |
| "loss": 0.3481, |
| "step": 840 |
| }, |
| { |
| "epoch": 2.6687822292740977, |
| "grad_norm": 0.14511224880697332, |
| "learning_rate": 4.226021013205626e-05, |
| "loss": 0.3433, |
| "step": 841 |
| }, |
| { |
| "epoch": 2.671955573185244, |
| "grad_norm": 0.13167003525580787, |
| "learning_rate": 4.217166341220635e-05, |
| "loss": 0.3462, |
| "step": 842 |
| }, |
| { |
| "epoch": 2.6751289170963903, |
| "grad_norm": 0.11233528339177638, |
| "learning_rate": 4.208310601773749e-05, |
| "loss": 0.3422, |
| "step": 843 |
| }, |
| { |
| "epoch": 2.678302261007537, |
| "grad_norm": 0.116009578862862, |
| "learning_rate": 4.1994538383945686e-05, |
| "loss": 0.3382, |
| "step": 844 |
| }, |
| { |
| "epoch": 2.681475604918683, |
| "grad_norm": 0.14193128052476364, |
| "learning_rate": 4.190596094617729e-05, |
| "loss": 0.3477, |
| "step": 845 |
| }, |
| { |
| "epoch": 2.6846489488298295, |
| "grad_norm": 0.14084760732606688, |
| "learning_rate": 4.1817374139826857e-05, |
| "loss": 0.3416, |
| "step": 846 |
| }, |
| { |
| "epoch": 2.687822292740976, |
| "grad_norm": 0.1182925424974134, |
| "learning_rate": 4.172877840033496e-05, |
| "loss": 0.346, |
| "step": 847 |
| }, |
| { |
| "epoch": 2.690995636652122, |
| "grad_norm": 0.14243194869284295, |
| "learning_rate": 4.164017416318611e-05, |
| "loss": 0.3319, |
| "step": 848 |
| }, |
| { |
| "epoch": 2.6941689805632687, |
| "grad_norm": 0.14660981394007142, |
| "learning_rate": 4.155156186390659e-05, |
| "loss": 0.3454, |
| "step": 849 |
| }, |
| { |
| "epoch": 2.697342324474415, |
| "grad_norm": 0.12282621458948739, |
| "learning_rate": 4.1462941938062295e-05, |
| "loss": 0.3425, |
| "step": 850 |
| }, |
| { |
| "epoch": 2.7005156683855613, |
| "grad_norm": 0.11456728642253448, |
| "learning_rate": 4.137431482125659e-05, |
| "loss": 0.3437, |
| "step": 851 |
| }, |
| { |
| "epoch": 2.7036890122967074, |
| "grad_norm": 0.13185395211536752, |
| "learning_rate": 4.128568094912825e-05, |
| "loss": 0.3437, |
| "step": 852 |
| }, |
| { |
| "epoch": 2.706862356207854, |
| "grad_norm": 0.11115691532463522, |
| "learning_rate": 4.1197040757349175e-05, |
| "loss": 0.35, |
| "step": 853 |
| }, |
| { |
| "epoch": 2.7100357001190005, |
| "grad_norm": 0.12965739444200763, |
| "learning_rate": 4.1108394681622406e-05, |
| "loss": 0.3469, |
| "step": 854 |
| }, |
| { |
| "epoch": 2.7132090440301466, |
| "grad_norm": 0.12606196792345453, |
| "learning_rate": 4.101974315767984e-05, |
| "loss": 0.3424, |
| "step": 855 |
| }, |
| { |
| "epoch": 2.716382387941293, |
| "grad_norm": 0.13377181792852472, |
| "learning_rate": 4.0931086621280195e-05, |
| "loss": 0.3449, |
| "step": 856 |
| }, |
| { |
| "epoch": 2.7195557318524397, |
| "grad_norm": 0.1083820821550372, |
| "learning_rate": 4.0842425508206814e-05, |
| "loss": 0.3422, |
| "step": 857 |
| }, |
| { |
| "epoch": 2.722729075763586, |
| "grad_norm": 0.14051865023509388, |
| "learning_rate": 4.075376025426553e-05, |
| "loss": 0.3432, |
| "step": 858 |
| }, |
| { |
| "epoch": 2.7259024196747323, |
| "grad_norm": 0.1446305958755852, |
| "learning_rate": 4.0665091295282557e-05, |
| "loss": 0.3421, |
| "step": 859 |
| }, |
| { |
| "epoch": 2.729075763585879, |
| "grad_norm": 0.1185199085913208, |
| "learning_rate": 4.0576419067102294e-05, |
| "loss": 0.3425, |
| "step": 860 |
| }, |
| { |
| "epoch": 2.732249107497025, |
| "grad_norm": 0.12047038069530473, |
| "learning_rate": 4.04877440055852e-05, |
| "loss": 0.3433, |
| "step": 861 |
| }, |
| { |
| "epoch": 2.735422451408171, |
| "grad_norm": 0.12298317824143638, |
| "learning_rate": 4.03990665466057e-05, |
| "loss": 0.3419, |
| "step": 862 |
| }, |
| { |
| "epoch": 2.7385957953193176, |
| "grad_norm": 0.1134666458695627, |
| "learning_rate": 4.0310387126049965e-05, |
| "loss": 0.3449, |
| "step": 863 |
| }, |
| { |
| "epoch": 2.741769139230464, |
| "grad_norm": 0.1151216459243458, |
| "learning_rate": 4.022170617981383e-05, |
| "loss": 0.3487, |
| "step": 864 |
| }, |
| { |
| "epoch": 2.7449424831416103, |
| "grad_norm": 0.12438855242604728, |
| "learning_rate": 4.013302414380062e-05, |
| "loss": 0.3433, |
| "step": 865 |
| }, |
| { |
| "epoch": 2.748115827052757, |
| "grad_norm": 0.12462238298496425, |
| "learning_rate": 4.004434145391903e-05, |
| "loss": 0.3379, |
| "step": 866 |
| }, |
| { |
| "epoch": 2.7512891709639034, |
| "grad_norm": 0.12043847383946583, |
| "learning_rate": 3.9955658546080975e-05, |
| "loss": 0.3418, |
| "step": 867 |
| }, |
| { |
| "epoch": 2.7544625148750495, |
| "grad_norm": 0.12670455035239242, |
| "learning_rate": 3.9866975856199376e-05, |
| "loss": 0.3374, |
| "step": 868 |
| }, |
| { |
| "epoch": 2.757635858786196, |
| "grad_norm": 0.13869220804125282, |
| "learning_rate": 3.9778293820186176e-05, |
| "loss": 0.344, |
| "step": 869 |
| }, |
| { |
| "epoch": 2.7608092026973425, |
| "grad_norm": 0.12510091538071094, |
| "learning_rate": 3.968961287395004e-05, |
| "loss": 0.3464, |
| "step": 870 |
| }, |
| { |
| "epoch": 2.7639825466084886, |
| "grad_norm": 0.11788439172448362, |
| "learning_rate": 3.960093345339432e-05, |
| "loss": 0.3415, |
| "step": 871 |
| }, |
| { |
| "epoch": 2.767155890519635, |
| "grad_norm": 0.13570817794277007, |
| "learning_rate": 3.9512255994414804e-05, |
| "loss": 0.3415, |
| "step": 872 |
| }, |
| { |
| "epoch": 2.7703292344307813, |
| "grad_norm": 0.13940993558742085, |
| "learning_rate": 3.9423580932897726e-05, |
| "loss": 0.3423, |
| "step": 873 |
| }, |
| { |
| "epoch": 2.773502578341928, |
| "grad_norm": 0.10631207090804463, |
| "learning_rate": 3.933490870471745e-05, |
| "loss": 0.3442, |
| "step": 874 |
| }, |
| { |
| "epoch": 2.776675922253074, |
| "grad_norm": 0.1503520815021571, |
| "learning_rate": 3.924623974573448e-05, |
| "loss": 0.3416, |
| "step": 875 |
| }, |
| { |
| "epoch": 2.7798492661642205, |
| "grad_norm": 0.12632156433331662, |
| "learning_rate": 3.9157574491793185e-05, |
| "loss": 0.3371, |
| "step": 876 |
| }, |
| { |
| "epoch": 2.783022610075367, |
| "grad_norm": 0.13734903086765304, |
| "learning_rate": 3.906891337871982e-05, |
| "loss": 0.3491, |
| "step": 877 |
| }, |
| { |
| "epoch": 2.786195953986513, |
| "grad_norm": 0.15639396884236917, |
| "learning_rate": 3.898025684232016e-05, |
| "loss": 0.3472, |
| "step": 878 |
| }, |
| { |
| "epoch": 2.7893692978976596, |
| "grad_norm": 0.12483944951607047, |
| "learning_rate": 3.889160531837761e-05, |
| "loss": 0.3381, |
| "step": 879 |
| }, |
| { |
| "epoch": 2.792542641808806, |
| "grad_norm": 0.13742280075928823, |
| "learning_rate": 3.8802959242650825e-05, |
| "loss": 0.3411, |
| "step": 880 |
| }, |
| { |
| "epoch": 2.7957159857199523, |
| "grad_norm": 0.12266800245109839, |
| "learning_rate": 3.8714319050871764e-05, |
| "loss": 0.3414, |
| "step": 881 |
| }, |
| { |
| "epoch": 2.798889329631099, |
| "grad_norm": 0.12861032154624427, |
| "learning_rate": 3.862568517874341e-05, |
| "loss": 0.3392, |
| "step": 882 |
| }, |
| { |
| "epoch": 2.8020626735422454, |
| "grad_norm": 0.11470183191097062, |
| "learning_rate": 3.8537058061937725e-05, |
| "loss": 0.343, |
| "step": 883 |
| }, |
| { |
| "epoch": 2.8052360174533915, |
| "grad_norm": 0.12702748667879213, |
| "learning_rate": 3.8448438136093414e-05, |
| "loss": 0.3453, |
| "step": 884 |
| }, |
| { |
| "epoch": 2.808409361364538, |
| "grad_norm": 0.11316231412291199, |
| "learning_rate": 3.8359825836813895e-05, |
| "loss": 0.3446, |
| "step": 885 |
| }, |
| { |
| "epoch": 2.811582705275684, |
| "grad_norm": 0.12175654245533254, |
| "learning_rate": 3.827122159966504e-05, |
| "loss": 0.3464, |
| "step": 886 |
| }, |
| { |
| "epoch": 2.8147560491868306, |
| "grad_norm": 0.12805730071128243, |
| "learning_rate": 3.818262586017315e-05, |
| "loss": 0.3421, |
| "step": 887 |
| }, |
| { |
| "epoch": 2.8179293930979767, |
| "grad_norm": 0.13404348902216565, |
| "learning_rate": 3.8094039053822715e-05, |
| "loss": 0.3448, |
| "step": 888 |
| }, |
| { |
| "epoch": 2.8211027370091233, |
| "grad_norm": 0.12244995804431573, |
| "learning_rate": 3.800546161605433e-05, |
| "loss": 0.3408, |
| "step": 889 |
| }, |
| { |
| "epoch": 2.82427608092027, |
| "grad_norm": 0.11165322485961218, |
| "learning_rate": 3.791689398226252e-05, |
| "loss": 0.3448, |
| "step": 890 |
| }, |
| { |
| "epoch": 2.827449424831416, |
| "grad_norm": 0.12392250807575451, |
| "learning_rate": 3.7828336587793665e-05, |
| "loss": 0.339, |
| "step": 891 |
| }, |
| { |
| "epoch": 2.8306227687425625, |
| "grad_norm": 0.14372027694900424, |
| "learning_rate": 3.773978986794376e-05, |
| "loss": 0.3471, |
| "step": 892 |
| }, |
| { |
| "epoch": 2.833796112653709, |
| "grad_norm": 0.11903041102933404, |
| "learning_rate": 3.765125425795637e-05, |
| "loss": 0.3445, |
| "step": 893 |
| }, |
| { |
| "epoch": 2.836969456564855, |
| "grad_norm": 0.11348910315003628, |
| "learning_rate": 3.7562730193020425e-05, |
| "loss": 0.34, |
| "step": 894 |
| }, |
| { |
| "epoch": 2.8401428004760016, |
| "grad_norm": 0.12038861322563689, |
| "learning_rate": 3.747421810826815e-05, |
| "loss": 0.3389, |
| "step": 895 |
| }, |
| { |
| "epoch": 2.843316144387148, |
| "grad_norm": 0.1097622809874721, |
| "learning_rate": 3.73857184387728e-05, |
| "loss": 0.3471, |
| "step": 896 |
| }, |
| { |
| "epoch": 2.8464894882982943, |
| "grad_norm": 0.11347921572685421, |
| "learning_rate": 3.7297231619546695e-05, |
| "loss": 0.3443, |
| "step": 897 |
| }, |
| { |
| "epoch": 2.849662832209441, |
| "grad_norm": 0.12369160424846627, |
| "learning_rate": 3.7208758085538924e-05, |
| "loss": 0.3432, |
| "step": 898 |
| }, |
| { |
| "epoch": 2.852836176120587, |
| "grad_norm": 0.12357156761028097, |
| "learning_rate": 3.712029827163332e-05, |
| "loss": 0.3414, |
| "step": 899 |
| }, |
| { |
| "epoch": 2.8560095200317335, |
| "grad_norm": 0.11376937375822221, |
| "learning_rate": 3.703185261264624e-05, |
| "loss": 0.3401, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.8591828639428796, |
| "grad_norm": 0.12349127066450852, |
| "learning_rate": 3.694342154332449e-05, |
| "loss": 0.3422, |
| "step": 901 |
| }, |
| { |
| "epoch": 2.862356207854026, |
| "grad_norm": 0.1034961716279667, |
| "learning_rate": 3.685500549834314e-05, |
| "loss": 0.3434, |
| "step": 902 |
| }, |
| { |
| "epoch": 2.8655295517651727, |
| "grad_norm": 0.12269370497851256, |
| "learning_rate": 3.676660491230343e-05, |
| "loss": 0.3383, |
| "step": 903 |
| }, |
| { |
| "epoch": 2.8687028956763188, |
| "grad_norm": 0.13382966320810855, |
| "learning_rate": 3.667822021973058e-05, |
| "loss": 0.3435, |
| "step": 904 |
| }, |
| { |
| "epoch": 2.8718762395874653, |
| "grad_norm": 0.11277938663333244, |
| "learning_rate": 3.658985185507175e-05, |
| "loss": 0.3406, |
| "step": 905 |
| }, |
| { |
| "epoch": 2.875049583498612, |
| "grad_norm": 0.1143319724175766, |
| "learning_rate": 3.6501500252693756e-05, |
| "loss": 0.3376, |
| "step": 906 |
| }, |
| { |
| "epoch": 2.878222927409758, |
| "grad_norm": 0.11689144421922465, |
| "learning_rate": 3.641316584688111e-05, |
| "loss": 0.3453, |
| "step": 907 |
| }, |
| { |
| "epoch": 2.8813962713209045, |
| "grad_norm": 0.11809016351008388, |
| "learning_rate": 3.632484907183372e-05, |
| "loss": 0.3481, |
| "step": 908 |
| }, |
| { |
| "epoch": 2.884569615232051, |
| "grad_norm": 0.1276259830733137, |
| "learning_rate": 3.623655036166489e-05, |
| "loss": 0.3421, |
| "step": 909 |
| }, |
| { |
| "epoch": 2.887742959143197, |
| "grad_norm": 0.11884475840914586, |
| "learning_rate": 3.6148270150399074e-05, |
| "loss": 0.3446, |
| "step": 910 |
| }, |
| { |
| "epoch": 2.8909163030543437, |
| "grad_norm": 0.11087658030941172, |
| "learning_rate": 3.606000887196985e-05, |
| "loss": 0.3439, |
| "step": 911 |
| }, |
| { |
| "epoch": 2.8940896469654898, |
| "grad_norm": 0.10980416285438169, |
| "learning_rate": 3.597176696021767e-05, |
| "loss": 0.3372, |
| "step": 912 |
| }, |
| { |
| "epoch": 2.8972629908766363, |
| "grad_norm": 0.11601487167763035, |
| "learning_rate": 3.588354484888787e-05, |
| "loss": 0.3459, |
| "step": 913 |
| }, |
| { |
| "epoch": 2.9004363347877824, |
| "grad_norm": 0.11368911677025695, |
| "learning_rate": 3.579534297162838e-05, |
| "loss": 0.3438, |
| "step": 914 |
| }, |
| { |
| "epoch": 2.903609678698929, |
| "grad_norm": 0.10395283389340351, |
| "learning_rate": 3.5707161761987745e-05, |
| "loss": 0.338, |
| "step": 915 |
| }, |
| { |
| "epoch": 2.9067830226100755, |
| "grad_norm": 0.11161882010475302, |
| "learning_rate": 3.561900165341284e-05, |
| "loss": 0.3408, |
| "step": 916 |
| }, |
| { |
| "epoch": 2.9099563665212216, |
| "grad_norm": 0.0997500952290493, |
| "learning_rate": 3.55308630792469e-05, |
| "loss": 0.3421, |
| "step": 917 |
| }, |
| { |
| "epoch": 2.913129710432368, |
| "grad_norm": 0.1339571225523946, |
| "learning_rate": 3.544274647272725e-05, |
| "loss": 0.3442, |
| "step": 918 |
| }, |
| { |
| "epoch": 2.9163030543435147, |
| "grad_norm": 0.11617441220009064, |
| "learning_rate": 3.535465226698327e-05, |
| "loss": 0.343, |
| "step": 919 |
| }, |
| { |
| "epoch": 2.9194763982546608, |
| "grad_norm": 0.1410191016195631, |
| "learning_rate": 3.526658089503421e-05, |
| "loss": 0.3395, |
| "step": 920 |
| }, |
| { |
| "epoch": 2.9226497421658073, |
| "grad_norm": 0.13899939104575385, |
| "learning_rate": 3.517853278978708e-05, |
| "loss": 0.3418, |
| "step": 921 |
| }, |
| { |
| "epoch": 2.925823086076954, |
| "grad_norm": 0.1448300244004051, |
| "learning_rate": 3.5090508384034554e-05, |
| "loss": 0.3434, |
| "step": 922 |
| }, |
| { |
| "epoch": 2.9289964299881, |
| "grad_norm": 0.14071342868222997, |
| "learning_rate": 3.5002508110452796e-05, |
| "loss": 0.3463, |
| "step": 923 |
| }, |
| { |
| "epoch": 2.9321697738992465, |
| "grad_norm": 0.13757441367946524, |
| "learning_rate": 3.491453240159932e-05, |
| "loss": 0.3428, |
| "step": 924 |
| }, |
| { |
| "epoch": 2.9353431178103926, |
| "grad_norm": 0.12380157230283552, |
| "learning_rate": 3.4826581689910956e-05, |
| "loss": 0.3425, |
| "step": 925 |
| }, |
| { |
| "epoch": 2.938516461721539, |
| "grad_norm": 0.14057723094623828, |
| "learning_rate": 3.473865640770161e-05, |
| "loss": 0.3411, |
| "step": 926 |
| }, |
| { |
| "epoch": 2.9416898056326852, |
| "grad_norm": 0.11650354025690383, |
| "learning_rate": 3.465075698716022e-05, |
| "loss": 0.3365, |
| "step": 927 |
| }, |
| { |
| "epoch": 2.9448631495438318, |
| "grad_norm": 0.12980737726549346, |
| "learning_rate": 3.4562883860348584e-05, |
| "loss": 0.3399, |
| "step": 928 |
| }, |
| { |
| "epoch": 2.9480364934549783, |
| "grad_norm": 0.1134055745141929, |
| "learning_rate": 3.447503745919929e-05, |
| "loss": 0.3414, |
| "step": 929 |
| }, |
| { |
| "epoch": 2.9512098373661244, |
| "grad_norm": 0.1201253976384782, |
| "learning_rate": 3.43872182155135e-05, |
| "loss": 0.3457, |
| "step": 930 |
| }, |
| { |
| "epoch": 2.954383181277271, |
| "grad_norm": 0.11035272651701446, |
| "learning_rate": 3.429942656095895e-05, |
| "loss": 0.3458, |
| "step": 931 |
| }, |
| { |
| "epoch": 2.9575565251884175, |
| "grad_norm": 0.11154353610592178, |
| "learning_rate": 3.4211662927067694e-05, |
| "loss": 0.3439, |
| "step": 932 |
| }, |
| { |
| "epoch": 2.9607298690995636, |
| "grad_norm": 0.11422392455235472, |
| "learning_rate": 3.412392774523416e-05, |
| "loss": 0.3382, |
| "step": 933 |
| }, |
| { |
| "epoch": 2.96390321301071, |
| "grad_norm": 0.10421558917351098, |
| "learning_rate": 3.4036221446712785e-05, |
| "loss": 0.3442, |
| "step": 934 |
| }, |
| { |
| "epoch": 2.9670765569218567, |
| "grad_norm": 0.10191858553645955, |
| "learning_rate": 3.3948544462616154e-05, |
| "loss": 0.3472, |
| "step": 935 |
| }, |
| { |
| "epoch": 2.9702499008330028, |
| "grad_norm": 0.10534746393290977, |
| "learning_rate": 3.386089722391268e-05, |
| "loss": 0.3426, |
| "step": 936 |
| }, |
| { |
| "epoch": 2.973423244744149, |
| "grad_norm": 0.10261252386705168, |
| "learning_rate": 3.3773280161424614e-05, |
| "loss": 0.3433, |
| "step": 937 |
| }, |
| { |
| "epoch": 2.9765965886552954, |
| "grad_norm": 0.1170256748314465, |
| "learning_rate": 3.368569370582584e-05, |
| "loss": 0.3394, |
| "step": 938 |
| }, |
| { |
| "epoch": 2.979769932566442, |
| "grad_norm": 0.11126882366159503, |
| "learning_rate": 3.359813828763983e-05, |
| "loss": 0.3397, |
| "step": 939 |
| }, |
| { |
| "epoch": 2.982943276477588, |
| "grad_norm": 0.1198757045135952, |
| "learning_rate": 3.351061433723746e-05, |
| "loss": 0.3438, |
| "step": 940 |
| }, |
| { |
| "epoch": 2.9861166203887346, |
| "grad_norm": 0.10112115474813273, |
| "learning_rate": 3.3423122284834976e-05, |
| "loss": 0.3342, |
| "step": 941 |
| }, |
| { |
| "epoch": 2.989289964299881, |
| "grad_norm": 0.11249349782419689, |
| "learning_rate": 3.3335662560491773e-05, |
| "loss": 0.3429, |
| "step": 942 |
| }, |
| { |
| "epoch": 2.9924633082110272, |
| "grad_norm": 0.1001185547565032, |
| "learning_rate": 3.3248235594108415e-05, |
| "loss": 0.3337, |
| "step": 943 |
| }, |
| { |
| "epoch": 2.995636652122174, |
| "grad_norm": 0.11017756335045858, |
| "learning_rate": 3.316084181542434e-05, |
| "loss": 0.3375, |
| "step": 944 |
| }, |
| { |
| "epoch": 2.9988099960333203, |
| "grad_norm": 0.12823056575818978, |
| "learning_rate": 3.307348165401598e-05, |
| "loss": 0.3306, |
| "step": 945 |
| }, |
| { |
| "epoch": 3.0019833399444664, |
| "grad_norm": 0.27281229082142194, |
| "learning_rate": 3.2986155539294435e-05, |
| "loss": 0.6218, |
| "step": 946 |
| }, |
| { |
| "epoch": 3.005156683855613, |
| "grad_norm": 0.2100816419248202, |
| "learning_rate": 3.2898863900503484e-05, |
| "loss": 0.3147, |
| "step": 947 |
| }, |
| { |
| "epoch": 3.008330027766759, |
| "grad_norm": 0.17105137980537694, |
| "learning_rate": 3.281160716671743e-05, |
| "loss": 0.3116, |
| "step": 948 |
| }, |
| { |
| "epoch": 3.0115033716779056, |
| "grad_norm": 0.22415298912899825, |
| "learning_rate": 3.2724385766839026e-05, |
| "loss": 0.3194, |
| "step": 949 |
| }, |
| { |
| "epoch": 3.014676715589052, |
| "grad_norm": 0.19583525189357293, |
| "learning_rate": 3.263720012959732e-05, |
| "loss": 0.3173, |
| "step": 950 |
| }, |
| { |
| "epoch": 3.0178500595001982, |
| "grad_norm": 0.15512832486443295, |
| "learning_rate": 3.255005068354559e-05, |
| "loss": 0.3157, |
| "step": 951 |
| }, |
| { |
| "epoch": 3.021023403411345, |
| "grad_norm": 0.16721068805673456, |
| "learning_rate": 3.2462937857059187e-05, |
| "loss": 0.3178, |
| "step": 952 |
| }, |
| { |
| "epoch": 3.024196747322491, |
| "grad_norm": 0.17107719512173727, |
| "learning_rate": 3.237586207833353e-05, |
| "loss": 0.3119, |
| "step": 953 |
| }, |
| { |
| "epoch": 3.0273700912336374, |
| "grad_norm": 0.1585477770306446, |
| "learning_rate": 3.2288823775381845e-05, |
| "loss": 0.3134, |
| "step": 954 |
| }, |
| { |
| "epoch": 3.030543435144784, |
| "grad_norm": 0.16343916704345743, |
| "learning_rate": 3.2201823376033226e-05, |
| "loss": 0.3132, |
| "step": 955 |
| }, |
| { |
| "epoch": 3.03371677905593, |
| "grad_norm": 0.13291081320812426, |
| "learning_rate": 3.21148613079304e-05, |
| "loss": 0.3155, |
| "step": 956 |
| }, |
| { |
| "epoch": 3.0368901229670766, |
| "grad_norm": 0.1495696649421576, |
| "learning_rate": 3.202793799852772e-05, |
| "loss": 0.3139, |
| "step": 957 |
| }, |
| { |
| "epoch": 3.040063466878223, |
| "grad_norm": 0.14466866199473793, |
| "learning_rate": 3.194105387508899e-05, |
| "loss": 0.3134, |
| "step": 958 |
| }, |
| { |
| "epoch": 3.0432368107893693, |
| "grad_norm": 0.1361091421412387, |
| "learning_rate": 3.1854209364685436e-05, |
| "loss": 0.3082, |
| "step": 959 |
| }, |
| { |
| "epoch": 3.046410154700516, |
| "grad_norm": 0.13367023176470594, |
| "learning_rate": 3.176740489419352e-05, |
| "loss": 0.3126, |
| "step": 960 |
| }, |
| { |
| "epoch": 3.049583498611662, |
| "grad_norm": 0.13627283601328063, |
| "learning_rate": 3.168064089029296e-05, |
| "loss": 0.3134, |
| "step": 961 |
| }, |
| { |
| "epoch": 3.0527568425228084, |
| "grad_norm": 0.13148674412233574, |
| "learning_rate": 3.159391777946447e-05, |
| "loss": 0.3162, |
| "step": 962 |
| }, |
| { |
| "epoch": 3.055930186433955, |
| "grad_norm": 0.12808707534046343, |
| "learning_rate": 3.150723598798787e-05, |
| "loss": 0.3118, |
| "step": 963 |
| }, |
| { |
| "epoch": 3.059103530345101, |
| "grad_norm": 0.13273988457362224, |
| "learning_rate": 3.1420595941939786e-05, |
| "loss": 0.3133, |
| "step": 964 |
| }, |
| { |
| "epoch": 3.0622768742562476, |
| "grad_norm": 0.11421318045881329, |
| "learning_rate": 3.13339980671917e-05, |
| "loss": 0.3098, |
| "step": 965 |
| }, |
| { |
| "epoch": 3.0654502181673937, |
| "grad_norm": 0.13703482566876224, |
| "learning_rate": 3.124744278940777e-05, |
| "loss": 0.3195, |
| "step": 966 |
| }, |
| { |
| "epoch": 3.0686235620785403, |
| "grad_norm": 0.11316298092381129, |
| "learning_rate": 3.1160930534042805e-05, |
| "loss": 0.3172, |
| "step": 967 |
| }, |
| { |
| "epoch": 3.071796905989687, |
| "grad_norm": 0.1189425742935073, |
| "learning_rate": 3.107446172634012e-05, |
| "loss": 0.3108, |
| "step": 968 |
| }, |
| { |
| "epoch": 3.074970249900833, |
| "grad_norm": 0.11756376777246882, |
| "learning_rate": 3.098803679132947e-05, |
| "loss": 0.3179, |
| "step": 969 |
| }, |
| { |
| "epoch": 3.0781435938119794, |
| "grad_norm": 0.1143776389372859, |
| "learning_rate": 3.090165615382496e-05, |
| "loss": 0.3102, |
| "step": 970 |
| }, |
| { |
| "epoch": 3.0813169377231255, |
| "grad_norm": 0.12423804582324503, |
| "learning_rate": 3.0815320238422974e-05, |
| "loss": 0.3115, |
| "step": 971 |
| }, |
| { |
| "epoch": 3.084490281634272, |
| "grad_norm": 0.13996003835367157, |
| "learning_rate": 3.072902946950001e-05, |
| "loss": 0.3192, |
| "step": 972 |
| }, |
| { |
| "epoch": 3.0876636255454186, |
| "grad_norm": 0.11255035117655975, |
| "learning_rate": 3.0642784271210734e-05, |
| "loss": 0.309, |
| "step": 973 |
| }, |
| { |
| "epoch": 3.0908369694565647, |
| "grad_norm": 0.12699451919385885, |
| "learning_rate": 3.055658506748575e-05, |
| "loss": 0.3134, |
| "step": 974 |
| }, |
| { |
| "epoch": 3.0940103133677113, |
| "grad_norm": 0.11635660673053044, |
| "learning_rate": 3.0470432282029614e-05, |
| "loss": 0.3167, |
| "step": 975 |
| }, |
| { |
| "epoch": 3.097183657278858, |
| "grad_norm": 0.10873609232252231, |
| "learning_rate": 3.0384326338318685e-05, |
| "loss": 0.3133, |
| "step": 976 |
| }, |
| { |
| "epoch": 3.100357001190004, |
| "grad_norm": 0.11807314933760477, |
| "learning_rate": 3.0298267659599115e-05, |
| "loss": 0.312, |
| "step": 977 |
| }, |
| { |
| "epoch": 3.1035303451011504, |
| "grad_norm": 0.11916222364590734, |
| "learning_rate": 3.0212256668884695e-05, |
| "loss": 0.3078, |
| "step": 978 |
| }, |
| { |
| "epoch": 3.1067036890122965, |
| "grad_norm": 0.12272695797856463, |
| "learning_rate": 3.0126293788954833e-05, |
| "loss": 0.3122, |
| "step": 979 |
| }, |
| { |
| "epoch": 3.109877032923443, |
| "grad_norm": 0.11540153822434568, |
| "learning_rate": 3.004037944235242e-05, |
| "loss": 0.3136, |
| "step": 980 |
| }, |
| { |
| "epoch": 3.1130503768345896, |
| "grad_norm": 0.11618456648740005, |
| "learning_rate": 2.9954514051381863e-05, |
| "loss": 0.3166, |
| "step": 981 |
| }, |
| { |
| "epoch": 3.1162237207457357, |
| "grad_norm": 0.11244633521843918, |
| "learning_rate": 2.9868698038106815e-05, |
| "loss": 0.3148, |
| "step": 982 |
| }, |
| { |
| "epoch": 3.1193970646568823, |
| "grad_norm": 0.10920070004246661, |
| "learning_rate": 2.9782931824348328e-05, |
| "loss": 0.3088, |
| "step": 983 |
| }, |
| { |
| "epoch": 3.1225704085680284, |
| "grad_norm": 0.12174658516975441, |
| "learning_rate": 2.9697215831682595e-05, |
| "loss": 0.3165, |
| "step": 984 |
| }, |
| { |
| "epoch": 3.125743752479175, |
| "grad_norm": 0.10643018736017142, |
| "learning_rate": 2.9611550481439e-05, |
| "loss": 0.3145, |
| "step": 985 |
| }, |
| { |
| "epoch": 3.1289170963903215, |
| "grad_norm": 0.10684450615726702, |
| "learning_rate": 2.952593619469795e-05, |
| "loss": 0.3092, |
| "step": 986 |
| }, |
| { |
| "epoch": 3.1320904403014675, |
| "grad_norm": 0.10941148930845793, |
| "learning_rate": 2.9440373392288913e-05, |
| "loss": 0.3138, |
| "step": 987 |
| }, |
| { |
| "epoch": 3.135263784212614, |
| "grad_norm": 0.10382261849589255, |
| "learning_rate": 2.9354862494788223e-05, |
| "loss": 0.3119, |
| "step": 988 |
| }, |
| { |
| "epoch": 3.13843712812376, |
| "grad_norm": 0.10958378594318682, |
| "learning_rate": 2.9269403922517145e-05, |
| "loss": 0.3099, |
| "step": 989 |
| }, |
| { |
| "epoch": 3.1416104720349067, |
| "grad_norm": 0.12998774556538992, |
| "learning_rate": 2.9183998095539684e-05, |
| "loss": 0.3129, |
| "step": 990 |
| }, |
| { |
| "epoch": 3.1447838159460533, |
| "grad_norm": 0.10561418907091862, |
| "learning_rate": 2.9098645433660655e-05, |
| "loss": 0.3128, |
| "step": 991 |
| }, |
| { |
| "epoch": 3.1479571598571994, |
| "grad_norm": 0.12592324141074962, |
| "learning_rate": 2.9013346356423446e-05, |
| "loss": 0.317, |
| "step": 992 |
| }, |
| { |
| "epoch": 3.151130503768346, |
| "grad_norm": 0.12513756646057633, |
| "learning_rate": 2.8928101283108153e-05, |
| "loss": 0.314, |
| "step": 993 |
| }, |
| { |
| "epoch": 3.1543038476794925, |
| "grad_norm": 0.14282969282839109, |
| "learning_rate": 2.884291063272935e-05, |
| "loss": 0.3109, |
| "step": 994 |
| }, |
| { |
| "epoch": 3.1574771915906386, |
| "grad_norm": 0.1199544503476591, |
| "learning_rate": 2.8757774824034146e-05, |
| "loss": 0.3119, |
| "step": 995 |
| }, |
| { |
| "epoch": 3.160650535501785, |
| "grad_norm": 0.11681362905939408, |
| "learning_rate": 2.867269427550004e-05, |
| "loss": 0.3065, |
| "step": 996 |
| }, |
| { |
| "epoch": 3.163823879412931, |
| "grad_norm": 0.11355969210006382, |
| "learning_rate": 2.8587669405332942e-05, |
| "loss": 0.3134, |
| "step": 997 |
| }, |
| { |
| "epoch": 3.1669972233240777, |
| "grad_norm": 0.11125944189799743, |
| "learning_rate": 2.850270063146505e-05, |
| "loss": 0.3123, |
| "step": 998 |
| }, |
| { |
| "epoch": 3.1701705672352243, |
| "grad_norm": 0.10903398382765295, |
| "learning_rate": 2.8417788371552847e-05, |
| "loss": 0.3137, |
| "step": 999 |
| }, |
| { |
| "epoch": 3.1733439111463704, |
| "grad_norm": 0.11735770219676825, |
| "learning_rate": 2.8332933042974997e-05, |
| "loss": 0.3143, |
| "step": 1000 |
| }, |
| { |
| "epoch": 3.176517255057517, |
| "grad_norm": 0.09886653400775318, |
| "learning_rate": 2.824813506283038e-05, |
| "loss": 0.312, |
| "step": 1001 |
| }, |
| { |
| "epoch": 3.179690598968663, |
| "grad_norm": 0.10362486903153832, |
| "learning_rate": 2.8163394847935924e-05, |
| "loss": 0.3063, |
| "step": 1002 |
| }, |
| { |
| "epoch": 3.1828639428798096, |
| "grad_norm": 0.11188362720582715, |
| "learning_rate": 2.8078712814824657e-05, |
| "loss": 0.3125, |
| "step": 1003 |
| }, |
| { |
| "epoch": 3.186037286790956, |
| "grad_norm": 0.09559025289026829, |
| "learning_rate": 2.799408937974359e-05, |
| "loss": 0.3121, |
| "step": 1004 |
| }, |
| { |
| "epoch": 3.189210630702102, |
| "grad_norm": 0.10812265121811047, |
| "learning_rate": 2.790952495865173e-05, |
| "loss": 0.3106, |
| "step": 1005 |
| }, |
| { |
| "epoch": 3.1923839746132487, |
| "grad_norm": 0.1121565061938828, |
| "learning_rate": 2.7825019967217975e-05, |
| "loss": 0.3189, |
| "step": 1006 |
| }, |
| { |
| "epoch": 3.1955573185243953, |
| "grad_norm": 0.1004631775145022, |
| "learning_rate": 2.7740574820819133e-05, |
| "loss": 0.3137, |
| "step": 1007 |
| }, |
| { |
| "epoch": 3.1987306624355414, |
| "grad_norm": 0.12433594061957719, |
| "learning_rate": 2.7656189934537815e-05, |
| "loss": 0.3081, |
| "step": 1008 |
| }, |
| { |
| "epoch": 3.201904006346688, |
| "grad_norm": 0.0976644148795563, |
| "learning_rate": 2.7571865723160484e-05, |
| "loss": 0.3108, |
| "step": 1009 |
| }, |
| { |
| "epoch": 3.205077350257834, |
| "grad_norm": 0.12033671452278129, |
| "learning_rate": 2.7487602601175274e-05, |
| "loss": 0.3128, |
| "step": 1010 |
| }, |
| { |
| "epoch": 3.2082506941689806, |
| "grad_norm": 0.11018748833970334, |
| "learning_rate": 2.7403400982770142e-05, |
| "loss": 0.3151, |
| "step": 1011 |
| }, |
| { |
| "epoch": 3.211424038080127, |
| "grad_norm": 0.11256643868056433, |
| "learning_rate": 2.731926128183064e-05, |
| "loss": 0.314, |
| "step": 1012 |
| }, |
| { |
| "epoch": 3.214597381991273, |
| "grad_norm": 0.11109067168322144, |
| "learning_rate": 2.7235183911938033e-05, |
| "loss": 0.3138, |
| "step": 1013 |
| }, |
| { |
| "epoch": 3.2177707259024197, |
| "grad_norm": 0.11490231986287124, |
| "learning_rate": 2.715116928636716e-05, |
| "loss": 0.3131, |
| "step": 1014 |
| }, |
| { |
| "epoch": 3.220944069813566, |
| "grad_norm": 0.1101964461135929, |
| "learning_rate": 2.7067217818084475e-05, |
| "loss": 0.3145, |
| "step": 1015 |
| }, |
| { |
| "epoch": 3.2241174137247124, |
| "grad_norm": 0.10280292922760781, |
| "learning_rate": 2.6983329919745968e-05, |
| "loss": 0.3117, |
| "step": 1016 |
| }, |
| { |
| "epoch": 3.227290757635859, |
| "grad_norm": 0.10260424316634227, |
| "learning_rate": 2.689950600369518e-05, |
| "loss": 0.3188, |
| "step": 1017 |
| }, |
| { |
| "epoch": 3.230464101547005, |
| "grad_norm": 0.09915026576929485, |
| "learning_rate": 2.681574648196111e-05, |
| "loss": 0.3128, |
| "step": 1018 |
| }, |
| { |
| "epoch": 3.2336374454581516, |
| "grad_norm": 0.09342686268392349, |
| "learning_rate": 2.673205176625631e-05, |
| "loss": 0.3127, |
| "step": 1019 |
| }, |
| { |
| "epoch": 3.236810789369298, |
| "grad_norm": 0.10375205809388013, |
| "learning_rate": 2.664842226797468e-05, |
| "loss": 0.3125, |
| "step": 1020 |
| }, |
| { |
| "epoch": 3.239984133280444, |
| "grad_norm": 0.09480673127381069, |
| "learning_rate": 2.656485839818964e-05, |
| "loss": 0.3103, |
| "step": 1021 |
| }, |
| { |
| "epoch": 3.2431574771915908, |
| "grad_norm": 0.0967591082180364, |
| "learning_rate": 2.648136056765197e-05, |
| "loss": 0.3158, |
| "step": 1022 |
| }, |
| { |
| "epoch": 3.246330821102737, |
| "grad_norm": 0.10704831819192964, |
| "learning_rate": 2.639792918678786e-05, |
| "loss": 0.3165, |
| "step": 1023 |
| }, |
| { |
| "epoch": 3.2495041650138834, |
| "grad_norm": 0.10372920487733647, |
| "learning_rate": 2.6314564665696853e-05, |
| "loss": 0.314, |
| "step": 1024 |
| }, |
| { |
| "epoch": 3.25267750892503, |
| "grad_norm": 0.10195810032807612, |
| "learning_rate": 2.6231267414149882e-05, |
| "loss": 0.3115, |
| "step": 1025 |
| }, |
| { |
| "epoch": 3.255850852836176, |
| "grad_norm": 0.10118352358935338, |
| "learning_rate": 2.6148037841587178e-05, |
| "loss": 0.3124, |
| "step": 1026 |
| }, |
| { |
| "epoch": 3.2590241967473226, |
| "grad_norm": 0.0960144819596772, |
| "learning_rate": 2.606487635711634e-05, |
| "loss": 0.3133, |
| "step": 1027 |
| }, |
| { |
| "epoch": 3.2621975406584687, |
| "grad_norm": 0.10341496259177649, |
| "learning_rate": 2.5981783369510262e-05, |
| "loss": 0.3133, |
| "step": 1028 |
| }, |
| { |
| "epoch": 3.265370884569615, |
| "grad_norm": 0.09978273187676878, |
| "learning_rate": 2.589875928720518e-05, |
| "loss": 0.3092, |
| "step": 1029 |
| }, |
| { |
| "epoch": 3.2685442284807618, |
| "grad_norm": 0.10892472596771698, |
| "learning_rate": 2.5815804518298575e-05, |
| "loss": 0.3098, |
| "step": 1030 |
| }, |
| { |
| "epoch": 3.271717572391908, |
| "grad_norm": 0.10275518766631388, |
| "learning_rate": 2.5732919470547295e-05, |
| "loss": 0.3083, |
| "step": 1031 |
| }, |
| { |
| "epoch": 3.2748909163030544, |
| "grad_norm": 0.10103072194259327, |
| "learning_rate": 2.5650104551365412e-05, |
| "loss": 0.3108, |
| "step": 1032 |
| }, |
| { |
| "epoch": 3.278064260214201, |
| "grad_norm": 0.10324262880702959, |
| "learning_rate": 2.556736016782234e-05, |
| "loss": 0.3122, |
| "step": 1033 |
| }, |
| { |
| "epoch": 3.281237604125347, |
| "grad_norm": 0.09926981679742919, |
| "learning_rate": 2.5484686726640744e-05, |
| "loss": 0.3107, |
| "step": 1034 |
| }, |
| { |
| "epoch": 3.2844109480364936, |
| "grad_norm": 0.10130743878380367, |
| "learning_rate": 2.540208463419462e-05, |
| "loss": 0.3074, |
| "step": 1035 |
| }, |
| { |
| "epoch": 3.2875842919476397, |
| "grad_norm": 0.10557324952221134, |
| "learning_rate": 2.5319554296507188e-05, |
| "loss": 0.3131, |
| "step": 1036 |
| }, |
| { |
| "epoch": 3.2907576358587862, |
| "grad_norm": 0.10083350399120836, |
| "learning_rate": 2.5237096119249058e-05, |
| "loss": 0.3204, |
| "step": 1037 |
| }, |
| { |
| "epoch": 3.2939309797699323, |
| "grad_norm": 0.0986145583348089, |
| "learning_rate": 2.5154710507736037e-05, |
| "loss": 0.3079, |
| "step": 1038 |
| }, |
| { |
| "epoch": 3.297104323681079, |
| "grad_norm": 0.11022375139763324, |
| "learning_rate": 2.5072397866927335e-05, |
| "loss": 0.317, |
| "step": 1039 |
| }, |
| { |
| "epoch": 3.3002776675922254, |
| "grad_norm": 0.1125140361024117, |
| "learning_rate": 2.4990158601423417e-05, |
| "loss": 0.317, |
| "step": 1040 |
| }, |
| { |
| "epoch": 3.3034510115033715, |
| "grad_norm": 0.11021070433389958, |
| "learning_rate": 2.4907993115464116e-05, |
| "loss": 0.3113, |
| "step": 1041 |
| }, |
| { |
| "epoch": 3.306624355414518, |
| "grad_norm": 0.10634404742096815, |
| "learning_rate": 2.4825901812926574e-05, |
| "loss": 0.3158, |
| "step": 1042 |
| }, |
| { |
| "epoch": 3.3097976993256646, |
| "grad_norm": 0.11699549952140445, |
| "learning_rate": 2.474388509732333e-05, |
| "loss": 0.3111, |
| "step": 1043 |
| }, |
| { |
| "epoch": 3.3129710432368107, |
| "grad_norm": 0.0980534798385101, |
| "learning_rate": 2.466194337180027e-05, |
| "loss": 0.3143, |
| "step": 1044 |
| }, |
| { |
| "epoch": 3.3161443871479572, |
| "grad_norm": 0.1194870796488275, |
| "learning_rate": 2.4580077039134683e-05, |
| "loss": 0.316, |
| "step": 1045 |
| }, |
| { |
| "epoch": 3.3193177310591038, |
| "grad_norm": 0.10484409113962986, |
| "learning_rate": 2.449828650173325e-05, |
| "loss": 0.3135, |
| "step": 1046 |
| }, |
| { |
| "epoch": 3.32249107497025, |
| "grad_norm": 0.10357015345939864, |
| "learning_rate": 2.441657216163015e-05, |
| "loss": 0.3109, |
| "step": 1047 |
| }, |
| { |
| "epoch": 3.3256644188813964, |
| "grad_norm": 0.11296601395220406, |
| "learning_rate": 2.433493442048492e-05, |
| "loss": 0.3133, |
| "step": 1048 |
| }, |
| { |
| "epoch": 3.3288377627925425, |
| "grad_norm": 0.09961102835039787, |
| "learning_rate": 2.4253373679580686e-05, |
| "loss": 0.3158, |
| "step": 1049 |
| }, |
| { |
| "epoch": 3.332011106703689, |
| "grad_norm": 0.10181823522886456, |
| "learning_rate": 2.4171890339822013e-05, |
| "loss": 0.3116, |
| "step": 1050 |
| }, |
| { |
| "epoch": 3.335184450614835, |
| "grad_norm": 0.10979798113117561, |
| "learning_rate": 2.409048480173305e-05, |
| "loss": 0.3162, |
| "step": 1051 |
| }, |
| { |
| "epoch": 3.3383577945259817, |
| "grad_norm": 0.094779351238252, |
| "learning_rate": 2.400915746545548e-05, |
| "loss": 0.315, |
| "step": 1052 |
| }, |
| { |
| "epoch": 3.3415311384371282, |
| "grad_norm": 0.10148023343814727, |
| "learning_rate": 2.392790873074664e-05, |
| "loss": 0.3146, |
| "step": 1053 |
| }, |
| { |
| "epoch": 3.3447044823482743, |
| "grad_norm": 0.09289268477261449, |
| "learning_rate": 2.384673899697746e-05, |
| "loss": 0.3125, |
| "step": 1054 |
| }, |
| { |
| "epoch": 3.347877826259421, |
| "grad_norm": 0.09684287864696564, |
| "learning_rate": 2.376564866313058e-05, |
| "loss": 0.3104, |
| "step": 1055 |
| }, |
| { |
| "epoch": 3.3510511701705674, |
| "grad_norm": 0.09643736175489405, |
| "learning_rate": 2.3684638127798336e-05, |
| "loss": 0.3178, |
| "step": 1056 |
| }, |
| { |
| "epoch": 3.3542245140817135, |
| "grad_norm": 0.10635216596639196, |
| "learning_rate": 2.3603707789180863e-05, |
| "loss": 0.3136, |
| "step": 1057 |
| }, |
| { |
| "epoch": 3.35739785799286, |
| "grad_norm": 0.09676123073144416, |
| "learning_rate": 2.3522858045084016e-05, |
| "loss": 0.3122, |
| "step": 1058 |
| }, |
| { |
| "epoch": 3.360571201904006, |
| "grad_norm": 0.10713967084993489, |
| "learning_rate": 2.344208929291759e-05, |
| "loss": 0.3141, |
| "step": 1059 |
| }, |
| { |
| "epoch": 3.3637445458151527, |
| "grad_norm": 0.09842859790655265, |
| "learning_rate": 2.3361401929693194e-05, |
| "loss": 0.3192, |
| "step": 1060 |
| }, |
| { |
| "epoch": 3.3669178897262992, |
| "grad_norm": 0.0960717662909176, |
| "learning_rate": 2.3280796352022442e-05, |
| "loss": 0.3053, |
| "step": 1061 |
| }, |
| { |
| "epoch": 3.3700912336374453, |
| "grad_norm": 0.10290198906684156, |
| "learning_rate": 2.3200272956114845e-05, |
| "loss": 0.3115, |
| "step": 1062 |
| }, |
| { |
| "epoch": 3.373264577548592, |
| "grad_norm": 0.09091365277942409, |
| "learning_rate": 2.3119832137776088e-05, |
| "loss": 0.3148, |
| "step": 1063 |
| }, |
| { |
| "epoch": 3.376437921459738, |
| "grad_norm": 0.10196717070207367, |
| "learning_rate": 2.3039474292405834e-05, |
| "loss": 0.3111, |
| "step": 1064 |
| }, |
| { |
| "epoch": 3.3796112653708845, |
| "grad_norm": 0.09586271821215989, |
| "learning_rate": 2.295919981499596e-05, |
| "loss": 0.3181, |
| "step": 1065 |
| }, |
| { |
| "epoch": 3.382784609282031, |
| "grad_norm": 0.09899945933563967, |
| "learning_rate": 2.287900910012854e-05, |
| "loss": 0.3162, |
| "step": 1066 |
| }, |
| { |
| "epoch": 3.385957953193177, |
| "grad_norm": 0.10322988072215468, |
| "learning_rate": 2.2798902541973945e-05, |
| "loss": 0.3153, |
| "step": 1067 |
| }, |
| { |
| "epoch": 3.3891312971043237, |
| "grad_norm": 0.09259998496704394, |
| "learning_rate": 2.2718880534288826e-05, |
| "loss": 0.3121, |
| "step": 1068 |
| }, |
| { |
| "epoch": 3.3923046410154702, |
| "grad_norm": 0.10569429128390048, |
| "learning_rate": 2.2638943470414274e-05, |
| "loss": 0.3125, |
| "step": 1069 |
| }, |
| { |
| "epoch": 3.3954779849266163, |
| "grad_norm": 0.09760048377336895, |
| "learning_rate": 2.2559091743273855e-05, |
| "loss": 0.3127, |
| "step": 1070 |
| }, |
| { |
| "epoch": 3.398651328837763, |
| "grad_norm": 0.10906819806313078, |
| "learning_rate": 2.2479325745371662e-05, |
| "loss": 0.3135, |
| "step": 1071 |
| }, |
| { |
| "epoch": 3.401824672748909, |
| "grad_norm": 0.09977494398591062, |
| "learning_rate": 2.239964586879033e-05, |
| "loss": 0.312, |
| "step": 1072 |
| }, |
| { |
| "epoch": 3.4049980166600555, |
| "grad_norm": 0.10832374432497219, |
| "learning_rate": 2.2320052505189307e-05, |
| "loss": 0.3181, |
| "step": 1073 |
| }, |
| { |
| "epoch": 3.408171360571202, |
| "grad_norm": 0.1022824580898682, |
| "learning_rate": 2.2240546045802657e-05, |
| "loss": 0.3224, |
| "step": 1074 |
| }, |
| { |
| "epoch": 3.411344704482348, |
| "grad_norm": 0.10034777595087839, |
| "learning_rate": 2.216112688143735e-05, |
| "loss": 0.3125, |
| "step": 1075 |
| }, |
| { |
| "epoch": 3.4145180483934947, |
| "grad_norm": 0.10030841402583066, |
| "learning_rate": 2.2081795402471248e-05, |
| "loss": 0.3084, |
| "step": 1076 |
| }, |
| { |
| "epoch": 3.417691392304641, |
| "grad_norm": 0.09762803350713443, |
| "learning_rate": 2.2002551998851214e-05, |
| "loss": 0.3093, |
| "step": 1077 |
| }, |
| { |
| "epoch": 3.4208647362157873, |
| "grad_norm": 0.1009931981677632, |
| "learning_rate": 2.192339706009115e-05, |
| "loss": 0.3142, |
| "step": 1078 |
| }, |
| { |
| "epoch": 3.424038080126934, |
| "grad_norm": 0.10087359052596663, |
| "learning_rate": 2.1844330975270146e-05, |
| "loss": 0.3116, |
| "step": 1079 |
| }, |
| { |
| "epoch": 3.42721142403808, |
| "grad_norm": 0.09669963400697884, |
| "learning_rate": 2.1765354133030537e-05, |
| "loss": 0.3093, |
| "step": 1080 |
| }, |
| { |
| "epoch": 3.4303847679492265, |
| "grad_norm": 0.10318895349642157, |
| "learning_rate": 2.1686466921576e-05, |
| "loss": 0.3139, |
| "step": 1081 |
| }, |
| { |
| "epoch": 3.433558111860373, |
| "grad_norm": 0.100273860386507, |
| "learning_rate": 2.1607669728669595e-05, |
| "loss": 0.3147, |
| "step": 1082 |
| }, |
| { |
| "epoch": 3.436731455771519, |
| "grad_norm": 0.10871940977569985, |
| "learning_rate": 2.152896294163198e-05, |
| "loss": 0.3195, |
| "step": 1083 |
| }, |
| { |
| "epoch": 3.4399047996826657, |
| "grad_norm": 0.09861412764646266, |
| "learning_rate": 2.1450346947339354e-05, |
| "loss": 0.3099, |
| "step": 1084 |
| }, |
| { |
| "epoch": 3.443078143593812, |
| "grad_norm": 0.1036174572244448, |
| "learning_rate": 2.137182213222168e-05, |
| "loss": 0.3141, |
| "step": 1085 |
| }, |
| { |
| "epoch": 3.4462514875049584, |
| "grad_norm": 0.10204915828850486, |
| "learning_rate": 2.1293388882260725e-05, |
| "loss": 0.3098, |
| "step": 1086 |
| }, |
| { |
| "epoch": 3.449424831416105, |
| "grad_norm": 0.09799894678131774, |
| "learning_rate": 2.1215047582988195e-05, |
| "loss": 0.3103, |
| "step": 1087 |
| }, |
| { |
| "epoch": 3.452598175327251, |
| "grad_norm": 0.09504536792403677, |
| "learning_rate": 2.113679861948376e-05, |
| "loss": 0.3127, |
| "step": 1088 |
| }, |
| { |
| "epoch": 3.4557715192383975, |
| "grad_norm": 0.09764248583285513, |
| "learning_rate": 2.1058642376373283e-05, |
| "loss": 0.3164, |
| "step": 1089 |
| }, |
| { |
| "epoch": 3.4589448631495436, |
| "grad_norm": 0.0906376025849311, |
| "learning_rate": 2.098057923782685e-05, |
| "loss": 0.3152, |
| "step": 1090 |
| }, |
| { |
| "epoch": 3.46211820706069, |
| "grad_norm": 0.09359589566718937, |
| "learning_rate": 2.0902609587556896e-05, |
| "loss": 0.3117, |
| "step": 1091 |
| }, |
| { |
| "epoch": 3.4652915509718367, |
| "grad_norm": 0.09548394143187244, |
| "learning_rate": 2.082473380881632e-05, |
| "loss": 0.3115, |
| "step": 1092 |
| }, |
| { |
| "epoch": 3.468464894882983, |
| "grad_norm": 0.09338528887344875, |
| "learning_rate": 2.074695228439663e-05, |
| "loss": 0.3114, |
| "step": 1093 |
| }, |
| { |
| "epoch": 3.4716382387941294, |
| "grad_norm": 0.09884602966359947, |
| "learning_rate": 2.066926539662598e-05, |
| "loss": 0.3145, |
| "step": 1094 |
| }, |
| { |
| "epoch": 3.474811582705276, |
| "grad_norm": 0.09230713872679078, |
| "learning_rate": 2.0591673527367386e-05, |
| "loss": 0.31, |
| "step": 1095 |
| }, |
| { |
| "epoch": 3.477984926616422, |
| "grad_norm": 0.09898085658702097, |
| "learning_rate": 2.051417705801681e-05, |
| "loss": 0.3106, |
| "step": 1096 |
| }, |
| { |
| "epoch": 3.4811582705275685, |
| "grad_norm": 0.08889141823884165, |
| "learning_rate": 2.04367763695013e-05, |
| "loss": 0.3096, |
| "step": 1097 |
| }, |
| { |
| "epoch": 3.4843316144387146, |
| "grad_norm": 0.10424786455484286, |
| "learning_rate": 2.0359471842277014e-05, |
| "loss": 0.3125, |
| "step": 1098 |
| }, |
| { |
| "epoch": 3.487504958349861, |
| "grad_norm": 0.09373054000252577, |
| "learning_rate": 2.0282263856327575e-05, |
| "loss": 0.3095, |
| "step": 1099 |
| }, |
| { |
| "epoch": 3.4906783022610077, |
| "grad_norm": 0.09743243651722891, |
| "learning_rate": 2.0205152791161942e-05, |
| "loss": 0.3138, |
| "step": 1100 |
| }, |
| { |
| "epoch": 3.493851646172154, |
| "grad_norm": 0.09903803728619201, |
| "learning_rate": 2.0128139025812737e-05, |
| "loss": 0.3117, |
| "step": 1101 |
| }, |
| { |
| "epoch": 3.4970249900833004, |
| "grad_norm": 0.09797020262865515, |
| "learning_rate": 2.00512229388343e-05, |
| "loss": 0.3083, |
| "step": 1102 |
| }, |
| { |
| "epoch": 3.5001983339944465, |
| "grad_norm": 0.10766478943557996, |
| "learning_rate": 1.9974404908300837e-05, |
| "loss": 0.3139, |
| "step": 1103 |
| }, |
| { |
| "epoch": 3.503371677905593, |
| "grad_norm": 0.09779359977262553, |
| "learning_rate": 1.9897685311804547e-05, |
| "loss": 0.3106, |
| "step": 1104 |
| }, |
| { |
| "epoch": 3.5065450218167395, |
| "grad_norm": 0.09387274555393152, |
| "learning_rate": 1.982106452645382e-05, |
| "loss": 0.3105, |
| "step": 1105 |
| }, |
| { |
| "epoch": 3.5097183657278856, |
| "grad_norm": 0.10203359221398621, |
| "learning_rate": 1.9744542928871335e-05, |
| "loss": 0.3131, |
| "step": 1106 |
| }, |
| { |
| "epoch": 3.512891709639032, |
| "grad_norm": 0.0940533035661825, |
| "learning_rate": 1.966812089519223e-05, |
| "loss": 0.3105, |
| "step": 1107 |
| }, |
| { |
| "epoch": 3.5160650535501787, |
| "grad_norm": 0.11077496419812648, |
| "learning_rate": 1.959179880106219e-05, |
| "loss": 0.3124, |
| "step": 1108 |
| }, |
| { |
| "epoch": 3.519238397461325, |
| "grad_norm": 0.08886436421169637, |
| "learning_rate": 1.9515577021635766e-05, |
| "loss": 0.3133, |
| "step": 1109 |
| }, |
| { |
| "epoch": 3.5224117413724714, |
| "grad_norm": 0.1058427262771896, |
| "learning_rate": 1.9439455931574306e-05, |
| "loss": 0.3188, |
| "step": 1110 |
| }, |
| { |
| "epoch": 3.5255850852836175, |
| "grad_norm": 0.0943621814009501, |
| "learning_rate": 1.9363435905044303e-05, |
| "loss": 0.3115, |
| "step": 1111 |
| }, |
| { |
| "epoch": 3.528758429194764, |
| "grad_norm": 0.10860537135562053, |
| "learning_rate": 1.9287517315715455e-05, |
| "loss": 0.3109, |
| "step": 1112 |
| }, |
| { |
| "epoch": 3.53193177310591, |
| "grad_norm": 0.08975638711859225, |
| "learning_rate": 1.9211700536758867e-05, |
| "loss": 0.3147, |
| "step": 1113 |
| }, |
| { |
| "epoch": 3.5351051170170567, |
| "grad_norm": 0.09431070654205716, |
| "learning_rate": 1.9135985940845167e-05, |
| "loss": 0.3061, |
| "step": 1114 |
| }, |
| { |
| "epoch": 3.538278460928203, |
| "grad_norm": 0.09176047436666471, |
| "learning_rate": 1.9060373900142758e-05, |
| "loss": 0.3039, |
| "step": 1115 |
| }, |
| { |
| "epoch": 3.5414518048393493, |
| "grad_norm": 0.09566553815030118, |
| "learning_rate": 1.898486478631591e-05, |
| "loss": 0.3096, |
| "step": 1116 |
| }, |
| { |
| "epoch": 3.544625148750496, |
| "grad_norm": 0.09398037913009233, |
| "learning_rate": 1.8909458970523003e-05, |
| "loss": 0.3117, |
| "step": 1117 |
| }, |
| { |
| "epoch": 3.5477984926616424, |
| "grad_norm": 0.09297530409510038, |
| "learning_rate": 1.8834156823414592e-05, |
| "loss": 0.3143, |
| "step": 1118 |
| }, |
| { |
| "epoch": 3.5509718365727885, |
| "grad_norm": 0.09616519717958327, |
| "learning_rate": 1.8758958715131763e-05, |
| "loss": 0.3134, |
| "step": 1119 |
| }, |
| { |
| "epoch": 3.554145180483935, |
| "grad_norm": 0.09595619987699143, |
| "learning_rate": 1.8683865015304107e-05, |
| "loss": 0.3167, |
| "step": 1120 |
| }, |
| { |
| "epoch": 3.5573185243950816, |
| "grad_norm": 0.09428857006752461, |
| "learning_rate": 1.860887609304806e-05, |
| "loss": 0.32, |
| "step": 1121 |
| }, |
| { |
| "epoch": 3.5604918683062277, |
| "grad_norm": 0.09288431779361411, |
| "learning_rate": 1.853399231696502e-05, |
| "loss": 0.3105, |
| "step": 1122 |
| }, |
| { |
| "epoch": 3.563665212217374, |
| "grad_norm": 0.09478695944187812, |
| "learning_rate": 1.845921405513957e-05, |
| "loss": 0.3136, |
| "step": 1123 |
| }, |
| { |
| "epoch": 3.5668385561285203, |
| "grad_norm": 0.09259086063066573, |
| "learning_rate": 1.838454167513759e-05, |
| "loss": 0.3082, |
| "step": 1124 |
| }, |
| { |
| "epoch": 3.570011900039667, |
| "grad_norm": 0.09731407575423262, |
| "learning_rate": 1.8309975544004563e-05, |
| "loss": 0.3097, |
| "step": 1125 |
| }, |
| { |
| "epoch": 3.573185243950813, |
| "grad_norm": 0.08908940261693532, |
| "learning_rate": 1.8235516028263693e-05, |
| "loss": 0.3102, |
| "step": 1126 |
| }, |
| { |
| "epoch": 3.5763585878619595, |
| "grad_norm": 0.0994114922720132, |
| "learning_rate": 1.8161163493914138e-05, |
| "loss": 0.3127, |
| "step": 1127 |
| }, |
| { |
| "epoch": 3.579531931773106, |
| "grad_norm": 0.09284958185097307, |
| "learning_rate": 1.808691830642915e-05, |
| "loss": 0.306, |
| "step": 1128 |
| }, |
| { |
| "epoch": 3.582705275684252, |
| "grad_norm": 0.09838434020935204, |
| "learning_rate": 1.8012780830754428e-05, |
| "loss": 0.3142, |
| "step": 1129 |
| }, |
| { |
| "epoch": 3.5858786195953987, |
| "grad_norm": 0.0957701843609928, |
| "learning_rate": 1.7938751431306108e-05, |
| "loss": 0.3059, |
| "step": 1130 |
| }, |
| { |
| "epoch": 3.589051963506545, |
| "grad_norm": 0.10452299557787663, |
| "learning_rate": 1.7864830471969158e-05, |
| "loss": 0.3149, |
| "step": 1131 |
| }, |
| { |
| "epoch": 3.5922253074176913, |
| "grad_norm": 0.09852568670954905, |
| "learning_rate": 1.77910183160955e-05, |
| "loss": 0.318, |
| "step": 1132 |
| }, |
| { |
| "epoch": 3.595398651328838, |
| "grad_norm": 0.08964997440395585, |
| "learning_rate": 1.771731532650226e-05, |
| "loss": 0.3132, |
| "step": 1133 |
| }, |
| { |
| "epoch": 3.5985719952399844, |
| "grad_norm": 0.10225504250213735, |
| "learning_rate": 1.7643721865469913e-05, |
| "loss": 0.3125, |
| "step": 1134 |
| }, |
| { |
| "epoch": 3.6017453391511305, |
| "grad_norm": 0.09337353095498398, |
| "learning_rate": 1.757023829474061e-05, |
| "loss": 0.3096, |
| "step": 1135 |
| }, |
| { |
| "epoch": 3.604918683062277, |
| "grad_norm": 0.09729026605466504, |
| "learning_rate": 1.7496864975516326e-05, |
| "loss": 0.3143, |
| "step": 1136 |
| }, |
| { |
| "epoch": 3.608092026973423, |
| "grad_norm": 0.09648684875445444, |
| "learning_rate": 1.7423602268457124e-05, |
| "loss": 0.3176, |
| "step": 1137 |
| }, |
| { |
| "epoch": 3.6112653708845697, |
| "grad_norm": 0.0891461740774676, |
| "learning_rate": 1.7350450533679298e-05, |
| "loss": 0.3076, |
| "step": 1138 |
| }, |
| { |
| "epoch": 3.6144387147957158, |
| "grad_norm": 0.09136934909265221, |
| "learning_rate": 1.7277410130753775e-05, |
| "loss": 0.3113, |
| "step": 1139 |
| }, |
| { |
| "epoch": 3.6176120587068623, |
| "grad_norm": 0.09282078764180805, |
| "learning_rate": 1.7204481418704136e-05, |
| "loss": 0.3088, |
| "step": 1140 |
| }, |
| { |
| "epoch": 3.620785402618009, |
| "grad_norm": 0.08738434824899044, |
| "learning_rate": 1.7131664756005012e-05, |
| "loss": 0.3112, |
| "step": 1141 |
| }, |
| { |
| "epoch": 3.623958746529155, |
| "grad_norm": 0.09634650667327464, |
| "learning_rate": 1.705896050058025e-05, |
| "loss": 0.3159, |
| "step": 1142 |
| }, |
| { |
| "epoch": 3.6271320904403015, |
| "grad_norm": 0.09161796695924315, |
| "learning_rate": 1.698636900980119e-05, |
| "loss": 0.3087, |
| "step": 1143 |
| }, |
| { |
| "epoch": 3.630305434351448, |
| "grad_norm": 0.08736032109536832, |
| "learning_rate": 1.6913890640484844e-05, |
| "loss": 0.3137, |
| "step": 1144 |
| }, |
| { |
| "epoch": 3.633478778262594, |
| "grad_norm": 0.09745072572329685, |
| "learning_rate": 1.6841525748892216e-05, |
| "loss": 0.3128, |
| "step": 1145 |
| }, |
| { |
| "epoch": 3.6366521221737407, |
| "grad_norm": 0.08662042757890395, |
| "learning_rate": 1.6769274690726523e-05, |
| "loss": 0.3131, |
| "step": 1146 |
| }, |
| { |
| "epoch": 3.639825466084887, |
| "grad_norm": 0.09312666065952754, |
| "learning_rate": 1.6697137821131443e-05, |
| "loss": 0.3086, |
| "step": 1147 |
| }, |
| { |
| "epoch": 3.6429988099960333, |
| "grad_norm": 0.10075435906006584, |
| "learning_rate": 1.6625115494689327e-05, |
| "loss": 0.3144, |
| "step": 1148 |
| }, |
| { |
| "epoch": 3.6461721539071794, |
| "grad_norm": 0.08168920500142361, |
| "learning_rate": 1.6553208065419585e-05, |
| "loss": 0.3067, |
| "step": 1149 |
| }, |
| { |
| "epoch": 3.649345497818326, |
| "grad_norm": 0.09473700986154443, |
| "learning_rate": 1.648141588677677e-05, |
| "loss": 0.3086, |
| "step": 1150 |
| }, |
| { |
| "epoch": 3.6525188417294725, |
| "grad_norm": 0.09995319497546384, |
| "learning_rate": 1.6409739311648985e-05, |
| "loss": 0.3153, |
| "step": 1151 |
| }, |
| { |
| "epoch": 3.6556921856406186, |
| "grad_norm": 0.08763944506301463, |
| "learning_rate": 1.633817869235608e-05, |
| "loss": 0.3109, |
| "step": 1152 |
| }, |
| { |
| "epoch": 3.658865529551765, |
| "grad_norm": 0.09881602368631207, |
| "learning_rate": 1.626673438064795e-05, |
| "loss": 0.321, |
| "step": 1153 |
| }, |
| { |
| "epoch": 3.6620388734629117, |
| "grad_norm": 0.09238325300250558, |
| "learning_rate": 1.6195406727702746e-05, |
| "loss": 0.3176, |
| "step": 1154 |
| }, |
| { |
| "epoch": 3.665212217374058, |
| "grad_norm": 0.09179321263341135, |
| "learning_rate": 1.6124196084125235e-05, |
| "loss": 0.3149, |
| "step": 1155 |
| }, |
| { |
| "epoch": 3.6683855612852043, |
| "grad_norm": 0.09632257418168569, |
| "learning_rate": 1.6053102799945026e-05, |
| "loss": 0.3152, |
| "step": 1156 |
| }, |
| { |
| "epoch": 3.671558905196351, |
| "grad_norm": 0.10636674102365737, |
| "learning_rate": 1.5982127224614867e-05, |
| "loss": 0.3183, |
| "step": 1157 |
| }, |
| { |
| "epoch": 3.674732249107497, |
| "grad_norm": 0.0872410805303305, |
| "learning_rate": 1.5911269707008857e-05, |
| "loss": 0.3114, |
| "step": 1158 |
| }, |
| { |
| "epoch": 3.6779055930186435, |
| "grad_norm": 0.09875023791232529, |
| "learning_rate": 1.5840530595420903e-05, |
| "loss": 0.3061, |
| "step": 1159 |
| }, |
| { |
| "epoch": 3.6810789369297896, |
| "grad_norm": 0.09455931044972642, |
| "learning_rate": 1.5769910237562798e-05, |
| "loss": 0.3069, |
| "step": 1160 |
| }, |
| { |
| "epoch": 3.684252280840936, |
| "grad_norm": 0.08864062113346818, |
| "learning_rate": 1.5699408980562653e-05, |
| "loss": 0.3091, |
| "step": 1161 |
| }, |
| { |
| "epoch": 3.6874256247520822, |
| "grad_norm": 0.0957648926194275, |
| "learning_rate": 1.562902717096316e-05, |
| "loss": 0.3105, |
| "step": 1162 |
| }, |
| { |
| "epoch": 3.690598968663229, |
| "grad_norm": 0.08822389225988175, |
| "learning_rate": 1.5558765154719867e-05, |
| "loss": 0.312, |
| "step": 1163 |
| }, |
| { |
| "epoch": 3.6937723125743753, |
| "grad_norm": 0.09144318120715113, |
| "learning_rate": 1.5488623277199463e-05, |
| "loss": 0.3133, |
| "step": 1164 |
| }, |
| { |
| "epoch": 3.6969456564855214, |
| "grad_norm": 0.08207249013636533, |
| "learning_rate": 1.5418601883178138e-05, |
| "loss": 0.3093, |
| "step": 1165 |
| }, |
| { |
| "epoch": 3.700119000396668, |
| "grad_norm": 0.08659915132311609, |
| "learning_rate": 1.5348701316839844e-05, |
| "loss": 0.3118, |
| "step": 1166 |
| }, |
| { |
| "epoch": 3.7032923443078145, |
| "grad_norm": 0.0829362247581557, |
| "learning_rate": 1.5278921921774624e-05, |
| "loss": 0.3101, |
| "step": 1167 |
| }, |
| { |
| "epoch": 3.7064656882189606, |
| "grad_norm": 0.08830303321169561, |
| "learning_rate": 1.5209264040976911e-05, |
| "loss": 0.3121, |
| "step": 1168 |
| }, |
| { |
| "epoch": 3.709639032130107, |
| "grad_norm": 0.08371651283489175, |
| "learning_rate": 1.5139728016843846e-05, |
| "loss": 0.3121, |
| "step": 1169 |
| }, |
| { |
| "epoch": 3.7128123760412537, |
| "grad_norm": 0.09394104777007455, |
| "learning_rate": 1.507031419117357e-05, |
| "loss": 0.3128, |
| "step": 1170 |
| }, |
| { |
| "epoch": 3.7159857199524, |
| "grad_norm": 0.08410977786060891, |
| "learning_rate": 1.5001022905163596e-05, |
| "loss": 0.3085, |
| "step": 1171 |
| }, |
| { |
| "epoch": 3.7191590638635463, |
| "grad_norm": 0.09126456097238246, |
| "learning_rate": 1.49318544994091e-05, |
| "loss": 0.3134, |
| "step": 1172 |
| }, |
| { |
| "epoch": 3.7223324077746924, |
| "grad_norm": 0.08545709007632069, |
| "learning_rate": 1.4862809313901268e-05, |
| "loss": 0.3154, |
| "step": 1173 |
| }, |
| { |
| "epoch": 3.725505751685839, |
| "grad_norm": 0.08780137354204715, |
| "learning_rate": 1.4793887688025534e-05, |
| "loss": 0.311, |
| "step": 1174 |
| }, |
| { |
| "epoch": 3.728679095596985, |
| "grad_norm": 0.09036873460150745, |
| "learning_rate": 1.4725089960560106e-05, |
| "loss": 0.3092, |
| "step": 1175 |
| }, |
| { |
| "epoch": 3.7318524395081316, |
| "grad_norm": 0.0906949944979453, |
| "learning_rate": 1.4656416469674067e-05, |
| "loss": 0.3087, |
| "step": 1176 |
| }, |
| { |
| "epoch": 3.735025783419278, |
| "grad_norm": 0.08524397104099314, |
| "learning_rate": 1.4587867552925886e-05, |
| "loss": 0.3097, |
| "step": 1177 |
| }, |
| { |
| "epoch": 3.7381991273304243, |
| "grad_norm": 0.0831715872678655, |
| "learning_rate": 1.4519443547261692e-05, |
| "loss": 0.3091, |
| "step": 1178 |
| }, |
| { |
| "epoch": 3.741372471241571, |
| "grad_norm": 0.08787932200139537, |
| "learning_rate": 1.445114478901362e-05, |
| "loss": 0.3104, |
| "step": 1179 |
| }, |
| { |
| "epoch": 3.7445458151527173, |
| "grad_norm": 0.09013424171138545, |
| "learning_rate": 1.4382971613898145e-05, |
| "loss": 0.3133, |
| "step": 1180 |
| }, |
| { |
| "epoch": 3.7477191590638634, |
| "grad_norm": 0.08872383417712887, |
| "learning_rate": 1.431492435701447e-05, |
| "loss": 0.3103, |
| "step": 1181 |
| }, |
| { |
| "epoch": 3.75089250297501, |
| "grad_norm": 0.0964431058984415, |
| "learning_rate": 1.424700335284286e-05, |
| "loss": 0.3142, |
| "step": 1182 |
| }, |
| { |
| "epoch": 3.7540658468861565, |
| "grad_norm": 0.08591965622221728, |
| "learning_rate": 1.4179208935243e-05, |
| "loss": 0.3075, |
| "step": 1183 |
| }, |
| { |
| "epoch": 3.7572391907973026, |
| "grad_norm": 0.08719874856870091, |
| "learning_rate": 1.4111541437452294e-05, |
| "loss": 0.3111, |
| "step": 1184 |
| }, |
| { |
| "epoch": 3.760412534708449, |
| "grad_norm": 0.09819733047781874, |
| "learning_rate": 1.4044001192084391e-05, |
| "loss": 0.3099, |
| "step": 1185 |
| }, |
| { |
| "epoch": 3.7635858786195953, |
| "grad_norm": 0.0836739072391128, |
| "learning_rate": 1.3976588531127334e-05, |
| "loss": 0.3115, |
| "step": 1186 |
| }, |
| { |
| "epoch": 3.766759222530742, |
| "grad_norm": 0.09633657842787988, |
| "learning_rate": 1.3909303785942089e-05, |
| "loss": 0.3154, |
| "step": 1187 |
| }, |
| { |
| "epoch": 3.769932566441888, |
| "grad_norm": 0.08398570887749328, |
| "learning_rate": 1.3842147287260863e-05, |
| "loss": 0.3114, |
| "step": 1188 |
| }, |
| { |
| "epoch": 3.7731059103530344, |
| "grad_norm": 0.08881126144850926, |
| "learning_rate": 1.3775119365185484e-05, |
| "loss": 0.3135, |
| "step": 1189 |
| }, |
| { |
| "epoch": 3.776279254264181, |
| "grad_norm": 0.087196679652796, |
| "learning_rate": 1.3708220349185731e-05, |
| "loss": 0.3096, |
| "step": 1190 |
| }, |
| { |
| "epoch": 3.779452598175327, |
| "grad_norm": 0.0818848922205358, |
| "learning_rate": 1.3641450568097794e-05, |
| "loss": 0.3072, |
| "step": 1191 |
| }, |
| { |
| "epoch": 3.7826259420864736, |
| "grad_norm": 0.09311600595200806, |
| "learning_rate": 1.3574810350122625e-05, |
| "loss": 0.3104, |
| "step": 1192 |
| }, |
| { |
| "epoch": 3.78579928599762, |
| "grad_norm": 0.0838102155532246, |
| "learning_rate": 1.350830002282431e-05, |
| "loss": 0.3128, |
| "step": 1193 |
| }, |
| { |
| "epoch": 3.7889726299087663, |
| "grad_norm": 0.08273644119209583, |
| "learning_rate": 1.344191991312843e-05, |
| "loss": 0.3099, |
| "step": 1194 |
| }, |
| { |
| "epoch": 3.792145973819913, |
| "grad_norm": 0.08673064009083564, |
| "learning_rate": 1.3375670347320577e-05, |
| "loss": 0.3108, |
| "step": 1195 |
| }, |
| { |
| "epoch": 3.7953193177310593, |
| "grad_norm": 0.08342237037133293, |
| "learning_rate": 1.330955165104459e-05, |
| "loss": 0.3074, |
| "step": 1196 |
| }, |
| { |
| "epoch": 3.7984926616422054, |
| "grad_norm": 0.08544215414242184, |
| "learning_rate": 1.3243564149301058e-05, |
| "loss": 0.3092, |
| "step": 1197 |
| }, |
| { |
| "epoch": 3.801666005553352, |
| "grad_norm": 0.07858190033954376, |
| "learning_rate": 1.3177708166445702e-05, |
| "loss": 0.3085, |
| "step": 1198 |
| }, |
| { |
| "epoch": 3.804839349464498, |
| "grad_norm": 0.0853891038054064, |
| "learning_rate": 1.311198402618778e-05, |
| "loss": 0.307, |
| "step": 1199 |
| }, |
| { |
| "epoch": 3.8080126933756446, |
| "grad_norm": 0.07989799358178139, |
| "learning_rate": 1.3046392051588454e-05, |
| "loss": 0.3128, |
| "step": 1200 |
| }, |
| { |
| "epoch": 3.8111860372867907, |
| "grad_norm": 0.08828442224939745, |
| "learning_rate": 1.2980932565059261e-05, |
| "loss": 0.3148, |
| "step": 1201 |
| }, |
| { |
| "epoch": 3.8143593811979373, |
| "grad_norm": 0.08068725629005853, |
| "learning_rate": 1.2915605888360516e-05, |
| "loss": 0.3177, |
| "step": 1202 |
| }, |
| { |
| "epoch": 3.817532725109084, |
| "grad_norm": 0.08257356138701971, |
| "learning_rate": 1.2850412342599712e-05, |
| "loss": 0.3119, |
| "step": 1203 |
| }, |
| { |
| "epoch": 3.82070606902023, |
| "grad_norm": 0.0866333410804087, |
| "learning_rate": 1.2785352248229907e-05, |
| "loss": 0.3098, |
| "step": 1204 |
| }, |
| { |
| "epoch": 3.8238794129313765, |
| "grad_norm": 0.08166886458082445, |
| "learning_rate": 1.2720425925048274e-05, |
| "loss": 0.3125, |
| "step": 1205 |
| }, |
| { |
| "epoch": 3.827052756842523, |
| "grad_norm": 0.08920230072749204, |
| "learning_rate": 1.2655633692194367e-05, |
| "loss": 0.3106, |
| "step": 1206 |
| }, |
| { |
| "epoch": 3.830226100753669, |
| "grad_norm": 0.08897293804748853, |
| "learning_rate": 1.259097586814867e-05, |
| "loss": 0.3146, |
| "step": 1207 |
| }, |
| { |
| "epoch": 3.8333994446648156, |
| "grad_norm": 0.08018138419305465, |
| "learning_rate": 1.2526452770730986e-05, |
| "loss": 0.3138, |
| "step": 1208 |
| }, |
| { |
| "epoch": 3.836572788575962, |
| "grad_norm": 0.08612406310222656, |
| "learning_rate": 1.246206471709889e-05, |
| "loss": 0.3125, |
| "step": 1209 |
| }, |
| { |
| "epoch": 3.8397461324871083, |
| "grad_norm": 0.08560142904793838, |
| "learning_rate": 1.2397812023746124e-05, |
| "loss": 0.3098, |
| "step": 1210 |
| }, |
| { |
| "epoch": 3.842919476398255, |
| "grad_norm": 0.08086605996564016, |
| "learning_rate": 1.2333695006501127e-05, |
| "loss": 0.3099, |
| "step": 1211 |
| }, |
| { |
| "epoch": 3.846092820309401, |
| "grad_norm": 0.08291451210131907, |
| "learning_rate": 1.2269713980525407e-05, |
| "loss": 0.3057, |
| "step": 1212 |
| }, |
| { |
| "epoch": 3.8492661642205475, |
| "grad_norm": 0.08303662204470749, |
| "learning_rate": 1.2205869260312034e-05, |
| "loss": 0.3133, |
| "step": 1213 |
| }, |
| { |
| "epoch": 3.8524395081316936, |
| "grad_norm": 0.08226024846397162, |
| "learning_rate": 1.2142161159684034e-05, |
| "loss": 0.3158, |
| "step": 1214 |
| }, |
| { |
| "epoch": 3.85561285204284, |
| "grad_norm": 0.08613484735188547, |
| "learning_rate": 1.2078589991792983e-05, |
| "loss": 0.3118, |
| "step": 1215 |
| }, |
| { |
| "epoch": 3.8587861959539866, |
| "grad_norm": 0.0844370020344609, |
| "learning_rate": 1.2015156069117278e-05, |
| "loss": 0.311, |
| "step": 1216 |
| }, |
| { |
| "epoch": 3.8619595398651327, |
| "grad_norm": 0.08157413068957893, |
| "learning_rate": 1.195185970346075e-05, |
| "loss": 0.3118, |
| "step": 1217 |
| }, |
| { |
| "epoch": 3.8651328837762793, |
| "grad_norm": 0.0809161660461467, |
| "learning_rate": 1.1888701205951084e-05, |
| "loss": 0.3074, |
| "step": 1218 |
| }, |
| { |
| "epoch": 3.868306227687426, |
| "grad_norm": 0.08569592239748124, |
| "learning_rate": 1.1825680887038274e-05, |
| "loss": 0.3136, |
| "step": 1219 |
| }, |
| { |
| "epoch": 3.871479571598572, |
| "grad_norm": 0.08144000303149429, |
| "learning_rate": 1.1762799056493095e-05, |
| "loss": 0.3117, |
| "step": 1220 |
| }, |
| { |
| "epoch": 3.8746529155097185, |
| "grad_norm": 0.08023713096676821, |
| "learning_rate": 1.1700056023405622e-05, |
| "loss": 0.3085, |
| "step": 1221 |
| }, |
| { |
| "epoch": 3.877826259420865, |
| "grad_norm": 0.08533264894900448, |
| "learning_rate": 1.1637452096183663e-05, |
| "loss": 0.3112, |
| "step": 1222 |
| }, |
| { |
| "epoch": 3.880999603332011, |
| "grad_norm": 0.08294444189988791, |
| "learning_rate": 1.1574987582551293e-05, |
| "loss": 0.312, |
| "step": 1223 |
| }, |
| { |
| "epoch": 3.884172947243157, |
| "grad_norm": 0.07997988212163627, |
| "learning_rate": 1.1512662789547249e-05, |
| "loss": 0.3102, |
| "step": 1224 |
| }, |
| { |
| "epoch": 3.8873462911543037, |
| "grad_norm": 0.08618908178309605, |
| "learning_rate": 1.1450478023523575e-05, |
| "loss": 0.3104, |
| "step": 1225 |
| }, |
| { |
| "epoch": 3.8905196350654503, |
| "grad_norm": 0.08855469703733576, |
| "learning_rate": 1.1388433590143939e-05, |
| "loss": 0.3099, |
| "step": 1226 |
| }, |
| { |
| "epoch": 3.8936929789765964, |
| "grad_norm": 0.08401029593007889, |
| "learning_rate": 1.1326529794382264e-05, |
| "loss": 0.3064, |
| "step": 1227 |
| }, |
| { |
| "epoch": 3.896866322887743, |
| "grad_norm": 0.08447977219178371, |
| "learning_rate": 1.1264766940521171e-05, |
| "loss": 0.3144, |
| "step": 1228 |
| }, |
| { |
| "epoch": 3.9000396667988895, |
| "grad_norm": 0.08858538731350699, |
| "learning_rate": 1.1203145332150505e-05, |
| "loss": 0.3121, |
| "step": 1229 |
| }, |
| { |
| "epoch": 3.9032130107100356, |
| "grad_norm": 0.0909725806781119, |
| "learning_rate": 1.1141665272165789e-05, |
| "loss": 0.3109, |
| "step": 1230 |
| }, |
| { |
| "epoch": 3.906386354621182, |
| "grad_norm": 0.08293088335470532, |
| "learning_rate": 1.1080327062766827e-05, |
| "loss": 0.3125, |
| "step": 1231 |
| }, |
| { |
| "epoch": 3.9095596985323287, |
| "grad_norm": 0.08569314487810697, |
| "learning_rate": 1.1019131005456143e-05, |
| "loss": 0.3083, |
| "step": 1232 |
| }, |
| { |
| "epoch": 3.9127330424434748, |
| "grad_norm": 0.08315219817463836, |
| "learning_rate": 1.0958077401037542e-05, |
| "loss": 0.313, |
| "step": 1233 |
| }, |
| { |
| "epoch": 3.9159063863546213, |
| "grad_norm": 0.08276037981985424, |
| "learning_rate": 1.0897166549614573e-05, |
| "loss": 0.314, |
| "step": 1234 |
| }, |
| { |
| "epoch": 3.9190797302657674, |
| "grad_norm": 0.08774104502041317, |
| "learning_rate": 1.0836398750589172e-05, |
| "loss": 0.3071, |
| "step": 1235 |
| }, |
| { |
| "epoch": 3.922253074176914, |
| "grad_norm": 0.08859302501482878, |
| "learning_rate": 1.0775774302660027e-05, |
| "loss": 0.3148, |
| "step": 1236 |
| }, |
| { |
| "epoch": 3.92542641808806, |
| "grad_norm": 0.08667466575751302, |
| "learning_rate": 1.0715293503821256e-05, |
| "loss": 0.309, |
| "step": 1237 |
| }, |
| { |
| "epoch": 3.9285997619992066, |
| "grad_norm": 0.08724856226631149, |
| "learning_rate": 1.0654956651360857e-05, |
| "loss": 0.3147, |
| "step": 1238 |
| }, |
| { |
| "epoch": 3.931773105910353, |
| "grad_norm": 0.08515851001054704, |
| "learning_rate": 1.0594764041859293e-05, |
| "loss": 0.312, |
| "step": 1239 |
| }, |
| { |
| "epoch": 3.934946449821499, |
| "grad_norm": 0.08664134089549802, |
| "learning_rate": 1.0534715971187976e-05, |
| "loss": 0.3065, |
| "step": 1240 |
| }, |
| { |
| "epoch": 3.9381197937326458, |
| "grad_norm": 0.07937577751856649, |
| "learning_rate": 1.0474812734507886e-05, |
| "loss": 0.3079, |
| "step": 1241 |
| }, |
| { |
| "epoch": 3.9412931376437923, |
| "grad_norm": 0.08269171013905462, |
| "learning_rate": 1.041505462626807e-05, |
| "loss": 0.3078, |
| "step": 1242 |
| }, |
| { |
| "epoch": 3.9444664815549384, |
| "grad_norm": 0.08011960205561683, |
| "learning_rate": 1.0355441940204215e-05, |
| "loss": 0.3066, |
| "step": 1243 |
| }, |
| { |
| "epoch": 3.947639825466085, |
| "grad_norm": 0.07819385992242202, |
| "learning_rate": 1.0295974969337186e-05, |
| "loss": 0.3128, |
| "step": 1244 |
| }, |
| { |
| "epoch": 3.9508131693772315, |
| "grad_norm": 0.08250283712608554, |
| "learning_rate": 1.0236654005971625e-05, |
| "loss": 0.3181, |
| "step": 1245 |
| }, |
| { |
| "epoch": 3.9539865132883776, |
| "grad_norm": 0.6384901300040913, |
| "learning_rate": 1.017747934169444e-05, |
| "loss": 0.3072, |
| "step": 1246 |
| }, |
| { |
| "epoch": 3.957159857199524, |
| "grad_norm": 0.08198945954442947, |
| "learning_rate": 1.0118451267373462e-05, |
| "loss": 0.3088, |
| "step": 1247 |
| }, |
| { |
| "epoch": 3.96033320111067, |
| "grad_norm": 0.08781474473700562, |
| "learning_rate": 1.0059570073155953e-05, |
| "loss": 0.3119, |
| "step": 1248 |
| }, |
| { |
| "epoch": 3.9635065450218168, |
| "grad_norm": 0.08246668180469044, |
| "learning_rate": 1.0000836048467221e-05, |
| "loss": 0.313, |
| "step": 1249 |
| }, |
| { |
| "epoch": 3.966679888932963, |
| "grad_norm": 0.08889236389023081, |
| "learning_rate": 9.942249482009117e-06, |
| "loss": 0.3148, |
| "step": 1250 |
| }, |
| { |
| "epoch": 3.9698532328441094, |
| "grad_norm": 0.08718334142633781, |
| "learning_rate": 9.88381066175876e-06, |
| "loss": 0.3098, |
| "step": 1251 |
| }, |
| { |
| "epoch": 3.973026576755256, |
| "grad_norm": 0.08701871231451917, |
| "learning_rate": 9.825519874966952e-06, |
| "loss": 0.3132, |
| "step": 1252 |
| }, |
| { |
| "epoch": 3.976199920666402, |
| "grad_norm": 0.08864822845899448, |
| "learning_rate": 9.767377408156906e-06, |
| "loss": 0.3141, |
| "step": 1253 |
| }, |
| { |
| "epoch": 3.9793732645775486, |
| "grad_norm": 0.08251988049787, |
| "learning_rate": 9.709383547122764e-06, |
| "loss": 0.3106, |
| "step": 1254 |
| }, |
| { |
| "epoch": 3.982546608488695, |
| "grad_norm": 0.0829769224836818, |
| "learning_rate": 9.651538576928213e-06, |
| "loss": 0.3147, |
| "step": 1255 |
| }, |
| { |
| "epoch": 3.9857199523998412, |
| "grad_norm": 0.08191823227244799, |
| "learning_rate": 9.59384278190505e-06, |
| "loss": 0.3068, |
| "step": 1256 |
| }, |
| { |
| "epoch": 3.9888932963109878, |
| "grad_norm": 0.08231464325661744, |
| "learning_rate": 9.53629644565186e-06, |
| "loss": 0.311, |
| "step": 1257 |
| }, |
| { |
| "epoch": 3.9920666402221343, |
| "grad_norm": 0.07952802959102342, |
| "learning_rate": 9.478899851032554e-06, |
| "loss": 0.3092, |
| "step": 1258 |
| }, |
| { |
| "epoch": 3.9952399841332804, |
| "grad_norm": 0.07741615193611691, |
| "learning_rate": 9.421653280175014e-06, |
| "loss": 0.3043, |
| "step": 1259 |
| }, |
| { |
| "epoch": 3.998413328044427, |
| "grad_norm": 0.0792129238358985, |
| "learning_rate": 9.364557014469651e-06, |
| "loss": 0.3106, |
| "step": 1260 |
| }, |
| { |
| "epoch": 4.0015866719555735, |
| "grad_norm": 0.2372544363090627, |
| "learning_rate": 9.307611334568137e-06, |
| "loss": 0.5642, |
| "step": 1261 |
| }, |
| { |
| "epoch": 4.00476001586672, |
| "grad_norm": 0.12928047394977374, |
| "learning_rate": 9.250816520381884e-06, |
| "loss": 0.2916, |
| "step": 1262 |
| }, |
| { |
| "epoch": 4.007933359777866, |
| "grad_norm": 0.1547813755040527, |
| "learning_rate": 9.194172851080778e-06, |
| "loss": 0.2904, |
| "step": 1263 |
| }, |
| { |
| "epoch": 4.011106703689013, |
| "grad_norm": 0.12678189534036333, |
| "learning_rate": 9.137680605091753e-06, |
| "loss": 0.2871, |
| "step": 1264 |
| }, |
| { |
| "epoch": 4.014280047600159, |
| "grad_norm": 0.13337837439164088, |
| "learning_rate": 9.081340060097443e-06, |
| "loss": 0.286, |
| "step": 1265 |
| }, |
| { |
| "epoch": 4.017453391511305, |
| "grad_norm": 0.1460976490959922, |
| "learning_rate": 9.025151493034779e-06, |
| "loss": 0.288, |
| "step": 1266 |
| }, |
| { |
| "epoch": 4.020626735422451, |
| "grad_norm": 0.11381826357536112, |
| "learning_rate": 8.969115180093699e-06, |
| "loss": 0.2894, |
| "step": 1267 |
| }, |
| { |
| "epoch": 4.023800079333598, |
| "grad_norm": 0.1292486747466223, |
| "learning_rate": 8.91323139671572e-06, |
| "loss": 0.2931, |
| "step": 1268 |
| }, |
| { |
| "epoch": 4.026973423244744, |
| "grad_norm": 0.1243312232469112, |
| "learning_rate": 8.857500417592648e-06, |
| "loss": 0.2908, |
| "step": 1269 |
| }, |
| { |
| "epoch": 4.03014676715589, |
| "grad_norm": 0.10431292634709807, |
| "learning_rate": 8.801922516665127e-06, |
| "loss": 0.285, |
| "step": 1270 |
| }, |
| { |
| "epoch": 4.033320111067037, |
| "grad_norm": 0.11511588958038933, |
| "learning_rate": 8.746497967121445e-06, |
| "loss": 0.288, |
| "step": 1271 |
| }, |
| { |
| "epoch": 4.036493454978183, |
| "grad_norm": 0.09400500609718773, |
| "learning_rate": 8.69122704139604e-06, |
| "loss": 0.2906, |
| "step": 1272 |
| }, |
| { |
| "epoch": 4.039666798889329, |
| "grad_norm": 0.10377314324554619, |
| "learning_rate": 8.636110011168246e-06, |
| "loss": 0.2847, |
| "step": 1273 |
| }, |
| { |
| "epoch": 4.042840142800476, |
| "grad_norm": 0.09999767041994467, |
| "learning_rate": 8.58114714736094e-06, |
| "loss": 0.2892, |
| "step": 1274 |
| }, |
| { |
| "epoch": 4.046013486711622, |
| "grad_norm": 0.0879076384755368, |
| "learning_rate": 8.526338720139225e-06, |
| "loss": 0.2866, |
| "step": 1275 |
| }, |
| { |
| "epoch": 4.0491868306227685, |
| "grad_norm": 0.10595158327559934, |
| "learning_rate": 8.471684998909033e-06, |
| "loss": 0.2856, |
| "step": 1276 |
| }, |
| { |
| "epoch": 4.0523601745339155, |
| "grad_norm": 0.09994029668981493, |
| "learning_rate": 8.417186252315904e-06, |
| "loss": 0.2862, |
| "step": 1277 |
| }, |
| { |
| "epoch": 4.055533518445062, |
| "grad_norm": 0.08748710664677468, |
| "learning_rate": 8.362842748243593e-06, |
| "loss": 0.2888, |
| "step": 1278 |
| }, |
| { |
| "epoch": 4.058706862356208, |
| "grad_norm": 0.1057862773429233, |
| "learning_rate": 8.308654753812785e-06, |
| "loss": 0.2922, |
| "step": 1279 |
| }, |
| { |
| "epoch": 4.061880206267354, |
| "grad_norm": 0.10805451904620589, |
| "learning_rate": 8.254622535379733e-06, |
| "loss": 0.2933, |
| "step": 1280 |
| }, |
| { |
| "epoch": 4.065053550178501, |
| "grad_norm": 0.08529498056433478, |
| "learning_rate": 8.200746358535054e-06, |
| "loss": 0.289, |
| "step": 1281 |
| }, |
| { |
| "epoch": 4.068226894089647, |
| "grad_norm": 0.10025625808348602, |
| "learning_rate": 8.147026488102288e-06, |
| "loss": 0.2861, |
| "step": 1282 |
| }, |
| { |
| "epoch": 4.071400238000793, |
| "grad_norm": 0.10466816524732167, |
| "learning_rate": 8.093463188136712e-06, |
| "loss": 0.2879, |
| "step": 1283 |
| }, |
| { |
| "epoch": 4.07457358191194, |
| "grad_norm": 0.09159217995048068, |
| "learning_rate": 8.04005672192397e-06, |
| "loss": 0.2911, |
| "step": 1284 |
| }, |
| { |
| "epoch": 4.077746925823086, |
| "grad_norm": 0.09573656877621275, |
| "learning_rate": 7.986807351978827e-06, |
| "loss": 0.2858, |
| "step": 1285 |
| }, |
| { |
| "epoch": 4.080920269734232, |
| "grad_norm": 0.09231500426704371, |
| "learning_rate": 7.933715340043822e-06, |
| "loss": 0.2906, |
| "step": 1286 |
| }, |
| { |
| "epoch": 4.084093613645379, |
| "grad_norm": 0.09110127087440445, |
| "learning_rate": 7.880780947088031e-06, |
| "loss": 0.2888, |
| "step": 1287 |
| }, |
| { |
| "epoch": 4.087266957556525, |
| "grad_norm": 0.09178186024677654, |
| "learning_rate": 7.82800443330578e-06, |
| "loss": 0.2845, |
| "step": 1288 |
| }, |
| { |
| "epoch": 4.090440301467671, |
| "grad_norm": 0.08699713547366167, |
| "learning_rate": 7.77538605811535e-06, |
| "loss": 0.2949, |
| "step": 1289 |
| }, |
| { |
| "epoch": 4.093613645378818, |
| "grad_norm": 0.08651443976613882, |
| "learning_rate": 7.722926080157673e-06, |
| "loss": 0.2915, |
| "step": 1290 |
| }, |
| { |
| "epoch": 4.096786989289964, |
| "grad_norm": 0.08839398111073136, |
| "learning_rate": 7.670624757295151e-06, |
| "loss": 0.2895, |
| "step": 1291 |
| }, |
| { |
| "epoch": 4.0999603332011105, |
| "grad_norm": 0.07840294963123258, |
| "learning_rate": 7.618482346610276e-06, |
| "loss": 0.2842, |
| "step": 1292 |
| }, |
| { |
| "epoch": 4.103133677112257, |
| "grad_norm": 0.08242092139423192, |
| "learning_rate": 7.566499104404452e-06, |
| "loss": 0.2848, |
| "step": 1293 |
| }, |
| { |
| "epoch": 4.106307021023404, |
| "grad_norm": 0.08497119568891674, |
| "learning_rate": 7.514675286196698e-06, |
| "loss": 0.2914, |
| "step": 1294 |
| }, |
| { |
| "epoch": 4.10948036493455, |
| "grad_norm": 0.0810225885617694, |
| "learning_rate": 7.46301114672241e-06, |
| "loss": 0.2872, |
| "step": 1295 |
| }, |
| { |
| "epoch": 4.112653708845696, |
| "grad_norm": 0.08718653499899484, |
| "learning_rate": 7.411506939932058e-06, |
| "loss": 0.2875, |
| "step": 1296 |
| }, |
| { |
| "epoch": 4.115827052756843, |
| "grad_norm": 0.08535338762467477, |
| "learning_rate": 7.360162918990021e-06, |
| "loss": 0.2846, |
| "step": 1297 |
| }, |
| { |
| "epoch": 4.119000396667989, |
| "grad_norm": 0.07685735750488655, |
| "learning_rate": 7.308979336273281e-06, |
| "loss": 0.2843, |
| "step": 1298 |
| }, |
| { |
| "epoch": 4.122173740579135, |
| "grad_norm": 0.08263564119903752, |
| "learning_rate": 7.257956443370209e-06, |
| "loss": 0.2905, |
| "step": 1299 |
| }, |
| { |
| "epoch": 4.125347084490282, |
| "grad_norm": 0.08566614638966435, |
| "learning_rate": 7.20709449107928e-06, |
| "loss": 0.289, |
| "step": 1300 |
| }, |
| { |
| "epoch": 4.128520428401428, |
| "grad_norm": 0.08073179020805406, |
| "learning_rate": 7.156393729407956e-06, |
| "loss": 0.285, |
| "step": 1301 |
| }, |
| { |
| "epoch": 4.131693772312574, |
| "grad_norm": 0.08607718507088445, |
| "learning_rate": 7.1058544075712995e-06, |
| "loss": 0.2958, |
| "step": 1302 |
| }, |
| { |
| "epoch": 4.134867116223721, |
| "grad_norm": 0.0773081314831222, |
| "learning_rate": 7.055476773990881e-06, |
| "loss": 0.2829, |
| "step": 1303 |
| }, |
| { |
| "epoch": 4.138040460134867, |
| "grad_norm": 0.08091752523907358, |
| "learning_rate": 7.0052610762934905e-06, |
| "loss": 0.2864, |
| "step": 1304 |
| }, |
| { |
| "epoch": 4.141213804046013, |
| "grad_norm": 0.08363422913570277, |
| "learning_rate": 6.955207561309944e-06, |
| "loss": 0.2949, |
| "step": 1305 |
| }, |
| { |
| "epoch": 4.1443871479571595, |
| "grad_norm": 0.07855930876619456, |
| "learning_rate": 6.905316475073842e-06, |
| "loss": 0.2921, |
| "step": 1306 |
| }, |
| { |
| "epoch": 4.147560491868306, |
| "grad_norm": 0.07964756493675755, |
| "learning_rate": 6.855588062820407e-06, |
| "loss": 0.2895, |
| "step": 1307 |
| }, |
| { |
| "epoch": 4.1507338357794525, |
| "grad_norm": 0.0788278091150506, |
| "learning_rate": 6.806022568985233e-06, |
| "loss": 0.2864, |
| "step": 1308 |
| }, |
| { |
| "epoch": 4.153907179690599, |
| "grad_norm": 0.08049149360175006, |
| "learning_rate": 6.756620237203124e-06, |
| "loss": 0.2825, |
| "step": 1309 |
| }, |
| { |
| "epoch": 4.157080523601746, |
| "grad_norm": 0.080868151969321, |
| "learning_rate": 6.707381310306833e-06, |
| "loss": 0.2892, |
| "step": 1310 |
| }, |
| { |
| "epoch": 4.160253867512892, |
| "grad_norm": 0.07798552901381123, |
| "learning_rate": 6.658306030325978e-06, |
| "loss": 0.2865, |
| "step": 1311 |
| }, |
| { |
| "epoch": 4.163427211424038, |
| "grad_norm": 0.0805335375512832, |
| "learning_rate": 6.60939463848572e-06, |
| "loss": 0.2911, |
| "step": 1312 |
| }, |
| { |
| "epoch": 4.166600555335185, |
| "grad_norm": 0.07518550741681629, |
| "learning_rate": 6.560647375205676e-06, |
| "loss": 0.2882, |
| "step": 1313 |
| }, |
| { |
| "epoch": 4.169773899246331, |
| "grad_norm": 0.0781854434454258, |
| "learning_rate": 6.512064480098694e-06, |
| "loss": 0.2891, |
| "step": 1314 |
| }, |
| { |
| "epoch": 4.172947243157477, |
| "grad_norm": 0.07987762578525559, |
| "learning_rate": 6.4636461919697034e-06, |
| "loss": 0.2856, |
| "step": 1315 |
| }, |
| { |
| "epoch": 4.176120587068624, |
| "grad_norm": 0.07606785506312735, |
| "learning_rate": 6.4153927488144865e-06, |
| "loss": 0.2845, |
| "step": 1316 |
| }, |
| { |
| "epoch": 4.17929393097977, |
| "grad_norm": 0.07637479917962597, |
| "learning_rate": 6.367304387818567e-06, |
| "loss": 0.277, |
| "step": 1317 |
| }, |
| { |
| "epoch": 4.182467274890916, |
| "grad_norm": 0.08032228596283143, |
| "learning_rate": 6.31938134535603e-06, |
| "loss": 0.2869, |
| "step": 1318 |
| }, |
| { |
| "epoch": 4.185640618802062, |
| "grad_norm": 0.07742967651427472, |
| "learning_rate": 6.271623856988336e-06, |
| "loss": 0.2841, |
| "step": 1319 |
| }, |
| { |
| "epoch": 4.188813962713209, |
| "grad_norm": 0.0808988691015848, |
| "learning_rate": 6.224032157463184e-06, |
| "loss": 0.2906, |
| "step": 1320 |
| }, |
| { |
| "epoch": 4.191987306624355, |
| "grad_norm": 0.07749448537178391, |
| "learning_rate": 6.176606480713365e-06, |
| "loss": 0.2898, |
| "step": 1321 |
| }, |
| { |
| "epoch": 4.1951606505355015, |
| "grad_norm": 0.07984359673939721, |
| "learning_rate": 6.129347059855572e-06, |
| "loss": 0.2883, |
| "step": 1322 |
| }, |
| { |
| "epoch": 4.1983339944466485, |
| "grad_norm": 0.07874063845144524, |
| "learning_rate": 6.082254127189302e-06, |
| "loss": 0.2863, |
| "step": 1323 |
| }, |
| { |
| "epoch": 4.2015073383577946, |
| "grad_norm": 0.07786902211545077, |
| "learning_rate": 6.035327914195694e-06, |
| "loss": 0.2911, |
| "step": 1324 |
| }, |
| { |
| "epoch": 4.204680682268941, |
| "grad_norm": 0.08187795024389079, |
| "learning_rate": 5.988568651536399e-06, |
| "loss": 0.2928, |
| "step": 1325 |
| }, |
| { |
| "epoch": 4.207854026180088, |
| "grad_norm": 0.07811082790053991, |
| "learning_rate": 5.941976569052394e-06, |
| "loss": 0.2855, |
| "step": 1326 |
| }, |
| { |
| "epoch": 4.211027370091234, |
| "grad_norm": 0.0781331548320789, |
| "learning_rate": 5.895551895762968e-06, |
| "loss": 0.2873, |
| "step": 1327 |
| }, |
| { |
| "epoch": 4.21420071400238, |
| "grad_norm": 0.08057455261417476, |
| "learning_rate": 5.849294859864456e-06, |
| "loss": 0.2862, |
| "step": 1328 |
| }, |
| { |
| "epoch": 4.217374057913526, |
| "grad_norm": 0.08046289285253863, |
| "learning_rate": 5.8032056887292345e-06, |
| "loss": 0.2926, |
| "step": 1329 |
| }, |
| { |
| "epoch": 4.220547401824673, |
| "grad_norm": 0.07901814464604347, |
| "learning_rate": 5.757284608904528e-06, |
| "loss": 0.2927, |
| "step": 1330 |
| }, |
| { |
| "epoch": 4.223720745735819, |
| "grad_norm": 0.08241125484928784, |
| "learning_rate": 5.711531846111351e-06, |
| "loss": 0.2919, |
| "step": 1331 |
| }, |
| { |
| "epoch": 4.226894089646965, |
| "grad_norm": 0.07943725546881736, |
| "learning_rate": 5.6659476252433285e-06, |
| "loss": 0.2858, |
| "step": 1332 |
| }, |
| { |
| "epoch": 4.230067433558112, |
| "grad_norm": 0.07690103398901868, |
| "learning_rate": 5.620532170365667e-06, |
| "loss": 0.288, |
| "step": 1333 |
| }, |
| { |
| "epoch": 4.233240777469258, |
| "grad_norm": 0.07982460591594524, |
| "learning_rate": 5.5752857047140086e-06, |
| "loss": 0.292, |
| "step": 1334 |
| }, |
| { |
| "epoch": 4.236414121380404, |
| "grad_norm": 0.07809749969035885, |
| "learning_rate": 5.530208450693355e-06, |
| "loss": 0.2895, |
| "step": 1335 |
| }, |
| { |
| "epoch": 4.239587465291551, |
| "grad_norm": 0.07723500752452818, |
| "learning_rate": 5.48530062987692e-06, |
| "loss": 0.2924, |
| "step": 1336 |
| }, |
| { |
| "epoch": 4.242760809202697, |
| "grad_norm": 0.07756528859698934, |
| "learning_rate": 5.440562463005154e-06, |
| "loss": 0.2923, |
| "step": 1337 |
| }, |
| { |
| "epoch": 4.2459341531138435, |
| "grad_norm": 0.07872620120384079, |
| "learning_rate": 5.395994169984522e-06, |
| "loss": 0.2858, |
| "step": 1338 |
| }, |
| { |
| "epoch": 4.2491074970249905, |
| "grad_norm": 0.07841592614920617, |
| "learning_rate": 5.351595969886529e-06, |
| "loss": 0.2948, |
| "step": 1339 |
| }, |
| { |
| "epoch": 4.252280840936137, |
| "grad_norm": 0.07403115107349581, |
| "learning_rate": 5.307368080946584e-06, |
| "loss": 0.2853, |
| "step": 1340 |
| }, |
| { |
| "epoch": 4.255454184847283, |
| "grad_norm": 0.07885594656165801, |
| "learning_rate": 5.263310720562973e-06, |
| "loss": 0.2929, |
| "step": 1341 |
| }, |
| { |
| "epoch": 4.25862752875843, |
| "grad_norm": 0.07590630094007132, |
| "learning_rate": 5.219424105295719e-06, |
| "loss": 0.2859, |
| "step": 1342 |
| }, |
| { |
| "epoch": 4.261800872669576, |
| "grad_norm": 0.07840740302270648, |
| "learning_rate": 5.175708450865595e-06, |
| "loss": 0.2858, |
| "step": 1343 |
| }, |
| { |
| "epoch": 4.264974216580722, |
| "grad_norm": 0.07673578995611278, |
| "learning_rate": 5.1321639721530325e-06, |
| "loss": 0.2889, |
| "step": 1344 |
| }, |
| { |
| "epoch": 4.268147560491868, |
| "grad_norm": 0.07951575168272773, |
| "learning_rate": 5.088790883197061e-06, |
| "loss": 0.2896, |
| "step": 1345 |
| }, |
| { |
| "epoch": 4.271320904403015, |
| "grad_norm": 0.08030218736730213, |
| "learning_rate": 5.045589397194231e-06, |
| "loss": 0.2865, |
| "step": 1346 |
| }, |
| { |
| "epoch": 4.274494248314161, |
| "grad_norm": 0.07842325630849278, |
| "learning_rate": 5.0025597264976446e-06, |
| "loss": 0.2858, |
| "step": 1347 |
| }, |
| { |
| "epoch": 4.277667592225307, |
| "grad_norm": 0.07773496721087116, |
| "learning_rate": 4.9597020826158114e-06, |
| "loss": 0.2901, |
| "step": 1348 |
| }, |
| { |
| "epoch": 4.280840936136454, |
| "grad_norm": 0.07839435165688115, |
| "learning_rate": 4.917016676211686e-06, |
| "loss": 0.2952, |
| "step": 1349 |
| }, |
| { |
| "epoch": 4.2840142800476, |
| "grad_norm": 0.08007319222215062, |
| "learning_rate": 4.8745037171016045e-06, |
| "loss": 0.2895, |
| "step": 1350 |
| }, |
| { |
| "epoch": 4.287187623958746, |
| "grad_norm": 0.0802943162822994, |
| "learning_rate": 4.832163414254254e-06, |
| "loss": 0.2854, |
| "step": 1351 |
| }, |
| { |
| "epoch": 4.290360967869893, |
| "grad_norm": 0.07670078924225301, |
| "learning_rate": 4.789995975789619e-06, |
| "loss": 0.2866, |
| "step": 1352 |
| }, |
| { |
| "epoch": 4.293534311781039, |
| "grad_norm": 0.07571900940799264, |
| "learning_rate": 4.748001608978015e-06, |
| "loss": 0.2864, |
| "step": 1353 |
| }, |
| { |
| "epoch": 4.2967076556921855, |
| "grad_norm": 0.07887124085195305, |
| "learning_rate": 4.70618052023903e-06, |
| "loss": 0.2861, |
| "step": 1354 |
| }, |
| { |
| "epoch": 4.299880999603332, |
| "grad_norm": 0.07844552010826854, |
| "learning_rate": 4.664532915140525e-06, |
| "loss": 0.2861, |
| "step": 1355 |
| }, |
| { |
| "epoch": 4.303054343514479, |
| "grad_norm": 0.0758283591701092, |
| "learning_rate": 4.623058998397585e-06, |
| "loss": 0.2827, |
| "step": 1356 |
| }, |
| { |
| "epoch": 4.306227687425625, |
| "grad_norm": 0.07567360663444131, |
| "learning_rate": 4.581758973871609e-06, |
| "loss": 0.2852, |
| "step": 1357 |
| }, |
| { |
| "epoch": 4.309401031336771, |
| "grad_norm": 0.07698958638459642, |
| "learning_rate": 4.540633044569172e-06, |
| "loss": 0.2838, |
| "step": 1358 |
| }, |
| { |
| "epoch": 4.312574375247918, |
| "grad_norm": 0.07772937829274414, |
| "learning_rate": 4.499681412641148e-06, |
| "loss": 0.2895, |
| "step": 1359 |
| }, |
| { |
| "epoch": 4.315747719159064, |
| "grad_norm": 0.07637398650887665, |
| "learning_rate": 4.4589042793816525e-06, |
| "loss": 0.2921, |
| "step": 1360 |
| }, |
| { |
| "epoch": 4.31892106307021, |
| "grad_norm": 0.07633260293292246, |
| "learning_rate": 4.418301845227073e-06, |
| "loss": 0.2829, |
| "step": 1361 |
| }, |
| { |
| "epoch": 4.322094406981357, |
| "grad_norm": 0.07860481054083629, |
| "learning_rate": 4.377874309755065e-06, |
| "loss": 0.2866, |
| "step": 1362 |
| }, |
| { |
| "epoch": 4.325267750892503, |
| "grad_norm": 0.0762763983665852, |
| "learning_rate": 4.337621871683597e-06, |
| "loss": 0.2833, |
| "step": 1363 |
| }, |
| { |
| "epoch": 4.328441094803649, |
| "grad_norm": 0.0766144117665982, |
| "learning_rate": 4.297544728869958e-06, |
| "loss": 0.2901, |
| "step": 1364 |
| }, |
| { |
| "epoch": 4.331614438714796, |
| "grad_norm": 0.07937899829074807, |
| "learning_rate": 4.257643078309808e-06, |
| "loss": 0.2924, |
| "step": 1365 |
| }, |
| { |
| "epoch": 4.334787782625942, |
| "grad_norm": 0.07527289810281566, |
| "learning_rate": 4.2179171161361365e-06, |
| "loss": 0.2848, |
| "step": 1366 |
| }, |
| { |
| "epoch": 4.337961126537088, |
| "grad_norm": 0.07688154141825307, |
| "learning_rate": 4.178367037618429e-06, |
| "loss": 0.2895, |
| "step": 1367 |
| }, |
| { |
| "epoch": 4.341134470448234, |
| "grad_norm": 0.07790856052535607, |
| "learning_rate": 4.138993037161565e-06, |
| "loss": 0.293, |
| "step": 1368 |
| }, |
| { |
| "epoch": 4.344307814359381, |
| "grad_norm": 0.0782212752825844, |
| "learning_rate": 4.099795308304954e-06, |
| "loss": 0.2873, |
| "step": 1369 |
| }, |
| { |
| "epoch": 4.3474811582705275, |
| "grad_norm": 0.07864542088050423, |
| "learning_rate": 4.060774043721565e-06, |
| "loss": 0.2895, |
| "step": 1370 |
| }, |
| { |
| "epoch": 4.350654502181674, |
| "grad_norm": 0.07539046652733071, |
| "learning_rate": 4.0219294352169714e-06, |
| "loss": 0.2928, |
| "step": 1371 |
| }, |
| { |
| "epoch": 4.353827846092821, |
| "grad_norm": 0.07802077980079632, |
| "learning_rate": 3.983261673728378e-06, |
| "loss": 0.2907, |
| "step": 1372 |
| }, |
| { |
| "epoch": 4.357001190003967, |
| "grad_norm": 0.07914754106412068, |
| "learning_rate": 3.94477094932376e-06, |
| "loss": 0.2897, |
| "step": 1373 |
| }, |
| { |
| "epoch": 4.360174533915113, |
| "grad_norm": 0.07815313150549123, |
| "learning_rate": 3.906457451200845e-06, |
| "loss": 0.2823, |
| "step": 1374 |
| }, |
| { |
| "epoch": 4.36334787782626, |
| "grad_norm": 0.0783643686014188, |
| "learning_rate": 3.8683213676862585e-06, |
| "loss": 0.2902, |
| "step": 1375 |
| }, |
| { |
| "epoch": 4.366521221737406, |
| "grad_norm": 0.07899805915070553, |
| "learning_rate": 3.830362886234502e-06, |
| "loss": 0.2915, |
| "step": 1376 |
| }, |
| { |
| "epoch": 4.369694565648552, |
| "grad_norm": 0.07616018845948855, |
| "learning_rate": 3.7925821934271655e-06, |
| "loss": 0.2853, |
| "step": 1377 |
| }, |
| { |
| "epoch": 4.372867909559698, |
| "grad_norm": 0.07604900073546127, |
| "learning_rate": 3.7549794749718673e-06, |
| "loss": 0.2918, |
| "step": 1378 |
| }, |
| { |
| "epoch": 4.376041253470845, |
| "grad_norm": 0.07611014329101679, |
| "learning_rate": 3.717554915701449e-06, |
| "loss": 0.2833, |
| "step": 1379 |
| }, |
| { |
| "epoch": 4.379214597381991, |
| "grad_norm": 0.07836395731025556, |
| "learning_rate": 3.680308699573005e-06, |
| "loss": 0.2879, |
| "step": 1380 |
| }, |
| { |
| "epoch": 4.382387941293137, |
| "grad_norm": 0.07492430726816589, |
| "learning_rate": 3.64324100966702e-06, |
| "loss": 0.2806, |
| "step": 1381 |
| }, |
| { |
| "epoch": 4.385561285204284, |
| "grad_norm": 0.07741032733958537, |
| "learning_rate": 3.606352028186426e-06, |
| "loss": 0.2897, |
| "step": 1382 |
| }, |
| { |
| "epoch": 4.38873462911543, |
| "grad_norm": 0.07451962544381002, |
| "learning_rate": 3.5696419364557433e-06, |
| "loss": 0.2893, |
| "step": 1383 |
| }, |
| { |
| "epoch": 4.391907973026576, |
| "grad_norm": 0.08019767472904286, |
| "learning_rate": 3.533110914920177e-06, |
| "loss": 0.2884, |
| "step": 1384 |
| }, |
| { |
| "epoch": 4.395081316937723, |
| "grad_norm": 0.07731877454550821, |
| "learning_rate": 3.4967591431447256e-06, |
| "loss": 0.2902, |
| "step": 1385 |
| }, |
| { |
| "epoch": 4.3982546608488695, |
| "grad_norm": 0.08113895307948948, |
| "learning_rate": 3.460586799813288e-06, |
| "loss": 0.2906, |
| "step": 1386 |
| }, |
| { |
| "epoch": 4.401428004760016, |
| "grad_norm": 0.07677456138444168, |
| "learning_rate": 3.4245940627278284e-06, |
| "loss": 0.2902, |
| "step": 1387 |
| }, |
| { |
| "epoch": 4.404601348671163, |
| "grad_norm": 0.07648163374651913, |
| "learning_rate": 3.388781108807444e-06, |
| "loss": 0.2879, |
| "step": 1388 |
| }, |
| { |
| "epoch": 4.407774692582309, |
| "grad_norm": 0.0758938270041518, |
| "learning_rate": 3.3531481140875345e-06, |
| "loss": 0.2934, |
| "step": 1389 |
| }, |
| { |
| "epoch": 4.410948036493455, |
| "grad_norm": 0.07252099034254757, |
| "learning_rate": 3.317695253718931e-06, |
| "loss": 0.2855, |
| "step": 1390 |
| }, |
| { |
| "epoch": 4.414121380404602, |
| "grad_norm": 0.08007509732355841, |
| "learning_rate": 3.2824227019670272e-06, |
| "loss": 0.29, |
| "step": 1391 |
| }, |
| { |
| "epoch": 4.417294724315748, |
| "grad_norm": 0.07359637512325612, |
| "learning_rate": 3.2473306322109078e-06, |
| "loss": 0.2846, |
| "step": 1392 |
| }, |
| { |
| "epoch": 4.420468068226894, |
| "grad_norm": 0.07691247352169074, |
| "learning_rate": 3.2124192169425573e-06, |
| "loss": 0.2938, |
| "step": 1393 |
| }, |
| { |
| "epoch": 4.42364141213804, |
| "grad_norm": 0.07370622338875793, |
| "learning_rate": 3.177688627765916e-06, |
| "loss": 0.2838, |
| "step": 1394 |
| }, |
| { |
| "epoch": 4.426814756049187, |
| "grad_norm": 0.0756129323864927, |
| "learning_rate": 3.143139035396128e-06, |
| "loss": 0.292, |
| "step": 1395 |
| }, |
| { |
| "epoch": 4.429988099960333, |
| "grad_norm": 0.0775966898230028, |
| "learning_rate": 3.1087706096586535e-06, |
| "loss": 0.2929, |
| "step": 1396 |
| }, |
| { |
| "epoch": 4.433161443871479, |
| "grad_norm": 0.07808391129944373, |
| "learning_rate": 3.0745835194884435e-06, |
| "loss": 0.2897, |
| "step": 1397 |
| }, |
| { |
| "epoch": 4.436334787782626, |
| "grad_norm": 0.07349160355298512, |
| "learning_rate": 3.0405779329290987e-06, |
| "loss": 0.2821, |
| "step": 1398 |
| }, |
| { |
| "epoch": 4.439508131693772, |
| "grad_norm": 0.07471990105942067, |
| "learning_rate": 3.0067540171320674e-06, |
| "loss": 0.2868, |
| "step": 1399 |
| }, |
| { |
| "epoch": 4.442681475604918, |
| "grad_norm": 0.07304458565439569, |
| "learning_rate": 2.973111938355815e-06, |
| "loss": 0.289, |
| "step": 1400 |
| }, |
| { |
| "epoch": 4.445854819516065, |
| "grad_norm": 0.07542736354862137, |
| "learning_rate": 2.9396518619650007e-06, |
| "loss": 0.2837, |
| "step": 1401 |
| }, |
| { |
| "epoch": 4.4490281634272115, |
| "grad_norm": 0.07580500670249878, |
| "learning_rate": 2.90637395242964e-06, |
| "loss": 0.2886, |
| "step": 1402 |
| }, |
| { |
| "epoch": 4.452201507338358, |
| "grad_norm": 0.07483645701648972, |
| "learning_rate": 2.8732783733243754e-06, |
| "loss": 0.2883, |
| "step": 1403 |
| }, |
| { |
| "epoch": 4.455374851249504, |
| "grad_norm": 0.0734355618152173, |
| "learning_rate": 2.8403652873275755e-06, |
| "loss": 0.2896, |
| "step": 1404 |
| }, |
| { |
| "epoch": 4.458548195160651, |
| "grad_norm": 0.07743307777656919, |
| "learning_rate": 2.8076348562206024e-06, |
| "loss": 0.2945, |
| "step": 1405 |
| }, |
| { |
| "epoch": 4.461721539071797, |
| "grad_norm": 0.07285494607732518, |
| "learning_rate": 2.7750872408869843e-06, |
| "loss": 0.2823, |
| "step": 1406 |
| }, |
| { |
| "epoch": 4.464894882982943, |
| "grad_norm": 0.07654578475317778, |
| "learning_rate": 2.7427226013116448e-06, |
| "loss": 0.2893, |
| "step": 1407 |
| }, |
| { |
| "epoch": 4.46806822689409, |
| "grad_norm": 0.07415127461281228, |
| "learning_rate": 2.7105410965800928e-06, |
| "loss": 0.2881, |
| "step": 1408 |
| }, |
| { |
| "epoch": 4.471241570805236, |
| "grad_norm": 0.07473740527863117, |
| "learning_rate": 2.678542884877664e-06, |
| "loss": 0.2861, |
| "step": 1409 |
| }, |
| { |
| "epoch": 4.474414914716382, |
| "grad_norm": 0.07455517075536294, |
| "learning_rate": 2.646728123488731e-06, |
| "loss": 0.2868, |
| "step": 1410 |
| }, |
| { |
| "epoch": 4.477588258627529, |
| "grad_norm": 0.07402864039008861, |
| "learning_rate": 2.6150969687959426e-06, |
| "loss": 0.2891, |
| "step": 1411 |
| }, |
| { |
| "epoch": 4.480761602538675, |
| "grad_norm": 0.07506645131586324, |
| "learning_rate": 2.5836495762794078e-06, |
| "loss": 0.2875, |
| "step": 1412 |
| }, |
| { |
| "epoch": 4.483934946449821, |
| "grad_norm": 0.07687328694069057, |
| "learning_rate": 2.552386100516033e-06, |
| "loss": 0.2889, |
| "step": 1413 |
| }, |
| { |
| "epoch": 4.487108290360968, |
| "grad_norm": 0.07292287204353926, |
| "learning_rate": 2.521306695178636e-06, |
| "loss": 0.2862, |
| "step": 1414 |
| }, |
| { |
| "epoch": 4.490281634272114, |
| "grad_norm": 0.07378533131418447, |
| "learning_rate": 2.4904115130352845e-06, |
| "loss": 0.2858, |
| "step": 1415 |
| }, |
| { |
| "epoch": 4.4934549781832605, |
| "grad_norm": 0.07434204604179016, |
| "learning_rate": 2.459700705948507e-06, |
| "loss": 0.2855, |
| "step": 1416 |
| }, |
| { |
| "epoch": 4.496628322094407, |
| "grad_norm": 0.07431002205125276, |
| "learning_rate": 2.429174424874563e-06, |
| "loss": 0.2816, |
| "step": 1417 |
| }, |
| { |
| "epoch": 4.4998016660055535, |
| "grad_norm": 0.07821248980927963, |
| "learning_rate": 2.398832819862662e-06, |
| "loss": 0.2884, |
| "step": 1418 |
| }, |
| { |
| "epoch": 4.5029750099167, |
| "grad_norm": 0.07520177957755667, |
| "learning_rate": 2.3686760400542853e-06, |
| "loss": 0.2912, |
| "step": 1419 |
| }, |
| { |
| "epoch": 4.506148353827846, |
| "grad_norm": 0.07186434245067173, |
| "learning_rate": 2.3387042336824097e-06, |
| "loss": 0.287, |
| "step": 1420 |
| }, |
| { |
| "epoch": 4.509321697738993, |
| "grad_norm": 0.07671516440501819, |
| "learning_rate": 2.3089175480707926e-06, |
| "loss": 0.2904, |
| "step": 1421 |
| }, |
| { |
| "epoch": 4.512495041650139, |
| "grad_norm": 0.07403348893774686, |
| "learning_rate": 2.2793161296332354e-06, |
| "loss": 0.2848, |
| "step": 1422 |
| }, |
| { |
| "epoch": 4.515668385561285, |
| "grad_norm": 0.07485509243857365, |
| "learning_rate": 2.2499001238729034e-06, |
| "loss": 0.2889, |
| "step": 1423 |
| }, |
| { |
| "epoch": 4.518841729472432, |
| "grad_norm": 0.07341240180495677, |
| "learning_rate": 2.2206696753815527e-06, |
| "loss": 0.2876, |
| "step": 1424 |
| }, |
| { |
| "epoch": 4.522015073383578, |
| "grad_norm": 0.07545046460513817, |
| "learning_rate": 2.191624927838865e-06, |
| "loss": 0.2803, |
| "step": 1425 |
| }, |
| { |
| "epoch": 4.525188417294724, |
| "grad_norm": 0.0729065721268795, |
| "learning_rate": 2.1627660240117177e-06, |
| "loss": 0.2907, |
| "step": 1426 |
| }, |
| { |
| "epoch": 4.52836176120587, |
| "grad_norm": 0.0739827535708724, |
| "learning_rate": 2.134093105753503e-06, |
| "loss": 0.2875, |
| "step": 1427 |
| }, |
| { |
| "epoch": 4.531535105117017, |
| "grad_norm": 0.0737465566063914, |
| "learning_rate": 2.1056063140034013e-06, |
| "loss": 0.2886, |
| "step": 1428 |
| }, |
| { |
| "epoch": 4.534708449028163, |
| "grad_norm": 0.07481103353206092, |
| "learning_rate": 2.0773057887857105e-06, |
| "loss": 0.2859, |
| "step": 1429 |
| }, |
| { |
| "epoch": 4.537881792939309, |
| "grad_norm": 0.07443664446659384, |
| "learning_rate": 2.049191669209156e-06, |
| "loss": 0.2877, |
| "step": 1430 |
| }, |
| { |
| "epoch": 4.541055136850456, |
| "grad_norm": 0.07271868744467536, |
| "learning_rate": 2.0212640934662087e-06, |
| "loss": 0.2808, |
| "step": 1431 |
| }, |
| { |
| "epoch": 4.5442284807616025, |
| "grad_norm": 0.0731348085785222, |
| "learning_rate": 1.99352319883237e-06, |
| "loss": 0.2859, |
| "step": 1432 |
| }, |
| { |
| "epoch": 4.547401824672749, |
| "grad_norm": 0.0728659918076379, |
| "learning_rate": 1.9659691216655697e-06, |
| "loss": 0.2817, |
| "step": 1433 |
| }, |
| { |
| "epoch": 4.5505751685838955, |
| "grad_norm": 0.07052892858321479, |
| "learning_rate": 1.9386019974054182e-06, |
| "loss": 0.2845, |
| "step": 1434 |
| }, |
| { |
| "epoch": 4.553748512495042, |
| "grad_norm": 0.07162865810925105, |
| "learning_rate": 1.9114219605725905e-06, |
| "loss": 0.2852, |
| "step": 1435 |
| }, |
| { |
| "epoch": 4.556921856406188, |
| "grad_norm": 0.07216631582905737, |
| "learning_rate": 1.8844291447681496e-06, |
| "loss": 0.2893, |
| "step": 1436 |
| }, |
| { |
| "epoch": 4.560095200317335, |
| "grad_norm": 0.07454657617595882, |
| "learning_rate": 1.857623682672891e-06, |
| "loss": 0.2884, |
| "step": 1437 |
| }, |
| { |
| "epoch": 4.563268544228481, |
| "grad_norm": 0.07192684325273382, |
| "learning_rate": 1.8310057060466845e-06, |
| "loss": 0.2921, |
| "step": 1438 |
| }, |
| { |
| "epoch": 4.566441888139627, |
| "grad_norm": 0.07346684054783836, |
| "learning_rate": 1.8045753457278303e-06, |
| "loss": 0.287, |
| "step": 1439 |
| }, |
| { |
| "epoch": 4.569615232050774, |
| "grad_norm": 0.07368537492116049, |
| "learning_rate": 1.7783327316324238e-06, |
| "loss": 0.2874, |
| "step": 1440 |
| }, |
| { |
| "epoch": 4.57278857596192, |
| "grad_norm": 0.07300900030526943, |
| "learning_rate": 1.752277992753717e-06, |
| "loss": 0.2827, |
| "step": 1441 |
| }, |
| { |
| "epoch": 4.575961919873066, |
| "grad_norm": 0.0735865281943053, |
| "learning_rate": 1.726411257161451e-06, |
| "loss": 0.2902, |
| "step": 1442 |
| }, |
| { |
| "epoch": 4.579135263784213, |
| "grad_norm": 0.07218495117782045, |
| "learning_rate": 1.700732652001289e-06, |
| "loss": 0.2878, |
| "step": 1443 |
| }, |
| { |
| "epoch": 4.582308607695359, |
| "grad_norm": 0.07237820497971356, |
| "learning_rate": 1.6752423034941223e-06, |
| "loss": 0.2914, |
| "step": 1444 |
| }, |
| { |
| "epoch": 4.585481951606505, |
| "grad_norm": 0.07169874424676832, |
| "learning_rate": 1.6499403369355115e-06, |
| "loss": 0.2906, |
| "step": 1445 |
| }, |
| { |
| "epoch": 4.588655295517651, |
| "grad_norm": 0.07237942647540188, |
| "learning_rate": 1.6248268766950204e-06, |
| "loss": 0.2871, |
| "step": 1446 |
| }, |
| { |
| "epoch": 4.591828639428798, |
| "grad_norm": 0.07206522725627988, |
| "learning_rate": 1.5999020462156511e-06, |
| "loss": 0.2872, |
| "step": 1447 |
| }, |
| { |
| "epoch": 4.5950019833399445, |
| "grad_norm": 0.07232160129557513, |
| "learning_rate": 1.5751659680131792e-06, |
| "loss": 0.2859, |
| "step": 1448 |
| }, |
| { |
| "epoch": 4.598175327251091, |
| "grad_norm": 0.07224139682967294, |
| "learning_rate": 1.5506187636756109e-06, |
| "loss": 0.2899, |
| "step": 1449 |
| }, |
| { |
| "epoch": 4.601348671162238, |
| "grad_norm": 0.07350195322568023, |
| "learning_rate": 1.5262605538625574e-06, |
| "loss": 0.2888, |
| "step": 1450 |
| }, |
| { |
| "epoch": 4.604522015073384, |
| "grad_norm": 0.0724134729249542, |
| "learning_rate": 1.5020914583046398e-06, |
| "loss": 0.2872, |
| "step": 1451 |
| }, |
| { |
| "epoch": 4.60769535898453, |
| "grad_norm": 0.07146812102253265, |
| "learning_rate": 1.4781115958028935e-06, |
| "loss": 0.2863, |
| "step": 1452 |
| }, |
| { |
| "epoch": 4.610868702895676, |
| "grad_norm": 0.07365565598868938, |
| "learning_rate": 1.4543210842282229e-06, |
| "loss": 0.2891, |
| "step": 1453 |
| }, |
| { |
| "epoch": 4.614042046806823, |
| "grad_norm": 0.07266133951722602, |
| "learning_rate": 1.4307200405207656e-06, |
| "loss": 0.2875, |
| "step": 1454 |
| }, |
| { |
| "epoch": 4.617215390717969, |
| "grad_norm": 0.07255421353863033, |
| "learning_rate": 1.4073085806893593e-06, |
| "loss": 0.2933, |
| "step": 1455 |
| }, |
| { |
| "epoch": 4.620388734629115, |
| "grad_norm": 0.0738467244304907, |
| "learning_rate": 1.3840868198109658e-06, |
| "loss": 0.2853, |
| "step": 1456 |
| }, |
| { |
| "epoch": 4.623562078540262, |
| "grad_norm": 0.07193567501371838, |
| "learning_rate": 1.3610548720300965e-06, |
| "loss": 0.2846, |
| "step": 1457 |
| }, |
| { |
| "epoch": 4.626735422451408, |
| "grad_norm": 0.07215628995818985, |
| "learning_rate": 1.3382128505582315e-06, |
| "loss": 0.2907, |
| "step": 1458 |
| }, |
| { |
| "epoch": 4.629908766362554, |
| "grad_norm": 0.07250278706923594, |
| "learning_rate": 1.315560867673318e-06, |
| "loss": 0.2892, |
| "step": 1459 |
| }, |
| { |
| "epoch": 4.633082110273701, |
| "grad_norm": 0.07078718377132749, |
| "learning_rate": 1.2930990347191607e-06, |
| "loss": 0.2834, |
| "step": 1460 |
| }, |
| { |
| "epoch": 4.636255454184847, |
| "grad_norm": 0.07349937695742143, |
| "learning_rate": 1.2708274621049134e-06, |
| "loss": 0.2861, |
| "step": 1461 |
| }, |
| { |
| "epoch": 4.639428798095993, |
| "grad_norm": 0.07204842558684392, |
| "learning_rate": 1.2487462593045075e-06, |
| "loss": 0.2904, |
| "step": 1462 |
| }, |
| { |
| "epoch": 4.64260214200714, |
| "grad_norm": 0.0721734105457553, |
| "learning_rate": 1.2268555348561529e-06, |
| "loss": 0.2835, |
| "step": 1463 |
| }, |
| { |
| "epoch": 4.6457754859182865, |
| "grad_norm": 0.0720032080278003, |
| "learning_rate": 1.20515539636175e-06, |
| "loss": 0.2872, |
| "step": 1464 |
| }, |
| { |
| "epoch": 4.648948829829433, |
| "grad_norm": 0.07173144513365656, |
| "learning_rate": 1.183645950486416e-06, |
| "loss": 0.2903, |
| "step": 1465 |
| }, |
| { |
| "epoch": 4.65212217374058, |
| "grad_norm": 0.07173493810236914, |
| "learning_rate": 1.1623273029579195e-06, |
| "loss": 0.293, |
| "step": 1466 |
| }, |
| { |
| "epoch": 4.655295517651726, |
| "grad_norm": 0.07082496375705209, |
| "learning_rate": 1.141199558566184e-06, |
| "loss": 0.286, |
| "step": 1467 |
| }, |
| { |
| "epoch": 4.658468861562872, |
| "grad_norm": 0.07192243350970179, |
| "learning_rate": 1.1202628211627587e-06, |
| "loss": 0.2865, |
| "step": 1468 |
| }, |
| { |
| "epoch": 4.661642205474018, |
| "grad_norm": 0.07242387413423897, |
| "learning_rate": 1.099517193660331e-06, |
| "loss": 0.2934, |
| "step": 1469 |
| }, |
| { |
| "epoch": 4.664815549385165, |
| "grad_norm": 0.0733740588484503, |
| "learning_rate": 1.0789627780321888e-06, |
| "loss": 0.2905, |
| "step": 1470 |
| }, |
| { |
| "epoch": 4.667988893296311, |
| "grad_norm": 0.07206002165507971, |
| "learning_rate": 1.058599675311731e-06, |
| "loss": 0.287, |
| "step": 1471 |
| }, |
| { |
| "epoch": 4.671162237207457, |
| "grad_norm": 0.07127887265831032, |
| "learning_rate": 1.0384279855919944e-06, |
| "loss": 0.2896, |
| "step": 1472 |
| }, |
| { |
| "epoch": 4.674335581118604, |
| "grad_norm": 0.07358907596433474, |
| "learning_rate": 1.0184478080251315e-06, |
| "loss": 0.2879, |
| "step": 1473 |
| }, |
| { |
| "epoch": 4.67750892502975, |
| "grad_norm": 0.0714280248007389, |
| "learning_rate": 9.986592408219286e-07, |
| "loss": 0.2868, |
| "step": 1474 |
| }, |
| { |
| "epoch": 4.680682268940896, |
| "grad_norm": 0.0711643498792862, |
| "learning_rate": 9.7906238125133e-07, |
| "loss": 0.2877, |
| "step": 1475 |
| }, |
| { |
| "epoch": 4.683855612852043, |
| "grad_norm": 0.07071027647717443, |
| "learning_rate": 9.59657325639971e-07, |
| "loss": 0.2875, |
| "step": 1476 |
| }, |
| { |
| "epoch": 4.687028956763189, |
| "grad_norm": 0.07098768877099393, |
| "learning_rate": 9.404441693716771e-07, |
| "loss": 0.2889, |
| "step": 1477 |
| }, |
| { |
| "epoch": 4.690202300674335, |
| "grad_norm": 0.0713859105686287, |
| "learning_rate": 9.2142300688701e-07, |
| "loss": 0.292, |
| "step": 1478 |
| }, |
| { |
| "epoch": 4.6933756445854815, |
| "grad_norm": 0.07126866567296886, |
| "learning_rate": 9.025939316828203e-07, |
| "loss": 0.2846, |
| "step": 1479 |
| }, |
| { |
| "epoch": 4.6965489884966285, |
| "grad_norm": 0.0723870642231539, |
| "learning_rate": 8.839570363117445e-07, |
| "loss": 0.288, |
| "step": 1480 |
| }, |
| { |
| "epoch": 4.699722332407775, |
| "grad_norm": 0.07141709931411543, |
| "learning_rate": 8.655124123817926e-07, |
| "loss": 0.2851, |
| "step": 1481 |
| }, |
| { |
| "epoch": 4.702895676318921, |
| "grad_norm": 0.07173481548872705, |
| "learning_rate": 8.47260150555882e-07, |
| "loss": 0.2869, |
| "step": 1482 |
| }, |
| { |
| "epoch": 4.706069020230068, |
| "grad_norm": 0.07243043839490769, |
| "learning_rate": 8.292003405513882e-07, |
| "loss": 0.2863, |
| "step": 1483 |
| }, |
| { |
| "epoch": 4.709242364141214, |
| "grad_norm": 0.07029557557454981, |
| "learning_rate": 8.113330711397016e-07, |
| "loss": 0.2873, |
| "step": 1484 |
| }, |
| { |
| "epoch": 4.71241570805236, |
| "grad_norm": 0.0704646381143378, |
| "learning_rate": 7.936584301458006e-07, |
| "loss": 0.2889, |
| "step": 1485 |
| }, |
| { |
| "epoch": 4.715589051963507, |
| "grad_norm": 0.07189911994697401, |
| "learning_rate": 7.761765044478209e-07, |
| "loss": 0.2844, |
| "step": 1486 |
| }, |
| { |
| "epoch": 4.718762395874653, |
| "grad_norm": 0.07145859830072916, |
| "learning_rate": 7.588873799766161e-07, |
| "loss": 0.29, |
| "step": 1487 |
| }, |
| { |
| "epoch": 4.721935739785799, |
| "grad_norm": 0.0711567541075634, |
| "learning_rate": 7.417911417153401e-07, |
| "loss": 0.2919, |
| "step": 1488 |
| }, |
| { |
| "epoch": 4.725109083696946, |
| "grad_norm": 0.07204569153095995, |
| "learning_rate": 7.248878736990428e-07, |
| "loss": 0.2877, |
| "step": 1489 |
| }, |
| { |
| "epoch": 4.728282427608092, |
| "grad_norm": 0.07125020942303784, |
| "learning_rate": 7.081776590142352e-07, |
| "loss": 0.2858, |
| "step": 1490 |
| }, |
| { |
| "epoch": 4.731455771519238, |
| "grad_norm": 0.06994584906572929, |
| "learning_rate": 6.916605797984987e-07, |
| "loss": 0.2807, |
| "step": 1491 |
| }, |
| { |
| "epoch": 4.734629115430385, |
| "grad_norm": 0.0715883706850393, |
| "learning_rate": 6.753367172400716e-07, |
| "loss": 0.2859, |
| "step": 1492 |
| }, |
| { |
| "epoch": 4.737802459341531, |
| "grad_norm": 0.07097376897268061, |
| "learning_rate": 6.59206151577454e-07, |
| "loss": 0.2831, |
| "step": 1493 |
| }, |
| { |
| "epoch": 4.740975803252677, |
| "grad_norm": 0.07016656413072693, |
| "learning_rate": 6.432689620990084e-07, |
| "loss": 0.2814, |
| "step": 1494 |
| }, |
| { |
| "epoch": 4.7441491471638235, |
| "grad_norm": 0.07157617127762622, |
| "learning_rate": 6.275252271425736e-07, |
| "loss": 0.2838, |
| "step": 1495 |
| }, |
| { |
| "epoch": 4.7473224910749705, |
| "grad_norm": 0.07273178665389977, |
| "learning_rate": 6.119750240950906e-07, |
| "loss": 0.2892, |
| "step": 1496 |
| }, |
| { |
| "epoch": 4.750495834986117, |
| "grad_norm": 0.07183066506951129, |
| "learning_rate": 5.96618429392204e-07, |
| "loss": 0.2838, |
| "step": 1497 |
| }, |
| { |
| "epoch": 4.753669178897263, |
| "grad_norm": 0.07085345548841952, |
| "learning_rate": 5.814555185178838e-07, |
| "loss": 0.2924, |
| "step": 1498 |
| }, |
| { |
| "epoch": 4.75684252280841, |
| "grad_norm": 0.071668882520716, |
| "learning_rate": 5.664863660040843e-07, |
| "loss": 0.2882, |
| "step": 1499 |
| }, |
| { |
| "epoch": 4.760015866719556, |
| "grad_norm": 0.07232430109380229, |
| "learning_rate": 5.517110454303387e-07, |
| "loss": 0.2922, |
| "step": 1500 |
| }, |
| { |
| "epoch": 4.763189210630702, |
| "grad_norm": 0.07157437165230283, |
| "learning_rate": 5.371296294234318e-07, |
| "loss": 0.2818, |
| "step": 1501 |
| }, |
| { |
| "epoch": 4.766362554541848, |
| "grad_norm": 0.07138983987689239, |
| "learning_rate": 5.227421896570217e-07, |
| "loss": 0.2896, |
| "step": 1502 |
| }, |
| { |
| "epoch": 4.769535898452995, |
| "grad_norm": 0.0728983951265643, |
| "learning_rate": 5.085487968512892e-07, |
| "loss": 0.2837, |
| "step": 1503 |
| }, |
| { |
| "epoch": 4.772709242364141, |
| "grad_norm": 0.07175621867152891, |
| "learning_rate": 4.945495207725958e-07, |
| "loss": 0.2838, |
| "step": 1504 |
| }, |
| { |
| "epoch": 4.775882586275287, |
| "grad_norm": 0.07008101989246392, |
| "learning_rate": 4.807444302331509e-07, |
| "loss": 0.2891, |
| "step": 1505 |
| }, |
| { |
| "epoch": 4.779055930186434, |
| "grad_norm": 0.07114350131264044, |
| "learning_rate": 4.671335930906429e-07, |
| "loss": 0.2884, |
| "step": 1506 |
| }, |
| { |
| "epoch": 4.78222927409758, |
| "grad_norm": 0.0712547000013449, |
| "learning_rate": 4.537170762479459e-07, |
| "loss": 0.2865, |
| "step": 1507 |
| }, |
| { |
| "epoch": 4.785402618008726, |
| "grad_norm": 0.07044621429669759, |
| "learning_rate": 4.404949456527474e-07, |
| "loss": 0.2803, |
| "step": 1508 |
| }, |
| { |
| "epoch": 4.788575961919873, |
| "grad_norm": 0.07132128750657112, |
| "learning_rate": 4.274672662972679e-07, |
| "loss": 0.2904, |
| "step": 1509 |
| }, |
| { |
| "epoch": 4.791749305831019, |
| "grad_norm": 0.07164963300558302, |
| "learning_rate": 4.146341022179057e-07, |
| "loss": 0.2908, |
| "step": 1510 |
| }, |
| { |
| "epoch": 4.7949226497421655, |
| "grad_norm": 0.07165377120863803, |
| "learning_rate": 4.019955164949352e-07, |
| "loss": 0.2874, |
| "step": 1511 |
| }, |
| { |
| "epoch": 4.7980959936533125, |
| "grad_norm": 0.07017965132137843, |
| "learning_rate": 3.8955157125221356e-07, |
| "loss": 0.2849, |
| "step": 1512 |
| }, |
| { |
| "epoch": 4.801269337564459, |
| "grad_norm": 0.0706363848720524, |
| "learning_rate": 3.7730232765685213e-07, |
| "loss": 0.2861, |
| "step": 1513 |
| }, |
| { |
| "epoch": 4.804442681475605, |
| "grad_norm": 0.07329406027888734, |
| "learning_rate": 3.6524784591891013e-07, |
| "loss": 0.2873, |
| "step": 1514 |
| }, |
| { |
| "epoch": 4.807616025386752, |
| "grad_norm": 0.06880746155049348, |
| "learning_rate": 3.5338818529113253e-07, |
| "loss": 0.2869, |
| "step": 1515 |
| }, |
| { |
| "epoch": 4.810789369297898, |
| "grad_norm": 0.07176951769611108, |
| "learning_rate": 3.417234040686257e-07, |
| "loss": 0.2935, |
| "step": 1516 |
| }, |
| { |
| "epoch": 4.813962713209044, |
| "grad_norm": 0.07173903151971806, |
| "learning_rate": 3.302535595885914e-07, |
| "loss": 0.292, |
| "step": 1517 |
| }, |
| { |
| "epoch": 4.817136057120191, |
| "grad_norm": 0.06981608234215353, |
| "learning_rate": 3.1897870823002883e-07, |
| "loss": 0.2875, |
| "step": 1518 |
| }, |
| { |
| "epoch": 4.820309401031337, |
| "grad_norm": 0.07079666453671589, |
| "learning_rate": 3.078989054134729e-07, |
| "loss": 0.2852, |
| "step": 1519 |
| }, |
| { |
| "epoch": 4.823482744942483, |
| "grad_norm": 0.07073176594785087, |
| "learning_rate": 2.9701420560071417e-07, |
| "loss": 0.2905, |
| "step": 1520 |
| }, |
| { |
| "epoch": 4.826656088853629, |
| "grad_norm": 0.06983353352491713, |
| "learning_rate": 2.863246622945237e-07, |
| "loss": 0.2851, |
| "step": 1521 |
| }, |
| { |
| "epoch": 4.829829432764776, |
| "grad_norm": 0.07067428777959145, |
| "learning_rate": 2.758303280384045e-07, |
| "loss": 0.2873, |
| "step": 1522 |
| }, |
| { |
| "epoch": 4.833002776675922, |
| "grad_norm": 0.07102800424361236, |
| "learning_rate": 2.6553125441633355e-07, |
| "loss": 0.2871, |
| "step": 1523 |
| }, |
| { |
| "epoch": 4.836176120587068, |
| "grad_norm": 0.07060381894814903, |
| "learning_rate": 2.5542749205248683e-07, |
| "loss": 0.285, |
| "step": 1524 |
| }, |
| { |
| "epoch": 4.839349464498215, |
| "grad_norm": 0.07059262948871846, |
| "learning_rate": 2.4551909061101275e-07, |
| "loss": 0.2838, |
| "step": 1525 |
| }, |
| { |
| "epoch": 4.8425228084093614, |
| "grad_norm": 0.0700522721522073, |
| "learning_rate": 2.3580609879578332e-07, |
| "loss": 0.2913, |
| "step": 1526 |
| }, |
| { |
| "epoch": 4.8456961523205075, |
| "grad_norm": 0.07311091919771467, |
| "learning_rate": 2.2628856435015e-07, |
| "loss": 0.2903, |
| "step": 1527 |
| }, |
| { |
| "epoch": 4.848869496231654, |
| "grad_norm": 0.06916920833217112, |
| "learning_rate": 2.1696653405670398e-07, |
| "loss": 0.2831, |
| "step": 1528 |
| }, |
| { |
| "epoch": 4.852042840142801, |
| "grad_norm": 0.0709586661157069, |
| "learning_rate": 2.0784005373706729e-07, |
| "loss": 0.2892, |
| "step": 1529 |
| }, |
| { |
| "epoch": 4.855216184053947, |
| "grad_norm": 0.06984666564128461, |
| "learning_rate": 1.9890916825163086e-07, |
| "loss": 0.2895, |
| "step": 1530 |
| }, |
| { |
| "epoch": 4.858389527965093, |
| "grad_norm": 0.07227174002725144, |
| "learning_rate": 1.9017392149938585e-07, |
| "loss": 0.2856, |
| "step": 1531 |
| }, |
| { |
| "epoch": 4.86156287187624, |
| "grad_norm": 0.06981243660335461, |
| "learning_rate": 1.816343564176526e-07, |
| "loss": 0.2841, |
| "step": 1532 |
| }, |
| { |
| "epoch": 4.864736215787386, |
| "grad_norm": 0.06955676961459405, |
| "learning_rate": 1.7329051498191196e-07, |
| "loss": 0.2853, |
| "step": 1533 |
| }, |
| { |
| "epoch": 4.867909559698532, |
| "grad_norm": 0.07044007567746591, |
| "learning_rate": 1.6514243820556996e-07, |
| "loss": 0.2892, |
| "step": 1534 |
| }, |
| { |
| "epoch": 4.871082903609679, |
| "grad_norm": 0.07011876795238156, |
| "learning_rate": 1.5719016613978012e-07, |
| "loss": 0.2888, |
| "step": 1535 |
| }, |
| { |
| "epoch": 4.874256247520825, |
| "grad_norm": 0.0711153254470578, |
| "learning_rate": 1.4943373787323468e-07, |
| "loss": 0.2875, |
| "step": 1536 |
| }, |
| { |
| "epoch": 4.877429591431971, |
| "grad_norm": 0.07033022450802037, |
| "learning_rate": 1.4187319153196488e-07, |
| "loss": 0.2937, |
| "step": 1537 |
| }, |
| { |
| "epoch": 4.880602935343118, |
| "grad_norm": 0.07169239899295766, |
| "learning_rate": 1.3450856427916325e-07, |
| "loss": 0.2911, |
| "step": 1538 |
| }, |
| { |
| "epoch": 4.883776279254264, |
| "grad_norm": 0.07061272545363809, |
| "learning_rate": 1.2733989231500597e-07, |
| "loss": 0.2852, |
| "step": 1539 |
| }, |
| { |
| "epoch": 4.88694962316541, |
| "grad_norm": 0.07142348008808704, |
| "learning_rate": 1.2036721087646642e-07, |
| "loss": 0.2881, |
| "step": 1540 |
| }, |
| { |
| "epoch": 4.890122967076557, |
| "grad_norm": 0.07491018067145877, |
| "learning_rate": 1.1359055423713295e-07, |
| "loss": 0.2883, |
| "step": 1541 |
| }, |
| { |
| "epoch": 4.8932963109877035, |
| "grad_norm": 0.07043352846998585, |
| "learning_rate": 1.070099557070714e-07, |
| "loss": 0.2884, |
| "step": 1542 |
| }, |
| { |
| "epoch": 4.8964696548988496, |
| "grad_norm": 0.0703268500097442, |
| "learning_rate": 1.0062544763262516e-07, |
| "loss": 0.2886, |
| "step": 1543 |
| }, |
| { |
| "epoch": 4.899642998809996, |
| "grad_norm": 0.06967989026912552, |
| "learning_rate": 9.44370613962775e-08, |
| "loss": 0.2875, |
| "step": 1544 |
| }, |
| { |
| "epoch": 4.902816342721143, |
| "grad_norm": 0.07091190304068178, |
| "learning_rate": 8.844482741649174e-08, |
| "loss": 0.2846, |
| "step": 1545 |
| }, |
| { |
| "epoch": 4.905989686632289, |
| "grad_norm": 0.0704157347762079, |
| "learning_rate": 8.264877514756465e-08, |
| "loss": 0.2915, |
| "step": 1546 |
| }, |
| { |
| "epoch": 4.909163030543435, |
| "grad_norm": 0.07197642287777141, |
| "learning_rate": 7.704893307947547e-08, |
| "loss": 0.2886, |
| "step": 1547 |
| }, |
| { |
| "epoch": 4.912336374454582, |
| "grad_norm": 0.06954677224905088, |
| "learning_rate": 7.164532873775276e-08, |
| "loss": 0.288, |
| "step": 1548 |
| }, |
| { |
| "epoch": 4.915509718365728, |
| "grad_norm": 0.07029671235552006, |
| "learning_rate": 6.643798868333662e-08, |
| "loss": 0.2868, |
| "step": 1549 |
| }, |
| { |
| "epoch": 4.918683062276874, |
| "grad_norm": 0.07101459809224268, |
| "learning_rate": 6.142693851244109e-08, |
| "loss": 0.2871, |
| "step": 1550 |
| }, |
| { |
| "epoch": 4.921856406188021, |
| "grad_norm": 0.07028381337765902, |
| "learning_rate": 5.661220285643865e-08, |
| "loss": 0.2923, |
| "step": 1551 |
| }, |
| { |
| "epoch": 4.925029750099167, |
| "grad_norm": 0.07105984885088223, |
| "learning_rate": 5.199380538174037e-08, |
| "loss": 0.2935, |
| "step": 1552 |
| }, |
| { |
| "epoch": 4.928203094010313, |
| "grad_norm": 0.07233897145123062, |
| "learning_rate": 4.7571768789667075e-08, |
| "loss": 0.2875, |
| "step": 1553 |
| }, |
| { |
| "epoch": 4.931376437921459, |
| "grad_norm": 0.0703092103811166, |
| "learning_rate": 4.3346114816347207e-08, |
| "loss": 0.2858, |
| "step": 1554 |
| }, |
| { |
| "epoch": 4.934549781832606, |
| "grad_norm": 0.0706968242740516, |
| "learning_rate": 3.931686423261027e-08, |
| "loss": 0.2872, |
| "step": 1555 |
| }, |
| { |
| "epoch": 4.937723125743752, |
| "grad_norm": 0.069673162302404, |
| "learning_rate": 3.5484036843875804e-08, |
| "loss": 0.2882, |
| "step": 1556 |
| }, |
| { |
| "epoch": 4.9408964696548985, |
| "grad_norm": 0.06968426384379744, |
| "learning_rate": 3.1847651490068964e-08, |
| "loss": 0.2855, |
| "step": 1557 |
| }, |
| { |
| "epoch": 4.9440698135660455, |
| "grad_norm": 0.07194410996026296, |
| "learning_rate": 2.8407726045522886e-08, |
| "loss": 0.2928, |
| "step": 1558 |
| }, |
| { |
| "epoch": 4.947243157477192, |
| "grad_norm": 0.06910814568161171, |
| "learning_rate": 2.5164277418880945e-08, |
| "loss": 0.2853, |
| "step": 1559 |
| }, |
| { |
| "epoch": 4.950416501388338, |
| "grad_norm": 0.07039708426854659, |
| "learning_rate": 2.2117321553030146e-08, |
| "loss": 0.2896, |
| "step": 1560 |
| }, |
| { |
| "epoch": 4.953589845299485, |
| "grad_norm": 0.07008182260440685, |
| "learning_rate": 1.9266873425012323e-08, |
| "loss": 0.2892, |
| "step": 1561 |
| }, |
| { |
| "epoch": 4.956763189210631, |
| "grad_norm": 0.06932142266239405, |
| "learning_rate": 1.6612947045953064e-08, |
| "loss": 0.2852, |
| "step": 1562 |
| }, |
| { |
| "epoch": 4.959936533121777, |
| "grad_norm": 0.06970140934176305, |
| "learning_rate": 1.4155555460990677e-08, |
| "loss": 0.2893, |
| "step": 1563 |
| }, |
| { |
| "epoch": 4.963109877032924, |
| "grad_norm": 0.07142752676643119, |
| "learning_rate": 1.1894710749214e-08, |
| "loss": 0.2883, |
| "step": 1564 |
| }, |
| { |
| "epoch": 4.96628322094407, |
| "grad_norm": 0.07011403382472035, |
| "learning_rate": 9.83042402360912e-09, |
| "loss": 0.2877, |
| "step": 1565 |
| }, |
| { |
| "epoch": 4.969456564855216, |
| "grad_norm": 0.06985655852080626, |
| "learning_rate": 7.962705430988315e-09, |
| "loss": 0.2908, |
| "step": 1566 |
| }, |
| { |
| "epoch": 4.972629908766363, |
| "grad_norm": 0.07205354220071218, |
| "learning_rate": 6.291564151963414e-09, |
| "loss": 0.2885, |
| "step": 1567 |
| }, |
| { |
| "epoch": 4.975803252677509, |
| "grad_norm": 0.06900070418548365, |
| "learning_rate": 4.817008400879175e-09, |
| "loss": 0.2877, |
| "step": 1568 |
| }, |
| { |
| "epoch": 4.978976596588655, |
| "grad_norm": 0.07029900380853947, |
| "learning_rate": 3.539045425777765e-09, |
| "loss": 0.2868, |
| "step": 1569 |
| }, |
| { |
| "epoch": 4.982149940499801, |
| "grad_norm": 0.070893441701857, |
| "learning_rate": 2.4576815083809933e-09, |
| "loss": 0.2859, |
| "step": 1570 |
| }, |
| { |
| "epoch": 4.985323284410948, |
| "grad_norm": 0.07118447299049073, |
| "learning_rate": 1.572921964032581e-09, |
| "loss": 0.2897, |
| "step": 1571 |
| }, |
| { |
| "epoch": 4.988496628322094, |
| "grad_norm": 0.07138726064563218, |
| "learning_rate": 8.847711416937188e-10, |
| "loss": 0.2912, |
| "step": 1572 |
| }, |
| { |
| "epoch": 4.9916699722332405, |
| "grad_norm": 0.06968196998745611, |
| "learning_rate": 3.9323242390754137e-10, |
| "loss": 0.2841, |
| "step": 1573 |
| }, |
| { |
| "epoch": 4.9948433161443875, |
| "grad_norm": 0.07155054271656291, |
| "learning_rate": 9.830822678136288e-11, |
| "loss": 0.2906, |
| "step": 1574 |
| }, |
| { |
| "epoch": 4.998016660055534, |
| "grad_norm": 0.07077675462284239, |
| "learning_rate": 0.0, |
| "loss": 0.2858, |
| "step": 1575 |
| }, |
| { |
| "epoch": 4.998016660055534, |
| "step": 1575, |
| "total_flos": 3.782923812788083e+19, |
| "train_loss": 0.3593273034549895, |
| "train_runtime": 93234.1699, |
| "train_samples_per_second": 8.651, |
| "train_steps_per_second": 0.017 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 1575, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.782923812788083e+19, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |