| { |
| "best_metric": 0.41607236862182617, |
| "best_model_checkpoint": "output_dir/finetuned_best_mnli/checkpoint-12000", |
| "epoch": 1.9149282920469362, |
| "global_step": 23500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.04, |
| "learning_rate": 1.983702737940026e-05, |
| "loss": 0.8277, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.04, |
| "eval_accuracy": 0.7111563932755985, |
| "eval_loss": 0.6876205801963806, |
| "eval_runtime": 133.5092, |
| "eval_samples_per_second": 73.516, |
| "eval_steps_per_second": 9.19, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.9674054758800523e-05, |
| "loss": 0.6598, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.08, |
| "eval_accuracy": 0.7539480387162506, |
| "eval_loss": 0.6054476499557495, |
| "eval_runtime": 151.2877, |
| "eval_samples_per_second": 64.876, |
| "eval_steps_per_second": 8.11, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.9511082138200782e-05, |
| "loss": 0.6012, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.12, |
| "eval_accuracy": 0.7814569536423841, |
| "eval_loss": 0.5584900975227356, |
| "eval_runtime": 160.2112, |
| "eval_samples_per_second": 61.263, |
| "eval_steps_per_second": 7.659, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.9348109517601044e-05, |
| "loss": 0.5589, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.16, |
| "eval_accuracy": 0.7909322465613856, |
| "eval_loss": 0.5319538116455078, |
| "eval_runtime": 160.4115, |
| "eval_samples_per_second": 61.186, |
| "eval_steps_per_second": 7.649, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.9185136897001307e-05, |
| "loss": 0.5631, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.2, |
| "eval_accuracy": 0.7811512990320937, |
| "eval_loss": 0.5446802973747253, |
| "eval_runtime": 190.5286, |
| "eval_samples_per_second": 51.515, |
| "eval_steps_per_second": 6.44, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.9022164276401566e-05, |
| "loss": 0.5556, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.24, |
| "eval_accuracy": 0.795109526235354, |
| "eval_loss": 0.5091724991798401, |
| "eval_runtime": 100.2906, |
| "eval_samples_per_second": 97.866, |
| "eval_steps_per_second": 12.234, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.8859191655801828e-05, |
| "loss": 0.5311, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.29, |
| "eval_accuracy": 0.8002037697401936, |
| "eval_loss": 0.5131673216819763, |
| "eval_runtime": 106.1752, |
| "eval_samples_per_second": 92.442, |
| "eval_steps_per_second": 11.556, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.8696219035202087e-05, |
| "loss": 0.5119, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.33, |
| "eval_accuracy": 0.8132450331125828, |
| "eval_loss": 0.47580984234809875, |
| "eval_runtime": 84.6709, |
| "eval_samples_per_second": 115.919, |
| "eval_steps_per_second": 14.491, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.853324641460235e-05, |
| "loss": 0.4988, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.37, |
| "eval_accuracy": 0.8033622007131941, |
| "eval_loss": 0.5015984177589417, |
| "eval_runtime": 120.1676, |
| "eval_samples_per_second": 81.678, |
| "eval_steps_per_second": 10.211, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.837027379400261e-05, |
| "loss": 0.509, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.41, |
| "eval_accuracy": 0.8169128884360672, |
| "eval_loss": 0.4686170220375061, |
| "eval_runtime": 115.84, |
| "eval_samples_per_second": 84.729, |
| "eval_steps_per_second": 10.592, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.820730117340287e-05, |
| "loss": 0.5033, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.45, |
| "eval_accuracy": 0.8121242995415181, |
| "eval_loss": 0.4700980484485626, |
| "eval_runtime": 103.4737, |
| "eval_samples_per_second": 94.855, |
| "eval_steps_per_second": 11.858, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.804432855280313e-05, |
| "loss": 0.4834, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.49, |
| "eval_accuracy": 0.8227203260315843, |
| "eval_loss": 0.4637387990951538, |
| "eval_runtime": 110.8909, |
| "eval_samples_per_second": 88.51, |
| "eval_steps_per_second": 11.065, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 1.788135593220339e-05, |
| "loss": 0.4863, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.53, |
| "eval_accuracy": 0.8237391747325522, |
| "eval_loss": 0.45791763067245483, |
| "eval_runtime": 95.3317, |
| "eval_samples_per_second": 102.956, |
| "eval_steps_per_second": 12.871, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 1.771838331160365e-05, |
| "loss": 0.4829, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.57, |
| "eval_accuracy": 0.823331635252165, |
| "eval_loss": 0.463431179523468, |
| "eval_runtime": 94.439, |
| "eval_samples_per_second": 103.93, |
| "eval_steps_per_second": 12.993, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 1.7555410691003914e-05, |
| "loss": 0.4819, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.61, |
| "eval_accuracy": 0.8260825267447784, |
| "eval_loss": 0.4441950023174286, |
| "eval_runtime": 106.424, |
| "eval_samples_per_second": 92.225, |
| "eval_steps_per_second": 11.529, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 1.7392438070404173e-05, |
| "loss": 0.4763, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.65, |
| "eval_accuracy": 0.8212939378502292, |
| "eval_loss": 0.44791945815086365, |
| "eval_runtime": 88.0696, |
| "eval_samples_per_second": 111.446, |
| "eval_steps_per_second": 13.932, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 1.7229465449804435e-05, |
| "loss": 0.4738, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.69, |
| "eval_accuracy": 0.8275089149261334, |
| "eval_loss": 0.45088809728622437, |
| "eval_runtime": 92.5901, |
| "eval_samples_per_second": 106.005, |
| "eval_steps_per_second": 13.252, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 1.7066492829204694e-05, |
| "loss": 0.4687, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.73, |
| "eval_accuracy": 0.8311767702496179, |
| "eval_loss": 0.43536150455474854, |
| "eval_runtime": 126.1698, |
| "eval_samples_per_second": 77.792, |
| "eval_steps_per_second": 9.725, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 1.6903520208604957e-05, |
| "loss": 0.468, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.77, |
| "eval_accuracy": 0.8322975038206827, |
| "eval_loss": 0.43157872557640076, |
| "eval_runtime": 113.9939, |
| "eval_samples_per_second": 86.101, |
| "eval_steps_per_second": 10.764, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.6740547588005215e-05, |
| "loss": 0.4529, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.81, |
| "eval_accuracy": 0.8317880794701987, |
| "eval_loss": 0.42749524116516113, |
| "eval_runtime": 115.1753, |
| "eval_samples_per_second": 85.218, |
| "eval_steps_per_second": 10.653, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.6577574967405478e-05, |
| "loss": 0.4617, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.86, |
| "eval_accuracy": 0.8309730005094244, |
| "eval_loss": 0.43376246094703674, |
| "eval_runtime": 139.6786, |
| "eval_samples_per_second": 70.268, |
| "eval_steps_per_second": 8.784, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 1.6414602346805737e-05, |
| "loss": 0.4534, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.9, |
| "eval_accuracy": 0.8359653591441671, |
| "eval_loss": 0.42552220821380615, |
| "eval_runtime": 96.9513, |
| "eval_samples_per_second": 101.236, |
| "eval_steps_per_second": 12.656, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 1.6251629726206e-05, |
| "loss": 0.4536, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.94, |
| "eval_accuracy": 0.8359653591441671, |
| "eval_loss": 0.41881263256073, |
| "eval_runtime": 108.0698, |
| "eval_samples_per_second": 90.821, |
| "eval_steps_per_second": 11.354, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 1.608865710560626e-05, |
| "loss": 0.4551, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.98, |
| "eval_accuracy": 0.8391237901171676, |
| "eval_loss": 0.41607236862182617, |
| "eval_runtime": 89.8988, |
| "eval_samples_per_second": 109.178, |
| "eval_steps_per_second": 13.649, |
| "step": 12000 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 1.592568448500652e-05, |
| "loss": 0.4006, |
| "step": 12500 |
| }, |
| { |
| "epoch": 1.02, |
| "eval_accuracy": 0.8356597045338767, |
| "eval_loss": 0.4454086124897003, |
| "eval_runtime": 91.709, |
| "eval_samples_per_second": 107.023, |
| "eval_steps_per_second": 13.379, |
| "step": 12500 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 1.576271186440678e-05, |
| "loss": 0.3355, |
| "step": 13000 |
| }, |
| { |
| "epoch": 1.06, |
| "eval_accuracy": 0.840855832908813, |
| "eval_loss": 0.44230031967163086, |
| "eval_runtime": 93.3849, |
| "eval_samples_per_second": 105.103, |
| "eval_steps_per_second": 13.139, |
| "step": 13000 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 1.5599739243807042e-05, |
| "loss": 0.3309, |
| "step": 13500 |
| }, |
| { |
| "epoch": 1.1, |
| "eval_accuracy": 0.8339276617422313, |
| "eval_loss": 0.4691074788570404, |
| "eval_runtime": 81.8679, |
| "eval_samples_per_second": 119.888, |
| "eval_steps_per_second": 14.988, |
| "step": 13500 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 1.54367666232073e-05, |
| "loss": 0.3416, |
| "step": 14000 |
| }, |
| { |
| "epoch": 1.14, |
| "eval_accuracy": 0.8397350993377484, |
| "eval_loss": 0.4406871497631073, |
| "eval_runtime": 80.3421, |
| "eval_samples_per_second": 122.165, |
| "eval_steps_per_second": 15.272, |
| "step": 14000 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 1.5273794002607563e-05, |
| "loss": 0.3463, |
| "step": 14500 |
| }, |
| { |
| "epoch": 1.18, |
| "eval_accuracy": 0.8371879775853286, |
| "eval_loss": 0.445261150598526, |
| "eval_runtime": 83.1773, |
| "eval_samples_per_second": 118.001, |
| "eval_steps_per_second": 14.752, |
| "step": 14500 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 1.5110821382007822e-05, |
| "loss": 0.3447, |
| "step": 15000 |
| }, |
| { |
| "epoch": 1.22, |
| "eval_accuracy": 0.8397350993377484, |
| "eval_loss": 0.4480521082878113, |
| "eval_runtime": 93.0739, |
| "eval_samples_per_second": 105.454, |
| "eval_steps_per_second": 13.183, |
| "step": 15000 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 1.4947848761408083e-05, |
| "loss": 0.3302, |
| "step": 15500 |
| }, |
| { |
| "epoch": 1.26, |
| "eval_accuracy": 0.8403464085583291, |
| "eval_loss": 0.462993860244751, |
| "eval_runtime": 98.4254, |
| "eval_samples_per_second": 99.72, |
| "eval_steps_per_second": 12.466, |
| "step": 15500 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 1.4784876140808346e-05, |
| "loss": 0.3366, |
| "step": 16000 |
| }, |
| { |
| "epoch": 1.3, |
| "eval_accuracy": 0.8402445236882323, |
| "eval_loss": 0.4278508424758911, |
| "eval_runtime": 80.9392, |
| "eval_samples_per_second": 121.264, |
| "eval_steps_per_second": 15.16, |
| "step": 16000 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 1.4621903520208606e-05, |
| "loss": 0.3446, |
| "step": 16500 |
| }, |
| { |
| "epoch": 1.34, |
| "eval_accuracy": 0.8444218033622007, |
| "eval_loss": 0.4328218996524811, |
| "eval_runtime": 88.6359, |
| "eval_samples_per_second": 110.734, |
| "eval_steps_per_second": 13.843, |
| "step": 16500 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 1.4458930899608867e-05, |
| "loss": 0.3442, |
| "step": 17000 |
| }, |
| { |
| "epoch": 1.39, |
| "eval_accuracy": 0.8421803362200713, |
| "eval_loss": 0.4372538924217224, |
| "eval_runtime": 90.5486, |
| "eval_samples_per_second": 108.395, |
| "eval_steps_per_second": 13.551, |
| "step": 17000 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 1.4295958279009128e-05, |
| "loss": 0.3431, |
| "step": 17500 |
| }, |
| { |
| "epoch": 1.43, |
| "eval_accuracy": 0.8425878757004585, |
| "eval_loss": 0.444231778383255, |
| "eval_runtime": 104.7466, |
| "eval_samples_per_second": 93.702, |
| "eval_steps_per_second": 11.714, |
| "step": 17500 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 1.4132985658409388e-05, |
| "loss": 0.3388, |
| "step": 18000 |
| }, |
| { |
| "epoch": 1.47, |
| "eval_accuracy": 0.8358634742740703, |
| "eval_loss": 0.4574286937713623, |
| "eval_runtime": 79.912, |
| "eval_samples_per_second": 122.823, |
| "eval_steps_per_second": 15.354, |
| "step": 18000 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 1.3970013037809649e-05, |
| "loss": 0.3403, |
| "step": 18500 |
| }, |
| { |
| "epoch": 1.51, |
| "eval_accuracy": 0.8423841059602649, |
| "eval_loss": 0.4366081953048706, |
| "eval_runtime": 80.6484, |
| "eval_samples_per_second": 121.701, |
| "eval_steps_per_second": 15.214, |
| "step": 18500 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 1.380704041720991e-05, |
| "loss": 0.3421, |
| "step": 19000 |
| }, |
| { |
| "epoch": 1.55, |
| "eval_accuracy": 0.8441161487519103, |
| "eval_loss": 0.4383392930030823, |
| "eval_runtime": 105.6168, |
| "eval_samples_per_second": 92.93, |
| "eval_steps_per_second": 11.617, |
| "step": 19000 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 1.364406779661017e-05, |
| "loss": 0.3405, |
| "step": 19500 |
| }, |
| { |
| "epoch": 1.59, |
| "eval_accuracy": 0.8411614875191035, |
| "eval_loss": 0.4455229938030243, |
| "eval_runtime": 99.6387, |
| "eval_samples_per_second": 98.506, |
| "eval_steps_per_second": 12.314, |
| "step": 19500 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 1.3481095176010431e-05, |
| "loss": 0.3448, |
| "step": 20000 |
| }, |
| { |
| "epoch": 1.63, |
| "eval_accuracy": 0.8385124808965868, |
| "eval_loss": 0.4386064112186432, |
| "eval_runtime": 86.9871, |
| "eval_samples_per_second": 112.833, |
| "eval_steps_per_second": 14.106, |
| "step": 20000 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 1.3318122555410693e-05, |
| "loss": 0.3346, |
| "step": 20500 |
| }, |
| { |
| "epoch": 1.67, |
| "eval_accuracy": 0.8416709118695873, |
| "eval_loss": 0.4354933500289917, |
| "eval_runtime": 90.9919, |
| "eval_samples_per_second": 107.867, |
| "eval_steps_per_second": 13.485, |
| "step": 20500 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 1.3155149934810954e-05, |
| "loss": 0.3475, |
| "step": 21000 |
| }, |
| { |
| "epoch": 1.71, |
| "eval_accuracy": 0.8372898624554254, |
| "eval_loss": 0.4339876174926758, |
| "eval_runtime": 89.1677, |
| "eval_samples_per_second": 110.073, |
| "eval_steps_per_second": 13.761, |
| "step": 21000 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 1.2992177314211213e-05, |
| "loss": 0.3378, |
| "step": 21500 |
| }, |
| { |
| "epoch": 1.75, |
| "eval_accuracy": 0.8469689251146205, |
| "eval_loss": 0.4287373125553131, |
| "eval_runtime": 93.5816, |
| "eval_samples_per_second": 104.882, |
| "eval_steps_per_second": 13.112, |
| "step": 21500 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 1.2829204693611474e-05, |
| "loss": 0.3454, |
| "step": 22000 |
| }, |
| { |
| "epoch": 1.79, |
| "eval_accuracy": 0.8434029546612328, |
| "eval_loss": 0.43279626965522766, |
| "eval_runtime": 108.513, |
| "eval_samples_per_second": 90.45, |
| "eval_steps_per_second": 11.307, |
| "step": 22000 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 1.2666232073011735e-05, |
| "loss": 0.3423, |
| "step": 22500 |
| }, |
| { |
| "epoch": 1.83, |
| "eval_accuracy": 0.8433010697911361, |
| "eval_loss": 0.4425649344921112, |
| "eval_runtime": 91.9329, |
| "eval_samples_per_second": 106.763, |
| "eval_steps_per_second": 13.347, |
| "step": 22500 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 1.2503259452411995e-05, |
| "loss": 0.3292, |
| "step": 23000 |
| }, |
| { |
| "epoch": 1.87, |
| "eval_accuracy": 0.845236882322975, |
| "eval_loss": 0.424352765083313, |
| "eval_runtime": 101.4887, |
| "eval_samples_per_second": 96.71, |
| "eval_steps_per_second": 12.09, |
| "step": 23000 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 1.2340286831812256e-05, |
| "loss": 0.3363, |
| "step": 23500 |
| }, |
| { |
| "epoch": 1.91, |
| "eval_accuracy": 0.8494141619969434, |
| "eval_loss": 0.4183717668056488, |
| "eval_runtime": 104.207, |
| "eval_samples_per_second": 94.188, |
| "eval_steps_per_second": 11.775, |
| "step": 23500 |
| } |
| ], |
| "max_steps": 61360, |
| "num_train_epochs": 5, |
| "total_flos": 4.946519097615514e+16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|