| { |
| "best_global_step": 69500, |
| "best_metric": 0.9733653983882032, |
| "best_model_checkpoint": "./results/checkpoint-69500", |
| "epoch": 2.278541733290694, |
| "eval_steps": 500, |
| "global_step": 85500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.332498301064666e-05, |
| "grad_norm": 2.171241283416748, |
| "learning_rate": 0.0, |
| "loss": 1.1419, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0013324983010646661, |
| "grad_norm": 3.923346757888794, |
| "learning_rate": 1.319120586275816e-07, |
| "loss": 0.9369, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.0026649966021293323, |
| "grad_norm": 5.213994026184082, |
| "learning_rate": 2.651565622918055e-07, |
| "loss": 0.9031, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.003997494903193999, |
| "grad_norm": 3.4589016437530518, |
| "learning_rate": 3.984010659560293e-07, |
| "loss": 0.8309, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.0053299932042586646, |
| "grad_norm": 2.302459239959717, |
| "learning_rate": 5.316455696202532e-07, |
| "loss": 0.7406, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.0066624915053233305, |
| "grad_norm": 2.8590707778930664, |
| "learning_rate": 6.64890073284477e-07, |
| "loss": 0.7311, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.0066624915053233305, |
| "eval_dev_accuracy": 0.9312310116323776, |
| "eval_dev_accuracy_threshold": 0.48881804943084717, |
| "eval_dev_average_precision": 0.1090500582180461, |
| "eval_dev_f1": 0.1867953275774505, |
| "eval_dev_f1_threshold": 0.31155017018318176, |
| "eval_dev_precision": 0.17409826753763136, |
| "eval_dev_recall": 0.20149008436507068, |
| "eval_loss": 0.7330209612846375, |
| "eval_runtime": 567.4178, |
| "eval_samples_per_second": 233.773, |
| "eval_steps_per_second": 7.307, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.007994989806387997, |
| "grad_norm": 2.162013530731201, |
| "learning_rate": 7.981345769487009e-07, |
| "loss": 0.725, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.009327488107452663, |
| "grad_norm": 3.412961959838867, |
| "learning_rate": 9.313790806129248e-07, |
| "loss": 0.6892, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.010659986408517329, |
| "grad_norm": 3.037612199783325, |
| "learning_rate": 1.0646235842771487e-06, |
| "loss": 0.74, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.011992484709581995, |
| "grad_norm": 3.178318977355957, |
| "learning_rate": 1.1978680879413725e-06, |
| "loss": 0.6857, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.013324983010646661, |
| "grad_norm": 3.9319422245025635, |
| "learning_rate": 1.3311125916055965e-06, |
| "loss": 0.6784, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.013324983010646661, |
| "eval_dev_accuracy": 0.9312913220804089, |
| "eval_dev_accuracy_threshold": 0.5571334362030029, |
| "eval_dev_average_precision": 0.2217988734731733, |
| "eval_dev_f1": 0.3029693004529441, |
| "eval_dev_f1_threshold": 0.3375406265258789, |
| "eval_dev_precision": 0.2600942655145326, |
| "eval_dev_recall": 0.36276980387860197, |
| "eval_loss": 0.6632949113845825, |
| "eval_runtime": 567.4434, |
| "eval_samples_per_second": 233.763, |
| "eval_steps_per_second": 7.306, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.014657481311711327, |
| "grad_norm": 3.4704461097717285, |
| "learning_rate": 1.4643570952698202e-06, |
| "loss": 0.6753, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.015989979612775995, |
| "grad_norm": 5.541119575500488, |
| "learning_rate": 1.597601598934044e-06, |
| "loss": 0.6707, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.01732247791384066, |
| "grad_norm": 4.9743475914001465, |
| "learning_rate": 1.7308461025982678e-06, |
| "loss": 0.6679, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.018654976214905326, |
| "grad_norm": 7.222622394561768, |
| "learning_rate": 1.864090606262492e-06, |
| "loss": 0.5831, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.01998747451596999, |
| "grad_norm": 3.6720590591430664, |
| "learning_rate": 1.9973351099267156e-06, |
| "loss": 0.5589, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.01998747451596999, |
| "eval_dev_accuracy": 0.9353095056804903, |
| "eval_dev_accuracy_threshold": 0.5631594657897949, |
| "eval_dev_average_precision": 0.338116839920073, |
| "eval_dev_f1": 0.39591571740541814, |
| "eval_dev_f1_threshold": 0.4508041739463806, |
| "eval_dev_precision": 0.4291197543500512, |
| "eval_dev_recall": 0.3674811000328695, |
| "eval_loss": 0.6119648814201355, |
| "eval_runtime": 567.5553, |
| "eval_samples_per_second": 233.716, |
| "eval_steps_per_second": 7.305, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.021319972817034658, |
| "grad_norm": 10.199407577514648, |
| "learning_rate": 2.1305796135909398e-06, |
| "loss": 0.6065, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.022652471118099326, |
| "grad_norm": 6.087101459503174, |
| "learning_rate": 2.2638241172551636e-06, |
| "loss": 0.5724, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.02398496941916399, |
| "grad_norm": 16.529647827148438, |
| "learning_rate": 2.3970686209193873e-06, |
| "loss": 0.5568, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.025317467720228658, |
| "grad_norm": 14.971884727478027, |
| "learning_rate": 2.530313124583611e-06, |
| "loss": 0.5603, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.026649966021293322, |
| "grad_norm": 4.663777828216553, |
| "learning_rate": 2.663557628247835e-06, |
| "loss": 0.5553, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.026649966021293322, |
| "eval_dev_accuracy": 0.9353848937405294, |
| "eval_dev_accuracy_threshold": 0.6437499523162842, |
| "eval_dev_average_precision": 0.38323093653846474, |
| "eval_dev_f1": 0.4514054443643622, |
| "eval_dev_f1_threshold": 0.6111855506896973, |
| "eval_dev_precision": 0.4569023569023569, |
| "eval_dev_recall": 0.44603922427960996, |
| "eval_loss": 0.5613667964935303, |
| "eval_runtime": 568.0344, |
| "eval_samples_per_second": 233.519, |
| "eval_steps_per_second": 7.299, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.02798246432235799, |
| "grad_norm": 5.051695823669434, |
| "learning_rate": 2.7968021319120587e-06, |
| "loss": 0.5506, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.029314962623422654, |
| "grad_norm": 12.604368209838867, |
| "learning_rate": 2.930046635576283e-06, |
| "loss": 0.5446, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.03064746092448732, |
| "grad_norm": 3.9183976650238037, |
| "learning_rate": 3.0632911392405066e-06, |
| "loss": 0.5432, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.03197995922555199, |
| "grad_norm": 5.165050983428955, |
| "learning_rate": 3.1965356429047304e-06, |
| "loss": 0.5091, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.03331245752661666, |
| "grad_norm": 10.820756912231445, |
| "learning_rate": 3.3297801465689546e-06, |
| "loss": 0.5099, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.03331245752661666, |
| "eval_dev_accuracy": 0.9395613922666928, |
| "eval_dev_accuracy_threshold": 0.700435996055603, |
| "eval_dev_average_precision": 0.4498892909951412, |
| "eval_dev_f1": 0.4910784423745932, |
| "eval_dev_f1_threshold": 0.5620608925819397, |
| "eval_dev_precision": 0.5032777458309373, |
| "eval_dev_recall": 0.47945655746685656, |
| "eval_loss": 0.5332435369491577, |
| "eval_runtime": 567.4907, |
| "eval_samples_per_second": 233.743, |
| "eval_steps_per_second": 7.306, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.03464495582768132, |
| "grad_norm": 5.984354496002197, |
| "learning_rate": 3.4630246502331784e-06, |
| "loss": 0.5168, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.035977454128745985, |
| "grad_norm": 11.091134071350098, |
| "learning_rate": 3.596269153897402e-06, |
| "loss": 0.4763, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.03730995242981065, |
| "grad_norm": 25.33905601501465, |
| "learning_rate": 3.729513657561626e-06, |
| "loss": 0.4916, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.03864245073087532, |
| "grad_norm": 7.44692325592041, |
| "learning_rate": 3.862758161225849e-06, |
| "loss": 0.4842, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.03997494903193998, |
| "grad_norm": 11.449934005737305, |
| "learning_rate": 3.996002664890073e-06, |
| "loss": 0.5246, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.03997494903193998, |
| "eval_dev_accuracy": 0.9320828967108189, |
| "eval_dev_accuracy_threshold": 0.7955138683319092, |
| "eval_dev_average_precision": 0.40640646661588725, |
| "eval_dev_f1": 0.5273073175258689, |
| "eval_dev_f1_threshold": 0.7331215143203735, |
| "eval_dev_precision": 0.45734063103670314, |
| "eval_dev_recall": 0.6225484825243782, |
| "eval_loss": 0.4692871868610382, |
| "eval_runtime": 565.8018, |
| "eval_samples_per_second": 234.441, |
| "eval_steps_per_second": 7.328, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.04130744733300465, |
| "grad_norm": 2.739481210708618, |
| "learning_rate": 4.129247168554298e-06, |
| "loss": 0.4399, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.042639945634069316, |
| "grad_norm": 18.604293823242188, |
| "learning_rate": 4.2624916722185215e-06, |
| "loss": 0.4532, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.043972443935133984, |
| "grad_norm": 2.9506380558013916, |
| "learning_rate": 4.395736175882745e-06, |
| "loss": 0.5107, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.04530494223619865, |
| "grad_norm": 6.515221118927002, |
| "learning_rate": 4.528980679546969e-06, |
| "loss": 0.4249, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.04663744053726331, |
| "grad_norm": 8.708155632019043, |
| "learning_rate": 4.662225183211193e-06, |
| "loss": 0.4526, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.04663744053726331, |
| "eval_dev_accuracy": 0.9425166042202237, |
| "eval_dev_accuracy_threshold": 0.8969273567199707, |
| "eval_dev_average_precision": 0.5181562757024104, |
| "eval_dev_f1": 0.5669769324160259, |
| "eval_dev_f1_threshold": 0.7522543668746948, |
| "eval_dev_precision": 0.5266422328728503, |
| "eval_dev_recall": 0.6140024104305906, |
| "eval_loss": 0.44245800375938416, |
| "eval_runtime": 566.66, |
| "eval_samples_per_second": 234.086, |
| "eval_steps_per_second": 7.317, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.04796993883832798, |
| "grad_norm": 20.27404022216797, |
| "learning_rate": 4.795469686875417e-06, |
| "loss": 0.4791, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.04930243713939265, |
| "grad_norm": 26.697437286376953, |
| "learning_rate": 4.92871419053964e-06, |
| "loss": 0.4151, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.050634935440457315, |
| "grad_norm": 29.9031982421875, |
| "learning_rate": 5.061958694203864e-06, |
| "loss": 0.4842, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.051967433741521976, |
| "grad_norm": 33.03110885620117, |
| "learning_rate": 5.195203197868088e-06, |
| "loss": 0.4062, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.053299932042586644, |
| "grad_norm": 18.199092864990234, |
| "learning_rate": 5.328447701532313e-06, |
| "loss": 0.4491, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.053299932042586644, |
| "eval_dev_accuracy": 0.9464367833422542, |
| "eval_dev_accuracy_threshold": 0.8940709829330444, |
| "eval_dev_average_precision": 0.5665972703365837, |
| "eval_dev_f1": 0.5783120410421486, |
| "eval_dev_f1_threshold": 0.532160758972168, |
| "eval_dev_precision": 0.5345420734542073, |
| "eval_dev_recall": 0.6298893393228882, |
| "eval_loss": 0.4243237376213074, |
| "eval_runtime": 565.9608, |
| "eval_samples_per_second": 234.375, |
| "eval_steps_per_second": 7.326, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.05463243034365131, |
| "grad_norm": 2.3968331813812256, |
| "learning_rate": 5.461692205196536e-06, |
| "loss": 0.3937, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.05596492864471598, |
| "grad_norm": 3.501485586166382, |
| "learning_rate": 5.59493670886076e-06, |
| "loss": 0.4806, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.05729742694578065, |
| "grad_norm": 20.607412338256836, |
| "learning_rate": 5.728181212524984e-06, |
| "loss": 0.4355, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.05862992524684531, |
| "grad_norm": 11.288957595825195, |
| "learning_rate": 5.861425716189208e-06, |
| "loss": 0.4579, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.059962423547909975, |
| "grad_norm": 23.52041244506836, |
| "learning_rate": 5.9946702198534315e-06, |
| "loss": 0.4232, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.059962423547909975, |
| "eval_dev_accuracy": 0.9480651654390978, |
| "eval_dev_accuracy_threshold": 0.925118088722229, |
| "eval_dev_average_precision": 0.5978901727391869, |
| "eval_dev_f1": 0.5897354160025502, |
| "eval_dev_f1_threshold": 0.8643622994422913, |
| "eval_dev_precision": 0.5724600309437855, |
| "eval_dev_recall": 0.6080858989810453, |
| "eval_loss": 0.4087965786457062, |
| "eval_runtime": 564.9476, |
| "eval_samples_per_second": 234.795, |
| "eval_steps_per_second": 7.339, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.06129492184897464, |
| "grad_norm": 8.910244941711426, |
| "learning_rate": 6.127914723517655e-06, |
| "loss": 0.4195, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.0626274201500393, |
| "grad_norm": 10.253131866455078, |
| "learning_rate": 6.261159227181879e-06, |
| "loss": 0.4332, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.06395991845110398, |
| "grad_norm": 6.71283483505249, |
| "learning_rate": 6.394403730846103e-06, |
| "loss": 0.433, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.06529241675216864, |
| "grad_norm": 13.018428802490234, |
| "learning_rate": 6.527648234510327e-06, |
| "loss": 0.3978, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.06662491505323331, |
| "grad_norm": 5.483168601989746, |
| "learning_rate": 6.660892738174551e-06, |
| "loss": 0.4165, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.06662491505323331, |
| "eval_dev_accuracy": 0.9488642788755117, |
| "eval_dev_accuracy_threshold": 0.9351813793182373, |
| "eval_dev_average_precision": 0.611806667891919, |
| "eval_dev_f1": 0.594213494881972, |
| "eval_dev_f1_threshold": 0.8715409636497498, |
| "eval_dev_precision": 0.5677078135914579, |
| "eval_dev_recall": 0.6233154377122823, |
| "eval_loss": 0.4015994369983673, |
| "eval_runtime": 565.0203, |
| "eval_samples_per_second": 234.765, |
| "eval_steps_per_second": 7.338, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.06795741335429797, |
| "grad_norm": 44.895931243896484, |
| "learning_rate": 6.794137241838775e-06, |
| "loss": 0.3173, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.06928991165536263, |
| "grad_norm": 19.51112937927246, |
| "learning_rate": 6.927381745502999e-06, |
| "loss": 0.4279, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.07062240995642731, |
| "grad_norm": 11.284177780151367, |
| "learning_rate": 7.0606262491672225e-06, |
| "loss": 0.4278, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.07195490825749197, |
| "grad_norm": 12.088862419128418, |
| "learning_rate": 7.193870752831446e-06, |
| "loss": 0.394, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.07328740655855664, |
| "grad_norm": 5.778110504150391, |
| "learning_rate": 7.32711525649567e-06, |
| "loss": 0.4033, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.07328740655855664, |
| "eval_dev_accuracy": 0.9500780266421405, |
| "eval_dev_accuracy_threshold": 0.9353994131088257, |
| "eval_dev_average_precision": 0.6400665115573199, |
| "eval_dev_f1": 0.6073723716004319, |
| "eval_dev_f1_threshold": 0.8007456064224243, |
| "eval_dev_precision": 0.5721619527314994, |
| "eval_dev_recall": 0.6472006135641504, |
| "eval_loss": 0.3888355791568756, |
| "eval_runtime": 566.2917, |
| "eval_samples_per_second": 234.238, |
| "eval_steps_per_second": 7.321, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.0746199048596213, |
| "grad_norm": 7.1619439125061035, |
| "learning_rate": 7.460359760159894e-06, |
| "loss": 0.3775, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.07595240316068597, |
| "grad_norm": 12.566367149353027, |
| "learning_rate": 7.593604263824118e-06, |
| "loss": 0.3944, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.07728490146175064, |
| "grad_norm": 10.173190116882324, |
| "learning_rate": 7.726848767488342e-06, |
| "loss": 0.4256, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.0786173997628153, |
| "grad_norm": 1.7395318746566772, |
| "learning_rate": 7.860093271152565e-06, |
| "loss": 0.3984, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.07994989806387996, |
| "grad_norm": 3.9586873054504395, |
| "learning_rate": 7.99333777481679e-06, |
| "loss": 0.3545, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.07994989806387996, |
| "eval_dev_accuracy": 0.949716163953953, |
| "eval_dev_accuracy_threshold": 0.9251655340194702, |
| "eval_dev_average_precision": 0.6562730620483952, |
| "eval_dev_f1": 0.6157150706828513, |
| "eval_dev_f1_threshold": 0.4972879886627197, |
| "eval_dev_precision": 0.5658281307381564, |
| "eval_dev_recall": 0.6752492604360688, |
| "eval_loss": 0.38393494486808777, |
| "eval_runtime": 567.4221, |
| "eval_samples_per_second": 233.771, |
| "eval_steps_per_second": 7.307, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.08128239636494464, |
| "grad_norm": 4.9177398681640625, |
| "learning_rate": 8.126582278481013e-06, |
| "loss": 0.4551, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.0826148946660093, |
| "grad_norm": 14.003257751464844, |
| "learning_rate": 8.259826782145237e-06, |
| "loss": 0.3817, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.08394739296707397, |
| "grad_norm": 7.29791259765625, |
| "learning_rate": 8.39307128580946e-06, |
| "loss": 0.408, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.08527989126813863, |
| "grad_norm": 26.11504554748535, |
| "learning_rate": 8.526315789473685e-06, |
| "loss": 0.4176, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.0866123895692033, |
| "grad_norm": 21.16114616394043, |
| "learning_rate": 8.659560293137908e-06, |
| "loss": 0.4128, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.0866123895692033, |
| "eval_dev_accuracy": 0.9528523072515775, |
| "eval_dev_accuracy_threshold": 0.9102756977081299, |
| "eval_dev_average_precision": 0.6708215133735886, |
| "eval_dev_f1": 0.6232578397212545, |
| "eval_dev_f1_threshold": 0.6869294047355652, |
| "eval_dev_precision": 0.6194134833892436, |
| "eval_dev_recall": 0.6271502136518023, |
| "eval_loss": 0.35909053683280945, |
| "eval_runtime": 568.2468, |
| "eval_samples_per_second": 233.432, |
| "eval_steps_per_second": 7.296, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.08794488787026797, |
| "grad_norm": 2.0384860038757324, |
| "learning_rate": 8.792804796802133e-06, |
| "loss": 0.3837, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.08927738617133263, |
| "grad_norm": 9.94750690460205, |
| "learning_rate": 8.926049300466355e-06, |
| "loss": 0.3618, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.0906098844723973, |
| "grad_norm": 3.8198211193084717, |
| "learning_rate": 9.05929380413058e-06, |
| "loss": 0.3643, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.09194238277346196, |
| "grad_norm": 14.838878631591797, |
| "learning_rate": 9.192538307794803e-06, |
| "loss": 0.3409, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.09327488107452662, |
| "grad_norm": 25.42053985595703, |
| "learning_rate": 9.325782811459028e-06, |
| "loss": 0.4001, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.09327488107452662, |
| "eval_dev_accuracy": 0.9507640579884958, |
| "eval_dev_accuracy_threshold": 0.7285012006759644, |
| "eval_dev_average_precision": 0.6341417194028635, |
| "eval_dev_f1": 0.6159875449616148, |
| "eval_dev_f1_threshold": 0.3488144874572754, |
| "eval_dev_precision": 0.6038947368421053, |
| "eval_dev_recall": 0.628574559000767, |
| "eval_loss": 0.46753522753715515, |
| "eval_runtime": 566.0195, |
| "eval_samples_per_second": 234.351, |
| "eval_steps_per_second": 7.325, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.0946073793755913, |
| "grad_norm": 21.88748550415039, |
| "learning_rate": 9.459027315123252e-06, |
| "loss": 0.4186, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.09593987767665596, |
| "grad_norm": 5.960207939147949, |
| "learning_rate": 9.592271818787475e-06, |
| "loss": 0.3478, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.09727237597772063, |
| "grad_norm": 16.917625427246094, |
| "learning_rate": 9.7255163224517e-06, |
| "loss": 0.3492, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.0986048742787853, |
| "grad_norm": 14.463135719299316, |
| "learning_rate": 9.858760826115924e-06, |
| "loss": 0.3522, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.09993737257984996, |
| "grad_norm": 3.8919215202331543, |
| "learning_rate": 9.992005329780147e-06, |
| "loss": 0.3445, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.09993737257984996, |
| "eval_dev_accuracy": 0.9513897788868199, |
| "eval_dev_accuracy_threshold": 0.933416485786438, |
| "eval_dev_average_precision": 0.672072308065407, |
| "eval_dev_f1": 0.6241289651586063, |
| "eval_dev_f1_threshold": 0.4514094591140747, |
| "eval_dev_precision": 0.5939819855488468, |
| "eval_dev_recall": 0.6574997260874329, |
| "eval_loss": 0.4463006556034088, |
| "eval_runtime": 565.9962, |
| "eval_samples_per_second": 234.36, |
| "eval_steps_per_second": 7.325, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.10126987088091463, |
| "grad_norm": 5.6276421546936035, |
| "learning_rate": 1.012524983344437e-05, |
| "loss": 0.3611, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.10260236918197929, |
| "grad_norm": 25.222440719604492, |
| "learning_rate": 1.0258494337108595e-05, |
| "loss": 0.3694, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.10393486748304395, |
| "grad_norm": 10.44590950012207, |
| "learning_rate": 1.0391738840772818e-05, |
| "loss": 0.34, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.10526736578410863, |
| "grad_norm": 15.12126350402832, |
| "learning_rate": 1.0524983344437042e-05, |
| "loss": 0.3839, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.10659986408517329, |
| "grad_norm": 10.425951957702637, |
| "learning_rate": 1.0658227848101265e-05, |
| "loss": 0.3408, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.10659986408517329, |
| "eval_dev_accuracy": 0.947168047524633, |
| "eval_dev_accuracy_threshold": 0.9196346402168274, |
| "eval_dev_average_precision": 0.6278546311695713, |
| "eval_dev_f1": 0.5864126161957174, |
| "eval_dev_f1_threshold": 0.5887953042984009, |
| "eval_dev_precision": 0.5073623559539052, |
| "eval_dev_recall": 0.6946422701873562, |
| "eval_loss": 0.36468541622161865, |
| "eval_runtime": 566.1553, |
| "eval_samples_per_second": 234.294, |
| "eval_steps_per_second": 7.323, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.10793236238623796, |
| "grad_norm": 19.852497100830078, |
| "learning_rate": 1.079147235176549e-05, |
| "loss": 0.4011, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.10926486068730262, |
| "grad_norm": 66.98611450195312, |
| "learning_rate": 1.0924716855429713e-05, |
| "loss": 0.3037, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.11059735898836728, |
| "grad_norm": 2.033569812774658, |
| "learning_rate": 1.1057961359093938e-05, |
| "loss": 0.3632, |
| "step": 8300 |
| }, |
| { |
| "epoch": 0.11192985728943196, |
| "grad_norm": 1.7951024770736694, |
| "learning_rate": 1.1191205862758164e-05, |
| "loss": 0.3878, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.11326235559049662, |
| "grad_norm": 3.2986645698547363, |
| "learning_rate": 1.1324450366422385e-05, |
| "loss": 0.3849, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.11326235559049662, |
| "eval_dev_accuracy": 0.9516611759029605, |
| "eval_dev_accuracy_threshold": 0.9289531707763672, |
| "eval_dev_average_precision": 0.6795165568410317, |
| "eval_dev_f1": 0.6335993534700474, |
| "eval_dev_f1_threshold": 0.7476029396057129, |
| "eval_dev_precision": 0.5877612220035611, |
| "eval_dev_recall": 0.6871918483620029, |
| "eval_loss": 0.3556542694568634, |
| "eval_runtime": 568.4381, |
| "eval_samples_per_second": 233.353, |
| "eval_steps_per_second": 7.294, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.1145948538915613, |
| "grad_norm": 1.4321446418762207, |
| "learning_rate": 1.1457694870086611e-05, |
| "loss": 0.3833, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.11592735219262595, |
| "grad_norm": 54.76650619506836, |
| "learning_rate": 1.1590939373750833e-05, |
| "loss": 0.3797, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.11725985049369061, |
| "grad_norm": 31.644800186157227, |
| "learning_rate": 1.1724183877415059e-05, |
| "loss": 0.3705, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.11859234879475529, |
| "grad_norm": 10.417598724365234, |
| "learning_rate": 1.1857428381079282e-05, |
| "loss": 0.3556, |
| "step": 8900 |
| }, |
| { |
| "epoch": 0.11992484709581995, |
| "grad_norm": 9.85118579864502, |
| "learning_rate": 1.1990672884743507e-05, |
| "loss": 0.3771, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.11992484709581995, |
| "eval_dev_accuracy": 0.955196875918792, |
| "eval_dev_accuracy_threshold": 0.9099207520484924, |
| "eval_dev_average_precision": 0.7086261480764138, |
| "eval_dev_f1": 0.6543492478744277, |
| "eval_dev_f1_threshold": 0.8123365640640259, |
| "eval_dev_precision": 0.6510139898058779, |
| "eval_dev_recall": 0.6577188561411198, |
| "eval_loss": 0.33506301045417786, |
| "eval_runtime": 567.679, |
| "eval_samples_per_second": 233.665, |
| "eval_steps_per_second": 7.303, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.12125734539688462, |
| "grad_norm": 22.609596252441406, |
| "learning_rate": 1.212391738840773e-05, |
| "loss": 0.3649, |
| "step": 9100 |
| }, |
| { |
| "epoch": 0.12258984369794929, |
| "grad_norm": 13.67054271697998, |
| "learning_rate": 1.2257161892071954e-05, |
| "loss": 0.3687, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.12392234199901395, |
| "grad_norm": 11.858447074890137, |
| "learning_rate": 1.2390406395736177e-05, |
| "loss": 0.406, |
| "step": 9300 |
| }, |
| { |
| "epoch": 0.1252548403000786, |
| "grad_norm": 22.195842742919922, |
| "learning_rate": 1.2523650899400402e-05, |
| "loss": 0.3362, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.1265873386011433, |
| "grad_norm": 1.6114740371704102, |
| "learning_rate": 1.2656895403064625e-05, |
| "loss": 0.2749, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.1265873386011433, |
| "eval_dev_accuracy": 0.9535986490459641, |
| "eval_dev_accuracy_threshold": 0.9009051322937012, |
| "eval_dev_average_precision": 0.6983138303648807, |
| "eval_dev_f1": 0.6486718540381003, |
| "eval_dev_f1_threshold": 0.7954304218292236, |
| "eval_dev_precision": 0.6356752208666386, |
| "eval_dev_recall": 0.6622110222417005, |
| "eval_loss": 0.35359007120132446, |
| "eval_runtime": 567.3028, |
| "eval_samples_per_second": 233.82, |
| "eval_steps_per_second": 7.308, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.12791983690220796, |
| "grad_norm": 15.079890251159668, |
| "learning_rate": 1.279013990672885e-05, |
| "loss": 0.3038, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.12925233520327262, |
| "grad_norm": 19.459815979003906, |
| "learning_rate": 1.2923384410393072e-05, |
| "loss": 0.3273, |
| "step": 9700 |
| }, |
| { |
| "epoch": 0.13058483350433728, |
| "grad_norm": 21.132827758789062, |
| "learning_rate": 1.3056628914057297e-05, |
| "loss": 0.3823, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.13191733180540194, |
| "grad_norm": 4.1918158531188965, |
| "learning_rate": 1.318987341772152e-05, |
| "loss": 0.3406, |
| "step": 9900 |
| }, |
| { |
| "epoch": 0.13324983010646663, |
| "grad_norm": 22.806039810180664, |
| "learning_rate": 1.3323117921385744e-05, |
| "loss": 0.4069, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.13324983010646663, |
| "eval_dev_accuracy": 0.9545711550204679, |
| "eval_dev_accuracy_threshold": 0.9562267065048218, |
| "eval_dev_average_precision": 0.7100922283297543, |
| "eval_dev_f1": 0.6518728053062817, |
| "eval_dev_f1_threshold": 0.8285595178604126, |
| "eval_dev_precision": 0.5874132020743605, |
| "eval_dev_recall": 0.7322230743946532, |
| "eval_loss": 0.3286122977733612, |
| "eval_runtime": 566.8451, |
| "eval_samples_per_second": 234.009, |
| "eval_steps_per_second": 7.314, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.1345823284075313, |
| "grad_norm": 12.236410140991211, |
| "learning_rate": 1.3456362425049967e-05, |
| "loss": 0.3618, |
| "step": 10100 |
| }, |
| { |
| "epoch": 0.13591482670859595, |
| "grad_norm": 5.4430060386657715, |
| "learning_rate": 1.3589606928714192e-05, |
| "loss": 0.3619, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.1372473250096606, |
| "grad_norm": 13.798270225524902, |
| "learning_rate": 1.3722851432378415e-05, |
| "loss": 0.3413, |
| "step": 10300 |
| }, |
| { |
| "epoch": 0.13857982331072527, |
| "grad_norm": 3.899458169937134, |
| "learning_rate": 1.385609593604264e-05, |
| "loss": 0.3374, |
| "step": 10400 |
| }, |
| { |
| "epoch": 0.13991232161178996, |
| "grad_norm": 6.147464752197266, |
| "learning_rate": 1.3989340439706862e-05, |
| "loss": 0.3725, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.13991232161178996, |
| "eval_dev_accuracy": 0.9561316878632762, |
| "eval_dev_accuracy_threshold": 0.9145029187202454, |
| "eval_dev_average_precision": 0.7194237355423562, |
| "eval_dev_f1": 0.6639100398366194, |
| "eval_dev_f1_threshold": 0.7489595413208008, |
| "eval_dev_precision": 0.6150037369207773, |
| "eval_dev_recall": 0.7212665717103101, |
| "eval_loss": 0.30797863006591797, |
| "eval_runtime": 567.191, |
| "eval_samples_per_second": 233.867, |
| "eval_steps_per_second": 7.31, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.14124481991285462, |
| "grad_norm": 0.32248708605766296, |
| "learning_rate": 1.4122584943371087e-05, |
| "loss": 0.3289, |
| "step": 10600 |
| }, |
| { |
| "epoch": 0.14257731821391928, |
| "grad_norm": 3.342273235321045, |
| "learning_rate": 1.4255829447035312e-05, |
| "loss": 0.335, |
| "step": 10700 |
| }, |
| { |
| "epoch": 0.14390981651498394, |
| "grad_norm": 5.640665531158447, |
| "learning_rate": 1.4389073950699535e-05, |
| "loss": 0.3298, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.1452423148160486, |
| "grad_norm": 1.3349778652191162, |
| "learning_rate": 1.452231845436376e-05, |
| "loss": 0.3805, |
| "step": 10900 |
| }, |
| { |
| "epoch": 0.1465748131171133, |
| "grad_norm": 8.876007080078125, |
| "learning_rate": 1.4655562958027982e-05, |
| "loss": 0.3545, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.1465748131171133, |
| "eval_dev_accuracy": 0.9554607341289286, |
| "eval_dev_accuracy_threshold": 0.8000156283378601, |
| "eval_dev_average_precision": 0.706945036170435, |
| "eval_dev_f1": 0.6470619459631616, |
| "eval_dev_f1_threshold": 0.35837632417678833, |
| "eval_dev_precision": 0.6275741350906096, |
| "eval_dev_recall": 0.6677988386107154, |
| "eval_loss": 0.40842413902282715, |
| "eval_runtime": 566.601, |
| "eval_samples_per_second": 234.11, |
| "eval_steps_per_second": 7.317, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.14790731141817795, |
| "grad_norm": 1.4081681966781616, |
| "learning_rate": 1.4788807461692207e-05, |
| "loss": 0.3591, |
| "step": 11100 |
| }, |
| { |
| "epoch": 0.1492398097192426, |
| "grad_norm": 15.024413108825684, |
| "learning_rate": 1.492205196535643e-05, |
| "loss": 0.3523, |
| "step": 11200 |
| }, |
| { |
| "epoch": 0.15057230802030727, |
| "grad_norm": 18.281108856201172, |
| "learning_rate": 1.5055296469020654e-05, |
| "loss": 0.3601, |
| "step": 11300 |
| }, |
| { |
| "epoch": 0.15190480632137193, |
| "grad_norm": 6.56211519241333, |
| "learning_rate": 1.5188540972684877e-05, |
| "loss": 0.3365, |
| "step": 11400 |
| }, |
| { |
| "epoch": 0.1532373046224366, |
| "grad_norm": 43.26646041870117, |
| "learning_rate": 1.5321785476349102e-05, |
| "loss": 0.3859, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.1532373046224366, |
| "eval_dev_accuracy": 0.9578354580201588, |
| "eval_dev_accuracy_threshold": 0.7837315797805786, |
| "eval_dev_average_precision": 0.7318177300477213, |
| "eval_dev_f1": 0.6784168212739641, |
| "eval_dev_f1_threshold": 0.5022754669189453, |
| "eval_dev_precision": 0.6404592779994162, |
| "eval_dev_recall": 0.7211570066834666, |
| "eval_loss": 0.3321084976196289, |
| "eval_runtime": 566.5331, |
| "eval_samples_per_second": 234.138, |
| "eval_steps_per_second": 7.318, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.15456980292350128, |
| "grad_norm": 28.52861785888672, |
| "learning_rate": 1.5455029980013325e-05, |
| "loss": 0.3581, |
| "step": 11600 |
| }, |
| { |
| "epoch": 0.15590230122456594, |
| "grad_norm": 7.024416923522949, |
| "learning_rate": 1.558827448367755e-05, |
| "loss": 0.3133, |
| "step": 11700 |
| }, |
| { |
| "epoch": 0.1572347995256306, |
| "grad_norm": 0.6226129531860352, |
| "learning_rate": 1.5721518987341774e-05, |
| "loss": 0.295, |
| "step": 11800 |
| }, |
| { |
| "epoch": 0.15856729782669526, |
| "grad_norm": 1.0621097087860107, |
| "learning_rate": 1.5854763491005997e-05, |
| "loss": 0.3027, |
| "step": 11900 |
| }, |
| { |
| "epoch": 0.15989979612775992, |
| "grad_norm": 1.318295955657959, |
| "learning_rate": 1.598800799467022e-05, |
| "loss": 0.3216, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.15989979612775992, |
| "eval_dev_accuracy": 0.9555587386069794, |
| "eval_dev_accuracy_threshold": 0.8190538287162781, |
| "eval_dev_average_precision": 0.7133029701747852, |
| "eval_dev_f1": 0.6531785971038309, |
| "eval_dev_f1_threshold": 0.3298466205596924, |
| "eval_dev_precision": 0.6146709191069876, |
| "eval_dev_recall": 0.6968335707242248, |
| "eval_loss": 0.3916049897670746, |
| "eval_runtime": 554.3199, |
| "eval_samples_per_second": 239.297, |
| "eval_steps_per_second": 7.479, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.1612322944288246, |
| "grad_norm": 9.81628704071045, |
| "learning_rate": 1.6121252498334446e-05, |
| "loss": 0.3522, |
| "step": 12100 |
| }, |
| { |
| "epoch": 0.16256479272988927, |
| "grad_norm": 4.447005271911621, |
| "learning_rate": 1.625449700199867e-05, |
| "loss": 0.3266, |
| "step": 12200 |
| }, |
| { |
| "epoch": 0.16389729103095393, |
| "grad_norm": 14.646246910095215, |
| "learning_rate": 1.6387741505662892e-05, |
| "loss": 0.3292, |
| "step": 12300 |
| }, |
| { |
| "epoch": 0.1652297893320186, |
| "grad_norm": 16.482669830322266, |
| "learning_rate": 1.6520986009327115e-05, |
| "loss": 0.3446, |
| "step": 12400 |
| }, |
| { |
| "epoch": 0.16656228763308326, |
| "grad_norm": 7.77319860458374, |
| "learning_rate": 1.665423051299134e-05, |
| "loss": 0.3236, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.16656228763308326, |
| "eval_dev_accuracy": 0.9569684953297097, |
| "eval_dev_accuracy_threshold": 0.9383260011672974, |
| "eval_dev_average_precision": 0.7292398003419696, |
| "eval_dev_f1": 0.66701062841812, |
| "eval_dev_f1_threshold": 0.8000765442848206, |
| "eval_dev_precision": 0.6303266699171136, |
| "eval_dev_recall": 0.7082283335159417, |
| "eval_loss": 0.35466820001602173, |
| "eval_runtime": 561.336, |
| "eval_samples_per_second": 236.306, |
| "eval_steps_per_second": 7.386, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.16789478593414794, |
| "grad_norm": 6.646021366119385, |
| "learning_rate": 1.6787475016655564e-05, |
| "loss": 0.3134, |
| "step": 12600 |
| }, |
| { |
| "epoch": 0.1692272842352126, |
| "grad_norm": 87.47698211669922, |
| "learning_rate": 1.6920719520319787e-05, |
| "loss": 0.3249, |
| "step": 12700 |
| }, |
| { |
| "epoch": 0.17055978253627727, |
| "grad_norm": 17.500768661499023, |
| "learning_rate": 1.705396402398401e-05, |
| "loss": 0.3811, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.17189228083734193, |
| "grad_norm": 7.166949272155762, |
| "learning_rate": 1.7187208527648237e-05, |
| "loss": 0.3127, |
| "step": 12900 |
| }, |
| { |
| "epoch": 0.1732247791384066, |
| "grad_norm": 4.106062889099121, |
| "learning_rate": 1.732045303131246e-05, |
| "loss": 0.3219, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.1732247791384066, |
| "eval_dev_accuracy": 0.9564407789094364, |
| "eval_dev_accuracy_threshold": 0.8795315623283386, |
| "eval_dev_average_precision": 0.726842832162545, |
| "eval_dev_f1": 0.6643535054597408, |
| "eval_dev_f1_threshold": 0.4876420497894287, |
| "eval_dev_precision": 0.621717123483908, |
| "eval_dev_recall": 0.7132683247507395, |
| "eval_loss": 0.3564859926700592, |
| "eval_runtime": 558.1535, |
| "eval_samples_per_second": 237.653, |
| "eval_steps_per_second": 7.428, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.17455727743947128, |
| "grad_norm": 1.3857766389846802, |
| "learning_rate": 1.7453697534976682e-05, |
| "loss": 0.3526, |
| "step": 13100 |
| }, |
| { |
| "epoch": 0.17588977574053594, |
| "grad_norm": 20.39262580871582, |
| "learning_rate": 1.758694203864091e-05, |
| "loss": 0.3299, |
| "step": 13200 |
| }, |
| { |
| "epoch": 0.1772222740416006, |
| "grad_norm": 18.849407196044922, |
| "learning_rate": 1.772018654230513e-05, |
| "loss": 0.3303, |
| "step": 13300 |
| }, |
| { |
| "epoch": 0.17855477234266526, |
| "grad_norm": 42.82183837890625, |
| "learning_rate": 1.7853431045969355e-05, |
| "loss": 0.3739, |
| "step": 13400 |
| }, |
| { |
| "epoch": 0.17988727064372992, |
| "grad_norm": 4.524885654449463, |
| "learning_rate": 1.7986675549633577e-05, |
| "loss": 0.3544, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.17988727064372992, |
| "eval_dev_accuracy": 0.9572021983158308, |
| "eval_dev_accuracy_threshold": 0.9545025825500488, |
| "eval_dev_average_precision": 0.7351364884979171, |
| "eval_dev_f1": 0.6692303640099035, |
| "eval_dev_f1_threshold": 0.7856150269508362, |
| "eval_dev_precision": 0.6444805194805194, |
| "eval_dev_recall": 0.6959570505094774, |
| "eval_loss": 0.33416542410850525, |
| "eval_runtime": 558.7974, |
| "eval_samples_per_second": 237.379, |
| "eval_steps_per_second": 7.42, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.1812197689447946, |
| "grad_norm": 2.0763137340545654, |
| "learning_rate": 1.8119920053297804e-05, |
| "loss": 0.3584, |
| "step": 13600 |
| }, |
| { |
| "epoch": 0.18255226724585927, |
| "grad_norm": 4.722475051879883, |
| "learning_rate": 1.8253164556962027e-05, |
| "loss": 0.341, |
| "step": 13700 |
| }, |
| { |
| "epoch": 0.18388476554692393, |
| "grad_norm": 4.084864139556885, |
| "learning_rate": 1.838640906062625e-05, |
| "loss": 0.3371, |
| "step": 13800 |
| }, |
| { |
| "epoch": 0.1852172638479886, |
| "grad_norm": 0.32559067010879517, |
| "learning_rate": 1.8519653564290473e-05, |
| "loss": 0.3322, |
| "step": 13900 |
| }, |
| { |
| "epoch": 0.18654976214905325, |
| "grad_norm": 9.505677223205566, |
| "learning_rate": 1.86528980679547e-05, |
| "loss": 0.3493, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.18654976214905325, |
| "eval_dev_accuracy": 0.9565010893574676, |
| "eval_dev_accuracy_threshold": 0.9463940858840942, |
| "eval_dev_average_precision": 0.7367044841602028, |
| "eval_dev_f1": 0.6669865642994243, |
| "eval_dev_f1_threshold": 0.8662494421005249, |
| "eval_dev_precision": 0.6496001661647107, |
| "eval_dev_recall": 0.6853292429056645, |
| "eval_loss": 0.29620230197906494, |
| "eval_runtime": 559.2923, |
| "eval_samples_per_second": 237.169, |
| "eval_steps_per_second": 7.413, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.18788226045011794, |
| "grad_norm": 19.357847213745117, |
| "learning_rate": 1.8786142571618922e-05, |
| "loss": 0.3021, |
| "step": 14100 |
| }, |
| { |
| "epoch": 0.1892147587511826, |
| "grad_norm": 0.8998715281486511, |
| "learning_rate": 1.8919387075283148e-05, |
| "loss": 0.3249, |
| "step": 14200 |
| }, |
| { |
| "epoch": 0.19054725705224726, |
| "grad_norm": 17.16973304748535, |
| "learning_rate": 1.9052631578947368e-05, |
| "loss": 0.3389, |
| "step": 14300 |
| }, |
| { |
| "epoch": 0.19187975535331192, |
| "grad_norm": 1.553682565689087, |
| "learning_rate": 1.9185876082611594e-05, |
| "loss": 0.3547, |
| "step": 14400 |
| }, |
| { |
| "epoch": 0.19321225365437658, |
| "grad_norm": 10.778045654296875, |
| "learning_rate": 1.9319120586275817e-05, |
| "loss": 0.3205, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.19321225365437658, |
| "eval_dev_accuracy": 0.9589587401147406, |
| "eval_dev_accuracy_threshold": 0.9295341968536377, |
| "eval_dev_average_precision": 0.7537310584722261, |
| "eval_dev_f1": 0.6830523319465732, |
| "eval_dev_f1_threshold": 0.7479926347732544, |
| "eval_dev_precision": 0.6825292637567005, |
| "eval_dev_recall": 0.6835762024761696, |
| "eval_loss": 0.3468180298805237, |
| "eval_runtime": 558.8191, |
| "eval_samples_per_second": 237.37, |
| "eval_steps_per_second": 7.419, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.19454475195544127, |
| "grad_norm": 2.241529941558838, |
| "learning_rate": 1.9452365089940043e-05, |
| "loss": 0.3168, |
| "step": 14600 |
| }, |
| { |
| "epoch": 0.19587725025650593, |
| "grad_norm": 1.1848278045654297, |
| "learning_rate": 1.9585609593604263e-05, |
| "loss": 0.3348, |
| "step": 14700 |
| }, |
| { |
| "epoch": 0.1972097485575706, |
| "grad_norm": 16.031787872314453, |
| "learning_rate": 1.971885409726849e-05, |
| "loss": 0.3237, |
| "step": 14800 |
| }, |
| { |
| "epoch": 0.19854224685863525, |
| "grad_norm": 12.078638076782227, |
| "learning_rate": 1.9852098600932712e-05, |
| "loss": 0.3428, |
| "step": 14900 |
| }, |
| { |
| "epoch": 0.1998747451596999, |
| "grad_norm": 5.735422134399414, |
| "learning_rate": 1.998534310459694e-05, |
| "loss": 0.3179, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.1998747451596999, |
| "eval_dev_accuracy": 0.9547219311405459, |
| "eval_dev_accuracy_threshold": 0.9576058387756348, |
| "eval_dev_average_precision": 0.6908515568536635, |
| "eval_dev_f1": 0.6764229341974599, |
| "eval_dev_f1_threshold": 0.926771879196167, |
| "eval_dev_precision": 0.6467119728163102, |
| "eval_dev_recall": 0.7089952887038458, |
| "eval_loss": 0.34104466438293457, |
| "eval_runtime": 561.385, |
| "eval_samples_per_second": 236.285, |
| "eval_steps_per_second": 7.385, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.2012072434607646, |
| "grad_norm": 19.35861587524414, |
| "learning_rate": 1.9986823013828432e-05, |
| "loss": 0.3409, |
| "step": 15100 |
| }, |
| { |
| "epoch": 0.20253974176182926, |
| "grad_norm": 35.545223236083984, |
| "learning_rate": 1.997201741138847e-05, |
| "loss": 0.331, |
| "step": 15200 |
| }, |
| { |
| "epoch": 0.20387224006289392, |
| "grad_norm": 17.14919662475586, |
| "learning_rate": 1.9957211808948506e-05, |
| "loss": 0.3493, |
| "step": 15300 |
| }, |
| { |
| "epoch": 0.20520473836395858, |
| "grad_norm": 2.735530138015747, |
| "learning_rate": 1.9942406206508544e-05, |
| "loss": 0.3205, |
| "step": 15400 |
| }, |
| { |
| "epoch": 0.20653723666502324, |
| "grad_norm": 1.0762556791305542, |
| "learning_rate": 1.9927600604068582e-05, |
| "loss": 0.3307, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.20653723666502324, |
| "eval_dev_accuracy": 0.9532292475517727, |
| "eval_dev_accuracy_threshold": 0.8670874238014221, |
| "eval_dev_average_precision": 0.6865319564110608, |
| "eval_dev_f1": 0.660230457801308, |
| "eval_dev_f1_threshold": 0.6122031211853027, |
| "eval_dev_precision": 0.6272807969227735, |
| "eval_dev_recall": 0.6968335707242248, |
| "eval_loss": 0.3867639899253845, |
| "eval_runtime": 563.4002, |
| "eval_samples_per_second": 235.44, |
| "eval_steps_per_second": 7.359, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.2078697349660879, |
| "grad_norm": 8.77493953704834, |
| "learning_rate": 1.9912795001628617e-05, |
| "loss": 0.3683, |
| "step": 15600 |
| }, |
| { |
| "epoch": 0.2092022332671526, |
| "grad_norm": 0.7768261432647705, |
| "learning_rate": 1.9897989399188656e-05, |
| "loss": 0.318, |
| "step": 15700 |
| }, |
| { |
| "epoch": 0.21053473156821725, |
| "grad_norm": 12.180807113647461, |
| "learning_rate": 1.988318379674869e-05, |
| "loss": 0.3498, |
| "step": 15800 |
| }, |
| { |
| "epoch": 0.21186722986928191, |
| "grad_norm": 4.719166278839111, |
| "learning_rate": 1.986837819430873e-05, |
| "loss": 0.3043, |
| "step": 15900 |
| }, |
| { |
| "epoch": 0.21319972817034658, |
| "grad_norm": 6.112349987030029, |
| "learning_rate": 1.9853572591868764e-05, |
| "loss": 0.3393, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.21319972817034658, |
| "eval_dev_accuracy": 0.9534780281499016, |
| "eval_dev_accuracy_threshold": 0.9171842336654663, |
| "eval_dev_average_precision": 0.7089238013031434, |
| "eval_dev_f1": 0.6696855863736944, |
| "eval_dev_f1_threshold": 0.8700560331344604, |
| "eval_dev_precision": 0.6313797787696488, |
| "eval_dev_recall": 0.7129396296702093, |
| "eval_loss": 0.33279770612716675, |
| "eval_runtime": 559.9008, |
| "eval_samples_per_second": 236.912, |
| "eval_steps_per_second": 7.405, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.21453222647141124, |
| "grad_norm": 47.89255905151367, |
| "learning_rate": 1.9838766989428802e-05, |
| "loss": 0.3326, |
| "step": 16100 |
| }, |
| { |
| "epoch": 0.21586472477247592, |
| "grad_norm": 6.826938152313232, |
| "learning_rate": 1.982396138698884e-05, |
| "loss": 0.3094, |
| "step": 16200 |
| }, |
| { |
| "epoch": 0.21719722307354059, |
| "grad_norm": 13.803174018859863, |
| "learning_rate": 1.9809155784548875e-05, |
| "loss": 0.3572, |
| "step": 16300 |
| }, |
| { |
| "epoch": 0.21852972137460525, |
| "grad_norm": 7.402415752410889, |
| "learning_rate": 1.9794350182108914e-05, |
| "loss": 0.3447, |
| "step": 16400 |
| }, |
| { |
| "epoch": 0.2198622196756699, |
| "grad_norm": 28.723724365234375, |
| "learning_rate": 1.977954457966895e-05, |
| "loss": 0.3484, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.2198622196756699, |
| "eval_dev_accuracy": 0.9592753699669047, |
| "eval_dev_accuracy_threshold": 0.8685222864151001, |
| "eval_dev_average_precision": 0.7462578581871518, |
| "eval_dev_f1": 0.6837169650468883, |
| "eval_dev_f1_threshold": 0.35429614782333374, |
| "eval_dev_precision": 0.6654911316253501, |
| "eval_dev_recall": 0.702969212227457, |
| "eval_loss": 0.3938925862312317, |
| "eval_runtime": 560.3292, |
| "eval_samples_per_second": 236.73, |
| "eval_steps_per_second": 7.399, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.22119471797673457, |
| "grad_norm": 16.560897827148438, |
| "learning_rate": 1.9764738977228987e-05, |
| "loss": 0.3182, |
| "step": 16600 |
| }, |
| { |
| "epoch": 0.22252721627779926, |
| "grad_norm": 1.3701841831207275, |
| "learning_rate": 1.9749933374789022e-05, |
| "loss": 0.283, |
| "step": 16700 |
| }, |
| { |
| "epoch": 0.22385971457886392, |
| "grad_norm": 12.799971580505371, |
| "learning_rate": 1.973512777234906e-05, |
| "loss": 0.3134, |
| "step": 16800 |
| }, |
| { |
| "epoch": 0.22519221287992858, |
| "grad_norm": 4.794546127319336, |
| "learning_rate": 1.9720322169909095e-05, |
| "loss": 0.3454, |
| "step": 16900 |
| }, |
| { |
| "epoch": 0.22652471118099324, |
| "grad_norm": 21.59016990661621, |
| "learning_rate": 1.970551656746913e-05, |
| "loss": 0.3485, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.22652471118099324, |
| "eval_dev_accuracy": 0.9593356804149359, |
| "eval_dev_accuracy_threshold": 0.9145892858505249, |
| "eval_dev_average_precision": 0.747695886787644, |
| "eval_dev_f1": 0.6768515829218704, |
| "eval_dev_f1_threshold": 0.8686491847038269, |
| "eval_dev_precision": 0.7203264094955489, |
| "eval_dev_recall": 0.6383258463898324, |
| "eval_loss": 0.34743377566337585, |
| "eval_runtime": 563.0506, |
| "eval_samples_per_second": 235.586, |
| "eval_steps_per_second": 7.363, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.2278572094820579, |
| "grad_norm": 14.225733757019043, |
| "learning_rate": 1.969071096502917e-05, |
| "loss": 0.2942, |
| "step": 17100 |
| }, |
| { |
| "epoch": 0.2291897077831226, |
| "grad_norm": 7.983681678771973, |
| "learning_rate": 1.9675905362589203e-05, |
| "loss": 0.2865, |
| "step": 17200 |
| }, |
| { |
| "epoch": 0.23052220608418725, |
| "grad_norm": 2.1385481357574463, |
| "learning_rate": 1.9661099760149242e-05, |
| "loss": 0.3489, |
| "step": 17300 |
| }, |
| { |
| "epoch": 0.2318547043852519, |
| "grad_norm": 12.413968086242676, |
| "learning_rate": 1.9646294157709277e-05, |
| "loss": 0.2959, |
| "step": 17400 |
| }, |
| { |
| "epoch": 0.23318720268631657, |
| "grad_norm": 15.191158294677734, |
| "learning_rate": 1.9631488555269315e-05, |
| "loss": 0.3626, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.23318720268631657, |
| "eval_dev_accuracy": 0.959637232655092, |
| "eval_dev_accuracy_threshold": 0.9243228435516357, |
| "eval_dev_average_precision": 0.7598083206267562, |
| "eval_dev_f1": 0.6903569873748368, |
| "eval_dev_f1_threshold": 0.7939244508743286, |
| "eval_dev_precision": 0.6858038706887231, |
| "eval_dev_recall": 0.6949709652678865, |
| "eval_loss": 0.28921985626220703, |
| "eval_runtime": 562.3661, |
| "eval_samples_per_second": 235.873, |
| "eval_steps_per_second": 7.372, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.23451970098738123, |
| "grad_norm": 2.7491862773895264, |
| "learning_rate": 1.961668295282935e-05, |
| "loss": 0.3107, |
| "step": 17600 |
| }, |
| { |
| "epoch": 0.23585219928844592, |
| "grad_norm": 52.241886138916016, |
| "learning_rate": 1.960187735038939e-05, |
| "loss": 0.2914, |
| "step": 17700 |
| }, |
| { |
| "epoch": 0.23718469758951058, |
| "grad_norm": 11.401723861694336, |
| "learning_rate": 1.9587071747949427e-05, |
| "loss": 0.3298, |
| "step": 17800 |
| }, |
| { |
| "epoch": 0.23851719589057524, |
| "grad_norm": 4.170936107635498, |
| "learning_rate": 1.957226614550946e-05, |
| "loss": 0.3315, |
| "step": 17900 |
| }, |
| { |
| "epoch": 0.2398496941916399, |
| "grad_norm": 5.668073654174805, |
| "learning_rate": 1.95574605430695e-05, |
| "loss": 0.3097, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.2398496941916399, |
| "eval_dev_accuracy": 0.9607454371376661, |
| "eval_dev_accuracy_threshold": 0.9465240836143494, |
| "eval_dev_average_precision": 0.7633624538366494, |
| "eval_dev_f1": 0.6969561824060653, |
| "eval_dev_f1_threshold": 0.8802664279937744, |
| "eval_dev_precision": 0.7094540914765634, |
| "eval_dev_recall": 0.6848909827982907, |
| "eval_loss": 0.2974649667739868, |
| "eval_runtime": 565.9458, |
| "eval_samples_per_second": 234.381, |
| "eval_steps_per_second": 7.326, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.24118219249270456, |
| "grad_norm": 18.609146118164062, |
| "learning_rate": 1.9542654940629535e-05, |
| "loss": 0.3474, |
| "step": 18100 |
| }, |
| { |
| "epoch": 0.24251469079376925, |
| "grad_norm": 5.154010772705078, |
| "learning_rate": 1.9527849338189573e-05, |
| "loss": 0.2917, |
| "step": 18200 |
| }, |
| { |
| "epoch": 0.2438471890948339, |
| "grad_norm": 9.549324035644531, |
| "learning_rate": 1.9513043735749608e-05, |
| "loss": 0.3557, |
| "step": 18300 |
| }, |
| { |
| "epoch": 0.24517968739589857, |
| "grad_norm": 1.6343746185302734, |
| "learning_rate": 1.9498238133309647e-05, |
| "loss": 0.3394, |
| "step": 18400 |
| }, |
| { |
| "epoch": 0.24651218569696323, |
| "grad_norm": 9.207841873168945, |
| "learning_rate": 1.9483432530869685e-05, |
| "loss": 0.2891, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.24651218569696323, |
| "eval_dev_accuracy": 0.9630900058048806, |
| "eval_dev_accuracy_threshold": 0.9168897271156311, |
| "eval_dev_average_precision": 0.7806456437261002, |
| "eval_dev_f1": 0.7187227550130775, |
| "eval_dev_f1_threshold": 0.6090723276138306, |
| "eval_dev_precision": 0.7149051490514905, |
| "eval_dev_recall": 0.7225813520324312, |
| "eval_loss": 0.3342207372188568, |
| "eval_runtime": 562.3653, |
| "eval_samples_per_second": 235.873, |
| "eval_steps_per_second": 7.372, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.2478446839980279, |
| "grad_norm": 30.978008270263672, |
| "learning_rate": 1.946862692842972e-05, |
| "loss": 0.2514, |
| "step": 18600 |
| }, |
| { |
| "epoch": 0.24917718229909258, |
| "grad_norm": 26.20627784729004, |
| "learning_rate": 1.9453821325989758e-05, |
| "loss": 0.3139, |
| "step": 18700 |
| }, |
| { |
| "epoch": 0.2505096806001572, |
| "grad_norm": 29.30525779724121, |
| "learning_rate": 1.9439015723549793e-05, |
| "loss": 0.2896, |
| "step": 18800 |
| }, |
| { |
| "epoch": 0.2518421789012219, |
| "grad_norm": 1.5062270164489746, |
| "learning_rate": 1.942421012110983e-05, |
| "loss": 0.3161, |
| "step": 18900 |
| }, |
| { |
| "epoch": 0.2531746772022866, |
| "grad_norm": 5.7221784591674805, |
| "learning_rate": 1.9409404518669866e-05, |
| "loss": 0.3331, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.2531746772022866, |
| "eval_dev_accuracy": 0.9605946610175881, |
| "eval_dev_accuracy_threshold": 0.8986555337905884, |
| "eval_dev_average_precision": 0.7733706796615292, |
| "eval_dev_f1": 0.7069406003832233, |
| "eval_dev_f1_threshold": 0.5075786113739014, |
| "eval_dev_precision": 0.6874029603560708, |
| "eval_dev_recall": 0.727621343267229, |
| "eval_loss": 0.333689421415329, |
| "eval_runtime": 564.7555, |
| "eval_samples_per_second": 234.875, |
| "eval_steps_per_second": 7.341, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.2545071755033512, |
| "grad_norm": 0.41423532366752625, |
| "learning_rate": 1.9394598916229905e-05, |
| "loss": 0.2959, |
| "step": 19100 |
| }, |
| { |
| "epoch": 0.2558396738044159, |
| "grad_norm": 18.290435791015625, |
| "learning_rate": 1.937979331378994e-05, |
| "loss": 0.3236, |
| "step": 19200 |
| }, |
| { |
| "epoch": 0.25717217210548055, |
| "grad_norm": 1.2307929992675781, |
| "learning_rate": 1.9364987711349975e-05, |
| "loss": 0.3527, |
| "step": 19300 |
| }, |
| { |
| "epoch": 0.25850467040654523, |
| "grad_norm": 1.151492714881897, |
| "learning_rate": 1.9350182108910013e-05, |
| "loss": 0.3106, |
| "step": 19400 |
| }, |
| { |
| "epoch": 0.2598371687076099, |
| "grad_norm": 1.676810383796692, |
| "learning_rate": 1.9335376506470048e-05, |
| "loss": 0.3271, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.2598371687076099, |
| "eval_dev_accuracy": 0.9614239296780176, |
| "eval_dev_accuracy_threshold": 0.9547422528266907, |
| "eval_dev_average_precision": 0.7587239294098156, |
| "eval_dev_f1": 0.7068855932203391, |
| "eval_dev_f1_threshold": 0.7505875825881958, |
| "eval_dev_precision": 0.6841997334153593, |
| "eval_dev_recall": 0.7311274241262189, |
| "eval_loss": 0.3253738582134247, |
| "eval_runtime": 562.4242, |
| "eval_samples_per_second": 235.849, |
| "eval_steps_per_second": 7.372, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.26116966700867456, |
| "grad_norm": 18.608806610107422, |
| "learning_rate": 1.9320570904030086e-05, |
| "loss": 0.3504, |
| "step": 19600 |
| }, |
| { |
| "epoch": 0.26250216530973924, |
| "grad_norm": 20.453174591064453, |
| "learning_rate": 1.930576530159012e-05, |
| "loss": 0.303, |
| "step": 19700 |
| }, |
| { |
| "epoch": 0.2638346636108039, |
| "grad_norm": 5.1661248207092285, |
| "learning_rate": 1.929095969915016e-05, |
| "loss": 0.2478, |
| "step": 19800 |
| }, |
| { |
| "epoch": 0.26516716191186857, |
| "grad_norm": 1.2466572523117065, |
| "learning_rate": 1.9276154096710194e-05, |
| "loss": 0.3309, |
| "step": 19900 |
| }, |
| { |
| "epoch": 0.26649966021293325, |
| "grad_norm": 2.0681653022766113, |
| "learning_rate": 1.9261348494270233e-05, |
| "loss": 0.3063, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.26649966021293325, |
| "eval_dev_accuracy": 0.9621928878904159, |
| "eval_dev_accuracy_threshold": 0.5716267228126526, |
| "eval_dev_average_precision": 0.7784750233173614, |
| "eval_dev_f1": 0.7154299699632884, |
| "eval_dev_f1_threshold": 0.2591094672679901, |
| "eval_dev_precision": 0.7265845667156253, |
| "eval_dev_recall": 0.7046126876301084, |
| "eval_loss": 0.45214059948921204, |
| "eval_runtime": 560.4518, |
| "eval_samples_per_second": 236.679, |
| "eval_steps_per_second": 7.398, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.2678321585139979, |
| "grad_norm": 18.87665557861328, |
| "learning_rate": 1.924654289183027e-05, |
| "loss": 0.3482, |
| "step": 20100 |
| }, |
| { |
| "epoch": 0.2691646568150626, |
| "grad_norm": 1.1184475421905518, |
| "learning_rate": 1.9231737289390306e-05, |
| "loss": 0.3033, |
| "step": 20200 |
| }, |
| { |
| "epoch": 0.2704971551161272, |
| "grad_norm": 13.190022468566895, |
| "learning_rate": 1.9216931686950344e-05, |
| "loss": 0.288, |
| "step": 20300 |
| }, |
| { |
| "epoch": 0.2718296534171919, |
| "grad_norm": 5.855016231536865, |
| "learning_rate": 1.920212608451038e-05, |
| "loss": 0.3609, |
| "step": 20400 |
| }, |
| { |
| "epoch": 0.2731621517182566, |
| "grad_norm": 0.26388707756996155, |
| "learning_rate": 1.9187320482070418e-05, |
| "loss": 0.3071, |
| "step": 20500 |
| }, |
| { |
| "epoch": 0.2731621517182566, |
| "eval_dev_accuracy": 0.9630975446108845, |
| "eval_dev_accuracy_threshold": 0.9040592908859253, |
| "eval_dev_average_precision": 0.7844185876274975, |
| "eval_dev_f1": 0.7150979850952249, |
| "eval_dev_f1_threshold": 0.6517728567123413, |
| "eval_dev_precision": 0.7206275033377837, |
| "eval_dev_recall": 0.7096526788649063, |
| "eval_loss": 0.3398449718952179, |
| "eval_runtime": 559.7754, |
| "eval_samples_per_second": 236.965, |
| "eval_steps_per_second": 7.407, |
| "step": 20500 |
| }, |
| { |
| "epoch": 0.2744946500193212, |
| "grad_norm": 4.928101539611816, |
| "learning_rate": 1.9172514879630453e-05, |
| "loss": 0.3778, |
| "step": 20600 |
| }, |
| { |
| "epoch": 0.2758271483203859, |
| "grad_norm": 32.13788604736328, |
| "learning_rate": 1.915770927719049e-05, |
| "loss": 0.2681, |
| "step": 20700 |
| }, |
| { |
| "epoch": 0.27715964662145054, |
| "grad_norm": 4.934467792510986, |
| "learning_rate": 1.914290367475053e-05, |
| "loss": 0.3358, |
| "step": 20800 |
| }, |
| { |
| "epoch": 0.27849214492251523, |
| "grad_norm": 20.491180419921875, |
| "learning_rate": 1.9128098072310564e-05, |
| "loss": 0.2964, |
| "step": 20900 |
| }, |
| { |
| "epoch": 0.2798246432235799, |
| "grad_norm": 1.0770193338394165, |
| "learning_rate": 1.9113292469870603e-05, |
| "loss": 0.2193, |
| "step": 21000 |
| }, |
| { |
| "epoch": 0.2798246432235799, |
| "eval_dev_accuracy": 0.9630221565508454, |
| "eval_dev_accuracy_threshold": 0.9147968292236328, |
| "eval_dev_average_precision": 0.7911413496458403, |
| "eval_dev_f1": 0.7163220463124683, |
| "eval_dev_f1_threshold": 0.7818174362182617, |
| "eval_dev_precision": 0.7372999304105776, |
| "eval_dev_recall": 0.6965048756436946, |
| "eval_loss": 0.3126268982887268, |
| "eval_runtime": 558.7817, |
| "eval_samples_per_second": 237.386, |
| "eval_steps_per_second": 7.42, |
| "step": 21000 |
| }, |
| { |
| "epoch": 0.28115714152464455, |
| "grad_norm": 24.751399993896484, |
| "learning_rate": 1.9098486867430638e-05, |
| "loss": 0.3136, |
| "step": 21100 |
| }, |
| { |
| "epoch": 0.28248963982570924, |
| "grad_norm": 38.034759521484375, |
| "learning_rate": 1.9083681264990676e-05, |
| "loss": 0.3121, |
| "step": 21200 |
| }, |
| { |
| "epoch": 0.28382213812677387, |
| "grad_norm": 22.520530700683594, |
| "learning_rate": 1.906887566255071e-05, |
| "loss": 0.2893, |
| "step": 21300 |
| }, |
| { |
| "epoch": 0.28515463642783856, |
| "grad_norm": 13.158409118652344, |
| "learning_rate": 1.905407006011075e-05, |
| "loss": 0.2987, |
| "step": 21400 |
| }, |
| { |
| "epoch": 0.28648713472890325, |
| "grad_norm": 2.2500672340393066, |
| "learning_rate": 1.9039264457670784e-05, |
| "loss": 0.2781, |
| "step": 21500 |
| }, |
| { |
| "epoch": 0.28648713472890325, |
| "eval_dev_accuracy": 0.96250951774258, |
| "eval_dev_accuracy_threshold": 0.9360392093658447, |
| "eval_dev_average_precision": 0.7809073848360293, |
| "eval_dev_f1": 0.724827056110684, |
| "eval_dev_f1_threshold": 0.9214021563529968, |
| "eval_dev_precision": 0.7264223616154947, |
| "eval_dev_recall": 0.7232387421934918, |
| "eval_loss": 0.32232773303985596, |
| "eval_runtime": 558.841, |
| "eval_samples_per_second": 237.361, |
| "eval_steps_per_second": 7.419, |
| "step": 21500 |
| }, |
| { |
| "epoch": 0.2878196330299679, |
| "grad_norm": 7.364509582519531, |
| "learning_rate": 1.902445885523082e-05, |
| "loss": 0.2444, |
| "step": 21600 |
| }, |
| { |
| "epoch": 0.28915213133103257, |
| "grad_norm": 14.986044883728027, |
| "learning_rate": 1.9009653252790857e-05, |
| "loss": 0.2917, |
| "step": 21700 |
| }, |
| { |
| "epoch": 0.2904846296320972, |
| "grad_norm": 1.4703857898712158, |
| "learning_rate": 1.8994847650350892e-05, |
| "loss": 0.32, |
| "step": 21800 |
| }, |
| { |
| "epoch": 0.2918171279331619, |
| "grad_norm": 4.144439220428467, |
| "learning_rate": 1.898004204791093e-05, |
| "loss": 0.2873, |
| "step": 21900 |
| }, |
| { |
| "epoch": 0.2931496262342266, |
| "grad_norm": 3.1540684700012207, |
| "learning_rate": 1.8965236445470966e-05, |
| "loss": 0.2877, |
| "step": 22000 |
| }, |
| { |
| "epoch": 0.2931496262342266, |
| "eval_dev_accuracy": 0.9638212699872594, |
| "eval_dev_accuracy_threshold": 0.8727903366088867, |
| "eval_dev_average_precision": 0.7927687696111004, |
| "eval_dev_f1": 0.7246392958609548, |
| "eval_dev_f1_threshold": 0.7912191152572632, |
| "eval_dev_precision": 0.7370806890299184, |
| "eval_dev_recall": 0.712610934589679, |
| "eval_loss": 0.30881205201148987, |
| "eval_runtime": 559.5919, |
| "eval_samples_per_second": 237.042, |
| "eval_steps_per_second": 7.409, |
| "step": 22000 |
| }, |
| { |
| "epoch": 0.2944821245352912, |
| "grad_norm": 6.18324613571167, |
| "learning_rate": 1.8950430843031004e-05, |
| "loss": 0.2947, |
| "step": 22100 |
| }, |
| { |
| "epoch": 0.2958146228363559, |
| "grad_norm": 12.850146293640137, |
| "learning_rate": 1.893562524059104e-05, |
| "loss": 0.2619, |
| "step": 22200 |
| }, |
| { |
| "epoch": 0.29714712113742053, |
| "grad_norm": 5.986371040344238, |
| "learning_rate": 1.8920819638151077e-05, |
| "loss": 0.3143, |
| "step": 22300 |
| }, |
| { |
| "epoch": 0.2984796194384852, |
| "grad_norm": 6.889712810516357, |
| "learning_rate": 1.8906014035711116e-05, |
| "loss": 0.3585, |
| "step": 22400 |
| }, |
| { |
| "epoch": 0.29981211773954985, |
| "grad_norm": 14.721301078796387, |
| "learning_rate": 1.889120843327115e-05, |
| "loss": 0.28, |
| "step": 22500 |
| }, |
| { |
| "epoch": 0.29981211773954985, |
| "eval_dev_accuracy": 0.9627809147587205, |
| "eval_dev_accuracy_threshold": 0.949242889881134, |
| "eval_dev_average_precision": 0.7827527934861719, |
| "eval_dev_f1": 0.7189280438911163, |
| "eval_dev_f1_threshold": 0.41740649938583374, |
| "eval_dev_precision": 0.6932546545935497, |
| "eval_dev_recall": 0.7465760929111428, |
| "eval_loss": 0.3353007137775421, |
| "eval_runtime": 558.6719, |
| "eval_samples_per_second": 237.433, |
| "eval_steps_per_second": 7.421, |
| "step": 22500 |
| }, |
| { |
| "epoch": 0.30114461604061454, |
| "grad_norm": 1.0338587760925293, |
| "learning_rate": 1.887640283083119e-05, |
| "loss": 0.284, |
| "step": 22600 |
| }, |
| { |
| "epoch": 0.30247711434167923, |
| "grad_norm": 0.5249596834182739, |
| "learning_rate": 1.8861597228391224e-05, |
| "loss": 0.2821, |
| "step": 22700 |
| }, |
| { |
| "epoch": 0.30380961264274386, |
| "grad_norm": 2.10871958732605, |
| "learning_rate": 1.8846791625951262e-05, |
| "loss": 0.2762, |
| "step": 22800 |
| }, |
| { |
| "epoch": 0.30514211094380855, |
| "grad_norm": 8.820456504821777, |
| "learning_rate": 1.8831986023511297e-05, |
| "loss": 0.3152, |
| "step": 22900 |
| }, |
| { |
| "epoch": 0.3064746092448732, |
| "grad_norm": 0.5152029395103455, |
| "learning_rate": 1.8817180421071335e-05, |
| "loss": 0.2879, |
| "step": 23000 |
| }, |
| { |
| "epoch": 0.3064746092448732, |
| "eval_dev_accuracy": 0.9637911147632438, |
| "eval_dev_accuracy_threshold": 0.8721863627433777, |
| "eval_dev_average_precision": 0.7944006641896747, |
| "eval_dev_f1": 0.7282656663724625, |
| "eval_dev_f1_threshold": 0.7819468975067139, |
| "eval_dev_precision": 0.7333629596711476, |
| "eval_dev_recall": 0.7232387421934918, |
| "eval_loss": 0.27257823944091797, |
| "eval_runtime": 559.9281, |
| "eval_samples_per_second": 236.9, |
| "eval_steps_per_second": 7.405, |
| "step": 23000 |
| }, |
| { |
| "epoch": 0.3078071075459379, |
| "grad_norm": 7.670559406280518, |
| "learning_rate": 1.8802374818631374e-05, |
| "loss": 0.2738, |
| "step": 23100 |
| }, |
| { |
| "epoch": 0.30913960584700256, |
| "grad_norm": 1.2862569093704224, |
| "learning_rate": 1.878756921619141e-05, |
| "loss": 0.2624, |
| "step": 23200 |
| }, |
| { |
| "epoch": 0.3104721041480672, |
| "grad_norm": 6.1086249351501465, |
| "learning_rate": 1.8772763613751447e-05, |
| "loss": 0.2698, |
| "step": 23300 |
| }, |
| { |
| "epoch": 0.3118046024491319, |
| "grad_norm": 2.7864394187927246, |
| "learning_rate": 1.8757958011311482e-05, |
| "loss": 0.278, |
| "step": 23400 |
| }, |
| { |
| "epoch": 0.3131371007501965, |
| "grad_norm": 0.4662020206451416, |
| "learning_rate": 1.874315240887152e-05, |
| "loss": 0.3024, |
| "step": 23500 |
| }, |
| { |
| "epoch": 0.3131371007501965, |
| "eval_dev_accuracy": 0.9640398953613727, |
| "eval_dev_accuracy_threshold": 0.9022700786590576, |
| "eval_dev_average_precision": 0.789665459307555, |
| "eval_dev_f1": 0.7233386555084511, |
| "eval_dev_f1_threshold": 0.49045658111572266, |
| "eval_dev_precision": 0.7269809650287737, |
| "eval_dev_recall": 0.719732661334502, |
| "eval_loss": 0.3414628207683563, |
| "eval_runtime": 561.5298, |
| "eval_samples_per_second": 236.224, |
| "eval_steps_per_second": 7.383, |
| "step": 23500 |
| }, |
| { |
| "epoch": 0.3144695990512612, |
| "grad_norm": 16.328683853149414, |
| "learning_rate": 1.8728346806431555e-05, |
| "loss": 0.3255, |
| "step": 23600 |
| }, |
| { |
| "epoch": 0.3158020973523259, |
| "grad_norm": 6.683753490447998, |
| "learning_rate": 1.8713541203991594e-05, |
| "loss": 0.3298, |
| "step": 23700 |
| }, |
| { |
| "epoch": 0.3171345956533905, |
| "grad_norm": 14.66252613067627, |
| "learning_rate": 1.869873560155163e-05, |
| "loss": 0.2902, |
| "step": 23800 |
| }, |
| { |
| "epoch": 0.3184670939544552, |
| "grad_norm": 1.7640432119369507, |
| "learning_rate": 1.8683929999111664e-05, |
| "loss": 0.283, |
| "step": 23900 |
| }, |
| { |
| "epoch": 0.31979959225551985, |
| "grad_norm": 20.055587768554688, |
| "learning_rate": 1.8669124396671702e-05, |
| "loss": 0.3098, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.31979959225551985, |
| "eval_dev_accuracy": 0.9638288087932633, |
| "eval_dev_accuracy_threshold": 0.9375428557395935, |
| "eval_dev_average_precision": 0.7947515841312096, |
| "eval_dev_f1": 0.731536653364675, |
| "eval_dev_f1_threshold": 0.8831270337104797, |
| "eval_dev_precision": 0.7299803622081605, |
| "eval_dev_recall": 0.7330995946094007, |
| "eval_loss": 0.2945517897605896, |
| "eval_runtime": 562.1643, |
| "eval_samples_per_second": 235.958, |
| "eval_steps_per_second": 7.375, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.32113209055658454, |
| "grad_norm": 40.83311080932617, |
| "learning_rate": 1.8654318794231737e-05, |
| "loss": 0.2592, |
| "step": 24100 |
| }, |
| { |
| "epoch": 0.3224645888576492, |
| "grad_norm": 5.973490238189697, |
| "learning_rate": 1.8639513191791775e-05, |
| "loss": 0.27, |
| "step": 24200 |
| }, |
| { |
| "epoch": 0.32379708715871386, |
| "grad_norm": 8.698867797851562, |
| "learning_rate": 1.862470758935181e-05, |
| "loss": 0.2738, |
| "step": 24300 |
| }, |
| { |
| "epoch": 0.32512958545977855, |
| "grad_norm": 8.795327186584473, |
| "learning_rate": 1.860990198691185e-05, |
| "loss": 0.2528, |
| "step": 24400 |
| }, |
| { |
| "epoch": 0.3264620837608432, |
| "grad_norm": 0.2583109438419342, |
| "learning_rate": 1.8595096384471883e-05, |
| "loss": 0.2694, |
| "step": 24500 |
| }, |
| { |
| "epoch": 0.3264620837608432, |
| "eval_dev_accuracy": 0.9628261475947439, |
| "eval_dev_accuracy_threshold": 0.9562203884124756, |
| "eval_dev_average_precision": 0.7884777296034856, |
| "eval_dev_f1": 0.7260596117035821, |
| "eval_dev_f1_threshold": 0.9503564834594727, |
| "eval_dev_precision": 0.7248307490718497, |
| "eval_dev_recall": 0.7272926481866988, |
| "eval_loss": 0.3025730550289154, |
| "eval_runtime": 561.6413, |
| "eval_samples_per_second": 236.177, |
| "eval_steps_per_second": 7.382, |
| "step": 24500 |
| }, |
| { |
| "epoch": 0.32779458206190787, |
| "grad_norm": 2.1876091957092285, |
| "learning_rate": 1.8580290782031922e-05, |
| "loss": 0.2288, |
| "step": 24600 |
| }, |
| { |
| "epoch": 0.32912708036297256, |
| "grad_norm": 7.1153459548950195, |
| "learning_rate": 1.856548517959196e-05, |
| "loss": 0.2966, |
| "step": 24700 |
| }, |
| { |
| "epoch": 0.3304595786640372, |
| "grad_norm": 0.5204883217811584, |
| "learning_rate": 1.8550679577151995e-05, |
| "loss": 0.3103, |
| "step": 24800 |
| }, |
| { |
| "epoch": 0.3317920769651019, |
| "grad_norm": 0.5321233868598938, |
| "learning_rate": 1.8535873974712033e-05, |
| "loss": 0.2403, |
| "step": 24900 |
| }, |
| { |
| "epoch": 0.3331245752661665, |
| "grad_norm": 0.5437518358230591, |
| "learning_rate": 1.8521068372272068e-05, |
| "loss": 0.2986, |
| "step": 25000 |
| }, |
| { |
| "epoch": 0.3331245752661665, |
| "eval_dev_accuracy": 0.964250981929482, |
| "eval_dev_accuracy_threshold": 0.8822938203811646, |
| "eval_dev_average_precision": 0.7923663060740336, |
| "eval_dev_f1": 0.7238444852327716, |
| "eval_dev_f1_threshold": 0.3350263833999634, |
| "eval_dev_precision": 0.7350881156800723, |
| "eval_dev_recall": 0.7129396296702093, |
| "eval_loss": 0.39168474078178406, |
| "eval_runtime": 559.9637, |
| "eval_samples_per_second": 236.885, |
| "eval_steps_per_second": 7.404, |
| "step": 25000 |
| }, |
| { |
| "epoch": 0.3344570735672312, |
| "grad_norm": 10.434455871582031, |
| "learning_rate": 1.8506262769832107e-05, |
| "loss": 0.2954, |
| "step": 25100 |
| }, |
| { |
| "epoch": 0.3357895718682959, |
| "grad_norm": 29.660995483398438, |
| "learning_rate": 1.849145716739214e-05, |
| "loss": 0.2778, |
| "step": 25200 |
| }, |
| { |
| "epoch": 0.3371220701693605, |
| "grad_norm": 17.967578887939453, |
| "learning_rate": 1.847665156495218e-05, |
| "loss": 0.2522, |
| "step": 25300 |
| }, |
| { |
| "epoch": 0.3384545684704252, |
| "grad_norm": 16.963655471801758, |
| "learning_rate": 1.8461845962512218e-05, |
| "loss": 0.3071, |
| "step": 25400 |
| }, |
| { |
| "epoch": 0.33978706677148984, |
| "grad_norm": 3.178967237472534, |
| "learning_rate": 1.8447040360072253e-05, |
| "loss": 0.3088, |
| "step": 25500 |
| }, |
| { |
| "epoch": 0.33978706677148984, |
| "eval_dev_accuracy": 0.9653064147700288, |
| "eval_dev_accuracy_threshold": 0.9469561576843262, |
| "eval_dev_average_precision": 0.8090508028224602, |
| "eval_dev_f1": 0.7406513872135102, |
| "eval_dev_f1_threshold": 0.9149296879768372, |
| "eval_dev_precision": 0.7413017231917463, |
| "eval_dev_recall": 0.7400021913005369, |
| "eval_loss": 0.28880587220191956, |
| "eval_runtime": 559.426, |
| "eval_samples_per_second": 237.113, |
| "eval_steps_per_second": 7.411, |
| "step": 25500 |
| }, |
| { |
| "epoch": 0.34111956507255453, |
| "grad_norm": 31.83365821838379, |
| "learning_rate": 1.843223475763229e-05, |
| "loss": 0.3328, |
| "step": 25600 |
| }, |
| { |
| "epoch": 0.3424520633736192, |
| "grad_norm": 73.58321380615234, |
| "learning_rate": 1.8417429155192326e-05, |
| "loss": 0.249, |
| "step": 25700 |
| }, |
| { |
| "epoch": 0.34378456167468385, |
| "grad_norm": 31.073486328125, |
| "learning_rate": 1.8402623552752365e-05, |
| "loss": 0.248, |
| "step": 25800 |
| }, |
| { |
| "epoch": 0.34511705997574854, |
| "grad_norm": 2.6796510219573975, |
| "learning_rate": 1.83878179503124e-05, |
| "loss": 0.2735, |
| "step": 25900 |
| }, |
| { |
| "epoch": 0.3464495582768132, |
| "grad_norm": 19.556621551513672, |
| "learning_rate": 1.8373012347872438e-05, |
| "loss": 0.3087, |
| "step": 26000 |
| }, |
| { |
| "epoch": 0.3464495582768132, |
| "eval_dev_accuracy": 0.9655853505921732, |
| "eval_dev_accuracy_threshold": 0.9267855882644653, |
| "eval_dev_average_precision": 0.8095342358911112, |
| "eval_dev_f1": 0.7389250472391351, |
| "eval_dev_f1_threshold": 0.6512651443481445, |
| "eval_dev_precision": 0.7092191435768262, |
| "eval_dev_recall": 0.7712282239509148, |
| "eval_loss": 0.26777184009552, |
| "eval_runtime": 562.5407, |
| "eval_samples_per_second": 235.8, |
| "eval_steps_per_second": 7.37, |
| "step": 26000 |
| }, |
| { |
| "epoch": 0.34778205657787786, |
| "grad_norm": 10.894082069396973, |
| "learning_rate": 1.8358206745432473e-05, |
| "loss": 0.2852, |
| "step": 26100 |
| }, |
| { |
| "epoch": 0.34911455487894255, |
| "grad_norm": 43.44607162475586, |
| "learning_rate": 1.8343401142992508e-05, |
| "loss": 0.255, |
| "step": 26200 |
| }, |
| { |
| "epoch": 0.3504470531800072, |
| "grad_norm": 0.060168083757162094, |
| "learning_rate": 1.8328595540552546e-05, |
| "loss": 0.27, |
| "step": 26300 |
| }, |
| { |
| "epoch": 0.3517795514810719, |
| "grad_norm": 0.13352444767951965, |
| "learning_rate": 1.831378993811258e-05, |
| "loss": 0.3315, |
| "step": 26400 |
| }, |
| { |
| "epoch": 0.3531120497821365, |
| "grad_norm": 2.8769795894622803, |
| "learning_rate": 1.829898433567262e-05, |
| "loss": 0.2548, |
| "step": 26500 |
| }, |
| { |
| "epoch": 0.3531120497821365, |
| "eval_dev_accuracy": 0.9633161699849978, |
| "eval_dev_accuracy_threshold": 0.9598461389541626, |
| "eval_dev_average_precision": 0.7967367390804647, |
| "eval_dev_f1": 0.7211769095463995, |
| "eval_dev_f1_threshold": 0.9407143592834473, |
| "eval_dev_precision": 0.7022005397550343, |
| "eval_dev_recall": 0.7412074065958146, |
| "eval_loss": 0.26967185735702515, |
| "eval_runtime": 561.1397, |
| "eval_samples_per_second": 236.389, |
| "eval_steps_per_second": 7.389, |
| "step": 26500 |
| }, |
| { |
| "epoch": 0.3544445480832012, |
| "grad_norm": 6.555627822875977, |
| "learning_rate": 1.8284178733232655e-05, |
| "loss": 0.2967, |
| "step": 26600 |
| }, |
| { |
| "epoch": 0.3557770463842659, |
| "grad_norm": 18.727455139160156, |
| "learning_rate": 1.8269373130792693e-05, |
| "loss": 0.2907, |
| "step": 26700 |
| }, |
| { |
| "epoch": 0.3571095446853305, |
| "grad_norm": 16.004812240600586, |
| "learning_rate": 1.825456752835273e-05, |
| "loss": 0.2871, |
| "step": 26800 |
| }, |
| { |
| "epoch": 0.3584420429863952, |
| "grad_norm": 0.3446504771709442, |
| "learning_rate": 1.8239761925912766e-05, |
| "loss": 0.287, |
| "step": 26900 |
| }, |
| { |
| "epoch": 0.35977454128745984, |
| "grad_norm": 1.3801554441452026, |
| "learning_rate": 1.8224956323472805e-05, |
| "loss": 0.2461, |
| "step": 27000 |
| }, |
| { |
| "epoch": 0.35977454128745984, |
| "eval_dev_accuracy": 0.9659321356683529, |
| "eval_dev_accuracy_threshold": 0.9563218355178833, |
| "eval_dev_average_precision": 0.8166384763438364, |
| "eval_dev_f1": 0.7424130273871207, |
| "eval_dev_f1_threshold": 0.5458764433860779, |
| "eval_dev_precision": 0.7173801982221314, |
| "eval_dev_recall": 0.7692560534677331, |
| "eval_loss": 0.3241870701313019, |
| "eval_runtime": 560.7697, |
| "eval_samples_per_second": 236.545, |
| "eval_steps_per_second": 7.393, |
| "step": 27000 |
| }, |
| { |
| "epoch": 0.3611070395885245, |
| "grad_norm": 11.259644508361816, |
| "learning_rate": 1.821015072103284e-05, |
| "loss": 0.3134, |
| "step": 27100 |
| }, |
| { |
| "epoch": 0.3624395378895892, |
| "grad_norm": 15.958681106567383, |
| "learning_rate": 1.8195345118592878e-05, |
| "loss": 0.229, |
| "step": 27200 |
| }, |
| { |
| "epoch": 0.36377203619065385, |
| "grad_norm": 3.471926689147949, |
| "learning_rate": 1.8180539516152913e-05, |
| "loss": 0.2318, |
| "step": 27300 |
| }, |
| { |
| "epoch": 0.36510453449171854, |
| "grad_norm": 57.36378479003906, |
| "learning_rate": 1.816573391371295e-05, |
| "loss": 0.2584, |
| "step": 27400 |
| }, |
| { |
| "epoch": 0.36643703279278317, |
| "grad_norm": 15.649163246154785, |
| "learning_rate": 1.8150928311272986e-05, |
| "loss": 0.3092, |
| "step": 27500 |
| }, |
| { |
| "epoch": 0.36643703279278317, |
| "eval_dev_accuracy": 0.9644696073035952, |
| "eval_dev_accuracy_threshold": 0.9345089793205261, |
| "eval_dev_average_precision": 0.8036883896122946, |
| "eval_dev_f1": 0.7414679756895747, |
| "eval_dev_f1_threshold": 0.8182344436645508, |
| "eval_dev_precision": 0.7049585144211774, |
| "eval_dev_recall": 0.7819655965815712, |
| "eval_loss": 0.2574635446071625, |
| "eval_runtime": 562.3168, |
| "eval_samples_per_second": 235.894, |
| "eval_steps_per_second": 7.373, |
| "step": 27500 |
| }, |
| { |
| "epoch": 0.36776953109384786, |
| "grad_norm": 31.03179931640625, |
| "learning_rate": 1.8136122708833024e-05, |
| "loss": 0.2781, |
| "step": 27600 |
| }, |
| { |
| "epoch": 0.36910202939491255, |
| "grad_norm": 32.65872573852539, |
| "learning_rate": 1.8121317106393063e-05, |
| "loss": 0.2411, |
| "step": 27700 |
| }, |
| { |
| "epoch": 0.3704345276959772, |
| "grad_norm": 10.414048194885254, |
| "learning_rate": 1.8106511503953098e-05, |
| "loss": 0.2768, |
| "step": 27800 |
| }, |
| { |
| "epoch": 0.37176702599704187, |
| "grad_norm": 0.27181100845336914, |
| "learning_rate": 1.8091705901513136e-05, |
| "loss": 0.256, |
| "step": 27900 |
| }, |
| { |
| "epoch": 0.3730995242981065, |
| "grad_norm": 15.69724178314209, |
| "learning_rate": 1.807690029907317e-05, |
| "loss": 0.3024, |
| "step": 28000 |
| }, |
| { |
| "epoch": 0.3730995242981065, |
| "eval_dev_accuracy": 0.9660904505944349, |
| "eval_dev_accuracy_threshold": 0.960444450378418, |
| "eval_dev_average_precision": 0.8143885872198954, |
| "eval_dev_f1": 0.7409103007718926, |
| "eval_dev_f1_threshold": 0.8899838328361511, |
| "eval_dev_precision": 0.7205425553944916, |
| "eval_dev_recall": 0.7624630218034404, |
| "eval_loss": 0.2652537524700165, |
| "eval_runtime": 560.0512, |
| "eval_samples_per_second": 236.848, |
| "eval_steps_per_second": 7.403, |
| "step": 28000 |
| }, |
| { |
| "epoch": 0.3744320225991712, |
| "grad_norm": 4.027531623840332, |
| "learning_rate": 1.806209469663321e-05, |
| "loss": 0.2676, |
| "step": 28100 |
| }, |
| { |
| "epoch": 0.3757645209002359, |
| "grad_norm": 6.543447494506836, |
| "learning_rate": 1.8047289094193244e-05, |
| "loss": 0.2384, |
| "step": 28200 |
| }, |
| { |
| "epoch": 0.3770970192013005, |
| "grad_norm": 35.99159622192383, |
| "learning_rate": 1.8032483491753283e-05, |
| "loss": 0.2586, |
| "step": 28300 |
| }, |
| { |
| "epoch": 0.3784295175023652, |
| "grad_norm": 1.3943774700164795, |
| "learning_rate": 1.8017677889313318e-05, |
| "loss": 0.2663, |
| "step": 28400 |
| }, |
| { |
| "epoch": 0.37976201580342983, |
| "grad_norm": 0.43371257185935974, |
| "learning_rate": 1.8002872286873352e-05, |
| "loss": 0.3077, |
| "step": 28500 |
| }, |
| { |
| "epoch": 0.37976201580342983, |
| "eval_dev_accuracy": 0.9662638431325247, |
| "eval_dev_accuracy_threshold": 0.9389976263046265, |
| "eval_dev_average_precision": 0.8185963813825948, |
| "eval_dev_f1": 0.7529551465428834, |
| "eval_dev_f1_threshold": 0.8002798557281494, |
| "eval_dev_precision": 0.7420212765957447, |
| "eval_dev_recall": 0.7642160622329353, |
| "eval_loss": 0.2862532138824463, |
| "eval_runtime": 562.8872, |
| "eval_samples_per_second": 235.655, |
| "eval_steps_per_second": 7.366, |
| "step": 28500 |
| }, |
| { |
| "epoch": 0.3810945141044945, |
| "grad_norm": 7.868191719055176, |
| "learning_rate": 1.798806668443339e-05, |
| "loss": 0.2609, |
| "step": 28600 |
| }, |
| { |
| "epoch": 0.3824270124055592, |
| "grad_norm": 0.37841853499412537, |
| "learning_rate": 1.7973261081993426e-05, |
| "loss": 0.277, |
| "step": 28700 |
| }, |
| { |
| "epoch": 0.38375951070662384, |
| "grad_norm": 1.237690806388855, |
| "learning_rate": 1.7958455479553464e-05, |
| "loss": 0.2635, |
| "step": 28800 |
| }, |
| { |
| "epoch": 0.38509200900768853, |
| "grad_norm": 14.932636260986328, |
| "learning_rate": 1.79436498771135e-05, |
| "loss": 0.2518, |
| "step": 28900 |
| }, |
| { |
| "epoch": 0.38642450730875316, |
| "grad_norm": 7.698137283325195, |
| "learning_rate": 1.7928844274673537e-05, |
| "loss": 0.2686, |
| "step": 29000 |
| }, |
| { |
| "epoch": 0.38642450730875316, |
| "eval_dev_accuracy": 0.9663693864165793, |
| "eval_dev_accuracy_threshold": 0.9125785231590271, |
| "eval_dev_average_precision": 0.8194613717227588, |
| "eval_dev_f1": 0.7500950931913275, |
| "eval_dev_f1_threshold": 0.7369703054428101, |
| "eval_dev_precision": 0.7440707201379905, |
| "eval_dev_recall": 0.7562178152733647, |
| "eval_loss": 0.25516369938850403, |
| "eval_runtime": 561.2432, |
| "eval_samples_per_second": 236.345, |
| "eval_steps_per_second": 7.387, |
| "step": 29000 |
| }, |
| { |
| "epoch": 0.38775700560981785, |
| "grad_norm": 11.858484268188477, |
| "learning_rate": 1.7914038672233576e-05, |
| "loss": 0.2419, |
| "step": 29100 |
| }, |
| { |
| "epoch": 0.38908950391088254, |
| "grad_norm": 1.3223813772201538, |
| "learning_rate": 1.789923306979361e-05, |
| "loss": 0.268, |
| "step": 29200 |
| }, |
| { |
| "epoch": 0.3904220022119472, |
| "grad_norm": 1.3486851453781128, |
| "learning_rate": 1.788442746735365e-05, |
| "loss": 0.2851, |
| "step": 29300 |
| }, |
| { |
| "epoch": 0.39175450051301186, |
| "grad_norm": 4.85157585144043, |
| "learning_rate": 1.7869621864913684e-05, |
| "loss": 0.2212, |
| "step": 29400 |
| }, |
| { |
| "epoch": 0.3930869988140765, |
| "grad_norm": 6.538160800933838, |
| "learning_rate": 1.7854816262473722e-05, |
| "loss": 0.2571, |
| "step": 29500 |
| }, |
| { |
| "epoch": 0.3930869988140765, |
| "eval_dev_accuracy": 0.9645676117816461, |
| "eval_dev_accuracy_threshold": 0.8994825482368469, |
| "eval_dev_average_precision": 0.8082227405172548, |
| "eval_dev_f1": 0.7435443565181175, |
| "eval_dev_f1_threshold": 0.609738826751709, |
| "eval_dev_precision": 0.7083622656482492, |
| "eval_dev_recall": 0.7824038566889449, |
| "eval_loss": 0.2665890157222748, |
| "eval_runtime": 562.6368, |
| "eval_samples_per_second": 235.76, |
| "eval_steps_per_second": 7.369, |
| "step": 29500 |
| }, |
| { |
| "epoch": 0.3944194971151412, |
| "grad_norm": 10.298799514770508, |
| "learning_rate": 1.7840010660033757e-05, |
| "loss": 0.2803, |
| "step": 29600 |
| }, |
| { |
| "epoch": 0.39575199541620587, |
| "grad_norm": 46.07704162597656, |
| "learning_rate": 1.7825205057593796e-05, |
| "loss": 0.3034, |
| "step": 29700 |
| }, |
| { |
| "epoch": 0.3970844937172705, |
| "grad_norm": 12.525829315185547, |
| "learning_rate": 1.781039945515383e-05, |
| "loss": 0.2332, |
| "step": 29800 |
| }, |
| { |
| "epoch": 0.3984169920183352, |
| "grad_norm": 3.9645519256591797, |
| "learning_rate": 1.779559385271387e-05, |
| "loss": 0.2444, |
| "step": 29900 |
| }, |
| { |
| "epoch": 0.3997494903193998, |
| "grad_norm": 18.388866424560547, |
| "learning_rate": 1.7780788250273907e-05, |
| "loss": 0.247, |
| "step": 30000 |
| }, |
| { |
| "epoch": 0.3997494903193998, |
| "eval_dev_accuracy": 0.9654571908901068, |
| "eval_dev_accuracy_threshold": 0.9365599155426025, |
| "eval_dev_average_precision": 0.8171252302464322, |
| "eval_dev_f1": 0.747335818153184, |
| "eval_dev_f1_threshold": 0.8443748354911804, |
| "eval_dev_precision": 0.7173956863535578, |
| "eval_dev_recall": 0.779883861071546, |
| "eval_loss": 0.267426073551178, |
| "eval_runtime": 564.1091, |
| "eval_samples_per_second": 235.144, |
| "eval_steps_per_second": 7.35, |
| "step": 30000 |
| }, |
| { |
| "epoch": 0.4010819886204645, |
| "grad_norm": 23.66806411743164, |
| "learning_rate": 1.7765982647833942e-05, |
| "loss": 0.2861, |
| "step": 30100 |
| }, |
| { |
| "epoch": 0.4024144869215292, |
| "grad_norm": 3.966848611831665, |
| "learning_rate": 1.775117704539398e-05, |
| "loss": 0.2409, |
| "step": 30200 |
| }, |
| { |
| "epoch": 0.40374698522259383, |
| "grad_norm": 14.780499458312988, |
| "learning_rate": 1.7736371442954015e-05, |
| "loss": 0.2658, |
| "step": 30300 |
| }, |
| { |
| "epoch": 0.4050794835236585, |
| "grad_norm": 30.90425682067871, |
| "learning_rate": 1.7721565840514054e-05, |
| "loss": 0.3114, |
| "step": 30400 |
| }, |
| { |
| "epoch": 0.40641198182472316, |
| "grad_norm": 5.639667987823486, |
| "learning_rate": 1.770676023807409e-05, |
| "loss": 0.2685, |
| "step": 30500 |
| }, |
| { |
| "epoch": 0.40641198182472316, |
| "eval_dev_accuracy": 0.9670704953749425, |
| "eval_dev_accuracy_threshold": 0.9521620869636536, |
| "eval_dev_average_precision": 0.8255021501170436, |
| "eval_dev_f1": 0.7578924800343035, |
| "eval_dev_f1_threshold": 0.8574447631835938, |
| "eval_dev_precision": 0.7418677859391396, |
| "eval_dev_recall": 0.7746247397830612, |
| "eval_loss": 0.27643173933029175, |
| "eval_runtime": 561.8887, |
| "eval_samples_per_second": 236.073, |
| "eval_steps_per_second": 7.379, |
| "step": 30500 |
| }, |
| { |
| "epoch": 0.40774448012578784, |
| "grad_norm": 0.6215185523033142, |
| "learning_rate": 1.7691954635634127e-05, |
| "loss": 0.2354, |
| "step": 30600 |
| }, |
| { |
| "epoch": 0.40907697842685253, |
| "grad_norm": 4.660243034362793, |
| "learning_rate": 1.7677149033194162e-05, |
| "loss": 0.2576, |
| "step": 30700 |
| }, |
| { |
| "epoch": 0.41040947672791717, |
| "grad_norm": 0.37590527534484863, |
| "learning_rate": 1.7662343430754197e-05, |
| "loss": 0.2647, |
| "step": 30800 |
| }, |
| { |
| "epoch": 0.41174197502898185, |
| "grad_norm": 0.8927075862884521, |
| "learning_rate": 1.7647537828314235e-05, |
| "loss": 0.2175, |
| "step": 30900 |
| }, |
| { |
| "epoch": 0.4130744733300465, |
| "grad_norm": 3.024475336074829, |
| "learning_rate": 1.763273222587427e-05, |
| "loss": 0.3085, |
| "step": 31000 |
| }, |
| { |
| "epoch": 0.4130744733300465, |
| "eval_dev_accuracy": 0.9660376789524076, |
| "eval_dev_accuracy_threshold": 0.9548216462135315, |
| "eval_dev_average_precision": 0.8156242337854964, |
| "eval_dev_f1": 0.7478032096816627, |
| "eval_dev_f1_threshold": 0.6426188945770264, |
| "eval_dev_precision": 0.7193763919821826, |
| "eval_dev_recall": 0.7785690807494248, |
| "eval_loss": 0.26265889406204224, |
| "eval_runtime": 565.3292, |
| "eval_samples_per_second": 234.637, |
| "eval_steps_per_second": 7.334, |
| "step": 31000 |
| }, |
| { |
| "epoch": 0.4144069716311112, |
| "grad_norm": 0.6045613884925842, |
| "learning_rate": 1.761792662343431e-05, |
| "loss": 0.2637, |
| "step": 31100 |
| }, |
| { |
| "epoch": 0.4157394699321758, |
| "grad_norm": 0.6080629229545593, |
| "learning_rate": 1.7603121020994344e-05, |
| "loss": 0.2567, |
| "step": 31200 |
| }, |
| { |
| "epoch": 0.4170719682332405, |
| "grad_norm": 0.933800995349884, |
| "learning_rate": 1.7588315418554382e-05, |
| "loss": 0.2906, |
| "step": 31300 |
| }, |
| { |
| "epoch": 0.4184044665343052, |
| "grad_norm": 3.305546522140503, |
| "learning_rate": 1.757350981611442e-05, |
| "loss": 0.2516, |
| "step": 31400 |
| }, |
| { |
| "epoch": 0.4197369648353698, |
| "grad_norm": 9.856147766113281, |
| "learning_rate": 1.7558704213674455e-05, |
| "loss": 0.2342, |
| "step": 31500 |
| }, |
| { |
| "epoch": 0.4197369648353698, |
| "eval_dev_accuracy": 0.9664523132826223, |
| "eval_dev_accuracy_threshold": 0.6949450373649597, |
| "eval_dev_average_precision": 0.8198951977617771, |
| "eval_dev_f1": 0.752799668187474, |
| "eval_dev_f1_threshold": 0.14068716764450073, |
| "eval_dev_precision": 0.7144966046648952, |
| "eval_dev_recall": 0.7954420948833133, |
| "eval_loss": 0.3560490906238556, |
| "eval_runtime": 566.4442, |
| "eval_samples_per_second": 234.175, |
| "eval_steps_per_second": 7.319, |
| "step": 31500 |
| }, |
| { |
| "epoch": 0.4210694631364345, |
| "grad_norm": 6.468503952026367, |
| "learning_rate": 1.7543898611234493e-05, |
| "loss": 0.2595, |
| "step": 31600 |
| }, |
| { |
| "epoch": 0.42240196143749914, |
| "grad_norm": 2.2248482704162598, |
| "learning_rate": 1.752909300879453e-05, |
| "loss": 0.259, |
| "step": 31700 |
| }, |
| { |
| "epoch": 0.42373445973856383, |
| "grad_norm": 2.2780916690826416, |
| "learning_rate": 1.7514287406354567e-05, |
| "loss": 0.2563, |
| "step": 31800 |
| }, |
| { |
| "epoch": 0.4250669580396285, |
| "grad_norm": 5.997177600860596, |
| "learning_rate": 1.74994818039146e-05, |
| "loss": 0.2504, |
| "step": 31900 |
| }, |
| { |
| "epoch": 0.42639945634069315, |
| "grad_norm": 5.018893241882324, |
| "learning_rate": 1.748467620147464e-05, |
| "loss": 0.2751, |
| "step": 32000 |
| }, |
| { |
| "epoch": 0.42639945634069315, |
| "eval_dev_accuracy": 0.9660979894004388, |
| "eval_dev_accuracy_threshold": 0.9447215795516968, |
| "eval_dev_average_precision": 0.818149670082586, |
| "eval_dev_f1": 0.7564001884718078, |
| "eval_dev_f1_threshold": 0.7197975516319275, |
| "eval_dev_precision": 0.7242831361540004, |
| "eval_dev_recall": 0.7914977539169498, |
| "eval_loss": 0.23995983600616455, |
| "eval_runtime": 559.4727, |
| "eval_samples_per_second": 237.093, |
| "eval_steps_per_second": 7.411, |
| "step": 32000 |
| }, |
| { |
| "epoch": 0.42773195464175784, |
| "grad_norm": 9.826861381530762, |
| "learning_rate": 1.7469870599034675e-05, |
| "loss": 0.2521, |
| "step": 32100 |
| }, |
| { |
| "epoch": 0.42906445294282247, |
| "grad_norm": 7.288123607635498, |
| "learning_rate": 1.7455064996594713e-05, |
| "loss": 0.2406, |
| "step": 32200 |
| }, |
| { |
| "epoch": 0.43039695124388716, |
| "grad_norm": 11.257208824157715, |
| "learning_rate": 1.744025939415475e-05, |
| "loss": 0.3026, |
| "step": 32300 |
| }, |
| { |
| "epoch": 0.43172944954495185, |
| "grad_norm": 0.21672357618808746, |
| "learning_rate": 1.7425453791714787e-05, |
| "loss": 0.234, |
| "step": 32400 |
| }, |
| { |
| "epoch": 0.4330619478460165, |
| "grad_norm": 1.5854872465133667, |
| "learning_rate": 1.7410648189274825e-05, |
| "loss": 0.2639, |
| "step": 32500 |
| }, |
| { |
| "epoch": 0.4330619478460165, |
| "eval_dev_accuracy": 0.9651707162619584, |
| "eval_dev_accuracy_threshold": 0.8978205919265747, |
| "eval_dev_average_precision": 0.8087336536278384, |
| "eval_dev_f1": 0.740958788898234, |
| "eval_dev_f1_threshold": 0.7787094712257385, |
| "eval_dev_precision": 0.7121349904011317, |
| "eval_dev_recall": 0.7722143091925058, |
| "eval_loss": 0.2519395053386688, |
| "eval_runtime": 557.1987, |
| "eval_samples_per_second": 238.061, |
| "eval_steps_per_second": 7.441, |
| "step": 32500 |
| }, |
| { |
| "epoch": 0.43439444614708117, |
| "grad_norm": 5.898445129394531, |
| "learning_rate": 1.739584258683486e-05, |
| "loss": 0.2321, |
| "step": 32600 |
| }, |
| { |
| "epoch": 0.4357269444481458, |
| "grad_norm": 0.27915239334106445, |
| "learning_rate": 1.7381036984394898e-05, |
| "loss": 0.1894, |
| "step": 32700 |
| }, |
| { |
| "epoch": 0.4370594427492105, |
| "grad_norm": 0.3429672122001648, |
| "learning_rate": 1.7366231381954933e-05, |
| "loss": 0.3076, |
| "step": 32800 |
| }, |
| { |
| "epoch": 0.4383919410502752, |
| "grad_norm": 0.6808755397796631, |
| "learning_rate": 1.735142577951497e-05, |
| "loss": 0.2392, |
| "step": 32900 |
| }, |
| { |
| "epoch": 0.4397244393513398, |
| "grad_norm": 36.33818435668945, |
| "learning_rate": 1.7336620177075006e-05, |
| "loss": 0.2742, |
| "step": 33000 |
| }, |
| { |
| "epoch": 0.4397244393513398, |
| "eval_dev_accuracy": 0.9674248192571261, |
| "eval_dev_accuracy_threshold": 0.9071935415267944, |
| "eval_dev_average_precision": 0.8132130323917695, |
| "eval_dev_f1": 0.7505652677438923, |
| "eval_dev_f1_threshold": 0.5399670600891113, |
| "eval_dev_precision": 0.7556073728625361, |
| "eval_dev_recall": 0.7455900076695519, |
| "eval_loss": 0.30597466230392456, |
| "eval_runtime": 519.866, |
| "eval_samples_per_second": 255.156, |
| "eval_steps_per_second": 7.975, |
| "step": 33000 |
| }, |
| { |
| "epoch": 0.4410569376524045, |
| "grad_norm": 6.550230503082275, |
| "learning_rate": 1.732181457463504e-05, |
| "loss": 0.2624, |
| "step": 33100 |
| }, |
| { |
| "epoch": 0.44238943595346913, |
| "grad_norm": 15.728365898132324, |
| "learning_rate": 1.730700897219508e-05, |
| "loss": 0.2481, |
| "step": 33200 |
| }, |
| { |
| "epoch": 0.4437219342545338, |
| "grad_norm": 1.1476960182189941, |
| "learning_rate": 1.7292203369755115e-05, |
| "loss": 0.2289, |
| "step": 33300 |
| }, |
| { |
| "epoch": 0.4450544325555985, |
| "grad_norm": 89.61054992675781, |
| "learning_rate": 1.7277397767315153e-05, |
| "loss": 0.2854, |
| "step": 33400 |
| }, |
| { |
| "epoch": 0.44638693085666314, |
| "grad_norm": 4.351845741271973, |
| "learning_rate": 1.7262592164875188e-05, |
| "loss": 0.2733, |
| "step": 33500 |
| }, |
| { |
| "epoch": 0.44638693085666314, |
| "eval_dev_accuracy": 0.9650500953658959, |
| "eval_dev_accuracy_threshold": 0.9060708284378052, |
| "eval_dev_average_precision": 0.8133536713572236, |
| "eval_dev_f1": 0.744153082919915, |
| "eval_dev_f1_threshold": 0.8223495483398438, |
| "eval_dev_precision": 0.7405598958333334, |
| "eval_dev_recall": 0.7477813082064205, |
| "eval_loss": 0.272208571434021, |
| "eval_runtime": 521.749, |
| "eval_samples_per_second": 254.235, |
| "eval_steps_per_second": 7.946, |
| "step": 33500 |
| }, |
| { |
| "epoch": 0.44771942915772783, |
| "grad_norm": 6.246555805206299, |
| "learning_rate": 1.7247786562435226e-05, |
| "loss": 0.2759, |
| "step": 33600 |
| }, |
| { |
| "epoch": 0.44905192745879247, |
| "grad_norm": 52.076377868652344, |
| "learning_rate": 1.7232980959995265e-05, |
| "loss": 0.2588, |
| "step": 33700 |
| }, |
| { |
| "epoch": 0.45038442575985715, |
| "grad_norm": 5.682718276977539, |
| "learning_rate": 1.72181753575553e-05, |
| "loss": 0.2106, |
| "step": 33800 |
| }, |
| { |
| "epoch": 0.45171692406092184, |
| "grad_norm": 2.271516799926758, |
| "learning_rate": 1.7203369755115338e-05, |
| "loss": 0.2631, |
| "step": 33900 |
| }, |
| { |
| "epoch": 0.4530494223619865, |
| "grad_norm": 1.0763822793960571, |
| "learning_rate": 1.7188564152675373e-05, |
| "loss": 0.304, |
| "step": 34000 |
| }, |
| { |
| "epoch": 0.4530494223619865, |
| "eval_dev_accuracy": 0.9669197192548644, |
| "eval_dev_accuracy_threshold": 0.8872429132461548, |
| "eval_dev_average_precision": 0.8222864572131344, |
| "eval_dev_f1": 0.7534934497816593, |
| "eval_dev_f1_threshold": 0.4772883951663971, |
| "eval_dev_precision": 0.750788643533123, |
| "eval_dev_recall": 0.7562178152733647, |
| "eval_loss": 0.2554573118686676, |
| "eval_runtime": 520.4082, |
| "eval_samples_per_second": 254.89, |
| "eval_steps_per_second": 7.967, |
| "step": 34000 |
| }, |
| { |
| "epoch": 0.45438192066305116, |
| "grad_norm": 0.5738760828971863, |
| "learning_rate": 1.717375855023541e-05, |
| "loss": 0.2513, |
| "step": 34100 |
| }, |
| { |
| "epoch": 0.4557144189641158, |
| "grad_norm": 2.8462681770324707, |
| "learning_rate": 1.7158952947795446e-05, |
| "loss": 0.2507, |
| "step": 34200 |
| }, |
| { |
| "epoch": 0.4570469172651805, |
| "grad_norm": 8.60177993774414, |
| "learning_rate": 1.7144147345355484e-05, |
| "loss": 0.2417, |
| "step": 34300 |
| }, |
| { |
| "epoch": 0.4583794155662452, |
| "grad_norm": 1.3673675060272217, |
| "learning_rate": 1.712934174291552e-05, |
| "loss": 0.239, |
| "step": 34400 |
| }, |
| { |
| "epoch": 0.4597119138673098, |
| "grad_norm": 36.5560188293457, |
| "learning_rate": 1.7114536140475558e-05, |
| "loss": 0.2527, |
| "step": 34500 |
| }, |
| { |
| "epoch": 0.4597119138673098, |
| "eval_dev_accuracy": 0.9667614043287824, |
| "eval_dev_accuracy_threshold": 0.9581319093704224, |
| "eval_dev_average_precision": 0.818866417573704, |
| "eval_dev_f1": 0.7523900039134568, |
| "eval_dev_f1_threshold": 0.9470370411872864, |
| "eval_dev_precision": 0.7681506849315068, |
| "eval_dev_recall": 0.7372630656294511, |
| "eval_loss": 0.2984105348587036, |
| "eval_runtime": 519.7969, |
| "eval_samples_per_second": 255.19, |
| "eval_steps_per_second": 7.976, |
| "step": 34500 |
| }, |
| { |
| "epoch": 0.4610444121683745, |
| "grad_norm": 17.973974227905273, |
| "learning_rate": 1.7099730538035596e-05, |
| "loss": 0.2807, |
| "step": 34600 |
| }, |
| { |
| "epoch": 0.46237691046943913, |
| "grad_norm": 9.143497467041016, |
| "learning_rate": 1.708492493559563e-05, |
| "loss": 0.2304, |
| "step": 34700 |
| }, |
| { |
| "epoch": 0.4637094087705038, |
| "grad_norm": 8.447179794311523, |
| "learning_rate": 1.707011933315567e-05, |
| "loss": 0.2707, |
| "step": 34800 |
| }, |
| { |
| "epoch": 0.4650419070715685, |
| "grad_norm": 0.18045054376125336, |
| "learning_rate": 1.7055313730715704e-05, |
| "loss": 0.2202, |
| "step": 34900 |
| }, |
| { |
| "epoch": 0.46637440537263314, |
| "grad_norm": 18.00141716003418, |
| "learning_rate": 1.7040508128275743e-05, |
| "loss": 0.2802, |
| "step": 35000 |
| }, |
| { |
| "epoch": 0.46637440537263314, |
| "eval_dev_accuracy": 0.9667387879107707, |
| "eval_dev_accuracy_threshold": 0.922869086265564, |
| "eval_dev_average_precision": 0.8248757172419965, |
| "eval_dev_f1": 0.7573180276545787, |
| "eval_dev_f1_threshold": 0.618488073348999, |
| "eval_dev_precision": 0.7229527794381351, |
| "eval_dev_recall": 0.795113399802783, |
| "eval_loss": 0.2512986958026886, |
| "eval_runtime": 520.5462, |
| "eval_samples_per_second": 254.823, |
| "eval_steps_per_second": 7.965, |
| "step": 35000 |
| }, |
| { |
| "epoch": 0.4677069036736978, |
| "grad_norm": 0.6688315868377686, |
| "learning_rate": 1.7025702525835778e-05, |
| "loss": 0.2375, |
| "step": 35100 |
| }, |
| { |
| "epoch": 0.46903940197476246, |
| "grad_norm": 17.023473739624023, |
| "learning_rate": 1.7010896923395816e-05, |
| "loss": 0.2058, |
| "step": 35200 |
| }, |
| { |
| "epoch": 0.47037190027582715, |
| "grad_norm": 0.3867310881614685, |
| "learning_rate": 1.699609132095585e-05, |
| "loss": 0.2419, |
| "step": 35300 |
| }, |
| { |
| "epoch": 0.47170439857689184, |
| "grad_norm": 13.710586547851562, |
| "learning_rate": 1.6981285718515886e-05, |
| "loss": 0.2232, |
| "step": 35400 |
| }, |
| { |
| "epoch": 0.47303689687795647, |
| "grad_norm": 14.513033866882324, |
| "learning_rate": 1.6966480116075924e-05, |
| "loss": 0.316, |
| "step": 35500 |
| }, |
| { |
| "epoch": 0.47303689687795647, |
| "eval_dev_accuracy": 0.9672815819430518, |
| "eval_dev_accuracy_threshold": 0.9403676986694336, |
| "eval_dev_average_precision": 0.821150327893476, |
| "eval_dev_f1": 0.7596174282678001, |
| "eval_dev_f1_threshold": 0.7547413110733032, |
| "eval_dev_precision": 0.7374393892499742, |
| "eval_dev_recall": 0.7831708118768489, |
| "eval_loss": 0.2614619731903076, |
| "eval_runtime": 520.6856, |
| "eval_samples_per_second": 254.755, |
| "eval_steps_per_second": 7.963, |
| "step": 35500 |
| }, |
| { |
| "epoch": 0.47436939517902116, |
| "grad_norm": 2.2954721450805664, |
| "learning_rate": 1.695167451363596e-05, |
| "loss": 0.2527, |
| "step": 35600 |
| }, |
| { |
| "epoch": 0.4757018934800858, |
| "grad_norm": 2.294912338256836, |
| "learning_rate": 1.6936868911195997e-05, |
| "loss": 0.2732, |
| "step": 35700 |
| }, |
| { |
| "epoch": 0.4770343917811505, |
| "grad_norm": 100.57258605957031, |
| "learning_rate": 1.6922063308756032e-05, |
| "loss": 0.2806, |
| "step": 35800 |
| }, |
| { |
| "epoch": 0.47836689008221517, |
| "grad_norm": 13.040018081665039, |
| "learning_rate": 1.690725770631607e-05, |
| "loss": 0.25, |
| "step": 35900 |
| }, |
| { |
| "epoch": 0.4796993883832798, |
| "grad_norm": 0.7189066410064697, |
| "learning_rate": 1.689245210387611e-05, |
| "loss": 0.2173, |
| "step": 36000 |
| }, |
| { |
| "epoch": 0.4796993883832798, |
| "eval_dev_accuracy": 0.9675982117952159, |
| "eval_dev_accuracy_threshold": 0.9232138395309448, |
| "eval_dev_average_precision": 0.8271432363427305, |
| "eval_dev_f1": 0.7561493449329397, |
| "eval_dev_f1_threshold": 0.49452510476112366, |
| "eval_dev_precision": 0.7169088766692852, |
| "eval_dev_recall": 0.799934260983894, |
| "eval_loss": 0.2864265441894531, |
| "eval_runtime": 519.7616, |
| "eval_samples_per_second": 255.207, |
| "eval_steps_per_second": 7.977, |
| "step": 36000 |
| }, |
| { |
| "epoch": 0.4810318866843445, |
| "grad_norm": 0.35153084993362427, |
| "learning_rate": 1.6877646501436144e-05, |
| "loss": 0.2576, |
| "step": 36100 |
| }, |
| { |
| "epoch": 0.4823643849854091, |
| "grad_norm": 0.3834153413772583, |
| "learning_rate": 1.6862840898996182e-05, |
| "loss": 0.2087, |
| "step": 36200 |
| }, |
| { |
| "epoch": 0.4836968832864738, |
| "grad_norm": 0.9096924066543579, |
| "learning_rate": 1.6848035296556217e-05, |
| "loss": 0.2581, |
| "step": 36300 |
| }, |
| { |
| "epoch": 0.4850293815875385, |
| "grad_norm": 17.327335357666016, |
| "learning_rate": 1.6833229694116256e-05, |
| "loss": 0.265, |
| "step": 36400 |
| }, |
| { |
| "epoch": 0.48636187988860313, |
| "grad_norm": 3.3336431980133057, |
| "learning_rate": 1.681842409167629e-05, |
| "loss": 0.2404, |
| "step": 36500 |
| }, |
| { |
| "epoch": 0.48636187988860313, |
| "eval_dev_accuracy": 0.9678545311993486, |
| "eval_dev_accuracy_threshold": 0.6843677163124084, |
| "eval_dev_average_precision": 0.8368359833153991, |
| "eval_dev_f1": 0.7613580982292738, |
| "eval_dev_f1_threshold": 0.3513629138469696, |
| "eval_dev_precision": 0.7526766595289079, |
| "eval_dev_recall": 0.770242138709324, |
| "eval_loss": 0.3165341913700104, |
| "eval_runtime": 520.0004, |
| "eval_samples_per_second": 255.09, |
| "eval_steps_per_second": 7.973, |
| "step": 36500 |
| }, |
| { |
| "epoch": 0.4876943781896678, |
| "grad_norm": 22.69322395324707, |
| "learning_rate": 1.680361848923633e-05, |
| "loss": 0.2525, |
| "step": 36600 |
| }, |
| { |
| "epoch": 0.48902687649073245, |
| "grad_norm": 4.788589954376221, |
| "learning_rate": 1.6788812886796367e-05, |
| "loss": 0.2262, |
| "step": 36700 |
| }, |
| { |
| "epoch": 0.49035937479179714, |
| "grad_norm": 48.63047409057617, |
| "learning_rate": 1.6774007284356402e-05, |
| "loss": 0.2572, |
| "step": 36800 |
| }, |
| { |
| "epoch": 0.49169187309286183, |
| "grad_norm": 8.924850463867188, |
| "learning_rate": 1.675920168191644e-05, |
| "loss": 0.2608, |
| "step": 36900 |
| }, |
| { |
| "epoch": 0.49302437139392646, |
| "grad_norm": 0.28982293605804443, |
| "learning_rate": 1.6744396079476476e-05, |
| "loss": 0.2212, |
| "step": 37000 |
| }, |
| { |
| "epoch": 0.49302437139392646, |
| "eval_dev_accuracy": 0.9689250416519032, |
| "eval_dev_accuracy_threshold": 0.9271968603134155, |
| "eval_dev_average_precision": 0.8366709399417354, |
| "eval_dev_f1": 0.7682220970137786, |
| "eval_dev_f1_threshold": 0.2581200897693634, |
| "eval_dev_precision": 0.7213351288957291, |
| "eval_dev_recall": 0.8216281362988934, |
| "eval_loss": 0.29943621158599854, |
| "eval_runtime": 519.5326, |
| "eval_samples_per_second": 255.32, |
| "eval_steps_per_second": 7.98, |
| "step": 37000 |
| }, |
| { |
| "epoch": 0.49435686969499115, |
| "grad_norm": 8.631064414978027, |
| "learning_rate": 1.6729590477036514e-05, |
| "loss": 0.2236, |
| "step": 37100 |
| }, |
| { |
| "epoch": 0.4956893679960558, |
| "grad_norm": 0.3893554210662842, |
| "learning_rate": 1.671478487459655e-05, |
| "loss": 0.2542, |
| "step": 37200 |
| }, |
| { |
| "epoch": 0.4970218662971205, |
| "grad_norm": 11.258530616760254, |
| "learning_rate": 1.6699979272156587e-05, |
| "loss": 0.2775, |
| "step": 37300 |
| }, |
| { |
| "epoch": 0.49835436459818516, |
| "grad_norm": 23.54794692993164, |
| "learning_rate": 1.6685173669716622e-05, |
| "loss": 0.2437, |
| "step": 37400 |
| }, |
| { |
| "epoch": 0.4996868628992498, |
| "grad_norm": 15.748093605041504, |
| "learning_rate": 1.667036806727666e-05, |
| "loss": 0.3368, |
| "step": 37500 |
| }, |
| { |
| "epoch": 0.4996868628992498, |
| "eval_dev_accuracy": 0.9686687222477705, |
| "eval_dev_accuracy_threshold": 0.9627949595451355, |
| "eval_dev_average_precision": 0.8345873786652108, |
| "eval_dev_f1": 0.7644562041783806, |
| "eval_dev_f1_threshold": 0.6956943869590759, |
| "eval_dev_precision": 0.7692478366984691, |
| "eval_dev_recall": 0.7597238961323546, |
| "eval_loss": 0.2797723412513733, |
| "eval_runtime": 520.4988, |
| "eval_samples_per_second": 254.846, |
| "eval_steps_per_second": 7.965, |
| "step": 37500 |
| }, |
| { |
| "epoch": 0.5010193612003144, |
| "grad_norm": 2.5468738079071045, |
| "learning_rate": 1.6655562464836695e-05, |
| "loss": 0.2806, |
| "step": 37600 |
| }, |
| { |
| "epoch": 0.5023518595013792, |
| "grad_norm": 2.1441900730133057, |
| "learning_rate": 1.664075686239673e-05, |
| "loss": 0.2576, |
| "step": 37700 |
| }, |
| { |
| "epoch": 0.5036843578024438, |
| "grad_norm": 1.2568778991699219, |
| "learning_rate": 1.662595125995677e-05, |
| "loss": 0.2848, |
| "step": 37800 |
| }, |
| { |
| "epoch": 0.5050168561035084, |
| "grad_norm": 3.095561981201172, |
| "learning_rate": 1.6611145657516804e-05, |
| "loss": 0.215, |
| "step": 37900 |
| }, |
| { |
| "epoch": 0.5063493544045732, |
| "grad_norm": 1.0205029249191284, |
| "learning_rate": 1.6596340055076842e-05, |
| "loss": 0.2331, |
| "step": 38000 |
| }, |
| { |
| "epoch": 0.5063493544045732, |
| "eval_dev_accuracy": 0.9679223804533837, |
| "eval_dev_accuracy_threshold": 0.9669108390808105, |
| "eval_dev_average_precision": 0.8269638058273905, |
| "eval_dev_f1": 0.7641839204087119, |
| "eval_dev_f1_threshold": 0.8454810380935669, |
| "eval_dev_precision": 0.7504224757076469, |
| "eval_dev_recall": 0.7784595157225813, |
| "eval_loss": 0.27937838435173035, |
| "eval_runtime": 519.5927, |
| "eval_samples_per_second": 255.29, |
| "eval_steps_per_second": 7.979, |
| "step": 38000 |
| }, |
| { |
| "epoch": 0.5076818527056378, |
| "grad_norm": 31.32097053527832, |
| "learning_rate": 1.6581534452636877e-05, |
| "loss": 0.27, |
| "step": 38100 |
| }, |
| { |
| "epoch": 0.5090143510067024, |
| "grad_norm": 0.6534382104873657, |
| "learning_rate": 1.6566728850196915e-05, |
| "loss": 0.2522, |
| "step": 38200 |
| }, |
| { |
| "epoch": 0.5103468493077671, |
| "grad_norm": 23.841657638549805, |
| "learning_rate": 1.6551923247756954e-05, |
| "loss": 0.251, |
| "step": 38300 |
| }, |
| { |
| "epoch": 0.5116793476088318, |
| "grad_norm": 11.927959442138672, |
| "learning_rate": 1.653711764531699e-05, |
| "loss": 0.2299, |
| "step": 38400 |
| }, |
| { |
| "epoch": 0.5130118459098965, |
| "grad_norm": 1.765657663345337, |
| "learning_rate": 1.6522312042877027e-05, |
| "loss": 0.2543, |
| "step": 38500 |
| }, |
| { |
| "epoch": 0.5130118459098965, |
| "eval_dev_accuracy": 0.9683596312016103, |
| "eval_dev_accuracy_threshold": 0.9189764261245728, |
| "eval_dev_average_precision": 0.8251932254713443, |
| "eval_dev_f1": 0.7703276368781975, |
| "eval_dev_f1_threshold": 0.8014627695083618, |
| "eval_dev_precision": 0.765329295987888, |
| "eval_dev_recall": 0.7753916949709653, |
| "eval_loss": 0.2911910116672516, |
| "eval_runtime": 519.1529, |
| "eval_samples_per_second": 255.507, |
| "eval_steps_per_second": 7.986, |
| "step": 38500 |
| }, |
| { |
| "epoch": 0.5143443442109611, |
| "grad_norm": 6.26005220413208, |
| "learning_rate": 1.6507506440437062e-05, |
| "loss": 0.203, |
| "step": 38600 |
| }, |
| { |
| "epoch": 0.5156768425120258, |
| "grad_norm": 3.370025157928467, |
| "learning_rate": 1.64927008379971e-05, |
| "loss": 0.2621, |
| "step": 38700 |
| }, |
| { |
| "epoch": 0.5170093408130905, |
| "grad_norm": 29.85224151611328, |
| "learning_rate": 1.6477895235557135e-05, |
| "loss": 0.2677, |
| "step": 38800 |
| }, |
| { |
| "epoch": 0.5183418391141551, |
| "grad_norm": 13.099495887756348, |
| "learning_rate": 1.6463089633117173e-05, |
| "loss": 0.2377, |
| "step": 38900 |
| }, |
| { |
| "epoch": 0.5196743374152198, |
| "grad_norm": 17.140789031982422, |
| "learning_rate": 1.6448284030677212e-05, |
| "loss": 0.265, |
| "step": 39000 |
| }, |
| { |
| "epoch": 0.5196743374152198, |
| "eval_dev_accuracy": 0.9675529789591925, |
| "eval_dev_accuracy_threshold": 0.9323844909667969, |
| "eval_dev_average_precision": 0.818988116595722, |
| "eval_dev_f1": 0.7656208525773743, |
| "eval_dev_f1_threshold": 0.8410446643829346, |
| "eval_dev_precision": 0.7426364572605562, |
| "eval_dev_recall": 0.7900734085679851, |
| "eval_loss": 0.28379642963409424, |
| "eval_runtime": 518.2405, |
| "eval_samples_per_second": 255.956, |
| "eval_steps_per_second": 8.0, |
| "step": 39000 |
| }, |
| { |
| "epoch": 0.5210068357162845, |
| "grad_norm": 2.0083911418914795, |
| "learning_rate": 1.319120586275816e-07, |
| "loss": 0.2108, |
| "step": 39100 |
| }, |
| { |
| "epoch": 0.5223393340173491, |
| "grad_norm": 0.4948272705078125, |
| "learning_rate": 2.651565622918055e-07, |
| "loss": 0.227, |
| "step": 39200 |
| }, |
| { |
| "epoch": 0.5236718323184137, |
| "grad_norm": 11.525949478149414, |
| "learning_rate": 3.984010659560293e-07, |
| "loss": 0.2081, |
| "step": 39300 |
| }, |
| { |
| "epoch": 0.5250043306194785, |
| "grad_norm": 18.18743133544922, |
| "learning_rate": 5.316455696202532e-07, |
| "loss": 0.2782, |
| "step": 39400 |
| }, |
| { |
| "epoch": 0.5263368289205431, |
| "grad_norm": 30.067602157592773, |
| "learning_rate": 6.64890073284477e-07, |
| "loss": 0.2357, |
| "step": 39500 |
| }, |
| { |
| "epoch": 0.5263368289205431, |
| "eval_dev_accuracy": 0.9679374580653916, |
| "eval_dev_accuracy_threshold": 0.8992660045623779, |
| "eval_dev_average_precision": 0.8239503903565419, |
| "eval_dev_f1": 0.768843413510473, |
| "eval_dev_f1_threshold": 0.8412591814994812, |
| "eval_dev_precision": 0.7522012578616353, |
| "eval_dev_recall": 0.7862386326284649, |
| "eval_loss": 0.27902960777282715, |
| "eval_runtime": 522.9572, |
| "eval_samples_per_second": 253.648, |
| "eval_steps_per_second": 7.928, |
| "step": 39500 |
| }, |
| { |
| "epoch": 0.5276693272216078, |
| "grad_norm": 1.496453881263733, |
| "learning_rate": 7.981345769487009e-07, |
| "loss": 0.2654, |
| "step": 39600 |
| }, |
| { |
| "epoch": 0.5290018255226725, |
| "grad_norm": 2.676929473876953, |
| "learning_rate": 9.313790806129248e-07, |
| "loss": 0.2572, |
| "step": 39700 |
| }, |
| { |
| "epoch": 0.5303343238237371, |
| "grad_norm": 1.3355958461761475, |
| "learning_rate": 1.0646235842771487e-06, |
| "loss": 0.2452, |
| "step": 39800 |
| }, |
| { |
| "epoch": 0.5316668221248018, |
| "grad_norm": 24.94687843322754, |
| "learning_rate": 1.1978680879413725e-06, |
| "loss": 0.2412, |
| "step": 39900 |
| }, |
| { |
| "epoch": 0.5329993204258665, |
| "grad_norm": 16.272785186767578, |
| "learning_rate": 1.3311125916055965e-06, |
| "loss": 0.2656, |
| "step": 40000 |
| }, |
| { |
| "epoch": 0.5329993204258665, |
| "eval_dev_accuracy": 0.9683219371715908, |
| "eval_dev_accuracy_threshold": 0.8796899914741516, |
| "eval_dev_average_precision": 0.8334902875069624, |
| "eval_dev_f1": 0.7711174542763505, |
| "eval_dev_f1_threshold": 0.6210243701934814, |
| "eval_dev_precision": 0.7449698702890409, |
| "eval_dev_recall": 0.7991673057959899, |
| "eval_loss": 0.2717488408088684, |
| "eval_runtime": 523.9326, |
| "eval_samples_per_second": 253.176, |
| "eval_steps_per_second": 7.913, |
| "step": 40000 |
| }, |
| { |
| "epoch": 0.5343318187269311, |
| "grad_norm": 38.643516540527344, |
| "learning_rate": 1.4643570952698202e-06, |
| "loss": 0.2558, |
| "step": 40100 |
| }, |
| { |
| "epoch": 0.5356643170279958, |
| "grad_norm": 0.41367307305336, |
| "learning_rate": 1.597601598934044e-06, |
| "loss": 0.2445, |
| "step": 40200 |
| }, |
| { |
| "epoch": 0.5369968153290604, |
| "grad_norm": 0.5968548655509949, |
| "learning_rate": 1.7308461025982678e-06, |
| "loss": 0.225, |
| "step": 40300 |
| }, |
| { |
| "epoch": 0.5383293136301252, |
| "grad_norm": 3.6407761573791504, |
| "learning_rate": 1.864090606262492e-06, |
| "loss": 0.1996, |
| "step": 40400 |
| }, |
| { |
| "epoch": 0.5396618119311898, |
| "grad_norm": 4.504887580871582, |
| "learning_rate": 1.9973351099267156e-06, |
| "loss": 0.244, |
| "step": 40500 |
| }, |
| { |
| "epoch": 0.5396618119311898, |
| "eval_dev_accuracy": 0.9687214938897978, |
| "eval_dev_accuracy_threshold": 0.9278361797332764, |
| "eval_dev_average_precision": 0.8391958373486473, |
| "eval_dev_f1": 0.772467364332722, |
| "eval_dev_f1_threshold": 0.8639750480651855, |
| "eval_dev_precision": 0.7608118159600468, |
| "eval_dev_recall": 0.7844855921989701, |
| "eval_loss": 0.2598799467086792, |
| "eval_runtime": 524.043, |
| "eval_samples_per_second": 253.122, |
| "eval_steps_per_second": 7.912, |
| "step": 40500 |
| }, |
| { |
| "epoch": 0.5409943102322544, |
| "grad_norm": 102.69219970703125, |
| "learning_rate": 2.1305796135909398e-06, |
| "loss": 0.2261, |
| "step": 40600 |
| }, |
| { |
| "epoch": 0.5423268085333192, |
| "grad_norm": 0.4366992115974426, |
| "learning_rate": 2.2638241172551636e-06, |
| "loss": 0.2146, |
| "step": 40700 |
| }, |
| { |
| "epoch": 0.5436593068343838, |
| "grad_norm": 0.5195454955101013, |
| "learning_rate": 2.3970686209193873e-06, |
| "loss": 0.2287, |
| "step": 40800 |
| }, |
| { |
| "epoch": 0.5449918051354484, |
| "grad_norm": 0.5551161170005798, |
| "learning_rate": 2.530313124583611e-06, |
| "loss": 0.2278, |
| "step": 40900 |
| }, |
| { |
| "epoch": 0.5463243034365132, |
| "grad_norm": 0.49544551968574524, |
| "learning_rate": 2.663557628247835e-06, |
| "loss": 0.2482, |
| "step": 41000 |
| }, |
| { |
| "epoch": 0.5463243034365132, |
| "eval_dev_accuracy": 0.9691587446380242, |
| "eval_dev_accuracy_threshold": 0.9283666610717773, |
| "eval_dev_average_precision": 0.8431961837252191, |
| "eval_dev_f1": 0.7750185715801761, |
| "eval_dev_f1_threshold": 0.6344282627105713, |
| "eval_dev_precision": 0.7514147546043831, |
| "eval_dev_recall": 0.8001533910375808, |
| "eval_loss": 0.275828093290329, |
| "eval_runtime": 522.4079, |
| "eval_samples_per_second": 253.915, |
| "eval_steps_per_second": 7.936, |
| "step": 41000 |
| }, |
| { |
| "epoch": 0.5476568017375778, |
| "grad_norm": 0.10281296074390411, |
| "learning_rate": 2.7968021319120587e-06, |
| "loss": 0.2163, |
| "step": 41100 |
| }, |
| { |
| "epoch": 0.5489893000386424, |
| "grad_norm": 1.15056312084198, |
| "learning_rate": 2.930046635576283e-06, |
| "loss": 0.2284, |
| "step": 41200 |
| }, |
| { |
| "epoch": 0.5503217983397071, |
| "grad_norm": 0.4747524559497833, |
| "learning_rate": 3.0632911392405066e-06, |
| "loss": 0.2382, |
| "step": 41300 |
| }, |
| { |
| "epoch": 0.5516542966407718, |
| "grad_norm": 0.4341018795967102, |
| "learning_rate": 3.1965356429047304e-06, |
| "loss": 0.2355, |
| "step": 41400 |
| }, |
| { |
| "epoch": 0.5529867949418364, |
| "grad_norm": 14.61008071899414, |
| "learning_rate": 3.3297801465689546e-06, |
| "loss": 0.2247, |
| "step": 41500 |
| }, |
| { |
| "epoch": 0.5529867949418364, |
| "eval_dev_accuracy": 0.9692039774740476, |
| "eval_dev_accuracy_threshold": 0.9339917302131653, |
| "eval_dev_average_precision": 0.8436933951228754, |
| "eval_dev_f1": 0.7787227299138979, |
| "eval_dev_f1_threshold": 0.5835311412811279, |
| "eval_dev_precision": 0.7518359853121175, |
| "eval_dev_recall": 0.8076038128629341, |
| "eval_loss": 0.2721947729587555, |
| "eval_runtime": 523.5606, |
| "eval_samples_per_second": 253.356, |
| "eval_steps_per_second": 7.919, |
| "step": 41500 |
| }, |
| { |
| "epoch": 0.5543192932429011, |
| "grad_norm": 0.17993593215942383, |
| "learning_rate": 3.4630246502331784e-06, |
| "loss": 0.2731, |
| "step": 41600 |
| }, |
| { |
| "epoch": 0.5556517915439658, |
| "grad_norm": 0.47082406282424927, |
| "learning_rate": 3.596269153897402e-06, |
| "loss": 0.2493, |
| "step": 41700 |
| }, |
| { |
| "epoch": 0.5569842898450305, |
| "grad_norm": 3.0138349533081055, |
| "learning_rate": 3.729513657561626e-06, |
| "loss": 0.2002, |
| "step": 41800 |
| }, |
| { |
| "epoch": 0.5583167881460951, |
| "grad_norm": 15.761974334716797, |
| "learning_rate": 3.862758161225849e-06, |
| "loss": 0.2301, |
| "step": 41900 |
| }, |
| { |
| "epoch": 0.5596492864471598, |
| "grad_norm": 0.34038063883781433, |
| "learning_rate": 3.996002664890073e-06, |
| "loss": 0.2136, |
| "step": 42000 |
| }, |
| { |
| "epoch": 0.5596492864471598, |
| "eval_dev_accuracy": 0.9691587446380242, |
| "eval_dev_accuracy_threshold": 0.9421218633651733, |
| "eval_dev_average_precision": 0.8475633374819089, |
| "eval_dev_f1": 0.7781878671310496, |
| "eval_dev_f1_threshold": 0.3623931407928467, |
| "eval_dev_precision": 0.7403560830860534, |
| "eval_dev_recall": 0.8200942259230853, |
| "eval_loss": 0.2632051110267639, |
| "eval_runtime": 523.1078, |
| "eval_samples_per_second": 253.575, |
| "eval_steps_per_second": 7.926, |
| "step": 42000 |
| }, |
| { |
| "epoch": 0.5609817847482245, |
| "grad_norm": 0.8982422351837158, |
| "learning_rate": 4.129247168554298e-06, |
| "loss": 0.2323, |
| "step": 42100 |
| }, |
| { |
| "epoch": 0.5623142830492891, |
| "grad_norm": 3.004122495651245, |
| "learning_rate": 4.2624916722185215e-06, |
| "loss": 0.2274, |
| "step": 42200 |
| }, |
| { |
| "epoch": 0.5636467813503537, |
| "grad_norm": 7.217723846435547, |
| "learning_rate": 4.395736175882745e-06, |
| "loss": 0.2233, |
| "step": 42300 |
| }, |
| { |
| "epoch": 0.5649792796514185, |
| "grad_norm": 1.1566057205200195, |
| "learning_rate": 4.528980679546969e-06, |
| "loss": 0.2819, |
| "step": 42400 |
| }, |
| { |
| "epoch": 0.5663117779524831, |
| "grad_norm": 0.2774888575077057, |
| "learning_rate": 4.662225183211193e-06, |
| "loss": 0.2002, |
| "step": 42500 |
| }, |
| { |
| "epoch": 0.5663117779524831, |
| "eval_dev_accuracy": 0.9700181685224694, |
| "eval_dev_accuracy_threshold": 0.9420008063316345, |
| "eval_dev_average_precision": 0.8490166145203218, |
| "eval_dev_f1": 0.7794501933730532, |
| "eval_dev_f1_threshold": 0.41960281133651733, |
| "eval_dev_precision": 0.7451783751374038, |
| "eval_dev_recall": 0.8170264051714693, |
| "eval_loss": 0.2606056034564972, |
| "eval_runtime": 524.5455, |
| "eval_samples_per_second": 252.88, |
| "eval_steps_per_second": 7.904, |
| "step": 42500 |
| }, |
| { |
| "epoch": 0.5676442762535477, |
| "grad_norm": 13.932589530944824, |
| "learning_rate": 4.795469686875417e-06, |
| "loss": 0.2599, |
| "step": 42600 |
| }, |
| { |
| "epoch": 0.5689767745546125, |
| "grad_norm": 10.140316009521484, |
| "learning_rate": 4.92871419053964e-06, |
| "loss": 0.2478, |
| "step": 42700 |
| }, |
| { |
| "epoch": 0.5703092728556771, |
| "grad_norm": 13.381287574768066, |
| "learning_rate": 5.061958694203864e-06, |
| "loss": 0.2151, |
| "step": 42800 |
| }, |
| { |
| "epoch": 0.5716417711567418, |
| "grad_norm": 3.821155548095703, |
| "learning_rate": 5.195203197868088e-06, |
| "loss": 0.2207, |
| "step": 42900 |
| }, |
| { |
| "epoch": 0.5729742694578065, |
| "grad_norm": 0.3303406834602356, |
| "learning_rate": 5.328447701532313e-06, |
| "loss": 0.2683, |
| "step": 43000 |
| }, |
| { |
| "epoch": 0.5729742694578065, |
| "eval_dev_accuracy": 0.9702820267326061, |
| "eval_dev_accuracy_threshold": 0.9166876673698425, |
| "eval_dev_average_precision": 0.8539072755077529, |
| "eval_dev_f1": 0.7817631806395852, |
| "eval_dev_f1_threshold": 0.4148586690425873, |
| "eval_dev_precision": 0.7710175812466702, |
| "eval_dev_recall": 0.7928125342390709, |
| "eval_loss": 0.2761251628398895, |
| "eval_runtime": 522.8877, |
| "eval_samples_per_second": 253.682, |
| "eval_steps_per_second": 7.929, |
| "step": 43000 |
| }, |
| { |
| "epoch": 0.5743067677588711, |
| "grad_norm": 2.869353771209717, |
| "learning_rate": 5.461692205196536e-06, |
| "loss": 0.2233, |
| "step": 43100 |
| }, |
| { |
| "epoch": 0.5756392660599358, |
| "grad_norm": 1.4524685144424438, |
| "learning_rate": 5.59493670886076e-06, |
| "loss": 0.2473, |
| "step": 43200 |
| }, |
| { |
| "epoch": 0.5769717643610004, |
| "grad_norm": 0.838426411151886, |
| "learning_rate": 5.728181212524984e-06, |
| "loss": 0.2289, |
| "step": 43300 |
| }, |
| { |
| "epoch": 0.5783042626620651, |
| "grad_norm": 33.507659912109375, |
| "learning_rate": 5.861425716189208e-06, |
| "loss": 0.2757, |
| "step": 43400 |
| }, |
| { |
| "epoch": 0.5796367609631298, |
| "grad_norm": 10.75368595123291, |
| "learning_rate": 5.9946702198534315e-06, |
| "loss": 0.2489, |
| "step": 43500 |
| }, |
| { |
| "epoch": 0.5796367609631298, |
| "eval_dev_accuracy": 0.9702367938965827, |
| "eval_dev_accuracy_threshold": 0.9455279111862183, |
| "eval_dev_average_precision": 0.8513893973961074, |
| "eval_dev_f1": 0.7795382036446223, |
| "eval_dev_f1_threshold": 0.6581396460533142, |
| "eval_dev_precision": 0.7695921417894512, |
| "eval_dev_recall": 0.7897447134874548, |
| "eval_loss": 0.24530762434005737, |
| "eval_runtime": 523.1112, |
| "eval_samples_per_second": 253.573, |
| "eval_steps_per_second": 7.926, |
| "step": 43500 |
| }, |
| { |
| "epoch": 0.5809692592641944, |
| "grad_norm": 4.178175449371338, |
| "learning_rate": 6.127914723517655e-06, |
| "loss": 0.2238, |
| "step": 43600 |
| }, |
| { |
| "epoch": 0.5823017575652591, |
| "grad_norm": 7.612859725952148, |
| "learning_rate": 6.261159227181879e-06, |
| "loss": 0.2342, |
| "step": 43700 |
| }, |
| { |
| "epoch": 0.5836342558663238, |
| "grad_norm": 19.10555648803711, |
| "learning_rate": 6.394403730846103e-06, |
| "loss": 0.2209, |
| "step": 43800 |
| }, |
| { |
| "epoch": 0.5849667541673884, |
| "grad_norm": 0.2660426199436188, |
| "learning_rate": 6.527648234510327e-06, |
| "loss": 0.1982, |
| "step": 43900 |
| }, |
| { |
| "epoch": 0.5862992524684532, |
| "grad_norm": 4.176153659820557, |
| "learning_rate": 6.660892738174551e-06, |
| "loss": 0.2577, |
| "step": 44000 |
| }, |
| { |
| "epoch": 0.5862992524684532, |
| "eval_dev_accuracy": 0.9705006521067193, |
| "eval_dev_accuracy_threshold": 0.9348860383033752, |
| "eval_dev_average_precision": 0.8544433474182094, |
| "eval_dev_f1": 0.7824561403508773, |
| "eval_dev_f1_threshold": 0.41301047801971436, |
| "eval_dev_precision": 0.759991738097697, |
| "eval_dev_recall": 0.806289032540813, |
| "eval_loss": 0.268686980009079, |
| "eval_runtime": 525.8935, |
| "eval_samples_per_second": 252.232, |
| "eval_steps_per_second": 7.884, |
| "step": 44000 |
| }, |
| { |
| "epoch": 0.5876317507695178, |
| "grad_norm": 2.451788902282715, |
| "learning_rate": 6.794137241838775e-06, |
| "loss": 0.1872, |
| "step": 44100 |
| }, |
| { |
| "epoch": 0.5889642490705824, |
| "grad_norm": 0.2053864449262619, |
| "learning_rate": 6.927381745502999e-06, |
| "loss": 0.2132, |
| "step": 44200 |
| }, |
| { |
| "epoch": 0.5902967473716471, |
| "grad_norm": 2.7442498207092285, |
| "learning_rate": 7.0606262491672225e-06, |
| "loss": 0.1735, |
| "step": 44300 |
| }, |
| { |
| "epoch": 0.5916292456727118, |
| "grad_norm": 14.928565979003906, |
| "learning_rate": 7.193870752831446e-06, |
| "loss": 0.2907, |
| "step": 44400 |
| }, |
| { |
| "epoch": 0.5929617439737764, |
| "grad_norm": 1.0581625699996948, |
| "learning_rate": 7.32711525649567e-06, |
| "loss": 0.2109, |
| "step": 44500 |
| }, |
| { |
| "epoch": 0.5929617439737764, |
| "eval_dev_accuracy": 0.9710132909149849, |
| "eval_dev_accuracy_threshold": 0.9184995889663696, |
| "eval_dev_average_precision": 0.8564900386871592, |
| "eval_dev_f1": 0.7874429836329488, |
| "eval_dev_f1_threshold": 0.42533212900161743, |
| "eval_dev_precision": 0.7716659655027346, |
| "eval_dev_recall": 0.8038786019502575, |
| "eval_loss": 0.2596043348312378, |
| "eval_runtime": 521.1196, |
| "eval_samples_per_second": 254.542, |
| "eval_steps_per_second": 7.956, |
| "step": 44500 |
| }, |
| { |
| "epoch": 0.5942942422748411, |
| "grad_norm": 7.90291166305542, |
| "learning_rate": 7.460359760159894e-06, |
| "loss": 0.2621, |
| "step": 44600 |
| }, |
| { |
| "epoch": 0.5956267405759058, |
| "grad_norm": 27.323461532592773, |
| "learning_rate": 7.593604263824118e-06, |
| "loss": 0.21, |
| "step": 44700 |
| }, |
| { |
| "epoch": 0.5969592388769704, |
| "grad_norm": 0.3570970296859741, |
| "learning_rate": 7.726848767488342e-06, |
| "loss": 0.216, |
| "step": 44800 |
| }, |
| { |
| "epoch": 0.5982917371780351, |
| "grad_norm": 0.6491680145263672, |
| "learning_rate": 7.860093271152565e-06, |
| "loss": 0.2136, |
| "step": 44900 |
| }, |
| { |
| "epoch": 0.5996242354790997, |
| "grad_norm": 20.47812271118164, |
| "learning_rate": 7.99333777481679e-06, |
| "loss": 0.2099, |
| "step": 45000 |
| }, |
| { |
| "epoch": 0.5996242354790997, |
| "eval_dev_accuracy": 0.9701463282245358, |
| "eval_dev_accuracy_threshold": 0.7721706628799438, |
| "eval_dev_average_precision": 0.8515314890810202, |
| "eval_dev_f1": 0.7854063375727528, |
| "eval_dev_f1_threshold": 0.46630430221557617, |
| "eval_dev_precision": 0.7728285077951003, |
| "eval_dev_recall": 0.7984003506080859, |
| "eval_loss": 0.27925005555152893, |
| "eval_runtime": 528.9897, |
| "eval_samples_per_second": 250.755, |
| "eval_steps_per_second": 7.838, |
| "step": 45000 |
| }, |
| { |
| "epoch": 0.6009567337801645, |
| "grad_norm": 0.4902491867542267, |
| "learning_rate": 8.126582278481013e-06, |
| "loss": 0.2536, |
| "step": 45100 |
| }, |
| { |
| "epoch": 0.6022892320812291, |
| "grad_norm": 0.5637998580932617, |
| "learning_rate": 8.259826782145237e-06, |
| "loss": 0.2247, |
| "step": 45200 |
| }, |
| { |
| "epoch": 0.6036217303822937, |
| "grad_norm": 1.9175264835357666, |
| "learning_rate": 8.39307128580946e-06, |
| "loss": 0.2349, |
| "step": 45300 |
| }, |
| { |
| "epoch": 0.6049542286833585, |
| "grad_norm": 76.62299346923828, |
| "learning_rate": 8.526315789473685e-06, |
| "loss": 0.1836, |
| "step": 45400 |
| }, |
| { |
| "epoch": 0.6062867269844231, |
| "grad_norm": 1.5868983268737793, |
| "learning_rate": 8.659560293137908e-06, |
| "loss": 0.2635, |
| "step": 45500 |
| }, |
| { |
| "epoch": 0.6062867269844231, |
| "eval_dev_accuracy": 0.9700709401644968, |
| "eval_dev_accuracy_threshold": 0.9073478579521179, |
| "eval_dev_average_precision": 0.85367208401453, |
| "eval_dev_f1": 0.7835151777033597, |
| "eval_dev_f1_threshold": 0.5480349659919739, |
| "eval_dev_precision": 0.7726643229998935, |
| "eval_dev_recall": 0.7946751396954093, |
| "eval_loss": 0.27641019225120544, |
| "eval_runtime": 535.7653, |
| "eval_samples_per_second": 247.584, |
| "eval_steps_per_second": 7.738, |
| "step": 45500 |
| }, |
| { |
| "epoch": 0.6076192252854877, |
| "grad_norm": 0.3646801710128784, |
| "learning_rate": 8.792804796802133e-06, |
| "loss": 0.2259, |
| "step": 45600 |
| }, |
| { |
| "epoch": 0.6089517235865525, |
| "grad_norm": 0.1534300446510315, |
| "learning_rate": 8.926049300466355e-06, |
| "loss": 0.1824, |
| "step": 45700 |
| }, |
| { |
| "epoch": 0.6102842218876171, |
| "grad_norm": 4.515030384063721, |
| "learning_rate": 9.05929380413058e-06, |
| "loss": 0.2108, |
| "step": 45800 |
| }, |
| { |
| "epoch": 0.6116167201886817, |
| "grad_norm": 27.513139724731445, |
| "learning_rate": 9.192538307794803e-06, |
| "loss": 0.1652, |
| "step": 45900 |
| }, |
| { |
| "epoch": 0.6129492184897464, |
| "grad_norm": 0.3283866345882416, |
| "learning_rate": 9.325782811459028e-06, |
| "loss": 0.2599, |
| "step": 46000 |
| }, |
| { |
| "epoch": 0.6129492184897464, |
| "eval_dev_accuracy": 0.9699880132984537, |
| "eval_dev_accuracy_threshold": 0.9482549428939819, |
| "eval_dev_average_precision": 0.8491188703823201, |
| "eval_dev_f1": 0.7826180027828322, |
| "eval_dev_f1_threshold": 0.9011486768722534, |
| "eval_dev_precision": 0.7649335704571608, |
| "eval_dev_recall": 0.8011394762791717, |
| "eval_loss": 0.2594774067401886, |
| "eval_runtime": 527.4017, |
| "eval_samples_per_second": 251.51, |
| "eval_steps_per_second": 7.861, |
| "step": 46000 |
| }, |
| { |
| "epoch": 0.6142817167908111, |
| "grad_norm": 0.6060785055160522, |
| "learning_rate": 9.459027315123252e-06, |
| "loss": 0.231, |
| "step": 46100 |
| }, |
| { |
| "epoch": 0.6156142150918757, |
| "grad_norm": 1.9709681272506714, |
| "learning_rate": 9.592271818787475e-06, |
| "loss": 0.2364, |
| "step": 46200 |
| }, |
| { |
| "epoch": 0.6169467133929404, |
| "grad_norm": 0.13106560707092285, |
| "learning_rate": 9.7255163224517e-06, |
| "loss": 0.1774, |
| "step": 46300 |
| }, |
| { |
| "epoch": 0.6182792116940051, |
| "grad_norm": 53.972103118896484, |
| "learning_rate": 9.858760826115924e-06, |
| "loss": 0.2322, |
| "step": 46400 |
| }, |
| { |
| "epoch": 0.6196117099950698, |
| "grad_norm": 12.795185089111328, |
| "learning_rate": 9.992005329780147e-06, |
| "loss": 0.2283, |
| "step": 46500 |
| }, |
| { |
| "epoch": 0.6196117099950698, |
| "eval_dev_accuracy": 0.9702518715085905, |
| "eval_dev_accuracy_threshold": 0.8647300004959106, |
| "eval_dev_average_precision": 0.8569022880485853, |
| "eval_dev_f1": 0.7869809918232983, |
| "eval_dev_f1_threshold": 0.43426772952079773, |
| "eval_dev_precision": 0.7634696610693315, |
| "eval_dev_recall": 0.8119864139366714, |
| "eval_loss": 0.26569852232933044, |
| "eval_runtime": 528.8452, |
| "eval_samples_per_second": 250.824, |
| "eval_steps_per_second": 7.84, |
| "step": 46500 |
| }, |
| { |
| "epoch": 0.6209442082961344, |
| "grad_norm": 6.9099507331848145, |
| "learning_rate": 1.012524983344437e-05, |
| "loss": 0.2275, |
| "step": 46600 |
| }, |
| { |
| "epoch": 0.6222767065971991, |
| "grad_norm": 3.897141456604004, |
| "learning_rate": 1.0258494337108595e-05, |
| "loss": 0.1867, |
| "step": 46700 |
| }, |
| { |
| "epoch": 0.6236092048982638, |
| "grad_norm": 1.8539767265319824, |
| "learning_rate": 1.0391738840772818e-05, |
| "loss": 0.276, |
| "step": 46800 |
| }, |
| { |
| "epoch": 0.6249417031993284, |
| "grad_norm": 17.823284149169922, |
| "learning_rate": 1.0524983344437042e-05, |
| "loss": 0.2208, |
| "step": 46900 |
| }, |
| { |
| "epoch": 0.626274201500393, |
| "grad_norm": 0.8377816081047058, |
| "learning_rate": 1.0658227848101265e-05, |
| "loss": 0.2644, |
| "step": 47000 |
| }, |
| { |
| "epoch": 0.626274201500393, |
| "eval_dev_accuracy": 0.9708625147949068, |
| "eval_dev_accuracy_threshold": 0.8370188474655151, |
| "eval_dev_average_precision": 0.8568328618718613, |
| "eval_dev_f1": 0.7867207514944491, |
| "eval_dev_f1_threshold": 0.3532576858997345, |
| "eval_dev_precision": 0.766989280882506, |
| "eval_dev_recall": 0.8074942478360907, |
| "eval_loss": 0.2608221769332886, |
| "eval_runtime": 528.9364, |
| "eval_samples_per_second": 250.781, |
| "eval_steps_per_second": 7.838, |
| "step": 47000 |
| }, |
| { |
| "epoch": 0.6276066998014578, |
| "grad_norm": 23.196794509887695, |
| "learning_rate": 1.079147235176549e-05, |
| "loss": 0.1944, |
| "step": 47100 |
| }, |
| { |
| "epoch": 0.6289391981025224, |
| "grad_norm": 0.2909054458141327, |
| "learning_rate": 1.0924716855429713e-05, |
| "loss": 0.2221, |
| "step": 47200 |
| }, |
| { |
| "epoch": 0.630271696403587, |
| "grad_norm": 15.759045600891113, |
| "learning_rate": 1.1057961359093938e-05, |
| "loss": 0.2392, |
| "step": 47300 |
| }, |
| { |
| "epoch": 0.6316041947046518, |
| "grad_norm": 4.435680866241455, |
| "learning_rate": 1.1191205862758164e-05, |
| "loss": 0.1809, |
| "step": 47400 |
| }, |
| { |
| "epoch": 0.6329366930057164, |
| "grad_norm": 3.936431646347046, |
| "learning_rate": 1.1324450366422385e-05, |
| "loss": 0.1708, |
| "step": 47500 |
| }, |
| { |
| "epoch": 0.6329366930057164, |
| "eval_dev_accuracy": 0.9702594103145944, |
| "eval_dev_accuracy_threshold": 0.9633700847625732, |
| "eval_dev_average_precision": 0.8539832745264263, |
| "eval_dev_f1": 0.7859069988890653, |
| "eval_dev_f1_threshold": 0.7301878929138184, |
| "eval_dev_precision": 0.7598199672667758, |
| "eval_dev_recall": 0.8138490193930098, |
| "eval_loss": 0.2916560173034668, |
| "eval_runtime": 524.8425, |
| "eval_samples_per_second": 252.737, |
| "eval_steps_per_second": 7.9, |
| "step": 47500 |
| }, |
| { |
| "epoch": 0.634269191306781, |
| "grad_norm": 1.574413776397705, |
| "learning_rate": 1.1457694870086611e-05, |
| "loss": 0.2181, |
| "step": 47600 |
| }, |
| { |
| "epoch": 0.6356016896078458, |
| "grad_norm": 4.340725421905518, |
| "learning_rate": 1.1590939373750833e-05, |
| "loss": 0.2258, |
| "step": 47700 |
| }, |
| { |
| "epoch": 0.6369341879089104, |
| "grad_norm": 5.916915416717529, |
| "learning_rate": 1.1724183877415059e-05, |
| "loss": 0.2808, |
| "step": 47800 |
| }, |
| { |
| "epoch": 0.6382666862099751, |
| "grad_norm": 15.759284019470215, |
| "learning_rate": 1.1857428381079282e-05, |
| "loss": 0.2394, |
| "step": 47900 |
| }, |
| { |
| "epoch": 0.6395991845110397, |
| "grad_norm": 14.555028915405273, |
| "learning_rate": 1.1990672884743507e-05, |
| "loss": 0.2267, |
| "step": 48000 |
| }, |
| { |
| "epoch": 0.6395991845110397, |
| "eval_dev_accuracy": 0.9713826924091762, |
| "eval_dev_accuracy_threshold": 0.9341762065887451, |
| "eval_dev_average_precision": 0.8563315677126753, |
| "eval_dev_f1": 0.7862142099681866, |
| "eval_dev_f1_threshold": 0.4216569662094116, |
| "eval_dev_precision": 0.7617384156991678, |
| "eval_dev_recall": 0.8123151090172017, |
| "eval_loss": 0.26165512204170227, |
| "eval_runtime": 525.2884, |
| "eval_samples_per_second": 252.522, |
| "eval_steps_per_second": 7.893, |
| "step": 48000 |
| }, |
| { |
| "epoch": 0.6409316828121044, |
| "grad_norm": 8.00622844696045, |
| "learning_rate": 1.212391738840773e-05, |
| "loss": 0.2583, |
| "step": 48100 |
| }, |
| { |
| "epoch": 0.6422641811131691, |
| "grad_norm": 13.320343017578125, |
| "learning_rate": 1.2257161892071954e-05, |
| "loss": 0.2188, |
| "step": 48200 |
| }, |
| { |
| "epoch": 0.6435966794142337, |
| "grad_norm": 2.9494426250457764, |
| "learning_rate": 1.2390406395736177e-05, |
| "loss": 0.1877, |
| "step": 48300 |
| }, |
| { |
| "epoch": 0.6449291777152985, |
| "grad_norm": 0.39628902077674866, |
| "learning_rate": 1.2523650899400402e-05, |
| "loss": 0.2324, |
| "step": 48400 |
| }, |
| { |
| "epoch": 0.6462616760163631, |
| "grad_norm": 0.1506374627351761, |
| "learning_rate": 1.2656895403064625e-05, |
| "loss": 0.2239, |
| "step": 48500 |
| }, |
| { |
| "epoch": 0.6462616760163631, |
| "eval_dev_accuracy": 0.9706514282267974, |
| "eval_dev_accuracy_threshold": 0.8615503311157227, |
| "eval_dev_average_precision": 0.8586570982605375, |
| "eval_dev_f1": 0.7870691958322201, |
| "eval_dev_f1_threshold": 0.24849581718444824, |
| "eval_dev_precision": 0.7681476846057572, |
| "eval_dev_recall": 0.8069464227018736, |
| "eval_loss": 0.28418707847595215, |
| "eval_runtime": 533.0754, |
| "eval_samples_per_second": 248.833, |
| "eval_steps_per_second": 7.778, |
| "step": 48500 |
| }, |
| { |
| "epoch": 0.6475941743174277, |
| "grad_norm": 0.48906368017196655, |
| "learning_rate": 1.279013990672885e-05, |
| "loss": 0.22, |
| "step": 48600 |
| }, |
| { |
| "epoch": 0.6489266726184925, |
| "grad_norm": 71.81077575683594, |
| "learning_rate": 1.2923384410393072e-05, |
| "loss": 0.2079, |
| "step": 48700 |
| }, |
| { |
| "epoch": 0.6502591709195571, |
| "grad_norm": 17.413375854492188, |
| "learning_rate": 1.3056628914057297e-05, |
| "loss": 0.2212, |
| "step": 48800 |
| }, |
| { |
| "epoch": 0.6515916692206217, |
| "grad_norm": 0.7448732852935791, |
| "learning_rate": 1.318987341772152e-05, |
| "loss": 0.2106, |
| "step": 48900 |
| }, |
| { |
| "epoch": 0.6529241675216864, |
| "grad_norm": 0.6357948780059814, |
| "learning_rate": 1.3323117921385744e-05, |
| "loss": 0.2095, |
| "step": 49000 |
| }, |
| { |
| "epoch": 0.6529241675216864, |
| "eval_dev_accuracy": 0.971164067035063, |
| "eval_dev_accuracy_threshold": 0.925714373588562, |
| "eval_dev_average_precision": 0.8570638757463108, |
| "eval_dev_f1": 0.7913554743365645, |
| "eval_dev_f1_threshold": 0.5317444801330566, |
| "eval_dev_precision": 0.7659967186218212, |
| "eval_dev_recall": 0.8184507505204339, |
| "eval_loss": 0.2687513828277588, |
| "eval_runtime": 529.1402, |
| "eval_samples_per_second": 250.684, |
| "eval_steps_per_second": 7.835, |
| "step": 49000 |
| }, |
| { |
| "epoch": 0.6542566658227511, |
| "grad_norm": 12.15365982055664, |
| "learning_rate": 1.3456362425049967e-05, |
| "loss": 0.2359, |
| "step": 49100 |
| }, |
| { |
| "epoch": 0.6555891641238157, |
| "grad_norm": 12.457159996032715, |
| "learning_rate": 1.3589606928714192e-05, |
| "loss": 0.2392, |
| "step": 49200 |
| }, |
| { |
| "epoch": 0.6569216624248804, |
| "grad_norm": 0.6378312110900879, |
| "learning_rate": 1.3722851432378415e-05, |
| "loss": 0.2185, |
| "step": 49300 |
| }, |
| { |
| "epoch": 0.6582541607259451, |
| "grad_norm": 10.198519706726074, |
| "learning_rate": 1.385609593604264e-05, |
| "loss": 0.2497, |
| "step": 49400 |
| }, |
| { |
| "epoch": 0.6595866590270097, |
| "grad_norm": 0.6230494976043701, |
| "learning_rate": 1.3989340439706862e-05, |
| "loss": 0.2357, |
| "step": 49500 |
| }, |
| { |
| "epoch": 0.6595866590270097, |
| "eval_dev_accuracy": 0.9700030909104616, |
| "eval_dev_accuracy_threshold": 0.5345156192779541, |
| "eval_dev_average_precision": 0.8443688741553218, |
| "eval_dev_f1": 0.785516801361123, |
| "eval_dev_f1_threshold": 0.39208123087882996, |
| "eval_dev_precision": 0.7630410081603141, |
| "eval_dev_recall": 0.809356853292429, |
| "eval_loss": 0.270622581243515, |
| "eval_runtime": 527.7067, |
| "eval_samples_per_second": 251.365, |
| "eval_steps_per_second": 7.857, |
| "step": 49500 |
| }, |
| { |
| "epoch": 0.6609191573280744, |
| "grad_norm": 6.028562068939209, |
| "learning_rate": 1.4122584943371087e-05, |
| "loss": 0.2147, |
| "step": 49600 |
| }, |
| { |
| "epoch": 0.6622516556291391, |
| "grad_norm": 7.488621711730957, |
| "learning_rate": 1.4255829447035312e-05, |
| "loss": 0.2252, |
| "step": 49700 |
| }, |
| { |
| "epoch": 0.6635841539302038, |
| "grad_norm": 3.221320152282715, |
| "learning_rate": 1.4389073950699535e-05, |
| "loss": 0.2296, |
| "step": 49800 |
| }, |
| { |
| "epoch": 0.6649166522312684, |
| "grad_norm": 33.004817962646484, |
| "learning_rate": 1.452231845436376e-05, |
| "loss": 0.2434, |
| "step": 49900 |
| }, |
| { |
| "epoch": 0.666249150532333, |
| "grad_norm": 6.759824752807617, |
| "learning_rate": 1.4655562958027982e-05, |
| "loss": 0.2449, |
| "step": 50000 |
| }, |
| { |
| "epoch": 0.666249150532333, |
| "eval_dev_accuracy": 0.9705534237487466, |
| "eval_dev_accuracy_threshold": 0.9030373096466064, |
| "eval_dev_average_precision": 0.8517374123261313, |
| "eval_dev_f1": 0.7881202847731378, |
| "eval_dev_f1_threshold": 0.5092203617095947, |
| "eval_dev_precision": 0.7650335224342445, |
| "eval_dev_recall": 0.812643804097732, |
| "eval_loss": 0.24229487776756287, |
| "eval_runtime": 528.3673, |
| "eval_samples_per_second": 251.051, |
| "eval_steps_per_second": 7.847, |
| "step": 50000 |
| }, |
| { |
| "epoch": 0.6675816488333978, |
| "grad_norm": 0.4978267252445221, |
| "learning_rate": 1.4788807461692207e-05, |
| "loss": 0.3087, |
| "step": 50100 |
| }, |
| { |
| "epoch": 0.6689141471344624, |
| "grad_norm": 17.420612335205078, |
| "learning_rate": 1.492205196535643e-05, |
| "loss": 0.2188, |
| "step": 50200 |
| }, |
| { |
| "epoch": 0.670246645435527, |
| "grad_norm": 0.26254966855049133, |
| "learning_rate": 1.5055296469020654e-05, |
| "loss": 0.2214, |
| "step": 50300 |
| }, |
| { |
| "epoch": 0.6715791437365918, |
| "grad_norm": 16.93143653869629, |
| "learning_rate": 1.5188540972684877e-05, |
| "loss": 0.2141, |
| "step": 50400 |
| }, |
| { |
| "epoch": 0.6729116420376564, |
| "grad_norm": 5.481032848358154, |
| "learning_rate": 1.5321785476349102e-05, |
| "loss": 0.2534, |
| "step": 50500 |
| }, |
| { |
| "epoch": 0.6729116420376564, |
| "eval_dev_accuracy": 0.9701538670305397, |
| "eval_dev_accuracy_threshold": 0.9412756562232971, |
| "eval_dev_average_precision": 0.8418413944064206, |
| "eval_dev_f1": 0.78390731292517, |
| "eval_dev_f1_threshold": 0.8259508013725281, |
| "eval_dev_precision": 0.7611724636185365, |
| "eval_dev_recall": 0.8080420729703078, |
| "eval_loss": 0.28124794363975525, |
| "eval_runtime": 528.2314, |
| "eval_samples_per_second": 251.115, |
| "eval_steps_per_second": 7.849, |
| "step": 50500 |
| }, |
| { |
| "epoch": 0.674244140338721, |
| "grad_norm": 0.13247288763523102, |
| "learning_rate": 1.319120586275816e-07, |
| "loss": 0.2242, |
| "step": 50600 |
| }, |
| { |
| "epoch": 0.6755766386397858, |
| "grad_norm": 50.61308670043945, |
| "learning_rate": 2.651565622918055e-07, |
| "loss": 0.199, |
| "step": 50700 |
| }, |
| { |
| "epoch": 0.6769091369408504, |
| "grad_norm": 9.46574592590332, |
| "learning_rate": 3.984010659560293e-07, |
| "loss": 0.2019, |
| "step": 50800 |
| }, |
| { |
| "epoch": 0.678241635241915, |
| "grad_norm": 0.4613121449947357, |
| "learning_rate": 5.316455696202532e-07, |
| "loss": 0.2324, |
| "step": 50900 |
| }, |
| { |
| "epoch": 0.6795741335429797, |
| "grad_norm": 0.06632626801729202, |
| "learning_rate": 6.64890073284477e-07, |
| "loss": 0.2095, |
| "step": 51000 |
| }, |
| { |
| "epoch": 0.6795741335429797, |
| "eval_dev_accuracy": 0.9702820267326061, |
| "eval_dev_accuracy_threshold": 0.9349472522735596, |
| "eval_dev_average_precision": 0.8438139930773977, |
| "eval_dev_f1": 0.7839174599797903, |
| "eval_dev_f1_threshold": 0.7425632476806641, |
| "eval_dev_precision": 0.7616783794956593, |
| "eval_dev_recall": 0.8074942478360907, |
| "eval_loss": 0.2824593782424927, |
| "eval_runtime": 534.9937, |
| "eval_samples_per_second": 247.941, |
| "eval_steps_per_second": 7.75, |
| "step": 51000 |
| }, |
| { |
| "epoch": 0.6809066318440444, |
| "grad_norm": 0.5744990706443787, |
| "learning_rate": 7.981345769487009e-07, |
| "loss": 0.2757, |
| "step": 51100 |
| }, |
| { |
| "epoch": 0.6822391301451091, |
| "grad_norm": 44.8016471862793, |
| "learning_rate": 9.313790806129248e-07, |
| "loss": 0.2954, |
| "step": 51200 |
| }, |
| { |
| "epoch": 0.6835716284461737, |
| "grad_norm": 18.677654266357422, |
| "learning_rate": 1.0646235842771487e-06, |
| "loss": 0.2051, |
| "step": 51300 |
| }, |
| { |
| "epoch": 0.6849041267472384, |
| "grad_norm": 7.698785305023193, |
| "learning_rate": 1.1978680879413725e-06, |
| "loss": 0.2575, |
| "step": 51400 |
| }, |
| { |
| "epoch": 0.6862366250483031, |
| "grad_norm": 1.6236628293991089, |
| "learning_rate": 1.3311125916055965e-06, |
| "loss": 0.1763, |
| "step": 51500 |
| }, |
| { |
| "epoch": 0.6862366250483031, |
| "eval_dev_accuracy": 0.9702669491205983, |
| "eval_dev_accuracy_threshold": 0.9349033832550049, |
| "eval_dev_average_precision": 0.8468881842158165, |
| "eval_dev_f1": 0.783245178180264, |
| "eval_dev_f1_threshold": 0.763167142868042, |
| "eval_dev_precision": 0.7643378519290928, |
| "eval_dev_recall": 0.8031116467623535, |
| "eval_loss": 0.2643745541572571, |
| "eval_runtime": 526.1955, |
| "eval_samples_per_second": 252.087, |
| "eval_steps_per_second": 7.879, |
| "step": 51500 |
| }, |
| { |
| "epoch": 0.6875691233493677, |
| "grad_norm": 28.033424377441406, |
| "learning_rate": 1.4643570952698202e-06, |
| "loss": 0.2108, |
| "step": 51600 |
| }, |
| { |
| "epoch": 0.6889016216504324, |
| "grad_norm": 19.735244750976562, |
| "learning_rate": 1.597601598934044e-06, |
| "loss": 0.2313, |
| "step": 51700 |
| }, |
| { |
| "epoch": 0.6902341199514971, |
| "grad_norm": 2.9967164993286133, |
| "learning_rate": 1.7308461025982678e-06, |
| "loss": 0.2344, |
| "step": 51800 |
| }, |
| { |
| "epoch": 0.6915666182525617, |
| "grad_norm": 1.428648591041565, |
| "learning_rate": 1.864090606262492e-06, |
| "loss": 0.1968, |
| "step": 51900 |
| }, |
| { |
| "epoch": 0.6928991165536263, |
| "grad_norm": 0.3774360418319702, |
| "learning_rate": 1.9973351099267156e-06, |
| "loss": 0.2222, |
| "step": 52000 |
| }, |
| { |
| "epoch": 0.6928991165536263, |
| "eval_dev_accuracy": 0.9705609625547506, |
| "eval_dev_accuracy_threshold": 0.9212765693664551, |
| "eval_dev_average_precision": 0.8504652727472383, |
| "eval_dev_f1": 0.786851950828434, |
| "eval_dev_f1_threshold": 0.6886965036392212, |
| "eval_dev_precision": 0.7681310654283627, |
| "eval_dev_recall": 0.8065081625944999, |
| "eval_loss": 0.26056790351867676, |
| "eval_runtime": 524.9198, |
| "eval_samples_per_second": 252.7, |
| "eval_steps_per_second": 7.898, |
| "step": 52000 |
| }, |
| { |
| "epoch": 0.6942316148546911, |
| "grad_norm": 5.1444525718688965, |
| "learning_rate": 2.1305796135909398e-06, |
| "loss": 0.2213, |
| "step": 52100 |
| }, |
| { |
| "epoch": 0.6955641131557557, |
| "grad_norm": 0.18948954343795776, |
| "learning_rate": 2.2638241172551636e-06, |
| "loss": 0.2055, |
| "step": 52200 |
| }, |
| { |
| "epoch": 0.6968966114568204, |
| "grad_norm": 13.482624053955078, |
| "learning_rate": 2.3970686209193873e-06, |
| "loss": 0.2321, |
| "step": 52300 |
| }, |
| { |
| "epoch": 0.6982291097578851, |
| "grad_norm": 0.6994342803955078, |
| "learning_rate": 2.530313124583611e-06, |
| "loss": 0.257, |
| "step": 52400 |
| }, |
| { |
| "epoch": 0.6995616080589497, |
| "grad_norm": 0.9283449053764343, |
| "learning_rate": 2.663557628247835e-06, |
| "loss": 0.2398, |
| "step": 52500 |
| }, |
| { |
| "epoch": 0.6995616080589497, |
| "eval_dev_accuracy": 0.9711112953930356, |
| "eval_dev_accuracy_threshold": 0.9353954195976257, |
| "eval_dev_average_precision": 0.854664598144776, |
| "eval_dev_f1": 0.789044289044289, |
| "eval_dev_f1_threshold": 0.7551745176315308, |
| "eval_dev_precision": 0.7638732177659248, |
| "eval_dev_recall": 0.8159307549030349, |
| "eval_loss": 0.2365955263376236, |
| "eval_runtime": 526.0912, |
| "eval_samples_per_second": 252.137, |
| "eval_steps_per_second": 7.881, |
| "step": 52500 |
| }, |
| { |
| "epoch": 0.7008941063600144, |
| "grad_norm": 69.95816040039062, |
| "learning_rate": 2.7968021319120587e-06, |
| "loss": 0.2168, |
| "step": 52600 |
| }, |
| { |
| "epoch": 0.7022266046610791, |
| "grad_norm": 13.763835906982422, |
| "learning_rate": 2.930046635576283e-06, |
| "loss": 0.2066, |
| "step": 52700 |
| }, |
| { |
| "epoch": 0.7035591029621437, |
| "grad_norm": 2.3356781005859375, |
| "learning_rate": 3.0632911392405066e-06, |
| "loss": 0.222, |
| "step": 52800 |
| }, |
| { |
| "epoch": 0.7048916012632084, |
| "grad_norm": 4.479837417602539, |
| "learning_rate": 3.1965356429047304e-06, |
| "loss": 0.269, |
| "step": 52900 |
| }, |
| { |
| "epoch": 0.706224099564273, |
| "grad_norm": 15.155440330505371, |
| "learning_rate": 3.3297801465689546e-06, |
| "loss": 0.2327, |
| "step": 53000 |
| }, |
| { |
| "epoch": 0.706224099564273, |
| "eval_dev_accuracy": 0.971005752108981, |
| "eval_dev_accuracy_threshold": 0.9340351819992065, |
| "eval_dev_average_precision": 0.8546100599663748, |
| "eval_dev_f1": 0.7908306421726932, |
| "eval_dev_f1_threshold": 0.7827771306037903, |
| "eval_dev_precision": 0.7651096086867445, |
| "eval_dev_recall": 0.8183411854935905, |
| "eval_loss": 0.24670535326004028, |
| "eval_runtime": 524.1368, |
| "eval_samples_per_second": 253.077, |
| "eval_steps_per_second": 7.91, |
| "step": 53000 |
| }, |
| { |
| "epoch": 0.7075565978653378, |
| "grad_norm": 30.88198471069336, |
| "learning_rate": 3.4630246502331784e-06, |
| "loss": 0.2274, |
| "step": 53100 |
| }, |
| { |
| "epoch": 0.7088890961664024, |
| "grad_norm": 19.670501708984375, |
| "learning_rate": 3.596269153897402e-06, |
| "loss": 0.1619, |
| "step": 53200 |
| }, |
| { |
| "epoch": 0.710221594467467, |
| "grad_norm": 1.817409873008728, |
| "learning_rate": 3.729513657561626e-06, |
| "loss": 0.2105, |
| "step": 53300 |
| }, |
| { |
| "epoch": 0.7115540927685318, |
| "grad_norm": 7.859726428985596, |
| "learning_rate": 3.862758161225849e-06, |
| "loss": 0.2314, |
| "step": 53400 |
| }, |
| { |
| "epoch": 0.7128865910695964, |
| "grad_norm": 1.2846513986587524, |
| "learning_rate": 3.996002664890073e-06, |
| "loss": 0.2118, |
| "step": 53500 |
| }, |
| { |
| "epoch": 0.7128865910695964, |
| "eval_dev_accuracy": 0.9711263730050435, |
| "eval_dev_accuracy_threshold": 0.956214189529419, |
| "eval_dev_average_precision": 0.8529387869562187, |
| "eval_dev_f1": 0.7885323513940031, |
| "eval_dev_f1_threshold": 0.7215464115142822, |
| "eval_dev_precision": 0.7583729636749975, |
| "eval_dev_recall": 0.8211898761915196, |
| "eval_loss": 0.251621812582016, |
| "eval_runtime": 524.6134, |
| "eval_samples_per_second": 252.847, |
| "eval_steps_per_second": 7.903, |
| "step": 53500 |
| }, |
| { |
| "epoch": 0.714219089370661, |
| "grad_norm": 29.144947052001953, |
| "learning_rate": 4.129247168554298e-06, |
| "loss": 0.223, |
| "step": 53600 |
| }, |
| { |
| "epoch": 0.7155515876717257, |
| "grad_norm": 1.1121717691421509, |
| "learning_rate": 4.2624916722185215e-06, |
| "loss": 0.2177, |
| "step": 53700 |
| }, |
| { |
| "epoch": 0.7168840859727904, |
| "grad_norm": 20.09768295288086, |
| "learning_rate": 4.395736175882745e-06, |
| "loss": 0.2092, |
| "step": 53800 |
| }, |
| { |
| "epoch": 0.718216584273855, |
| "grad_norm": 0.34697094559669495, |
| "learning_rate": 4.528980679546969e-06, |
| "loss": 0.2112, |
| "step": 53900 |
| }, |
| { |
| "epoch": 0.7195490825749197, |
| "grad_norm": 27.53289222717285, |
| "learning_rate": 4.662225183211193e-06, |
| "loss": 0.2188, |
| "step": 54000 |
| }, |
| { |
| "epoch": 0.7195490825749197, |
| "eval_dev_accuracy": 0.9717445550973637, |
| "eval_dev_accuracy_threshold": 0.9209288358688354, |
| "eval_dev_average_precision": 0.8572864419695019, |
| "eval_dev_f1": 0.7925902130849127, |
| "eval_dev_f1_threshold": 0.5230389833450317, |
| "eval_dev_precision": 0.7749973824730395, |
| "eval_dev_recall": 0.8110003286950805, |
| "eval_loss": 0.2652234435081482, |
| "eval_runtime": 524.205, |
| "eval_samples_per_second": 253.044, |
| "eval_steps_per_second": 7.909, |
| "step": 54000 |
| }, |
| { |
| "epoch": 0.7208815808759844, |
| "grad_norm": 0.12331326305866241, |
| "learning_rate": 4.795469686875417e-06, |
| "loss": 0.1995, |
| "step": 54100 |
| }, |
| { |
| "epoch": 0.722214079177049, |
| "grad_norm": 26.130399703979492, |
| "learning_rate": 4.92871419053964e-06, |
| "loss": 0.1863, |
| "step": 54200 |
| }, |
| { |
| "epoch": 0.7235465774781137, |
| "grad_norm": 63.348262786865234, |
| "learning_rate": 5.061958694203864e-06, |
| "loss": 0.1885, |
| "step": 54300 |
| }, |
| { |
| "epoch": 0.7248790757791784, |
| "grad_norm": 4.434421539306641, |
| "learning_rate": 5.195203197868088e-06, |
| "loss": 0.2059, |
| "step": 54400 |
| }, |
| { |
| "epoch": 0.7262115740802431, |
| "grad_norm": 1.5990498065948486, |
| "learning_rate": 5.328447701532313e-06, |
| "loss": 0.1944, |
| "step": 54500 |
| }, |
| { |
| "epoch": 0.7262115740802431, |
| "eval_dev_accuracy": 0.9710962177810278, |
| "eval_dev_accuracy_threshold": 0.938183069229126, |
| "eval_dev_average_precision": 0.8581458729833185, |
| "eval_dev_f1": 0.79388743943347, |
| "eval_dev_f1_threshold": 0.692324697971344, |
| "eval_dev_precision": 0.7722187694220013, |
| "eval_dev_recall": 0.8168072751177824, |
| "eval_loss": 0.24563372135162354, |
| "eval_runtime": 524.5271, |
| "eval_samples_per_second": 252.889, |
| "eval_steps_per_second": 7.904, |
| "step": 54500 |
| }, |
| { |
| "epoch": 0.7275440723813077, |
| "grad_norm": 13.777716636657715, |
| "learning_rate": 5.461692205196536e-06, |
| "loss": 0.2015, |
| "step": 54600 |
| }, |
| { |
| "epoch": 0.7288765706823723, |
| "grad_norm": 0.40915578603744507, |
| "learning_rate": 5.59493670886076e-06, |
| "loss": 0.1804, |
| "step": 54700 |
| }, |
| { |
| "epoch": 0.7302090689834371, |
| "grad_norm": 2.3663179874420166, |
| "learning_rate": 5.728181212524984e-06, |
| "loss": 0.2424, |
| "step": 54800 |
| }, |
| { |
| "epoch": 0.7315415672845017, |
| "grad_norm": 19.617507934570312, |
| "learning_rate": 5.861425716189208e-06, |
| "loss": 0.2331, |
| "step": 54900 |
| }, |
| { |
| "epoch": 0.7328740655855663, |
| "grad_norm": 1.4067281484603882, |
| "learning_rate": 5.9946702198534315e-06, |
| "loss": 0.197, |
| "step": 55000 |
| }, |
| { |
| "epoch": 0.7328740655855663, |
| "eval_dev_accuracy": 0.9715711625592739, |
| "eval_dev_accuracy_threshold": 0.9351357221603394, |
| "eval_dev_average_precision": 0.8584440513483999, |
| "eval_dev_f1": 0.7944548676255994, |
| "eval_dev_f1_threshold": 0.3239399194717407, |
| "eval_dev_precision": 0.7575787695060133, |
| "eval_dev_recall": 0.8351046346006354, |
| "eval_loss": 0.271222859621048, |
| "eval_runtime": 526.0703, |
| "eval_samples_per_second": 252.147, |
| "eval_steps_per_second": 7.881, |
| "step": 55000 |
| }, |
| { |
| "epoch": 0.7342065638866311, |
| "grad_norm": 0.45710641145706177, |
| "learning_rate": 6.127914723517655e-06, |
| "loss": 0.2503, |
| "step": 55100 |
| }, |
| { |
| "epoch": 0.7355390621876957, |
| "grad_norm": 0.6267761588096619, |
| "learning_rate": 6.261159227181879e-06, |
| "loss": 0.2421, |
| "step": 55200 |
| }, |
| { |
| "epoch": 0.7368715604887603, |
| "grad_norm": 11.160945892333984, |
| "learning_rate": 6.394403730846103e-06, |
| "loss": 0.2169, |
| "step": 55300 |
| }, |
| { |
| "epoch": 0.7382040587898251, |
| "grad_norm": 0.22500374913215637, |
| "learning_rate": 6.527648234510327e-06, |
| "loss": 0.1801, |
| "step": 55400 |
| }, |
| { |
| "epoch": 0.7395365570908897, |
| "grad_norm": 0.34952008724212646, |
| "learning_rate": 6.660892738174551e-06, |
| "loss": 0.2168, |
| "step": 55500 |
| }, |
| { |
| "epoch": 0.7395365570908897, |
| "eval_dev_accuracy": 0.9718576371874222, |
| "eval_dev_accuracy_threshold": 0.9311728477478027, |
| "eval_dev_average_precision": 0.8606955219787713, |
| "eval_dev_f1": 0.7966432680635458, |
| "eval_dev_f1_threshold": 0.3317277133464813, |
| "eval_dev_precision": 0.7685336048879837, |
| "eval_dev_recall": 0.8268872575873781, |
| "eval_loss": 0.24974019825458527, |
| "eval_runtime": 524.3487, |
| "eval_samples_per_second": 252.975, |
| "eval_steps_per_second": 7.907, |
| "step": 55500 |
| }, |
| { |
| "epoch": 0.7408690553919544, |
| "grad_norm": 13.866408348083496, |
| "learning_rate": 6.794137241838775e-06, |
| "loss": 0.2266, |
| "step": 55600 |
| }, |
| { |
| "epoch": 0.742201553693019, |
| "grad_norm": 9.584277153015137, |
| "learning_rate": 6.927381745502999e-06, |
| "loss": 0.1882, |
| "step": 55700 |
| }, |
| { |
| "epoch": 0.7435340519940837, |
| "grad_norm": 52.4222297668457, |
| "learning_rate": 7.0606262491672225e-06, |
| "loss": 0.2214, |
| "step": 55800 |
| }, |
| { |
| "epoch": 0.7448665502951484, |
| "grad_norm": 15.216498374938965, |
| "learning_rate": 7.193870752831446e-06, |
| "loss": 0.23, |
| "step": 55900 |
| }, |
| { |
| "epoch": 0.746199048596213, |
| "grad_norm": 21.095590591430664, |
| "learning_rate": 7.32711525649567e-06, |
| "loss": 0.2355, |
| "step": 56000 |
| }, |
| { |
| "epoch": 0.746199048596213, |
| "eval_dev_accuracy": 0.9719631804714769, |
| "eval_dev_accuracy_threshold": 0.9183558821678162, |
| "eval_dev_average_precision": 0.8589405860687593, |
| "eval_dev_f1": 0.7974690109434157, |
| "eval_dev_f1_threshold": 0.33763912320137024, |
| "eval_dev_precision": 0.7571400433326768, |
| "eval_dev_recall": 0.8423359263723019, |
| "eval_loss": 0.24558140337467194, |
| "eval_runtime": 523.2596, |
| "eval_samples_per_second": 253.501, |
| "eval_steps_per_second": 7.923, |
| "step": 56000 |
| }, |
| { |
| "epoch": 0.7475315468972777, |
| "grad_norm": 87.9457778930664, |
| "learning_rate": 7.460359760159894e-06, |
| "loss": 0.2105, |
| "step": 56100 |
| }, |
| { |
| "epoch": 0.7488640451983424, |
| "grad_norm": 1.1765731573104858, |
| "learning_rate": 7.593604263824118e-06, |
| "loss": 0.1608, |
| "step": 56200 |
| }, |
| { |
| "epoch": 0.750196543499407, |
| "grad_norm": 12.082050323486328, |
| "learning_rate": 7.726848767488342e-06, |
| "loss": 0.214, |
| "step": 56300 |
| }, |
| { |
| "epoch": 0.7515290418004718, |
| "grad_norm": 17.673494338989258, |
| "learning_rate": 7.860093271152565e-06, |
| "loss": 0.2531, |
| "step": 56400 |
| }, |
| { |
| "epoch": 0.7528615401015364, |
| "grad_norm": 4.850943565368652, |
| "learning_rate": 7.99333777481679e-06, |
| "loss": 0.2641, |
| "step": 56500 |
| }, |
| { |
| "epoch": 0.7528615401015364, |
| "eval_dev_accuracy": 0.9718953312174418, |
| "eval_dev_accuracy_threshold": 0.9289690852165222, |
| "eval_dev_average_precision": 0.8607199959963239, |
| "eval_dev_f1": 0.7934619562406249, |
| "eval_dev_f1_threshold": 0.2598855793476105, |
| "eval_dev_precision": 0.7515187144816774, |
| "eval_dev_recall": 0.8403637558891202, |
| "eval_loss": 0.24914328753948212, |
| "eval_runtime": 526.4308, |
| "eval_samples_per_second": 251.974, |
| "eval_steps_per_second": 7.876, |
| "step": 56500 |
| }, |
| { |
| "epoch": 0.754194038402601, |
| "grad_norm": 21.872079849243164, |
| "learning_rate": 8.126582278481013e-06, |
| "loss": 0.2002, |
| "step": 56600 |
| }, |
| { |
| "epoch": 0.7555265367036657, |
| "grad_norm": 0.3463062345981598, |
| "learning_rate": 8.259826782145237e-06, |
| "loss": 0.1727, |
| "step": 56700 |
| }, |
| { |
| "epoch": 0.7568590350047304, |
| "grad_norm": 4.641270637512207, |
| "learning_rate": 8.39307128580946e-06, |
| "loss": 0.2135, |
| "step": 56800 |
| }, |
| { |
| "epoch": 0.758191533305795, |
| "grad_norm": 1.456807017326355, |
| "learning_rate": 8.526315789473685e-06, |
| "loss": 0.1694, |
| "step": 56900 |
| }, |
| { |
| "epoch": 0.7595240316068597, |
| "grad_norm": 0.2848343551158905, |
| "learning_rate": 8.659560293137908e-06, |
| "loss": 0.1969, |
| "step": 57000 |
| }, |
| { |
| "epoch": 0.7595240316068597, |
| "eval_dev_accuracy": 0.9716917834553364, |
| "eval_dev_accuracy_threshold": 0.9249356389045715, |
| "eval_dev_average_precision": 0.8628574223791167, |
| "eval_dev_f1": 0.7945488333677474, |
| "eval_dev_f1_threshold": 0.2702260911464691, |
| "eval_dev_precision": 0.7511957052220596, |
| "eval_dev_recall": 0.8432124465870494, |
| "eval_loss": 0.2667163014411926, |
| "eval_runtime": 523.0471, |
| "eval_samples_per_second": 253.604, |
| "eval_steps_per_second": 7.927, |
| "step": 57000 |
| }, |
| { |
| "epoch": 1.521692783285364, |
| "grad_norm": 680.6102294921875, |
| "learning_rate": 1.1723219044235212e-05, |
| "loss": 0.1989, |
| "step": 57100 |
| }, |
| { |
| "epoch": 1.524357744376932, |
| "grad_norm": 555.5462036132812, |
| "learning_rate": 1.1900870492094512e-05, |
| "loss": 0.1823, |
| "step": 57200 |
| }, |
| { |
| "epoch": 1.5270227054685002, |
| "grad_norm": 19347.361328125, |
| "learning_rate": 1.207852193995381e-05, |
| "loss": 0.2099, |
| "step": 57300 |
| }, |
| { |
| "epoch": 1.5296876665600683, |
| "grad_norm": 28487.04296875, |
| "learning_rate": 1.225617338781311e-05, |
| "loss": 0.2007, |
| "step": 57400 |
| }, |
| { |
| "epoch": 1.5323526276516364, |
| "grad_norm": 33787.03515625, |
| "learning_rate": 1.2433824835672413e-05, |
| "loss": 0.1893, |
| "step": 57500 |
| }, |
| { |
| "epoch": 1.5323526276516364, |
| "eval_dev_accuracy": 0.9712469939011059, |
| "eval_dev_accuracy_threshold": 0.930076539516449, |
| "eval_dev_average_precision": 0.8589126571915907, |
| "eval_dev_f1": 0.788643194504079, |
| "eval_dev_f1_threshold": 0.8417924642562866, |
| "eval_dev_precision": 0.7729615991583377, |
| "eval_dev_recall": 0.8049742522186918, |
| "eval_loss": 0.22310471534729004, |
| "eval_runtime": 911.6835, |
| "eval_samples_per_second": 145.497, |
| "eval_steps_per_second": 2.274, |
| "step": 57500 |
| }, |
| { |
| "epoch": 1.5350175887432043, |
| "grad_norm": 10426.8994140625, |
| "learning_rate": 1.2611476283531711e-05, |
| "loss": 0.1941, |
| "step": 57600 |
| }, |
| { |
| "epoch": 1.5376825498347724, |
| "grad_norm": 20932.927734375, |
| "learning_rate": 1.2789127731391012e-05, |
| "loss": 0.1917, |
| "step": 57700 |
| }, |
| { |
| "epoch": 1.5403475109263405, |
| "grad_norm": 19958.53125, |
| "learning_rate": 1.2966779179250314e-05, |
| "loss": 0.1704, |
| "step": 57800 |
| }, |
| { |
| "epoch": 1.5430124720179086, |
| "grad_norm": 4519.30517578125, |
| "learning_rate": 1.3144430627109612e-05, |
| "loss": 0.1769, |
| "step": 57900 |
| }, |
| { |
| "epoch": 1.5456774331094767, |
| "grad_norm": 1185.6409912109375, |
| "learning_rate": 1.3322082074968912e-05, |
| "loss": 0.1917, |
| "step": 58000 |
| }, |
| { |
| "epoch": 1.5456774331094767, |
| "eval_dev_accuracy": 0.971314843155141, |
| "eval_dev_accuracy_threshold": 0.9302895069122314, |
| "eval_dev_average_precision": 0.8581921137101376, |
| "eval_dev_f1": 0.7902556259558663, |
| "eval_dev_f1_threshold": 0.9142668843269348, |
| "eval_dev_precision": 0.7879315978651563, |
| "eval_dev_recall": 0.792593404185384, |
| "eval_loss": 0.21683622896671295, |
| "eval_runtime": 910.3929, |
| "eval_samples_per_second": 145.703, |
| "eval_steps_per_second": 2.277, |
| "step": 58000 |
| }, |
| { |
| "epoch": 1.5483423942010446, |
| "grad_norm": 10156.921875, |
| "learning_rate": 1.3499733522828211e-05, |
| "loss": 0.156, |
| "step": 58100 |
| }, |
| { |
| "epoch": 1.5510073552926127, |
| "grad_norm": 20830.22265625, |
| "learning_rate": 1.3677384970687513e-05, |
| "loss": 0.1882, |
| "step": 58200 |
| }, |
| { |
| "epoch": 1.5536723163841808, |
| "grad_norm": 10158.1328125, |
| "learning_rate": 1.3855036418546812e-05, |
| "loss": 0.1914, |
| "step": 58300 |
| }, |
| { |
| "epoch": 1.556337277475749, |
| "grad_norm": 12550.0205078125, |
| "learning_rate": 1.4032687866406112e-05, |
| "loss": 0.1859, |
| "step": 58400 |
| }, |
| { |
| "epoch": 1.559002238567317, |
| "grad_norm": 25116.525390625, |
| "learning_rate": 1.4210339314265414e-05, |
| "loss": 0.1915, |
| "step": 58500 |
| }, |
| { |
| "epoch": 1.559002238567317, |
| "eval_dev_accuracy": 0.9707343550928405, |
| "eval_dev_accuracy_threshold": 0.9600124359130859, |
| "eval_dev_average_precision": 0.8552104699335599, |
| "eval_dev_f1": 0.788252996419862, |
| "eval_dev_f1_threshold": 0.6280207633972168, |
| "eval_dev_precision": 0.7486694263749261, |
| "eval_dev_recall": 0.8322559439027063, |
| "eval_loss": 0.22474558651447296, |
| "eval_runtime": 912.547, |
| "eval_samples_per_second": 145.359, |
| "eval_steps_per_second": 2.272, |
| "step": 58500 |
| }, |
| { |
| "epoch": 1.561667199658885, |
| "grad_norm": 1747.8248291015625, |
| "learning_rate": 1.4387990762124712e-05, |
| "loss": 0.1658, |
| "step": 58600 |
| }, |
| { |
| "epoch": 1.564332160750453, |
| "grad_norm": 10528.990234375, |
| "learning_rate": 1.4565642209984013e-05, |
| "loss": 0.1877, |
| "step": 58700 |
| }, |
| { |
| "epoch": 1.5669971218420211, |
| "grad_norm": 14108.591796875, |
| "learning_rate": 1.4743293657843311e-05, |
| "loss": 0.1972, |
| "step": 58800 |
| }, |
| { |
| "epoch": 1.5696620829335892, |
| "grad_norm": 33609.73828125, |
| "learning_rate": 1.4920945105702613e-05, |
| "loss": 0.1915, |
| "step": 58900 |
| }, |
| { |
| "epoch": 1.5723270440251573, |
| "grad_norm": 14393.123046875, |
| "learning_rate": 1.5098596553561913e-05, |
| "loss": 0.1982, |
| "step": 59000 |
| }, |
| { |
| "epoch": 1.5723270440251573, |
| "eval_dev_accuracy": 0.9714354640512036, |
| "eval_dev_accuracy_threshold": 0.861323356628418, |
| "eval_dev_average_precision": 0.8617997355004788, |
| "eval_dev_f1": 0.792690745885873, |
| "eval_dev_f1_threshold": 0.5087981224060059, |
| "eval_dev_precision": 0.7735947439774742, |
| "eval_dev_recall": 0.8127533691245754, |
| "eval_loss": 0.27619487047195435, |
| "eval_runtime": 912.722, |
| "eval_samples_per_second": 145.331, |
| "eval_steps_per_second": 2.271, |
| "step": 59000 |
| }, |
| { |
| "epoch": 1.5749920051167252, |
| "grad_norm": 2650.031982421875, |
| "learning_rate": 1.5276248001421212e-05, |
| "loss": 0.1977, |
| "step": 59100 |
| }, |
| { |
| "epoch": 1.5776569662082933, |
| "grad_norm": 21126.404296875, |
| "learning_rate": 1.5453899449280514e-05, |
| "loss": 0.1646, |
| "step": 59200 |
| }, |
| { |
| "epoch": 1.5803219272998614, |
| "grad_norm": 1604.2296142578125, |
| "learning_rate": 1.5631550897139813e-05, |
| "loss": 0.1855, |
| "step": 59300 |
| }, |
| { |
| "epoch": 1.5829868883914295, |
| "grad_norm": 9624.1689453125, |
| "learning_rate": 1.580920234499911e-05, |
| "loss": 0.1809, |
| "step": 59400 |
| }, |
| { |
| "epoch": 1.5856518494829976, |
| "grad_norm": 4949.5078125, |
| "learning_rate": 1.5986853792858413e-05, |
| "loss": 0.185, |
| "step": 59500 |
| }, |
| { |
| "epoch": 1.5856518494829976, |
| "eval_dev_accuracy": 0.9717068610673442, |
| "eval_dev_accuracy_threshold": 0.9281443357467651, |
| "eval_dev_average_precision": 0.8651298435899648, |
| "eval_dev_f1": 0.7949938492806332, |
| "eval_dev_f1_threshold": 0.7204960584640503, |
| "eval_dev_precision": 0.7765935214211076, |
| "eval_dev_recall": 0.8142872795003835, |
| "eval_loss": 0.23017099499702454, |
| "eval_runtime": 912.0946, |
| "eval_samples_per_second": 145.431, |
| "eval_steps_per_second": 2.273, |
| "step": 59500 |
| }, |
| { |
| "epoch": 1.5883168105745655, |
| "grad_norm": 4366.28125, |
| "learning_rate": 1.6164505240717715e-05, |
| "loss": 0.1524, |
| "step": 59600 |
| }, |
| { |
| "epoch": 1.5909817716661336, |
| "grad_norm": 6088.126953125, |
| "learning_rate": 1.6342156688577014e-05, |
| "loss": 0.1626, |
| "step": 59700 |
| }, |
| { |
| "epoch": 1.5936467327577017, |
| "grad_norm": 41741.02734375, |
| "learning_rate": 1.6519808136436312e-05, |
| "loss": 0.1855, |
| "step": 59800 |
| }, |
| { |
| "epoch": 1.5963116938492699, |
| "grad_norm": 6351.677734375, |
| "learning_rate": 1.6697459584295614e-05, |
| "loss": 0.1777, |
| "step": 59900 |
| }, |
| { |
| "epoch": 1.598976654940838, |
| "grad_norm": 667.612548828125, |
| "learning_rate": 1.6875111032154913e-05, |
| "loss": 0.1519, |
| "step": 60000 |
| }, |
| { |
| "epoch": 1.598976654940838, |
| "eval_dev_accuracy": 0.9702217162845749, |
| "eval_dev_accuracy_threshold": 0.9527369737625122, |
| "eval_dev_average_precision": 0.8599004250878434, |
| "eval_dev_f1": 0.7857490403849272, |
| "eval_dev_f1_threshold": 0.9123563170433044, |
| "eval_dev_precision": 0.7755602988260406, |
| "eval_dev_recall": 0.7962090500712172, |
| "eval_loss": 0.23386961221694946, |
| "eval_runtime": 912.5307, |
| "eval_samples_per_second": 145.362, |
| "eval_steps_per_second": 2.272, |
| "step": 60000 |
| }, |
| { |
| "epoch": 1.6016416160324058, |
| "grad_norm": 74362.328125, |
| "learning_rate": 1.7052762480014215e-05, |
| "loss": 0.1705, |
| "step": 60100 |
| }, |
| { |
| "epoch": 1.604306577123974, |
| "grad_norm": 41024.45703125, |
| "learning_rate": 1.7230413927873513e-05, |
| "loss": 0.1868, |
| "step": 60200 |
| }, |
| { |
| "epoch": 1.606971538215542, |
| "grad_norm": 10907.779296875, |
| "learning_rate": 1.7408065375732815e-05, |
| "loss": 0.1801, |
| "step": 60300 |
| }, |
| { |
| "epoch": 1.6096364993071102, |
| "grad_norm": 17233.494140625, |
| "learning_rate": 1.7585716823592114e-05, |
| "loss": 0.1672, |
| "step": 60400 |
| }, |
| { |
| "epoch": 1.6123014603986783, |
| "grad_norm": 6108.4228515625, |
| "learning_rate": 1.7763368271451412e-05, |
| "loss": 0.1619, |
| "step": 60500 |
| }, |
| { |
| "epoch": 1.6123014603986783, |
| "eval_dev_accuracy": 0.9701990998665632, |
| "eval_dev_accuracy_threshold": 0.9653939604759216, |
| "eval_dev_average_precision": 0.8583701139769879, |
| "eval_dev_f1": 0.7852786105654916, |
| "eval_dev_f1_threshold": 0.4483921527862549, |
| "eval_dev_precision": 0.7433212643115765, |
| "eval_dev_recall": 0.8322559439027063, |
| "eval_loss": 0.2841331958770752, |
| "eval_runtime": 912.3726, |
| "eval_samples_per_second": 145.387, |
| "eval_steps_per_second": 2.272, |
| "step": 60500 |
| }, |
| { |
| "epoch": 1.6149664214902462, |
| "grad_norm": 753.2778930664062, |
| "learning_rate": 1.7941019719310714e-05, |
| "loss": 0.1775, |
| "step": 60600 |
| }, |
| { |
| "epoch": 1.6176313825818143, |
| "grad_norm": 7861.2724609375, |
| "learning_rate": 1.8118671167170013e-05, |
| "loss": 0.1539, |
| "step": 60700 |
| }, |
| { |
| "epoch": 1.6202963436733824, |
| "grad_norm": 4606.5625, |
| "learning_rate": 1.8296322615029315e-05, |
| "loss": 0.1984, |
| "step": 60800 |
| }, |
| { |
| "epoch": 1.6229613047649505, |
| "grad_norm": 3256.729248046875, |
| "learning_rate": 1.8473974062888614e-05, |
| "loss": 0.1936, |
| "step": 60900 |
| }, |
| { |
| "epoch": 1.6256262658565186, |
| "grad_norm": 16788.51953125, |
| "learning_rate": 1.8651625510747916e-05, |
| "loss": 0.1928, |
| "step": 61000 |
| }, |
| { |
| "epoch": 1.6256262658565186, |
| "eval_dev_accuracy": 0.9702820267326061, |
| "eval_dev_accuracy_threshold": 0.9588229656219482, |
| "eval_dev_average_precision": 0.8578683942622316, |
| "eval_dev_f1": 0.78329335697153, |
| "eval_dev_f1_threshold": 0.8013461232185364, |
| "eval_dev_precision": 0.7472888269823899, |
| "eval_dev_recall": 0.8229429166210146, |
| "eval_loss": 0.21942387521266937, |
| "eval_runtime": 911.7434, |
| "eval_samples_per_second": 145.487, |
| "eval_steps_per_second": 2.274, |
| "step": 61000 |
| }, |
| { |
| "epoch": 1.6282912269480865, |
| "grad_norm": 1664.8751220703125, |
| "learning_rate": 1.8829276958607214e-05, |
| "loss": 0.166, |
| "step": 61100 |
| }, |
| { |
| "epoch": 1.6309561880396546, |
| "grad_norm": 21448.6796875, |
| "learning_rate": 1.9006928406466513e-05, |
| "loss": 0.1774, |
| "step": 61200 |
| }, |
| { |
| "epoch": 1.6336211491312227, |
| "grad_norm": 18060.765625, |
| "learning_rate": 1.9184579854325815e-05, |
| "loss": 0.1319, |
| "step": 61300 |
| }, |
| { |
| "epoch": 1.6362861102227908, |
| "grad_norm": 7385.87353515625, |
| "learning_rate": 1.9362231302185113e-05, |
| "loss": 0.1971, |
| "step": 61400 |
| }, |
| { |
| "epoch": 1.638951071314359, |
| "grad_norm": 5024.80078125, |
| "learning_rate": 1.9539882750044415e-05, |
| "loss": 0.1728, |
| "step": 61500 |
| }, |
| { |
| "epoch": 1.638951071314359, |
| "eval_dev_accuracy": 0.9713073043491371, |
| "eval_dev_accuracy_threshold": 0.9408199787139893, |
| "eval_dev_average_precision": 0.8671213714406215, |
| "eval_dev_f1": 0.7911789297658863, |
| "eval_dev_f1_threshold": 0.6503252983093262, |
| "eval_dev_precision": 0.7563193126186433, |
| "eval_dev_recall": 0.829407253204777, |
| "eval_loss": 0.23295743763446808, |
| "eval_runtime": 911.9086, |
| "eval_samples_per_second": 145.461, |
| "eval_steps_per_second": 2.273, |
| "step": 61500 |
| }, |
| { |
| "epoch": 1.6416160324059268, |
| "grad_norm": 8569.271484375, |
| "learning_rate": 1.9717534197903714e-05, |
| "loss": 0.1703, |
| "step": 61600 |
| }, |
| { |
| "epoch": 1.644280993497495, |
| "grad_norm": 20367.513671875, |
| "learning_rate": 1.9895185645763016e-05, |
| "loss": 0.1624, |
| "step": 61700 |
| }, |
| { |
| "epoch": 1.646945954589063, |
| "grad_norm": 1712.7371826171875, |
| "learning_rate": 1.9991906350553724e-05, |
| "loss": 0.1526, |
| "step": 61800 |
| }, |
| { |
| "epoch": 1.6496109156806311, |
| "grad_norm": 408.11163330078125, |
| "learning_rate": 1.9972165742148174e-05, |
| "loss": 0.1611, |
| "step": 61900 |
| }, |
| { |
| "epoch": 1.6522758767721992, |
| "grad_norm": 6086.27587890625, |
| "learning_rate": 1.9952425133742624e-05, |
| "loss": 0.1603, |
| "step": 62000 |
| }, |
| { |
| "epoch": 1.6522758767721992, |
| "eval_dev_accuracy": 0.971164067035063, |
| "eval_dev_accuracy_threshold": 0.9526249170303345, |
| "eval_dev_average_precision": 0.8575865091547995, |
| "eval_dev_f1": 0.7882105728821057, |
| "eval_dev_f1_threshold": 0.9510890245437622, |
| "eval_dev_precision": 0.7956905213799264, |
| "eval_dev_recall": 0.7808699463131369, |
| "eval_loss": 0.2797718644142151, |
| "eval_runtime": 911.0244, |
| "eval_samples_per_second": 145.602, |
| "eval_steps_per_second": 2.275, |
| "step": 62000 |
| }, |
| { |
| "epoch": 1.6549408378637671, |
| "grad_norm": 448.80615234375, |
| "learning_rate": 1.993268452533707e-05, |
| "loss": 0.1546, |
| "step": 62100 |
| }, |
| { |
| "epoch": 1.6576057989553352, |
| "grad_norm": 31734.08984375, |
| "learning_rate": 1.991294391693152e-05, |
| "loss": 0.2302, |
| "step": 62200 |
| }, |
| { |
| "epoch": 1.6602707600469033, |
| "grad_norm": 18211.0, |
| "learning_rate": 1.989320330852597e-05, |
| "loss": 0.1694, |
| "step": 62300 |
| }, |
| { |
| "epoch": 1.6629357211384712, |
| "grad_norm": 8841.400390625, |
| "learning_rate": 1.9873462700120417e-05, |
| "loss": 0.1705, |
| "step": 62400 |
| }, |
| { |
| "epoch": 1.6656006822300395, |
| "grad_norm": 24008.82421875, |
| "learning_rate": 1.985372209171487e-05, |
| "loss": 0.1606, |
| "step": 62500 |
| }, |
| { |
| "epoch": 1.6656006822300395, |
| "eval_dev_accuracy": 0.9708549759889029, |
| "eval_dev_accuracy_threshold": 0.9542537927627563, |
| "eval_dev_average_precision": 0.841409192198319, |
| "eval_dev_f1": 0.7890381515314348, |
| "eval_dev_f1_threshold": 0.8909753561019897, |
| "eval_dev_precision": 0.7742275651165244, |
| "eval_dev_recall": 0.8044264270844746, |
| "eval_loss": 0.2822663486003876, |
| "eval_runtime": 911.9083, |
| "eval_samples_per_second": 145.461, |
| "eval_steps_per_second": 2.273, |
| "step": 62500 |
| }, |
| { |
| "epoch": 1.6682656433216074, |
| "grad_norm": 2424.1279296875, |
| "learning_rate": 1.9833981483309317e-05, |
| "loss": 0.1887, |
| "step": 62600 |
| }, |
| { |
| "epoch": 1.6709306044131755, |
| "grad_norm": 45195.04296875, |
| "learning_rate": 1.9814240874903764e-05, |
| "loss": 0.1918, |
| "step": 62700 |
| }, |
| { |
| "epoch": 1.6735955655047436, |
| "grad_norm": 2223.521728515625, |
| "learning_rate": 1.9794500266498217e-05, |
| "loss": 0.1475, |
| "step": 62800 |
| }, |
| { |
| "epoch": 1.6762605265963115, |
| "grad_norm": 2829.02099609375, |
| "learning_rate": 1.9774759658092664e-05, |
| "loss": 0.1995, |
| "step": 62900 |
| }, |
| { |
| "epoch": 1.6789254876878799, |
| "grad_norm": 11702.283203125, |
| "learning_rate": 1.975501904968711e-05, |
| "loss": 0.1648, |
| "step": 63000 |
| }, |
| { |
| "epoch": 1.6789254876878799, |
| "eval_dev_accuracy": 0.9710359073329966, |
| "eval_dev_accuracy_threshold": 0.910698652267456, |
| "eval_dev_average_precision": 0.849610869643878, |
| "eval_dev_f1": 0.7900427192658614, |
| "eval_dev_f1_threshold": 0.4727928936481476, |
| "eval_dev_precision": 0.7616432784218019, |
| "eval_dev_recall": 0.8206420510573025, |
| "eval_loss": 0.25969284772872925, |
| "eval_runtime": 910.5015, |
| "eval_samples_per_second": 145.686, |
| "eval_steps_per_second": 2.277, |
| "step": 63000 |
| }, |
| { |
| "epoch": 1.6815904487794477, |
| "grad_norm": 21649.341796875, |
| "learning_rate": 1.9735278441281564e-05, |
| "loss": 0.1788, |
| "step": 63100 |
| }, |
| { |
| "epoch": 1.6842554098710159, |
| "grad_norm": 86422.7421875, |
| "learning_rate": 1.971553783287601e-05, |
| "loss": 0.2286, |
| "step": 63200 |
| }, |
| { |
| "epoch": 1.686920370962584, |
| "grad_norm": 45808.265625, |
| "learning_rate": 1.969579722447046e-05, |
| "loss": 0.1611, |
| "step": 63300 |
| }, |
| { |
| "epoch": 1.6895853320541518, |
| "grad_norm": 13495.0380859375, |
| "learning_rate": 1.967605661606491e-05, |
| "loss": 0.1962, |
| "step": 63400 |
| }, |
| { |
| "epoch": 1.6922502931457202, |
| "grad_norm": 22458.46484375, |
| "learning_rate": 1.9656316007659357e-05, |
| "loss": 0.1825, |
| "step": 63500 |
| }, |
| { |
| "epoch": 1.6922502931457202, |
| "eval_dev_accuracy": 0.9710283685269927, |
| "eval_dev_accuracy_threshold": 0.932883620262146, |
| "eval_dev_average_precision": 0.8574042822114104, |
| "eval_dev_f1": 0.7900720576461169, |
| "eval_dev_f1_threshold": 0.9062104225158691, |
| "eval_dev_precision": 0.7702955870108243, |
| "eval_dev_recall": 0.8108907636682371, |
| "eval_loss": 0.20927684009075165, |
| "eval_runtime": 911.9738, |
| "eval_samples_per_second": 145.45, |
| "eval_steps_per_second": 2.273, |
| "step": 63500 |
| }, |
| { |
| "epoch": 1.694915254237288, |
| "grad_norm": 4333.6484375, |
| "learning_rate": 1.9636575399253807e-05, |
| "loss": 0.1795, |
| "step": 63600 |
| }, |
| { |
| "epoch": 1.6975802153288562, |
| "grad_norm": 51141.83203125, |
| "learning_rate": 1.9616834790848257e-05, |
| "loss": 0.1944, |
| "step": 63700 |
| }, |
| { |
| "epoch": 1.7002451764204243, |
| "grad_norm": 24413.966796875, |
| "learning_rate": 1.9597094182442704e-05, |
| "loss": 0.196, |
| "step": 63800 |
| }, |
| { |
| "epoch": 1.7029101375119922, |
| "grad_norm": 11386.5224609375, |
| "learning_rate": 1.9577353574037154e-05, |
| "loss": 0.1851, |
| "step": 63900 |
| }, |
| { |
| "epoch": 1.7055750986035605, |
| "grad_norm": 1291.42236328125, |
| "learning_rate": 1.9557612965631604e-05, |
| "loss": 0.1787, |
| "step": 64000 |
| }, |
| { |
| "epoch": 1.7055750986035605, |
| "eval_dev_accuracy": 0.9717747103213793, |
| "eval_dev_accuracy_threshold": 0.9631803035736084, |
| "eval_dev_average_precision": 0.8630868871875782, |
| "eval_dev_f1": 0.7981506777345803, |
| "eval_dev_f1_threshold": 0.9355161786079407, |
| "eval_dev_precision": 0.7667305945291208, |
| "eval_dev_recall": 0.8322559439027063, |
| "eval_loss": 0.23051400482654572, |
| "eval_runtime": 912.7714, |
| "eval_samples_per_second": 145.323, |
| "eval_steps_per_second": 2.271, |
| "step": 64000 |
| }, |
| { |
| "epoch": 1.7082400596951284, |
| "grad_norm": 689.987060546875, |
| "learning_rate": 1.953787235722605e-05, |
| "loss": 0.1924, |
| "step": 64100 |
| }, |
| { |
| "epoch": 1.7109050207866965, |
| "grad_norm": 11370.0517578125, |
| "learning_rate": 1.95181317488205e-05, |
| "loss": 0.1611, |
| "step": 64200 |
| }, |
| { |
| "epoch": 1.7135699818782646, |
| "grad_norm": 15404.4140625, |
| "learning_rate": 1.949839114041495e-05, |
| "loss": 0.1799, |
| "step": 64300 |
| }, |
| { |
| "epoch": 1.7162349429698325, |
| "grad_norm": 14026.65234375, |
| "learning_rate": 1.9478650532009397e-05, |
| "loss": 0.1977, |
| "step": 64400 |
| }, |
| { |
| "epoch": 1.7188999040614008, |
| "grad_norm": 1225.2841796875, |
| "learning_rate": 1.9458909923603847e-05, |
| "loss": 0.1672, |
| "step": 64500 |
| }, |
| { |
| "epoch": 1.7188999040614008, |
| "eval_dev_accuracy": 0.9720687237555315, |
| "eval_dev_accuracy_threshold": 0.920991063117981, |
| "eval_dev_average_precision": 0.845700489229083, |
| "eval_dev_f1": 0.7995607383778697, |
| "eval_dev_f1_threshold": 0.6048256158828735, |
| "eval_dev_precision": 0.7648059223689476, |
| "eval_dev_recall": 0.8376246302180343, |
| "eval_loss": 0.21997055411338806, |
| "eval_runtime": 912.741, |
| "eval_samples_per_second": 145.328, |
| "eval_steps_per_second": 2.271, |
| "step": 64500 |
| }, |
| { |
| "epoch": 1.7215648651529687, |
| "grad_norm": 18876.72265625, |
| "learning_rate": 1.9439169315198297e-05, |
| "loss": 0.1812, |
| "step": 64600 |
| }, |
| { |
| "epoch": 1.7242298262445368, |
| "grad_norm": 44768.2578125, |
| "learning_rate": 1.9419428706792744e-05, |
| "loss": 0.1641, |
| "step": 64700 |
| }, |
| { |
| "epoch": 1.726894787336105, |
| "grad_norm": 1987.0482177734375, |
| "learning_rate": 1.9399688098387194e-05, |
| "loss": 0.1526, |
| "step": 64800 |
| }, |
| { |
| "epoch": 1.7295597484276728, |
| "grad_norm": 1468.9228515625, |
| "learning_rate": 1.9379947489981644e-05, |
| "loss": 0.1745, |
| "step": 64900 |
| }, |
| { |
| "epoch": 1.7322247095192411, |
| "grad_norm": 2461.248291015625, |
| "learning_rate": 1.936020688157609e-05, |
| "loss": 0.2017, |
| "step": 65000 |
| }, |
| { |
| "epoch": 1.7322247095192411, |
| "eval_dev_accuracy": 0.9716842446493325, |
| "eval_dev_accuracy_threshold": 0.8851553201675415, |
| "eval_dev_average_precision": 0.8642482817005424, |
| "eval_dev_f1": 0.7979695431472081, |
| "eval_dev_f1_threshold": 0.674056887626648, |
| "eval_dev_precision": 0.7787859824780976, |
| "eval_dev_recall": 0.8181220554399036, |
| "eval_loss": 0.25105008482933044, |
| "eval_runtime": 933.219, |
| "eval_samples_per_second": 142.139, |
| "eval_steps_per_second": 2.221, |
| "step": 65000 |
| }, |
| { |
| "epoch": 1.734889670610809, |
| "grad_norm": 1901.36474609375, |
| "learning_rate": 1.934046627317054e-05, |
| "loss": 0.2092, |
| "step": 65100 |
| }, |
| { |
| "epoch": 1.7375546317023771, |
| "grad_norm": 25123.84375, |
| "learning_rate": 1.932072566476499e-05, |
| "loss": 0.1807, |
| "step": 65200 |
| }, |
| { |
| "epoch": 1.7402195927939452, |
| "grad_norm": 21136.314453125, |
| "learning_rate": 1.9300985056359437e-05, |
| "loss": 0.1627, |
| "step": 65300 |
| }, |
| { |
| "epoch": 1.742884553885513, |
| "grad_norm": 14610.0068359375, |
| "learning_rate": 1.9281244447953887e-05, |
| "loss": 0.1809, |
| "step": 65400 |
| }, |
| { |
| "epoch": 1.7455495149770814, |
| "grad_norm": 5105.17529296875, |
| "learning_rate": 1.9261503839548337e-05, |
| "loss": 0.1774, |
| "step": 65500 |
| }, |
| { |
| "epoch": 1.7455495149770814, |
| "eval_dev_accuracy": 0.9722270386816136, |
| "eval_dev_accuracy_threshold": 0.9311126470565796, |
| "eval_dev_average_precision": 0.8672414882858807, |
| "eval_dev_f1": 0.801593625498008, |
| "eval_dev_f1_threshold": 0.841367244720459, |
| "eval_dev_precision": 0.7779954629820581, |
| "eval_dev_recall": 0.8266681275336912, |
| "eval_loss": 0.2049088478088379, |
| "eval_runtime": 933.0893, |
| "eval_samples_per_second": 142.159, |
| "eval_steps_per_second": 2.222, |
| "step": 65500 |
| }, |
| { |
| "epoch": 1.7482144760686493, |
| "grad_norm": 52553.86328125, |
| "learning_rate": 1.9241763231142784e-05, |
| "loss": 0.154, |
| "step": 65600 |
| }, |
| { |
| "epoch": 1.7508794371602174, |
| "grad_norm": 8918.7666015625, |
| "learning_rate": 1.9222022622737234e-05, |
| "loss": 0.1871, |
| "step": 65700 |
| }, |
| { |
| "epoch": 1.7535443982517855, |
| "grad_norm": 1728.83984375, |
| "learning_rate": 1.9202282014331684e-05, |
| "loss": 0.1929, |
| "step": 65800 |
| }, |
| { |
| "epoch": 1.7562093593433534, |
| "grad_norm": 8542.5439453125, |
| "learning_rate": 1.918254140592613e-05, |
| "loss": 0.1519, |
| "step": 65900 |
| }, |
| { |
| "epoch": 1.7588743204349218, |
| "grad_norm": 40360.875, |
| "learning_rate": 1.916280079752058e-05, |
| "loss": 0.2105, |
| "step": 66000 |
| }, |
| { |
| "epoch": 1.7588743204349218, |
| "eval_dev_accuracy": 0.9723099655476566, |
| "eval_dev_accuracy_threshold": 0.82029128074646, |
| "eval_dev_average_precision": 0.8667448997071003, |
| "eval_dev_f1": 0.801227852873068, |
| "eval_dev_f1_threshold": 0.5722821354866028, |
| "eval_dev_precision": 0.7878627409447151, |
| "eval_dev_recall": 0.8150542346882875, |
| "eval_loss": 0.2998444736003876, |
| "eval_runtime": 935.7172, |
| "eval_samples_per_second": 141.76, |
| "eval_steps_per_second": 2.215, |
| "step": 66000 |
| }, |
| { |
| "epoch": 1.7615392815264896, |
| "grad_norm": 46394.6875, |
| "learning_rate": 1.914306018911503e-05, |
| "loss": 0.1664, |
| "step": 66100 |
| }, |
| { |
| "epoch": 1.7642042426180577, |
| "grad_norm": 3412.559814453125, |
| "learning_rate": 1.9123319580709477e-05, |
| "loss": 0.1806, |
| "step": 66200 |
| }, |
| { |
| "epoch": 1.7668692037096259, |
| "grad_norm": 5545.865234375, |
| "learning_rate": 1.910357897230393e-05, |
| "loss": 0.1881, |
| "step": 66300 |
| }, |
| { |
| "epoch": 1.7695341648011937, |
| "grad_norm": 85940.0234375, |
| "learning_rate": 1.9083838363898377e-05, |
| "loss": 0.1881, |
| "step": 66400 |
| }, |
| { |
| "epoch": 1.772199125892762, |
| "grad_norm": 15622.53125, |
| "learning_rate": 1.9064097755492824e-05, |
| "loss": 0.1889, |
| "step": 66500 |
| }, |
| { |
| "epoch": 1.772199125892762, |
| "eval_dev_accuracy": 0.9718651759934261, |
| "eval_dev_accuracy_threshold": 0.9094328880310059, |
| "eval_dev_average_precision": 0.8682256601471484, |
| "eval_dev_f1": 0.7982062780269058, |
| "eval_dev_f1_threshold": 0.6328648328781128, |
| "eval_dev_precision": 0.7697395197395197, |
| "eval_dev_recall": 0.8288594280705599, |
| "eval_loss": 0.19647949934005737, |
| "eval_runtime": 933.2357, |
| "eval_samples_per_second": 142.137, |
| "eval_steps_per_second": 2.221, |
| "step": 66500 |
| }, |
| { |
| "epoch": 1.77486408698433, |
| "grad_norm": 1002.01220703125, |
| "learning_rate": 1.9044357147087277e-05, |
| "loss": 0.1722, |
| "step": 66600 |
| }, |
| { |
| "epoch": 1.777529048075898, |
| "grad_norm": 45076.7421875, |
| "learning_rate": 1.9024616538681724e-05, |
| "loss": 0.1999, |
| "step": 66700 |
| }, |
| { |
| "epoch": 1.7801940091674662, |
| "grad_norm": 2053.866455078125, |
| "learning_rate": 1.900487593027617e-05, |
| "loss": 0.1894, |
| "step": 66800 |
| }, |
| { |
| "epoch": 1.782858970259034, |
| "grad_norm": 3085.87451171875, |
| "learning_rate": 1.8985135321870624e-05, |
| "loss": 0.1702, |
| "step": 66900 |
| }, |
| { |
| "epoch": 1.7855239313506024, |
| "grad_norm": 1689.106201171875, |
| "learning_rate": 1.896539471346507e-05, |
| "loss": 0.1905, |
| "step": 67000 |
| }, |
| { |
| "epoch": 1.7855239313506024, |
| "eval_dev_accuracy": 0.97235519838368, |
| "eval_dev_accuracy_threshold": 0.8816102743148804, |
| "eval_dev_average_precision": 0.8719513025342801, |
| "eval_dev_f1": 0.8005663642561224, |
| "eval_dev_f1_threshold": 0.595874547958374, |
| "eval_dev_precision": 0.7677529672098169, |
| "eval_dev_recall": 0.8363098498959132, |
| "eval_loss": 0.22260619699954987, |
| "eval_runtime": 935.4859, |
| "eval_samples_per_second": 141.795, |
| "eval_steps_per_second": 2.216, |
| "step": 67000 |
| }, |
| { |
| "epoch": 1.7881888924421703, |
| "grad_norm": 24842.880859375, |
| "learning_rate": 1.8945654105059517e-05, |
| "loss": 0.1809, |
| "step": 67100 |
| }, |
| { |
| "epoch": 1.7908538535337384, |
| "grad_norm": 60853.56640625, |
| "learning_rate": 1.892591349665397e-05, |
| "loss": 0.1825, |
| "step": 67200 |
| }, |
| { |
| "epoch": 1.7935188146253065, |
| "grad_norm": 6448.2060546875, |
| "learning_rate": 1.8906172888248417e-05, |
| "loss": 0.1912, |
| "step": 67300 |
| }, |
| { |
| "epoch": 1.7961837757168744, |
| "grad_norm": 28209.67578125, |
| "learning_rate": 1.8886432279842867e-05, |
| "loss": 0.1849, |
| "step": 67400 |
| }, |
| { |
| "epoch": 1.7988487368084427, |
| "grad_norm": 1441.7255859375, |
| "learning_rate": 1.8866691671437317e-05, |
| "loss": 0.1812, |
| "step": 67500 |
| }, |
| { |
| "epoch": 1.7988487368084427, |
| "eval_dev_accuracy": 0.9728678371919456, |
| "eval_dev_accuracy_threshold": 0.8422494530677795, |
| "eval_dev_average_precision": 0.8713666080730428, |
| "eval_dev_f1": 0.8021557531662624, |
| "eval_dev_f1_threshold": 0.6560682058334351, |
| "eval_dev_precision": 0.7893508697496818, |
| "eval_dev_recall": 0.8153829297688178, |
| "eval_loss": 0.21360942721366882, |
| "eval_runtime": 935.6466, |
| "eval_samples_per_second": 141.77, |
| "eval_steps_per_second": 2.216, |
| "step": 67500 |
| }, |
| { |
| "epoch": 1.8015136979000106, |
| "grad_norm": 19593.896484375, |
| "learning_rate": 1.8846951063031764e-05, |
| "loss": 0.1729, |
| "step": 67600 |
| }, |
| { |
| "epoch": 1.8041786589915787, |
| "grad_norm": 49532.5390625, |
| "learning_rate": 1.8827210454626214e-05, |
| "loss": 0.1981, |
| "step": 67700 |
| }, |
| { |
| "epoch": 1.8068436200831468, |
| "grad_norm": 2939.565185546875, |
| "learning_rate": 1.8807469846220664e-05, |
| "loss": 0.172, |
| "step": 67800 |
| }, |
| { |
| "epoch": 1.8095085811747147, |
| "grad_norm": 18294.060546875, |
| "learning_rate": 1.878772923781511e-05, |
| "loss": 0.1609, |
| "step": 67900 |
| }, |
| { |
| "epoch": 1.812173542266283, |
| "grad_norm": 67081.5234375, |
| "learning_rate": 1.876798862940956e-05, |
| "loss": 0.18, |
| "step": 68000 |
| }, |
| { |
| "epoch": 1.812173542266283, |
| "eval_dev_accuracy": 0.972196883457598, |
| "eval_dev_accuracy_threshold": 0.9536248445510864, |
| "eval_dev_average_precision": 0.8677887820499237, |
| "eval_dev_f1": 0.7948606271777002, |
| "eval_dev_f1_threshold": 0.8924222588539124, |
| "eval_dev_precision": 0.7899577967752408, |
| "eval_dev_recall": 0.7998246959570505, |
| "eval_loss": 0.22794483602046967, |
| "eval_runtime": 934.1595, |
| "eval_samples_per_second": 141.996, |
| "eval_steps_per_second": 2.219, |
| "step": 68000 |
| }, |
| { |
| "epoch": 1.814838503357851, |
| "grad_norm": 3441.131103515625, |
| "learning_rate": 1.874824802100401e-05, |
| "loss": 0.1618, |
| "step": 68100 |
| }, |
| { |
| "epoch": 1.817503464449419, |
| "grad_norm": 40774.67578125, |
| "learning_rate": 1.8728507412598457e-05, |
| "loss": 0.1673, |
| "step": 68200 |
| }, |
| { |
| "epoch": 1.8201684255409871, |
| "grad_norm": 23139.685546875, |
| "learning_rate": 1.8708766804192907e-05, |
| "loss": 0.1793, |
| "step": 68300 |
| }, |
| { |
| "epoch": 1.822833386632555, |
| "grad_norm": 8400.26171875, |
| "learning_rate": 1.8689026195787357e-05, |
| "loss": 0.219, |
| "step": 68400 |
| }, |
| { |
| "epoch": 1.8254983477241233, |
| "grad_norm": 874.6626586914062, |
| "learning_rate": 1.8669285587381804e-05, |
| "loss": 0.1714, |
| "step": 68500 |
| }, |
| { |
| "epoch": 1.8254983477241233, |
| "eval_dev_accuracy": 0.9731241565960783, |
| "eval_dev_accuracy_threshold": 0.939326286315918, |
| "eval_dev_average_precision": 0.872717385393903, |
| "eval_dev_f1": 0.803395225464191, |
| "eval_dev_f1_threshold": 0.7294609546661377, |
| "eval_dev_precision": 0.7787719839555692, |
| "eval_dev_recall": 0.8296263832584639, |
| "eval_loss": 0.22690728306770325, |
| "eval_runtime": 931.9708, |
| "eval_samples_per_second": 142.33, |
| "eval_steps_per_second": 2.224, |
| "step": 68500 |
| }, |
| { |
| "epoch": 1.8281633088156912, |
| "grad_norm": 339.2591552734375, |
| "learning_rate": 1.8649544978976254e-05, |
| "loss": 0.1969, |
| "step": 68600 |
| }, |
| { |
| "epoch": 1.8308282699072593, |
| "grad_norm": 48369.09375, |
| "learning_rate": 1.8629804370570704e-05, |
| "loss": 0.1715, |
| "step": 68700 |
| }, |
| { |
| "epoch": 1.8334932309988274, |
| "grad_norm": 1295.3619384765625, |
| "learning_rate": 1.861006376216515e-05, |
| "loss": 0.1728, |
| "step": 68800 |
| }, |
| { |
| "epoch": 1.8361581920903953, |
| "grad_norm": 13706.5322265625, |
| "learning_rate": 1.85903231537596e-05, |
| "loss": 0.1768, |
| "step": 68900 |
| }, |
| { |
| "epoch": 1.8388231531819637, |
| "grad_norm": 36329.11328125, |
| "learning_rate": 1.857058254535405e-05, |
| "loss": 0.1821, |
| "step": 69000 |
| }, |
| { |
| "epoch": 1.8388231531819637, |
| "eval_dev_accuracy": 0.9732900103281642, |
| "eval_dev_accuracy_threshold": 0.9531596899032593, |
| "eval_dev_average_precision": 0.8750664616109699, |
| "eval_dev_f1": 0.8036220816059348, |
| "eval_dev_f1_threshold": 0.925843358039856, |
| "eval_dev_precision": 0.8002172732210755, |
| "eval_dev_recall": 0.807055987728717, |
| "eval_loss": 0.22201138734817505, |
| "eval_runtime": 933.9653, |
| "eval_samples_per_second": 142.026, |
| "eval_steps_per_second": 2.22, |
| "step": 69000 |
| }, |
| { |
| "epoch": 1.8414881142735315, |
| "grad_norm": 21184.15234375, |
| "learning_rate": 1.8550841936948497e-05, |
| "loss": 0.1925, |
| "step": 69100 |
| }, |
| { |
| "epoch": 1.8441530753650996, |
| "grad_norm": 1523.7003173828125, |
| "learning_rate": 1.8531101328542947e-05, |
| "loss": 0.1761, |
| "step": 69200 |
| }, |
| { |
| "epoch": 1.8468180364566678, |
| "grad_norm": 18345.251953125, |
| "learning_rate": 1.8511360720137397e-05, |
| "loss": 0.1656, |
| "step": 69300 |
| }, |
| { |
| "epoch": 1.8494829975482356, |
| "grad_norm": 3282.25830078125, |
| "learning_rate": 1.8491620111731844e-05, |
| "loss": 0.2208, |
| "step": 69400 |
| }, |
| { |
| "epoch": 1.852147958639804, |
| "grad_norm": 10842.587890625, |
| "learning_rate": 1.8471879503326294e-05, |
| "loss": 0.1579, |
| "step": 69500 |
| }, |
| { |
| "epoch": 1.852147958639804, |
| "eval_dev_accuracy": 0.9733653983882032, |
| "eval_dev_accuracy_threshold": 0.9553133249282837, |
| "eval_dev_average_precision": 0.8719043392702807, |
| "eval_dev_f1": 0.8058681249342727, |
| "eval_dev_f1_threshold": 0.8481921553611755, |
| "eval_dev_precision": 0.7747447174198766, |
| "eval_dev_recall": 0.8395968007012161, |
| "eval_loss": 0.2123890370130539, |
| "eval_runtime": 932.9373, |
| "eval_samples_per_second": 142.182, |
| "eval_steps_per_second": 2.222, |
| "step": 69500 |
| }, |
| { |
| "epoch": 1.8548129197313719, |
| "grad_norm": 14122.6357421875, |
| "learning_rate": 1.8452138894920744e-05, |
| "loss": 0.1684, |
| "step": 69600 |
| }, |
| { |
| "epoch": 1.85747788082294, |
| "grad_norm": 22713.14453125, |
| "learning_rate": 1.843239828651519e-05, |
| "loss": 0.207, |
| "step": 69700 |
| }, |
| { |
| "epoch": 1.860142841914508, |
| "grad_norm": 21279.48828125, |
| "learning_rate": 1.841265767810964e-05, |
| "loss": 0.1679, |
| "step": 69800 |
| }, |
| { |
| "epoch": 1.862807803006076, |
| "grad_norm": 1724.1683349609375, |
| "learning_rate": 1.839291706970409e-05, |
| "loss": 0.1658, |
| "step": 69900 |
| }, |
| { |
| "epoch": 1.8654727640976443, |
| "grad_norm": 25310.3359375, |
| "learning_rate": 1.8373176461298537e-05, |
| "loss": 0.2035, |
| "step": 70000 |
| }, |
| { |
| "epoch": 1.8654727640976443, |
| "eval_dev_accuracy": 0.9732146222681252, |
| "eval_dev_accuracy_threshold": 0.9318354725837708, |
| "eval_dev_average_precision": 0.8761383143347535, |
| "eval_dev_f1": 0.8027572731220147, |
| "eval_dev_f1_threshold": 0.7808271646499634, |
| "eval_dev_precision": 0.7954178767344304, |
| "eval_dev_recall": 0.8102333735071765, |
| "eval_loss": 0.2241707593202591, |
| "eval_runtime": 934.0769, |
| "eval_samples_per_second": 142.009, |
| "eval_steps_per_second": 2.219, |
| "step": 70000 |
| }, |
| { |
| "epoch": 1.8681377251892122, |
| "grad_norm": 1192.052978515625, |
| "learning_rate": 1.8353435852892987e-05, |
| "loss": 0.1671, |
| "step": 70100 |
| }, |
| { |
| "epoch": 1.8708026862807803, |
| "grad_norm": 3381.109375, |
| "learning_rate": 1.8333695244487437e-05, |
| "loss": 0.1777, |
| "step": 70200 |
| }, |
| { |
| "epoch": 1.8734676473723484, |
| "grad_norm": 2287.74267578125, |
| "learning_rate": 1.8313954636081884e-05, |
| "loss": 0.1894, |
| "step": 70300 |
| }, |
| { |
| "epoch": 1.8761326084639163, |
| "grad_norm": 5671.9111328125, |
| "learning_rate": 1.8294214027676334e-05, |
| "loss": 0.2227, |
| "step": 70400 |
| }, |
| { |
| "epoch": 1.8787975695554846, |
| "grad_norm": 8669.9560546875, |
| "learning_rate": 1.8274473419270784e-05, |
| "loss": 0.1754, |
| "step": 70500 |
| }, |
| { |
| "epoch": 1.8787975695554846, |
| "eval_dev_accuracy": 0.9732221610741291, |
| "eval_dev_accuracy_threshold": 0.9238910675048828, |
| "eval_dev_average_precision": 0.8787022531852614, |
| "eval_dev_f1": 0.8059863355384449, |
| "eval_dev_f1_threshold": 0.760931134223938, |
| "eval_dev_precision": 0.7978529253891573, |
| "eval_dev_recall": 0.8142872795003835, |
| "eval_loss": 0.1879546046257019, |
| "eval_runtime": 934.0481, |
| "eval_samples_per_second": 142.013, |
| "eval_steps_per_second": 2.219, |
| "step": 70500 |
| }, |
| { |
| "epoch": 1.8814625306470525, |
| "grad_norm": 21830.791015625, |
| "learning_rate": 1.825473281086523e-05, |
| "loss": 0.1683, |
| "step": 70600 |
| }, |
| { |
| "epoch": 1.8841274917386206, |
| "grad_norm": 5870.6396484375, |
| "learning_rate": 1.823499220245968e-05, |
| "loss": 0.1618, |
| "step": 70700 |
| }, |
| { |
| "epoch": 1.8867924528301887, |
| "grad_norm": 9237.384765625, |
| "learning_rate": 1.821525159405413e-05, |
| "loss": 0.1806, |
| "step": 70800 |
| }, |
| { |
| "epoch": 1.8894574139217566, |
| "grad_norm": 5946.40380859375, |
| "learning_rate": 1.8195510985648577e-05, |
| "loss": 0.1701, |
| "step": 70900 |
| }, |
| { |
| "epoch": 1.892122375013325, |
| "grad_norm": 4265.1650390625, |
| "learning_rate": 1.8175770377243027e-05, |
| "loss": 0.1752, |
| "step": 71000 |
| }, |
| { |
| "epoch": 1.892122375013325, |
| "eval_dev_accuracy": 0.9730336909240315, |
| "eval_dev_accuracy_threshold": 0.9348808526992798, |
| "eval_dev_average_precision": 0.8700561831987852, |
| "eval_dev_f1": 0.8034291366708798, |
| "eval_dev_f1_threshold": 0.9348808526992798, |
| "eval_dev_precision": 0.8059536934950385, |
| "eval_dev_recall": 0.8009203462254848, |
| "eval_loss": 0.201664537191391, |
| "eval_runtime": 931.3245, |
| "eval_samples_per_second": 142.428, |
| "eval_steps_per_second": 2.226, |
| "step": 71000 |
| }, |
| { |
| "epoch": 1.8947873361048928, |
| "grad_norm": 2272.4169921875, |
| "learning_rate": 1.8156029768837477e-05, |
| "loss": 0.1688, |
| "step": 71100 |
| }, |
| { |
| "epoch": 1.897452297196461, |
| "grad_norm": 11893.5654296875, |
| "learning_rate": 1.8136289160431924e-05, |
| "loss": 0.184, |
| "step": 71200 |
| }, |
| { |
| "epoch": 1.900117258288029, |
| "grad_norm": 3861.369384765625, |
| "learning_rate": 1.8116548552026374e-05, |
| "loss": 0.1665, |
| "step": 71300 |
| }, |
| { |
| "epoch": 1.902782219379597, |
| "grad_norm": 35609.0, |
| "learning_rate": 1.8096807943620824e-05, |
| "loss": 0.1749, |
| "step": 71400 |
| }, |
| { |
| "epoch": 1.9054471804711652, |
| "grad_norm": 11618.3125, |
| "learning_rate": 1.8077067335215274e-05, |
| "loss": 0.1899, |
| "step": 71500 |
| }, |
| { |
| "epoch": 1.9054471804711652, |
| "eval_dev_accuracy": 0.9734483252542462, |
| "eval_dev_accuracy_threshold": 0.943538248538971, |
| "eval_dev_average_precision": 0.8746432264035248, |
| "eval_dev_f1": 0.8067354698533405, |
| "eval_dev_f1_threshold": 0.9360702037811279, |
| "eval_dev_precision": 0.7999569104815254, |
| "eval_dev_recall": 0.8136298893393229, |
| "eval_loss": 0.20475232601165771, |
| "eval_runtime": 860.459, |
| "eval_samples_per_second": 154.158, |
| "eval_steps_per_second": 2.409, |
| "step": 71500 |
| }, |
| { |
| "epoch": 1.9081121415627331, |
| "grad_norm": 8260.7607421875, |
| "learning_rate": 1.805732672680972e-05, |
| "loss": 0.1886, |
| "step": 71600 |
| }, |
| { |
| "epoch": 1.9107771026543012, |
| "grad_norm": 47676.78125, |
| "learning_rate": 1.803758611840417e-05, |
| "loss": 0.1858, |
| "step": 71700 |
| }, |
| { |
| "epoch": 1.9134420637458693, |
| "grad_norm": 554.1092529296875, |
| "learning_rate": 1.801784550999862e-05, |
| "loss": 0.165, |
| "step": 71800 |
| }, |
| { |
| "epoch": 1.9161070248374372, |
| "grad_norm": 12699.4365234375, |
| "learning_rate": 1.7998104901593067e-05, |
| "loss": 0.1784, |
| "step": 71900 |
| }, |
| { |
| "epoch": 1.9187719859290056, |
| "grad_norm": 4534.798828125, |
| "learning_rate": 1.7978364293187517e-05, |
| "loss": 0.1767, |
| "step": 72000 |
| }, |
| { |
| "epoch": 1.9187719859290056, |
| "eval_dev_accuracy": 0.9739835804805235, |
| "eval_dev_accuracy_threshold": 0.9395354986190796, |
| "eval_dev_average_precision": 0.8772612012666982, |
| "eval_dev_f1": 0.8093941820122765, |
| "eval_dev_f1_threshold": 0.875823974609375, |
| "eval_dev_precision": 0.7891340549542049, |
| "eval_dev_recall": 0.8307220335268982, |
| "eval_loss": 0.20605036616325378, |
| "eval_runtime": 861.3232, |
| "eval_samples_per_second": 154.004, |
| "eval_steps_per_second": 2.407, |
| "step": 72000 |
| }, |
| { |
| "epoch": 1.9214369470205734, |
| "grad_norm": 65605.9375, |
| "learning_rate": 1.7958623684781968e-05, |
| "loss": 0.1687, |
| "step": 72100 |
| }, |
| { |
| "epoch": 1.9241019081121415, |
| "grad_norm": 11532.1455078125, |
| "learning_rate": 1.7938883076376414e-05, |
| "loss": 0.1664, |
| "step": 72200 |
| }, |
| { |
| "epoch": 1.9267668692037097, |
| "grad_norm": 11916.1513671875, |
| "learning_rate": 1.7919142467970864e-05, |
| "loss": 0.1669, |
| "step": 72300 |
| }, |
| { |
| "epoch": 1.9294318302952775, |
| "grad_norm": 2029.2286376953125, |
| "learning_rate": 1.7899401859565314e-05, |
| "loss": 0.1787, |
| "step": 72400 |
| }, |
| { |
| "epoch": 1.9320967913868459, |
| "grad_norm": 6753.46142578125, |
| "learning_rate": 1.787966125115976e-05, |
| "loss": 0.1728, |
| "step": 72500 |
| }, |
| { |
| "epoch": 1.9320967913868459, |
| "eval_dev_accuracy": 0.9743379043627071, |
| "eval_dev_accuracy_threshold": 0.8970457315444946, |
| "eval_dev_average_precision": 0.8806920275415929, |
| "eval_dev_f1": 0.8153239556692241, |
| "eval_dev_f1_threshold": 0.7824004888534546, |
| "eval_dev_precision": 0.7935898765688206, |
| "eval_dev_recall": 0.8382820203790949, |
| "eval_loss": 0.19223952293395996, |
| "eval_runtime": 862.5657, |
| "eval_samples_per_second": 153.782, |
| "eval_steps_per_second": 2.403, |
| "step": 72500 |
| }, |
| { |
| "epoch": 1.9347617524784138, |
| "grad_norm": 27343.193359375, |
| "learning_rate": 1.785992064275421e-05, |
| "loss": 0.1443, |
| "step": 72600 |
| }, |
| { |
| "epoch": 1.9374267135699819, |
| "grad_norm": 13309.6455078125, |
| "learning_rate": 1.784018003434866e-05, |
| "loss": 0.1569, |
| "step": 72700 |
| }, |
| { |
| "epoch": 1.94009167466155, |
| "grad_norm": 1874.899169921875, |
| "learning_rate": 1.7820439425943108e-05, |
| "loss": 0.1931, |
| "step": 72800 |
| }, |
| { |
| "epoch": 1.9427566357531179, |
| "grad_norm": 31156.685546875, |
| "learning_rate": 1.7800698817537558e-05, |
| "loss": 0.1811, |
| "step": 72900 |
| }, |
| { |
| "epoch": 1.9454215968446862, |
| "grad_norm": 4346.09912109375, |
| "learning_rate": 1.7780958209132008e-05, |
| "loss": 0.1836, |
| "step": 73000 |
| }, |
| { |
| "epoch": 1.9454215968446862, |
| "eval_dev_accuracy": 0.9730563073420432, |
| "eval_dev_accuracy_threshold": 0.9250275492668152, |
| "eval_dev_average_precision": 0.8743046594125137, |
| "eval_dev_f1": 0.8057607880929436, |
| "eval_dev_f1_threshold": 0.8426618576049805, |
| "eval_dev_precision": 0.7878756151188357, |
| "eval_dev_recall": 0.8244768269968226, |
| "eval_loss": 0.207134410738945, |
| "eval_runtime": 861.6487, |
| "eval_samples_per_second": 153.946, |
| "eval_steps_per_second": 2.406, |
| "step": 73000 |
| }, |
| { |
| "epoch": 1.948086557936254, |
| "grad_norm": 5061.1884765625, |
| "learning_rate": 1.7761217600726454e-05, |
| "loss": 0.1739, |
| "step": 73100 |
| }, |
| { |
| "epoch": 1.9507515190278222, |
| "grad_norm": 103200.015625, |
| "learning_rate": 1.7741476992320904e-05, |
| "loss": 0.1966, |
| "step": 73200 |
| }, |
| { |
| "epoch": 1.9534164801193903, |
| "grad_norm": 18783.486328125, |
| "learning_rate": 1.7721736383915354e-05, |
| "loss": 0.1723, |
| "step": 73300 |
| }, |
| { |
| "epoch": 1.9560814412109582, |
| "grad_norm": 13243.9150390625, |
| "learning_rate": 1.7701995775509804e-05, |
| "loss": 0.1698, |
| "step": 73400 |
| }, |
| { |
| "epoch": 1.9587464023025265, |
| "grad_norm": 4332.658203125, |
| "learning_rate": 1.768225516710425e-05, |
| "loss": 0.1801, |
| "step": 73500 |
| }, |
| { |
| "epoch": 1.9587464023025265, |
| "eval_dev_accuracy": 0.972988458088008, |
| "eval_dev_accuracy_threshold": 0.9180799126625061, |
| "eval_dev_average_precision": 0.8762342719828209, |
| "eval_dev_f1": 0.8045175392942646, |
| "eval_dev_f1_threshold": 0.7035636901855469, |
| "eval_dev_precision": 0.7662337662337663, |
| "eval_dev_recall": 0.8468280924728827, |
| "eval_loss": 0.18561449646949768, |
| "eval_runtime": 862.9273, |
| "eval_samples_per_second": 153.717, |
| "eval_steps_per_second": 2.402, |
| "step": 73500 |
| }, |
| { |
| "epoch": 1.9614113633940944, |
| "grad_norm": 13960.3876953125, |
| "learning_rate": 1.76625145586987e-05, |
| "loss": 0.1599, |
| "step": 73600 |
| }, |
| { |
| "epoch": 1.9640763244856625, |
| "grad_norm": 12248.2890625, |
| "learning_rate": 1.764277395029315e-05, |
| "loss": 0.1722, |
| "step": 73700 |
| }, |
| { |
| "epoch": 1.9667412855772306, |
| "grad_norm": 20745.55859375, |
| "learning_rate": 1.7623033341887598e-05, |
| "loss": 0.1708, |
| "step": 73800 |
| }, |
| { |
| "epoch": 1.9694062466687985, |
| "grad_norm": 13722.9697265625, |
| "learning_rate": 1.7603292733482048e-05, |
| "loss": 0.1662, |
| "step": 73900 |
| }, |
| { |
| "epoch": 1.9720712077603668, |
| "grad_norm": 18372.69140625, |
| "learning_rate": 1.7583552125076498e-05, |
| "loss": 0.1716, |
| "step": 74000 |
| }, |
| { |
| "epoch": 1.9720712077603668, |
| "eval_dev_accuracy": 0.9739232700324922, |
| "eval_dev_accuracy_threshold": 0.8308413624763489, |
| "eval_dev_average_precision": 0.8841492699463087, |
| "eval_dev_f1": 0.8137931034482759, |
| "eval_dev_f1_threshold": 0.6751728057861328, |
| "eval_dev_precision": 0.7832387515200648, |
| "eval_dev_recall": 0.8468280924728827, |
| "eval_loss": 0.2016657292842865, |
| "eval_runtime": 862.1524, |
| "eval_samples_per_second": 153.856, |
| "eval_steps_per_second": 2.404, |
| "step": 74000 |
| }, |
| { |
| "epoch": 1.9747361688519347, |
| "grad_norm": 22373.701171875, |
| "learning_rate": 1.7563811516670944e-05, |
| "loss": 0.1741, |
| "step": 74100 |
| }, |
| { |
| "epoch": 1.9774011299435028, |
| "grad_norm": 1855.767822265625, |
| "learning_rate": 1.7544070908265394e-05, |
| "loss": 0.1318, |
| "step": 74200 |
| }, |
| { |
| "epoch": 1.980066091035071, |
| "grad_norm": 20893.662109375, |
| "learning_rate": 1.7524330299859844e-05, |
| "loss": 0.1782, |
| "step": 74300 |
| }, |
| { |
| "epoch": 1.9827310521266388, |
| "grad_norm": 1626.1358642578125, |
| "learning_rate": 1.750458969145429e-05, |
| "loss": 0.1842, |
| "step": 74400 |
| }, |
| { |
| "epoch": 1.9853960132182071, |
| "grad_norm": 8638.869140625, |
| "learning_rate": 1.748484908304874e-05, |
| "loss": 0.1545, |
| "step": 74500 |
| }, |
| { |
| "epoch": 1.9853960132182071, |
| "eval_dev_accuracy": 0.9740815849585742, |
| "eval_dev_accuracy_threshold": 0.7622551918029785, |
| "eval_dev_average_precision": 0.8838940929517627, |
| "eval_dev_f1": 0.8130659767141011, |
| "eval_dev_f1_threshold": 0.6812475919723511, |
| "eval_dev_precision": 0.800212201591512, |
| "eval_dev_recall": 0.826339432453161, |
| "eval_loss": 0.21240267157554626, |
| "eval_runtime": 862.2203, |
| "eval_samples_per_second": 153.844, |
| "eval_steps_per_second": 2.404, |
| "step": 74500 |
| }, |
| { |
| "epoch": 1.988060974309775, |
| "grad_norm": 12036.10546875, |
| "learning_rate": 1.746510847464319e-05, |
| "loss": 0.1786, |
| "step": 74600 |
| }, |
| { |
| "epoch": 1.9907259354013431, |
| "grad_norm": 3197.989013671875, |
| "learning_rate": 1.7445367866237638e-05, |
| "loss": 0.1589, |
| "step": 74700 |
| }, |
| { |
| "epoch": 1.9933908964929112, |
| "grad_norm": 2326.903564453125, |
| "learning_rate": 1.7425627257832088e-05, |
| "loss": 0.1712, |
| "step": 74800 |
| }, |
| { |
| "epoch": 1.9960558575844791, |
| "grad_norm": 13623.826171875, |
| "learning_rate": 1.7405886649426538e-05, |
| "loss": 0.1761, |
| "step": 74900 |
| }, |
| { |
| "epoch": 1.9987208186760475, |
| "grad_norm": 7701.57861328125, |
| "learning_rate": 1.7386146041020984e-05, |
| "loss": 0.1958, |
| "step": 75000 |
| }, |
| { |
| "epoch": 1.9987208186760475, |
| "eval_dev_accuracy": 0.9734558640602501, |
| "eval_dev_accuracy_threshold": 0.957332968711853, |
| "eval_dev_average_precision": 0.8773248937578426, |
| "eval_dev_f1": 0.8058651661075641, |
| "eval_dev_f1_threshold": 0.763139009475708, |
| "eval_dev_precision": 0.796044895777659, |
| "eval_dev_recall": 0.8159307549030349, |
| "eval_loss": 0.25920844078063965, |
| "eval_runtime": 862.3734, |
| "eval_samples_per_second": 153.816, |
| "eval_steps_per_second": 2.404, |
| "step": 75000 |
| }, |
| { |
| "epoch": 2.0013857797676153, |
| "grad_norm": 19200.3828125, |
| "learning_rate": 1.7366405432615434e-05, |
| "loss": 0.1859, |
| "step": 75100 |
| }, |
| { |
| "epoch": 2.0040507408591837, |
| "grad_norm": 27715.55859375, |
| "learning_rate": 1.7346664824209884e-05, |
| "loss": 0.215, |
| "step": 75200 |
| }, |
| { |
| "epoch": 2.0067157019507516, |
| "grad_norm": 14230.0625, |
| "learning_rate": 1.7326924215804334e-05, |
| "loss": 0.1883, |
| "step": 75300 |
| }, |
| { |
| "epoch": 2.0093806630423194, |
| "grad_norm": 214.24032592773438, |
| "learning_rate": 1.730718360739878e-05, |
| "loss": 0.1771, |
| "step": 75400 |
| }, |
| { |
| "epoch": 2.0120456241338878, |
| "grad_norm": 11949.2451171875, |
| "learning_rate": 1.728744299899323e-05, |
| "loss": 0.1568, |
| "step": 75500 |
| }, |
| { |
| "epoch": 2.0120456241338878, |
| "eval_dev_accuracy": 0.9732749327161564, |
| "eval_dev_accuracy_threshold": 0.9531142115592957, |
| "eval_dev_average_precision": 0.8772400052614694, |
| "eval_dev_f1": 0.8078490242333263, |
| "eval_dev_f1_threshold": 0.9034242630004883, |
| "eval_dev_precision": 0.7909711286089239, |
| "eval_dev_recall": 0.8254629122384135, |
| "eval_loss": 0.27995508909225464, |
| "eval_runtime": 861.968, |
| "eval_samples_per_second": 153.889, |
| "eval_steps_per_second": 2.405, |
| "step": 75500 |
| }, |
| { |
| "epoch": 2.0147105852254557, |
| "grad_norm": 1409.49951171875, |
| "learning_rate": 1.726770239058768e-05, |
| "loss": 0.1797, |
| "step": 75600 |
| }, |
| { |
| "epoch": 2.017375546317024, |
| "grad_norm": 5395.6484375, |
| "learning_rate": 1.7247961782182128e-05, |
| "loss": 0.1659, |
| "step": 75700 |
| }, |
| { |
| "epoch": 2.020040507408592, |
| "grad_norm": 49720.015625, |
| "learning_rate": 1.7228221173776578e-05, |
| "loss": 0.1519, |
| "step": 75800 |
| }, |
| { |
| "epoch": 2.0227054685001598, |
| "grad_norm": 39423.91015625, |
| "learning_rate": 1.7208480565371028e-05, |
| "loss": 0.1366, |
| "step": 75900 |
| }, |
| { |
| "epoch": 2.025370429591728, |
| "grad_norm": 1205.4697265625, |
| "learning_rate": 1.7188739956965474e-05, |
| "loss": 0.1641, |
| "step": 76000 |
| }, |
| { |
| "epoch": 2.025370429591728, |
| "eval_dev_accuracy": 0.9739760416745196, |
| "eval_dev_accuracy_threshold": 0.9528675079345703, |
| "eval_dev_average_precision": 0.8829642344114682, |
| "eval_dev_f1": 0.8102727032036007, |
| "eval_dev_f1_threshold": 0.8193379640579224, |
| "eval_dev_precision": 0.7840746054519369, |
| "eval_dev_recall": 0.8382820203790949, |
| "eval_loss": 0.22183284163475037, |
| "eval_runtime": 861.7487, |
| "eval_samples_per_second": 153.928, |
| "eval_steps_per_second": 2.406, |
| "step": 76000 |
| }, |
| { |
| "epoch": 2.028035390683296, |
| "grad_norm": 143011.90625, |
| "learning_rate": 1.7168999348559924e-05, |
| "loss": 0.1551, |
| "step": 76100 |
| }, |
| { |
| "epoch": 2.0307003517748643, |
| "grad_norm": 3733.740234375, |
| "learning_rate": 1.7149258740154374e-05, |
| "loss": 0.1612, |
| "step": 76200 |
| }, |
| { |
| "epoch": 2.033365312866432, |
| "grad_norm": 13346.1015625, |
| "learning_rate": 1.712951813174882e-05, |
| "loss": 0.1643, |
| "step": 76300 |
| }, |
| { |
| "epoch": 2.036030273958, |
| "grad_norm": 10167.767578125, |
| "learning_rate": 1.710977752334327e-05, |
| "loss": 0.1692, |
| "step": 76400 |
| }, |
| { |
| "epoch": 2.0386952350495684, |
| "grad_norm": 26428.076171875, |
| "learning_rate": 1.709003691493772e-05, |
| "loss": 0.1708, |
| "step": 76500 |
| }, |
| { |
| "epoch": 2.0386952350495684, |
| "eval_dev_accuracy": 0.9733277043581837, |
| "eval_dev_accuracy_threshold": 0.9573899507522583, |
| "eval_dev_average_precision": 0.8690568245333676, |
| "eval_dev_f1": 0.8137024870952604, |
| "eval_dev_f1_threshold": 0.8371973037719727, |
| "eval_dev_precision": 0.7762634301631516, |
| "eval_dev_recall": 0.8549359044592966, |
| "eval_loss": 0.21817246079444885, |
| "eval_runtime": 861.7458, |
| "eval_samples_per_second": 153.928, |
| "eval_steps_per_second": 2.406, |
| "step": 76500 |
| }, |
| { |
| "epoch": 2.0413601961411363, |
| "grad_norm": 22541.1796875, |
| "learning_rate": 1.7070296306532168e-05, |
| "loss": 0.165, |
| "step": 76600 |
| }, |
| { |
| "epoch": 2.0440251572327046, |
| "grad_norm": 49104.6015625, |
| "learning_rate": 1.7050555698126618e-05, |
| "loss": 0.1445, |
| "step": 76700 |
| }, |
| { |
| "epoch": 2.0466901183242725, |
| "grad_norm": 47796.04296875, |
| "learning_rate": 1.7030815089721068e-05, |
| "loss": 0.1354, |
| "step": 76800 |
| }, |
| { |
| "epoch": 2.0493550794158404, |
| "grad_norm": 21167.962890625, |
| "learning_rate": 1.7011074481315514e-05, |
| "loss": 0.1787, |
| "step": 76900 |
| }, |
| { |
| "epoch": 2.0520200405074087, |
| "grad_norm": 75447.2890625, |
| "learning_rate": 1.6991333872909964e-05, |
| "loss": 0.1626, |
| "step": 77000 |
| }, |
| { |
| "epoch": 2.0520200405074087, |
| "eval_dev_accuracy": 0.9745339133188086, |
| "eval_dev_accuracy_threshold": 0.9593422412872314, |
| "eval_dev_average_precision": 0.8806603026145806, |
| "eval_dev_f1": 0.8148537765621713, |
| "eval_dev_f1_threshold": 0.781623363494873, |
| "eval_dev_precision": 0.7836115326251897, |
| "eval_dev_recall": 0.848690697929221, |
| "eval_loss": 0.2216637134552002, |
| "eval_runtime": 862.3061, |
| "eval_samples_per_second": 153.828, |
| "eval_steps_per_second": 2.404, |
| "step": 77000 |
| }, |
| { |
| "epoch": 2.0546850015989766, |
| "grad_norm": 4420.5458984375, |
| "learning_rate": 1.6971593264504414e-05, |
| "loss": 0.1418, |
| "step": 77100 |
| }, |
| { |
| "epoch": 2.057349962690545, |
| "grad_norm": 14327.546875, |
| "learning_rate": 1.695185265609886e-05, |
| "loss": 0.2011, |
| "step": 77200 |
| }, |
| { |
| "epoch": 2.060014923782113, |
| "grad_norm": 19713.06640625, |
| "learning_rate": 1.693211204769331e-05, |
| "loss": 0.1593, |
| "step": 77300 |
| }, |
| { |
| "epoch": 2.0626798848736807, |
| "grad_norm": 5675.8125, |
| "learning_rate": 1.691237143928776e-05, |
| "loss": 0.1546, |
| "step": 77400 |
| }, |
| { |
| "epoch": 2.065344845965249, |
| "grad_norm": 7002.0654296875, |
| "learning_rate": 1.6892630830882208e-05, |
| "loss": 0.177, |
| "step": 77500 |
| }, |
| { |
| "epoch": 2.065344845965249, |
| "eval_dev_accuracy": 0.9752048670531561, |
| "eval_dev_accuracy_threshold": 0.8865873217582703, |
| "eval_dev_average_precision": 0.8890707955101652, |
| "eval_dev_f1": 0.8212508115126596, |
| "eval_dev_f1_threshold": 0.8439962863922119, |
| "eval_dev_precision": 0.8111574222507214, |
| "eval_dev_recall": 0.8315985537416457, |
| "eval_loss": 0.21185144782066345, |
| "eval_runtime": 860.1662, |
| "eval_samples_per_second": 154.211, |
| "eval_steps_per_second": 2.41, |
| "step": 77500 |
| }, |
| { |
| "epoch": 2.068009807056817, |
| "grad_norm": 418.3937683105469, |
| "learning_rate": 1.6872890222476658e-05, |
| "loss": 0.1546, |
| "step": 77600 |
| }, |
| { |
| "epoch": 2.0706747681483852, |
| "grad_norm": 47829.74609375, |
| "learning_rate": 1.6853149614071108e-05, |
| "loss": 0.1766, |
| "step": 77700 |
| }, |
| { |
| "epoch": 2.073339729239953, |
| "grad_norm": 395.5926208496094, |
| "learning_rate": 1.6833409005665554e-05, |
| "loss": 0.1879, |
| "step": 77800 |
| }, |
| { |
| "epoch": 2.076004690331521, |
| "grad_norm": 13378.1806640625, |
| "learning_rate": 1.6813668397260004e-05, |
| "loss": 0.1694, |
| "step": 77900 |
| }, |
| { |
| "epoch": 2.0786696514230893, |
| "grad_norm": 4878.7451171875, |
| "learning_rate": 1.6793927788854454e-05, |
| "loss": 0.1546, |
| "step": 78000 |
| }, |
| { |
| "epoch": 2.0786696514230893, |
| "eval_dev_accuracy": 0.9736971058523751, |
| "eval_dev_accuracy_threshold": 0.9617332220077515, |
| "eval_dev_average_precision": 0.8737670860803924, |
| "eval_dev_f1": 0.8101625374783019, |
| "eval_dev_f1_threshold": 0.8637624979019165, |
| "eval_dev_precision": 0.7791380008093889, |
| "eval_dev_recall": 0.8437602717212666, |
| "eval_loss": 0.24948453903198242, |
| "eval_runtime": 861.1759, |
| "eval_samples_per_second": 154.03, |
| "eval_steps_per_second": 2.407, |
| "step": 78000 |
| }, |
| { |
| "epoch": 2.0813346125146572, |
| "grad_norm": 26331.390625, |
| "learning_rate": 1.67741871804489e-05, |
| "loss": 0.1742, |
| "step": 78100 |
| }, |
| { |
| "epoch": 2.0839995736062256, |
| "grad_norm": 5203.9365234375, |
| "learning_rate": 1.675444657204335e-05, |
| "loss": 0.2024, |
| "step": 78200 |
| }, |
| { |
| "epoch": 2.0866645346977934, |
| "grad_norm": 27641.3671875, |
| "learning_rate": 1.67347059636378e-05, |
| "loss": 0.2126, |
| "step": 78300 |
| }, |
| { |
| "epoch": 2.0893294957893613, |
| "grad_norm": 3783.3671875, |
| "learning_rate": 1.6714965355232248e-05, |
| "loss": 0.1747, |
| "step": 78400 |
| }, |
| { |
| "epoch": 2.0919944568809297, |
| "grad_norm": 20038.98046875, |
| "learning_rate": 1.6695224746826698e-05, |
| "loss": 0.1807, |
| "step": 78500 |
| }, |
| { |
| "epoch": 2.0919944568809297, |
| "eval_dev_accuracy": 0.9743303655567032, |
| "eval_dev_accuracy_threshold": 0.9270470142364502, |
| "eval_dev_average_precision": 0.8818386397835865, |
| "eval_dev_f1": 0.816217350257002, |
| "eval_dev_f1_threshold": 0.7469815015792847, |
| "eval_dev_precision": 0.7828755407988731, |
| "eval_dev_recall": 0.8525254738687411, |
| "eval_loss": 0.21055419743061066, |
| "eval_runtime": 861.287, |
| "eval_samples_per_second": 154.01, |
| "eval_steps_per_second": 2.407, |
| "step": 78500 |
| }, |
| { |
| "epoch": 2.0946594179724976, |
| "grad_norm": 18032.57421875, |
| "learning_rate": 1.6675484138421148e-05, |
| "loss": 0.1805, |
| "step": 78600 |
| }, |
| { |
| "epoch": 2.097324379064066, |
| "grad_norm": 13172.416015625, |
| "learning_rate": 1.6655743530015594e-05, |
| "loss": 0.1498, |
| "step": 78700 |
| }, |
| { |
| "epoch": 2.0999893401556338, |
| "grad_norm": 10491.02734375, |
| "learning_rate": 1.6636002921610045e-05, |
| "loss": 0.1899, |
| "step": 78800 |
| }, |
| { |
| "epoch": 2.1026543012472017, |
| "grad_norm": 3893.85107421875, |
| "learning_rate": 1.6616262313204495e-05, |
| "loss": 0.1924, |
| "step": 78900 |
| }, |
| { |
| "epoch": 2.10531926233877, |
| "grad_norm": 1639.23486328125, |
| "learning_rate": 1.659652170479894e-05, |
| "loss": 0.1521, |
| "step": 79000 |
| }, |
| { |
| "epoch": 2.10531926233877, |
| "eval_dev_accuracy": 0.9743982148107383, |
| "eval_dev_accuracy_threshold": 0.9525002837181091, |
| "eval_dev_average_precision": 0.883524287942099, |
| "eval_dev_f1": 0.8129610403803071, |
| "eval_dev_f1_threshold": 0.9087203145027161, |
| "eval_dev_precision": 0.8108785698713756, |
| "eval_dev_recall": 0.8150542346882875, |
| "eval_loss": 0.24836769700050354, |
| "eval_runtime": 952.3381, |
| "eval_samples_per_second": 139.286, |
| "eval_steps_per_second": 2.177, |
| "step": 79000 |
| }, |
| { |
| "epoch": 2.107984223430338, |
| "grad_norm": 7783.5283203125, |
| "learning_rate": 1.657678109639339e-05, |
| "loss": 0.1988, |
| "step": 79100 |
| }, |
| { |
| "epoch": 2.1106491845219058, |
| "grad_norm": 1583.300537109375, |
| "learning_rate": 1.655704048798784e-05, |
| "loss": 0.1702, |
| "step": 79200 |
| }, |
| { |
| "epoch": 2.113314145613474, |
| "grad_norm": 1492.0706787109375, |
| "learning_rate": 1.6537299879582288e-05, |
| "loss": 0.1824, |
| "step": 79300 |
| }, |
| { |
| "epoch": 2.115979106705042, |
| "grad_norm": 18683.794921875, |
| "learning_rate": 1.651755927117674e-05, |
| "loss": 0.1688, |
| "step": 79400 |
| }, |
| { |
| "epoch": 2.1186440677966103, |
| "grad_norm": 8736.2275390625, |
| "learning_rate": 1.6497818662771188e-05, |
| "loss": 0.1809, |
| "step": 79500 |
| }, |
| { |
| "epoch": 2.1186440677966103, |
| "eval_dev_accuracy": 0.9739685028685157, |
| "eval_dev_accuracy_threshold": 0.9717953205108643, |
| "eval_dev_average_precision": 0.8798479877006415, |
| "eval_dev_f1": 0.8135902528044657, |
| "eval_dev_f1_threshold": 0.9465633630752563, |
| "eval_dev_precision": 0.7974537037037037, |
| "eval_dev_recall": 0.8303933384463679, |
| "eval_loss": 0.22024385631084442, |
| "eval_runtime": 951.2023, |
| "eval_samples_per_second": 139.452, |
| "eval_steps_per_second": 2.179, |
| "step": 79500 |
| }, |
| { |
| "epoch": 2.121309028888178, |
| "grad_norm": 54950.51953125, |
| "learning_rate": 1.6478078054365635e-05, |
| "loss": 0.1858, |
| "step": 79600 |
| }, |
| { |
| "epoch": 2.1239739899797465, |
| "grad_norm": 19716.146484375, |
| "learning_rate": 1.6458337445960088e-05, |
| "loss": 0.1642, |
| "step": 79700 |
| }, |
| { |
| "epoch": 2.1266389510713144, |
| "grad_norm": 18239.75, |
| "learning_rate": 1.6438596837554535e-05, |
| "loss": 0.191, |
| "step": 79800 |
| }, |
| { |
| "epoch": 2.1293039121628823, |
| "grad_norm": 41301.21875, |
| "learning_rate": 1.641885622914898e-05, |
| "loss": 0.1655, |
| "step": 79900 |
| }, |
| { |
| "epoch": 2.1319688732544506, |
| "grad_norm": 1119.526123046875, |
| "learning_rate": 1.6399115620743435e-05, |
| "loss": 0.1789, |
| "step": 80000 |
| }, |
| { |
| "epoch": 2.1319688732544506, |
| "eval_dev_accuracy": 0.9743152879446954, |
| "eval_dev_accuracy_threshold": 0.8854852914810181, |
| "eval_dev_average_precision": 0.8771901487923467, |
| "eval_dev_f1": 0.813726025900224, |
| "eval_dev_f1_threshold": 0.8826526403427124, |
| "eval_dev_precision": 0.8116415958142577, |
| "eval_dev_recall": 0.8158211898761916, |
| "eval_loss": 0.1959654837846756, |
| "eval_runtime": 952.4132, |
| "eval_samples_per_second": 139.275, |
| "eval_steps_per_second": 2.177, |
| "step": 80000 |
| }, |
| { |
| "epoch": 2.1346338343460185, |
| "grad_norm": 3469.789794921875, |
| "learning_rate": 1.637937501233788e-05, |
| "loss": 0.2002, |
| "step": 80100 |
| }, |
| { |
| "epoch": 2.1372987954375864, |
| "grad_norm": 15840.623046875, |
| "learning_rate": 1.635963440393233e-05, |
| "loss": 0.2139, |
| "step": 80200 |
| }, |
| { |
| "epoch": 2.1399637565291547, |
| "grad_norm": 24576.1328125, |
| "learning_rate": 1.633989379552678e-05, |
| "loss": 0.199, |
| "step": 80300 |
| }, |
| { |
| "epoch": 2.1426287176207226, |
| "grad_norm": 9852.4111328125, |
| "learning_rate": 1.6320153187121228e-05, |
| "loss": 0.165, |
| "step": 80400 |
| }, |
| { |
| "epoch": 2.145293678712291, |
| "grad_norm": 280.64031982421875, |
| "learning_rate": 1.6300412578715678e-05, |
| "loss": 0.1848, |
| "step": 80500 |
| }, |
| { |
| "epoch": 2.145293678712291, |
| "eval_dev_accuracy": 0.9742022058546368, |
| "eval_dev_accuracy_threshold": 0.9753606915473938, |
| "eval_dev_average_precision": 0.8782336024461705, |
| "eval_dev_f1": 0.8096592433592701, |
| "eval_dev_f1_threshold": 0.8103638887405396, |
| "eval_dev_precision": 0.7934371055952881, |
| "eval_dev_recall": 0.8265585625068478, |
| "eval_loss": 0.26615819334983826, |
| "eval_runtime": 951.0255, |
| "eval_samples_per_second": 139.478, |
| "eval_steps_per_second": 2.18, |
| "step": 80500 |
| }, |
| { |
| "epoch": 2.147958639803859, |
| "grad_norm": 3749.137939453125, |
| "learning_rate": 1.6280671970310128e-05, |
| "loss": 0.2118, |
| "step": 80600 |
| }, |
| { |
| "epoch": 2.150623600895427, |
| "grad_norm": 16408.94140625, |
| "learning_rate": 1.6260931361904575e-05, |
| "loss": 0.177, |
| "step": 80700 |
| }, |
| { |
| "epoch": 2.153288561986995, |
| "grad_norm": 122466.71875, |
| "learning_rate": 1.6241190753499025e-05, |
| "loss": 0.169, |
| "step": 80800 |
| }, |
| { |
| "epoch": 2.155953523078563, |
| "grad_norm": 35088.30078125, |
| "learning_rate": 1.6221450145093475e-05, |
| "loss": 0.1748, |
| "step": 80900 |
| }, |
| { |
| "epoch": 2.1586184841701312, |
| "grad_norm": 2193.1103515625, |
| "learning_rate": 1.620170953668792e-05, |
| "loss": 0.1532, |
| "step": 81000 |
| }, |
| { |
| "epoch": 2.1586184841701312, |
| "eval_dev_accuracy": 0.9747299222749101, |
| "eval_dev_accuracy_threshold": 0.7087757587432861, |
| "eval_dev_average_precision": 0.8839240203558189, |
| "eval_dev_f1": 0.8178559791463017, |
| "eval_dev_f1_threshold": 0.6686054468154907, |
| "eval_dev_precision": 0.8108108108108109, |
| "eval_dev_recall": 0.8250246521310398, |
| "eval_loss": 0.2607557475566864, |
| "eval_runtime": 952.9522, |
| "eval_samples_per_second": 139.196, |
| "eval_steps_per_second": 2.175, |
| "step": 81000 |
| }, |
| { |
| "epoch": 2.161283445261699, |
| "grad_norm": 2420.868896484375, |
| "learning_rate": 1.618196892828237e-05, |
| "loss": 0.1618, |
| "step": 81100 |
| }, |
| { |
| "epoch": 2.163948406353267, |
| "grad_norm": 706.0858764648438, |
| "learning_rate": 1.616222831987682e-05, |
| "loss": 0.1679, |
| "step": 81200 |
| }, |
| { |
| "epoch": 2.1666133674448353, |
| "grad_norm": 23174.521484375, |
| "learning_rate": 1.6142487711471268e-05, |
| "loss": 0.1808, |
| "step": 81300 |
| }, |
| { |
| "epoch": 2.1692783285364032, |
| "grad_norm": 15347.12890625, |
| "learning_rate": 1.6122747103065718e-05, |
| "loss": 0.1685, |
| "step": 81400 |
| }, |
| { |
| "epoch": 2.1719432896279716, |
| "grad_norm": 19526.70703125, |
| "learning_rate": 1.6103006494660168e-05, |
| "loss": 0.1901, |
| "step": 81500 |
| }, |
| { |
| "epoch": 2.1719432896279716, |
| "eval_dev_accuracy": 0.9744434476467617, |
| "eval_dev_accuracy_threshold": 0.9750630855560303, |
| "eval_dev_average_precision": 0.8830413621285588, |
| "eval_dev_f1": 0.8129956790461085, |
| "eval_dev_f1_threshold": 0.9695107936859131, |
| "eval_dev_precision": 0.8117081695063346, |
| "eval_dev_recall": 0.8142872795003835, |
| "eval_loss": 0.23483458161354065, |
| "eval_runtime": 950.8404, |
| "eval_samples_per_second": 139.505, |
| "eval_steps_per_second": 2.18, |
| "step": 81500 |
| }, |
| { |
| "epoch": 2.1746082507195394, |
| "grad_norm": 1307.5916748046875, |
| "learning_rate": 1.6083265886254615e-05, |
| "loss": 0.184, |
| "step": 81600 |
| }, |
| { |
| "epoch": 2.177273211811108, |
| "grad_norm": 40642.421875, |
| "learning_rate": 1.6063525277849065e-05, |
| "loss": 0.1667, |
| "step": 81700 |
| }, |
| { |
| "epoch": 2.1799381729026757, |
| "grad_norm": 1084.0020751953125, |
| "learning_rate": 1.6043784669443515e-05, |
| "loss": 0.1816, |
| "step": 81800 |
| }, |
| { |
| "epoch": 2.1826031339942435, |
| "grad_norm": 14024.021484375, |
| "learning_rate": 1.602404406103796e-05, |
| "loss": 0.159, |
| "step": 81900 |
| }, |
| { |
| "epoch": 2.185268095085812, |
| "grad_norm": 8854.5498046875, |
| "learning_rate": 1.600430345263241e-05, |
| "loss": 0.1553, |
| "step": 82000 |
| }, |
| { |
| "epoch": 2.185268095085812, |
| "eval_dev_accuracy": 0.9748957760069961, |
| "eval_dev_accuracy_threshold": 0.8661369681358337, |
| "eval_dev_average_precision": 0.8869519261803035, |
| "eval_dev_f1": 0.8157429896224332, |
| "eval_dev_f1_threshold": 0.8445290327072144, |
| "eval_dev_precision": 0.8220046723773501, |
| "eval_dev_recall": 0.8095759833461159, |
| "eval_loss": 0.23748071491718292, |
| "eval_runtime": 951.5083, |
| "eval_samples_per_second": 139.407, |
| "eval_steps_per_second": 2.179, |
| "step": 82000 |
| }, |
| { |
| "epoch": 2.1879330561773798, |
| "grad_norm": 44325.265625, |
| "learning_rate": 1.598456284422686e-05, |
| "loss": 0.1572, |
| "step": 82100 |
| }, |
| { |
| "epoch": 2.1905980172689477, |
| "grad_norm": 1203.1580810546875, |
| "learning_rate": 1.5964822235821308e-05, |
| "loss": 0.1629, |
| "step": 82200 |
| }, |
| { |
| "epoch": 2.193262978360516, |
| "grad_norm": 745.87353515625, |
| "learning_rate": 1.5945081627415758e-05, |
| "loss": 0.194, |
| "step": 82300 |
| }, |
| { |
| "epoch": 2.195927939452084, |
| "grad_norm": 17854.037109375, |
| "learning_rate": 1.5925341019010208e-05, |
| "loss": 0.1685, |
| "step": 82400 |
| }, |
| { |
| "epoch": 2.198592900543652, |
| "grad_norm": 44721.08203125, |
| "learning_rate": 1.5905600410604655e-05, |
| "loss": 0.1859, |
| "step": 82500 |
| }, |
| { |
| "epoch": 2.198592900543652, |
| "eval_dev_accuracy": 0.974345443168711, |
| "eval_dev_accuracy_threshold": 0.9739015102386475, |
| "eval_dev_average_precision": 0.8797686946603407, |
| "eval_dev_f1": 0.8160733549083065, |
| "eval_dev_f1_threshold": 0.9577875137329102, |
| "eval_dev_precision": 0.8036757675555083, |
| "eval_dev_recall": 0.8288594280705599, |
| "eval_loss": 0.2292918860912323, |
| "eval_runtime": 950.1815, |
| "eval_samples_per_second": 139.602, |
| "eval_steps_per_second": 2.182, |
| "step": 82500 |
| }, |
| { |
| "epoch": 2.20125786163522, |
| "grad_norm": 170.60641479492188, |
| "learning_rate": 1.5885859802199105e-05, |
| "loss": 0.1483, |
| "step": 82600 |
| }, |
| { |
| "epoch": 2.2039228227267884, |
| "grad_norm": 27626.072265625, |
| "learning_rate": 1.5866119193793555e-05, |
| "loss": 0.2056, |
| "step": 82700 |
| }, |
| { |
| "epoch": 2.2065877838183563, |
| "grad_norm": 731.1361083984375, |
| "learning_rate": 1.5846378585388e-05, |
| "loss": 0.1799, |
| "step": 82800 |
| }, |
| { |
| "epoch": 2.209252744909924, |
| "grad_norm": 36164.07421875, |
| "learning_rate": 1.582663797698245e-05, |
| "loss": 0.1645, |
| "step": 82900 |
| }, |
| { |
| "epoch": 2.2119177060014925, |
| "grad_norm": 6034.74853515625, |
| "learning_rate": 1.58068973685769e-05, |
| "loss": 0.1633, |
| "step": 83000 |
| }, |
| { |
| "epoch": 2.2119177060014925, |
| "eval_dev_accuracy": 0.9737423386883985, |
| "eval_dev_accuracy_threshold": 0.950665295124054, |
| "eval_dev_average_precision": 0.883874392367785, |
| "eval_dev_f1": 0.8089262330859885, |
| "eval_dev_f1_threshold": 0.9107600450515747, |
| "eval_dev_precision": 0.8056732963808282, |
| "eval_dev_recall": 0.8122055439903583, |
| "eval_loss": 0.23654605448246002, |
| "eval_runtime": 951.9974, |
| "eval_samples_per_second": 139.335, |
| "eval_steps_per_second": 2.178, |
| "step": 83000 |
| }, |
| { |
| "epoch": 2.2145826670930604, |
| "grad_norm": 10695.716796875, |
| "learning_rate": 1.5787156760171348e-05, |
| "loss": 0.1714, |
| "step": 83100 |
| }, |
| { |
| "epoch": 2.2172476281846283, |
| "grad_norm": 63246.39453125, |
| "learning_rate": 1.5767416151765798e-05, |
| "loss": 0.1793, |
| "step": 83200 |
| }, |
| { |
| "epoch": 2.2199125892761966, |
| "grad_norm": 1381.2412109375, |
| "learning_rate": 1.5747675543360248e-05, |
| "loss": 0.154, |
| "step": 83300 |
| }, |
| { |
| "epoch": 2.2225775503677645, |
| "grad_norm": 31067.8828125, |
| "learning_rate": 1.5727934934954695e-05, |
| "loss": 0.151, |
| "step": 83400 |
| }, |
| { |
| "epoch": 2.225242511459333, |
| "grad_norm": 33396.78125, |
| "learning_rate": 1.5708194326549148e-05, |
| "loss": 0.1841, |
| "step": 83500 |
| }, |
| { |
| "epoch": 2.225242511459333, |
| "eval_dev_accuracy": 0.9747902327229413, |
| "eval_dev_accuracy_threshold": 0.9669053554534912, |
| "eval_dev_average_precision": 0.8854411022874333, |
| "eval_dev_f1": 0.8149101635827299, |
| "eval_dev_f1_threshold": 0.9264481067657471, |
| "eval_dev_precision": 0.7981718848497583, |
| "eval_dev_recall": 0.8323655089295496, |
| "eval_loss": 0.20306049287319183, |
| "eval_runtime": 951.0541, |
| "eval_samples_per_second": 139.474, |
| "eval_steps_per_second": 2.18, |
| "step": 83500 |
| }, |
| { |
| "epoch": 2.2279074725509007, |
| "grad_norm": 1647.4901123046875, |
| "learning_rate": 1.5688453718143595e-05, |
| "loss": 0.1709, |
| "step": 83600 |
| }, |
| { |
| "epoch": 2.230572433642469, |
| "grad_norm": 310.0802307128906, |
| "learning_rate": 1.566871310973804e-05, |
| "loss": 0.1875, |
| "step": 83700 |
| }, |
| { |
| "epoch": 2.233237394734037, |
| "grad_norm": 14275.015625, |
| "learning_rate": 1.5648972501332495e-05, |
| "loss": 0.2041, |
| "step": 83800 |
| }, |
| { |
| "epoch": 2.235902355825605, |
| "grad_norm": 28323.603515625, |
| "learning_rate": 1.562923189292694e-05, |
| "loss": 0.1812, |
| "step": 83900 |
| }, |
| { |
| "epoch": 2.238567316917173, |
| "grad_norm": 25161.5546875, |
| "learning_rate": 1.5609491284521388e-05, |
| "loss": 0.1779, |
| "step": 84000 |
| }, |
| { |
| "epoch": 2.238567316917173, |
| "eval_dev_accuracy": 0.975137017799121, |
| "eval_dev_accuracy_threshold": 0.9797601699829102, |
| "eval_dev_average_precision": 0.8889116324411686, |
| "eval_dev_f1": 0.8158041179744018, |
| "eval_dev_f1_threshold": 0.9772592782974243, |
| "eval_dev_precision": 0.8289042180255569, |
| "eval_dev_recall": 0.8031116467623535, |
| "eval_loss": 0.2351406365633011, |
| "eval_runtime": 953.0513, |
| "eval_samples_per_second": 139.181, |
| "eval_steps_per_second": 2.175, |
| "step": 84000 |
| }, |
| { |
| "epoch": 2.241232278008741, |
| "grad_norm": 1789.634033203125, |
| "learning_rate": 1.558975067611584e-05, |
| "loss": 0.224, |
| "step": 84100 |
| }, |
| { |
| "epoch": 2.243897239100309, |
| "grad_norm": 5931.00048828125, |
| "learning_rate": 1.5570010067710288e-05, |
| "loss": 0.1624, |
| "step": 84200 |
| }, |
| { |
| "epoch": 2.2465622001918772, |
| "grad_norm": 18578.33203125, |
| "learning_rate": 1.5550269459304738e-05, |
| "loss": 0.1361, |
| "step": 84300 |
| }, |
| { |
| "epoch": 2.249227161283445, |
| "grad_norm": 1247.7115478515625, |
| "learning_rate": 1.5530528850899188e-05, |
| "loss": 0.1371, |
| "step": 84400 |
| }, |
| { |
| "epoch": 2.2518921223750135, |
| "grad_norm": 713.0791625976562, |
| "learning_rate": 1.5510788242493635e-05, |
| "loss": 0.2314, |
| "step": 84500 |
| }, |
| { |
| "epoch": 2.2518921223750135, |
| "eval_dev_accuracy": 0.974737461080914, |
| "eval_dev_accuracy_threshold": 0.9574118256568909, |
| "eval_dev_average_precision": 0.8861569751582977, |
| "eval_dev_f1": 0.8148996509598603, |
| "eval_dev_f1_threshold": 0.832693338394165, |
| "eval_dev_precision": 0.8112715821478987, |
| "eval_dev_recall": 0.8185603155472773, |
| "eval_loss": 0.2932807505130768, |
| "eval_runtime": 951.2089, |
| "eval_samples_per_second": 139.451, |
| "eval_steps_per_second": 2.179, |
| "step": 84500 |
| }, |
| { |
| "epoch": 2.2545570834665813, |
| "grad_norm": 2987.9375, |
| "learning_rate": 1.5491047634088085e-05, |
| "loss": 0.1889, |
| "step": 84600 |
| }, |
| { |
| "epoch": 2.2572220445581497, |
| "grad_norm": 2279.125, |
| "learning_rate": 1.5471307025682535e-05, |
| "loss": 0.2079, |
| "step": 84700 |
| }, |
| { |
| "epoch": 2.2598870056497176, |
| "grad_norm": 1106.462890625, |
| "learning_rate": 1.545156641727698e-05, |
| "loss": 0.1783, |
| "step": 84800 |
| }, |
| { |
| "epoch": 2.2625519667412854, |
| "grad_norm": 7212.99560546875, |
| "learning_rate": 1.543182580887143e-05, |
| "loss": 0.1551, |
| "step": 84900 |
| }, |
| { |
| "epoch": 2.2652169278328538, |
| "grad_norm": 22761.849609375, |
| "learning_rate": 1.541208520046588e-05, |
| "loss": 0.1606, |
| "step": 85000 |
| }, |
| { |
| "epoch": 2.2652169278328538, |
| "eval_dev_accuracy": 0.9748203879469569, |
| "eval_dev_accuracy_threshold": 0.9453166723251343, |
| "eval_dev_average_precision": 0.8915161607864528, |
| "eval_dev_f1": 0.8188866156993647, |
| "eval_dev_f1_threshold": 0.8840415477752686, |
| "eval_dev_precision": 0.8050174658621785, |
| "eval_dev_recall": 0.8332420291442971, |
| "eval_loss": 0.2096114605665207, |
| "eval_runtime": 952.4626, |
| "eval_samples_per_second": 139.267, |
| "eval_steps_per_second": 2.176, |
| "step": 85000 |
| }, |
| { |
| "epoch": 2.2678818889244217, |
| "grad_norm": 661.853271484375, |
| "learning_rate": 1.5392344592060328e-05, |
| "loss": 0.1887, |
| "step": 85100 |
| }, |
| { |
| "epoch": 2.2705468500159895, |
| "grad_norm": 26199.923828125, |
| "learning_rate": 1.5372603983654778e-05, |
| "loss": 0.1829, |
| "step": 85200 |
| }, |
| { |
| "epoch": 2.273211811107558, |
| "grad_norm": 11920.501953125, |
| "learning_rate": 1.5352863375249228e-05, |
| "loss": 0.18, |
| "step": 85300 |
| }, |
| { |
| "epoch": 2.2758767721991258, |
| "grad_norm": 13859.2724609375, |
| "learning_rate": 1.5333122766843675e-05, |
| "loss": 0.1935, |
| "step": 85400 |
| }, |
| { |
| "epoch": 2.278541733290694, |
| "grad_norm": 476.45367431640625, |
| "learning_rate": 1.5313382158438125e-05, |
| "loss": 0.1934, |
| "step": 85500 |
| }, |
| { |
| "epoch": 2.278541733290694, |
| "eval_dev_accuracy": 0.9749636252610312, |
| "eval_dev_accuracy_threshold": 0.9518921375274658, |
| "eval_dev_average_precision": 0.8866670870419442, |
| "eval_dev_f1": 0.8237035470740602, |
| "eval_dev_f1_threshold": 0.6820048093795776, |
| "eval_dev_precision": 0.7958120531154239, |
| "eval_dev_recall": 0.8536211241371754, |
| "eval_loss": 0.23012706637382507, |
| "eval_runtime": 952.604, |
| "eval_samples_per_second": 139.247, |
| "eval_steps_per_second": 2.176, |
| "step": 85500 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 112572, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|