| { |
| "best_metric": 0.8924948901640382, |
| "best_model_checkpoint": "/content/drive/MyDrive/PUBH 8885 CB/nuc_arg_long_readv4/checkpoint-1500", |
| "epoch": 3.663003663003663, |
| "eval_steps": 100, |
| "global_step": 1500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.2442002442002442, |
| "grad_norm": 25.231292724609375, |
| "learning_rate": 5e-06, |
| "loss": 2.8722, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.2442002442002442, |
| "eval_loss": 2.381986141204834, |
| "eval_macro_f1": 0.07916135790210403, |
| "eval_macro_precision": 0.06590895945398809, |
| "eval_macro_recall": 0.10263326510757667, |
| "eval_micro_f1": 0.4847560975609756, |
| "eval_micro_precision": 0.4847560975609756, |
| "eval_micro_recall": 0.4847560975609756, |
| "eval_runtime": 27.9503, |
| "eval_samples_per_second": 82.146, |
| "eval_steps_per_second": 1.288, |
| "eval_weighted_f1": 0.3658094645034367, |
| "eval_weighted_precision": 0.3000991244760369, |
| "eval_weighted_recall": 0.4847560975609756, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.4884004884004884, |
| "grad_norm": 74.01921844482422, |
| "learning_rate": 9.950000000000001e-06, |
| "loss": 1.9467, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.4884004884004884, |
| "eval_loss": 1.6188805103302002, |
| "eval_macro_f1": 0.12352787740442463, |
| "eval_macro_precision": 0.1729632869793354, |
| "eval_macro_recall": 0.14519904411264478, |
| "eval_micro_f1": 0.6032229965156795, |
| "eval_micro_precision": 0.6032229965156795, |
| "eval_micro_recall": 0.6032229965156795, |
| "eval_runtime": 27.9989, |
| "eval_samples_per_second": 82.003, |
| "eval_steps_per_second": 1.286, |
| "eval_weighted_f1": 0.502828345764972, |
| "eval_weighted_precision": 0.5102415198384107, |
| "eval_weighted_recall": 0.6032229965156795, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.7326007326007326, |
| "grad_norm": 61.67491149902344, |
| "learning_rate": 1.4950000000000001e-05, |
| "loss": 1.5019, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.7326007326007326, |
| "eval_loss": 1.3270624876022339, |
| "eval_macro_f1": 0.18095213980700245, |
| "eval_macro_precision": 0.1639157420556758, |
| "eval_macro_recall": 0.2060668906229992, |
| "eval_micro_f1": 0.6872822299651568, |
| "eval_micro_precision": 0.6872822299651568, |
| "eval_micro_recall": 0.6872822299651568, |
| "eval_runtime": 28.1715, |
| "eval_samples_per_second": 81.501, |
| "eval_steps_per_second": 1.278, |
| "eval_weighted_f1": 0.6267031640391093, |
| "eval_weighted_precision": 0.5821328002832489, |
| "eval_weighted_recall": 0.6872822299651568, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.9768009768009768, |
| "grad_norm": 93.83369445800781, |
| "learning_rate": 1.9900000000000003e-05, |
| "loss": 1.2602, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.9768009768009768, |
| "eval_loss": 1.1570900678634644, |
| "eval_macro_f1": 0.24796229984985535, |
| "eval_macro_precision": 0.250395185204251, |
| "eval_macro_recall": 0.2602306770905881, |
| "eval_micro_f1": 0.7151567944250871, |
| "eval_micro_precision": 0.7151567944250871, |
| "eval_micro_recall": 0.7151567944250871, |
| "eval_runtime": 28.097, |
| "eval_samples_per_second": 81.717, |
| "eval_steps_per_second": 1.281, |
| "eval_weighted_f1": 0.6742913163869271, |
| "eval_weighted_precision": 0.6523515088941247, |
| "eval_weighted_recall": 0.7151567944250871, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.221001221001221, |
| "grad_norm": 27.968883514404297, |
| "learning_rate": 2.4900000000000002e-05, |
| "loss": 1.0116, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.221001221001221, |
| "eval_loss": 0.8759449124336243, |
| "eval_macro_f1": 0.40662329686945736, |
| "eval_macro_precision": 0.4270552853235523, |
| "eval_macro_recall": 0.40522488609544827, |
| "eval_micro_f1": 0.801829268292683, |
| "eval_micro_precision": 0.801829268292683, |
| "eval_micro_recall": 0.801829268292683, |
| "eval_runtime": 28.1259, |
| "eval_samples_per_second": 81.633, |
| "eval_steps_per_second": 1.28, |
| "eval_weighted_f1": 0.7856079074786287, |
| "eval_weighted_precision": 0.778677751235951, |
| "eval_weighted_recall": 0.801829268292683, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.4652014652014653, |
| "grad_norm": 40.660743713378906, |
| "learning_rate": 2.4752635521978294e-05, |
| "loss": 0.8539, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.4652014652014653, |
| "eval_loss": 0.7635390758514404, |
| "eval_macro_f1": 0.47249477353778435, |
| "eval_macro_precision": 0.49858319443807503, |
| "eval_macro_recall": 0.49267372749042937, |
| "eval_micro_f1": 0.8340592334494773, |
| "eval_micro_precision": 0.8340592334494773, |
| "eval_micro_recall": 0.8340592334494773, |
| "eval_runtime": 28.2397, |
| "eval_samples_per_second": 81.304, |
| "eval_steps_per_second": 1.275, |
| "eval_weighted_f1": 0.8260353217142836, |
| "eval_weighted_precision": 0.8263412894566534, |
| "eval_weighted_recall": 0.8340592334494773, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.7094017094017095, |
| "grad_norm": 69.74828338623047, |
| "learning_rate": 2.40005092726346e-05, |
| "loss": 0.7398, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.7094017094017095, |
| "eval_loss": 0.6827709674835205, |
| "eval_macro_f1": 0.5156494920599783, |
| "eval_macro_precision": 0.595919953102261, |
| "eval_macro_recall": 0.516030034812032, |
| "eval_micro_f1": 0.8466898954703833, |
| "eval_micro_precision": 0.8466898954703833, |
| "eval_micro_recall": 0.8466898954703833, |
| "eval_runtime": 28.0593, |
| "eval_samples_per_second": 81.827, |
| "eval_steps_per_second": 1.283, |
| "eval_weighted_f1": 0.8359309638776365, |
| "eval_weighted_precision": 0.8368823626772747, |
| "eval_weighted_recall": 0.8466898954703833, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.9536019536019538, |
| "grad_norm": 18.172765731811523, |
| "learning_rate": 2.2774508957989417e-05, |
| "loss": 0.6728, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.9536019536019538, |
| "eval_loss": 0.6122449040412903, |
| "eval_macro_f1": 0.5609693508323702, |
| "eval_macro_precision": 0.6700802281488617, |
| "eval_macro_recall": 0.5537448671543558, |
| "eval_micro_f1": 0.8527874564459931, |
| "eval_micro_precision": 0.8527874564459931, |
| "eval_micro_recall": 0.8527874564459931, |
| "eval_runtime": 28.5179, |
| "eval_samples_per_second": 80.511, |
| "eval_steps_per_second": 1.262, |
| "eval_weighted_f1": 0.8465989784626531, |
| "eval_weighted_precision": 0.8602585363665758, |
| "eval_weighted_recall": 0.8527874564459931, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.197802197802198, |
| "grad_norm": 22.065692901611328, |
| "learning_rate": 2.112515144989503e-05, |
| "loss": 0.5593, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.197802197802198, |
| "eval_loss": 0.6033230423927307, |
| "eval_macro_f1": 0.6269631440779232, |
| "eval_macro_precision": 0.6458086803108424, |
| "eval_macro_recall": 0.6223857477581206, |
| "eval_micro_f1": 0.8628048780487805, |
| "eval_micro_precision": 0.8628048780487805, |
| "eval_micro_recall": 0.8628048780487805, |
| "eval_runtime": 28.0799, |
| "eval_samples_per_second": 81.767, |
| "eval_steps_per_second": 1.282, |
| "eval_weighted_f1": 0.8604939320250599, |
| "eval_weighted_precision": 0.8620706032280505, |
| "eval_weighted_recall": 0.8628048780487805, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.442002442002442, |
| "grad_norm": 46.23727035522461, |
| "learning_rate": 1.912039789067721e-05, |
| "loss": 0.5492, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.442002442002442, |
| "eval_loss": 0.5785375833511353, |
| "eval_macro_f1": 0.656816564197882, |
| "eval_macro_precision": 0.7161193189547552, |
| "eval_macro_recall": 0.656158547673032, |
| "eval_micro_f1": 0.8610627177700348, |
| "eval_micro_precision": 0.8610627177700348, |
| "eval_micro_recall": 0.8610627177700348, |
| "eval_runtime": 27.9572, |
| "eval_samples_per_second": 82.126, |
| "eval_steps_per_second": 1.288, |
| "eval_weighted_f1": 0.8635374104852748, |
| "eval_weighted_precision": 0.8750576393113125, |
| "eval_weighted_recall": 0.8610627177700348, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.6862026862026864, |
| "grad_norm": 18.426353454589844, |
| "learning_rate": 1.6842853380380934e-05, |
| "loss": 0.5028, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.6862026862026864, |
| "eval_loss": 0.5303468108177185, |
| "eval_macro_f1": 0.7262289134052592, |
| "eval_macro_precision": 0.7495966854257389, |
| "eval_macro_recall": 0.7177312989902742, |
| "eval_micro_f1": 0.8854529616724739, |
| "eval_micro_precision": 0.8854529616724739, |
| "eval_micro_recall": 0.8854529616724739, |
| "eval_runtime": 28.1073, |
| "eval_samples_per_second": 81.687, |
| "eval_steps_per_second": 1.281, |
| "eval_weighted_f1": 0.8844974105345418, |
| "eval_weighted_precision": 0.8869278196582587, |
| "eval_weighted_recall": 0.8854529616724739, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.9304029304029307, |
| "grad_norm": 18.274917602539062, |
| "learning_rate": 1.4386363265365535e-05, |
| "loss": 0.5033, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.9304029304029307, |
| "eval_loss": 0.49514588713645935, |
| "eval_macro_f1": 0.7355993207563093, |
| "eval_macro_precision": 0.7372036245463958, |
| "eval_macro_recall": 0.7414049084020491, |
| "eval_micro_f1": 0.8876306620209059, |
| "eval_micro_precision": 0.8876306620209059, |
| "eval_micro_recall": 0.8876306620209059, |
| "eval_runtime": 27.7713, |
| "eval_samples_per_second": 82.675, |
| "eval_steps_per_second": 1.296, |
| "eval_weighted_f1": 0.8867237445831763, |
| "eval_weighted_precision": 0.8876630720638423, |
| "eval_weighted_recall": 0.8876306620209059, |
| "step": 1200 |
| }, |
| { |
| "epoch": 3.1746031746031744, |
| "grad_norm": 16.126319885253906, |
| "learning_rate": 1.1852146276869743e-05, |
| "loss": 0.4332, |
| "step": 1300 |
| }, |
| { |
| "epoch": 3.1746031746031744, |
| "eval_loss": 0.5176035165786743, |
| "eval_macro_f1": 0.8066235690769744, |
| "eval_macro_precision": 0.8112408819720179, |
| "eval_macro_recall": 0.8135936233566461, |
| "eval_micro_f1": 0.8867595818815331, |
| "eval_micro_precision": 0.8867595818815331, |
| "eval_micro_recall": 0.8867595818815331, |
| "eval_runtime": 28.2301, |
| "eval_samples_per_second": 81.332, |
| "eval_steps_per_second": 1.275, |
| "eval_weighted_f1": 0.8881457405419927, |
| "eval_weighted_precision": 0.892519346409985, |
| "eval_weighted_recall": 0.8867595818815331, |
| "step": 1300 |
| }, |
| { |
| "epoch": 3.4188034188034186, |
| "grad_norm": 25.683168411254883, |
| "learning_rate": 9.344623852086093e-06, |
| "loss": 0.4328, |
| "step": 1400 |
| }, |
| { |
| "epoch": 3.4188034188034186, |
| "eval_loss": 0.48010551929473877, |
| "eval_macro_f1": 0.8188574972016193, |
| "eval_macro_precision": 0.8223473803322559, |
| "eval_macro_recall": 0.8205261076151831, |
| "eval_micro_f1": 0.8906794425087108, |
| "eval_micro_precision": 0.8906794425087108, |
| "eval_micro_recall": 0.8906794425087108, |
| "eval_runtime": 28.1842, |
| "eval_samples_per_second": 81.464, |
| "eval_steps_per_second": 1.277, |
| "eval_weighted_f1": 0.8908187701448725, |
| "eval_weighted_precision": 0.8923647873870492, |
| "eval_weighted_recall": 0.8906794425087108, |
| "step": 1400 |
| }, |
| { |
| "epoch": 3.663003663003663, |
| "grad_norm": 32.259490966796875, |
| "learning_rate": 6.967117488967232e-06, |
| "loss": 0.3965, |
| "step": 1500 |
| }, |
| { |
| "epoch": 3.663003663003663, |
| "eval_loss": 0.48184239864349365, |
| "eval_macro_f1": 0.8216617574200699, |
| "eval_macro_precision": 0.8214437028036421, |
| "eval_macro_recall": 0.8278310307484175, |
| "eval_micro_f1": 0.89198606271777, |
| "eval_micro_precision": 0.89198606271777, |
| "eval_micro_recall": 0.89198606271777, |
| "eval_runtime": 27.8631, |
| "eval_samples_per_second": 82.403, |
| "eval_steps_per_second": 1.292, |
| "eval_weighted_f1": 0.8924948901640382, |
| "eval_weighted_precision": 0.8949476957454691, |
| "eval_weighted_recall": 0.89198606271777, |
| "step": 1500 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 2045, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.46348379202519e+16, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|