| { |
| "best_metric": 0.6774637699127197, |
| "best_model_checkpoint": "output/fine_tuning/checkpoints/Meta-Llama-3.1-8B-Instruct/sft/aixpa-ground-short-docs-checkpoint/checkpoint-340", |
| "epoch": 1.8181818181818183, |
| "eval_steps": 20, |
| "global_step": 340, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.10695187165775401, |
| "grad_norm": 0.2608170211315155, |
| "learning_rate": 1.4819165403057078e-05, |
| "loss": 1.5177, |
| "mean_token_accuracy": 0.6778935924172401, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.10695187165775401, |
| "eval_loss": 1.4139869213104248, |
| "eval_mean_token_accuracy": 0.6904795635037306, |
| "eval_runtime": 1035.6849, |
| "eval_samples_per_second": 0.238, |
| "eval_steps_per_second": 0.119, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.21390374331550802, |
| "grad_norm": 0.2079063504934311, |
| "learning_rate": 1.8247997414535347e-05, |
| "loss": 1.3019, |
| "mean_token_accuracy": 0.7044360123574733, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.21390374331550802, |
| "eval_loss": 1.1822656393051147, |
| "eval_mean_token_accuracy": 0.7299752424402934, |
| "eval_runtime": 1036.3743, |
| "eval_samples_per_second": 0.237, |
| "eval_steps_per_second": 0.119, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.32085561497326204, |
| "grad_norm": 0.24613960087299347, |
| "learning_rate": 2e-05, |
| "loss": 1.1518, |
| "mean_token_accuracy": 0.7350467927753925, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.32085561497326204, |
| "eval_loss": 1.0543467998504639, |
| "eval_mean_token_accuracy": 0.7558261015550877, |
| "eval_runtime": 1037.5825, |
| "eval_samples_per_second": 0.237, |
| "eval_steps_per_second": 0.119, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.42780748663101603, |
| "grad_norm": 0.21977411210536957, |
| "learning_rate": 2e-05, |
| "loss": 1.0451, |
| "mean_token_accuracy": 0.758122804760933, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.42780748663101603, |
| "eval_loss": 0.9605845808982849, |
| "eval_mean_token_accuracy": 0.7781746368098065, |
| "eval_runtime": 1036.6763, |
| "eval_samples_per_second": 0.237, |
| "eval_steps_per_second": 0.119, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.5347593582887701, |
| "grad_norm": 0.29401177167892456, |
| "learning_rate": 2e-05, |
| "loss": 0.9781, |
| "mean_token_accuracy": 0.7737521544098854, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.5347593582887701, |
| "eval_loss": 0.9001632332801819, |
| "eval_mean_token_accuracy": 0.7922655029025504, |
| "eval_runtime": 1037.6159, |
| "eval_samples_per_second": 0.237, |
| "eval_steps_per_second": 0.119, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.6417112299465241, |
| "grad_norm": 0.34717148542404175, |
| "learning_rate": 2e-05, |
| "loss": 0.9113, |
| "mean_token_accuracy": 0.7882794156670571, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.6417112299465241, |
| "eval_loss": 0.8677236437797546, |
| "eval_mean_token_accuracy": 0.7979402353123921, |
| "eval_runtime": 1037.0163, |
| "eval_samples_per_second": 0.237, |
| "eval_steps_per_second": 0.119, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.7486631016042781, |
| "grad_norm": 0.3498166799545288, |
| "learning_rate": 2e-05, |
| "loss": 0.8725, |
| "mean_token_accuracy": 0.7943845748901367, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.7486631016042781, |
| "eval_loss": 0.8351719379425049, |
| "eval_mean_token_accuracy": 0.8040957174650053, |
| "eval_runtime": 1036.2597, |
| "eval_samples_per_second": 0.237, |
| "eval_steps_per_second": 0.119, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.8556149732620321, |
| "grad_norm": 0.3868383467197418, |
| "learning_rate": 2e-05, |
| "loss": 0.8721, |
| "mean_token_accuracy": 0.7931242920458317, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.8556149732620321, |
| "eval_loss": 0.8117150068283081, |
| "eval_mean_token_accuracy": 0.808245500413383, |
| "eval_runtime": 1036.3899, |
| "eval_samples_per_second": 0.237, |
| "eval_steps_per_second": 0.119, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.9625668449197861, |
| "grad_norm": 0.5154958367347717, |
| "learning_rate": 2e-05, |
| "loss": 0.83, |
| "mean_token_accuracy": 0.8012300632894039, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.9625668449197861, |
| "eval_loss": 0.7896639108657837, |
| "eval_mean_token_accuracy": 0.8132512133295943, |
| "eval_runtime": 1036.5717, |
| "eval_samples_per_second": 0.237, |
| "eval_steps_per_second": 0.119, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.0695187165775402, |
| "grad_norm": 0.42563366889953613, |
| "learning_rate": 2e-05, |
| "loss": 0.8034, |
| "mean_token_accuracy": 0.8062243178486824, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.0695187165775402, |
| "eval_loss": 0.7641515731811523, |
| "eval_mean_token_accuracy": 0.8180098591781244, |
| "eval_runtime": 1037.8429, |
| "eval_samples_per_second": 0.237, |
| "eval_steps_per_second": 0.119, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.1764705882352942, |
| "grad_norm": 0.39126402139663696, |
| "learning_rate": 2e-05, |
| "loss": 0.7637, |
| "mean_token_accuracy": 0.8159952461719513, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.1764705882352942, |
| "eval_loss": 0.7506969571113586, |
| "eval_mean_token_accuracy": 0.8211809416127399, |
| "eval_runtime": 1036.9492, |
| "eval_samples_per_second": 0.237, |
| "eval_steps_per_second": 0.119, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.2834224598930482, |
| "grad_norm": 0.525314211845398, |
| "learning_rate": 2e-05, |
| "loss": 0.7014, |
| "mean_token_accuracy": 0.8259521864354611, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.2834224598930482, |
| "eval_loss": 0.7359923124313354, |
| "eval_mean_token_accuracy": 0.8241757876504727, |
| "eval_runtime": 1037.3586, |
| "eval_samples_per_second": 0.237, |
| "eval_steps_per_second": 0.119, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.3903743315508021, |
| "grad_norm": 1.2710996866226196, |
| "learning_rate": 2e-05, |
| "loss": 0.7084, |
| "mean_token_accuracy": 0.8261168003082275, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.3903743315508021, |
| "eval_loss": 0.7302640676498413, |
| "eval_mean_token_accuracy": 0.8257462154559003, |
| "eval_runtime": 1037.1577, |
| "eval_samples_per_second": 0.237, |
| "eval_steps_per_second": 0.119, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.4973262032085561, |
| "grad_norm": 0.5921723246574402, |
| "learning_rate": 2e-05, |
| "loss": 0.6984, |
| "mean_token_accuracy": 0.8257287561893463, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.4973262032085561, |
| "eval_loss": 0.716602087020874, |
| "eval_mean_token_accuracy": 0.8293129685448437, |
| "eval_runtime": 1037.3171, |
| "eval_samples_per_second": 0.237, |
| "eval_steps_per_second": 0.119, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.6042780748663101, |
| "grad_norm": 0.6089026927947998, |
| "learning_rate": 2e-05, |
| "loss": 0.6591, |
| "mean_token_accuracy": 0.8372392967343331, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.6042780748663101, |
| "eval_loss": 0.7121440768241882, |
| "eval_mean_token_accuracy": 0.8315709296280775, |
| "eval_runtime": 1038.1176, |
| "eval_samples_per_second": 0.237, |
| "eval_steps_per_second": 0.118, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.7112299465240641, |
| "grad_norm": 0.751674473285675, |
| "learning_rate": 2e-05, |
| "loss": 0.6036, |
| "mean_token_accuracy": 0.8498695828020573, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.7112299465240641, |
| "eval_loss": 0.697968602180481, |
| "eval_mean_token_accuracy": 0.8344496600027007, |
| "eval_runtime": 1037.7814, |
| "eval_samples_per_second": 0.237, |
| "eval_steps_per_second": 0.119, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.8181818181818183, |
| "grad_norm": 1.045749545097351, |
| "learning_rate": 2e-05, |
| "loss": 0.6099, |
| "mean_token_accuracy": 0.845644561946392, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.8181818181818183, |
| "eval_loss": 0.6774637699127197, |
| "eval_mean_token_accuracy": 0.8406207769866881, |
| "eval_runtime": 1037.5418, |
| "eval_samples_per_second": 0.237, |
| "eval_steps_per_second": 0.119, |
| "step": 340 |
| } |
| ], |
| "logging_steps": 20, |
| "max_steps": 1870, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 20, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 5, |
| "early_stopping_threshold": 0.0 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.857740816294871e+18, |
| "train_batch_size": 3, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|