| { |
| "best_global_step": 196, |
| "best_metric": 1.4676804542541504, |
| "best_model_checkpoint": "./models/codet5-sequenced/checkpoint-196", |
| "epoch": 13.067796610169491, |
| "eval_steps": 7, |
| "global_step": 196, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.06779661016949153, |
| "grad_norm": 35.061439514160156, |
| "learning_rate": 0.0, |
| "loss": 6.9968, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.4745762711864407, |
| "eval_loss": 3.7553770542144775, |
| "eval_runtime": 31.376, |
| "eval_samples_per_second": 4.143, |
| "eval_steps_per_second": 0.223, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.6779661016949152, |
| "grad_norm": 2.471144676208496, |
| "learning_rate": 0.00012857142857142855, |
| "loss": 4.9711, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.9491525423728814, |
| "eval_loss": 2.576092481613159, |
| "eval_runtime": 32.5287, |
| "eval_samples_per_second": 3.996, |
| "eval_steps_per_second": 0.215, |
| "step": 14 |
| }, |
| { |
| "epoch": 1.3389830508474576, |
| "grad_norm": 0.970221757888794, |
| "learning_rate": 0.0002714285714285714, |
| "loss": 2.7955, |
| "step": 20 |
| }, |
| { |
| "epoch": 1.4067796610169492, |
| "eval_loss": 2.0272090435028076, |
| "eval_runtime": 32.8232, |
| "eval_samples_per_second": 3.961, |
| "eval_steps_per_second": 0.213, |
| "step": 21 |
| }, |
| { |
| "epoch": 1.8813559322033897, |
| "eval_loss": 1.794872760772705, |
| "eval_runtime": 32.1072, |
| "eval_samples_per_second": 4.049, |
| "eval_steps_per_second": 0.218, |
| "step": 28 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.253476619720459, |
| "learning_rate": 0.00028823529411764703, |
| "loss": 1.9943, |
| "step": 30 |
| }, |
| { |
| "epoch": 2.3389830508474576, |
| "eval_loss": 1.6869820356369019, |
| "eval_runtime": 33.6053, |
| "eval_samples_per_second": 3.868, |
| "eval_steps_per_second": 0.208, |
| "step": 35 |
| }, |
| { |
| "epoch": 2.6779661016949152, |
| "grad_norm": 0.17588205635547638, |
| "learning_rate": 0.00027352941176470583, |
| "loss": 1.7476, |
| "step": 40 |
| }, |
| { |
| "epoch": 2.8135593220338984, |
| "eval_loss": 1.633076548576355, |
| "eval_runtime": 31.3432, |
| "eval_samples_per_second": 4.148, |
| "eval_steps_per_second": 0.223, |
| "step": 42 |
| }, |
| { |
| "epoch": 3.2711864406779663, |
| "eval_loss": 1.595025658607483, |
| "eval_runtime": 32.6382, |
| "eval_samples_per_second": 3.983, |
| "eval_steps_per_second": 0.214, |
| "step": 49 |
| }, |
| { |
| "epoch": 3.3389830508474576, |
| "grad_norm": 0.1354854702949524, |
| "learning_rate": 0.0002588235294117647, |
| "loss": 1.6567, |
| "step": 50 |
| }, |
| { |
| "epoch": 3.7457627118644066, |
| "eval_loss": 1.5682227611541748, |
| "eval_runtime": 32.3796, |
| "eval_samples_per_second": 4.015, |
| "eval_steps_per_second": 0.216, |
| "step": 56 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.14623650908470154, |
| "learning_rate": 0.0002441176470588235, |
| "loss": 1.6035, |
| "step": 60 |
| }, |
| { |
| "epoch": 4.203389830508475, |
| "eval_loss": 1.5545539855957031, |
| "eval_runtime": 32.9074, |
| "eval_samples_per_second": 3.95, |
| "eval_steps_per_second": 0.213, |
| "step": 63 |
| }, |
| { |
| "epoch": 4.677966101694915, |
| "grad_norm": 0.08996161818504333, |
| "learning_rate": 0.0002294117647058823, |
| "loss": 1.5737, |
| "step": 70 |
| }, |
| { |
| "epoch": 4.677966101694915, |
| "eval_loss": 1.5401841402053833, |
| "eval_runtime": 36.4472, |
| "eval_samples_per_second": 3.567, |
| "eval_steps_per_second": 0.192, |
| "step": 70 |
| }, |
| { |
| "epoch": 5.135593220338983, |
| "eval_loss": 1.5304237604141235, |
| "eval_runtime": 33.0897, |
| "eval_samples_per_second": 3.929, |
| "eval_steps_per_second": 0.212, |
| "step": 77 |
| }, |
| { |
| "epoch": 5.338983050847458, |
| "grad_norm": 0.12378664314746857, |
| "learning_rate": 0.00021470588235294116, |
| "loss": 1.5575, |
| "step": 80 |
| }, |
| { |
| "epoch": 5.610169491525424, |
| "eval_loss": 1.5213029384613037, |
| "eval_runtime": 31.2672, |
| "eval_samples_per_second": 4.158, |
| "eval_steps_per_second": 0.224, |
| "step": 84 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 0.1171395406126976, |
| "learning_rate": 0.00019999999999999998, |
| "loss": 1.5386, |
| "step": 90 |
| }, |
| { |
| "epoch": 6.067796610169491, |
| "eval_loss": 1.5130056142807007, |
| "eval_runtime": 33.1459, |
| "eval_samples_per_second": 3.922, |
| "eval_steps_per_second": 0.211, |
| "step": 91 |
| }, |
| { |
| "epoch": 6.5423728813559325, |
| "eval_loss": 1.5062706470489502, |
| "eval_runtime": 32.4291, |
| "eval_samples_per_second": 4.009, |
| "eval_steps_per_second": 0.216, |
| "step": 98 |
| }, |
| { |
| "epoch": 6.677966101694915, |
| "grad_norm": 0.06857075542211533, |
| "learning_rate": 0.0001852941176470588, |
| "loss": 1.5288, |
| "step": 100 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_loss": 1.5011202096939087, |
| "eval_runtime": 32.7571, |
| "eval_samples_per_second": 3.969, |
| "eval_steps_per_second": 0.214, |
| "step": 105 |
| }, |
| { |
| "epoch": 7.338983050847458, |
| "grad_norm": 0.07266195118427277, |
| "learning_rate": 0.00017058823529411763, |
| "loss": 1.5196, |
| "step": 110 |
| }, |
| { |
| "epoch": 7.47457627118644, |
| "eval_loss": 1.49434232711792, |
| "eval_runtime": 33.5153, |
| "eval_samples_per_second": 3.879, |
| "eval_steps_per_second": 0.209, |
| "step": 112 |
| }, |
| { |
| "epoch": 7.9491525423728815, |
| "eval_loss": 1.493066430091858, |
| "eval_runtime": 32.6462, |
| "eval_samples_per_second": 3.982, |
| "eval_steps_per_second": 0.214, |
| "step": 119 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 0.09790726006031036, |
| "learning_rate": 0.00015588235294117646, |
| "loss": 1.5098, |
| "step": 120 |
| }, |
| { |
| "epoch": 8.40677966101695, |
| "eval_loss": 1.4890639781951904, |
| "eval_runtime": 33.9906, |
| "eval_samples_per_second": 3.825, |
| "eval_steps_per_second": 0.206, |
| "step": 126 |
| }, |
| { |
| "epoch": 8.677966101694915, |
| "grad_norm": 0.08605582267045975, |
| "learning_rate": 0.00014117647058823528, |
| "loss": 1.504, |
| "step": 130 |
| }, |
| { |
| "epoch": 8.88135593220339, |
| "eval_loss": 1.4857922792434692, |
| "eval_runtime": 33.6557, |
| "eval_samples_per_second": 3.863, |
| "eval_steps_per_second": 0.208, |
| "step": 133 |
| }, |
| { |
| "epoch": 9.338983050847457, |
| "grad_norm": 0.07982663810253143, |
| "learning_rate": 0.0001264705882352941, |
| "loss": 1.501, |
| "step": 140 |
| }, |
| { |
| "epoch": 9.338983050847457, |
| "eval_loss": 1.4824906587600708, |
| "eval_runtime": 33.5066, |
| "eval_samples_per_second": 3.88, |
| "eval_steps_per_second": 0.209, |
| "step": 140 |
| }, |
| { |
| "epoch": 9.813559322033898, |
| "eval_loss": 1.478973150253296, |
| "eval_runtime": 33.5484, |
| "eval_samples_per_second": 3.875, |
| "eval_steps_per_second": 0.209, |
| "step": 147 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.07402677834033966, |
| "learning_rate": 0.00011176470588235293, |
| "loss": 1.4941, |
| "step": 150 |
| }, |
| { |
| "epoch": 10.271186440677965, |
| "eval_loss": 1.476096749305725, |
| "eval_runtime": 33.087, |
| "eval_samples_per_second": 3.929, |
| "eval_steps_per_second": 0.212, |
| "step": 154 |
| }, |
| { |
| "epoch": 10.677966101694915, |
| "grad_norm": 0.0733569785952568, |
| "learning_rate": 9.705882352941176e-05, |
| "loss": 1.4894, |
| "step": 160 |
| }, |
| { |
| "epoch": 10.745762711864407, |
| "eval_loss": 1.4757354259490967, |
| "eval_runtime": 33.5743, |
| "eval_samples_per_second": 3.872, |
| "eval_steps_per_second": 0.208, |
| "step": 161 |
| }, |
| { |
| "epoch": 11.203389830508474, |
| "eval_loss": 1.4727975130081177, |
| "eval_runtime": 33.5918, |
| "eval_samples_per_second": 3.87, |
| "eval_steps_per_second": 0.208, |
| "step": 168 |
| }, |
| { |
| "epoch": 11.338983050847457, |
| "grad_norm": 0.07810712605714798, |
| "learning_rate": 8.23529411764706e-05, |
| "loss": 1.4887, |
| "step": 170 |
| }, |
| { |
| "epoch": 11.677966101694915, |
| "eval_loss": 1.4703751802444458, |
| "eval_runtime": 33.6902, |
| "eval_samples_per_second": 3.859, |
| "eval_steps_per_second": 0.208, |
| "step": 175 |
| }, |
| { |
| "epoch": 12.0, |
| "grad_norm": 0.0885886400938034, |
| "learning_rate": 6.76470588235294e-05, |
| "loss": 1.4844, |
| "step": 180 |
| }, |
| { |
| "epoch": 12.135593220338983, |
| "eval_loss": 1.4694443941116333, |
| "eval_runtime": 33.5918, |
| "eval_samples_per_second": 3.87, |
| "eval_steps_per_second": 0.208, |
| "step": 182 |
| }, |
| { |
| "epoch": 12.610169491525424, |
| "eval_loss": 1.47074556350708, |
| "eval_runtime": 33.0341, |
| "eval_samples_per_second": 3.935, |
| "eval_steps_per_second": 0.212, |
| "step": 189 |
| }, |
| { |
| "epoch": 12.677966101694915, |
| "grad_norm": 0.07929002493619919, |
| "learning_rate": 5.294117647058824e-05, |
| "loss": 1.48, |
| "step": 190 |
| }, |
| { |
| "epoch": 13.067796610169491, |
| "eval_loss": 1.4676804542541504, |
| "eval_runtime": 33.3819, |
| "eval_samples_per_second": 3.894, |
| "eval_steps_per_second": 0.21, |
| "step": 196 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 225, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 15, |
| "save_steps": 28, |
| "stateful_callbacks": { |
| "EarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 10, |
| "early_stopping_threshold": 0.001 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2586720180633600.0, |
| "train_batch_size": 20, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|