| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9984, |
| "eval_steps": 500, |
| "global_step": 312, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.016, |
| "grad_norm": 2.1790294647216797, |
| "learning_rate": 1.5625e-05, |
| "loss": 0.8982, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.032, |
| "grad_norm": 1.2921503782272339, |
| "learning_rate": 3.125e-05, |
| "loss": 0.7771, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.048, |
| "grad_norm": 1.0662598609924316, |
| "learning_rate": 4.6875e-05, |
| "loss": 0.7065, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.064, |
| "grad_norm": 0.8291209936141968, |
| "learning_rate": 4.9979726739605334e-05, |
| "loss": 0.6569, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.8745065927505493, |
| "learning_rate": 4.989742922931149e-05, |
| "loss": 0.6506, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.096, |
| "grad_norm": 0.6823293566703796, |
| "learning_rate": 4.975207191995552e-05, |
| "loss": 0.6579, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.112, |
| "grad_norm": 0.7768641114234924, |
| "learning_rate": 4.95440640639845e-05, |
| "loss": 0.6842, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 0.6644716262817383, |
| "learning_rate": 4.927399130600373e-05, |
| "loss": 0.6327, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.144, |
| "grad_norm": 0.6596384644508362, |
| "learning_rate": 4.894261403389862e-05, |
| "loss": 0.6664, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.6109484434127808, |
| "learning_rate": 4.855086523796815e-05, |
| "loss": 0.6369, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.176, |
| "grad_norm": 0.6302951574325562, |
| "learning_rate": 4.8099847884097434e-05, |
| "loss": 0.6217, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 0.6894915699958801, |
| "learning_rate": 4.7590831808365293e-05, |
| "loss": 0.6364, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.208, |
| "grad_norm": 0.6988873481750488, |
| "learning_rate": 4.702525014183007e-05, |
| "loss": 0.6244, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.224, |
| "grad_norm": 0.6387168765068054, |
| "learning_rate": 4.6404695275559475e-05, |
| "loss": 0.614, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.6547486186027527, |
| "learning_rate": 4.57309143772652e-05, |
| "loss": 0.6034, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 0.6504953503608704, |
| "learning_rate": 4.500580447216489e-05, |
| "loss": 0.6265, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.272, |
| "grad_norm": 0.5533855557441711, |
| "learning_rate": 4.423140710192144e-05, |
| "loss": 0.6178, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.288, |
| "grad_norm": 0.5795829892158508, |
| "learning_rate": 4.340990257669732e-05, |
| "loss": 0.6243, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.304, |
| "grad_norm": 0.5757337808609009, |
| "learning_rate": 4.254360383650716e-05, |
| "loss": 0.605, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.6556103825569153, |
| "learning_rate": 4.163494993915196e-05, |
| "loss": 0.6046, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.336, |
| "grad_norm": 0.6166912913322449, |
| "learning_rate": 4.0686499193069595e-05, |
| "loss": 0.6243, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.352, |
| "grad_norm": 0.6043514609336853, |
| "learning_rate": 3.970092195443604e-05, |
| "loss": 0.6182, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.368, |
| "grad_norm": 0.5503015518188477, |
| "learning_rate": 3.8680993108796956e-05, |
| "loss": 0.619, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.384, |
| "grad_norm": 12.36988353729248, |
| "learning_rate": 3.7629584258397646e-05, |
| "loss": 0.5871, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.5975585579872131, |
| "learning_rate": 3.65496556372078e-05, |
| "loss": 0.6111, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.416, |
| "grad_norm": 0.5625191330909729, |
| "learning_rate": 3.5444247776404274e-05, |
| "loss": 0.5999, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.432, |
| "grad_norm": 0.5412049889564514, |
| "learning_rate": 3.4316472943777736e-05, |
| "loss": 0.5846, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.448, |
| "grad_norm": 0.5721265077590942, |
| "learning_rate": 3.316950638116532e-05, |
| "loss": 0.5975, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.464, |
| "grad_norm": 0.5761215090751648, |
| "learning_rate": 3.2006577364580284e-05, |
| "loss": 0.6136, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 0.5800392627716064, |
| "learning_rate": 3.083096011220896e-05, |
| "loss": 0.5969, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.496, |
| "grad_norm": 0.5748460292816162, |
| "learning_rate": 2.9645964565873207e-05, |
| "loss": 0.6043, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.512, |
| "grad_norm": 0.5393357872962952, |
| "learning_rate": 2.845492707191334e-05, |
| "loss": 0.5814, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.528, |
| "grad_norm": 0.5506784319877625, |
| "learning_rate": 2.7261200987729242e-05, |
| "loss": 0.5676, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.544, |
| "grad_norm": 0.5413019061088562, |
| "learning_rate": 2.606814724042701e-05, |
| "loss": 0.6135, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 0.5511438846588135, |
| "learning_rate": 2.4879124864153163e-05, |
| "loss": 0.5744, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.576, |
| "grad_norm": 0.5734318494796753, |
| "learning_rate": 2.36974815427584e-05, |
| "loss": 0.5972, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.592, |
| "grad_norm": 0.5379422307014465, |
| "learning_rate": 2.252654418441808e-05, |
| "loss": 0.5717, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.608, |
| "grad_norm": 0.5357218980789185, |
| "learning_rate": 2.136960955474649e-05, |
| "loss": 0.6015, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.624, |
| "grad_norm": 0.5256953835487366, |
| "learning_rate": 2.0229934994777195e-05, |
| "loss": 0.5913, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.5051993131637573, |
| "learning_rate": 1.911072924994306e-05, |
| "loss": 0.579, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.656, |
| "grad_norm": 0.5518410205841064, |
| "learning_rate": 1.801514343587688e-05, |
| "loss": 0.5805, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.672, |
| "grad_norm": 0.50865238904953, |
| "learning_rate": 1.6946262166468175e-05, |
| "loss": 0.5962, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.688, |
| "grad_norm": 0.5044121146202087, |
| "learning_rate": 1.590709486915524e-05, |
| "loss": 0.5689, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.704, |
| "grad_norm": 0.4880557060241699, |
| "learning_rate": 1.4900567311904107e-05, |
| "loss": 0.5671, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 0.5397293567657471, |
| "learning_rate": 1.392951336573011e-05, |
| "loss": 0.5693, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.736, |
| "grad_norm": 0.5184421539306641, |
| "learning_rate": 1.2996667025954618e-05, |
| "loss": 0.5693, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.752, |
| "grad_norm": 0.5067721009254456, |
| "learning_rate": 1.2104654714661188e-05, |
| "loss": 0.5693, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.768, |
| "grad_norm": 0.4883371591567993, |
| "learning_rate": 1.1255987886023202e-05, |
| "loss": 0.5749, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.784, |
| "grad_norm": 0.4902109205722809, |
| "learning_rate": 1.0453055955322938e-05, |
| "loss": 0.5791, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.4747011065483093, |
| "learning_rate": 9.698119571570258e-06, |
| "loss": 0.5795, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.816, |
| "grad_norm": 0.4571741223335266, |
| "learning_rate": 8.993304252661744e-06, |
| "loss": 0.5657, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.832, |
| "grad_norm": 0.48521485924720764, |
| "learning_rate": 8.340594401000496e-06, |
| "loss": 0.5773, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.848, |
| "grad_norm": 0.4582931399345398, |
| "learning_rate": 7.741827716425654e-06, |
| "loss": 0.5822, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.864, |
| "grad_norm": 0.4858649671077728, |
| "learning_rate": 7.198690022181837e-06, |
| "loss": 0.5826, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 0.49482160806655884, |
| "learning_rate": 6.712710518496049e-06, |
| "loss": 0.5588, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.896, |
| "grad_norm": 0.4916088581085205, |
| "learning_rate": 6.285257477125605e-06, |
| "loss": 0.5716, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.912, |
| "grad_norm": 0.4659317433834076, |
| "learning_rate": 5.9175343889989275e-06, |
| "loss": 0.5621, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.928, |
| "grad_norm": 0.5164335370063782, |
| "learning_rate": 5.610576575795573e-06, |
| "loss": 0.55, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.944, |
| "grad_norm": 0.5051558017730713, |
| "learning_rate": 5.36524827500562e-06, |
| "loss": 0.5709, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.4648708403110504, |
| "learning_rate": 5.182240206675272e-06, |
| "loss": 0.5661, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.976, |
| "grad_norm": 0.47396937012672424, |
| "learning_rate": 5.06206762868959e-06, |
| "loss": 0.5582, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.992, |
| "grad_norm": 0.46491411328315735, |
| "learning_rate": 5.005068886067688e-06, |
| "loss": 0.5557, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.9984, |
| "step": 312, |
| "total_flos": 34173306634240.0, |
| "train_loss": 0.6066549909420502, |
| "train_runtime": 5973.3628, |
| "train_samples_per_second": 1.674, |
| "train_steps_per_second": 0.052 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 312, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 34173306634240.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|