| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.9897610921501707, |
| "eval_steps": 146, |
| "global_step": 438, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.06825938566552901, |
| "grad_norm": 121.90807294112876, |
| "learning_rate": 9.885844748858448e-07, |
| "loss": 2.2874, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.13651877133105803, |
| "grad_norm": 50.17053366859769, |
| "learning_rate": 9.771689497716894e-07, |
| "loss": 0.9472, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.20477815699658702, |
| "grad_norm": 26.596625414659396, |
| "learning_rate": 9.657534246575343e-07, |
| "loss": 0.9087, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.27303754266211605, |
| "grad_norm": 66.69994801604244, |
| "learning_rate": 9.54337899543379e-07, |
| "loss": 0.8543, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.3412969283276451, |
| "grad_norm": 48.76266229603266, |
| "learning_rate": 9.429223744292237e-07, |
| "loss": 0.6994, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.40955631399317405, |
| "grad_norm": 52.043092597792516, |
| "learning_rate": 9.315068493150684e-07, |
| "loss": 0.7693, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.4778156996587031, |
| "grad_norm": 98.55471713268969, |
| "learning_rate": 9.200913242009132e-07, |
| "loss": 0.7138, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.5460750853242321, |
| "grad_norm": 78.48196775888992, |
| "learning_rate": 9.08675799086758e-07, |
| "loss": 0.7903, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.6143344709897611, |
| "grad_norm": 84.85124588773758, |
| "learning_rate": 8.972602739726027e-07, |
| "loss": 0.7586, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.6825938566552902, |
| "grad_norm": 58.99018773695002, |
| "learning_rate": 8.858447488584474e-07, |
| "loss": 0.663, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.7508532423208191, |
| "grad_norm": 39.94579058556075, |
| "learning_rate": 8.744292237442922e-07, |
| "loss": 0.589, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.8191126279863481, |
| "grad_norm": 58.372841216401504, |
| "learning_rate": 8.63013698630137e-07, |
| "loss": 0.699, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.8873720136518771, |
| "grad_norm": 43.360346848406074, |
| "learning_rate": 8.515981735159817e-07, |
| "loss": 0.6512, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.9556313993174061, |
| "grad_norm": 61.008846083100586, |
| "learning_rate": 8.401826484018264e-07, |
| "loss": 0.7701, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.9965870307167235, |
| "eval_loss": 0.619476318359375, |
| "eval_runtime": 10.3711, |
| "eval_samples_per_second": 25.07, |
| "eval_steps_per_second": 3.182, |
| "step": 146 |
| }, |
| { |
| "epoch": 1.023890784982935, |
| "grad_norm": 36.97894806080412, |
| "learning_rate": 8.287671232876712e-07, |
| "loss": 0.5175, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.0921501706484642, |
| "grad_norm": 18.0897726845655, |
| "learning_rate": 8.173515981735159e-07, |
| "loss": 0.3988, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.1604095563139931, |
| "grad_norm": 53.788209383918755, |
| "learning_rate": 8.059360730593608e-07, |
| "loss": 0.3701, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.2286689419795223, |
| "grad_norm": 34.202912199875364, |
| "learning_rate": 7.945205479452054e-07, |
| "loss": 0.4629, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.2969283276450512, |
| "grad_norm": 32.334790445469515, |
| "learning_rate": 7.831050228310501e-07, |
| "loss": 0.4946, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.36518771331058, |
| "grad_norm": 34.57788598902016, |
| "learning_rate": 7.71689497716895e-07, |
| "loss": 0.5149, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.4334470989761092, |
| "grad_norm": 40.59898497248684, |
| "learning_rate": 7.602739726027397e-07, |
| "loss": 0.4565, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.5017064846416384, |
| "grad_norm": 28.000076288707174, |
| "learning_rate": 7.488584474885844e-07, |
| "loss": 0.4383, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.5699658703071673, |
| "grad_norm": 28.11781200722797, |
| "learning_rate": 7.374429223744292e-07, |
| "loss": 0.3446, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.6382252559726962, |
| "grad_norm": 21.457173259928325, |
| "learning_rate": 7.260273972602739e-07, |
| "loss": 0.4206, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.7064846416382253, |
| "grad_norm": 11.954122609487266, |
| "learning_rate": 7.146118721461188e-07, |
| "loss": 0.4117, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.7747440273037542, |
| "grad_norm": 25.454890078823407, |
| "learning_rate": 7.031963470319634e-07, |
| "loss": 0.3557, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.8430034129692832, |
| "grad_norm": 59.00305506329269, |
| "learning_rate": 6.917808219178081e-07, |
| "loss": 0.4312, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.9112627986348123, |
| "grad_norm": 19.693798816454596, |
| "learning_rate": 6.80365296803653e-07, |
| "loss": 0.3798, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.9795221843003414, |
| "grad_norm": 31.09835580820732, |
| "learning_rate": 6.689497716894977e-07, |
| "loss": 0.4721, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.993174061433447, |
| "eval_loss": 0.6094198226928711, |
| "eval_runtime": 10.038, |
| "eval_samples_per_second": 25.901, |
| "eval_steps_per_second": 3.287, |
| "step": 292 |
| }, |
| { |
| "epoch": 2.04778156996587, |
| "grad_norm": 14.148404582312557, |
| "learning_rate": 6.575342465753423e-07, |
| "loss": 0.3175, |
| "step": 300 |
| }, |
| { |
| "epoch": 2.1160409556313993, |
| "grad_norm": 9.302005033170696, |
| "learning_rate": 6.461187214611872e-07, |
| "loss": 0.3385, |
| "step": 310 |
| }, |
| { |
| "epoch": 2.1843003412969284, |
| "grad_norm": 23.055786559150018, |
| "learning_rate": 6.347031963470319e-07, |
| "loss": 0.2497, |
| "step": 320 |
| }, |
| { |
| "epoch": 2.252559726962457, |
| "grad_norm": 15.965787246539906, |
| "learning_rate": 6.232876712328768e-07, |
| "loss": 0.2529, |
| "step": 330 |
| }, |
| { |
| "epoch": 2.3208191126279862, |
| "grad_norm": 19.137778271986814, |
| "learning_rate": 6.118721461187214e-07, |
| "loss": 0.2681, |
| "step": 340 |
| }, |
| { |
| "epoch": 2.3890784982935154, |
| "grad_norm": 43.528989020233965, |
| "learning_rate": 6.004566210045661e-07, |
| "loss": 0.1969, |
| "step": 350 |
| }, |
| { |
| "epoch": 2.4573378839590445, |
| "grad_norm": 38.488063777446804, |
| "learning_rate": 5.89041095890411e-07, |
| "loss": 0.244, |
| "step": 360 |
| }, |
| { |
| "epoch": 2.5255972696245736, |
| "grad_norm": 19.9306913959441, |
| "learning_rate": 5.776255707762557e-07, |
| "loss": 0.3198, |
| "step": 370 |
| }, |
| { |
| "epoch": 2.5938566552901023, |
| "grad_norm": 19.0407162824872, |
| "learning_rate": 5.662100456621004e-07, |
| "loss": 0.2783, |
| "step": 380 |
| }, |
| { |
| "epoch": 2.6621160409556315, |
| "grad_norm": 16.817106035170273, |
| "learning_rate": 5.547945205479452e-07, |
| "loss": 0.2687, |
| "step": 390 |
| }, |
| { |
| "epoch": 2.73037542662116, |
| "grad_norm": 27.440144369018864, |
| "learning_rate": 5.433789954337899e-07, |
| "loss": 0.2999, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.7986348122866893, |
| "grad_norm": 26.842832940417683, |
| "learning_rate": 5.319634703196348e-07, |
| "loss": 0.3246, |
| "step": 410 |
| }, |
| { |
| "epoch": 2.8668941979522184, |
| "grad_norm": 52.24386186460223, |
| "learning_rate": 5.205479452054794e-07, |
| "loss": 0.2099, |
| "step": 420 |
| }, |
| { |
| "epoch": 2.9351535836177476, |
| "grad_norm": 39.20725213578627, |
| "learning_rate": 5.091324200913241e-07, |
| "loss": 0.3482, |
| "step": 430 |
| }, |
| { |
| "epoch": 2.9897610921501707, |
| "eval_loss": 0.6506538391113281, |
| "eval_runtime": 10.0544, |
| "eval_samples_per_second": 25.859, |
| "eval_steps_per_second": 3.282, |
| "step": 438 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 876, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 6, |
| "save_steps": 146, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6392788746240.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|