| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.9897610921501707, |
| "eval_steps": 146, |
| "global_step": 438, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.06825938566552901, |
| "grad_norm": 180.72653528520956, |
| "learning_rate": 4.942922374429224e-07, |
| "loss": 3.2191, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.13651877133105803, |
| "grad_norm": 67.78875632967043, |
| "learning_rate": 4.885844748858447e-07, |
| "loss": 1.3006, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.20477815699658702, |
| "grad_norm": 58.498874329827956, |
| "learning_rate": 4.828767123287671e-07, |
| "loss": 1.0222, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.27303754266211605, |
| "grad_norm": 65.81012271745634, |
| "learning_rate": 4.771689497716894e-07, |
| "loss": 1.0702, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.3412969283276451, |
| "grad_norm": 67.6868122468499, |
| "learning_rate": 4.7146118721461187e-07, |
| "loss": 0.8233, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.40955631399317405, |
| "grad_norm": 130.6895452398653, |
| "learning_rate": 4.657534246575342e-07, |
| "loss": 1.0114, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.4778156996587031, |
| "grad_norm": 67.10774835755684, |
| "learning_rate": 4.600456621004566e-07, |
| "loss": 0.7751, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.5460750853242321, |
| "grad_norm": 222.42047329326795, |
| "learning_rate": 4.54337899543379e-07, |
| "loss": 0.9296, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.6143344709897611, |
| "grad_norm": 53.90173100677448, |
| "learning_rate": 4.4863013698630134e-07, |
| "loss": 0.8508, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.6825938566552902, |
| "grad_norm": 73.87543764538118, |
| "learning_rate": 4.429223744292237e-07, |
| "loss": 0.7461, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.7508532423208191, |
| "grad_norm": 51.730677807091766, |
| "learning_rate": 4.372146118721461e-07, |
| "loss": 0.6538, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.8191126279863481, |
| "grad_norm": 58.532049779965085, |
| "learning_rate": 4.315068493150685e-07, |
| "loss": 0.7099, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.8873720136518771, |
| "grad_norm": 69.81205859668461, |
| "learning_rate": 4.2579908675799087e-07, |
| "loss": 0.7059, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.9556313993174061, |
| "grad_norm": 48.161843074686466, |
| "learning_rate": 4.200913242009132e-07, |
| "loss": 0.8547, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.9965870307167235, |
| "eval_loss": 0.6474742889404297, |
| "eval_runtime": 10.4556, |
| "eval_samples_per_second": 24.867, |
| "eval_steps_per_second": 3.156, |
| "step": 146 |
| }, |
| { |
| "epoch": 1.023890784982935, |
| "grad_norm": 27.25525751270437, |
| "learning_rate": 4.143835616438356e-07, |
| "loss": 0.6076, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.0921501706484642, |
| "grad_norm": 25.863617719976116, |
| "learning_rate": 4.0867579908675797e-07, |
| "loss": 0.5054, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.1604095563139931, |
| "grad_norm": 52.70780610444248, |
| "learning_rate": 4.029680365296804e-07, |
| "loss": 0.4987, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.2286689419795223, |
| "grad_norm": 25.718837256960583, |
| "learning_rate": 3.972602739726027e-07, |
| "loss": 0.5697, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.2969283276450512, |
| "grad_norm": 36.98618805648008, |
| "learning_rate": 3.915525114155251e-07, |
| "loss": 0.615, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.36518771331058, |
| "grad_norm": 32.58402570740449, |
| "learning_rate": 3.858447488584475e-07, |
| "loss": 0.5849, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.4334470989761092, |
| "grad_norm": 23.37886970180941, |
| "learning_rate": 3.8013698630136986e-07, |
| "loss": 0.5924, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.5017064846416384, |
| "grad_norm": 27.73682160230221, |
| "learning_rate": 3.744292237442922e-07, |
| "loss": 0.5095, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.5699658703071673, |
| "grad_norm": 58.838640484256175, |
| "learning_rate": 3.687214611872146e-07, |
| "loss": 0.4661, |
| "step": 230 |
| }, |
| { |
| "epoch": 1.6382252559726962, |
| "grad_norm": 29.677456691329837, |
| "learning_rate": 3.6301369863013697e-07, |
| "loss": 0.5448, |
| "step": 240 |
| }, |
| { |
| "epoch": 1.7064846416382253, |
| "grad_norm": 83.73694041166883, |
| "learning_rate": 3.573059360730594e-07, |
| "loss": 0.4715, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.7747440273037542, |
| "grad_norm": 72.88041601319203, |
| "learning_rate": 3.515981735159817e-07, |
| "loss": 0.4408, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.8430034129692832, |
| "grad_norm": 48.86805911410425, |
| "learning_rate": 3.4589041095890407e-07, |
| "loss": 0.5299, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.9112627986348123, |
| "grad_norm": 56.39227585073634, |
| "learning_rate": 3.401826484018265e-07, |
| "loss": 0.462, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.9795221843003414, |
| "grad_norm": 47.10405902113264, |
| "learning_rate": 3.3447488584474886e-07, |
| "loss": 0.5135, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.993174061433447, |
| "eval_loss": 0.6217488646507263, |
| "eval_runtime": 10.0993, |
| "eval_samples_per_second": 25.744, |
| "eval_steps_per_second": 3.268, |
| "step": 292 |
| }, |
| { |
| "epoch": 2.04778156996587, |
| "grad_norm": 47.53720316702887, |
| "learning_rate": 3.287671232876712e-07, |
| "loss": 0.4304, |
| "step": 300 |
| }, |
| { |
| "epoch": 2.1160409556313993, |
| "grad_norm": 29.27611194113683, |
| "learning_rate": 3.230593607305936e-07, |
| "loss": 0.4024, |
| "step": 310 |
| }, |
| { |
| "epoch": 2.1843003412969284, |
| "grad_norm": 22.633855883872762, |
| "learning_rate": 3.1735159817351596e-07, |
| "loss": 0.3316, |
| "step": 320 |
| }, |
| { |
| "epoch": 2.252559726962457, |
| "grad_norm": 33.60546914801217, |
| "learning_rate": 3.116438356164384e-07, |
| "loss": 0.3474, |
| "step": 330 |
| }, |
| { |
| "epoch": 2.3208191126279862, |
| "grad_norm": 38.727812421442835, |
| "learning_rate": 3.059360730593607e-07, |
| "loss": 0.3546, |
| "step": 340 |
| }, |
| { |
| "epoch": 2.3890784982935154, |
| "grad_norm": 41.67978939810039, |
| "learning_rate": 3.0022831050228307e-07, |
| "loss": 0.2542, |
| "step": 350 |
| }, |
| { |
| "epoch": 2.4573378839590445, |
| "grad_norm": 77.57463923271911, |
| "learning_rate": 2.945205479452055e-07, |
| "loss": 0.2766, |
| "step": 360 |
| }, |
| { |
| "epoch": 2.5255972696245736, |
| "grad_norm": 59.43311669158369, |
| "learning_rate": 2.8881278538812786e-07, |
| "loss": 0.44, |
| "step": 370 |
| }, |
| { |
| "epoch": 2.5938566552901023, |
| "grad_norm": 22.434362950673165, |
| "learning_rate": 2.831050228310502e-07, |
| "loss": 0.2935, |
| "step": 380 |
| }, |
| { |
| "epoch": 2.6621160409556315, |
| "grad_norm": 30.626819579935223, |
| "learning_rate": 2.773972602739726e-07, |
| "loss": 0.3208, |
| "step": 390 |
| }, |
| { |
| "epoch": 2.73037542662116, |
| "grad_norm": 64.9106394772347, |
| "learning_rate": 2.7168949771689496e-07, |
| "loss": 0.41, |
| "step": 400 |
| }, |
| { |
| "epoch": 2.7986348122866893, |
| "grad_norm": 112.81858040393827, |
| "learning_rate": 2.659817351598174e-07, |
| "loss": 0.4234, |
| "step": 410 |
| }, |
| { |
| "epoch": 2.8668941979522184, |
| "grad_norm": 25.381208288443705, |
| "learning_rate": 2.602739726027397e-07, |
| "loss": 0.3328, |
| "step": 420 |
| }, |
| { |
| "epoch": 2.9351535836177476, |
| "grad_norm": 45.82561826818154, |
| "learning_rate": 2.5456621004566206e-07, |
| "loss": 0.4019, |
| "step": 430 |
| }, |
| { |
| "epoch": 2.9897610921501707, |
| "eval_loss": 0.64452064037323, |
| "eval_runtime": 10.0372, |
| "eval_samples_per_second": 25.904, |
| "eval_steps_per_second": 3.288, |
| "step": 438 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 876, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 6, |
| "save_steps": 146, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6392788746240.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|