{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 3751,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.026659557451346308,
      "grad_norm": 7.0857120383857755,
      "learning_rate": 2.6329787234042554e-06,
      "loss": 1.0624,
      "step": 100
    },
    {
      "epoch": 0.053319114902692616,
      "grad_norm": 6.581000668568491,
      "learning_rate": 5.292553191489362e-06,
      "loss": 0.7422,
      "step": 200
    },
    {
      "epoch": 0.07997867235403892,
      "grad_norm": 7.4656274686679085,
      "learning_rate": 7.95212765957447e-06,
      "loss": 0.7649,
      "step": 300
    },
    {
      "epoch": 0.10663822980538523,
      "grad_norm": 4.554989486482939,
      "learning_rate": 9.998854140728647e-06,
      "loss": 0.7662,
      "step": 400
    },
    {
      "epoch": 0.13329778725673153,
      "grad_norm": 4.023821679311659,
      "learning_rate": 9.967263823916638e-06,
      "loss": 0.784,
      "step": 500
    },
    {
      "epoch": 0.15995734470807785,
      "grad_norm": 4.221747608519819,
      "learning_rate": 9.892664857121854e-06,
      "loss": 0.79,
      "step": 600
    },
    {
      "epoch": 0.18661690215942414,
      "grad_norm": 3.838998761862227,
      "learning_rate": 9.775703149433419e-06,
      "loss": 0.7811,
      "step": 700
    },
    {
      "epoch": 0.21327645961077046,
      "grad_norm": 4.346355062560185,
      "learning_rate": 9.617391404288412e-06,
      "loss": 0.8124,
      "step": 800
    },
    {
      "epoch": 0.23993601706211676,
      "grad_norm": 4.985043330591158,
      "learning_rate": 9.41910035106149e-06,
      "loss": 0.7959,
      "step": 900
    },
    {
      "epoch": 0.26659557451346305,
      "grad_norm": 4.43851554256146,
      "learning_rate": 9.18254687671603e-06,
      "loss": 0.8065,
      "step": 1000
    },
    {
      "epoch": 0.2932551319648094,
      "grad_norm": 4.6639168705369425,
      "learning_rate": 8.909779160277951e-06,
      "loss": 0.7854,
      "step": 1100
    },
    {
      "epoch": 0.3199146894161557,
      "grad_norm": 3.7379632914857375,
      "learning_rate": 8.603158938844122e-06,
      "loss": 0.7729,
      "step": 1200
    },
    {
      "epoch": 0.346574246867502,
      "grad_norm": 4.820935299394203,
      "learning_rate": 8.265341058673722e-06,
      "loss": 0.7831,
      "step": 1300
    },
    {
      "epoch": 0.3732338043188483,
      "grad_norm": 3.6917931938034148,
      "learning_rate": 7.899250488417746e-06,
      "loss": 0.7967,
      "step": 1400
    },
    {
      "epoch": 0.39989336177019463,
      "grad_norm": 4.773594475659506,
      "learning_rate": 7.5080569935157375e-06,
      "loss": 0.7979,
      "step": 1500
    },
    {
      "epoch": 0.4265529192215409,
      "grad_norm": 4.447857956208416,
      "learning_rate": 7.095147691039425e-06,
      "loss": 0.7843,
      "step": 1600
    },
    {
      "epoch": 0.4532124766728872,
      "grad_norm": 4.389149312164256,
      "learning_rate": 6.664097722614934e-06,
      "loss": 0.7721,
      "step": 1700
    },
    {
      "epoch": 0.4798720341242335,
      "grad_norm": 3.915206329218288,
      "learning_rate": 6.218639299349676e-06,
      "loss": 0.7526,
      "step": 1800
    },
    {
      "epoch": 0.5065315915755798,
      "grad_norm": 5.180289969908035,
      "learning_rate": 5.7626293867858985e-06,
      "loss": 0.7854,
      "step": 1900
    },
    {
      "epoch": 0.5331911490269261,
      "grad_norm": 4.139436495632767,
      "learning_rate": 5.300016309678104e-06,
      "loss": 0.7381,
      "step": 2000
    },
    {
      "epoch": 0.5598507064782725,
      "grad_norm": 4.936713870803515,
      "learning_rate": 4.834805565744173e-06,
      "loss": 0.7471,
      "step": 2100
    },
    {
      "epoch": 0.5865102639296188,
      "grad_norm": 4.5976706617804,
      "learning_rate": 4.371025144389e-06,
      "loss": 0.7611,
      "step": 2200
    },
    {
      "epoch": 0.6131698213809651,
      "grad_norm": 5.820707950034038,
      "learning_rate": 3.912690650685726e-06,
      "loss": 0.7374,
      "step": 2300
    },
    {
      "epoch": 0.6398293788323114,
      "grad_norm": 3.951105419131897,
      "learning_rate": 3.4637705365856666e-06,
      "loss": 0.7444,
      "step": 2400
    },
    {
      "epoch": 0.6664889362836577,
      "grad_norm": 4.529901890166711,
      "learning_rate": 3.0281517403997245e-06,
      "loss": 0.7452,
      "step": 2500
    },
    {
      "epoch": 0.693148493735004,
      "grad_norm": 2.851635488548725,
      "learning_rate": 2.6096060320590393e-06,
      "loss": 0.7345,
      "step": 2600
    },
    {
      "epoch": 0.7198080511863503,
      "grad_norm": 3.6802521221701947,
      "learning_rate": 2.2117573555516774e-06,
      "loss": 0.7348,
      "step": 2700
    },
    {
      "epoch": 0.7464676086376966,
      "grad_norm": 3.6788331738320075,
      "learning_rate": 1.8380504512982329e-06,
      "loss": 0.7352,
      "step": 2800
    },
    {
      "epoch": 0.773127166089043,
      "grad_norm": 3.664797807064228,
      "learning_rate": 1.491721030146963e-06,
      "loss": 0.7299,
      "step": 2900
    },
    {
      "epoch": 0.7997867235403893,
      "grad_norm": 3.9751097792558783,
      "learning_rate": 1.1757677572344577e-06,
      "loss": 0.7353,
      "step": 3000
    },
    {
      "epoch": 0.8264462809917356,
      "grad_norm": 3.7953764140303985,
      "learning_rate": 8.929262882873524e-07,
      "loss": 0.7297,
      "step": 3100
    },
    {
      "epoch": 0.8531058384430819,
      "grad_norm": 3.4294084377480814,
      "learning_rate": 6.456455831696234e-07,
      "loss": 0.7151,
      "step": 3200
    },
    {
      "epoch": 0.8797653958944281,
      "grad_norm": 4.751586217200114,
      "learning_rate": 4.3606670176271014e-07,
      "loss": 0.695,
      "step": 3300
    },
    {
      "epoch": 0.9064249533457744,
      "grad_norm": 4.643644454272238,
      "learning_rate": 2.660042657725931e-07,
      "loss": 0.7423,
      "step": 3400
    },
    {
      "epoch": 0.9330845107971207,
      "grad_norm": 4.371360024084083,
      "learning_rate": 1.3693074697528231e-07,
      "loss": 0.7068,
      "step": 3500
    },
    {
      "epoch": 0.959744068248467,
      "grad_norm": 5.224648142017166,
      "learning_rate": 4.996371793965837e-08,
      "loss": 0.7121,
      "step": 3600
    },
    {
      "epoch": 0.9864036256998134,
      "grad_norm": 4.815750775150759,
      "learning_rate": 5.8561756162400785e-09,
      "loss": 0.7437,
      "step": 3700
    },
    {
      "epoch": 1.0,
      "step": 3751,
      "total_flos": 84620534874112.0,
      "train_loss": 0.7639887226768826,
      "train_runtime": 3882.0564,
      "train_samples_per_second": 7.728,
      "train_steps_per_second": 0.966
    }
  ],
  "logging_steps": 100,
  "max_steps": 3751,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 10000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 84620534874112.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}