| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.001779061865097297, | |
| "eval_steps": 500, | |
| "global_step": 500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 8.895309325486485e-05, | |
| "grad_norm": 0.5721328258514404, | |
| "learning_rate": 2.5e-05, | |
| "loss": 2.8022, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.0001779061865097297, | |
| "grad_norm": 0.8923015594482422, | |
| "learning_rate": 2.5e-05, | |
| "loss": 2.7907, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.0002668592797645945, | |
| "grad_norm": 0.4010128676891327, | |
| "learning_rate": 2.5e-05, | |
| "loss": 2.2071, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.0003558123730194594, | |
| "grad_norm": 1.1923198699951172, | |
| "learning_rate": 2.5e-05, | |
| "loss": 2.0296, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.00044476546627432423, | |
| "grad_norm": 0.44945457577705383, | |
| "learning_rate": 2.5e-05, | |
| "loss": 1.6782, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.000533718559529189, | |
| "grad_norm": 1.238482117652893, | |
| "learning_rate": 2.5e-05, | |
| "loss": 1.3948, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.0006226716527840539, | |
| "grad_norm": 1.2721318006515503, | |
| "learning_rate": 2.5e-05, | |
| "loss": 1.4995, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.0007116247460389188, | |
| "grad_norm": 0.647321343421936, | |
| "learning_rate": 2.5e-05, | |
| "loss": 1.5624, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.0008005778392937836, | |
| "grad_norm": 2.433274984359741, | |
| "learning_rate": 2.5e-05, | |
| "loss": 1.6035, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.0008895309325486485, | |
| "grad_norm": 0.5926241278648376, | |
| "learning_rate": 2.5e-05, | |
| "loss": 1.4234, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.0009784840258035134, | |
| "grad_norm": 1.6820074319839478, | |
| "learning_rate": 2.5e-05, | |
| "loss": 1.1803, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.001067437119058378, | |
| "grad_norm": 1.5319762229919434, | |
| "learning_rate": 2.5e-05, | |
| "loss": 1.3767, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.001156390212313243, | |
| "grad_norm": 0.5725632309913635, | |
| "learning_rate": 2.5e-05, | |
| "loss": 1.245, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.0012453433055681078, | |
| "grad_norm": 0.7384375929832458, | |
| "learning_rate": 2.5e-05, | |
| "loss": 1.2259, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.0013342963988229727, | |
| "grad_norm": 0.5739320516586304, | |
| "learning_rate": 2.5e-05, | |
| "loss": 1.2383, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.0014232494920778376, | |
| "grad_norm": 0.962674617767334, | |
| "learning_rate": 2.5e-05, | |
| "loss": 1.2162, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.0015122025853327023, | |
| "grad_norm": 1.2786195278167725, | |
| "learning_rate": 2.5e-05, | |
| "loss": 1.224, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.0016011556785875672, | |
| "grad_norm": 1.3110697269439697, | |
| "learning_rate": 2.5e-05, | |
| "loss": 1.3172, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.001690108771842432, | |
| "grad_norm": 0.6006544828414917, | |
| "learning_rate": 2.5e-05, | |
| "loss": 1.3636, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.001779061865097297, | |
| "grad_norm": 0.6102003455162048, | |
| "learning_rate": 2.5e-05, | |
| "loss": 1.1117, | |
| "step": 500 | |
| } | |
| ], | |
| "logging_steps": 25, | |
| "max_steps": 500, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.0200419401728e+16, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |