{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.15047493651838614, "eval_steps": 100, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.007523746825919308, "grad_norm": 60.44245910644531, "learning_rate": 1.7999999999999997e-05, "loss": 86.4235, "step": 10 }, { "epoch": 0.015047493651838616, "grad_norm": 40.04138946533203, "learning_rate": 3.8e-05, "loss": 65.01, "step": 20 }, { "epoch": 0.022571240477757923, "grad_norm": 30.586576461791992, "learning_rate": 5.7999999999999994e-05, "loss": 45.3936, "step": 30 }, { "epoch": 0.030094987303677233, "grad_norm": 27.570419311523438, "learning_rate": 7.8e-05, "loss": 34.0648, "step": 40 }, { "epoch": 0.037618734129596536, "grad_norm": 26.078763961791992, "learning_rate": 9.799999999999998e-05, "loss": 28.1261, "step": 50 }, { "epoch": 0.045142480955515846, "grad_norm": 24.367155075073242, "learning_rate": 0.00011799999999999998, "loss": 23.8896, "step": 60 }, { "epoch": 0.052666227781435156, "grad_norm": 18.385601043701172, "learning_rate": 0.000138, "loss": 19.575, "step": 70 }, { "epoch": 0.060189974607354466, "grad_norm": 13.478289604187012, "learning_rate": 0.00015799999999999996, "loss": 14.9189, "step": 80 }, { "epoch": 0.06771372143327377, "grad_norm": 8.762843132019043, "learning_rate": 0.000178, "loss": 10.0506, "step": 90 }, { "epoch": 0.07523746825919307, "grad_norm": 8.72549057006836, "learning_rate": 0.000198, "loss": 5.6379, "step": 100 }, { "epoch": 0.07523746825919307, "eval_loss": 0.49864813685417175, "eval_runtime": 38.1955, "eval_samples_per_second": 32.91, "eval_steps_per_second": 8.247, "step": 100 }, { "epoch": 0.08276121508511239, "grad_norm": 6.59492826461792, "learning_rate": 0.00021799999999999999, "loss": 2.9898, "step": 110 }, { "epoch": 0.09028496191103169, "grad_norm": 2.056182384490967, "learning_rate": 0.00023799999999999998, "loss": 1.6078, "step": 120 }, { "epoch": 0.09780870873695101, "grad_norm": 1.4380172491073608, "learning_rate": 0.000258, "loss": 0.8847, "step": 130 }, { "epoch": 0.10533245556287031, "grad_norm": 1.7172917127609253, "learning_rate": 0.000278, "loss": 0.6103, "step": 140 }, { "epoch": 0.11285620238878961, "grad_norm": 0.5045933723449707, "learning_rate": 0.000298, "loss": 0.3398, "step": 150 }, { "epoch": 0.12037994921470893, "grad_norm": 0.30618351697921753, "learning_rate": 0.0002964, "loss": 0.1732, "step": 160 }, { "epoch": 0.12790369604062823, "grad_norm": 0.7540925145149231, "learning_rate": 0.0002924, "loss": 0.1196, "step": 170 }, { "epoch": 0.13542744286654754, "grad_norm": 0.25162777304649353, "learning_rate": 0.00028839999999999996, "loss": 0.1363, "step": 180 }, { "epoch": 0.14295118969246684, "grad_norm": 0.12225139141082764, "learning_rate": 0.0002844, "loss": 0.0717, "step": 190 }, { "epoch": 0.15047493651838614, "grad_norm": 1.7007333040237427, "learning_rate": 0.0002804, "loss": 0.1011, "step": 200 }, { "epoch": 0.15047493651838614, "eval_loss": 0.012117554433643818, "eval_runtime": 38.2632, "eval_samples_per_second": 32.851, "eval_steps_per_second": 8.232, "step": 200 } ], "logging_steps": 10, "max_steps": 900, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3388839926169600.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }