| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 236, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "entropy": 1.769037756472826, |
| "epoch": 0.10615711252653928, |
| "grad_norm": 0.048828125, |
| "learning_rate": 0.00019757963826274357, |
| "loss": 1.871598358154297, |
| "mean_token_accuracy": 0.5911617970466614, |
| "num_tokens": 120123.0, |
| "step": 25 |
| }, |
| { |
| "entropy": 1.3297694745659827, |
| "epoch": 0.21231422505307856, |
| "grad_norm": 0.0849609375, |
| "learning_rate": 0.0001844623121722465, |
| "loss": 1.2977835083007812, |
| "mean_token_accuracy": 0.6848558619618416, |
| "num_tokens": 240275.0, |
| "step": 50 |
| }, |
| { |
| "entropy": 1.0329674516618252, |
| "epoch": 0.3184713375796178, |
| "grad_norm": 0.1494140625, |
| "learning_rate": 0.0001614212712689668, |
| "loss": 1.0233447265625, |
| "mean_token_accuracy": 0.7406904135644435, |
| "num_tokens": 362917.0, |
| "step": 75 |
| }, |
| { |
| "entropy": 0.725548449382186, |
| "epoch": 0.42462845010615713, |
| "grad_norm": 0.15625, |
| "learning_rate": 0.00013116367230759415, |
| "loss": 0.7052877044677734, |
| "mean_token_accuracy": 0.8207531237602234, |
| "num_tokens": 487617.0, |
| "step": 100 |
| }, |
| { |
| "entropy": 0.5920333345420659, |
| "epoch": 0.5307855626326964, |
| "grad_norm": 0.162109375, |
| "learning_rate": 9.724456576318381e-05, |
| "loss": 0.567355728149414, |
| "mean_token_accuracy": 0.8563815836608409, |
| "num_tokens": 610059.0, |
| "step": 125 |
| }, |
| { |
| "entropy": 0.33838055985048415, |
| "epoch": 0.6369426751592356, |
| "grad_norm": 0.1328125, |
| "learning_rate": 6.3649202943617e-05, |
| "loss": 0.29026262283325194, |
| "mean_token_accuracy": 0.9228389444947243, |
| "num_tokens": 733429.0, |
| "step": 150 |
| }, |
| { |
| "entropy": 0.28926644794642925, |
| "epoch": 0.7430997876857749, |
| "grad_norm": 0.130859375, |
| "learning_rate": 3.4324797595226565e-05, |
| "loss": 0.2692854881286621, |
| "mean_token_accuracy": 0.9342927479743958, |
| "num_tokens": 855609.0, |
| "step": 175 |
| }, |
| { |
| "entropy": 0.24624995148740708, |
| "epoch": 0.8492569002123143, |
| "grad_norm": 0.0771484375, |
| "learning_rate": 1.2716756648601857e-05, |
| "loss": 0.20696916580200195, |
| "mean_token_accuracy": 0.9515270568430424, |
| "num_tokens": 974239.0, |
| "step": 200 |
| }, |
| { |
| "entropy": 0.2326159458886832, |
| "epoch": 0.9554140127388535, |
| "grad_norm": 0.0888671875, |
| "learning_rate": 1.3638696597277679e-06, |
| "loss": 0.2066523551940918, |
| "mean_token_accuracy": 0.9543360522389412, |
| "num_tokens": 1098264.0, |
| "step": 225 |
| } |
| ], |
| "logging_steps": 25, |
| "max_steps": 236, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 9.678257922290074e+16, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|