| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 267, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "entropy": 1.8215227667987346, |
| "epoch": 0.0937426763534099, |
| "grad_norm": 0.0191650390625, |
| "learning_rate": 0.00019833656768294662, |
| "loss": 1.8891368103027344, |
| "mean_token_accuracy": 0.5957115419581532, |
| "num_tokens": 317950.0, |
| "step": 25 |
| }, |
| { |
| "entropy": 1.5572059528529645, |
| "epoch": 0.1874853527068198, |
| "grad_norm": 0.04345703125, |
| "learning_rate": 0.00018837086450537193, |
| "loss": 1.5083665466308593, |
| "mean_token_accuracy": 0.6573587663751096, |
| "num_tokens": 621644.0, |
| "step": 50 |
| }, |
| { |
| "entropy": 1.1745702652819454, |
| "epoch": 0.28122802906022965, |
| "grad_norm": 0.05126953125, |
| "learning_rate": 0.00017027885831450318, |
| "loss": 1.1137271881103517, |
| "mean_token_accuracy": 0.7345353902876377, |
| "num_tokens": 932160.0, |
| "step": 75 |
| }, |
| { |
| "entropy": 0.9317568638548255, |
| "epoch": 0.3749707054136396, |
| "grad_norm": 0.034423828125, |
| "learning_rate": 0.00014572423233046386, |
| "loss": 0.8523539733886719, |
| "mean_token_accuracy": 0.8004629289358854, |
| "num_tokens": 1238125.0, |
| "step": 100 |
| }, |
| { |
| "entropy": 0.7057386192120612, |
| "epoch": 0.46871338176704946, |
| "grad_norm": 0.054931640625, |
| "learning_rate": 0.00011696495168962847, |
| "loss": 0.6438381958007813, |
| "mean_token_accuracy": 0.8506827702745795, |
| "num_tokens": 1548587.0, |
| "step": 125 |
| }, |
| { |
| "entropy": 0.6799281437788158, |
| "epoch": 0.5624560581204593, |
| "grad_norm": 0.05126953125, |
| "learning_rate": 8.664562816806022e-05, |
| "loss": 0.624417495727539, |
| "mean_token_accuracy": 0.8625156116485596, |
| "num_tokens": 1856691.0, |
| "step": 150 |
| }, |
| { |
| "entropy": 0.6683373341057449, |
| "epoch": 0.6561987344738692, |
| "grad_norm": 0.032470703125, |
| "learning_rate": 5.755433011241851e-05, |
| "loss": 0.6324382781982422, |
| "mean_token_accuracy": 0.8631715876888484, |
| "num_tokens": 2165370.0, |
| "step": 175 |
| }, |
| { |
| "entropy": 0.522582174600102, |
| "epoch": 0.7499414108272792, |
| "grad_norm": 0.0257568359375, |
| "learning_rate": 3.236620056190972e-05, |
| "loss": 0.4893897247314453, |
| "mean_token_accuracy": 0.8958207304775715, |
| "num_tokens": 2477308.0, |
| "step": 200 |
| }, |
| { |
| "entropy": 0.5211648133769632, |
| "epoch": 0.843684087180689, |
| "grad_norm": 0.02978515625, |
| "learning_rate": 1.339745962155613e-05, |
| "loss": 0.4698557662963867, |
| "mean_token_accuracy": 0.897075667232275, |
| "num_tokens": 2780765.0, |
| "step": 225 |
| }, |
| { |
| "entropy": 0.5598523593321443, |
| "epoch": 0.9374267635340989, |
| "grad_norm": 0.0281982421875, |
| "learning_rate": 2.392412244407294e-06, |
| "loss": 0.5123196792602539, |
| "mean_token_accuracy": 0.8904222106188535, |
| "num_tokens": 3094212.0, |
| "step": 250 |
| } |
| ], |
| "logging_steps": 25, |
| "max_steps": 267, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.4085395007888691e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|