File size: 3,749 Bytes
36cf06a | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 | {
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 267,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"entropy": 1.8215227667987346,
"epoch": 0.0937426763534099,
"grad_norm": 0.0191650390625,
"learning_rate": 0.00019833656768294662,
"loss": 1.8891368103027344,
"mean_token_accuracy": 0.5957115419581532,
"num_tokens": 317950.0,
"step": 25
},
{
"entropy": 1.5572059528529645,
"epoch": 0.1874853527068198,
"grad_norm": 0.04345703125,
"learning_rate": 0.00018837086450537193,
"loss": 1.5083665466308593,
"mean_token_accuracy": 0.6573587663751096,
"num_tokens": 621644.0,
"step": 50
},
{
"entropy": 1.1745702652819454,
"epoch": 0.28122802906022965,
"grad_norm": 0.05126953125,
"learning_rate": 0.00017027885831450318,
"loss": 1.1137271881103517,
"mean_token_accuracy": 0.7345353902876377,
"num_tokens": 932160.0,
"step": 75
},
{
"entropy": 0.9317568638548255,
"epoch": 0.3749707054136396,
"grad_norm": 0.034423828125,
"learning_rate": 0.00014572423233046386,
"loss": 0.8523539733886719,
"mean_token_accuracy": 0.8004629289358854,
"num_tokens": 1238125.0,
"step": 100
},
{
"entropy": 0.7057386192120612,
"epoch": 0.46871338176704946,
"grad_norm": 0.054931640625,
"learning_rate": 0.00011696495168962847,
"loss": 0.6438381958007813,
"mean_token_accuracy": 0.8506827702745795,
"num_tokens": 1548587.0,
"step": 125
},
{
"entropy": 0.6799281437788158,
"epoch": 0.5624560581204593,
"grad_norm": 0.05126953125,
"learning_rate": 8.664562816806022e-05,
"loss": 0.624417495727539,
"mean_token_accuracy": 0.8625156116485596,
"num_tokens": 1856691.0,
"step": 150
},
{
"entropy": 0.6683373341057449,
"epoch": 0.6561987344738692,
"grad_norm": 0.032470703125,
"learning_rate": 5.755433011241851e-05,
"loss": 0.6324382781982422,
"mean_token_accuracy": 0.8631715876888484,
"num_tokens": 2165370.0,
"step": 175
},
{
"entropy": 0.522582174600102,
"epoch": 0.7499414108272792,
"grad_norm": 0.0257568359375,
"learning_rate": 3.236620056190972e-05,
"loss": 0.4893897247314453,
"mean_token_accuracy": 0.8958207304775715,
"num_tokens": 2477308.0,
"step": 200
},
{
"entropy": 0.5211648133769632,
"epoch": 0.843684087180689,
"grad_norm": 0.02978515625,
"learning_rate": 1.339745962155613e-05,
"loss": 0.4698557662963867,
"mean_token_accuracy": 0.897075667232275,
"num_tokens": 2780765.0,
"step": 225
},
{
"entropy": 0.5598523593321443,
"epoch": 0.9374267635340989,
"grad_norm": 0.0281982421875,
"learning_rate": 2.392412244407294e-06,
"loss": 0.5123196792602539,
"mean_token_accuracy": 0.8904222106188535,
"num_tokens": 3094212.0,
"step": 250
}
],
"logging_steps": 25,
"max_steps": 267,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.4085395007888691e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}
|