stack_exc_binary_instr_lm_head / trainer_state.json
immortalPi's picture
Upload folder using huggingface_hub
9be6cca verified
{
"best_global_step": 100,
"best_metric": 0.28937000036239624,
"best_model_checkpoint": "/content/models/gemma_qlora_lmh_inst/checkpoint-100",
"epoch": 1.9607843137254903,
"eval_steps": 20,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"entropy": 2.1317927479743957,
"epoch": 0.39215686274509803,
"grad_norm": 4.763621807098389,
"learning_rate": 8.137254901960784e-06,
"loss": 0.7536,
"mean_token_accuracy": 0.76171875,
"num_tokens": 46761.0,
"step": 20
},
{
"epoch": 0.39215686274509803,
"eval_entropy": 2.128274752543523,
"eval_loss": 0.34577950835227966,
"eval_mean_token_accuracy": 0.7480332163664011,
"eval_num_tokens": 46761.0,
"eval_runtime": 2.4531,
"eval_samples_per_second": 82.754,
"eval_steps_per_second": 5.3,
"step": 20
},
{
"entropy": 2.1670219361782075,
"epoch": 0.7843137254901961,
"grad_norm": 5.078281402587891,
"learning_rate": 6.176470588235295e-06,
"loss": 0.3072,
"mean_token_accuracy": 0.8203125,
"num_tokens": 95614.0,
"step": 40
},
{
"epoch": 0.7843137254901961,
"eval_entropy": 2.1048440016233005,
"eval_loss": 0.35016557574272156,
"eval_mean_token_accuracy": 0.8236451057287363,
"eval_num_tokens": 95614.0,
"eval_runtime": 2.4636,
"eval_samples_per_second": 82.4,
"eval_steps_per_second": 5.277,
"step": 40
},
{
"entropy": 2.1607252150774,
"epoch": 1.1764705882352942,
"grad_norm": 10.205085754394531,
"learning_rate": 4.215686274509805e-06,
"loss": 0.266,
"mean_token_accuracy": 0.86328125,
"num_tokens": 143455.0,
"step": 60
},
{
"epoch": 1.1764705882352942,
"eval_entropy": 2.12537236397083,
"eval_loss": 0.3281736969947815,
"eval_mean_token_accuracy": 0.835664336497967,
"eval_num_tokens": 143455.0,
"eval_runtime": 2.4452,
"eval_samples_per_second": 83.019,
"eval_steps_per_second": 5.317,
"step": 60
},
{
"entropy": 2.1871365696191787,
"epoch": 1.5686274509803921,
"grad_norm": 6.319624423980713,
"learning_rate": 2.254901960784314e-06,
"loss": 0.2363,
"mean_token_accuracy": 0.88125,
"num_tokens": 192572.0,
"step": 80
},
{
"epoch": 1.5686274509803921,
"eval_entropy": 2.1334355702767005,
"eval_loss": 0.29352903366088867,
"eval_mean_token_accuracy": 0.8465909086740934,
"eval_num_tokens": 192572.0,
"eval_runtime": 2.4718,
"eval_samples_per_second": 82.128,
"eval_steps_per_second": 5.259,
"step": 80
},
{
"entropy": 2.1668645977973937,
"epoch": 1.9607843137254903,
"grad_norm": 3.9370977878570557,
"learning_rate": 2.9411764705882356e-07,
"loss": 0.2172,
"mean_token_accuracy": 0.8984375,
"num_tokens": 240555.0,
"step": 100
},
{
"epoch": 1.9607843137254903,
"eval_entropy": 2.129720770395719,
"eval_loss": 0.28937000036239624,
"eval_mean_token_accuracy": 0.8597027980364286,
"eval_num_tokens": 240555.0,
"eval_runtime": 2.4499,
"eval_samples_per_second": 82.861,
"eval_steps_per_second": 5.306,
"step": 100
}
],
"logging_steps": 20,
"max_steps": 102,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 20,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 6060538554167808.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}