Safetensors
OGPSA / qwen_OGPSA /dpo /trainer_state.json
long2333's picture
Upload 33 files
7d0cb16 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 79,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.064,
"grad_norm": 6.679206848144531,
"learning_rate": 9.936876709681666e-07,
"logits/chosen": -0.8538128137588501,
"logits/rejected": -0.3107415735721588,
"logps/chosen": -76.47968292236328,
"logps/rejected": -334.6451416015625,
"loss": 0.3147,
"num_input_tokens_seen": 256000,
"rewards/accuracies": 0.793749988079071,
"rewards/chosen": 0.6914523839950562,
"rewards/margins": 1.9821197986602783,
"rewards/rejected": -1.2906672954559326,
"step": 5,
"train_runtime": 1679.4563,
"train_tokens_per_second": 152.43
},
{
"epoch": 0.128,
"grad_norm": 0.1262335479259491,
"learning_rate": 9.68316749134364e-07,
"logits/chosen": -0.7508918642997742,
"logits/rejected": -0.27888986468315125,
"logps/chosen": -66.19853973388672,
"logps/rejected": -340.9087829589844,
"loss": 0.0071,
"num_input_tokens_seen": 507424,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.1356029510498047,
"rewards/margins": 8.877564430236816,
"rewards/rejected": -5.7419610023498535,
"step": 10,
"train_runtime": 2616.3069,
"train_tokens_per_second": 193.947
},
{
"epoch": 0.192,
"grad_norm": 0.23208095133304596,
"learning_rate": 9.24491681045682e-07,
"logits/chosen": -0.6891063451766968,
"logits/rejected": -0.21943971514701843,
"logps/chosen": -56.16652297973633,
"logps/rejected": -382.0284423828125,
"loss": 0.0003,
"num_input_tokens_seen": 763040,
"rewards/accuracies": 1.0,
"rewards/chosen": 5.018775939941406,
"rewards/margins": 16.494199752807617,
"rewards/rejected": -11.475423812866211,
"step": 15,
"train_runtime": 3551.0174,
"train_tokens_per_second": 214.879
},
{
"epoch": 0.256,
"grad_norm": 0.15515422821044922,
"learning_rate": 8.639394051847471e-07,
"logits/chosen": -0.5768939256668091,
"logits/rejected": -0.18155181407928467,
"logps/chosen": -51.949642181396484,
"logps/rejected": -367.6221923828125,
"loss": 0.0015,
"num_input_tokens_seen": 1014528,
"rewards/accuracies": 1.0,
"rewards/chosen": 5.824897289276123,
"rewards/margins": 17.040712356567383,
"rewards/rejected": -11.215815544128418,
"step": 20,
"train_runtime": 4492.2862,
"train_tokens_per_second": 225.838
},
{
"epoch": 0.32,
"grad_norm": 1.1735421419143677,
"learning_rate": 7.890460001124241e-07,
"logits/chosen": -0.3975931704044342,
"logits/rejected": -0.11135731637477875,
"logps/chosen": -54.473506927490234,
"logps/rejected": -390.9683532714844,
"loss": 0.0027,
"num_input_tokens_seen": 1279392,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.003702640533447,
"rewards/margins": 17.591880798339844,
"rewards/rejected": -11.588177680969238,
"step": 25,
"train_runtime": 5435.3332,
"train_tokens_per_second": 235.384
},
{
"epoch": 0.384,
"grad_norm": 0.07401101291179657,
"learning_rate": 7.027626604064969e-07,
"logits/chosen": -0.27080175280570984,
"logits/rejected": -0.03910026326775551,
"logps/chosen": -51.99932098388672,
"logps/rejected": -378.4240417480469,
"loss": 0.0127,
"num_input_tokens_seen": 1528704,
"rewards/accuracies": 0.995312511920929,
"rewards/chosen": 6.039531707763672,
"rewards/margins": 18.659555435180664,
"rewards/rejected": -12.620022773742676,
"step": 30,
"train_runtime": 6382.847,
"train_tokens_per_second": 239.502
},
{
"epoch": 0.448,
"grad_norm": 4.258738040924072,
"learning_rate": 6.084894040531589e-07,
"logits/chosen": -0.2694633901119232,
"logits/rejected": -0.0405702069401741,
"logps/chosen": -51.36381912231445,
"logps/rejected": -390.9013366699219,
"loss": 0.0037,
"num_input_tokens_seen": 1785344,
"rewards/accuracies": 0.9984375238418579,
"rewards/chosen": 6.026695728302002,
"rewards/margins": 19.767641067504883,
"rewards/rejected": -13.740945816040039,
"step": 35,
"train_runtime": 7322.5463,
"train_tokens_per_second": 243.815
},
{
"epoch": 0.512,
"grad_norm": 2.983978509902954,
"learning_rate": 5.09941093832535e-07,
"logits/chosen": -0.2793508768081665,
"logits/rejected": -0.027698948979377747,
"logps/chosen": -48.76968002319336,
"logps/rejected": -391.20758056640625,
"loss": 0.0066,
"num_input_tokens_seen": 2041504,
"rewards/accuracies": 0.9984375238418579,
"rewards/chosen": 6.045849323272705,
"rewards/margins": 19.663557052612305,
"rewards/rejected": -13.61771011352539,
"step": 40,
"train_runtime": 8270.3741,
"train_tokens_per_second": 246.845
},
{
"epoch": 0.576,
"grad_norm": 0.4456007480621338,
"learning_rate": 4.1100105216122496e-07,
"logits/chosen": -0.3262310028076172,
"logits/rejected": -0.06511984020471573,
"logps/chosen": -52.1357536315918,
"logps/rejected": -398.6451416015625,
"loss": 0.0004,
"num_input_tokens_seen": 2297408,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.014974117279053,
"rewards/margins": 20.012720108032227,
"rewards/rejected": -13.997745513916016,
"step": 45,
"train_runtime": 9219.9165,
"train_tokens_per_second": 249.179
},
{
"epoch": 0.64,
"grad_norm": 4.368767738342285,
"learning_rate": 3.1556803773799613e-07,
"logits/chosen": -0.3225005865097046,
"logits/rejected": -0.07438264787197113,
"logps/chosen": -49.3891716003418,
"logps/rejected": -390.9215087890625,
"loss": 0.0017,
"num_input_tokens_seen": 2552704,
"rewards/accuracies": 0.9984375238418579,
"rewards/chosen": 6.208975791931152,
"rewards/margins": 20.07794952392578,
"rewards/rejected": -13.868974685668945,
"step": 50,
"train_runtime": 10157.4064,
"train_tokens_per_second": 251.315
},
{
"epoch": 0.704,
"grad_norm": 0.9644066095352173,
"learning_rate": 2.2740261391866633e-07,
"logits/chosen": -0.3191309869289398,
"logits/rejected": -0.06508567929267883,
"logps/chosen": -52.274436950683594,
"logps/rejected": -377.0621337890625,
"loss": 0.0006,
"num_input_tokens_seen": 2802080,
"rewards/accuracies": 1.0,
"rewards/chosen": 5.761786937713623,
"rewards/margins": 19.63225555419922,
"rewards/rejected": -13.870468139648438,
"step": 55,
"train_runtime": 11101.9212,
"train_tokens_per_second": 252.396
},
{
"epoch": 0.768,
"grad_norm": 0.1925666630268097,
"learning_rate": 1.4997896271528737e-07,
"logits/chosen": -0.2972492575645447,
"logits/rejected": -0.04955977201461792,
"logps/chosen": -52.26522445678711,
"logps/rejected": -388.23724365234375,
"loss": 0.0055,
"num_input_tokens_seen": 3057440,
"rewards/accuracies": 0.9984375238418579,
"rewards/chosen": 5.993190765380859,
"rewards/margins": 19.97586441040039,
"rewards/rejected": -13.982673645019531,
"step": 60,
"train_runtime": 12037.0054,
"train_tokens_per_second": 254.003
},
{
"epoch": 0.832,
"grad_norm": 2.587663412094116,
"learning_rate": 8.634798372847146e-08,
"logits/chosen": -0.32648926973342896,
"logits/rejected": -0.04533248394727707,
"logps/chosen": -54.620330810546875,
"logps/rejected": -382.8239440917969,
"loss": 0.0011,
"num_input_tokens_seen": 3306816,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.238775253295898,
"rewards/margins": 19.866392135620117,
"rewards/rejected": -13.627615928649902,
"step": 65,
"train_runtime": 12987.846,
"train_tokens_per_second": 254.609
},
{
"epoch": 0.896,
"grad_norm": 0.31388822197914124,
"learning_rate": 3.901707263589671e-08,
"logits/chosen": -0.3047231435775757,
"logits/rejected": -0.024533234536647797,
"logps/chosen": -51.3085823059082,
"logps/rejected": -381.90814208984375,
"loss": 0.0007,
"num_input_tokens_seen": 3555424,
"rewards/accuracies": 1.0,
"rewards/chosen": 5.905380725860596,
"rewards/margins": 19.539260864257812,
"rewards/rejected": -13.633878707885742,
"step": 70,
"train_runtime": 13943.1522,
"train_tokens_per_second": 254.994
},
{
"epoch": 0.96,
"grad_norm": 0.31311288475990295,
"learning_rate": 9.851316597681959e-09,
"logits/chosen": -0.30659955739974976,
"logits/rejected": -0.04967175051569939,
"logps/chosen": -51.19919967651367,
"logps/rejected": -385.9148254394531,
"loss": 0.0015,
"num_input_tokens_seen": 3806624,
"rewards/accuracies": 1.0,
"rewards/chosen": 6.100650310516357,
"rewards/margins": 19.596603393554688,
"rewards/rejected": -13.495952606201172,
"step": 75,
"train_runtime": 14884.713,
"train_tokens_per_second": 255.741
},
{
"epoch": 1.0,
"num_input_tokens_seen": 3967808,
"step": 79,
"total_flos": 1.6832915605500723e+17,
"train_loss": 0.02286101435605861,
"train_runtime": 15157.818,
"train_samples_per_second": 0.66,
"train_steps_per_second": 0.005
}
],
"logging_steps": 5,
"max_steps": 79,
"num_input_tokens_seen": 3967808,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.6832915605500723e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}