Vikaspandey582003's picture
checkpoint step 50
c00d096 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 25.0,
"eval_steps": 500,
"global_step": 50,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1,
"completions/max_length": 193.0,
"completions/max_terminated_length": 163.0,
"completions/mean_length": 103.975,
"completions/mean_terminated_length": 90.39881134033203,
"completions/min_length": 17.6,
"completions/min_terminated_length": 17.6,
"entropy": 0.1820149033330381,
"epoch": 2.5,
"frac_reward_zero_std": 0.5,
"grad_norm": 0.25,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.0794088900089264,
"num_tokens": 14355.0,
"reward": 0.15020800828933717,
"reward_std": 0.6376187483081595,
"rewards/reward_fn/mean": 0.15020800828933717,
"rewards/reward_fn/std": 0.6376187764341011,
"step": 5,
"step_time": 30.522303848797673
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.025,
"completions/max_length": 151.2,
"completions/max_terminated_length": 141.6,
"completions/mean_length": 80.075,
"completions/mean_terminated_length": 77.78928833007812,
"completions/min_length": 17.8,
"completions/min_terminated_length": 17.8,
"entropy": 0.16221200795844198,
"epoch": 5.0,
"frac_reward_zero_std": 0.5,
"grad_norm": 0.228515625,
"learning_rate": 2.25e-06,
"loss": 0.07169516086578369,
"num_tokens": 27462.0,
"reward": 0.4012819856405258,
"reward_std": 0.4128362699819263,
"rewards/reward_fn/mean": 0.4012819856405258,
"rewards/reward_fn/std": 0.4128363010211615,
"step": 10,
"step_time": 25.01976956339822
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.025,
"completions/max_length": 185.6,
"completions/max_terminated_length": 171.6,
"completions/mean_length": 83.65,
"completions/mean_terminated_length": 79.14285736083984,
"completions/min_length": 16.6,
"completions/min_terminated_length": 16.6,
"entropy": 0.14225535104051232,
"epoch": 7.5,
"frac_reward_zero_std": 0.3,
"grad_norm": 0.265625,
"learning_rate": 3.5e-06,
"loss": 0.0717179834842682,
"num_tokens": 40740.0,
"reward": 0.10207997858524323,
"reward_std": 0.7454913818277419,
"rewards/reward_fn/mean": 0.10207997858524323,
"rewards/reward_fn/std": 0.7454914333298802,
"step": 15,
"step_time": 29.626422570000432
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.1,
"completions/max_length": 202.0,
"completions/max_terminated_length": 163.0,
"completions/mean_length": 98.85,
"completions/mean_terminated_length": 81.79285888671875,
"completions/min_length": 19.2,
"completions/min_terminated_length": 19.2,
"entropy": 0.1534867493668571,
"epoch": 10.0,
"frac_reward_zero_std": 0.4,
"grad_norm": 0.2431640625,
"learning_rate": 4.75e-06,
"loss": 0.09726614952087402,
"num_tokens": 54862.0,
"reward": 0.25181599259376525,
"reward_std": 0.6045640033902601,
"rewards/reward_fn/mean": 0.25181599259376525,
"rewards/reward_fn/std": 0.6045640454394743,
"step": 20,
"step_time": 31.625062462999267
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.125,
"completions/max_length": 233.6,
"completions/max_terminated_length": 194.4,
"completions/mean_length": 99.525,
"completions/mean_terminated_length": 77.78690643310547,
"completions/min_length": 18.8,
"completions/min_terminated_length": 18.8,
"entropy": 0.1538564210291952,
"epoch": 12.5,
"frac_reward_zero_std": 0.6,
"grad_norm": 0.0,
"learning_rate": 4.981481481481482e-06,
"loss": 0.13929661512374877,
"num_tokens": 69039.0,
"reward": 0.0006859898567199707,
"reward_std": 1.0108385920524596,
"rewards/reward_fn/mean": 0.0006859898567199707,
"rewards/reward_fn/std": 1.0108386158943177,
"step": 25,
"step_time": 35.65783783460065
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.05,
"completions/max_length": 181.8,
"completions/max_terminated_length": 153.0,
"completions/mean_length": 65.05,
"completions/mean_terminated_length": 56.03928680419922,
"completions/min_length": 16.6,
"completions/min_terminated_length": 16.6,
"entropy": 0.13224927680566906,
"epoch": 15.0,
"frac_reward_zero_std": 0.5,
"grad_norm": 0.1875,
"learning_rate": 4.958333333333334e-06,
"loss": -0.003383058309555054,
"num_tokens": 81545.0,
"reward": 0.3994179755449295,
"reward_std": 0.5622525057464373,
"rewards/reward_fn/mean": 0.3994179755449295,
"rewards/reward_fn/std": 0.56225254482124,
"step": 30,
"step_time": 29.112151033400732
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.05,
"completions/max_length": 131.0,
"completions/max_terminated_length": 128.6,
"completions/mean_length": 61.075,
"completions/mean_terminated_length": 53.282144165039064,
"completions/min_length": 17.6,
"completions/min_terminated_length": 17.6,
"entropy": 0.11974610288161784,
"epoch": 17.5,
"frac_reward_zero_std": 0.7,
"grad_norm": 0.220703125,
"learning_rate": 4.935185185185186e-06,
"loss": 0.004663025587797165,
"num_tokens": 93892.0,
"reward": 0.3469119846820831,
"reward_std": 0.5775202971824911,
"rewards/reward_fn/mean": 0.3469119846820831,
"rewards/reward_fn/std": 0.5775203009106917,
"step": 35,
"step_time": 22.457519923200017
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.075,
"completions/max_length": 179.6,
"completions/max_terminated_length": 131.2,
"completions/mean_length": 78.025,
"completions/mean_terminated_length": 65.70357513427734,
"completions/min_length": 19.0,
"completions/min_terminated_length": 19.0,
"entropy": 0.12550681543070824,
"epoch": 20.0,
"frac_reward_zero_std": 0.4,
"grad_norm": 0.4609375,
"learning_rate": 4.9120370370370375e-06,
"loss": 0.036792796850204465,
"num_tokens": 107209.0,
"reward": 0.3498039901256561,
"reward_std": 0.7408117946935817,
"rewards/reward_fn/mean": 0.3498039901256561,
"rewards/reward_fn/std": 0.7408118456369266,
"step": 40,
"step_time": 28.692756705999635
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.025,
"completions/max_length": 189.0,
"completions/max_terminated_length": 178.8,
"completions/mean_length": 92.675,
"completions/mean_terminated_length": 89.51071472167969,
"completions/min_length": 17.8,
"completions/min_terminated_length": 17.8,
"entropy": 0.15165529411751777,
"epoch": 22.5,
"frac_reward_zero_std": 0.5,
"grad_norm": 0.232421875,
"learning_rate": 4.888888888888889e-06,
"loss": -0.004727205634117127,
"num_tokens": 120868.0,
"reward": 0.30072798430919645,
"reward_std": 0.7614144545921591,
"rewards/reward_fn/mean": 0.30072798430919645,
"rewards/reward_fn/std": 0.7614145380415721,
"step": 45,
"step_time": 30.053675796201425
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.075,
"completions/max_length": 208.6,
"completions/max_terminated_length": 189.6,
"completions/mean_length": 102.625,
"completions/mean_terminated_length": 90.11666870117188,
"completions/min_length": 18.8,
"completions/min_terminated_length": 18.8,
"entropy": 0.15195838457439095,
"epoch": 25.0,
"frac_reward_zero_std": 0.6,
"grad_norm": 0.208984375,
"learning_rate": 4.865740740740741e-06,
"loss": 0.04500017166137695,
"num_tokens": 135121.0,
"reward": 0.5009119868278503,
"reward_std": 0.5213470441231038,
"rewards/reward_fn/mean": 0.5009119868278503,
"rewards/reward_fn/std": 0.5213470560469432,
"step": 50,
"step_time": 32.55890014459801
}
],
"logging_steps": 5,
"max_steps": 1100,
"num_input_tokens_seen": 135121,
"num_train_epochs": 550,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}