ARPO / 044-grpo-7b_perception_test /trainer_state.json
CserDu123's picture
Upload 044-grpo-7b_perception_test/trainer_state.json with huggingface_hub
04de687 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 396,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 248.0,
"completions/max_terminated_length": 248.0,
"completions/mean_length": 122.875,
"completions/mean_terminated_length": 122.875,
"completions/min_length": 51.0,
"completions/min_terminated_length": 51.0,
"entropy": 0.7635231614112854,
"epoch": 0.0025252525252525255,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 105648.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.0,
"rewards/video_r1_accuracy_reward/std": 0.0,
"rewards/video_r1_format_reward/mean": 0.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 1
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 384.0,
"completions/max_terminated_length": 364.0,
"completions/mean_length": 131.6328125,
"completions/mean_terminated_length": 129.64566040039062,
"completions/min_length": 61.0,
"completions/min_terminated_length": 61.0,
"entropy": 0.7612149715423584,
"epoch": 0.005050505050505051,
"frac_reward_zero_std": 0.9375,
"grad_norm": 0.6195482015609741,
"learning_rate": 9.974747474747475e-07,
"loss": -0.0,
"num_tokens": 223745.0,
"reward": 0.0078125,
"reward_std": 0.022097086533904076,
"rewards/video_r1_accuracy_reward/mean": 0.0078125,
"rewards/video_r1_accuracy_reward/std": 0.0883883461356163,
"rewards/video_r1_format_reward/mean": 0.0078125,
"rewards/video_r1_format_reward/std": 0.0883883461356163,
"step": 2
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 286.0,
"completions/max_terminated_length": 286.0,
"completions/mean_length": 134.2265625,
"completions/mean_terminated_length": 134.2265625,
"completions/min_length": 44.0,
"completions/min_terminated_length": 44.0,
"entropy": 0.7807673215866089,
"epoch": 0.007575757575757576,
"frac_reward_zero_std": 0.9375,
"grad_norm": 0.5153651833534241,
"learning_rate": 9.949494949494949e-07,
"loss": 0.0,
"num_tokens": 336902.0,
"reward": 0.0015625000232830644,
"reward_std": 0.0016703829169273376,
"rewards/video_r1_accuracy_reward/mean": 0.0,
"rewards/video_r1_accuracy_reward/std": 0.0,
"rewards/video_r1_format_reward/mean": 0.03125,
"rewards/video_r1_format_reward/std": 0.1746762990951538,
"step": 3
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 384.0,
"completions/max_terminated_length": 290.0,
"completions/mean_length": 125.453125,
"completions/mean_terminated_length": 123.41732025146484,
"completions/min_length": 48.0,
"completions/min_terminated_length": 48.0,
"entropy": 0.7577059268951416,
"epoch": 0.010101010101010102,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 9.924242424242425e-07,
"loss": 0.0,
"num_tokens": 450968.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.0,
"rewards/video_r1_accuracy_reward/std": 0.0,
"rewards/video_r1_format_reward/mean": 0.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 4
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 291.0,
"completions/max_terminated_length": 291.0,
"completions/mean_length": 125.5546875,
"completions/mean_terminated_length": 125.5546875,
"completions/min_length": 52.0,
"completions/min_terminated_length": 52.0,
"entropy": 0.7316204309463501,
"epoch": 0.012626262626262626,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 9.898989898989898e-07,
"loss": 0.0,
"num_tokens": 569063.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.0,
"rewards/video_r1_accuracy_reward/std": 0.0,
"rewards/video_r1_format_reward/mean": 0.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 5
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 302.0,
"completions/max_terminated_length": 302.0,
"completions/mean_length": 134.765625,
"completions/mean_terminated_length": 134.765625,
"completions/min_length": 58.0,
"completions/min_terminated_length": 58.0,
"entropy": 0.7153864502906799,
"epoch": 0.015151515151515152,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 9.873737373737374e-07,
"loss": 0.0,
"num_tokens": 677257.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.0,
"rewards/video_r1_accuracy_reward/std": 0.0,
"rewards/video_r1_format_reward/mean": 0.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 6
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 347.0,
"completions/max_terminated_length": 347.0,
"completions/mean_length": 142.9140625,
"completions/mean_terminated_length": 142.9140625,
"completions/min_length": 47.0,
"completions/min_terminated_length": 47.0,
"entropy": 0.6941128373146057,
"epoch": 0.017676767676767676,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 9.848484848484847e-07,
"loss": 0.0,
"num_tokens": 791206.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.0,
"rewards/video_r1_accuracy_reward/std": 0.0,
"rewards/video_r1_format_reward/mean": 0.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 7
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 339.0,
"completions/max_terminated_length": 339.0,
"completions/mean_length": 153.609375,
"completions/mean_terminated_length": 153.609375,
"completions/min_length": 50.0,
"completions/min_terminated_length": 50.0,
"entropy": 0.7712859511375427,
"epoch": 0.020202020202020204,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 9.823232323232323e-07,
"loss": 0.0,
"num_tokens": 906244.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.0,
"rewards/video_r1_accuracy_reward/std": 0.0,
"rewards/video_r1_format_reward/mean": 0.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 8
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 308.0,
"completions/max_terminated_length": 308.0,
"completions/mean_length": 133.671875,
"completions/mean_terminated_length": 133.671875,
"completions/min_length": 63.0,
"completions/min_terminated_length": 63.0,
"entropy": 0.7461484670639038,
"epoch": 0.022727272727272728,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 9.797979797979797e-07,
"loss": 0.0,
"num_tokens": 1022034.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.0,
"rewards/video_r1_accuracy_reward/std": 0.0,
"rewards/video_r1_format_reward/mean": 0.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 9
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 274.0,
"completions/max_terminated_length": 274.0,
"completions/mean_length": 127.5390625,
"completions/mean_terminated_length": 127.5390625,
"completions/min_length": 55.0,
"completions/min_terminated_length": 55.0,
"entropy": 0.7210257649421692,
"epoch": 0.025252525252525252,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 9.772727272727273e-07,
"loss": 0.0,
"num_tokens": 1149551.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.0,
"rewards/video_r1_accuracy_reward/std": 0.0,
"rewards/video_r1_format_reward/mean": 0.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 10
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 297.0,
"completions/max_terminated_length": 297.0,
"completions/mean_length": 131.25,
"completions/mean_terminated_length": 131.25,
"completions/min_length": 49.0,
"completions/min_terminated_length": 49.0,
"entropy": 0.7806915640830994,
"epoch": 0.027777777777777776,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 9.747474747474746e-07,
"loss": 0.0,
"num_tokens": 1255743.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.0,
"rewards/video_r1_accuracy_reward/std": 0.0,
"rewards/video_r1_format_reward/mean": 0.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 11
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 323.0,
"completions/max_terminated_length": 323.0,
"completions/mean_length": 130.3203125,
"completions/mean_terminated_length": 130.3203125,
"completions/min_length": 42.0,
"completions/min_terminated_length": 42.0,
"entropy": 0.6973187923431396,
"epoch": 0.030303030303030304,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 9.722222222222222e-07,
"loss": 0.0,
"num_tokens": 1366240.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.0,
"rewards/video_r1_accuracy_reward/std": 0.0,
"rewards/video_r1_format_reward/mean": 0.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 12
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 384.0,
"completions/max_terminated_length": 377.0,
"completions/mean_length": 154.109375,
"completions/mean_terminated_length": 152.29922485351562,
"completions/min_length": 57.0,
"completions/min_terminated_length": 57.0,
"entropy": 0.6917202472686768,
"epoch": 0.03282828282828283,
"frac_reward_zero_std": 0.9375,
"grad_norm": 0.3806738555431366,
"learning_rate": 9.696969696969698e-07,
"loss": 0.0,
"num_tokens": 1474046.0,
"reward": 0.0003906250058207661,
"reward_std": 0.0011048543965443969,
"rewards/video_r1_accuracy_reward/mean": 0.0,
"rewards/video_r1_accuracy_reward/std": 0.0,
"rewards/video_r1_format_reward/mean": 0.0078125,
"rewards/video_r1_format_reward/std": 0.0883883461356163,
"step": 13
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 288.0,
"completions/max_terminated_length": 288.0,
"completions/mean_length": 139.734375,
"completions/mean_terminated_length": 139.734375,
"completions/min_length": 37.0,
"completions/min_terminated_length": 37.0,
"entropy": 0.6320770978927612,
"epoch": 0.03535353535353535,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 9.671717171717171e-07,
"loss": 0.0,
"num_tokens": 1585796.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.0,
"rewards/video_r1_accuracy_reward/std": 0.0,
"rewards/video_r1_format_reward/mean": 0.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 14
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 380.0,
"completions/max_terminated_length": 380.0,
"completions/mean_length": 158.1953125,
"completions/mean_terminated_length": 158.1953125,
"completions/min_length": 50.0,
"completions/min_terminated_length": 50.0,
"entropy": 0.6951600313186646,
"epoch": 0.03787878787878788,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 9.646464646464647e-07,
"loss": 0.0,
"num_tokens": 1710981.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.0,
"rewards/video_r1_accuracy_reward/std": 0.0,
"rewards/video_r1_format_reward/mean": 0.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 15
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 276.0,
"completions/max_terminated_length": 276.0,
"completions/mean_length": 136.390625,
"completions/mean_terminated_length": 136.390625,
"completions/min_length": 57.0,
"completions/min_terminated_length": 57.0,
"entropy": 0.7261592149734497,
"epoch": 0.04040404040404041,
"frac_reward_zero_std": 0.9375,
"grad_norm": 0.4154719114303589,
"learning_rate": 9.62121212121212e-07,
"loss": -0.0,
"num_tokens": 1830455.0,
"reward": 0.0078125,
"reward_std": 0.022097086533904076,
"rewards/video_r1_accuracy_reward/mean": 0.0078125,
"rewards/video_r1_accuracy_reward/std": 0.0883883461356163,
"rewards/video_r1_format_reward/mean": 0.0078125,
"rewards/video_r1_format_reward/std": 0.0883883461356163,
"step": 16
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 279.0,
"completions/max_terminated_length": 279.0,
"completions/mean_length": 135.8203125,
"completions/mean_terminated_length": 135.8203125,
"completions/min_length": 52.0,
"completions/min_terminated_length": 52.0,
"entropy": 0.7299904823303223,
"epoch": 0.04292929292929293,
"frac_reward_zero_std": 0.9375,
"grad_norm": 0.41257989406585693,
"learning_rate": 9.595959595959596e-07,
"loss": -0.0,
"num_tokens": 1935088.0,
"reward": 0.0074218749068677425,
"reward_std": 0.020992232486605644,
"rewards/video_r1_accuracy_reward/mean": 0.0078125,
"rewards/video_r1_accuracy_reward/std": 0.0883883461356163,
"rewards/video_r1_format_reward/mean": 0.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 17
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 272.0,
"completions/max_terminated_length": 272.0,
"completions/mean_length": 143.9296875,
"completions/mean_terminated_length": 143.9296875,
"completions/min_length": 68.0,
"completions/min_terminated_length": 68.0,
"entropy": 0.7212563753128052,
"epoch": 0.045454545454545456,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 9.57070707070707e-07,
"loss": 0.0,
"num_tokens": 2044351.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.0,
"rewards/video_r1_accuracy_reward/std": 0.0,
"rewards/video_r1_format_reward/mean": 0.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 18
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 317.0,
"completions/max_terminated_length": 317.0,
"completions/mean_length": 133.453125,
"completions/mean_terminated_length": 133.453125,
"completions/min_length": 62.0,
"completions/min_terminated_length": 62.0,
"entropy": 0.7506937384605408,
"epoch": 0.047979797979797977,
"frac_reward_zero_std": 0.9375,
"grad_norm": 0.6540532112121582,
"learning_rate": 9.545454545454546e-07,
"loss": 0.0,
"num_tokens": 2156297.0,
"reward": 0.0003906250058207661,
"reward_std": 0.0011048543965443969,
"rewards/video_r1_accuracy_reward/mean": 0.0,
"rewards/video_r1_accuracy_reward/std": 0.0,
"rewards/video_r1_format_reward/mean": 0.0078125,
"rewards/video_r1_format_reward/std": 0.0883883461356163,
"step": 19
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 353.0,
"completions/max_terminated_length": 353.0,
"completions/mean_length": 141.8515625,
"completions/mean_terminated_length": 141.8515625,
"completions/min_length": 60.0,
"completions/min_terminated_length": 60.0,
"entropy": 0.7586977481842041,
"epoch": 0.050505050505050504,
"frac_reward_zero_std": 0.9375,
"grad_norm": 0.6235540509223938,
"learning_rate": 9.520202020202019e-07,
"loss": 0.0,
"num_tokens": 2288918.0,
"reward": 0.0074218749068677425,
"reward_std": 0.020992232486605644,
"rewards/video_r1_accuracy_reward/mean": 0.0078125,
"rewards/video_r1_accuracy_reward/std": 0.0883883461356163,
"rewards/video_r1_format_reward/mean": 0.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 20
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 380.0,
"completions/max_terminated_length": 380.0,
"completions/mean_length": 131.8359375,
"completions/mean_terminated_length": 131.8359375,
"completions/min_length": 54.0,
"completions/min_terminated_length": 54.0,
"entropy": 0.7679413557052612,
"epoch": 0.05303030303030303,
"frac_reward_zero_std": 0.9375,
"grad_norm": 0.4877886474132538,
"learning_rate": 9.494949494949495e-07,
"loss": 0.0,
"num_tokens": 2408449.0,
"reward": 0.0011718750465661287,
"reward_std": 0.0016173411859199405,
"rewards/video_r1_accuracy_reward/mean": 0.0,
"rewards/video_r1_accuracy_reward/std": 0.0,
"rewards/video_r1_format_reward/mean": 0.0234375,
"rewards/video_r1_format_reward/std": 0.15188287198543549,
"step": 21
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 384.0,
"completions/max_terminated_length": 355.0,
"completions/mean_length": 146.3671875,
"completions/mean_terminated_length": 142.59524536132812,
"completions/min_length": 59.0,
"completions/min_terminated_length": 59.0,
"entropy": 0.7015185356140137,
"epoch": 0.05555555555555555,
"frac_reward_zero_std": 0.9375,
"grad_norm": 0.34478825330734253,
"learning_rate": 9.46969696969697e-07,
"loss": -0.0,
"num_tokens": 2526696.0,
"reward": 0.0078125,
"reward_std": 0.022097086533904076,
"rewards/video_r1_accuracy_reward/mean": 0.0078125,
"rewards/video_r1_accuracy_reward/std": 0.0883883461356163,
"rewards/video_r1_format_reward/mean": 0.0078125,
"rewards/video_r1_format_reward/std": 0.0883883461356163,
"step": 22
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 306.0,
"completions/max_terminated_length": 306.0,
"completions/mean_length": 129.15625,
"completions/mean_terminated_length": 129.15625,
"completions/min_length": 56.0,
"completions/min_terminated_length": 56.0,
"entropy": 0.7712253332138062,
"epoch": 0.05808080808080808,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 9.444444444444444e-07,
"loss": 0.0,
"num_tokens": 2640884.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.0,
"rewards/video_r1_accuracy_reward/std": 0.0,
"rewards/video_r1_format_reward/mean": 0.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 23
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 294.0,
"completions/max_terminated_length": 294.0,
"completions/mean_length": 135.5078125,
"completions/mean_terminated_length": 135.5078125,
"completions/min_length": 61.0,
"completions/min_terminated_length": 61.0,
"entropy": 0.8073737621307373,
"epoch": 0.06060606060606061,
"frac_reward_zero_std": 0.8125,
"grad_norm": 0.915313720703125,
"learning_rate": 9.419191919191919e-07,
"loss": -0.0,
"num_tokens": 2747125.0,
"reward": 0.00859374925494194,
"reward_std": 0.02430679462850094,
"rewards/video_r1_accuracy_reward/mean": 0.0078125,
"rewards/video_r1_accuracy_reward/std": 0.0883883461356163,
"rewards/video_r1_format_reward/mean": 0.0234375,
"rewards/video_r1_format_reward/std": 0.15188287198543549,
"step": 24
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 339.0,
"completions/max_terminated_length": 339.0,
"completions/mean_length": 131.3515625,
"completions/mean_terminated_length": 131.3515625,
"completions/min_length": 39.0,
"completions/min_terminated_length": 39.0,
"entropy": 0.7123849987983704,
"epoch": 0.06313131313131314,
"frac_reward_zero_std": 0.5,
"grad_norm": 1.4686827659606934,
"learning_rate": 9.393939393939395e-07,
"loss": -0.0,
"num_tokens": 2863482.0,
"reward": 0.0859375,
"reward_std": 0.17301878333091736,
"rewards/video_r1_accuracy_reward/mean": 0.0859375,
"rewards/video_r1_accuracy_reward/std": 0.2813730239868164,
"rewards/video_r1_format_reward/mean": 0.0859375,
"rewards/video_r1_format_reward/std": 0.2813730239868164,
"step": 25
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 248.0,
"completions/max_terminated_length": 248.0,
"completions/mean_length": 126.0625,
"completions/mean_terminated_length": 126.0625,
"completions/min_length": 51.0,
"completions/min_terminated_length": 51.0,
"entropy": 0.7570379972457886,
"epoch": 0.06565656565656566,
"frac_reward_zero_std": 0.625,
"grad_norm": 1.2738335132598877,
"learning_rate": 9.368686868686868e-07,
"loss": 0.0,
"num_tokens": 2991954.0,
"reward": 0.04218749701976776,
"reward_std": 0.09613416343927383,
"rewards/video_r1_accuracy_reward/mean": 0.0390625,
"rewards/video_r1_accuracy_reward/std": 0.194504976272583,
"rewards/video_r1_format_reward/mean": 0.1015625,
"rewards/video_r1_format_reward/std": 0.3032590448856354,
"step": 26
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 327.0,
"completions/max_terminated_length": 327.0,
"completions/mean_length": 144.0859375,
"completions/mean_terminated_length": 144.0859375,
"completions/min_length": 53.0,
"completions/min_terminated_length": 53.0,
"entropy": 0.7464326620101929,
"epoch": 0.06818181818181818,
"frac_reward_zero_std": 0.375,
"grad_norm": 1.6063857078552246,
"learning_rate": 9.343434343434343e-07,
"loss": -0.0,
"num_tokens": 3113973.0,
"reward": 0.111328125,
"reward_std": 0.20764078199863434,
"rewards/video_r1_accuracy_reward/mean": 0.109375,
"rewards/video_r1_accuracy_reward/std": 0.31333550810813904,
"rewards/video_r1_format_reward/mean": 0.1484375,
"rewards/video_r1_format_reward/std": 0.356930136680603,
"step": 27
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 258.0,
"completions/max_terminated_length": 258.0,
"completions/mean_length": 135.25,
"completions/mean_terminated_length": 135.25,
"completions/min_length": 54.0,
"completions/min_terminated_length": 54.0,
"entropy": 0.6928939819335938,
"epoch": 0.0707070707070707,
"frac_reward_zero_std": 0.4375,
"grad_norm": 1.5145729780197144,
"learning_rate": 9.318181818181817e-07,
"loss": 0.0,
"num_tokens": 3228973.0,
"reward": 0.03476562350988388,
"reward_std": 0.09340079128742218,
"rewards/video_r1_accuracy_reward/mean": 0.03125,
"rewards/video_r1_accuracy_reward/std": 0.1746762990951538,
"rewards/video_r1_format_reward/mean": 0.1015625,
"rewards/video_r1_format_reward/std": 0.3032590448856354,
"step": 28
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 378.0,
"completions/max_terminated_length": 378.0,
"completions/mean_length": 144.5,
"completions/mean_terminated_length": 144.5,
"completions/min_length": 59.0,
"completions/min_terminated_length": 59.0,
"entropy": 0.6991415023803711,
"epoch": 0.07323232323232323,
"frac_reward_zero_std": 0.0625,
"grad_norm": 1.993449091911316,
"learning_rate": 9.292929292929292e-07,
"loss": -0.0,
"num_tokens": 3338453.0,
"reward": 0.31640625,
"reward_std": 0.34012913703918457,
"rewards/video_r1_accuracy_reward/mean": 0.3125,
"rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
"rewards/video_r1_format_reward/mean": 0.390625,
"rewards/video_r1_format_reward/std": 0.4898075461387634,
"step": 29
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 349.0,
"completions/max_terminated_length": 349.0,
"completions/mean_length": 134.1015625,
"completions/mean_terminated_length": 134.1015625,
"completions/min_length": 59.0,
"completions/min_terminated_length": 59.0,
"entropy": 0.7583435773849487,
"epoch": 0.07575757575757576,
"frac_reward_zero_std": 0.0625,
"grad_norm": 2.37294340133667,
"learning_rate": 9.267676767676768e-07,
"loss": -0.0,
"num_tokens": 3450378.0,
"reward": 0.3359375,
"reward_std": 0.32593491673469543,
"rewards/video_r1_accuracy_reward/mean": 0.328125,
"rewards/video_r1_accuracy_reward/std": 0.4713755249977112,
"rewards/video_r1_format_reward/mean": 0.484375,
"rewards/video_r1_format_reward/std": 0.5017194747924805,
"step": 30
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 384.0,
"completions/max_terminated_length": 366.0,
"completions/mean_length": 134.0546875,
"completions/mean_terminated_length": 132.08660888671875,
"completions/min_length": 44.0,
"completions/min_terminated_length": 44.0,
"entropy": 0.7964510917663574,
"epoch": 0.07828282828282829,
"frac_reward_zero_std": 0.1875,
"grad_norm": 1.9095652103424072,
"learning_rate": 9.242424242424241e-07,
"loss": -0.0,
"num_tokens": 3560025.0,
"reward": 0.3550781011581421,
"reward_std": 0.267575740814209,
"rewards/video_r1_accuracy_reward/mean": 0.34375,
"rewards/video_r1_accuracy_reward/std": 0.47682511806488037,
"rewards/video_r1_format_reward/mean": 0.5703125,
"rewards/video_r1_format_reward/std": 0.4969765841960907,
"step": 31
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 289.0,
"completions/max_terminated_length": 289.0,
"completions/mean_length": 113.0625,
"completions/mean_terminated_length": 113.0625,
"completions/min_length": 46.0,
"completions/min_terminated_length": 46.0,
"entropy": 0.6799850463867188,
"epoch": 0.08080808080808081,
"frac_reward_zero_std": 0.0625,
"grad_norm": 2.1304097175598145,
"learning_rate": 9.217171717171717e-07,
"loss": -0.0,
"num_tokens": 3680297.0,
"reward": 0.5066406726837158,
"reward_std": 0.4047975242137909,
"rewards/video_r1_accuracy_reward/mean": 0.4921875,
"rewards/video_r1_accuracy_reward/std": 0.5019033551216125,
"rewards/video_r1_format_reward/mean": 0.78125,
"rewards/video_r1_format_reward/std": 0.41502299904823303,
"step": 32
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 294.0,
"completions/max_terminated_length": 294.0,
"completions/mean_length": 108.75,
"completions/mean_terminated_length": 108.75,
"completions/min_length": 41.0,
"completions/min_terminated_length": 41.0,
"entropy": 0.7177351713180542,
"epoch": 0.08333333333333333,
"frac_reward_zero_std": 0.25,
"grad_norm": 2.0556108951568604,
"learning_rate": 9.191919191919192e-07,
"loss": -0.0,
"num_tokens": 3797793.0,
"reward": 0.47968751192092896,
"reward_std": 0.27063843607902527,
"rewards/video_r1_accuracy_reward/mean": 0.4609375,
"rewards/video_r1_accuracy_reward/std": 0.5004304051399231,
"rewards/video_r1_format_reward/mean": 0.8359375,
"rewards/video_r1_format_reward/std": 0.371787428855896,
"step": 33
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 270.0,
"completions/max_terminated_length": 270.0,
"completions/mean_length": 110.3046875,
"completions/mean_terminated_length": 110.3046875,
"completions/min_length": 54.0,
"completions/min_terminated_length": 54.0,
"entropy": 0.7837837934494019,
"epoch": 0.08585858585858586,
"frac_reward_zero_std": 0.25,
"grad_norm": 1.8440849781036377,
"learning_rate": 9.166666666666665e-07,
"loss": -0.0,
"num_tokens": 3912032.0,
"reward": 0.661328136920929,
"reward_std": 0.29765215516090393,
"rewards/video_r1_accuracy_reward/mean": 0.6484375,
"rewards/video_r1_accuracy_reward/std": 0.4793342351913452,
"rewards/video_r1_format_reward/mean": 0.90625,
"rewards/video_r1_format_reward/std": 0.29262590408325195,
"step": 34
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 283.0,
"completions/max_terminated_length": 283.0,
"completions/mean_length": 99.5546875,
"completions/mean_terminated_length": 99.5546875,
"completions/min_length": 40.0,
"completions/min_terminated_length": 40.0,
"entropy": 0.8179616928100586,
"epoch": 0.08838383838383838,
"frac_reward_zero_std": 0.125,
"grad_norm": 2.0900862216949463,
"learning_rate": 9.141414141414141e-07,
"loss": -0.0,
"num_tokens": 4013039.0,
"reward": 0.48281246423721313,
"reward_std": 0.3318884074687958,
"rewards/video_r1_accuracy_reward/mean": 0.4609375,
"rewards/video_r1_accuracy_reward/std": 0.5004304051399231,
"rewards/video_r1_format_reward/mean": 0.8984375,
"rewards/video_r1_format_reward/std": 0.3032590448856354,
"step": 35
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 262.0,
"completions/max_terminated_length": 262.0,
"completions/mean_length": 102.671875,
"completions/mean_terminated_length": 102.671875,
"completions/min_length": 49.0,
"completions/min_terminated_length": 49.0,
"entropy": 0.8404669761657715,
"epoch": 0.09090909090909091,
"frac_reward_zero_std": 0.25,
"grad_norm": 1.946840524673462,
"learning_rate": 9.116161616161616e-07,
"loss": -0.0,
"num_tokens": 4118005.0,
"reward": 0.604296863079071,
"reward_std": 0.27570241689682007,
"rewards/video_r1_accuracy_reward/mean": 0.5859375,
"rewards/video_r1_accuracy_reward/std": 0.49449479579925537,
"rewards/video_r1_format_reward/mean": 0.953125,
"rewards/video_r1_format_reward/std": 0.21220162510871887,
"step": 36
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 242.0,
"completions/max_terminated_length": 242.0,
"completions/mean_length": 112.7265625,
"completions/mean_terminated_length": 112.7265625,
"completions/min_length": 40.0,
"completions/min_terminated_length": 40.0,
"entropy": 0.8344206809997559,
"epoch": 0.09343434343434344,
"frac_reward_zero_std": 0.25,
"grad_norm": 1.875348687171936,
"learning_rate": 9.09090909090909e-07,
"loss": -0.0,
"num_tokens": 4232098.0,
"reward": 0.701171875,
"reward_std": 0.3029305934906006,
"rewards/video_r1_accuracy_reward/mean": 0.6875,
"rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
"rewards/video_r1_format_reward/mean": 0.9609375,
"rewards/video_r1_format_reward/std": 0.194504976272583,
"step": 37
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 324.0,
"completions/max_terminated_length": 324.0,
"completions/mean_length": 98.640625,
"completions/mean_terminated_length": 98.640625,
"completions/min_length": 36.0,
"completions/min_terminated_length": 36.0,
"entropy": 0.8952550888061523,
"epoch": 0.09595959595959595,
"frac_reward_zero_std": 0.375,
"grad_norm": 1.779735803604126,
"learning_rate": 9.065656565656565e-07,
"loss": 0.0,
"num_tokens": 4337628.0,
"reward": 0.7101562023162842,
"reward_std": 0.2632066607475281,
"rewards/video_r1_accuracy_reward/mean": 0.6953125,
"rewards/video_r1_accuracy_reward/std": 0.46208351850509644,
"rewards/video_r1_format_reward/mean": 0.9921875,
"rewards/video_r1_format_reward/std": 0.0883883461356163,
"step": 38
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 263.0,
"completions/max_terminated_length": 263.0,
"completions/mean_length": 102.375,
"completions/mean_terminated_length": 102.375,
"completions/min_length": 45.0,
"completions/min_terminated_length": 45.0,
"entropy": 0.903910756111145,
"epoch": 0.09848484848484848,
"frac_reward_zero_std": 0.1875,
"grad_norm": 2.0209107398986816,
"learning_rate": 9.040404040404041e-07,
"loss": -0.0,
"num_tokens": 4437268.0,
"reward": 0.5621093511581421,
"reward_std": 0.36356228590011597,
"rewards/video_r1_accuracy_reward/mean": 0.5390625,
"rewards/video_r1_accuracy_reward/std": 0.5004304051399231,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 39
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 384.0,
"completions/max_terminated_length": 242.0,
"completions/mean_length": 94.59375,
"completions/mean_terminated_length": 92.31495666503906,
"completions/min_length": 25.0,
"completions/min_terminated_length": 25.0,
"entropy": 0.87197345495224,
"epoch": 0.10101010101010101,
"frac_reward_zero_std": 0.375,
"grad_norm": 1.7272930145263672,
"learning_rate": 9.015151515151514e-07,
"loss": 0.0,
"num_tokens": 4552040.0,
"reward": 0.708984375,
"reward_std": 0.2568144202232361,
"rewards/video_r1_accuracy_reward/mean": 0.6953125,
"rewards/video_r1_accuracy_reward/std": 0.46208351850509644,
"rewards/video_r1_format_reward/mean": 0.96875,
"rewards/video_r1_format_reward/std": 0.1746762990951538,
"step": 40
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 195.0,
"completions/max_terminated_length": 195.0,
"completions/mean_length": 94.8515625,
"completions/mean_terminated_length": 94.8515625,
"completions/min_length": 49.0,
"completions/min_terminated_length": 49.0,
"entropy": 0.9597364664077759,
"epoch": 0.10353535353535354,
"frac_reward_zero_std": 0.5,
"grad_norm": 1.6093056201934814,
"learning_rate": 8.98989898989899e-07,
"loss": -0.0,
"num_tokens": 4640853.0,
"reward": 0.576953113079071,
"reward_std": 0.216628760099411,
"rewards/video_r1_accuracy_reward/mean": 0.5546875,
"rewards/video_r1_accuracy_reward/std": 0.4989531338214874,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 41
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 199.0,
"completions/max_terminated_length": 199.0,
"completions/mean_length": 84.984375,
"completions/mean_terminated_length": 84.984375,
"completions/min_length": 43.0,
"completions/min_terminated_length": 43.0,
"entropy": 0.9007290601730347,
"epoch": 0.10606060606060606,
"frac_reward_zero_std": 0.25,
"grad_norm": 2.0971033573150635,
"learning_rate": 8.964646464646465e-07,
"loss": -0.0,
"num_tokens": 4748699.0,
"reward": 0.516796886920929,
"reward_std": 0.3205876350402832,
"rewards/video_r1_accuracy_reward/mean": 0.4921875,
"rewards/video_r1_accuracy_reward/std": 0.5019033551216125,
"rewards/video_r1_format_reward/mean": 0.984375,
"rewards/video_r1_format_reward/std": 0.12450689822435379,
"step": 42
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 173.0,
"completions/max_terminated_length": 173.0,
"completions/mean_length": 89.2890625,
"completions/mean_terminated_length": 89.2890625,
"completions/min_length": 40.0,
"completions/min_terminated_length": 40.0,
"entropy": 0.9400933980941772,
"epoch": 0.10858585858585859,
"frac_reward_zero_std": 0.375,
"grad_norm": 1.9215672016143799,
"learning_rate": 8.939393939393938e-07,
"loss": -0.0,
"num_tokens": 4867288.0,
"reward": 0.6199219226837158,
"reward_std": 0.26100048422813416,
"rewards/video_r1_accuracy_reward/mean": 0.6015625,
"rewards/video_r1_accuracy_reward/std": 0.4915000796318054,
"rewards/video_r1_format_reward/mean": 0.96875,
"rewards/video_r1_format_reward/std": 0.1746762990951538,
"step": 43
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 233.0,
"completions/max_terminated_length": 233.0,
"completions/mean_length": 86.1953125,
"completions/mean_terminated_length": 86.1953125,
"completions/min_length": 40.0,
"completions/min_terminated_length": 40.0,
"entropy": 0.9206110835075378,
"epoch": 0.1111111111111111,
"frac_reward_zero_std": 0.1875,
"grad_norm": 2.1925864219665527,
"learning_rate": 8.914141414141414e-07,
"loss": -0.0,
"num_tokens": 4986809.0,
"reward": 0.671875,
"reward_std": 0.2928203344345093,
"rewards/video_r1_accuracy_reward/mean": 0.65625,
"rewards/video_r1_accuracy_reward/std": 0.47682511806488037,
"rewards/video_r1_format_reward/mean": 0.96875,
"rewards/video_r1_format_reward/std": 0.1746762990951538,
"step": 44
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 217.0,
"completions/max_terminated_length": 217.0,
"completions/mean_length": 90.3046875,
"completions/mean_terminated_length": 90.3046875,
"completions/min_length": 41.0,
"completions/min_terminated_length": 41.0,
"entropy": 0.974540650844574,
"epoch": 0.11363636363636363,
"frac_reward_zero_std": 0.4375,
"grad_norm": 1.7343508005142212,
"learning_rate": 8.888888888888888e-07,
"loss": 0.0,
"num_tokens": 5092520.0,
"reward": 0.725390613079071,
"reward_std": 0.23314352333545685,
"rewards/video_r1_accuracy_reward/mean": 0.7109375,
"rewards/video_r1_accuracy_reward/std": 0.45510825514793396,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 45
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 167.0,
"completions/max_terminated_length": 167.0,
"completions/mean_length": 80.625,
"completions/mean_terminated_length": 80.625,
"completions/min_length": 38.0,
"completions/min_terminated_length": 38.0,
"entropy": 0.9657076597213745,
"epoch": 0.11616161616161616,
"frac_reward_zero_std": 0.375,
"grad_norm": 1.9508693218231201,
"learning_rate": 8.863636363636363e-07,
"loss": -0.0,
"num_tokens": 5205544.0,
"reward": 0.746874988079071,
"reward_std": 0.2970072031021118,
"rewards/video_r1_accuracy_reward/mean": 0.734375,
"rewards/video_r1_accuracy_reward/std": 0.44340085983276367,
"rewards/video_r1_format_reward/mean": 0.984375,
"rewards/video_r1_format_reward/std": 0.12450689822435379,
"step": 46
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 223.0,
"completions/max_terminated_length": 223.0,
"completions/mean_length": 88.828125,
"completions/mean_terminated_length": 88.828125,
"completions/min_length": 37.0,
"completions/min_terminated_length": 37.0,
"entropy": 0.9713828563690186,
"epoch": 0.11868686868686869,
"frac_reward_zero_std": 0.4375,
"grad_norm": 1.7221375703811646,
"learning_rate": 8.838383838383838e-07,
"loss": -0.0,
"num_tokens": 5319394.0,
"reward": 0.5695312023162842,
"reward_std": 0.26034435629844666,
"rewards/video_r1_accuracy_reward/mean": 0.546875,
"rewards/video_r1_accuracy_reward/std": 0.4997538626194,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 47
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 150.0,
"completions/max_terminated_length": 150.0,
"completions/mean_length": 80.8515625,
"completions/mean_terminated_length": 80.8515625,
"completions/min_length": 40.0,
"completions/min_terminated_length": 40.0,
"entropy": 0.9206292629241943,
"epoch": 0.12121212121212122,
"frac_reward_zero_std": 0.375,
"grad_norm": 1.957437515258789,
"learning_rate": 8.813131313131313e-07,
"loss": -0.0,
"num_tokens": 5412327.0,
"reward": 0.643750011920929,
"reward_std": 0.27909553050994873,
"rewards/video_r1_accuracy_reward/mean": 0.625,
"rewards/video_r1_accuracy_reward/std": 0.4860251843929291,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 48
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 152.0,
"completions/max_terminated_length": 152.0,
"completions/mean_length": 82.21875,
"completions/mean_terminated_length": 82.21875,
"completions/min_length": 37.0,
"completions/min_terminated_length": 37.0,
"entropy": 0.9331451654434204,
"epoch": 0.12373737373737374,
"frac_reward_zero_std": 0.375,
"grad_norm": 1.8745862245559692,
"learning_rate": 8.787878787878787e-07,
"loss": -0.0,
"num_tokens": 5516315.0,
"reward": 0.4429687559604645,
"reward_std": 0.25346940755844116,
"rewards/video_r1_accuracy_reward/mean": 0.4140625,
"rewards/video_r1_accuracy_reward/std": 0.49449479579925537,
"rewards/video_r1_format_reward/mean": 0.9921875,
"rewards/video_r1_format_reward/std": 0.0883883461356163,
"step": 49
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 174.0,
"completions/max_terminated_length": 174.0,
"completions/mean_length": 79.7890625,
"completions/mean_terminated_length": 79.7890625,
"completions/min_length": 43.0,
"completions/min_terminated_length": 43.0,
"entropy": 0.9588379859924316,
"epoch": 0.12626262626262627,
"frac_reward_zero_std": 0.25,
"grad_norm": 2.0629332065582275,
"learning_rate": 8.762626262626263e-07,
"loss": -0.0,
"num_tokens": 5615632.0,
"reward": 0.5464843511581421,
"reward_std": 0.34427377581596375,
"rewards/video_r1_accuracy_reward/mean": 0.5234375,
"rewards/video_r1_accuracy_reward/std": 0.5014128684997559,
"rewards/video_r1_format_reward/mean": 0.984375,
"rewards/video_r1_format_reward/std": 0.12450689822435379,
"step": 50
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 145.0,
"completions/max_terminated_length": 145.0,
"completions/mean_length": 79.265625,
"completions/mean_terminated_length": 79.265625,
"completions/min_length": 41.0,
"completions/min_terminated_length": 41.0,
"entropy": 0.9568088054656982,
"epoch": 0.12878787878787878,
"frac_reward_zero_std": 0.3125,
"grad_norm": 2.221264123916626,
"learning_rate": 8.737373737373737e-07,
"loss": 0.0,
"num_tokens": 5714786.0,
"reward": 0.5914062261581421,
"reward_std": 0.2784692645072937,
"rewards/video_r1_accuracy_reward/mean": 0.5703125,
"rewards/video_r1_accuracy_reward/std": 0.4969765841960907,
"rewards/video_r1_format_reward/mean": 0.9921875,
"rewards/video_r1_format_reward/std": 0.0883883461356163,
"step": 51
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 148.0,
"completions/max_terminated_length": 148.0,
"completions/mean_length": 74.5625,
"completions/mean_terminated_length": 74.5625,
"completions/min_length": 41.0,
"completions/min_terminated_length": 41.0,
"entropy": 0.9146069288253784,
"epoch": 0.13131313131313133,
"frac_reward_zero_std": 0.4375,
"grad_norm": 1.8530735969543457,
"learning_rate": 8.712121212121211e-07,
"loss": -0.0,
"num_tokens": 5827042.0,
"reward": 0.598828136920929,
"reward_std": 0.22847865521907806,
"rewards/video_r1_accuracy_reward/mean": 0.578125,
"rewards/video_r1_accuracy_reward/std": 0.4957992732524872,
"rewards/video_r1_format_reward/mean": 0.9921875,
"rewards/video_r1_format_reward/std": 0.0883883461356163,
"step": 52
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 180.0,
"completions/max_terminated_length": 180.0,
"completions/mean_length": 76.890625,
"completions/mean_terminated_length": 76.890625,
"completions/min_length": 34.0,
"completions/min_terminated_length": 34.0,
"entropy": 0.9405485987663269,
"epoch": 0.13383838383838384,
"frac_reward_zero_std": 0.375,
"grad_norm": 1.9973477125167847,
"learning_rate": 8.686868686868687e-07,
"loss": -0.0,
"num_tokens": 5927692.0,
"reward": 0.666015625,
"reward_std": 0.2661140561103821,
"rewards/video_r1_accuracy_reward/mean": 0.6484375,
"rewards/video_r1_accuracy_reward/std": 0.4793342351913452,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 53
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 162.0,
"completions/max_terminated_length": 162.0,
"completions/mean_length": 74.015625,
"completions/mean_terminated_length": 74.015625,
"completions/min_length": 27.0,
"completions/min_terminated_length": 27.0,
"entropy": 1.0538530349731445,
"epoch": 0.13636363636363635,
"frac_reward_zero_std": 0.375,
"grad_norm": 2.2168405055999756,
"learning_rate": 8.661616161616161e-07,
"loss": 0.0,
"num_tokens": 6036374.0,
"reward": 0.5398437976837158,
"reward_std": 0.2713738679885864,
"rewards/video_r1_accuracy_reward/mean": 0.515625,
"rewards/video_r1_accuracy_reward/std": 0.5017194747924805,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 54
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 384.0,
"completions/max_terminated_length": 144.0,
"completions/mean_length": 79.8203125,
"completions/mean_terminated_length": 77.42520141601562,
"completions/min_length": 33.0,
"completions/min_terminated_length": 33.0,
"entropy": 0.9944831132888794,
"epoch": 0.1388888888888889,
"frac_reward_zero_std": 0.5,
"grad_norm": 1.8037936687469482,
"learning_rate": 8.636363636363636e-07,
"loss": 0.0,
"num_tokens": 6138703.0,
"reward": 0.7621093988418579,
"reward_std": 0.2209778130054474,
"rewards/video_r1_accuracy_reward/mean": 0.75,
"rewards/video_r1_accuracy_reward/std": 0.434714138507843,
"rewards/video_r1_format_reward/mean": 0.9921875,
"rewards/video_r1_format_reward/std": 0.0883883461356163,
"step": 55
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 126.0,
"completions/max_terminated_length": 126.0,
"completions/mean_length": 73.3359375,
"completions/mean_terminated_length": 73.3359375,
"completions/min_length": 25.0,
"completions/min_terminated_length": 25.0,
"entropy": 1.032738208770752,
"epoch": 0.1414141414141414,
"frac_reward_zero_std": 0.5625,
"grad_norm": 1.762905478477478,
"learning_rate": 8.611111111111111e-07,
"loss": -0.0,
"num_tokens": 6236354.0,
"reward": 0.6285156011581421,
"reward_std": 0.18521998822689056,
"rewards/video_r1_accuracy_reward/mean": 0.609375,
"rewards/video_r1_accuracy_reward/std": 0.4898075461387634,
"rewards/video_r1_format_reward/mean": 0.9921875,
"rewards/video_r1_format_reward/std": 0.0883883461356163,
"step": 56
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 135.0,
"completions/max_terminated_length": 135.0,
"completions/mean_length": 75.171875,
"completions/mean_terminated_length": 75.171875,
"completions/min_length": 30.0,
"completions/min_terminated_length": 30.0,
"entropy": 1.0041790008544922,
"epoch": 0.14393939393939395,
"frac_reward_zero_std": 0.4375,
"grad_norm": 1.8384010791778564,
"learning_rate": 8.585858585858586e-07,
"loss": 0.0,
"num_tokens": 6336768.0,
"reward": 0.806640625,
"reward_std": 0.2180173397064209,
"rewards/video_r1_accuracy_reward/mean": 0.796875,
"rewards/video_r1_accuracy_reward/std": 0.40390563011169434,
"rewards/video_r1_format_reward/mean": 0.9921875,
"rewards/video_r1_format_reward/std": 0.0883883461356163,
"step": 57
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 145.0,
"completions/max_terminated_length": 145.0,
"completions/mean_length": 70.8828125,
"completions/mean_terminated_length": 70.8828125,
"completions/min_length": 41.0,
"completions/min_terminated_length": 41.0,
"entropy": 1.0715279579162598,
"epoch": 0.14646464646464646,
"frac_reward_zero_std": 0.3125,
"grad_norm": 2.389326810836792,
"learning_rate": 8.56060606060606e-07,
"loss": 0.0,
"num_tokens": 6436537.0,
"reward": 0.627734363079071,
"reward_std": 0.25768929719924927,
"rewards/video_r1_accuracy_reward/mean": 0.609375,
"rewards/video_r1_accuracy_reward/std": 0.4898075461387634,
"rewards/video_r1_format_reward/mean": 0.9765625,
"rewards/video_r1_format_reward/std": 0.15188287198543549,
"step": 58
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 151.0,
"completions/max_terminated_length": 151.0,
"completions/mean_length": 75.09375,
"completions/mean_terminated_length": 75.09375,
"completions/min_length": 34.0,
"completions/min_terminated_length": 34.0,
"entropy": 1.02361261844635,
"epoch": 0.14898989898989898,
"frac_reward_zero_std": 0.3125,
"grad_norm": 2.1776626110076904,
"learning_rate": 8.535353535353534e-07,
"loss": -0.0,
"num_tokens": 6547989.0,
"reward": 0.5546875,
"reward_std": 0.3171003460884094,
"rewards/video_r1_accuracy_reward/mean": 0.53125,
"rewards/video_r1_accuracy_reward/std": 0.5009832978248596,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 59
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 141.0,
"completions/max_terminated_length": 141.0,
"completions/mean_length": 81.34375,
"completions/mean_terminated_length": 81.34375,
"completions/min_length": 38.0,
"completions/min_terminated_length": 38.0,
"entropy": 1.0184237957000732,
"epoch": 0.15151515151515152,
"frac_reward_zero_std": 0.5,
"grad_norm": 1.7039854526519775,
"learning_rate": 8.51010101010101e-07,
"loss": -0.0,
"num_tokens": 6661905.0,
"reward": 0.703125,
"reward_std": 0.2143877148628235,
"rewards/video_r1_accuracy_reward/mean": 0.6875,
"rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 60
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 138.0,
"completions/max_terminated_length": 138.0,
"completions/mean_length": 76.8125,
"completions/mean_terminated_length": 76.8125,
"completions/min_length": 31.0,
"completions/min_terminated_length": 31.0,
"entropy": 1.012675404548645,
"epoch": 0.15404040404040403,
"frac_reward_zero_std": 0.4375,
"grad_norm": 1.9084367752075195,
"learning_rate": 8.484848484848484e-07,
"loss": -0.0,
"num_tokens": 6762833.0,
"reward": 0.6585937738418579,
"reward_std": 0.22463490068912506,
"rewards/video_r1_accuracy_reward/mean": 0.640625,
"rewards/video_r1_accuracy_reward/std": 0.481702595949173,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 61
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 168.0,
"completions/max_terminated_length": 168.0,
"completions/mean_length": 76.7890625,
"completions/mean_terminated_length": 76.7890625,
"completions/min_length": 34.0,
"completions/min_terminated_length": 34.0,
"entropy": 0.9767247438430786,
"epoch": 0.15656565656565657,
"frac_reward_zero_std": 0.375,
"grad_norm": 1.992623209953308,
"learning_rate": 8.459595959595959e-07,
"loss": -0.0,
"num_tokens": 6864190.0,
"reward": 0.666015625,
"reward_std": 0.27260246872901917,
"rewards/video_r1_accuracy_reward/mean": 0.6484375,
"rewards/video_r1_accuracy_reward/std": 0.4793342351913452,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 62
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 149.0,
"completions/max_terminated_length": 149.0,
"completions/mean_length": 73.65625,
"completions/mean_terminated_length": 73.65625,
"completions/min_length": 35.0,
"completions/min_terminated_length": 35.0,
"entropy": 0.9239650368690491,
"epoch": 0.1590909090909091,
"frac_reward_zero_std": 0.4375,
"grad_norm": 2.0338518619537354,
"learning_rate": 8.434343434343434e-07,
"loss": 0.0,
"num_tokens": 6969522.0,
"reward": 0.732421875,
"reward_std": 0.22757862508296967,
"rewards/video_r1_accuracy_reward/mean": 0.71875,
"rewards/video_r1_accuracy_reward/std": 0.4513758420944214,
"rewards/video_r1_format_reward/mean": 0.9921875,
"rewards/video_r1_format_reward/std": 0.0883883461356163,
"step": 63
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 137.0,
"completions/max_terminated_length": 137.0,
"completions/mean_length": 75.9609375,
"completions/mean_terminated_length": 75.9609375,
"completions/min_length": 34.0,
"completions/min_terminated_length": 34.0,
"entropy": 0.8829290270805359,
"epoch": 0.16161616161616163,
"frac_reward_zero_std": 0.75,
"grad_norm": 1.6126983165740967,
"learning_rate": 8.409090909090909e-07,
"loss": 0.0,
"num_tokens": 7077061.0,
"reward": 0.762499988079071,
"reward_std": 0.09695503860712051,
"rewards/video_r1_accuracy_reward/mean": 0.75,
"rewards/video_r1_accuracy_reward/std": 0.434714138507843,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 64
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 133.0,
"completions/max_terminated_length": 133.0,
"completions/mean_length": 72.6171875,
"completions/mean_terminated_length": 72.6171875,
"completions/min_length": 32.0,
"completions/min_terminated_length": 32.0,
"entropy": 0.8675624132156372,
"epoch": 0.16414141414141414,
"frac_reward_zero_std": 0.75,
"grad_norm": 1.3695436716079712,
"learning_rate": 8.383838383838383e-07,
"loss": -0.0,
"num_tokens": 7192844.0,
"reward": 0.7476562261581421,
"reward_std": 0.10669228434562683,
"rewards/video_r1_accuracy_reward/mean": 0.734375,
"rewards/video_r1_accuracy_reward/std": 0.44340085983276367,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 65
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 147.0,
"completions/max_terminated_length": 147.0,
"completions/mean_length": 75.734375,
"completions/mean_terminated_length": 75.734375,
"completions/min_length": 30.0,
"completions/min_terminated_length": 30.0,
"entropy": 0.9987907409667969,
"epoch": 0.16666666666666666,
"frac_reward_zero_std": 0.625,
"grad_norm": 1.6358082294464111,
"learning_rate": 8.358585858585859e-07,
"loss": -0.0,
"num_tokens": 7303458.0,
"reward": 0.7699218988418579,
"reward_std": 0.16915903985500336,
"rewards/video_r1_accuracy_reward/mean": 0.7578125,
"rewards/video_r1_accuracy_reward/std": 0.4300905168056488,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 66
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 156.0,
"completions/max_terminated_length": 156.0,
"completions/mean_length": 74.1328125,
"completions/mean_terminated_length": 74.1328125,
"completions/min_length": 27.0,
"completions/min_terminated_length": 27.0,
"entropy": 0.98237144947052,
"epoch": 0.1691919191919192,
"frac_reward_zero_std": 0.5625,
"grad_norm": 1.6880619525909424,
"learning_rate": 8.333333333333333e-07,
"loss": -0.0,
"num_tokens": 7419707.0,
"reward": 0.6585937738418579,
"reward_std": 0.1944032609462738,
"rewards/video_r1_accuracy_reward/mean": 0.640625,
"rewards/video_r1_accuracy_reward/std": 0.481702595949173,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 67
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 136.0,
"completions/max_terminated_length": 136.0,
"completions/mean_length": 71.609375,
"completions/mean_terminated_length": 71.609375,
"completions/min_length": 29.0,
"completions/min_terminated_length": 29.0,
"entropy": 0.9826507568359375,
"epoch": 0.1717171717171717,
"frac_reward_zero_std": 0.4375,
"grad_norm": 1.8394359350204468,
"learning_rate": 8.308080808080807e-07,
"loss": -0.0,
"num_tokens": 7521593.0,
"reward": 0.6808593273162842,
"reward_std": 0.25810331106185913,
"rewards/video_r1_accuracy_reward/mean": 0.6640625,
"rewards/video_r1_accuracy_reward/std": 0.47417303919792175,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 68
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 162.0,
"completions/max_terminated_length": 162.0,
"completions/mean_length": 70.4609375,
"completions/mean_terminated_length": 70.4609375,
"completions/min_length": 32.0,
"completions/min_terminated_length": 32.0,
"entropy": 0.9084649085998535,
"epoch": 0.17424242424242425,
"frac_reward_zero_std": 0.4375,
"grad_norm": 2.100003480911255,
"learning_rate": 8.282828282828283e-07,
"loss": 0.0,
"num_tokens": 7627260.0,
"reward": 0.740234375,
"reward_std": 0.23314350843429565,
"rewards/video_r1_accuracy_reward/mean": 0.7265625,
"rewards/video_r1_accuracy_reward/std": 0.447474867105484,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 69
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 162.0,
"completions/max_terminated_length": 162.0,
"completions/mean_length": 80.4453125,
"completions/mean_terminated_length": 80.4453125,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 0.9652878046035767,
"epoch": 0.17676767676767677,
"frac_reward_zero_std": 0.625,
"grad_norm": 1.6002858877182007,
"learning_rate": 8.257575757575757e-07,
"loss": -0.0,
"num_tokens": 7720845.0,
"reward": 0.6734374761581421,
"reward_std": 0.15718072652816772,
"rewards/video_r1_accuracy_reward/mean": 0.65625,
"rewards/video_r1_accuracy_reward/std": 0.47682511806488037,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 70
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 177.0,
"completions/max_terminated_length": 177.0,
"completions/mean_length": 79.203125,
"completions/mean_terminated_length": 79.203125,
"completions/min_length": 33.0,
"completions/min_terminated_length": 33.0,
"entropy": 1.0387096405029297,
"epoch": 0.17929292929292928,
"frac_reward_zero_std": 0.5,
"grad_norm": 1.9980230331420898,
"learning_rate": 8.232323232323232e-07,
"loss": -0.0,
"num_tokens": 7818975.0,
"reward": 0.49531248211860657,
"reward_std": 0.2143877148628235,
"rewards/video_r1_accuracy_reward/mean": 0.46875,
"rewards/video_r1_accuracy_reward/std": 0.5009832978248596,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 71
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 139.0,
"completions/max_terminated_length": 139.0,
"completions/mean_length": 73.6015625,
"completions/mean_terminated_length": 73.6015625,
"completions/min_length": 36.0,
"completions/min_terminated_length": 36.0,
"entropy": 1.002305269241333,
"epoch": 0.18181818181818182,
"frac_reward_zero_std": 0.5,
"grad_norm": 1.7402448654174805,
"learning_rate": 8.207070707070707e-07,
"loss": -0.0,
"num_tokens": 7928484.0,
"reward": 0.606640636920929,
"reward_std": 0.20465511083602905,
"rewards/video_r1_accuracy_reward/mean": 0.5859375,
"rewards/video_r1_accuracy_reward/std": 0.49449479579925537,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 72
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 190.0,
"completions/max_terminated_length": 190.0,
"completions/mean_length": 74.6953125,
"completions/mean_terminated_length": 74.6953125,
"completions/min_length": 29.0,
"completions/min_terminated_length": 29.0,
"entropy": 0.903598427772522,
"epoch": 0.18434343434343434,
"frac_reward_zero_std": 0.5,
"grad_norm": 1.6977179050445557,
"learning_rate": 8.181818181818182e-07,
"loss": -0.0,
"num_tokens": 8043461.0,
"reward": 0.5992187261581421,
"reward_std": 0.21215128898620605,
"rewards/video_r1_accuracy_reward/mean": 0.578125,
"rewards/video_r1_accuracy_reward/std": 0.4957992732524872,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 73
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 142.0,
"completions/max_terminated_length": 142.0,
"completions/mean_length": 76.6328125,
"completions/mean_terminated_length": 76.6328125,
"completions/min_length": 41.0,
"completions/min_terminated_length": 41.0,
"entropy": 0.9879953265190125,
"epoch": 0.18686868686868688,
"frac_reward_zero_std": 0.6875,
"grad_norm": 1.3695952892303467,
"learning_rate": 8.156565656565656e-07,
"loss": -0.0,
"num_tokens": 8141774.0,
"reward": 0.569531261920929,
"reward_std": 0.12444031983613968,
"rewards/video_r1_accuracy_reward/mean": 0.546875,
"rewards/video_r1_accuracy_reward/std": 0.4997538626194,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 74
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 137.0,
"completions/max_terminated_length": 137.0,
"completions/mean_length": 73.5625,
"completions/mean_terminated_length": 73.5625,
"completions/min_length": 40.0,
"completions/min_terminated_length": 40.0,
"entropy": 1.0036814212799072,
"epoch": 0.1893939393939394,
"frac_reward_zero_std": 0.375,
"grad_norm": 2.0377132892608643,
"learning_rate": 8.131313131313132e-07,
"loss": 0.0,
"num_tokens": 8238342.0,
"reward": 0.673046886920929,
"reward_std": 0.2601749897003174,
"rewards/video_r1_accuracy_reward/mean": 0.65625,
"rewards/video_r1_accuracy_reward/std": 0.47682511806488037,
"rewards/video_r1_format_reward/mean": 0.9921875,
"rewards/video_r1_format_reward/std": 0.0883883461356163,
"step": 75
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 227.0,
"completions/max_terminated_length": 227.0,
"completions/mean_length": 81.3046875,
"completions/mean_terminated_length": 81.3046875,
"completions/min_length": 33.0,
"completions/min_terminated_length": 33.0,
"entropy": 0.9508095383644104,
"epoch": 0.1919191919191919,
"frac_reward_zero_std": 0.75,
"grad_norm": 1.1749444007873535,
"learning_rate": 8.106060606060605e-07,
"loss": -0.0,
"num_tokens": 8350589.0,
"reward": 0.6437499523162842,
"reward_std": 0.12493351101875305,
"rewards/video_r1_accuracy_reward/mean": 0.625,
"rewards/video_r1_accuracy_reward/std": 0.4860251843929291,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 76
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 168.0,
"completions/max_terminated_length": 168.0,
"completions/mean_length": 71.0390625,
"completions/mean_terminated_length": 71.0390625,
"completions/min_length": 27.0,
"completions/min_terminated_length": 27.0,
"entropy": 0.9800167083740234,
"epoch": 0.19444444444444445,
"frac_reward_zero_std": 0.625,
"grad_norm": 1.6353753805160522,
"learning_rate": 8.08080808080808e-07,
"loss": 0.0,
"num_tokens": 8454770.0,
"reward": 0.7105468511581421,
"reward_std": 0.1551697999238968,
"rewards/video_r1_accuracy_reward/mean": 0.6953125,
"rewards/video_r1_accuracy_reward/std": 0.46208351850509644,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 77
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 134.0,
"completions/max_terminated_length": 134.0,
"completions/mean_length": 65.9296875,
"completions/mean_terminated_length": 65.9296875,
"completions/min_length": 30.0,
"completions/min_terminated_length": 30.0,
"entropy": 0.9639301300048828,
"epoch": 0.19696969696969696,
"frac_reward_zero_std": 0.375,
"grad_norm": 2.4808032512664795,
"learning_rate": 8.055555555555556e-07,
"loss": -0.0,
"num_tokens": 8552857.0,
"reward": 0.5023437738418579,
"reward_std": 0.23973365128040314,
"rewards/video_r1_accuracy_reward/mean": 0.4765625,
"rewards/video_r1_accuracy_reward/std": 0.5014128684997559,
"rewards/video_r1_format_reward/mean": 0.9921875,
"rewards/video_r1_format_reward/std": 0.0883883461356163,
"step": 78
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 164.0,
"completions/max_terminated_length": 164.0,
"completions/mean_length": 69.28125,
"completions/mean_terminated_length": 69.28125,
"completions/min_length": 36.0,
"completions/min_terminated_length": 36.0,
"entropy": 0.9725464582443237,
"epoch": 0.1994949494949495,
"frac_reward_zero_std": 0.5,
"grad_norm": 1.850342869758606,
"learning_rate": 8.030303030303029e-07,
"loss": -0.0,
"num_tokens": 8649085.0,
"reward": 0.5843750238418579,
"reward_std": 0.21215128898620605,
"rewards/video_r1_accuracy_reward/mean": 0.5625,
"rewards/video_r1_accuracy_reward/std": 0.49802759289741516,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 79
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 154.0,
"completions/max_terminated_length": 154.0,
"completions/mean_length": 72.0078125,
"completions/mean_terminated_length": 72.0078125,
"completions/min_length": 32.0,
"completions/min_terminated_length": 32.0,
"entropy": 0.9206636548042297,
"epoch": 0.20202020202020202,
"frac_reward_zero_std": 0.625,
"grad_norm": 1.5013765096664429,
"learning_rate": 8.005050505050505e-07,
"loss": 0.0,
"num_tokens": 8745462.0,
"reward": 0.614062488079071,
"reward_std": 0.15292873978614807,
"rewards/video_r1_accuracy_reward/mean": 0.59375,
"rewards/video_r1_accuracy_reward/std": 0.4930621087551117,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 80
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 190.0,
"completions/max_terminated_length": 190.0,
"completions/mean_length": 72.53125,
"completions/mean_terminated_length": 72.53125,
"completions/min_length": 38.0,
"completions/min_terminated_length": 38.0,
"entropy": 0.9708524942398071,
"epoch": 0.20454545454545456,
"frac_reward_zero_std": 0.6875,
"grad_norm": 1.4737831354141235,
"learning_rate": 7.97979797979798e-07,
"loss": 0.0,
"num_tokens": 8852826.0,
"reward": 0.6734374761581421,
"reward_std": 0.1426815390586853,
"rewards/video_r1_accuracy_reward/mean": 0.65625,
"rewards/video_r1_accuracy_reward/std": 0.47682511806488037,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 81
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 136.0,
"completions/max_terminated_length": 136.0,
"completions/mean_length": 73.265625,
"completions/mean_terminated_length": 73.265625,
"completions/min_length": 41.0,
"completions/min_terminated_length": 41.0,
"entropy": 0.9690735936164856,
"epoch": 0.20707070707070707,
"frac_reward_zero_std": 0.75,
"grad_norm": 1.4358322620391846,
"learning_rate": 7.954545454545454e-07,
"loss": -0.0,
"num_tokens": 8950708.0,
"reward": 0.740234375,
"reward_std": 0.10770007222890854,
"rewards/video_r1_accuracy_reward/mean": 0.7265625,
"rewards/video_r1_accuracy_reward/std": 0.447474867105484,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 82
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 123.0,
"completions/max_terminated_length": 123.0,
"completions/mean_length": 58.984375,
"completions/mean_terminated_length": 58.984375,
"completions/min_length": 24.0,
"completions/min_terminated_length": 24.0,
"entropy": 0.9185530543327332,
"epoch": 0.20959595959595959,
"frac_reward_zero_std": 0.5,
"grad_norm": 2.4200658798217773,
"learning_rate": 7.929292929292929e-07,
"loss": -0.0,
"num_tokens": 9045026.0,
"reward": 0.517578125,
"reward_std": 0.21439234912395477,
"rewards/video_r1_accuracy_reward/mean": 0.4921875,
"rewards/video_r1_accuracy_reward/std": 0.5019033551216125,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 83
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 122.0,
"completions/max_terminated_length": 122.0,
"completions/mean_length": 65.3203125,
"completions/mean_terminated_length": 65.3203125,
"completions/min_length": 31.0,
"completions/min_terminated_length": 31.0,
"entropy": 0.9491331577301025,
"epoch": 0.21212121212121213,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.0213149785995483,
"learning_rate": 7.904040404040404e-07,
"loss": -0.0,
"num_tokens": 9152019.0,
"reward": 0.740234375,
"reward_std": 0.058214765042066574,
"rewards/video_r1_accuracy_reward/mean": 0.7265625,
"rewards/video_r1_accuracy_reward/std": 0.447474867105484,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 84
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 118.0,
"completions/max_terminated_length": 118.0,
"completions/mean_length": 69.296875,
"completions/mean_terminated_length": 69.296875,
"completions/min_length": 30.0,
"completions/min_terminated_length": 30.0,
"entropy": 0.9853086471557617,
"epoch": 0.21464646464646464,
"frac_reward_zero_std": 0.5625,
"grad_norm": 1.8322724103927612,
"learning_rate": 7.878787878787878e-07,
"loss": 0.0,
"num_tokens": 9262145.0,
"reward": 0.666015625,
"reward_std": 0.17940622568130493,
"rewards/video_r1_accuracy_reward/mean": 0.6484375,
"rewards/video_r1_accuracy_reward/std": 0.4793342351913452,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 85
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 123.0,
"completions/max_terminated_length": 123.0,
"completions/mean_length": 68.265625,
"completions/mean_terminated_length": 68.265625,
"completions/min_length": 31.0,
"completions/min_terminated_length": 31.0,
"entropy": 0.9777708649635315,
"epoch": 0.21717171717171718,
"frac_reward_zero_std": 0.8125,
"grad_norm": 1.12037193775177,
"learning_rate": 7.853535353535353e-07,
"loss": -0.0,
"num_tokens": 9371435.0,
"reward": 0.46562501788139343,
"reward_std": 0.08345898985862732,
"rewards/video_r1_accuracy_reward/mean": 0.4375,
"rewards/video_r1_accuracy_reward/std": 0.49802759289741516,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 86
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 155.0,
"completions/max_terminated_length": 155.0,
"completions/mean_length": 70.34375,
"completions/mean_terminated_length": 70.34375,
"completions/min_length": 39.0,
"completions/min_terminated_length": 39.0,
"entropy": 0.9397503733634949,
"epoch": 0.2196969696969697,
"frac_reward_zero_std": 0.75,
"grad_norm": 1.3667945861816406,
"learning_rate": 7.828282828282829e-07,
"loss": -0.0,
"num_tokens": 9478271.0,
"reward": 0.606640636920929,
"reward_std": 0.1196737289428711,
"rewards/video_r1_accuracy_reward/mean": 0.5859375,
"rewards/video_r1_accuracy_reward/std": 0.49449479579925537,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 87
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 181.0,
"completions/max_terminated_length": 181.0,
"completions/mean_length": 66.671875,
"completions/mean_terminated_length": 66.671875,
"completions/min_length": 28.0,
"completions/min_terminated_length": 28.0,
"entropy": 0.9511775970458984,
"epoch": 0.2222222222222222,
"frac_reward_zero_std": 0.625,
"grad_norm": 1.7731772661209106,
"learning_rate": 7.803030303030302e-07,
"loss": -0.0,
"num_tokens": 9587653.0,
"reward": 0.5992187261581421,
"reward_std": 0.16266599297523499,
"rewards/video_r1_accuracy_reward/mean": 0.578125,
"rewards/video_r1_accuracy_reward/std": 0.4957992732524872,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 88
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 145.0,
"completions/max_terminated_length": 145.0,
"completions/mean_length": 64.3203125,
"completions/mean_terminated_length": 64.3203125,
"completions/min_length": 28.0,
"completions/min_terminated_length": 28.0,
"entropy": 0.9355500936508179,
"epoch": 0.22474747474747475,
"frac_reward_zero_std": 0.8125,
"grad_norm": 1.7302086353302002,
"learning_rate": 7.777777777777778e-07,
"loss": -0.0,
"num_tokens": 9691022.0,
"reward": 0.799609363079071,
"reward_std": 0.09218844771385193,
"rewards/video_r1_accuracy_reward/mean": 0.7890625,
"rewards/video_r1_accuracy_reward/std": 0.4095771610736847,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 89
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 144.0,
"completions/max_terminated_length": 144.0,
"completions/mean_length": 68.2109375,
"completions/mean_terminated_length": 68.2109375,
"completions/min_length": 35.0,
"completions/min_terminated_length": 35.0,
"entropy": 0.9529180526733398,
"epoch": 0.22727272727272727,
"frac_reward_zero_std": 0.8125,
"grad_norm": 1.217525839805603,
"learning_rate": 7.752525252525253e-07,
"loss": -0.0,
"num_tokens": 9797753.0,
"reward": 0.651171863079071,
"reward_std": 0.07596279680728912,
"rewards/video_r1_accuracy_reward/mean": 0.6328125,
"rewards/video_r1_accuracy_reward/std": 0.4839322865009308,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 90
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 158.0,
"completions/max_terminated_length": 158.0,
"completions/mean_length": 75.765625,
"completions/mean_terminated_length": 75.765625,
"completions/min_length": 34.0,
"completions/min_terminated_length": 34.0,
"entropy": 0.9758607745170593,
"epoch": 0.2297979797979798,
"frac_reward_zero_std": 0.6875,
"grad_norm": 1.5119833946228027,
"learning_rate": 7.727272727272727e-07,
"loss": -0.0,
"num_tokens": 9904523.0,
"reward": 0.6363281011581421,
"reward_std": 0.1416737586259842,
"rewards/video_r1_accuracy_reward/mean": 0.6171875,
"rewards/video_r1_accuracy_reward/std": 0.4879830479621887,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 91
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 131.0,
"completions/max_terminated_length": 131.0,
"completions/mean_length": 68.4453125,
"completions/mean_terminated_length": 68.4453125,
"completions/min_length": 37.0,
"completions/min_terminated_length": 37.0,
"entropy": 1.0206053256988525,
"epoch": 0.23232323232323232,
"frac_reward_zero_std": 0.6875,
"grad_norm": 1.599021315574646,
"learning_rate": 7.702020202020202e-07,
"loss": -0.0,
"num_tokens": 10007580.0,
"reward": 0.5695312023162842,
"reward_std": 0.1406659632921219,
"rewards/video_r1_accuracy_reward/mean": 0.546875,
"rewards/video_r1_accuracy_reward/std": 0.4997538626194,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 92
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 163.0,
"completions/max_terminated_length": 163.0,
"completions/mean_length": 71.671875,
"completions/mean_terminated_length": 71.671875,
"completions/min_length": 36.0,
"completions/min_terminated_length": 36.0,
"entropy": 0.9673388600349426,
"epoch": 0.23484848484848486,
"frac_reward_zero_std": 0.75,
"grad_norm": 1.1879236698150635,
"learning_rate": 7.676767676767675e-07,
"loss": 0.0,
"num_tokens": 10129522.0,
"reward": 0.5843749642372131,
"reward_std": 0.10120701789855957,
"rewards/video_r1_accuracy_reward/mean": 0.5625,
"rewards/video_r1_accuracy_reward/std": 0.49802759289741516,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 93
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 149.0,
"completions/max_terminated_length": 149.0,
"completions/mean_length": 68.9609375,
"completions/mean_terminated_length": 68.9609375,
"completions/min_length": 26.0,
"completions/min_terminated_length": 26.0,
"entropy": 1.0109703540802002,
"epoch": 0.23737373737373738,
"frac_reward_zero_std": 0.9375,
"grad_norm": 0.7206881046295166,
"learning_rate": 7.651515151515151e-07,
"loss": -0.0,
"num_tokens": 10232605.0,
"reward": 0.725390613079071,
"reward_std": 0.03072948195040226,
"rewards/video_r1_accuracy_reward/mean": 0.7109375,
"rewards/video_r1_accuracy_reward/std": 0.45510825514793396,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 94
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 158.0,
"completions/max_terminated_length": 158.0,
"completions/mean_length": 69.828125,
"completions/mean_terminated_length": 69.828125,
"completions/min_length": 31.0,
"completions/min_terminated_length": 31.0,
"entropy": 1.0361416339874268,
"epoch": 0.2398989898989899,
"frac_reward_zero_std": 0.625,
"grad_norm": 1.9655052423477173,
"learning_rate": 7.626262626262626e-07,
"loss": -0.0,
"num_tokens": 10343447.0,
"reward": 0.7699218988418579,
"reward_std": 0.1551697999238968,
"rewards/video_r1_accuracy_reward/mean": 0.7578125,
"rewards/video_r1_accuracy_reward/std": 0.4300905168056488,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 95
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 139.0,
"completions/max_terminated_length": 139.0,
"completions/mean_length": 70.5078125,
"completions/mean_terminated_length": 70.5078125,
"completions/min_length": 30.0,
"completions/min_terminated_length": 30.0,
"entropy": 1.0260932445526123,
"epoch": 0.24242424242424243,
"frac_reward_zero_std": 0.5625,
"grad_norm": 1.967199683189392,
"learning_rate": 7.6010101010101e-07,
"loss": 0.0,
"num_tokens": 10455376.0,
"reward": 0.6511719226837158,
"reward_std": 0.18265508115291595,
"rewards/video_r1_accuracy_reward/mean": 0.6328125,
"rewards/video_r1_accuracy_reward/std": 0.4839322865009308,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 96
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 139.0,
"completions/max_terminated_length": 139.0,
"completions/mean_length": 66.625,
"completions/mean_terminated_length": 66.625,
"completions/min_length": 26.0,
"completions/min_terminated_length": 26.0,
"entropy": 1.0024924278259277,
"epoch": 0.24494949494949494,
"frac_reward_zero_std": 0.875,
"grad_norm": 0.9387697577476501,
"learning_rate": 7.575757575757575e-07,
"loss": -0.0,
"num_tokens": 10563352.0,
"reward": 0.6066405773162842,
"reward_std": 0.05821476876735687,
"rewards/video_r1_accuracy_reward/mean": 0.5859375,
"rewards/video_r1_accuracy_reward/std": 0.49449479579925537,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 97
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 191.0,
"completions/max_terminated_length": 191.0,
"completions/mean_length": 66.890625,
"completions/mean_terminated_length": 66.890625,
"completions/min_length": 31.0,
"completions/min_terminated_length": 31.0,
"entropy": 0.9643306732177734,
"epoch": 0.2474747474747475,
"frac_reward_zero_std": 0.8125,
"grad_norm": 1.325685739517212,
"learning_rate": 7.550505050505051e-07,
"loss": 0.0,
"num_tokens": 10671386.0,
"reward": 0.5992187261581421,
"reward_std": 0.06946974992752075,
"rewards/video_r1_accuracy_reward/mean": 0.578125,
"rewards/video_r1_accuracy_reward/std": 0.4957992732524872,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 98
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 144.0,
"completions/max_terminated_length": 144.0,
"completions/mean_length": 70.015625,
"completions/mean_terminated_length": 70.015625,
"completions/min_length": 34.0,
"completions/min_terminated_length": 34.0,
"entropy": 1.0711150169372559,
"epoch": 0.25,
"frac_reward_zero_std": 0.5625,
"grad_norm": 1.8700460195541382,
"learning_rate": 7.525252525252524e-07,
"loss": 0.0,
"num_tokens": 10769676.0,
"reward": 0.49531251192092896,
"reward_std": 0.16317594051361084,
"rewards/video_r1_accuracy_reward/mean": 0.46875,
"rewards/video_r1_accuracy_reward/std": 0.5009832978248596,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 99
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 141.0,
"completions/max_terminated_length": 141.0,
"completions/mean_length": 66.1875,
"completions/mean_terminated_length": 66.1875,
"completions/min_length": 32.0,
"completions/min_terminated_length": 32.0,
"entropy": 1.032357096672058,
"epoch": 0.25252525252525254,
"frac_reward_zero_std": 0.8125,
"grad_norm": 1.2700526714324951,
"learning_rate": 7.5e-07,
"loss": 0.0,
"num_tokens": 10868492.0,
"reward": 0.7328125238418579,
"reward_std": 0.0737217366695404,
"rewards/video_r1_accuracy_reward/mean": 0.71875,
"rewards/video_r1_accuracy_reward/std": 0.4513758420944214,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 100
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 133.0,
"completions/max_terminated_length": 133.0,
"completions/mean_length": 68.2890625,
"completions/mean_terminated_length": 68.2890625,
"completions/min_length": 24.0,
"completions/min_terminated_length": 24.0,
"entropy": 0.9825550317764282,
"epoch": 0.255050505050505,
"frac_reward_zero_std": 0.9375,
"grad_norm": 0.7565488219261169,
"learning_rate": 7.474747474747475e-07,
"loss": -0.0,
"num_tokens": 10983977.0,
"reward": 0.6585937738418579,
"reward_std": 0.027485283091664314,
"rewards/video_r1_accuracy_reward/mean": 0.640625,
"rewards/video_r1_accuracy_reward/std": 0.481702595949173,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 101
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 131.0,
"completions/max_terminated_length": 131.0,
"completions/mean_length": 66.1484375,
"completions/mean_terminated_length": 66.1484375,
"completions/min_length": 33.0,
"completions/min_terminated_length": 33.0,
"entropy": 1.0391050577163696,
"epoch": 0.25757575757575757,
"frac_reward_zero_std": 0.625,
"grad_norm": 1.6984504461288452,
"learning_rate": 7.449494949494948e-07,
"loss": 0.0,
"num_tokens": 11084348.0,
"reward": 0.6363281011581421,
"reward_std": 0.15942178666591644,
"rewards/video_r1_accuracy_reward/mean": 0.6171875,
"rewards/video_r1_accuracy_reward/std": 0.4879830479621887,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 102
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 107.0,
"completions/max_terminated_length": 107.0,
"completions/mean_length": 61.875,
"completions/mean_terminated_length": 61.875,
"completions/min_length": 25.0,
"completions/min_terminated_length": 25.0,
"entropy": 1.0670381784439087,
"epoch": 0.2601010101010101,
"frac_reward_zero_std": 0.4375,
"grad_norm": 2.5665719509124756,
"learning_rate": 7.424242424242424e-07,
"loss": -0.0,
"num_tokens": 11193508.0,
"reward": 0.6585937738418579,
"reward_std": 0.23437213897705078,
"rewards/video_r1_accuracy_reward/mean": 0.640625,
"rewards/video_r1_accuracy_reward/std": 0.481702595949173,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 103
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 113.0,
"completions/max_terminated_length": 113.0,
"completions/mean_length": 62.8984375,
"completions/mean_terminated_length": 62.8984375,
"completions/min_length": 33.0,
"completions/min_terminated_length": 33.0,
"entropy": 1.031665563583374,
"epoch": 0.26262626262626265,
"frac_reward_zero_std": 0.6875,
"grad_norm": 1.8193395137786865,
"learning_rate": 7.398989898989899e-07,
"loss": -0.0,
"num_tokens": 11299487.0,
"reward": 0.688281238079071,
"reward_std": 0.13842955231666565,
"rewards/video_r1_accuracy_reward/mean": 0.671875,
"rewards/video_r1_accuracy_reward/std": 0.4713755249977112,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 104
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 104.0,
"completions/max_terminated_length": 104.0,
"completions/mean_length": 55.46875,
"completions/mean_terminated_length": 55.46875,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 1.050790786743164,
"epoch": 0.26515151515151514,
"frac_reward_zero_std": 0.75,
"grad_norm": 1.4435532093048096,
"learning_rate": 7.373737373737373e-07,
"loss": -0.0,
"num_tokens": 11408659.0,
"reward": 0.6214843988418579,
"reward_std": 0.10993648320436478,
"rewards/video_r1_accuracy_reward/mean": 0.6015625,
"rewards/video_r1_accuracy_reward/std": 0.4915000796318054,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 105
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 109.0,
"completions/max_terminated_length": 109.0,
"completions/mean_length": 61.3515625,
"completions/mean_terminated_length": 61.3515625,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 1.0249364376068115,
"epoch": 0.2676767676767677,
"frac_reward_zero_std": 0.75,
"grad_norm": 1.6191179752349854,
"learning_rate": 7.348484848484848e-07,
"loss": 0.0,
"num_tokens": 11512456.0,
"reward": 0.6066405773162842,
"reward_std": 0.10019923746585846,
"rewards/video_r1_accuracy_reward/mean": 0.5859375,
"rewards/video_r1_accuracy_reward/std": 0.49449479579925537,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 106
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 132.0,
"completions/max_terminated_length": 132.0,
"completions/mean_length": 59.1484375,
"completions/mean_terminated_length": 59.1484375,
"completions/min_length": 26.0,
"completions/min_terminated_length": 26.0,
"entropy": 1.0390393733978271,
"epoch": 0.2702020202020202,
"frac_reward_zero_std": 0.625,
"grad_norm": 1.6593204736709595,
"learning_rate": 7.323232323232324e-07,
"loss": -0.0,
"num_tokens": 11622035.0,
"reward": 0.606640636920929,
"reward_std": 0.16165819764137268,
"rewards/video_r1_accuracy_reward/mean": 0.5859375,
"rewards/video_r1_accuracy_reward/std": 0.49449479579925537,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 107
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 113.0,
"completions/max_terminated_length": 113.0,
"completions/mean_length": 61.7890625,
"completions/mean_terminated_length": 61.7890625,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 1.0370471477508545,
"epoch": 0.2727272727272727,
"frac_reward_zero_std": 0.6875,
"grad_norm": 1.7022017240524292,
"learning_rate": 7.297979797979797e-07,
"loss": -0.0,
"num_tokens": 11727192.0,
"reward": 0.688281238079071,
"reward_std": 0.13518071174621582,
"rewards/video_r1_accuracy_reward/mean": 0.671875,
"rewards/video_r1_accuracy_reward/std": 0.4713755249977112,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 108
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 130.0,
"completions/max_terminated_length": 130.0,
"completions/mean_length": 59.1484375,
"completions/mean_terminated_length": 59.1484375,
"completions/min_length": 24.0,
"completions/min_terminated_length": 24.0,
"entropy": 1.060609221458435,
"epoch": 0.27525252525252525,
"frac_reward_zero_std": 0.75,
"grad_norm": 1.5183703899383545,
"learning_rate": 7.272727272727272e-07,
"loss": -0.0,
"num_tokens": 11834915.0,
"reward": 0.6957031488418579,
"reward_std": 0.10993649065494537,
"rewards/video_r1_accuracy_reward/mean": 0.6796875,
"rewards/video_r1_accuracy_reward/std": 0.4684300124645233,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 109
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 147.0,
"completions/max_terminated_length": 147.0,
"completions/mean_length": 61.46875,
"completions/mean_terminated_length": 61.46875,
"completions/min_length": 28.0,
"completions/min_terminated_length": 28.0,
"entropy": 1.0464633703231812,
"epoch": 0.2777777777777778,
"frac_reward_zero_std": 0.8125,
"grad_norm": 1.4522336721420288,
"learning_rate": 7.247474747474747e-07,
"loss": -0.0,
"num_tokens": 11946295.0,
"reward": 0.7105468511581421,
"reward_std": 0.08570004999637604,
"rewards/video_r1_accuracy_reward/mean": 0.6953125,
"rewards/video_r1_accuracy_reward/std": 0.46208351850509644,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 110
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 124.0,
"completions/max_terminated_length": 124.0,
"completions/mean_length": 63.34375,
"completions/mean_terminated_length": 63.34375,
"completions/min_length": 31.0,
"completions/min_terminated_length": 31.0,
"entropy": 1.0706329345703125,
"epoch": 0.2803030303030303,
"frac_reward_zero_std": 0.75,
"grad_norm": 1.7645903825759888,
"learning_rate": 7.222222222222221e-07,
"loss": -0.0,
"num_tokens": 12049227.0,
"reward": 0.8292968273162842,
"reward_std": 0.11967373639345169,
"rewards/video_r1_accuracy_reward/mean": 0.8203125,
"rewards/video_r1_accuracy_reward/std": 0.3854354918003082,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 111
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 107.0,
"completions/max_terminated_length": 107.0,
"completions/mean_length": 58.9375,
"completions/mean_terminated_length": 58.9375,
"completions/min_length": 29.0,
"completions/min_terminated_length": 29.0,
"entropy": 1.1456776857376099,
"epoch": 0.2828282828282828,
"frac_reward_zero_std": 0.75,
"grad_norm": 1.4835776090621948,
"learning_rate": 7.196969696969697e-07,
"loss": 0.0,
"num_tokens": 12153771.0,
"reward": 0.7105468511581421,
"reward_std": 0.11418846249580383,
"rewards/video_r1_accuracy_reward/mean": 0.6953125,
"rewards/video_r1_accuracy_reward/std": 0.46208351850509644,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 112
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 111.0,
"completions/max_terminated_length": 111.0,
"completions/mean_length": 60.28125,
"completions/mean_terminated_length": 60.28125,
"completions/min_length": 26.0,
"completions/min_terminated_length": 26.0,
"entropy": 1.2109147310256958,
"epoch": 0.28535353535353536,
"frac_reward_zero_std": 0.75,
"grad_norm": 1.5145522356033325,
"learning_rate": 7.171717171717171e-07,
"loss": -0.0,
"num_tokens": 12257335.0,
"reward": 0.45820310711860657,
"reward_std": 0.10019923746585846,
"rewards/video_r1_accuracy_reward/mean": 0.4296875,
"rewards/video_r1_accuracy_reward/std": 0.4969765841960907,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 113
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 109.0,
"completions/max_terminated_length": 109.0,
"completions/mean_length": 57.6875,
"completions/mean_terminated_length": 57.6875,
"completions/min_length": 26.0,
"completions/min_terminated_length": 26.0,
"entropy": 1.1032978296279907,
"epoch": 0.2878787878787879,
"frac_reward_zero_std": 0.75,
"grad_norm": 1.8097097873687744,
"learning_rate": 7.146464646464646e-07,
"loss": -0.0,
"num_tokens": 12374687.0,
"reward": 0.740234375,
"reward_std": 0.10993648320436478,
"rewards/video_r1_accuracy_reward/mean": 0.7265625,
"rewards/video_r1_accuracy_reward/std": 0.447474867105484,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 114
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 99.0,
"completions/max_terminated_length": 99.0,
"completions/mean_length": 55.1953125,
"completions/mean_terminated_length": 55.1953125,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.2030807733535767,
"epoch": 0.2904040404040404,
"frac_reward_zero_std": 0.625,
"grad_norm": 2.118196487426758,
"learning_rate": 7.121212121212121e-07,
"loss": -0.0,
"num_tokens": 12485544.0,
"reward": 0.443359375,
"reward_std": 0.1649070382118225,
"rewards/video_r1_accuracy_reward/mean": 0.4140625,
"rewards/video_r1_accuracy_reward/std": 0.49449479579925537,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 115
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 90.0,
"completions/max_terminated_length": 90.0,
"completions/mean_length": 52.40625,
"completions/mean_terminated_length": 52.40625,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.1323903799057007,
"epoch": 0.29292929292929293,
"frac_reward_zero_std": 0.75,
"grad_norm": 1.8301972150802612,
"learning_rate": 7.095959595959596e-07,
"loss": 0.0,
"num_tokens": 12584092.0,
"reward": 0.7476562261581421,
"reward_std": 0.09695503115653992,
"rewards/video_r1_accuracy_reward/mean": 0.734375,
"rewards/video_r1_accuracy_reward/std": 0.44340085983276367,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 116
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 126.0,
"completions/max_terminated_length": 126.0,
"completions/mean_length": 63.2578125,
"completions/mean_terminated_length": 63.2578125,
"completions/min_length": 28.0,
"completions/min_terminated_length": 28.0,
"entropy": 1.2527742385864258,
"epoch": 0.29545454545454547,
"frac_reward_zero_std": 0.6875,
"grad_norm": 1.677696943283081,
"learning_rate": 7.07070707070707e-07,
"loss": -0.0,
"num_tokens": 12687173.0,
"reward": 0.6511719226837158,
"reward_std": 0.12768451869487762,
"rewards/video_r1_accuracy_reward/mean": 0.6328125,
"rewards/video_r1_accuracy_reward/std": 0.4839322865009308,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 117
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 135.0,
"completions/max_terminated_length": 135.0,
"completions/mean_length": 57.9140625,
"completions/mean_terminated_length": 57.9140625,
"completions/min_length": 25.0,
"completions/min_terminated_length": 25.0,
"entropy": 1.1632239818572998,
"epoch": 0.29797979797979796,
"frac_reward_zero_std": 0.8125,
"grad_norm": 1.4128031730651855,
"learning_rate": 7.045454545454545e-07,
"loss": -0.0,
"num_tokens": 12777050.0,
"reward": 0.762499988079071,
"reward_std": 0.08894424885511398,
"rewards/video_r1_accuracy_reward/mean": 0.75,
"rewards/video_r1_accuracy_reward/std": 0.434714138507843,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 118
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 121.0,
"completions/max_terminated_length": 121.0,
"completions/mean_length": 55.078125,
"completions/mean_terminated_length": 55.078125,
"completions/min_length": 24.0,
"completions/min_terminated_length": 24.0,
"entropy": 1.1989972591400146,
"epoch": 0.3005050505050505,
"frac_reward_zero_std": 0.6875,
"grad_norm": 1.9097048044204712,
"learning_rate": 7.02020202020202e-07,
"loss": 0.0,
"num_tokens": 12894444.0,
"reward": 0.6734375357627869,
"reward_std": 0.12869229912757874,
"rewards/video_r1_accuracy_reward/mean": 0.65625,
"rewards/video_r1_accuracy_reward/std": 0.47682511806488037,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 119
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 139.0,
"completions/max_terminated_length": 139.0,
"completions/mean_length": 59.7578125,
"completions/mean_terminated_length": 59.7578125,
"completions/min_length": 24.0,
"completions/min_terminated_length": 24.0,
"entropy": 1.3133735656738281,
"epoch": 0.30303030303030304,
"frac_reward_zero_std": 0.625,
"grad_norm": 1.8875597715377808,
"learning_rate": 6.994949494949494e-07,
"loss": 0.0,
"num_tokens": 12986525.0,
"reward": 0.717578113079071,
"reward_std": 0.1526612639427185,
"rewards/video_r1_accuracy_reward/mean": 0.703125,
"rewards/video_r1_accuracy_reward/std": 0.45867621898651123,
"rewards/video_r1_format_reward/mean": 0.9921875,
"rewards/video_r1_format_reward/std": 0.0883883461356163,
"step": 120
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 87.0,
"completions/max_terminated_length": 87.0,
"completions/mean_length": 47.015625,
"completions/mean_terminated_length": 47.015625,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.1269464492797852,
"epoch": 0.3055555555555556,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.4073408842086792,
"learning_rate": 6.96969696969697e-07,
"loss": -0.0,
"num_tokens": 13101023.0,
"reward": 0.799609363079071,
"reward_std": 0.05272950232028961,
"rewards/video_r1_accuracy_reward/mean": 0.7890625,
"rewards/video_r1_accuracy_reward/std": 0.4095771610736847,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 121
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 116.0,
"completions/max_terminated_length": 116.0,
"completions/mean_length": 52.6640625,
"completions/mean_terminated_length": 52.6640625,
"completions/min_length": 27.0,
"completions/min_terminated_length": 27.0,
"entropy": 1.16685152053833,
"epoch": 0.30808080808080807,
"frac_reward_zero_std": 0.8125,
"grad_norm": 1.5721867084503174,
"learning_rate": 6.944444444444444e-07,
"loss": 0.0,
"num_tokens": 13208740.0,
"reward": 0.6214843988418579,
"reward_std": 0.08021478354930878,
"rewards/video_r1_accuracy_reward/mean": 0.6015625,
"rewards/video_r1_accuracy_reward/std": 0.4915000796318054,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 122
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 89.0,
"completions/max_terminated_length": 89.0,
"completions/mean_length": 47.6953125,
"completions/mean_terminated_length": 47.6953125,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.2507858276367188,
"epoch": 0.3106060606060606,
"frac_reward_zero_std": 0.75,
"grad_norm": 2.312094211578369,
"learning_rate": 6.919191919191919e-07,
"loss": -0.0,
"num_tokens": 13313453.0,
"reward": 0.5621094107627869,
"reward_std": 0.10770007222890854,
"rewards/video_r1_accuracy_reward/mean": 0.5390625,
"rewards/video_r1_accuracy_reward/std": 0.5004304051399231,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 123
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 112.0,
"completions/max_terminated_length": 112.0,
"completions/mean_length": 53.2578125,
"completions/mean_terminated_length": 53.2578125,
"completions/min_length": 26.0,
"completions/min_terminated_length": 26.0,
"entropy": 1.216563105583191,
"epoch": 0.31313131313131315,
"frac_reward_zero_std": 0.8125,
"grad_norm": 1.8229986429214478,
"learning_rate": 6.893939393939394e-07,
"loss": 0.0,
"num_tokens": 13423294.0,
"reward": 0.5843750238418579,
"reward_std": 0.07920699566602707,
"rewards/video_r1_accuracy_reward/mean": 0.5625,
"rewards/video_r1_accuracy_reward/std": 0.49802759289741516,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 124
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 96.0,
"completions/max_terminated_length": 96.0,
"completions/mean_length": 49.171875,
"completions/mean_terminated_length": 49.171875,
"completions/min_length": 19.0,
"completions/min_terminated_length": 19.0,
"entropy": 1.2454454898834229,
"epoch": 0.31565656565656564,
"frac_reward_zero_std": 0.9375,
"grad_norm": 1.0392420291900635,
"learning_rate": 6.868686868686868e-07,
"loss": -0.0,
"num_tokens": 13520556.0,
"reward": 0.725390613079071,
"reward_std": 0.03072948195040226,
"rewards/video_r1_accuracy_reward/mean": 0.7109375,
"rewards/video_r1_accuracy_reward/std": 0.45510825514793396,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 125
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 90.0,
"completions/max_terminated_length": 90.0,
"completions/mean_length": 48.1640625,
"completions/mean_terminated_length": 48.1640625,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 1.1565489768981934,
"epoch": 0.3181818181818182,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.4579185247421265,
"learning_rate": 6.843434343434343e-07,
"loss": -0.0,
"num_tokens": 13634321.0,
"reward": 0.688281238079071,
"reward_std": 0.05922255665063858,
"rewards/video_r1_accuracy_reward/mean": 0.671875,
"rewards/video_r1_accuracy_reward/std": 0.4713755249977112,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 126
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 99.0,
"completions/max_terminated_length": 99.0,
"completions/mean_length": 51.28125,
"completions/mean_terminated_length": 51.28125,
"completions/min_length": 24.0,
"completions/min_terminated_length": 24.0,
"entropy": 1.2307085990905762,
"epoch": 0.3207070707070707,
"frac_reward_zero_std": 0.75,
"grad_norm": 2.3030145168304443,
"learning_rate": 6.818181818181817e-07,
"loss": -0.0,
"num_tokens": 13727629.0,
"reward": 0.532421886920929,
"reward_std": 0.1131853386759758,
"rewards/video_r1_accuracy_reward/mean": 0.5078125,
"rewards/video_r1_accuracy_reward/std": 0.5019033551216125,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 127
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 147.0,
"completions/max_terminated_length": 147.0,
"completions/mean_length": 47.484375,
"completions/mean_terminated_length": 47.484375,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.1688158512115479,
"epoch": 0.32323232323232326,
"frac_reward_zero_std": 0.8125,
"grad_norm": 1.8600326776504517,
"learning_rate": 6.792929292929293e-07,
"loss": 0.0,
"num_tokens": 13837475.0,
"reward": 0.666015625,
"reward_std": 0.0727139487862587,
"rewards/video_r1_accuracy_reward/mean": 0.6484375,
"rewards/video_r1_accuracy_reward/std": 0.4793342351913452,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 128
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 111.0,
"completions/max_terminated_length": 111.0,
"completions/mean_length": 47.390625,
"completions/mean_terminated_length": 47.390625,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 1.1925861835479736,
"epoch": 0.32575757575757575,
"frac_reward_zero_std": 0.8125,
"grad_norm": 1.420927882194519,
"learning_rate": 6.767676767676767e-07,
"loss": -0.0,
"num_tokens": 13950165.0,
"reward": 0.8070312738418579,
"reward_std": 0.0737217366695404,
"rewards/video_r1_accuracy_reward/mean": 0.796875,
"rewards/video_r1_accuracy_reward/std": 0.40390563011169434,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 129
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 94.0,
"completions/max_terminated_length": 94.0,
"completions/mean_length": 45.359375,
"completions/mean_terminated_length": 45.359375,
"completions/min_length": 13.0,
"completions/min_terminated_length": 13.0,
"entropy": 1.1712085008621216,
"epoch": 0.3282828282828283,
"frac_reward_zero_std": 0.9375,
"grad_norm": 0.66424560546875,
"learning_rate": 6.742424242424242e-07,
"loss": 0.0,
"num_tokens": 14042243.0,
"reward": 0.6363281011581421,
"reward_std": 0.020992232486605644,
"rewards/video_r1_accuracy_reward/mean": 0.6171875,
"rewards/video_r1_accuracy_reward/std": 0.4879830479621887,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 130
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 112.0,
"completions/max_terminated_length": 112.0,
"completions/mean_length": 43.6171875,
"completions/mean_terminated_length": 43.6171875,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.274552583694458,
"epoch": 0.33080808080808083,
"frac_reward_zero_std": 0.6875,
"grad_norm": 2.4996423721313477,
"learning_rate": 6.717171717171717e-07,
"loss": 0.0,
"num_tokens": 14142794.0,
"reward": 0.591796875,
"reward_std": 0.12219925224781036,
"rewards/video_r1_accuracy_reward/mean": 0.5703125,
"rewards/video_r1_accuracy_reward/std": 0.4969765841960907,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 131
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 91.0,
"completions/max_terminated_length": 91.0,
"completions/mean_length": 42.3203125,
"completions/mean_terminated_length": 42.3203125,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.1475682258605957,
"epoch": 0.3333333333333333,
"frac_reward_zero_std": 0.8125,
"grad_norm": 1.9287738800048828,
"learning_rate": 6.691919191919192e-07,
"loss": 0.0,
"num_tokens": 14232227.0,
"reward": 0.6285156011581421,
"reward_std": 0.06965583562850952,
"rewards/video_r1_accuracy_reward/mean": 0.609375,
"rewards/video_r1_accuracy_reward/std": 0.4898075461387634,
"rewards/video_r1_format_reward/mean": 0.9921875,
"rewards/video_r1_format_reward/std": 0.0883883461356163,
"step": 132
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 100.0,
"completions/max_terminated_length": 100.0,
"completions/mean_length": 42.59375,
"completions/mean_terminated_length": 42.59375,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.2122106552124023,
"epoch": 0.33585858585858586,
"frac_reward_zero_std": 0.75,
"grad_norm": 3.023392915725708,
"learning_rate": 6.666666666666666e-07,
"loss": 0.0,
"num_tokens": 14331927.0,
"reward": 0.740234375,
"reward_std": 0.10344808548688889,
"rewards/video_r1_accuracy_reward/mean": 0.7265625,
"rewards/video_r1_accuracy_reward/std": 0.447474867105484,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 133
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 78.0,
"completions/max_terminated_length": 78.0,
"completions/mean_length": 40.53125,
"completions/mean_terminated_length": 40.53125,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.1993948221206665,
"epoch": 0.3383838383838384,
"frac_reward_zero_std": 0.8125,
"grad_norm": 1.7871482372283936,
"learning_rate": 6.641414141414141e-07,
"loss": 0.0,
"num_tokens": 14422979.0,
"reward": 0.5472656488418579,
"reward_std": 0.0727139487862587,
"rewards/video_r1_accuracy_reward/mean": 0.5234375,
"rewards/video_r1_accuracy_reward/std": 0.5014128684997559,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 134
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 80.0,
"completions/max_terminated_length": 80.0,
"completions/mean_length": 44.0859375,
"completions/mean_terminated_length": 44.0859375,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.1800475120544434,
"epoch": 0.3409090909090909,
"frac_reward_zero_std": 0.75,
"grad_norm": 2.805823564529419,
"learning_rate": 6.616161616161616e-07,
"loss": -0.0,
"num_tokens": 14521638.0,
"reward": 0.62109375,
"reward_std": 0.08131963759660721,
"rewards/video_r1_accuracy_reward/mean": 0.6015625,
"rewards/video_r1_accuracy_reward/std": 0.4915000796318054,
"rewards/video_r1_format_reward/mean": 0.9921875,
"rewards/video_r1_format_reward/std": 0.0883883461356163,
"step": 135
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 89.0,
"completions/max_terminated_length": 89.0,
"completions/mean_length": 39.5,
"completions/mean_terminated_length": 39.5,
"completions/min_length": 19.0,
"completions/min_terminated_length": 19.0,
"entropy": 1.1338804960250854,
"epoch": 0.3434343434343434,
"frac_reward_zero_std": 0.6875,
"grad_norm": 2.844430446624756,
"learning_rate": 6.59090909090909e-07,
"loss": -0.0,
"num_tokens": 14621022.0,
"reward": 0.703125,
"reward_std": 0.12119147181510925,
"rewards/video_r1_accuracy_reward/mean": 0.6875,
"rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 136
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 101.0,
"completions/max_terminated_length": 101.0,
"completions/mean_length": 37.484375,
"completions/mean_terminated_length": 37.484375,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.0793958902359009,
"epoch": 0.34595959595959597,
"frac_reward_zero_std": 0.8125,
"grad_norm": 2.934971332550049,
"learning_rate": 6.565656565656566e-07,
"loss": -0.0,
"num_tokens": 14735044.0,
"reward": 0.5621093511581421,
"reward_std": 0.09420402348041534,
"rewards/video_r1_accuracy_reward/mean": 0.5390625,
"rewards/video_r1_accuracy_reward/std": 0.5004304051399231,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 137
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 83.0,
"completions/max_terminated_length": 83.0,
"completions/mean_length": 39.546875,
"completions/mean_terminated_length": 39.546875,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.1664319038391113,
"epoch": 0.3484848484848485,
"frac_reward_zero_std": 0.625,
"grad_norm": 2.60278058052063,
"learning_rate": 6.54040404040404e-07,
"loss": -0.0,
"num_tokens": 14846898.0,
"reward": 0.6140625476837158,
"reward_std": 0.1454278975725174,
"rewards/video_r1_accuracy_reward/mean": 0.59375,
"rewards/video_r1_accuracy_reward/std": 0.4930621087551117,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 138
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 69.0,
"completions/max_terminated_length": 69.0,
"completions/mean_length": 34.8671875,
"completions/mean_terminated_length": 34.8671875,
"completions/min_length": 19.0,
"completions/min_terminated_length": 19.0,
"entropy": 1.1905630826950073,
"epoch": 0.351010101010101,
"frac_reward_zero_std": 0.6875,
"grad_norm": 2.826327323913574,
"learning_rate": 6.515151515151515e-07,
"loss": -0.0,
"num_tokens": 14937953.0,
"reward": 0.680859386920929,
"reward_std": 0.13193649053573608,
"rewards/video_r1_accuracy_reward/mean": 0.6640625,
"rewards/video_r1_accuracy_reward/std": 0.47417303919792175,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 139
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 85.0,
"completions/max_terminated_length": 85.0,
"completions/mean_length": 34.3203125,
"completions/mean_terminated_length": 34.3203125,
"completions/min_length": 13.0,
"completions/min_terminated_length": 13.0,
"entropy": 1.1751891374588013,
"epoch": 0.35353535353535354,
"frac_reward_zero_std": 0.8125,
"grad_norm": 2.3793742656707764,
"learning_rate": 6.48989898989899e-07,
"loss": 0.0,
"num_tokens": 15043954.0,
"reward": 0.8070312738418579,
"reward_std": 0.08345898985862732,
"rewards/video_r1_accuracy_reward/mean": 0.796875,
"rewards/video_r1_accuracy_reward/std": 0.40390563011169434,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 140
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 82.0,
"completions/max_terminated_length": 82.0,
"completions/mean_length": 35.5,
"completions/mean_terminated_length": 35.5,
"completions/min_length": 17.0,
"completions/min_terminated_length": 17.0,
"entropy": 1.2330318689346313,
"epoch": 0.3560606060606061,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.5116065740585327,
"learning_rate": 6.464646464646465e-07,
"loss": -0.0,
"num_tokens": 15128850.0,
"reward": 0.5695312023162842,
"reward_std": 0.051721714437007904,
"rewards/video_r1_accuracy_reward/mean": 0.546875,
"rewards/video_r1_accuracy_reward/std": 0.4997538626194,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 141
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 70.0,
"completions/max_terminated_length": 70.0,
"completions/mean_length": 32.6171875,
"completions/mean_terminated_length": 32.6171875,
"completions/min_length": 18.0,
"completions/min_terminated_length": 18.0,
"entropy": 1.1521470546722412,
"epoch": 0.35858585858585856,
"frac_reward_zero_std": 0.75,
"grad_norm": 2.4774539470672607,
"learning_rate": 6.439393939393939e-07,
"loss": -0.0,
"num_tokens": 15236257.0,
"reward": 0.7105468511581421,
"reward_std": 0.10019923001527786,
"rewards/video_r1_accuracy_reward/mean": 0.6953125,
"rewards/video_r1_accuracy_reward/std": 0.46208351850509644,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 142
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 87.0,
"completions/max_terminated_length": 87.0,
"completions/mean_length": 31.734375,
"completions/mean_terminated_length": 31.734375,
"completions/min_length": 17.0,
"completions/min_terminated_length": 17.0,
"entropy": 1.1453057527542114,
"epoch": 0.3611111111111111,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.7422317266464233,
"learning_rate": 6.414141414141414e-07,
"loss": 0.0,
"num_tokens": 15338215.0,
"reward": 0.6437499523162842,
"reward_std": 0.05497056990861893,
"rewards/video_r1_accuracy_reward/mean": 0.625,
"rewards/video_r1_accuracy_reward/std": 0.4860251843929291,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 143
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 81.0,
"completions/max_terminated_length": 81.0,
"completions/mean_length": 34.3671875,
"completions/mean_terminated_length": 34.3671875,
"completions/min_length": 17.0,
"completions/min_terminated_length": 17.0,
"entropy": 1.2442141771316528,
"epoch": 0.36363636363636365,
"frac_reward_zero_std": 0.8125,
"grad_norm": 2.5866944789886475,
"learning_rate": 6.388888888888888e-07,
"loss": 0.0,
"num_tokens": 15450086.0,
"reward": 0.4878906011581421,
"reward_std": 0.0727139487862587,
"rewards/video_r1_accuracy_reward/mean": 0.4609375,
"rewards/video_r1_accuracy_reward/std": 0.5004304051399231,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 144
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 106.0,
"completions/max_terminated_length": 106.0,
"completions/mean_length": 33.3125,
"completions/mean_terminated_length": 33.3125,
"completions/min_length": 18.0,
"completions/min_terminated_length": 18.0,
"entropy": 1.2739644050598145,
"epoch": 0.3661616161616162,
"frac_reward_zero_std": 0.6875,
"grad_norm": 2.671504259109497,
"learning_rate": 6.363636363636363e-07,
"loss": -0.0,
"num_tokens": 15550070.0,
"reward": 0.680859386920929,
"reward_std": 0.12768451869487762,
"rewards/video_r1_accuracy_reward/mean": 0.6640625,
"rewards/video_r1_accuracy_reward/std": 0.47417303919792175,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 145
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 86.0,
"completions/max_terminated_length": 86.0,
"completions/mean_length": 30.2109375,
"completions/mean_terminated_length": 30.2109375,
"completions/min_length": 17.0,
"completions/min_terminated_length": 17.0,
"entropy": 1.149760127067566,
"epoch": 0.3686868686868687,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.9195950031280518,
"learning_rate": 6.338383838383839e-07,
"loss": -0.0,
"num_tokens": 15658265.0,
"reward": 0.6734374761581421,
"reward_std": 0.051721714437007904,
"rewards/video_r1_accuracy_reward/mean": 0.65625,
"rewards/video_r1_accuracy_reward/std": 0.47682511806488037,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 146
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 76.0,
"completions/max_terminated_length": 76.0,
"completions/mean_length": 31.5703125,
"completions/mean_terminated_length": 31.5703125,
"completions/min_length": 16.0,
"completions/min_terminated_length": 16.0,
"entropy": 1.2297152280807495,
"epoch": 0.3712121212121212,
"frac_reward_zero_std": 0.8125,
"grad_norm": 2.8299896717071533,
"learning_rate": 6.313131313131312e-07,
"loss": -0.0,
"num_tokens": 15760138.0,
"reward": 0.651171863079071,
"reward_std": 0.08021478354930878,
"rewards/video_r1_accuracy_reward/mean": 0.6328125,
"rewards/video_r1_accuracy_reward/std": 0.4839322865009308,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 147
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 60.0,
"completions/max_terminated_length": 60.0,
"completions/mean_length": 29.1875,
"completions/mean_terminated_length": 29.1875,
"completions/min_length": 18.0,
"completions/min_terminated_length": 18.0,
"entropy": 1.1559841632843018,
"epoch": 0.37373737373737376,
"frac_reward_zero_std": 0.8125,
"grad_norm": 2.4495553970336914,
"learning_rate": 6.287878787878788e-07,
"loss": -0.0,
"num_tokens": 15859386.0,
"reward": 0.6214843392372131,
"reward_std": 0.08570004999637604,
"rewards/video_r1_accuracy_reward/mean": 0.6015625,
"rewards/video_r1_accuracy_reward/std": 0.4915000796318054,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 148
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 80.0,
"completions/max_terminated_length": 80.0,
"completions/mean_length": 29.859375,
"completions/mean_terminated_length": 29.859375,
"completions/min_length": 16.0,
"completions/min_terminated_length": 16.0,
"entropy": 1.2058125734329224,
"epoch": 0.37626262626262624,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.5699256658554077,
"learning_rate": 6.262626262626263e-07,
"loss": -0.0,
"num_tokens": 15961120.0,
"reward": 0.703125,
"reward_std": 0.06145896762609482,
"rewards/video_r1_accuracy_reward/mean": 0.6875,
"rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 149
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 76.0,
"completions/max_terminated_length": 76.0,
"completions/mean_length": 33.84375,
"completions/mean_terminated_length": 33.84375,
"completions/min_length": 16.0,
"completions/min_terminated_length": 16.0,
"entropy": 1.2072830200195312,
"epoch": 0.3787878787878788,
"frac_reward_zero_std": 0.8125,
"grad_norm": 2.6067988872528076,
"learning_rate": 6.237373737373736e-07,
"loss": -0.0,
"num_tokens": 16063572.0,
"reward": 0.4878906309604645,
"reward_std": 0.06297669559717178,
"rewards/video_r1_accuracy_reward/mean": 0.4609375,
"rewards/video_r1_accuracy_reward/std": 0.5004304051399231,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 150
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 66.0,
"completions/max_terminated_length": 66.0,
"completions/mean_length": 32.734375,
"completions/mean_terminated_length": 32.734375,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.239713191986084,
"epoch": 0.3813131313131313,
"frac_reward_zero_std": 0.75,
"grad_norm": 3.205798387527466,
"learning_rate": 6.212121212121212e-07,
"loss": -0.0,
"num_tokens": 16174394.0,
"reward": 0.5914062261581421,
"reward_std": 0.06408154964447021,
"rewards/video_r1_accuracy_reward/mean": 0.5703125,
"rewards/video_r1_accuracy_reward/std": 0.4969765841960907,
"rewards/video_r1_format_reward/mean": 0.9921875,
"rewards/video_r1_format_reward/std": 0.0883883461356163,
"step": 151
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 70.0,
"completions/max_terminated_length": 70.0,
"completions/mean_length": 34.765625,
"completions/mean_terminated_length": 34.765625,
"completions/min_length": 18.0,
"completions/min_terminated_length": 18.0,
"entropy": 1.320723533630371,
"epoch": 0.3838383838383838,
"frac_reward_zero_std": 0.75,
"grad_norm": 2.119114875793457,
"learning_rate": 6.186868686868687e-07,
"loss": 0.0,
"num_tokens": 16276724.0,
"reward": 0.635937511920929,
"reward_std": 0.09064806997776031,
"rewards/video_r1_accuracy_reward/mean": 0.6171875,
"rewards/video_r1_accuracy_reward/std": 0.4879830479621887,
"rewards/video_r1_format_reward/mean": 0.9921875,
"rewards/video_r1_format_reward/std": 0.0883883461356163,
"step": 152
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 84.0,
"completions/max_terminated_length": 84.0,
"completions/mean_length": 36.5390625,
"completions/mean_terminated_length": 36.5390625,
"completions/min_length": 17.0,
"completions/min_terminated_length": 17.0,
"entropy": 1.2870080471038818,
"epoch": 0.38636363636363635,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.5376033782958984,
"learning_rate": 6.161616161616161e-07,
"loss": -0.0,
"num_tokens": 16371001.0,
"reward": 0.6214843988418579,
"reward_std": 0.05272950232028961,
"rewards/video_r1_accuracy_reward/mean": 0.6015625,
"rewards/video_r1_accuracy_reward/std": 0.4915000796318054,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 153
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 89.0,
"completions/max_terminated_length": 89.0,
"completions/mean_length": 35.984375,
"completions/mean_terminated_length": 35.984375,
"completions/min_length": 19.0,
"completions/min_terminated_length": 19.0,
"entropy": 1.2718448638916016,
"epoch": 0.3888888888888889,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.5065395832061768,
"learning_rate": 6.136363636363636e-07,
"loss": -0.0,
"num_tokens": 16482023.0,
"reward": 0.7847656011581421,
"reward_std": 0.05821476876735687,
"rewards/video_r1_accuracy_reward/mean": 0.7734375,
"rewards/video_r1_accuracy_reward/std": 0.4202519655227661,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 154
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 75.0,
"completions/max_terminated_length": 75.0,
"completions/mean_length": 34.0,
"completions/mean_terminated_length": 34.0,
"completions/min_length": 17.0,
"completions/min_terminated_length": 17.0,
"entropy": 1.2422447204589844,
"epoch": 0.39141414141414144,
"frac_reward_zero_std": 0.8125,
"grad_norm": 2.1209592819213867,
"learning_rate": 6.111111111111112e-07,
"loss": -0.0,
"num_tokens": 16578319.0,
"reward": 0.5621093511581421,
"reward_std": 0.09218844771385193,
"rewards/video_r1_accuracy_reward/mean": 0.5390625,
"rewards/video_r1_accuracy_reward/std": 0.5004304051399231,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 155
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 68.0,
"completions/max_terminated_length": 68.0,
"completions/mean_length": 33.4375,
"completions/mean_terminated_length": 33.4375,
"completions/min_length": 18.0,
"completions/min_terminated_length": 18.0,
"entropy": 1.1495954990386963,
"epoch": 0.3939393939393939,
"frac_reward_zero_std": 0.8125,
"grad_norm": 2.415402889251709,
"learning_rate": 6.085858585858585e-07,
"loss": 0.0,
"num_tokens": 16677999.0,
"reward": 0.6953125,
"reward_std": 0.04958236962556839,
"rewards/video_r1_accuracy_reward/mean": 0.6796875,
"rewards/video_r1_accuracy_reward/std": 0.4684300124645233,
"rewards/video_r1_format_reward/mean": 0.9921875,
"rewards/video_r1_format_reward/std": 0.0883883461356163,
"step": 156
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 87.0,
"completions/max_terminated_length": 87.0,
"completions/mean_length": 33.4375,
"completions/mean_terminated_length": 33.4375,
"completions/min_length": 17.0,
"completions/min_terminated_length": 17.0,
"entropy": 1.2009010314941406,
"epoch": 0.39646464646464646,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.6719857454299927,
"learning_rate": 6.060606060606061e-07,
"loss": 0.0,
"num_tokens": 16781351.0,
"reward": 0.606640636920929,
"reward_std": 0.05272950232028961,
"rewards/video_r1_accuracy_reward/mean": 0.5859375,
"rewards/video_r1_accuracy_reward/std": 0.49449479579925537,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 157
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 83.0,
"completions/max_terminated_length": 83.0,
"completions/mean_length": 33.9296875,
"completions/mean_terminated_length": 33.9296875,
"completions/min_length": 18.0,
"completions/min_terminated_length": 18.0,
"entropy": 1.2379920482635498,
"epoch": 0.398989898989899,
"frac_reward_zero_std": 0.6875,
"grad_norm": 2.6537461280822754,
"learning_rate": 6.035353535353535e-07,
"loss": 0.0,
"num_tokens": 16878030.0,
"reward": 0.8292968273162842,
"reward_std": 0.1244356632232666,
"rewards/video_r1_accuracy_reward/mean": 0.8203125,
"rewards/video_r1_accuracy_reward/std": 0.3854354918003082,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 158
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 79.0,
"completions/max_terminated_length": 79.0,
"completions/mean_length": 32.1953125,
"completions/mean_terminated_length": 32.1953125,
"completions/min_length": 17.0,
"completions/min_terminated_length": 17.0,
"entropy": 1.1720532178878784,
"epoch": 0.4015151515151515,
"frac_reward_zero_std": 0.75,
"grad_norm": 2.911094903945923,
"learning_rate": 6.010101010101009e-07,
"loss": 0.0,
"num_tokens": 16982935.0,
"reward": 0.569531261920929,
"reward_std": 0.10120702534914017,
"rewards/video_r1_accuracy_reward/mean": 0.546875,
"rewards/video_r1_accuracy_reward/std": 0.4997538626194,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 159
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 74.0,
"completions/max_terminated_length": 74.0,
"completions/mean_length": 34.1484375,
"completions/mean_terminated_length": 34.1484375,
"completions/min_length": 18.0,
"completions/min_terminated_length": 18.0,
"entropy": 1.1654397249221802,
"epoch": 0.40404040404040403,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.369276762008667,
"learning_rate": 5.984848484848485e-07,
"loss": -0.0,
"num_tokens": 17070618.0,
"reward": 0.7328125238418579,
"reward_std": 0.05497056990861893,
"rewards/video_r1_accuracy_reward/mean": 0.71875,
"rewards/video_r1_accuracy_reward/std": 0.4513758420944214,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 160
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 84.0,
"completions/max_terminated_length": 84.0,
"completions/mean_length": 34.7890625,
"completions/mean_terminated_length": 34.7890625,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.1633415222167969,
"epoch": 0.4065656565656566,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.564042568206787,
"learning_rate": 5.959595959595959e-07,
"loss": 0.0,
"num_tokens": 17175703.0,
"reward": 0.7992187738418579,
"reward_std": 0.04921317845582962,
"rewards/video_r1_accuracy_reward/mean": 0.7890625,
"rewards/video_r1_accuracy_reward/std": 0.4095771610736847,
"rewards/video_r1_format_reward/mean": 0.9921875,
"rewards/video_r1_format_reward/std": 0.0883883461356163,
"step": 161
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 82.0,
"completions/max_terminated_length": 82.0,
"completions/mean_length": 35.1796875,
"completions/mean_terminated_length": 35.1796875,
"completions/min_length": 18.0,
"completions/min_terminated_length": 18.0,
"entropy": 1.3527387380599976,
"epoch": 0.4090909090909091,
"frac_reward_zero_std": 0.8125,
"grad_norm": 2.0236308574676514,
"learning_rate": 5.934343434343434e-07,
"loss": -0.0,
"num_tokens": 17283214.0,
"reward": 0.6363281011581421,
"reward_std": 0.08245119452476501,
"rewards/video_r1_accuracy_reward/mean": 0.6171875,
"rewards/video_r1_accuracy_reward/std": 0.4879830479621887,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 162
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 106.0,
"completions/max_terminated_length": 106.0,
"completions/mean_length": 34.8046875,
"completions/mean_terminated_length": 34.8046875,
"completions/min_length": 18.0,
"completions/min_terminated_length": 18.0,
"entropy": 1.2128992080688477,
"epoch": 0.4116161616161616,
"frac_reward_zero_std": 0.75,
"grad_norm": 2.5096118450164795,
"learning_rate": 5.909090909090909e-07,
"loss": -0.0,
"num_tokens": 17390165.0,
"reward": 0.7550780773162842,
"reward_std": 0.09046198427677155,
"rewards/video_r1_accuracy_reward/mean": 0.7421875,
"rewards/video_r1_accuracy_reward/std": 0.43914905190467834,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 163
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 69.0,
"completions/max_terminated_length": 69.0,
"completions/mean_length": 32.9140625,
"completions/mean_terminated_length": 32.9140625,
"completions/min_length": 18.0,
"completions/min_terminated_length": 18.0,
"entropy": 1.1064362525939941,
"epoch": 0.41414141414141414,
"frac_reward_zero_std": 0.6875,
"grad_norm": 3.49741268157959,
"learning_rate": 5.883838383838384e-07,
"loss": -0.0,
"num_tokens": 17484994.0,
"reward": 0.6359374523162842,
"reward_std": 0.10555607080459595,
"rewards/video_r1_accuracy_reward/mean": 0.6171875,
"rewards/video_r1_accuracy_reward/std": 0.4879830479621887,
"rewards/video_r1_format_reward/mean": 0.9921875,
"rewards/video_r1_format_reward/std": 0.0883883461356163,
"step": 164
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 108.0,
"completions/max_terminated_length": 108.0,
"completions/mean_length": 34.1015625,
"completions/mean_terminated_length": 34.1015625,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.1739277839660645,
"epoch": 0.4166666666666667,
"frac_reward_zero_std": 0.9375,
"grad_norm": 1.649794578552246,
"learning_rate": 5.858585858585858e-07,
"loss": 0.0,
"num_tokens": 17582335.0,
"reward": 0.576953113079071,
"reward_std": 0.020992232486605644,
"rewards/video_r1_accuracy_reward/mean": 0.5546875,
"rewards/video_r1_accuracy_reward/std": 0.4989531338214874,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 165
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 80.0,
"completions/max_terminated_length": 80.0,
"completions/mean_length": 38.046875,
"completions/mean_terminated_length": 38.046875,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.2043449878692627,
"epoch": 0.41919191919191917,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.621335506439209,
"learning_rate": 5.833333333333334e-07,
"loss": -0.0,
"num_tokens": 17673021.0,
"reward": 0.5249999761581421,
"reward_std": 0.06145896762609482,
"rewards/video_r1_accuracy_reward/mean": 0.5,
"rewards/video_r1_accuracy_reward/std": 0.5019646286964417,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 166
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 69.0,
"completions/max_terminated_length": 69.0,
"completions/mean_length": 35.953125,
"completions/mean_terminated_length": 35.953125,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.2063016891479492,
"epoch": 0.4217171717171717,
"frac_reward_zero_std": 0.8125,
"grad_norm": 2.0640013217926025,
"learning_rate": 5.808080808080808e-07,
"loss": 0.0,
"num_tokens": 17767647.0,
"reward": 0.7476562261581421,
"reward_std": 0.06946974992752075,
"rewards/video_r1_accuracy_reward/mean": 0.734375,
"rewards/video_r1_accuracy_reward/std": 0.44340085983276367,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 167
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 73.0,
"completions/max_terminated_length": 73.0,
"completions/mean_length": 35.9765625,
"completions/mean_terminated_length": 35.9765625,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.1426618099212646,
"epoch": 0.42424242424242425,
"frac_reward_zero_std": 0.8125,
"grad_norm": 1.6504007577896118,
"learning_rate": 5.782828282828282e-07,
"loss": 0.0,
"num_tokens": 17882004.0,
"reward": 0.740234375,
"reward_std": 0.08021478354930878,
"rewards/video_r1_accuracy_reward/mean": 0.7265625,
"rewards/video_r1_accuracy_reward/std": 0.447474867105484,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 168
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 75.0,
"completions/max_terminated_length": 75.0,
"completions/mean_length": 36.078125,
"completions/mean_terminated_length": 36.078125,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.167665719985962,
"epoch": 0.42676767676767674,
"frac_reward_zero_std": 0.8125,
"grad_norm": 1.703382968902588,
"learning_rate": 5.757575757575758e-07,
"loss": -0.0,
"num_tokens": 17961174.0,
"reward": 0.598828136920929,
"reward_std": 0.07939308881759644,
"rewards/video_r1_accuracy_reward/mean": 0.578125,
"rewards/video_r1_accuracy_reward/std": 0.4957992732524872,
"rewards/video_r1_format_reward/mean": 0.9921875,
"rewards/video_r1_format_reward/std": 0.0883883461356163,
"step": 169
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 93.0,
"completions/max_terminated_length": 93.0,
"completions/mean_length": 38.3671875,
"completions/mean_terminated_length": 38.3671875,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.1713335514068604,
"epoch": 0.4292929292929293,
"frac_reward_zero_std": 0.875,
"grad_norm": 2.324023962020874,
"learning_rate": 5.732323232323232e-07,
"loss": 0.0,
"num_tokens": 18065077.0,
"reward": 0.42851561307907104,
"reward_std": 0.04847751557826996,
"rewards/video_r1_accuracy_reward/mean": 0.3984375,
"rewards/video_r1_accuracy_reward/std": 0.4915000796318054,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 170
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 64.0,
"completions/max_terminated_length": 64.0,
"completions/mean_length": 35.40625,
"completions/mean_terminated_length": 35.40625,
"completions/min_length": 17.0,
"completions/min_terminated_length": 17.0,
"entropy": 1.166663646697998,
"epoch": 0.4318181818181818,
"frac_reward_zero_std": 0.75,
"grad_norm": 2.060908317565918,
"learning_rate": 5.707070707070707e-07,
"loss": 0.0,
"num_tokens": 18166713.0,
"reward": 0.5992187261581421,
"reward_std": 0.10120701789855957,
"rewards/video_r1_accuracy_reward/mean": 0.578125,
"rewards/video_r1_accuracy_reward/std": 0.4957992732524872,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 171
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 76.0,
"completions/max_terminated_length": 76.0,
"completions/mean_length": 34.8515625,
"completions/mean_terminated_length": 34.8515625,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.0551965236663818,
"epoch": 0.43434343434343436,
"frac_reward_zero_std": 0.8125,
"grad_norm": 2.146479606628418,
"learning_rate": 5.681818181818182e-07,
"loss": -0.0,
"num_tokens": 18271646.0,
"reward": 0.48046875,
"reward_std": 0.07920699566602707,
"rewards/video_r1_accuracy_reward/mean": 0.453125,
"rewards/video_r1_accuracy_reward/std": 0.4997538626194,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 172
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 72.0,
"completions/max_terminated_length": 72.0,
"completions/mean_length": 36.4921875,
"completions/mean_terminated_length": 36.4921875,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.0619423389434814,
"epoch": 0.43686868686868685,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.411008358001709,
"learning_rate": 5.656565656565657e-07,
"loss": 0.0,
"num_tokens": 18372037.0,
"reward": 0.5992187261581421,
"reward_std": 0.051721714437007904,
"rewards/video_r1_accuracy_reward/mean": 0.578125,
"rewards/video_r1_accuracy_reward/std": 0.4957992732524872,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 173
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 90.0,
"completions/max_terminated_length": 90.0,
"completions/mean_length": 36.5625,
"completions/mean_terminated_length": 36.5625,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.162034511566162,
"epoch": 0.4393939393939394,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.3831713199615479,
"learning_rate": 5.631313131313131e-07,
"loss": 0.0,
"num_tokens": 18462141.0,
"reward": 0.814453125,
"reward_std": 0.058214765042066574,
"rewards/video_r1_accuracy_reward/mean": 0.8046875,
"rewards/video_r1_accuracy_reward/std": 0.3979988098144531,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 174
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 76.0,
"completions/max_terminated_length": 76.0,
"completions/mean_length": 38.4453125,
"completions/mean_terminated_length": 38.4453125,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.2333970069885254,
"epoch": 0.44191919191919193,
"frac_reward_zero_std": 0.6875,
"grad_norm": 2.2612643241882324,
"learning_rate": 5.606060606060605e-07,
"loss": 0.0,
"num_tokens": 18564110.0,
"reward": 0.829296886920929,
"reward_std": 0.11469841748476028,
"rewards/video_r1_accuracy_reward/mean": 0.8203125,
"rewards/video_r1_accuracy_reward/std": 0.3854354918003082,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 175
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 76.0,
"completions/max_terminated_length": 76.0,
"completions/mean_length": 35.8125,
"completions/mean_terminated_length": 35.8125,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.1255345344543457,
"epoch": 0.4444444444444444,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.9899846315383911,
"learning_rate": 5.58080808080808e-07,
"loss": 0.0,
"num_tokens": 18665734.0,
"reward": 0.8960937261581421,
"reward_std": 0.051721714437007904,
"rewards/video_r1_accuracy_reward/mean": 0.890625,
"rewards/video_r1_accuracy_reward/std": 0.31333550810813904,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 176
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 65.0,
"completions/max_terminated_length": 65.0,
"completions/mean_length": 34.0,
"completions/mean_terminated_length": 34.0,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.142645239830017,
"epoch": 0.44696969696969696,
"frac_reward_zero_std": 0.625,
"grad_norm": 2.496107578277588,
"learning_rate": 5.555555555555555e-07,
"loss": 0.0,
"num_tokens": 18759270.0,
"reward": 0.532421886920929,
"reward_std": 0.13244643807411194,
"rewards/video_r1_accuracy_reward/mean": 0.5078125,
"rewards/video_r1_accuracy_reward/std": 0.5019033551216125,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 177
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 82.0,
"completions/max_terminated_length": 82.0,
"completions/mean_length": 38.4609375,
"completions/mean_terminated_length": 38.4609375,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.1225965023040771,
"epoch": 0.4494949494949495,
"frac_reward_zero_std": 0.6875,
"grad_norm": 2.3409671783447266,
"learning_rate": 5.53030303030303e-07,
"loss": -0.0,
"num_tokens": 18862553.0,
"reward": 0.6585937738418579,
"reward_std": 0.14693352580070496,
"rewards/video_r1_accuracy_reward/mean": 0.640625,
"rewards/video_r1_accuracy_reward/std": 0.481702595949173,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 178
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 100.0,
"completions/max_terminated_length": 100.0,
"completions/mean_length": 37.390625,
"completions/mean_terminated_length": 37.390625,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.1464502811431885,
"epoch": 0.45202020202020204,
"frac_reward_zero_std": 0.8125,
"grad_norm": 2.6066229343414307,
"learning_rate": 5.505050505050505e-07,
"loss": -0.0,
"num_tokens": 18956595.0,
"reward": 0.814453125,
"reward_std": 0.08570004999637604,
"rewards/video_r1_accuracy_reward/mean": 0.8046875,
"rewards/video_r1_accuracy_reward/std": 0.3979988098144531,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 179
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 81.0,
"completions/max_terminated_length": 81.0,
"completions/mean_length": 39.1171875,
"completions/mean_terminated_length": 39.1171875,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 1.1246540546417236,
"epoch": 0.45454545454545453,
"frac_reward_zero_std": 0.625,
"grad_norm": 3.036703109741211,
"learning_rate": 5.47979797979798e-07,
"loss": 0.0,
"num_tokens": 19059722.0,
"reward": 0.7476562261581421,
"reward_std": 0.14319148659706116,
"rewards/video_r1_accuracy_reward/mean": 0.734375,
"rewards/video_r1_accuracy_reward/std": 0.44340085983276367,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 180
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 126.0,
"completions/max_terminated_length": 126.0,
"completions/mean_length": 41.46875,
"completions/mean_terminated_length": 41.46875,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.1428744792938232,
"epoch": 0.45707070707070707,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 5.454545454545454e-07,
"loss": 0.0,
"num_tokens": 19158318.0,
"reward": 0.703125,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.6875,
"rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 181
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 96.0,
"completions/max_terminated_length": 96.0,
"completions/mean_length": 40.1796875,
"completions/mean_terminated_length": 40.1796875,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.1367346048355103,
"epoch": 0.4595959595959596,
"frac_reward_zero_std": 0.875,
"grad_norm": 2.153904676437378,
"learning_rate": 5.42929292929293e-07,
"loss": -0.0,
"num_tokens": 19266637.0,
"reward": 0.5621093511581421,
"reward_std": 0.058214765042066574,
"rewards/video_r1_accuracy_reward/mean": 0.5390625,
"rewards/video_r1_accuracy_reward/std": 0.5004304051399231,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 182
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 93.0,
"completions/max_terminated_length": 93.0,
"completions/mean_length": 40.1875,
"completions/mean_terminated_length": 40.1875,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.1929256916046143,
"epoch": 0.4621212121212121,
"frac_reward_zero_std": 0.75,
"grad_norm": 2.8813252449035645,
"learning_rate": 5.404040404040404e-07,
"loss": -0.0,
"num_tokens": 19361085.0,
"reward": 0.7843749523162842,
"reward_std": 0.07706765085458755,
"rewards/video_r1_accuracy_reward/mean": 0.7734375,
"rewards/video_r1_accuracy_reward/std": 0.4202519655227661,
"rewards/video_r1_format_reward/mean": 0.9921875,
"rewards/video_r1_format_reward/std": 0.0883883461356163,
"step": 183
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 84.0,
"completions/max_terminated_length": 84.0,
"completions/mean_length": 39.515625,
"completions/mean_terminated_length": 39.515625,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.1405987739562988,
"epoch": 0.46464646464646464,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 5.378787878787878e-07,
"loss": 0.0,
"num_tokens": 19462775.0,
"reward": 0.5249999761581421,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.5,
"rewards/video_r1_accuracy_reward/std": 0.5019646286964417,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 184
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 94.0,
"completions/max_terminated_length": 94.0,
"completions/mean_length": 38.4296875,
"completions/mean_terminated_length": 38.4296875,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.2080726623535156,
"epoch": 0.4671717171717172,
"frac_reward_zero_std": 0.6875,
"grad_norm": 2.3265841007232666,
"learning_rate": 5.353535353535354e-07,
"loss": -0.0,
"num_tokens": 19543830.0,
"reward": 0.703125,
"reward_std": 0.13842955231666565,
"rewards/video_r1_accuracy_reward/mean": 0.6875,
"rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 185
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 73.0,
"completions/max_terminated_length": 73.0,
"completions/mean_length": 40.359375,
"completions/mean_terminated_length": 40.359375,
"completions/min_length": 19.0,
"completions/min_terminated_length": 19.0,
"entropy": 1.1394485235214233,
"epoch": 0.4696969696969697,
"frac_reward_zero_std": 0.8125,
"grad_norm": 2.003188371658325,
"learning_rate": 5.328282828282828e-07,
"loss": -0.0,
"num_tokens": 19636020.0,
"reward": 0.6734374761581421,
"reward_std": 0.07920700311660767,
"rewards/video_r1_accuracy_reward/mean": 0.65625,
"rewards/video_r1_accuracy_reward/std": 0.47682511806488037,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 186
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 116.0,
"completions/max_terminated_length": 116.0,
"completions/mean_length": 42.0703125,
"completions/mean_terminated_length": 42.0703125,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.2103469371795654,
"epoch": 0.4722222222222222,
"frac_reward_zero_std": 0.8125,
"grad_norm": 1.9415223598480225,
"learning_rate": 5.303030303030303e-07,
"loss": 0.0,
"num_tokens": 19746389.0,
"reward": 0.717968761920929,
"reward_std": 0.06946974992752075,
"rewards/video_r1_accuracy_reward/mean": 0.703125,
"rewards/video_r1_accuracy_reward/std": 0.45867621898651123,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 187
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 115.0,
"completions/max_terminated_length": 115.0,
"completions/mean_length": 39.9921875,
"completions/mean_terminated_length": 39.9921875,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.0821011066436768,
"epoch": 0.47474747474747475,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 5.277777777777777e-07,
"loss": 0.0,
"num_tokens": 19840548.0,
"reward": 0.8218749761581421,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.8125,
"rewards/video_r1_accuracy_reward/std": 0.39184603095054626,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 188
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 109.0,
"completions/max_terminated_length": 109.0,
"completions/mean_length": 39.6953125,
"completions/mean_terminated_length": 39.6953125,
"completions/min_length": 24.0,
"completions/min_terminated_length": 24.0,
"entropy": 1.1291919946670532,
"epoch": 0.4772727272727273,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.641886830329895,
"learning_rate": 5.252525252525253e-07,
"loss": 0.0,
"num_tokens": 19931517.0,
"reward": 0.762499988079071,
"reward_std": 0.04198446497321129,
"rewards/video_r1_accuracy_reward/mean": 0.75,
"rewards/video_r1_accuracy_reward/std": 0.434714138507843,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 189
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 108.0,
"completions/max_terminated_length": 108.0,
"completions/mean_length": 43.8828125,
"completions/mean_terminated_length": 43.8828125,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.197535753250122,
"epoch": 0.4797979797979798,
"frac_reward_zero_std": 0.6875,
"grad_norm": 2.2403042316436768,
"learning_rate": 5.227272727272727e-07,
"loss": 0.0,
"num_tokens": 20039918.0,
"reward": 0.62890625,
"reward_std": 0.13092872500419617,
"rewards/video_r1_accuracy_reward/mean": 0.609375,
"rewards/video_r1_accuracy_reward/std": 0.4898075461387634,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 190
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 88.0,
"completions/max_terminated_length": 88.0,
"completions/mean_length": 41.3671875,
"completions/mean_terminated_length": 41.3671875,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.253387451171875,
"epoch": 0.4823232323232323,
"frac_reward_zero_std": 0.8125,
"grad_norm": 1.8688163757324219,
"learning_rate": 5.202020202020201e-07,
"loss": 0.0,
"num_tokens": 20148373.0,
"reward": 0.4136718511581421,
"reward_std": 0.08570004999637604,
"rewards/video_r1_accuracy_reward/mean": 0.3828125,
"rewards/video_r1_accuracy_reward/std": 0.4879830479621887,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 191
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 96.0,
"completions/max_terminated_length": 96.0,
"completions/mean_length": 36.578125,
"completions/mean_terminated_length": 36.578125,
"completions/min_length": 19.0,
"completions/min_terminated_length": 19.0,
"entropy": 1.1290706396102905,
"epoch": 0.48484848484848486,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.7567572593688965,
"learning_rate": 5.176767676767676e-07,
"loss": -0.0,
"num_tokens": 20245007.0,
"reward": 0.814453125,
"reward_std": 0.04847751557826996,
"rewards/video_r1_accuracy_reward/mean": 0.8046875,
"rewards/video_r1_accuracy_reward/std": 0.3979988098144531,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 192
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 89.0,
"completions/max_terminated_length": 89.0,
"completions/mean_length": 43.1875,
"completions/mean_terminated_length": 43.1875,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 1.1787712574005127,
"epoch": 0.48737373737373735,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.6133161783218384,
"learning_rate": 5.151515151515151e-07,
"loss": -0.0,
"num_tokens": 20340607.0,
"reward": 0.703125,
"reward_std": 0.06145896762609482,
"rewards/video_r1_accuracy_reward/mean": 0.6875,
"rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 193
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 94.0,
"completions/max_terminated_length": 94.0,
"completions/mean_length": 41.0,
"completions/mean_terminated_length": 41.0,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.215273141860962,
"epoch": 0.4898989898989899,
"frac_reward_zero_std": 0.75,
"grad_norm": 2.7843291759490967,
"learning_rate": 5.126262626262626e-07,
"loss": 0.0,
"num_tokens": 20446183.0,
"reward": 0.5992187261581421,
"reward_std": 0.09695503115653992,
"rewards/video_r1_accuracy_reward/mean": 0.578125,
"rewards/video_r1_accuracy_reward/std": 0.4957992732524872,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 194
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 95.0,
"completions/max_terminated_length": 95.0,
"completions/mean_length": 42.1640625,
"completions/mean_terminated_length": 42.1640625,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.1246238946914673,
"epoch": 0.49242424242424243,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.0955389738082886,
"learning_rate": 5.1010101010101e-07,
"loss": 0.0,
"num_tokens": 20552172.0,
"reward": 0.6507812738418579,
"reward_std": 0.04958236962556839,
"rewards/video_r1_accuracy_reward/mean": 0.6328125,
"rewards/video_r1_accuracy_reward/std": 0.4839322865009308,
"rewards/video_r1_format_reward/mean": 0.9921875,
"rewards/video_r1_format_reward/std": 0.0883883461356163,
"step": 195
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 85.0,
"completions/max_terminated_length": 85.0,
"completions/mean_length": 42.5,
"completions/mean_terminated_length": 42.5,
"completions/min_length": 24.0,
"completions/min_terminated_length": 24.0,
"entropy": 1.1459453105926514,
"epoch": 0.494949494949495,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.4178069829940796,
"learning_rate": 5.075757575757576e-07,
"loss": 0.0,
"num_tokens": 20654036.0,
"reward": 0.591796875,
"reward_std": 0.04847751557826996,
"rewards/video_r1_accuracy_reward/mean": 0.5703125,
"rewards/video_r1_accuracy_reward/std": 0.4969765841960907,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 196
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 83.0,
"completions/max_terminated_length": 83.0,
"completions/mean_length": 39.59375,
"completions/mean_terminated_length": 39.59375,
"completions/min_length": 16.0,
"completions/min_terminated_length": 16.0,
"entropy": 1.142756700515747,
"epoch": 0.49747474747474746,
"frac_reward_zero_std": 0.8125,
"grad_norm": 2.4916136264801025,
"learning_rate": 5.05050505050505e-07,
"loss": -0.0,
"num_tokens": 20759896.0,
"reward": 0.4804687201976776,
"reward_std": 0.08345898985862732,
"rewards/video_r1_accuracy_reward/mean": 0.453125,
"rewards/video_r1_accuracy_reward/std": 0.4997538626194,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 197
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 83.0,
"completions/max_terminated_length": 83.0,
"completions/mean_length": 40.7421875,
"completions/mean_terminated_length": 40.7421875,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.1972713470458984,
"epoch": 0.5,
"frac_reward_zero_std": 0.6875,
"grad_norm": 3.071634292602539,
"learning_rate": 5.025252525252525e-07,
"loss": -0.0,
"num_tokens": 20869311.0,
"reward": 0.703125,
"reward_std": 0.1254434585571289,
"rewards/video_r1_accuracy_reward/mean": 0.6875,
"rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 198
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 91.0,
"completions/max_terminated_length": 91.0,
"completions/mean_length": 41.9609375,
"completions/mean_terminated_length": 41.9609375,
"completions/min_length": 19.0,
"completions/min_terminated_length": 19.0,
"entropy": 1.2184077501296997,
"epoch": 0.5025252525252525,
"frac_reward_zero_std": 0.8125,
"grad_norm": 2.4103143215179443,
"learning_rate": 5e-07,
"loss": -0.0,
"num_tokens": 20965562.0,
"reward": 0.5992187857627869,
"reward_std": 0.08345898985862732,
"rewards/video_r1_accuracy_reward/mean": 0.578125,
"rewards/video_r1_accuracy_reward/std": 0.4957992732524872,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 199
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 75.0,
"completions/max_terminated_length": 75.0,
"completions/mean_length": 37.8515625,
"completions/mean_terminated_length": 37.8515625,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.0560106039047241,
"epoch": 0.5050505050505051,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.7731906175613403,
"learning_rate": 4.974747474747474e-07,
"loss": -0.0,
"num_tokens": 21069135.0,
"reward": 0.7328125238418579,
"reward_std": 0.05497056990861893,
"rewards/video_r1_accuracy_reward/mean": 0.71875,
"rewards/video_r1_accuracy_reward/std": 0.4513758420944214,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 200
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 81.0,
"completions/max_terminated_length": 81.0,
"completions/mean_length": 40.6953125,
"completions/mean_terminated_length": 40.6953125,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.1646404266357422,
"epoch": 0.5075757575757576,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 4.949494949494949e-07,
"loss": 0.0,
"num_tokens": 21182136.0,
"reward": 0.703125,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.6875,
"rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 201
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 83.0,
"completions/max_terminated_length": 83.0,
"completions/mean_length": 39.3828125,
"completions/mean_terminated_length": 39.3828125,
"completions/min_length": 24.0,
"completions/min_terminated_length": 24.0,
"entropy": 1.173717975616455,
"epoch": 0.51010101010101,
"frac_reward_zero_std": 0.8125,
"grad_norm": 1.3920958042144775,
"learning_rate": 4.924242424242424e-07,
"loss": 0.0,
"num_tokens": 21284945.0,
"reward": 0.6882812976837158,
"reward_std": 0.06946974992752075,
"rewards/video_r1_accuracy_reward/mean": 0.671875,
"rewards/video_r1_accuracy_reward/std": 0.4713755249977112,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 202
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 95.0,
"completions/max_terminated_length": 95.0,
"completions/mean_length": 38.6875,
"completions/mean_terminated_length": 38.6875,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.1436386108398438,
"epoch": 0.5126262626262627,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.3957781791687012,
"learning_rate": 4.898989898989898e-07,
"loss": 0.0,
"num_tokens": 21389041.0,
"reward": 0.762499988079071,
"reward_std": 0.04198446497321129,
"rewards/video_r1_accuracy_reward/mean": 0.75,
"rewards/video_r1_accuracy_reward/std": 0.434714138507843,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 203
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 84.0,
"completions/max_terminated_length": 84.0,
"completions/mean_length": 37.1171875,
"completions/mean_terminated_length": 37.1171875,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.0701844692230225,
"epoch": 0.5151515151515151,
"frac_reward_zero_std": 0.75,
"grad_norm": 6.448522090911865,
"learning_rate": 4.873737373737373e-07,
"loss": -0.0,
"num_tokens": 21490960.0,
"reward": 0.5249999761581421,
"reward_std": 0.11094427108764648,
"rewards/video_r1_accuracy_reward/mean": 0.5,
"rewards/video_r1_accuracy_reward/std": 0.5019646286964417,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 204
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 81.0,
"completions/max_terminated_length": 81.0,
"completions/mean_length": 36.6484375,
"completions/mean_terminated_length": 36.6484375,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.1014225482940674,
"epoch": 0.5176767676767676,
"frac_reward_zero_std": 0.9375,
"grad_norm": 1.2016708850860596,
"learning_rate": 4.848484848484849e-07,
"loss": -0.0,
"num_tokens": 21589139.0,
"reward": 0.7699218988418579,
"reward_std": 0.020992232486605644,
"rewards/video_r1_accuracy_reward/mean": 0.7578125,
"rewards/video_r1_accuracy_reward/std": 0.4300905168056488,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 205
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 103.0,
"completions/max_terminated_length": 103.0,
"completions/mean_length": 42.125,
"completions/mean_terminated_length": 42.125,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 1.1724364757537842,
"epoch": 0.5202020202020202,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 4.823232323232323e-07,
"loss": 0.0,
"num_tokens": 21683795.0,
"reward": 0.762499988079071,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.75,
"rewards/video_r1_accuracy_reward/std": 0.434714138507843,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 206
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 79.0,
"completions/max_terminated_length": 79.0,
"completions/mean_length": 38.78125,
"completions/mean_terminated_length": 38.78125,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.0875164270401,
"epoch": 0.5227272727272727,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 4.797979797979798e-07,
"loss": 0.0,
"num_tokens": 21786679.0,
"reward": 0.6437499523162842,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.625,
"rewards/video_r1_accuracy_reward/std": 0.4860251843929291,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 207
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 83.0,
"completions/max_terminated_length": 83.0,
"completions/mean_length": 38.7734375,
"completions/mean_terminated_length": 38.7734375,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.1065541505813599,
"epoch": 0.5252525252525253,
"frac_reward_zero_std": 0.875,
"grad_norm": 2.205068588256836,
"learning_rate": 4.772727272727273e-07,
"loss": 0.0,
"num_tokens": 21881450.0,
"reward": 0.7847656607627869,
"reward_std": 0.04847751557826996,
"rewards/video_r1_accuracy_reward/mean": 0.7734375,
"rewards/video_r1_accuracy_reward/std": 0.4202519655227661,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 208
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 75.0,
"completions/max_terminated_length": 75.0,
"completions/mean_length": 36.2109375,
"completions/mean_terminated_length": 36.2109375,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.0509859323501587,
"epoch": 0.5277777777777778,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.425723671913147,
"learning_rate": 4.7474747474747474e-07,
"loss": -0.0,
"num_tokens": 21985117.0,
"reward": 0.7699218988418579,
"reward_std": 0.05821476876735687,
"rewards/video_r1_accuracy_reward/mean": 0.7578125,
"rewards/video_r1_accuracy_reward/std": 0.4300905168056488,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 209
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 83.0,
"completions/max_terminated_length": 83.0,
"completions/mean_length": 38.3671875,
"completions/mean_terminated_length": 38.3671875,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.0938265323638916,
"epoch": 0.5303030303030303,
"frac_reward_zero_std": 0.75,
"grad_norm": 2.6129071712493896,
"learning_rate": 4.722222222222222e-07,
"loss": -0.0,
"num_tokens": 22083148.0,
"reward": 0.8070312738418579,
"reward_std": 0.11094427108764648,
"rewards/video_r1_accuracy_reward/mean": 0.796875,
"rewards/video_r1_accuracy_reward/std": 0.40390563011169434,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 210
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 105.0,
"completions/max_terminated_length": 105.0,
"completions/mean_length": 38.5390625,
"completions/mean_terminated_length": 38.5390625,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.1249573230743408,
"epoch": 0.5328282828282829,
"frac_reward_zero_std": 0.9375,
"grad_norm": 1.0192317962646484,
"learning_rate": 4.696969696969697e-07,
"loss": 0.0,
"num_tokens": 22182537.0,
"reward": 0.651171863079071,
"reward_std": 0.020992232486605644,
"rewards/video_r1_accuracy_reward/mean": 0.6328125,
"rewards/video_r1_accuracy_reward/std": 0.4839322865009308,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 211
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 79.0,
"completions/max_terminated_length": 79.0,
"completions/mean_length": 37.875,
"completions/mean_terminated_length": 37.875,
"completions/min_length": 14.0,
"completions/min_terminated_length": 14.0,
"entropy": 1.1420645713806152,
"epoch": 0.5353535353535354,
"frac_reward_zero_std": 0.9375,
"grad_norm": 1.2227110862731934,
"learning_rate": 4.6717171717171714e-07,
"loss": 0.0,
"num_tokens": 22284025.0,
"reward": 0.755078136920929,
"reward_std": 0.020992232486605644,
"rewards/video_r1_accuracy_reward/mean": 0.7421875,
"rewards/video_r1_accuracy_reward/std": 0.43914905190467834,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 212
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 105.0,
"completions/max_terminated_length": 105.0,
"completions/mean_length": 39.8828125,
"completions/mean_terminated_length": 39.8828125,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.0884701013565063,
"epoch": 0.5378787878787878,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 4.646464646464646e-07,
"loss": 0.0,
"num_tokens": 22386154.0,
"reward": 0.8812500238418579,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.875,
"rewards/video_r1_accuracy_reward/std": 0.3320184051990509,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 213
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 93.0,
"completions/max_terminated_length": 93.0,
"completions/mean_length": 39.0625,
"completions/mean_terminated_length": 39.0625,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.0740153789520264,
"epoch": 0.5404040404040404,
"frac_reward_zero_std": 0.8125,
"grad_norm": 2.3575637340545654,
"learning_rate": 4.6212121212121207e-07,
"loss": -0.0,
"num_tokens": 22478426.0,
"reward": 0.7105468511581421,
"reward_std": 0.07596279680728912,
"rewards/video_r1_accuracy_reward/mean": 0.6953125,
"rewards/video_r1_accuracy_reward/std": 0.46208351850509644,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 214
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 80.0,
"completions/max_terminated_length": 80.0,
"completions/mean_length": 36.9765625,
"completions/mean_terminated_length": 36.9765625,
"completions/min_length": 18.0,
"completions/min_terminated_length": 18.0,
"entropy": 1.0606670379638672,
"epoch": 0.5429292929292929,
"frac_reward_zero_std": 0.9375,
"grad_norm": 0.6686134338378906,
"learning_rate": 4.595959595959596e-07,
"loss": 0.0,
"num_tokens": 22570903.0,
"reward": 0.6957031488418579,
"reward_std": 0.020992232486605644,
"rewards/video_r1_accuracy_reward/mean": 0.6796875,
"rewards/video_r1_accuracy_reward/std": 0.4684300124645233,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 215
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 88.0,
"completions/max_terminated_length": 88.0,
"completions/mean_length": 39.2109375,
"completions/mean_terminated_length": 39.2109375,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.117194414138794,
"epoch": 0.5454545454545454,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 4.5707070707070705e-07,
"loss": 0.0,
"num_tokens": 22660930.0,
"reward": 0.703125,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.6875,
"rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 216
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 86.0,
"completions/max_terminated_length": 86.0,
"completions/mean_length": 38.84375,
"completions/mean_terminated_length": 38.84375,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.1242808103561401,
"epoch": 0.547979797979798,
"frac_reward_zero_std": 0.9375,
"grad_norm": 1.227063536643982,
"learning_rate": 4.545454545454545e-07,
"loss": 0.0,
"num_tokens": 22753702.0,
"reward": 0.814453125,
"reward_std": 0.020992232486605644,
"rewards/video_r1_accuracy_reward/mean": 0.8046875,
"rewards/video_r1_accuracy_reward/std": 0.3979988098144531,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 217
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 68.0,
"completions/max_terminated_length": 68.0,
"completions/mean_length": 36.9453125,
"completions/mean_terminated_length": 36.9453125,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.083737850189209,
"epoch": 0.5505050505050505,
"frac_reward_zero_std": 0.8125,
"grad_norm": 3.3731985092163086,
"learning_rate": 4.5202020202020204e-07,
"loss": 0.0,
"num_tokens": 22856391.0,
"reward": 0.7996094226837158,
"reward_std": 0.08245119452476501,
"rewards/video_r1_accuracy_reward/mean": 0.7890625,
"rewards/video_r1_accuracy_reward/std": 0.4095771610736847,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 218
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 92.0,
"completions/max_terminated_length": 92.0,
"completions/mean_length": 39.375,
"completions/mean_terminated_length": 39.375,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.153437614440918,
"epoch": 0.553030303030303,
"frac_reward_zero_std": 0.8125,
"grad_norm": 3.4006295204162598,
"learning_rate": 4.494949494949495e-07,
"loss": 0.0,
"num_tokens": 22955055.0,
"reward": 0.651171863079071,
"reward_std": 0.0727139487862587,
"rewards/video_r1_accuracy_reward/mean": 0.6328125,
"rewards/video_r1_accuracy_reward/std": 0.4839322865009308,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 219
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 71.0,
"completions/max_terminated_length": 71.0,
"completions/mean_length": 38.5703125,
"completions/mean_terminated_length": 38.5703125,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.126805305480957,
"epoch": 0.5555555555555556,
"frac_reward_zero_std": 0.625,
"grad_norm": 2.5174307823181152,
"learning_rate": 4.469696969696969e-07,
"loss": -0.0,
"num_tokens": 23062816.0,
"reward": 0.666015625,
"reward_std": 0.1464356929063797,
"rewards/video_r1_accuracy_reward/mean": 0.6484375,
"rewards/video_r1_accuracy_reward/std": 0.4793342351913452,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 220
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 93.0,
"completions/max_terminated_length": 93.0,
"completions/mean_length": 37.0625,
"completions/mean_terminated_length": 37.0625,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.0423493385314941,
"epoch": 0.5580808080808081,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.49821937084198,
"learning_rate": 4.444444444444444e-07,
"loss": -0.0,
"num_tokens": 23170032.0,
"reward": 0.7398437857627869,
"reward_std": 0.05291558802127838,
"rewards/video_r1_accuracy_reward/mean": 0.7265625,
"rewards/video_r1_accuracy_reward/std": 0.447474867105484,
"rewards/video_r1_format_reward/mean": 0.9921875,
"rewards/video_r1_format_reward/std": 0.0883883461356163,
"step": 221
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 70.0,
"completions/max_terminated_length": 70.0,
"completions/mean_length": 34.9765625,
"completions/mean_terminated_length": 34.9765625,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.0868003368377686,
"epoch": 0.5606060606060606,
"frac_reward_zero_std": 0.9375,
"grad_norm": 1.3574455976486206,
"learning_rate": 4.419191919191919e-07,
"loss": 0.0,
"num_tokens": 23272485.0,
"reward": 0.6363281011581421,
"reward_std": 0.020992232486605644,
"rewards/video_r1_accuracy_reward/mean": 0.6171875,
"rewards/video_r1_accuracy_reward/std": 0.4879830479621887,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 222
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 71.0,
"completions/max_terminated_length": 71.0,
"completions/mean_length": 37.90625,
"completions/mean_terminated_length": 37.90625,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.0563578605651855,
"epoch": 0.5631313131313131,
"frac_reward_zero_std": 0.9375,
"grad_norm": 0.896294355392456,
"learning_rate": 4.3939393939393937e-07,
"loss": -0.0,
"num_tokens": 23366809.0,
"reward": 0.799609363079071,
"reward_std": 0.03072948381304741,
"rewards/video_r1_accuracy_reward/mean": 0.7890625,
"rewards/video_r1_accuracy_reward/std": 0.4095771610736847,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 223
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 78.0,
"completions/max_terminated_length": 78.0,
"completions/mean_length": 37.359375,
"completions/mean_terminated_length": 37.359375,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.0919694900512695,
"epoch": 0.5656565656565656,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.2921442985534668,
"learning_rate": 4.3686868686868683e-07,
"loss": -0.0,
"num_tokens": 23468999.0,
"reward": 0.591796875,
"reward_std": 0.05821476876735687,
"rewards/video_r1_accuracy_reward/mean": 0.5703125,
"rewards/video_r1_accuracy_reward/std": 0.4969765841960907,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 224
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 96.0,
"completions/max_terminated_length": 96.0,
"completions/mean_length": 39.7734375,
"completions/mean_terminated_length": 39.7734375,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.1395623683929443,
"epoch": 0.5681818181818182,
"frac_reward_zero_std": 0.75,
"grad_norm": 3.0386202335357666,
"learning_rate": 4.3434343434343435e-07,
"loss": -0.0,
"num_tokens": 23574730.0,
"reward": 0.7476562261581421,
"reward_std": 0.11519625782966614,
"rewards/video_r1_accuracy_reward/mean": 0.734375,
"rewards/video_r1_accuracy_reward/std": 0.44340085983276367,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 225
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 72.0,
"completions/max_terminated_length": 72.0,
"completions/mean_length": 36.4453125,
"completions/mean_terminated_length": 36.4453125,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.0388587713241577,
"epoch": 0.5707070707070707,
"frac_reward_zero_std": 0.75,
"grad_norm": 3.514066219329834,
"learning_rate": 4.318181818181818e-07,
"loss": -0.0,
"num_tokens": 23671659.0,
"reward": 0.6957030892372131,
"reward_std": 0.0947139710187912,
"rewards/video_r1_accuracy_reward/mean": 0.6796875,
"rewards/video_r1_accuracy_reward/std": 0.4684300124645233,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 226
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 97.0,
"completions/max_terminated_length": 97.0,
"completions/mean_length": 37.9375,
"completions/mean_terminated_length": 37.9375,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.0328450202941895,
"epoch": 0.5732323232323232,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.3931517601013184,
"learning_rate": 4.292929292929293e-07,
"loss": -0.0,
"num_tokens": 23776019.0,
"reward": 0.688281238079071,
"reward_std": 0.051721714437007904,
"rewards/video_r1_accuracy_reward/mean": 0.671875,
"rewards/video_r1_accuracy_reward/std": 0.4713755249977112,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 227
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 384.0,
"completions/max_terminated_length": 80.0,
"completions/mean_length": 43.9921875,
"completions/mean_terminated_length": 41.31496047973633,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.0174750089645386,
"epoch": 0.5757575757575758,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.0087193250656128,
"learning_rate": 4.267676767676767e-07,
"loss": 0.0,
"num_tokens": 23879906.0,
"reward": 0.784375011920929,
"reward_std": 0.03183433786034584,
"rewards/video_r1_accuracy_reward/mean": 0.7734375,
"rewards/video_r1_accuracy_reward/std": 0.4202519655227661,
"rewards/video_r1_format_reward/mean": 0.9921875,
"rewards/video_r1_format_reward/std": 0.0883883461356163,
"step": 228
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 85.0,
"completions/max_terminated_length": 85.0,
"completions/mean_length": 39.2734375,
"completions/mean_terminated_length": 39.2734375,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 0.9948429465293884,
"epoch": 0.5782828282828283,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 4.242424242424242e-07,
"loss": 0.0,
"num_tokens": 23975525.0,
"reward": 0.5843749642372131,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.5625,
"rewards/video_r1_accuracy_reward/std": 0.49802759289741516,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 229
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 91.0,
"completions/max_terminated_length": 91.0,
"completions/mean_length": 39.765625,
"completions/mean_terminated_length": 39.765625,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.0840778350830078,
"epoch": 0.5808080808080808,
"frac_reward_zero_std": 0.9375,
"grad_norm": 1.692561388015747,
"learning_rate": 4.217171717171717e-07,
"loss": 0.0,
"num_tokens": 24076743.0,
"reward": 0.7550780773162842,
"reward_std": 0.020992232486605644,
"rewards/video_r1_accuracy_reward/mean": 0.7421875,
"rewards/video_r1_accuracy_reward/std": 0.43914905190467834,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 230
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 88.0,
"completions/max_terminated_length": 88.0,
"completions/mean_length": 39.5546875,
"completions/mean_terminated_length": 39.5546875,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 1.0627765655517578,
"epoch": 0.5833333333333334,
"frac_reward_zero_std": 0.9375,
"grad_norm": 1.6762282848358154,
"learning_rate": 4.1919191919191915e-07,
"loss": 0.0,
"num_tokens": 24168014.0,
"reward": 0.6585937738418579,
"reward_std": 0.027485283091664314,
"rewards/video_r1_accuracy_reward/mean": 0.640625,
"rewards/video_r1_accuracy_reward/std": 0.481702595949173,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 231
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 80.0,
"completions/max_terminated_length": 80.0,
"completions/mean_length": 42.03125,
"completions/mean_terminated_length": 42.03125,
"completions/min_length": 19.0,
"completions/min_terminated_length": 19.0,
"entropy": 1.1012775897979736,
"epoch": 0.5858585858585859,
"frac_reward_zero_std": 0.875,
"grad_norm": 2.1221230030059814,
"learning_rate": 4.1666666666666667e-07,
"loss": 0.0,
"num_tokens": 24264154.0,
"reward": 0.4359374940395355,
"reward_std": 0.05497056618332863,
"rewards/video_r1_accuracy_reward/mean": 0.40625,
"rewards/video_r1_accuracy_reward/std": 0.4930621087551117,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 232
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 92.0,
"completions/max_terminated_length": 92.0,
"completions/mean_length": 41.5625,
"completions/mean_terminated_length": 41.5625,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 1.053170919418335,
"epoch": 0.5883838383838383,
"frac_reward_zero_std": 0.9375,
"grad_norm": 1.132128357887268,
"learning_rate": 4.1414141414141413e-07,
"loss": -0.0,
"num_tokens": 24373658.0,
"reward": 0.7328125238418579,
"reward_std": 0.03173727169632912,
"rewards/video_r1_accuracy_reward/mean": 0.71875,
"rewards/video_r1_accuracy_reward/std": 0.4513758420944214,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 233
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 87.0,
"completions/max_terminated_length": 87.0,
"completions/mean_length": 41.6484375,
"completions/mean_terminated_length": 41.6484375,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.1390644311904907,
"epoch": 0.5909090909090909,
"frac_reward_zero_std": 0.8125,
"grad_norm": 1.8743163347244263,
"learning_rate": 4.116161616161616e-07,
"loss": 0.0,
"num_tokens": 24482197.0,
"reward": 0.6363281607627869,
"reward_std": 0.07596279680728912,
"rewards/video_r1_accuracy_reward/mean": 0.6171875,
"rewards/video_r1_accuracy_reward/std": 0.4879830479621887,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 234
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 91.0,
"completions/max_terminated_length": 91.0,
"completions/mean_length": 41.4921875,
"completions/mean_terminated_length": 41.4921875,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.1158329248428345,
"epoch": 0.5934343434343434,
"frac_reward_zero_std": 0.9375,
"grad_norm": 0.9406242966651917,
"learning_rate": 4.090909090909091e-07,
"loss": 0.0,
"num_tokens": 24573972.0,
"reward": 0.576953113079071,
"reward_std": 0.020992232486605644,
"rewards/video_r1_accuracy_reward/mean": 0.5546875,
"rewards/video_r1_accuracy_reward/std": 0.4989531338214874,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 235
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 84.0,
"completions/max_terminated_length": 84.0,
"completions/mean_length": 38.7734375,
"completions/mean_terminated_length": 38.7734375,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.0798749923706055,
"epoch": 0.5959595959595959,
"frac_reward_zero_std": 0.9375,
"grad_norm": 1.1411508321762085,
"learning_rate": 4.065656565656566e-07,
"loss": -0.0,
"num_tokens": 24676151.0,
"reward": 0.7027343511581421,
"reward_std": 0.0011048546293750405,
"rewards/video_r1_accuracy_reward/mean": 0.6875,
"rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
"rewards/video_r1_format_reward/mean": 0.9921875,
"rewards/video_r1_format_reward/std": 0.0883883461356163,
"step": 236
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 97.0,
"completions/max_terminated_length": 97.0,
"completions/mean_length": 43.1640625,
"completions/mean_terminated_length": 43.1640625,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 1.0829260349273682,
"epoch": 0.5984848484848485,
"frac_reward_zero_std": 0.9375,
"grad_norm": 1.0807678699493408,
"learning_rate": 4.04040404040404e-07,
"loss": -0.0,
"num_tokens": 24789604.0,
"reward": 0.7847656011581421,
"reward_std": 0.03072948195040226,
"rewards/video_r1_accuracy_reward/mean": 0.7734375,
"rewards/video_r1_accuracy_reward/std": 0.4202519655227661,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 237
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 81.0,
"completions/max_terminated_length": 81.0,
"completions/mean_length": 42.8828125,
"completions/mean_terminated_length": 42.8828125,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.1113200187683105,
"epoch": 0.601010101010101,
"frac_reward_zero_std": 0.8125,
"grad_norm": 1.7582932710647583,
"learning_rate": 4.0151515151515146e-07,
"loss": 0.0,
"num_tokens": 24890173.0,
"reward": 0.7105469107627869,
"reward_std": 0.06297669559717178,
"rewards/video_r1_accuracy_reward/mean": 0.6953125,
"rewards/video_r1_accuracy_reward/std": 0.46208351850509644,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 238
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 75.0,
"completions/max_terminated_length": 75.0,
"completions/mean_length": 38.59375,
"completions/mean_terminated_length": 38.59375,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.0123108625411987,
"epoch": 0.6035353535353535,
"frac_reward_zero_std": 0.75,
"grad_norm": 2.2676467895507812,
"learning_rate": 3.98989898989899e-07,
"loss": -0.0,
"num_tokens": 24995257.0,
"reward": 0.606640636920929,
"reward_std": 0.1131853312253952,
"rewards/video_r1_accuracy_reward/mean": 0.5859375,
"rewards/video_r1_accuracy_reward/std": 0.49449479579925537,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 239
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 118.0,
"completions/max_terminated_length": 118.0,
"completions/mean_length": 41.9765625,
"completions/mean_terminated_length": 41.9765625,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 1.0618376731872559,
"epoch": 0.6060606060606061,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 3.9646464646464644e-07,
"loss": 0.0,
"num_tokens": 25087574.0,
"reward": 0.5843749642372131,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.5625,
"rewards/video_r1_accuracy_reward/std": 0.49802759289741516,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 240
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 70.0,
"completions/max_terminated_length": 70.0,
"completions/mean_length": 39.2421875,
"completions/mean_terminated_length": 39.2421875,
"completions/min_length": 24.0,
"completions/min_terminated_length": 24.0,
"entropy": 1.0472171306610107,
"epoch": 0.6085858585858586,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.201456069946289,
"learning_rate": 3.939393939393939e-07,
"loss": 0.0,
"num_tokens": 25177613.0,
"reward": 0.7105468511581421,
"reward_std": 0.04847751557826996,
"rewards/video_r1_accuracy_reward/mean": 0.6953125,
"rewards/video_r1_accuracy_reward/std": 0.46208351850509644,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 241
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 87.0,
"completions/max_terminated_length": 87.0,
"completions/mean_length": 41.1953125,
"completions/mean_terminated_length": 41.1953125,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.0318273305892944,
"epoch": 0.6111111111111112,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 3.9141414141414143e-07,
"loss": 0.0,
"num_tokens": 25275414.0,
"reward": 0.5843749642372131,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.5625,
"rewards/video_r1_accuracy_reward/std": 0.49802759289741516,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 242
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 114.0,
"completions/max_terminated_length": 114.0,
"completions/mean_length": 40.625,
"completions/mean_terminated_length": 40.625,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.0426480770111084,
"epoch": 0.6136363636363636,
"frac_reward_zero_std": 0.8125,
"grad_norm": 1.9526947736740112,
"learning_rate": 3.888888888888889e-07,
"loss": 0.0,
"num_tokens": 25373318.0,
"reward": 0.614062488079071,
"reward_std": 0.06946974992752075,
"rewards/video_r1_accuracy_reward/mean": 0.59375,
"rewards/video_r1_accuracy_reward/std": 0.4930621087551117,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 243
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 87.0,
"completions/max_terminated_length": 87.0,
"completions/mean_length": 41.3515625,
"completions/mean_terminated_length": 41.3515625,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 1.0614213943481445,
"epoch": 0.6161616161616161,
"frac_reward_zero_std": 0.8125,
"grad_norm": 2.547713279724121,
"learning_rate": 3.8636363636363636e-07,
"loss": 0.0,
"num_tokens": 25481939.0,
"reward": 0.7476562261581421,
"reward_std": 0.08345898985862732,
"rewards/video_r1_accuracy_reward/mean": 0.734375,
"rewards/video_r1_accuracy_reward/std": 0.44340085983276367,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 244
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 83.0,
"completions/max_terminated_length": 83.0,
"completions/mean_length": 42.1015625,
"completions/mean_terminated_length": 42.1015625,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.1329345703125,
"epoch": 0.6186868686868687,
"frac_reward_zero_std": 0.9375,
"grad_norm": 1.2779253721237183,
"learning_rate": 3.8383838383838377e-07,
"loss": -0.0,
"num_tokens": 25588960.0,
"reward": 0.7328124642372131,
"reward_std": 0.03173727169632912,
"rewards/video_r1_accuracy_reward/mean": 0.71875,
"rewards/video_r1_accuracy_reward/std": 0.4513758420944214,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 245
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 103.0,
"completions/max_terminated_length": 103.0,
"completions/mean_length": 43.3203125,
"completions/mean_terminated_length": 43.3203125,
"completions/min_length": 25.0,
"completions/min_terminated_length": 25.0,
"entropy": 1.0123302936553955,
"epoch": 0.6212121212121212,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.0937881469726562,
"learning_rate": 3.813131313131313e-07,
"loss": 0.0,
"num_tokens": 25690657.0,
"reward": 0.725390613079071,
"reward_std": 0.04847751557826996,
"rewards/video_r1_accuracy_reward/mean": 0.7109375,
"rewards/video_r1_accuracy_reward/std": 0.45510825514793396,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 246
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 82.0,
"completions/max_terminated_length": 82.0,
"completions/mean_length": 41.9765625,
"completions/mean_terminated_length": 41.9765625,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 1.1118111610412598,
"epoch": 0.6237373737373737,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.778397560119629,
"learning_rate": 3.7878787878787876e-07,
"loss": 0.0,
"num_tokens": 25789478.0,
"reward": 0.6140625476837158,
"reward_std": 0.05497056618332863,
"rewards/video_r1_accuracy_reward/mean": 0.59375,
"rewards/video_r1_accuracy_reward/std": 0.4930621087551117,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 247
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 106.0,
"completions/max_terminated_length": 106.0,
"completions/mean_length": 43.578125,
"completions/mean_terminated_length": 43.578125,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.018457055091858,
"epoch": 0.6262626262626263,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.7696324586868286,
"learning_rate": 3.762626262626262e-07,
"loss": -0.0,
"num_tokens": 25888824.0,
"reward": 0.539843738079071,
"reward_std": 0.05922255665063858,
"rewards/video_r1_accuracy_reward/mean": 0.515625,
"rewards/video_r1_accuracy_reward/std": 0.5017194747924805,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 248
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 101.0,
"completions/max_terminated_length": 101.0,
"completions/mean_length": 43.9453125,
"completions/mean_terminated_length": 43.9453125,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.1335992813110352,
"epoch": 0.6287878787878788,
"frac_reward_zero_std": 0.875,
"grad_norm": 2.2729897499084473,
"learning_rate": 3.7373737373737374e-07,
"loss": -0.0,
"num_tokens": 25983369.0,
"reward": 0.8960937261581421,
"reward_std": 0.06145896762609482,
"rewards/video_r1_accuracy_reward/mean": 0.890625,
"rewards/video_r1_accuracy_reward/std": 0.31333550810813904,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 249
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 76.0,
"completions/max_terminated_length": 76.0,
"completions/mean_length": 41.171875,
"completions/mean_terminated_length": 41.171875,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 1.062988519668579,
"epoch": 0.6313131313131313,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.235278844833374,
"learning_rate": 3.712121212121212e-07,
"loss": -0.0,
"num_tokens": 26080023.0,
"reward": 0.7105468511581421,
"reward_std": 0.05821476876735687,
"rewards/video_r1_accuracy_reward/mean": 0.6953125,
"rewards/video_r1_accuracy_reward/std": 0.46208351850509644,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 250
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 106.0,
"completions/max_terminated_length": 106.0,
"completions/mean_length": 39.1328125,
"completions/mean_terminated_length": 39.1328125,
"completions/min_length": 18.0,
"completions/min_terminated_length": 18.0,
"entropy": 1.0980217456817627,
"epoch": 0.6338383838383839,
"frac_reward_zero_std": 0.9375,
"grad_norm": 1.177210807800293,
"learning_rate": 3.686868686868687e-07,
"loss": -0.0,
"num_tokens": 26168704.0,
"reward": 0.7476562261581421,
"reward_std": 0.027485284954309464,
"rewards/video_r1_accuracy_reward/mean": 0.734375,
"rewards/video_r1_accuracy_reward/std": 0.44340085983276367,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 251
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 79.0,
"completions/max_terminated_length": 79.0,
"completions/mean_length": 40.4453125,
"completions/mean_terminated_length": 40.4453125,
"completions/min_length": 25.0,
"completions/min_terminated_length": 25.0,
"entropy": 0.98891681432724,
"epoch": 0.6363636363636364,
"frac_reward_zero_std": 0.875,
"grad_norm": 2.9645073413848877,
"learning_rate": 3.661616161616162e-07,
"loss": -0.0,
"num_tokens": 26265033.0,
"reward": 0.7921874523162842,
"reward_std": 0.051721714437007904,
"rewards/video_r1_accuracy_reward/mean": 0.78125,
"rewards/video_r1_accuracy_reward/std": 0.41502299904823303,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 252
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 115.0,
"completions/max_terminated_length": 115.0,
"completions/mean_length": 41.390625,
"completions/mean_terminated_length": 41.390625,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.0786819458007812,
"epoch": 0.6388888888888888,
"frac_reward_zero_std": 0.8125,
"grad_norm": 1.4744101762771606,
"learning_rate": 3.636363636363636e-07,
"loss": -0.0,
"num_tokens": 26367979.0,
"reward": 0.6734374761581421,
"reward_std": 0.09319624304771423,
"rewards/video_r1_accuracy_reward/mean": 0.65625,
"rewards/video_r1_accuracy_reward/std": 0.47682511806488037,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 253
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 128.0,
"completions/max_terminated_length": 128.0,
"completions/mean_length": 42.15625,
"completions/mean_terminated_length": 42.15625,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.0700483322143555,
"epoch": 0.6414141414141414,
"frac_reward_zero_std": 0.5625,
"grad_norm": 4.3232808113098145,
"learning_rate": 3.6111111111111107e-07,
"loss": 0.0,
"num_tokens": 26467855.0,
"reward": 0.7328125238418579,
"reward_std": 0.18041402101516724,
"rewards/video_r1_accuracy_reward/mean": 0.71875,
"rewards/video_r1_accuracy_reward/std": 0.4513758420944214,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 254
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 92.0,
"completions/max_terminated_length": 92.0,
"completions/mean_length": 41.0625,
"completions/mean_terminated_length": 41.0625,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.0175724029541016,
"epoch": 0.6439393939393939,
"frac_reward_zero_std": 0.9375,
"grad_norm": 0.968988835811615,
"learning_rate": 3.5858585858585854e-07,
"loss": -0.0,
"num_tokens": 26581903.0,
"reward": 0.651171863079071,
"reward_std": 0.020992232486605644,
"rewards/video_r1_accuracy_reward/mean": 0.6328125,
"rewards/video_r1_accuracy_reward/std": 0.4839322865009308,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 255
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 71.0,
"completions/max_terminated_length": 71.0,
"completions/mean_length": 38.3359375,
"completions/mean_terminated_length": 38.3359375,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.042513132095337,
"epoch": 0.6464646464646465,
"frac_reward_zero_std": 0.875,
"grad_norm": 3.0125222206115723,
"learning_rate": 3.5606060606060606e-07,
"loss": -0.0,
"num_tokens": 26688290.0,
"reward": 0.7625000476837158,
"reward_std": 0.06347454339265823,
"rewards/video_r1_accuracy_reward/mean": 0.75,
"rewards/video_r1_accuracy_reward/std": 0.434714138507843,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 256
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 89.0,
"completions/max_terminated_length": 89.0,
"completions/mean_length": 41.1953125,
"completions/mean_terminated_length": 41.1953125,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.0545010566711426,
"epoch": 0.648989898989899,
"frac_reward_zero_std": 0.9375,
"grad_norm": 1.0394936800003052,
"learning_rate": 3.535353535353535e-07,
"loss": 0.0,
"num_tokens": 26781019.0,
"reward": 0.725390613079071,
"reward_std": 0.03072948195040226,
"rewards/video_r1_accuracy_reward/mean": 0.7109375,
"rewards/video_r1_accuracy_reward/std": 0.45510825514793396,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 257
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 110.0,
"completions/max_terminated_length": 110.0,
"completions/mean_length": 39.71875,
"completions/mean_terminated_length": 39.71875,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.084099292755127,
"epoch": 0.6515151515151515,
"frac_reward_zero_std": 0.9375,
"grad_norm": 0.8688188791275024,
"learning_rate": 3.51010101010101e-07,
"loss": 0.0,
"num_tokens": 26885175.0,
"reward": 0.5472655892372131,
"reward_std": 0.03072948195040226,
"rewards/video_r1_accuracy_reward/mean": 0.5234375,
"rewards/video_r1_accuracy_reward/std": 0.5014128684997559,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 258
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 87.0,
"completions/max_terminated_length": 87.0,
"completions/mean_length": 40.6875,
"completions/mean_terminated_length": 40.6875,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 1.0880751609802246,
"epoch": 0.6540404040404041,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.7870049476623535,
"learning_rate": 3.484848484848485e-07,
"loss": 0.0,
"num_tokens": 26989175.0,
"reward": 0.539843738079071,
"reward_std": 0.051721714437007904,
"rewards/video_r1_accuracy_reward/mean": 0.515625,
"rewards/video_r1_accuracy_reward/std": 0.5017194747924805,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 259
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 85.0,
"completions/max_terminated_length": 85.0,
"completions/mean_length": 43.2734375,
"completions/mean_terminated_length": 43.2734375,
"completions/min_length": 25.0,
"completions/min_terminated_length": 25.0,
"entropy": 1.10056734085083,
"epoch": 0.6565656565656566,
"frac_reward_zero_std": 0.8125,
"grad_norm": 2.1377336978912354,
"learning_rate": 3.4595959595959597e-07,
"loss": -0.0,
"num_tokens": 27096962.0,
"reward": 0.6585937738418579,
"reward_std": 0.07920700311660767,
"rewards/video_r1_accuracy_reward/mean": 0.640625,
"rewards/video_r1_accuracy_reward/std": 0.481702595949173,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 260
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 72.0,
"completions/max_terminated_length": 72.0,
"completions/mean_length": 40.546875,
"completions/mean_terminated_length": 40.546875,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.0717473030090332,
"epoch": 0.6590909090909091,
"frac_reward_zero_std": 0.8125,
"grad_norm": 1.8865928649902344,
"learning_rate": 3.434343434343434e-07,
"loss": 0.0,
"num_tokens": 27190608.0,
"reward": 0.703125,
"reward_std": 0.06946974992752075,
"rewards/video_r1_accuracy_reward/mean": 0.6875,
"rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 261
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 77.0,
"completions/max_terminated_length": 77.0,
"completions/mean_length": 41.6328125,
"completions/mean_terminated_length": 41.6328125,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 1.0417635440826416,
"epoch": 0.6616161616161617,
"frac_reward_zero_std": 0.8125,
"grad_norm": 2.4803223609924316,
"learning_rate": 3.4090909090909085e-07,
"loss": -0.0,
"num_tokens": 27297481.0,
"reward": 0.7328125238418579,
"reward_std": 0.08670784533023834,
"rewards/video_r1_accuracy_reward/mean": 0.71875,
"rewards/video_r1_accuracy_reward/std": 0.4513758420944214,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 262
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 92.0,
"completions/max_terminated_length": 92.0,
"completions/mean_length": 41.2578125,
"completions/mean_terminated_length": 41.2578125,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.068854570388794,
"epoch": 0.6641414141414141,
"frac_reward_zero_std": 0.8125,
"grad_norm": 2.475841522216797,
"learning_rate": 3.3838383838383837e-07,
"loss": -0.0,
"num_tokens": 27403098.0,
"reward": 0.688281238079071,
"reward_std": 0.07920700311660767,
"rewards/video_r1_accuracy_reward/mean": 0.671875,
"rewards/video_r1_accuracy_reward/std": 0.4713755249977112,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 263
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 104.0,
"completions/max_terminated_length": 104.0,
"completions/mean_length": 40.9765625,
"completions/mean_terminated_length": 40.9765625,
"completions/min_length": 18.0,
"completions/min_terminated_length": 18.0,
"entropy": 1.073037028312683,
"epoch": 0.6666666666666666,
"frac_reward_zero_std": 0.75,
"grad_norm": 3.0775296688079834,
"learning_rate": 3.3585858585858583e-07,
"loss": -0.0,
"num_tokens": 27499639.0,
"reward": 0.6734374761581421,
"reward_std": 0.11844511330127716,
"rewards/video_r1_accuracy_reward/mean": 0.65625,
"rewards/video_r1_accuracy_reward/std": 0.47682511806488037,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 264
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 83.0,
"completions/max_terminated_length": 83.0,
"completions/mean_length": 41.203125,
"completions/mean_terminated_length": 41.203125,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.0243158340454102,
"epoch": 0.6691919191919192,
"frac_reward_zero_std": 0.8125,
"grad_norm": 2.3727004528045654,
"learning_rate": 3.333333333333333e-07,
"loss": -0.0,
"num_tokens": 27602497.0,
"reward": 0.666015625,
"reward_std": 0.08995203673839569,
"rewards/video_r1_accuracy_reward/mean": 0.6484375,
"rewards/video_r1_accuracy_reward/std": 0.4793342351913452,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 265
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 112.0,
"completions/max_terminated_length": 112.0,
"completions/mean_length": 40.1875,
"completions/mean_terminated_length": 40.1875,
"completions/min_length": 19.0,
"completions/min_terminated_length": 19.0,
"entropy": 1.0149409770965576,
"epoch": 0.6717171717171717,
"frac_reward_zero_std": 0.8125,
"grad_norm": 1.7323969602584839,
"learning_rate": 3.308080808080808e-07,
"loss": -0.0,
"num_tokens": 27704273.0,
"reward": 0.606640636920929,
"reward_std": 0.08021478354930878,
"rewards/video_r1_accuracy_reward/mean": 0.5859375,
"rewards/video_r1_accuracy_reward/std": 0.49449479579925537,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 266
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 76.0,
"completions/max_terminated_length": 76.0,
"completions/mean_length": 39.5390625,
"completions/mean_terminated_length": 39.5390625,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 1.094785451889038,
"epoch": 0.6742424242424242,
"frac_reward_zero_std": 0.9375,
"grad_norm": 1.412737250328064,
"learning_rate": 3.282828282828283e-07,
"loss": -0.0,
"num_tokens": 27806486.0,
"reward": 0.5472656488418579,
"reward_std": 0.03072948195040226,
"rewards/video_r1_accuracy_reward/mean": 0.5234375,
"rewards/video_r1_accuracy_reward/std": 0.5014128684997559,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 267
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 94.0,
"completions/max_terminated_length": 94.0,
"completions/mean_length": 40.75,
"completions/mean_terminated_length": 40.75,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 1.0335110425949097,
"epoch": 0.6767676767676768,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 3.2575757575757575e-07,
"loss": 0.0,
"num_tokens": 27915974.0,
"reward": 0.762499988079071,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.75,
"rewards/video_r1_accuracy_reward/std": 0.434714138507843,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 268
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 76.0,
"completions/max_terminated_length": 76.0,
"completions/mean_length": 39.34375,
"completions/mean_terminated_length": 39.34375,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.0604133605957031,
"epoch": 0.6792929292929293,
"frac_reward_zero_std": 0.75,
"grad_norm": 2.308901786804199,
"learning_rate": 3.2323232323232327e-07,
"loss": -0.0,
"num_tokens": 28020154.0,
"reward": 0.6363281011581421,
"reward_std": 0.10993647575378418,
"rewards/video_r1_accuracy_reward/mean": 0.6171875,
"rewards/video_r1_accuracy_reward/std": 0.4879830479621887,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 269
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 88.0,
"completions/max_terminated_length": 88.0,
"completions/mean_length": 41.546875,
"completions/mean_terminated_length": 41.546875,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.1338186264038086,
"epoch": 0.6818181818181818,
"frac_reward_zero_std": 0.9375,
"grad_norm": 1.2709474563598633,
"learning_rate": 3.207070707070707e-07,
"loss": -0.0,
"num_tokens": 28127328.0,
"reward": 0.532421886920929,
"reward_std": 0.020992232486605644,
"rewards/video_r1_accuracy_reward/mean": 0.5078125,
"rewards/video_r1_accuracy_reward/std": 0.5019033551216125,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 270
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 86.0,
"completions/max_terminated_length": 86.0,
"completions/mean_length": 40.3515625,
"completions/mean_terminated_length": 40.3515625,
"completions/min_length": 19.0,
"completions/min_terminated_length": 19.0,
"entropy": 1.0553174018859863,
"epoch": 0.6843434343434344,
"frac_reward_zero_std": 0.875,
"grad_norm": 2.8711135387420654,
"learning_rate": 3.1818181818181815e-07,
"loss": -0.0,
"num_tokens": 28229925.0,
"reward": 0.7625000476837158,
"reward_std": 0.06145896762609482,
"rewards/video_r1_accuracy_reward/mean": 0.75,
"rewards/video_r1_accuracy_reward/std": 0.434714138507843,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 271
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 90.0,
"completions/max_terminated_length": 90.0,
"completions/mean_length": 37.5625,
"completions/mean_terminated_length": 37.5625,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 0.9569792151451111,
"epoch": 0.6868686868686869,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.4626072645187378,
"learning_rate": 3.156565656565656e-07,
"loss": 0.0,
"num_tokens": 28331109.0,
"reward": 0.6066405773162842,
"reward_std": 0.05272950232028961,
"rewards/video_r1_accuracy_reward/mean": 0.5859375,
"rewards/video_r1_accuracy_reward/std": 0.49449479579925537,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 272
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 86.0,
"completions/max_terminated_length": 86.0,
"completions/mean_length": 41.34375,
"completions/mean_terminated_length": 41.34375,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.1569232940673828,
"epoch": 0.6893939393939394,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 3.1313131313131313e-07,
"loss": 0.0,
"num_tokens": 28436025.0,
"reward": 0.5843749642372131,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.5625,
"rewards/video_r1_accuracy_reward/std": 0.49802759289741516,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 273
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 84.0,
"completions/max_terminated_length": 84.0,
"completions/mean_length": 38.3828125,
"completions/mean_terminated_length": 38.3828125,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 0.9568972587585449,
"epoch": 0.6919191919191919,
"frac_reward_zero_std": 0.8125,
"grad_norm": 2.252964735031128,
"learning_rate": 3.106060606060606e-07,
"loss": -0.0,
"num_tokens": 28538722.0,
"reward": 0.6808593273162842,
"reward_std": 0.0727139487862587,
"rewards/video_r1_accuracy_reward/mean": 0.6640625,
"rewards/video_r1_accuracy_reward/std": 0.47417303919792175,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 274
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 87.0,
"completions/max_terminated_length": 87.0,
"completions/mean_length": 42.71875,
"completions/mean_terminated_length": 42.71875,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.112146258354187,
"epoch": 0.6944444444444444,
"frac_reward_zero_std": 0.875,
"grad_norm": 2.2951831817626953,
"learning_rate": 3.0808080808080806e-07,
"loss": 0.0,
"num_tokens": 28637126.0,
"reward": 0.5394531488418579,
"reward_std": 0.028590137138962746,
"rewards/video_r1_accuracy_reward/mean": 0.515625,
"rewards/video_r1_accuracy_reward/std": 0.5017194747924805,
"rewards/video_r1_format_reward/mean": 0.9921875,
"rewards/video_r1_format_reward/std": 0.0883883461356163,
"step": 275
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 115.0,
"completions/max_terminated_length": 115.0,
"completions/mean_length": 39.984375,
"completions/mean_terminated_length": 39.984375,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.1185648441314697,
"epoch": 0.696969696969697,
"frac_reward_zero_std": 0.6875,
"grad_norm": 3.1425654888153076,
"learning_rate": 3.055555555555556e-07,
"loss": 0.0,
"num_tokens": 28740852.0,
"reward": 0.77734375,
"reward_std": 0.12119146436452866,
"rewards/video_r1_accuracy_reward/mean": 0.765625,
"rewards/video_r1_accuracy_reward/std": 0.42527204751968384,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 276
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 77.0,
"completions/max_terminated_length": 77.0,
"completions/mean_length": 39.4609375,
"completions/mean_terminated_length": 39.4609375,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.014233112335205,
"epoch": 0.6994949494949495,
"frac_reward_zero_std": 0.75,
"grad_norm": 1.9896016120910645,
"learning_rate": 3.0303030303030305e-07,
"loss": 0.0,
"num_tokens": 28848535.0,
"reward": 0.666015625,
"reward_std": 0.09046198427677155,
"rewards/video_r1_accuracy_reward/mean": 0.6484375,
"rewards/video_r1_accuracy_reward/std": 0.4793342351913452,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 277
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 81.0,
"completions/max_terminated_length": 81.0,
"completions/mean_length": 40.828125,
"completions/mean_terminated_length": 40.828125,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.0652706623077393,
"epoch": 0.702020202020202,
"frac_reward_zero_std": 0.875,
"grad_norm": 2.8897011280059814,
"learning_rate": 3.0050505050505046e-07,
"loss": 0.0,
"num_tokens": 28951817.0,
"reward": 0.5027344226837158,
"reward_std": 0.04847751557826996,
"rewards/video_r1_accuracy_reward/mean": 0.4765625,
"rewards/video_r1_accuracy_reward/std": 0.5014128684997559,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 278
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 99.0,
"completions/max_terminated_length": 99.0,
"completions/mean_length": 40.046875,
"completions/mean_terminated_length": 40.046875,
"completions/min_length": 19.0,
"completions/min_terminated_length": 19.0,
"entropy": 1.0515589714050293,
"epoch": 0.7045454545454546,
"frac_reward_zero_std": 0.9375,
"grad_norm": 1.2125859260559082,
"learning_rate": 2.9797979797979793e-07,
"loss": 0.0,
"num_tokens": 29046855.0,
"reward": 0.576953113079071,
"reward_std": 0.020992232486605644,
"rewards/video_r1_accuracy_reward/mean": 0.5546875,
"rewards/video_r1_accuracy_reward/std": 0.4989531338214874,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 279
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 99.0,
"completions/max_terminated_length": 99.0,
"completions/mean_length": 41.078125,
"completions/mean_terminated_length": 41.078125,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.0689277648925781,
"epoch": 0.7070707070707071,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 2.9545454545454545e-07,
"loss": 0.0,
"num_tokens": 29154393.0,
"reward": 0.8218749761581421,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.8125,
"rewards/video_r1_accuracy_reward/std": 0.39184603095054626,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 280
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 78.0,
"completions/max_terminated_length": 78.0,
"completions/mean_length": 39.515625,
"completions/mean_terminated_length": 39.515625,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 1.11039137840271,
"epoch": 0.7095959595959596,
"frac_reward_zero_std": 0.9375,
"grad_norm": 1.8850462436676025,
"learning_rate": 2.929292929292929e-07,
"loss": -0.0,
"num_tokens": 29251595.0,
"reward": 0.9183593988418579,
"reward_std": 0.03072948381304741,
"rewards/video_r1_accuracy_reward/mean": 0.9140625,
"rewards/video_r1_accuracy_reward/std": 0.2813730239868164,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 281
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 86.0,
"completions/max_terminated_length": 86.0,
"completions/mean_length": 39.34375,
"completions/mean_terminated_length": 39.34375,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 1.1349459886550903,
"epoch": 0.7121212121212122,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.9888182878494263,
"learning_rate": 2.904040404040404e-07,
"loss": -0.0,
"num_tokens": 29345079.0,
"reward": 0.46562498807907104,
"reward_std": 0.05497056990861893,
"rewards/video_r1_accuracy_reward/mean": 0.4375,
"rewards/video_r1_accuracy_reward/std": 0.49802759289741516,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 282
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 73.0,
"completions/max_terminated_length": 73.0,
"completions/mean_length": 38.6328125,
"completions/mean_terminated_length": 38.6328125,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 0.9957581162452698,
"epoch": 0.7146464646464646,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 2.878787878787879e-07,
"loss": 0.0,
"num_tokens": 29436368.0,
"reward": 0.703125,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.6875,
"rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 283
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 77.0,
"completions/max_terminated_length": 77.0,
"completions/mean_length": 38.5390625,
"completions/mean_terminated_length": 38.5390625,
"completions/min_length": 18.0,
"completions/min_terminated_length": 18.0,
"entropy": 1.0308442115783691,
"epoch": 0.7171717171717171,
"frac_reward_zero_std": 0.8125,
"grad_norm": 2.872098684310913,
"learning_rate": 2.8535353535353536e-07,
"loss": 0.0,
"num_tokens": 29536589.0,
"reward": 0.6585937142372131,
"reward_std": 0.06946974992752075,
"rewards/video_r1_accuracy_reward/mean": 0.640625,
"rewards/video_r1_accuracy_reward/std": 0.481702595949173,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 284
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 101.0,
"completions/max_terminated_length": 101.0,
"completions/mean_length": 41.53125,
"completions/mean_terminated_length": 41.53125,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 1.080606460571289,
"epoch": 0.7196969696969697,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.6087223291397095,
"learning_rate": 2.8282828282828283e-07,
"loss": -0.0,
"num_tokens": 29646105.0,
"reward": 0.762499988079071,
"reward_std": 0.06145896390080452,
"rewards/video_r1_accuracy_reward/mean": 0.75,
"rewards/video_r1_accuracy_reward/std": 0.434714138507843,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 285
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 116.0,
"completions/max_terminated_length": 116.0,
"completions/mean_length": 39.109375,
"completions/mean_terminated_length": 39.109375,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 0.9553197622299194,
"epoch": 0.7222222222222222,
"frac_reward_zero_std": 0.9375,
"grad_norm": 1.0064791440963745,
"learning_rate": 2.8030303030303024e-07,
"loss": 0.0,
"num_tokens": 29756375.0,
"reward": 0.7550780773162842,
"reward_std": 0.020992232486605644,
"rewards/video_r1_accuracy_reward/mean": 0.7421875,
"rewards/video_r1_accuracy_reward/std": 0.43914905190467834,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 286
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 91.0,
"completions/max_terminated_length": 91.0,
"completions/mean_length": 42.5234375,
"completions/mean_terminated_length": 42.5234375,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.1082098484039307,
"epoch": 0.7247474747474747,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.7488712072372437,
"learning_rate": 2.7777777777777776e-07,
"loss": 0.0,
"num_tokens": 29862402.0,
"reward": 0.703125,
"reward_std": 0.04198446497321129,
"rewards/video_r1_accuracy_reward/mean": 0.6875,
"rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 287
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 106.0,
"completions/max_terminated_length": 106.0,
"completions/mean_length": 42.578125,
"completions/mean_terminated_length": 42.578125,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.0919723510742188,
"epoch": 0.7272727272727273,
"frac_reward_zero_std": 0.875,
"grad_norm": 2.1887331008911133,
"learning_rate": 2.752525252525252e-07,
"loss": 0.0,
"num_tokens": 29962308.0,
"reward": 0.6214843392372131,
"reward_std": 0.058214765042066574,
"rewards/video_r1_accuracy_reward/mean": 0.6015625,
"rewards/video_r1_accuracy_reward/std": 0.4915000796318054,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 288
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 105.0,
"completions/max_terminated_length": 105.0,
"completions/mean_length": 44.5390625,
"completions/mean_terminated_length": 44.5390625,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 1.0653269290924072,
"epoch": 0.7297979797979798,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 2.727272727272727e-07,
"loss": 0.0,
"num_tokens": 30067217.0,
"reward": 0.762499988079071,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.75,
"rewards/video_r1_accuracy_reward/std": 0.434714138507843,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 289
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 106.0,
"completions/max_terminated_length": 106.0,
"completions/mean_length": 41.0703125,
"completions/mean_terminated_length": 41.0703125,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.1131395101547241,
"epoch": 0.7323232323232324,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.3746576309204102,
"learning_rate": 2.702020202020202e-07,
"loss": -0.0,
"num_tokens": 30166226.0,
"reward": 0.688281238079071,
"reward_std": 0.051721714437007904,
"rewards/video_r1_accuracy_reward/mean": 0.671875,
"rewards/video_r1_accuracy_reward/std": 0.4713755249977112,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 290
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 139.0,
"completions/max_terminated_length": 139.0,
"completions/mean_length": 45.3203125,
"completions/mean_terminated_length": 45.3203125,
"completions/min_length": 19.0,
"completions/min_terminated_length": 19.0,
"entropy": 1.1243813037872314,
"epoch": 0.7348484848484849,
"frac_reward_zero_std": 0.8125,
"grad_norm": 2.915048360824585,
"learning_rate": 2.676767676767677e-07,
"loss": -0.0,
"num_tokens": 30259571.0,
"reward": 0.7847656011581421,
"reward_std": 0.08245119452476501,
"rewards/video_r1_accuracy_reward/mean": 0.7734375,
"rewards/video_r1_accuracy_reward/std": 0.4202519655227661,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 291
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 79.0,
"completions/max_terminated_length": 79.0,
"completions/mean_length": 42.1875,
"completions/mean_terminated_length": 42.1875,
"completions/min_length": 24.0,
"completions/min_terminated_length": 24.0,
"entropy": 1.050102949142456,
"epoch": 0.7373737373737373,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 2.6515151515151514e-07,
"loss": 0.0,
"num_tokens": 30374483.0,
"reward": 0.703125,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.6875,
"rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 292
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 121.0,
"completions/max_terminated_length": 121.0,
"completions/mean_length": 41.5234375,
"completions/mean_terminated_length": 41.5234375,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.032031536102295,
"epoch": 0.73989898989899,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 2.6262626262626266e-07,
"loss": 0.0,
"num_tokens": 30471046.0,
"reward": 0.5249999761581421,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.5,
"rewards/video_r1_accuracy_reward/std": 0.5019646286964417,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 293
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 75.0,
"completions/max_terminated_length": 75.0,
"completions/mean_length": 41.453125,
"completions/mean_terminated_length": 41.453125,
"completions/min_length": 25.0,
"completions/min_terminated_length": 25.0,
"entropy": 1.0804343223571777,
"epoch": 0.7424242424242424,
"frac_reward_zero_std": 0.75,
"grad_norm": 2.5472633838653564,
"learning_rate": 2.6010101010101007e-07,
"loss": -0.0,
"num_tokens": 30581344.0,
"reward": 0.5249999761581421,
"reward_std": 0.10344342887401581,
"rewards/video_r1_accuracy_reward/mean": 0.5,
"rewards/video_r1_accuracy_reward/std": 0.5019646286964417,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 294
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 384.0,
"completions/max_terminated_length": 103.0,
"completions/mean_length": 45.3359375,
"completions/mean_terminated_length": 42.669288635253906,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 0.9924347400665283,
"epoch": 0.7449494949494949,
"frac_reward_zero_std": 0.875,
"grad_norm": 3.591127395629883,
"learning_rate": 2.5757575757575754e-07,
"loss": 0.0,
"num_tokens": 30692403.0,
"reward": 0.821484386920929,
"reward_std": 0.04308931902050972,
"rewards/video_r1_accuracy_reward/mean": 0.8125,
"rewards/video_r1_accuracy_reward/std": 0.39184603095054626,
"rewards/video_r1_format_reward/mean": 0.9921875,
"rewards/video_r1_format_reward/std": 0.0883883461356163,
"step": 295
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 93.0,
"completions/max_terminated_length": 93.0,
"completions/mean_length": 43.6015625,
"completions/mean_terminated_length": 43.6015625,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.1211515665054321,
"epoch": 0.7474747474747475,
"frac_reward_zero_std": 0.8125,
"grad_norm": 1.6249940395355225,
"learning_rate": 2.55050505050505e-07,
"loss": -0.0,
"num_tokens": 30794800.0,
"reward": 0.62890625,
"reward_std": 0.07920699566602707,
"rewards/video_r1_accuracy_reward/mean": 0.609375,
"rewards/video_r1_accuracy_reward/std": 0.4898075461387634,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 296
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 112.0,
"completions/max_terminated_length": 112.0,
"completions/mean_length": 39.15625,
"completions/mean_terminated_length": 39.15625,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.1058385372161865,
"epoch": 0.75,
"frac_reward_zero_std": 0.625,
"grad_norm": 3.0298686027526855,
"learning_rate": 2.525252525252525e-07,
"loss": -0.0,
"num_tokens": 30891148.0,
"reward": 0.6734375357627869,
"reward_std": 0.17140009999275208,
"rewards/video_r1_accuracy_reward/mean": 0.65625,
"rewards/video_r1_accuracy_reward/std": 0.47682511806488037,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 297
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 152.0,
"completions/max_terminated_length": 152.0,
"completions/mean_length": 42.15625,
"completions/mean_terminated_length": 42.15625,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.0815918445587158,
"epoch": 0.7525252525252525,
"frac_reward_zero_std": 0.875,
"grad_norm": 2.4456374645233154,
"learning_rate": 2.5e-07,
"loss": 0.0,
"num_tokens": 30999208.0,
"reward": 0.5250000357627869,
"reward_std": 0.04198446497321129,
"rewards/video_r1_accuracy_reward/mean": 0.5,
"rewards/video_r1_accuracy_reward/std": 0.5019646286964417,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 298
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 98.0,
"completions/max_terminated_length": 98.0,
"completions/mean_length": 40.9453125,
"completions/mean_terminated_length": 40.9453125,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.1108628511428833,
"epoch": 0.7550505050505051,
"frac_reward_zero_std": 0.75,
"grad_norm": 2.825129508972168,
"learning_rate": 2.4747474747474745e-07,
"loss": 0.0,
"num_tokens": 31100649.0,
"reward": 0.591796875,
"reward_std": 0.11195206642150879,
"rewards/video_r1_accuracy_reward/mean": 0.5703125,
"rewards/video_r1_accuracy_reward/std": 0.4969765841960907,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 299
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 100.0,
"completions/max_terminated_length": 100.0,
"completions/mean_length": 44.2578125,
"completions/mean_terminated_length": 44.2578125,
"completions/min_length": 19.0,
"completions/min_terminated_length": 19.0,
"entropy": 1.1574559211730957,
"epoch": 0.7575757575757576,
"frac_reward_zero_std": 0.75,
"grad_norm": 1.9903881549835205,
"learning_rate": 2.449494949494949e-07,
"loss": -0.0,
"num_tokens": 31207226.0,
"reward": 0.62890625,
"reward_std": 0.11642953008413315,
"rewards/video_r1_accuracy_reward/mean": 0.609375,
"rewards/video_r1_accuracy_reward/std": 0.4898075461387634,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 300
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 82.0,
"completions/max_terminated_length": 82.0,
"completions/mean_length": 37.9921875,
"completions/mean_terminated_length": 37.9921875,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.0374675989151,
"epoch": 0.76010101010101,
"frac_reward_zero_std": 0.75,
"grad_norm": 3.569753885269165,
"learning_rate": 2.4242424242424244e-07,
"loss": 0.0,
"num_tokens": 31304289.0,
"reward": 0.6953125,
"reward_std": 0.07706765830516815,
"rewards/video_r1_accuracy_reward/mean": 0.6796875,
"rewards/video_r1_accuracy_reward/std": 0.4684300124645233,
"rewards/video_r1_format_reward/mean": 0.9921875,
"rewards/video_r1_format_reward/std": 0.0883883461356163,
"step": 301
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 85.0,
"completions/max_terminated_length": 85.0,
"completions/mean_length": 41.484375,
"completions/mean_terminated_length": 41.484375,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.127359390258789,
"epoch": 0.7626262626262627,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.5562926530838013,
"learning_rate": 2.398989898989899e-07,
"loss": -0.0,
"num_tokens": 31400983.0,
"reward": 0.5621093511581421,
"reward_std": 0.05272950232028961,
"rewards/video_r1_accuracy_reward/mean": 0.5390625,
"rewards/video_r1_accuracy_reward/std": 0.5004304051399231,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 302
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 107.0,
"completions/max_terminated_length": 107.0,
"completions/mean_length": 42.6328125,
"completions/mean_terminated_length": 42.6328125,
"completions/min_length": 19.0,
"completions/min_terminated_length": 19.0,
"entropy": 1.028510332107544,
"epoch": 0.7651515151515151,
"frac_reward_zero_std": 0.875,
"grad_norm": 2.891268253326416,
"learning_rate": 2.3737373737373737e-07,
"loss": -0.0,
"num_tokens": 31511296.0,
"reward": 0.688281238079071,
"reward_std": 0.051721714437007904,
"rewards/video_r1_accuracy_reward/mean": 0.671875,
"rewards/video_r1_accuracy_reward/std": 0.4713755249977112,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 303
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 67.0,
"completions/max_terminated_length": 67.0,
"completions/mean_length": 38.3984375,
"completions/mean_terminated_length": 38.3984375,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.018940806388855,
"epoch": 0.7676767676767676,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.7596899271011353,
"learning_rate": 2.3484848484848486e-07,
"loss": -0.0,
"num_tokens": 31624579.0,
"reward": 0.725390613079071,
"reward_std": 0.04847751557826996,
"rewards/video_r1_accuracy_reward/mean": 0.7109375,
"rewards/video_r1_accuracy_reward/std": 0.45510825514793396,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 304
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 81.0,
"completions/max_terminated_length": 81.0,
"completions/mean_length": 40.8359375,
"completions/mean_terminated_length": 40.8359375,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.066922664642334,
"epoch": 0.7702020202020202,
"frac_reward_zero_std": 0.9375,
"grad_norm": 0.9264626502990723,
"learning_rate": 2.323232323232323e-07,
"loss": -0.0,
"num_tokens": 31733166.0,
"reward": 0.5695312023162842,
"reward_std": 0.027485284954309464,
"rewards/video_r1_accuracy_reward/mean": 0.546875,
"rewards/video_r1_accuracy_reward/std": 0.4997538626194,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 305
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 100.0,
"completions/max_terminated_length": 100.0,
"completions/mean_length": 39.703125,
"completions/mean_terminated_length": 39.703125,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.1334049701690674,
"epoch": 0.7727272727272727,
"frac_reward_zero_std": 0.8125,
"grad_norm": 2.070505142211914,
"learning_rate": 2.297979797979798e-07,
"loss": 0.0,
"num_tokens": 31838536.0,
"reward": 0.62890625,
"reward_std": 0.06946974992752075,
"rewards/video_r1_accuracy_reward/mean": 0.609375,
"rewards/video_r1_accuracy_reward/std": 0.4898075461387634,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 306
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 98.0,
"completions/max_terminated_length": 98.0,
"completions/mean_length": 41.46875,
"completions/mean_terminated_length": 41.46875,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.073242425918579,
"epoch": 0.7752525252525253,
"frac_reward_zero_std": 0.9375,
"grad_norm": 0.7374638319015503,
"learning_rate": 2.2727272727272726e-07,
"loss": -0.0,
"num_tokens": 31948676.0,
"reward": 0.7105468511581421,
"reward_std": 0.020992232486605644,
"rewards/video_r1_accuracy_reward/mean": 0.6953125,
"rewards/video_r1_accuracy_reward/std": 0.46208351850509644,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 307
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 112.0,
"completions/max_terminated_length": 112.0,
"completions/mean_length": 43.734375,
"completions/mean_terminated_length": 43.734375,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.1168217658996582,
"epoch": 0.7777777777777778,
"frac_reward_zero_std": 0.75,
"grad_norm": 3.1745333671569824,
"learning_rate": 2.2474747474747475e-07,
"loss": -0.0,
"num_tokens": 32042010.0,
"reward": 0.7996094226837158,
"reward_std": 0.10019923746585846,
"rewards/video_r1_accuracy_reward/mean": 0.7890625,
"rewards/video_r1_accuracy_reward/std": 0.4095771610736847,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 308
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 87.0,
"completions/max_terminated_length": 87.0,
"completions/mean_length": 41.578125,
"completions/mean_terminated_length": 41.578125,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 1.1183019876480103,
"epoch": 0.7803030303030303,
"frac_reward_zero_std": 0.8125,
"grad_norm": 2.8185198307037354,
"learning_rate": 2.222222222222222e-07,
"loss": -0.0,
"num_tokens": 32146732.0,
"reward": 0.591796875,
"reward_std": 0.08245119452476501,
"rewards/video_r1_accuracy_reward/mean": 0.5703125,
"rewards/video_r1_accuracy_reward/std": 0.4969765841960907,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 309
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 84.0,
"completions/max_terminated_length": 84.0,
"completions/mean_length": 42.5078125,
"completions/mean_terminated_length": 42.5078125,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.156842589378357,
"epoch": 0.7828282828282829,
"frac_reward_zero_std": 0.8125,
"grad_norm": 2.6074113845825195,
"learning_rate": 2.1969696969696968e-07,
"loss": -0.0,
"num_tokens": 32241525.0,
"reward": 0.703125,
"reward_std": 0.08670784533023834,
"rewards/video_r1_accuracy_reward/mean": 0.6875,
"rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 310
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 87.0,
"completions/max_terminated_length": 87.0,
"completions/mean_length": 42.2421875,
"completions/mean_terminated_length": 42.2421875,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.058318853378296,
"epoch": 0.7853535353535354,
"frac_reward_zero_std": 0.9375,
"grad_norm": 1.0678150653839111,
"learning_rate": 2.1717171717171718e-07,
"loss": -0.0,
"num_tokens": 32347036.0,
"reward": 0.910937488079071,
"reward_std": 0.03173727169632912,
"rewards/video_r1_accuracy_reward/mean": 0.90625,
"rewards/video_r1_accuracy_reward/std": 0.29262590408325195,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 311
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 97.0,
"completions/max_terminated_length": 97.0,
"completions/mean_length": 42.40625,
"completions/mean_terminated_length": 42.40625,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.1335667371749878,
"epoch": 0.7878787878787878,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 2.1464646464646464e-07,
"loss": 0.0,
"num_tokens": 32443480.0,
"reward": 0.703125,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.6875,
"rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 312
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 88.0,
"completions/max_terminated_length": 88.0,
"completions/mean_length": 41.234375,
"completions/mean_terminated_length": 41.234375,
"completions/min_length": 24.0,
"completions/min_terminated_length": 24.0,
"entropy": 1.0559073686599731,
"epoch": 0.7904040404040404,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.3612805604934692,
"learning_rate": 2.121212121212121e-07,
"loss": 0.0,
"num_tokens": 32548430.0,
"reward": 0.8367187976837158,
"reward_std": 0.051721714437007904,
"rewards/video_r1_accuracy_reward/mean": 0.828125,
"rewards/video_r1_accuracy_reward/std": 0.378754198551178,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 313
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 90.0,
"completions/max_terminated_length": 90.0,
"completions/mean_length": 42.0390625,
"completions/mean_terminated_length": 42.0390625,
"completions/min_length": 24.0,
"completions/min_terminated_length": 24.0,
"entropy": 1.055532693862915,
"epoch": 0.7929292929292929,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 2.0959595959595957e-07,
"loss": 0.0,
"num_tokens": 32640787.0,
"reward": 0.5843749642372131,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.5625,
"rewards/video_r1_accuracy_reward/std": 0.49802759289741516,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 314
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 78.0,
"completions/max_terminated_length": 78.0,
"completions/mean_length": 39.9375,
"completions/mean_terminated_length": 39.9375,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.1142704486846924,
"epoch": 0.7954545454545454,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 2.0707070707070707e-07,
"loss": 0.0,
"num_tokens": 32744659.0,
"reward": 0.6437499523162842,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.625,
"rewards/video_r1_accuracy_reward/std": 0.4860251843929291,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 315
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 76.0,
"completions/max_terminated_length": 76.0,
"completions/mean_length": 40.3671875,
"completions/mean_terminated_length": 40.3671875,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.0762892961502075,
"epoch": 0.797979797979798,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 2.0454545454545456e-07,
"loss": 0.0,
"num_tokens": 32838018.0,
"reward": 0.762499988079071,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.75,
"rewards/video_r1_accuracy_reward/std": 0.434714138507843,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 316
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 99.0,
"completions/max_terminated_length": 99.0,
"completions/mean_length": 41.578125,
"completions/mean_terminated_length": 41.578125,
"completions/min_length": 19.0,
"completions/min_terminated_length": 19.0,
"entropy": 1.0358078479766846,
"epoch": 0.8005050505050505,
"frac_reward_zero_std": 0.9375,
"grad_norm": 0.9207355380058289,
"learning_rate": 2.02020202020202e-07,
"loss": 0.0,
"num_tokens": 32945708.0,
"reward": 0.6957031488418579,
"reward_std": 0.020992232486605644,
"rewards/video_r1_accuracy_reward/mean": 0.6796875,
"rewards/video_r1_accuracy_reward/std": 0.4684300124645233,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 317
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 80.0,
"completions/max_terminated_length": 80.0,
"completions/mean_length": 39.0625,
"completions/mean_terminated_length": 39.0625,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.0110433101654053,
"epoch": 0.803030303030303,
"frac_reward_zero_std": 0.8125,
"grad_norm": 3.387294054031372,
"learning_rate": 1.994949494949495e-07,
"loss": 0.0,
"num_tokens": 33047076.0,
"reward": 0.703125,
"reward_std": 0.07920699566602707,
"rewards/video_r1_accuracy_reward/mean": 0.6875,
"rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 318
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 105.0,
"completions/max_terminated_length": 105.0,
"completions/mean_length": 41.5078125,
"completions/mean_terminated_length": 41.5078125,
"completions/min_length": 24.0,
"completions/min_terminated_length": 24.0,
"entropy": 1.0886144638061523,
"epoch": 0.8055555555555556,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 1.9696969696969696e-07,
"loss": 0.0,
"num_tokens": 33161533.0,
"reward": 0.703125,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.6875,
"rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 319
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 83.0,
"completions/max_terminated_length": 83.0,
"completions/mean_length": 39.40625,
"completions/mean_terminated_length": 39.40625,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.0247807502746582,
"epoch": 0.8080808080808081,
"frac_reward_zero_std": 0.9375,
"grad_norm": 2.800685167312622,
"learning_rate": 1.9444444444444445e-07,
"loss": 0.0,
"num_tokens": 33261601.0,
"reward": 0.45820313692092896,
"reward_std": 0.020992232486605644,
"rewards/video_r1_accuracy_reward/mean": 0.4296875,
"rewards/video_r1_accuracy_reward/std": 0.4969765841960907,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 320
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 88.0,
"completions/max_terminated_length": 88.0,
"completions/mean_length": 41.890625,
"completions/mean_terminated_length": 41.890625,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.0938504934310913,
"epoch": 0.8106060606060606,
"frac_reward_zero_std": 0.9375,
"grad_norm": 1.4721579551696777,
"learning_rate": 1.9191919191919189e-07,
"loss": 0.0,
"num_tokens": 33362019.0,
"reward": 0.7179687023162842,
"reward_std": 0.027485283091664314,
"rewards/video_r1_accuracy_reward/mean": 0.703125,
"rewards/video_r1_accuracy_reward/std": 0.45867621898651123,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 321
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 120.0,
"completions/max_terminated_length": 120.0,
"completions/mean_length": 42.2265625,
"completions/mean_terminated_length": 42.2265625,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.0467185974121094,
"epoch": 0.8131313131313131,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 1.8939393939393938e-07,
"loss": 0.0,
"num_tokens": 33472384.0,
"reward": 0.6437499523162842,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.625,
"rewards/video_r1_accuracy_reward/std": 0.4860251843929291,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 322
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 139.0,
"completions/max_terminated_length": 139.0,
"completions/mean_length": 46.7421875,
"completions/mean_terminated_length": 46.7421875,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.163482427597046,
"epoch": 0.8156565656565656,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.9332994222640991,
"learning_rate": 1.8686868686868687e-07,
"loss": -0.0,
"num_tokens": 33566471.0,
"reward": 0.8070312738418579,
"reward_std": 0.051721714437007904,
"rewards/video_r1_accuracy_reward/mean": 0.796875,
"rewards/video_r1_accuracy_reward/std": 0.40390563011169434,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 323
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 111.0,
"completions/max_terminated_length": 111.0,
"completions/mean_length": 41.0546875,
"completions/mean_terminated_length": 41.0546875,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 0.9953482151031494,
"epoch": 0.8181818181818182,
"frac_reward_zero_std": 0.75,
"grad_norm": 2.8781511783599854,
"learning_rate": 1.8434343434343434e-07,
"loss": -0.0,
"num_tokens": 33663286.0,
"reward": 0.814453125,
"reward_std": 0.10993649065494537,
"rewards/video_r1_accuracy_reward/mean": 0.8046875,
"rewards/video_r1_accuracy_reward/std": 0.3979988098144531,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 324
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 85.0,
"completions/max_terminated_length": 85.0,
"completions/mean_length": 42.5,
"completions/mean_terminated_length": 42.5,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.0891491174697876,
"epoch": 0.8207070707070707,
"frac_reward_zero_std": 0.9375,
"grad_norm": 2.3374204635620117,
"learning_rate": 1.818181818181818e-07,
"loss": 0.0,
"num_tokens": 33767966.0,
"reward": 0.814453125,
"reward_std": 0.020992232486605644,
"rewards/video_r1_accuracy_reward/mean": 0.8046875,
"rewards/video_r1_accuracy_reward/std": 0.3979988098144531,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 325
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 84.0,
"completions/max_terminated_length": 84.0,
"completions/mean_length": 41.984375,
"completions/mean_terminated_length": 41.984375,
"completions/min_length": 7.0,
"completions/min_terminated_length": 7.0,
"entropy": 1.0346364974975586,
"epoch": 0.8232323232323232,
"frac_reward_zero_std": 0.8125,
"grad_norm": 4.739593029022217,
"learning_rate": 1.7929292929292927e-07,
"loss": -0.0,
"num_tokens": 33880908.0,
"reward": 0.7250000238418579,
"reward_std": 0.059319622814655304,
"rewards/video_r1_accuracy_reward/mean": 0.7109375,
"rewards/video_r1_accuracy_reward/std": 0.45510825514793396,
"rewards/video_r1_format_reward/mean": 0.9921875,
"rewards/video_r1_format_reward/std": 0.0883883461356163,
"step": 326
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 99.0,
"completions/max_terminated_length": 99.0,
"completions/mean_length": 41.0078125,
"completions/mean_terminated_length": 41.0078125,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 1.019489049911499,
"epoch": 0.8257575757575758,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 1.7676767676767676e-07,
"loss": 0.0,
"num_tokens": 33992085.0,
"reward": 0.8218749761581421,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.8125,
"rewards/video_r1_accuracy_reward/std": 0.39184603095054626,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 327
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 81.0,
"completions/max_terminated_length": 81.0,
"completions/mean_length": 39.9140625,
"completions/mean_terminated_length": 39.9140625,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.0503275394439697,
"epoch": 0.8282828282828283,
"frac_reward_zero_std": 0.9375,
"grad_norm": 1.8522095680236816,
"learning_rate": 1.7424242424242425e-07,
"loss": 0.0,
"num_tokens": 34098090.0,
"reward": 0.814453125,
"reward_std": 0.020992232486605644,
"rewards/video_r1_accuracy_reward/mean": 0.8046875,
"rewards/video_r1_accuracy_reward/std": 0.3979988098144531,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 328
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 96.0,
"completions/max_terminated_length": 96.0,
"completions/mean_length": 39.390625,
"completions/mean_terminated_length": 39.390625,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.0890402793884277,
"epoch": 0.8308080808080808,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 1.717171717171717e-07,
"loss": 0.0,
"num_tokens": 34197460.0,
"reward": 0.5843750238418579,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.5625,
"rewards/video_r1_accuracy_reward/std": 0.49802759289741516,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 329
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 85.0,
"completions/max_terminated_length": 85.0,
"completions/mean_length": 40.515625,
"completions/mean_terminated_length": 40.515625,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.0883162021636963,
"epoch": 0.8333333333333334,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 1.6919191919191918e-07,
"loss": 0.0,
"num_tokens": 34295606.0,
"reward": 0.762499988079071,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.75,
"rewards/video_r1_accuracy_reward/std": 0.434714138507843,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 330
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 85.0,
"completions/max_terminated_length": 85.0,
"completions/mean_length": 38.5546875,
"completions/mean_terminated_length": 38.5546875,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 0.9863436222076416,
"epoch": 0.8358585858585859,
"frac_reward_zero_std": 0.9375,
"grad_norm": 0.8784723281860352,
"learning_rate": 1.6666666666666665e-07,
"loss": -0.0,
"num_tokens": 34403301.0,
"reward": 0.7699218988418579,
"reward_std": 0.020992232486605644,
"rewards/video_r1_accuracy_reward/mean": 0.7578125,
"rewards/video_r1_accuracy_reward/std": 0.4300905168056488,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 331
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 98.0,
"completions/max_terminated_length": 98.0,
"completions/mean_length": 39.4296875,
"completions/mean_terminated_length": 39.4296875,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.0463258028030396,
"epoch": 0.8383838383838383,
"frac_reward_zero_std": 0.875,
"grad_norm": 2.3829569816589355,
"learning_rate": 1.6414141414141414e-07,
"loss": -0.0,
"num_tokens": 34498212.0,
"reward": 0.8515625,
"reward_std": 0.05497056990861893,
"rewards/video_r1_accuracy_reward/mean": 0.84375,
"rewards/video_r1_accuracy_reward/std": 0.3645188808441162,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 332
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 71.0,
"completions/max_terminated_length": 71.0,
"completions/mean_length": 39.7734375,
"completions/mean_terminated_length": 39.7734375,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.0662963390350342,
"epoch": 0.8409090909090909,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.7944689989089966,
"learning_rate": 1.6161616161616163e-07,
"loss": 0.0,
"num_tokens": 34595415.0,
"reward": 0.666015625,
"reward_std": 0.04847751557826996,
"rewards/video_r1_accuracy_reward/mean": 0.6484375,
"rewards/video_r1_accuracy_reward/std": 0.4793342351913452,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 333
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 92.0,
"completions/max_terminated_length": 92.0,
"completions/mean_length": 41.2421875,
"completions/mean_terminated_length": 41.2421875,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.035414695739746,
"epoch": 0.8434343434343434,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 1.5909090909090907e-07,
"loss": 0.0,
"num_tokens": 34693454.0,
"reward": 0.762499988079071,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.75,
"rewards/video_r1_accuracy_reward/std": 0.434714138507843,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 334
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 74.0,
"completions/max_terminated_length": 74.0,
"completions/mean_length": 40.5625,
"completions/mean_terminated_length": 40.5625,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 0.992094874382019,
"epoch": 0.8459595959595959,
"frac_reward_zero_std": 0.875,
"grad_norm": 4.349198341369629,
"learning_rate": 1.5656565656565657e-07,
"loss": -0.0,
"num_tokens": 34788646.0,
"reward": 0.740234375,
"reward_std": 0.05272950232028961,
"rewards/video_r1_accuracy_reward/mean": 0.7265625,
"rewards/video_r1_accuracy_reward/std": 0.447474867105484,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 335
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 93.0,
"completions/max_terminated_length": 93.0,
"completions/mean_length": 38.90625,
"completions/mean_terminated_length": 38.90625,
"completions/min_length": 19.0,
"completions/min_terminated_length": 19.0,
"entropy": 1.0111061334609985,
"epoch": 0.8484848484848485,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 1.5404040404040403e-07,
"loss": 0.0,
"num_tokens": 34894258.0,
"reward": 0.762499988079071,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.75,
"rewards/video_r1_accuracy_reward/std": 0.434714138507843,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 336
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 89.0,
"completions/max_terminated_length": 89.0,
"completions/mean_length": 41.484375,
"completions/mean_terminated_length": 41.484375,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.1294646263122559,
"epoch": 0.851010101010101,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.3885397911071777,
"learning_rate": 1.5151515151515152e-07,
"loss": -0.0,
"num_tokens": 34997048.0,
"reward": 0.6214843988418579,
"reward_std": 0.05272950232028961,
"rewards/video_r1_accuracy_reward/mean": 0.6015625,
"rewards/video_r1_accuracy_reward/std": 0.4915000796318054,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 337
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 384.0,
"completions/max_terminated_length": 83.0,
"completions/mean_length": 44.3671875,
"completions/mean_terminated_length": 41.69291305541992,
"completions/min_length": 17.0,
"completions/min_terminated_length": 17.0,
"entropy": 1.0213119983673096,
"epoch": 0.8535353535353535,
"frac_reward_zero_std": 0.9375,
"grad_norm": 0.31857022643089294,
"learning_rate": 1.4898989898989896e-07,
"loss": 0.0,
"num_tokens": 35089799.0,
"reward": 0.7621093988418579,
"reward_std": 0.0011048543965443969,
"rewards/video_r1_accuracy_reward/mean": 0.75,
"rewards/video_r1_accuracy_reward/std": 0.434714138507843,
"rewards/video_r1_format_reward/mean": 0.9921875,
"rewards/video_r1_format_reward/std": 0.0883883461356163,
"step": 338
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 95.0,
"completions/max_terminated_length": 95.0,
"completions/mean_length": 43.2265625,
"completions/mean_terminated_length": 43.2265625,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.0955770015716553,
"epoch": 0.8560606060606061,
"frac_reward_zero_std": 0.9375,
"grad_norm": 1.10260009765625,
"learning_rate": 1.4646464646464646e-07,
"loss": -0.0,
"num_tokens": 35189916.0,
"reward": 0.688281238079071,
"reward_std": 0.027485284954309464,
"rewards/video_r1_accuracy_reward/mean": 0.671875,
"rewards/video_r1_accuracy_reward/std": 0.4713755249977112,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 339
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 72.0,
"completions/max_terminated_length": 72.0,
"completions/mean_length": 39.0078125,
"completions/mean_terminated_length": 39.0078125,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 1.0868955850601196,
"epoch": 0.8585858585858586,
"frac_reward_zero_std": 0.8125,
"grad_norm": 2.8857719898223877,
"learning_rate": 1.4393939393939395e-07,
"loss": 0.0,
"num_tokens": 35299301.0,
"reward": 0.5992187261581421,
"reward_std": 0.06946974992752075,
"rewards/video_r1_accuracy_reward/mean": 0.578125,
"rewards/video_r1_accuracy_reward/std": 0.4957992732524872,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 340
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 106.0,
"completions/max_terminated_length": 106.0,
"completions/mean_length": 39.328125,
"completions/mean_terminated_length": 39.328125,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.0072779655456543,
"epoch": 0.8611111111111112,
"frac_reward_zero_std": 0.875,
"grad_norm": 2.1714649200439453,
"learning_rate": 1.4141414141414141e-07,
"loss": -0.0,
"num_tokens": 35401055.0,
"reward": 0.643750011920929,
"reward_std": 0.05497056990861893,
"rewards/video_r1_accuracy_reward/mean": 0.625,
"rewards/video_r1_accuracy_reward/std": 0.4860251843929291,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 341
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 87.0,
"completions/max_terminated_length": 87.0,
"completions/mean_length": 38.5,
"completions/mean_terminated_length": 38.5,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.0291508436203003,
"epoch": 0.8636363636363636,
"frac_reward_zero_std": 0.875,
"grad_norm": 5.02390718460083,
"learning_rate": 1.3888888888888888e-07,
"loss": -0.0,
"num_tokens": 35502143.0,
"reward": 0.651171863079071,
"reward_std": 0.062466755509376526,
"rewards/video_r1_accuracy_reward/mean": 0.6328125,
"rewards/video_r1_accuracy_reward/std": 0.4839322865009308,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 342
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 113.0,
"completions/max_terminated_length": 113.0,
"completions/mean_length": 40.9453125,
"completions/mean_terminated_length": 40.9453125,
"completions/min_length": 19.0,
"completions/min_terminated_length": 19.0,
"entropy": 1.0368475914001465,
"epoch": 0.8661616161616161,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 1.3636363636363635e-07,
"loss": 0.0,
"num_tokens": 35610304.0,
"reward": 0.6437499523162842,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.625,
"rewards/video_r1_accuracy_reward/std": 0.4860251843929291,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 343
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 77.0,
"completions/max_terminated_length": 77.0,
"completions/mean_length": 40.2734375,
"completions/mean_terminated_length": 40.2734375,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.0711452960968018,
"epoch": 0.8686868686868687,
"frac_reward_zero_std": 0.875,
"grad_norm": 2.956529140472412,
"learning_rate": 1.3383838383838384e-07,
"loss": -0.0,
"num_tokens": 35720067.0,
"reward": 0.5992187261581421,
"reward_std": 0.05922255665063858,
"rewards/video_r1_accuracy_reward/mean": 0.578125,
"rewards/video_r1_accuracy_reward/std": 0.4957992732524872,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 344
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 93.0,
"completions/max_terminated_length": 93.0,
"completions/mean_length": 42.9140625,
"completions/mean_terminated_length": 42.9140625,
"completions/min_length": 24.0,
"completions/min_terminated_length": 24.0,
"entropy": 1.1545445919036865,
"epoch": 0.8712121212121212,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 1.3131313131313133e-07,
"loss": 0.0,
"num_tokens": 35819584.0,
"reward": 0.5249999761581421,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.5,
"rewards/video_r1_accuracy_reward/std": 0.5019646286964417,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 345
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 79.0,
"completions/max_terminated_length": 79.0,
"completions/mean_length": 43.2421875,
"completions/mean_terminated_length": 43.2421875,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.1233935356140137,
"epoch": 0.8737373737373737,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.0883266925811768,
"learning_rate": 1.2878787878787877e-07,
"loss": 0.0,
"num_tokens": 35926447.0,
"reward": 0.6066405773162842,
"reward_std": 0.04847751557826996,
"rewards/video_r1_accuracy_reward/mean": 0.5859375,
"rewards/video_r1_accuracy_reward/std": 0.49449479579925537,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 346
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 94.0,
"completions/max_terminated_length": 94.0,
"completions/mean_length": 44.890625,
"completions/mean_terminated_length": 44.890625,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.1265748739242554,
"epoch": 0.8762626262626263,
"frac_reward_zero_std": 0.9375,
"grad_norm": 1.2674680948257446,
"learning_rate": 1.2626262626262626e-07,
"loss": 0.0,
"num_tokens": 36026345.0,
"reward": 0.814453125,
"reward_std": 0.020992232486605644,
"rewards/video_r1_accuracy_reward/mean": 0.8046875,
"rewards/video_r1_accuracy_reward/std": 0.3979988098144531,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 347
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 93.0,
"completions/max_terminated_length": 93.0,
"completions/mean_length": 39.609375,
"completions/mean_terminated_length": 39.609375,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.024735450744629,
"epoch": 0.8787878787878788,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 1.2373737373737373e-07,
"loss": 0.0,
"num_tokens": 36128031.0,
"reward": 0.5249999761581421,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.5,
"rewards/video_r1_accuracy_reward/std": 0.5019646286964417,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 348
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 78.0,
"completions/max_terminated_length": 78.0,
"completions/mean_length": 44.5546875,
"completions/mean_terminated_length": 44.5546875,
"completions/min_length": 24.0,
"completions/min_terminated_length": 24.0,
"entropy": 1.0595622062683105,
"epoch": 0.8813131313131313,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 1.2121212121212122e-07,
"loss": 0.0,
"num_tokens": 36225950.0,
"reward": 0.762499988079071,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.75,
"rewards/video_r1_accuracy_reward/std": 0.434714138507843,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 349
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 90.0,
"completions/max_terminated_length": 90.0,
"completions/mean_length": 39.4765625,
"completions/mean_terminated_length": 39.4765625,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.0925328731536865,
"epoch": 0.8838383838383839,
"frac_reward_zero_std": 0.75,
"grad_norm": 2.037301778793335,
"learning_rate": 1.1868686868686869e-07,
"loss": 0.0,
"num_tokens": 36321339.0,
"reward": 0.614062488079071,
"reward_std": 0.0937061756849289,
"rewards/video_r1_accuracy_reward/mean": 0.59375,
"rewards/video_r1_accuracy_reward/std": 0.4930621087551117,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 350
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 77.0,
"completions/max_terminated_length": 77.0,
"completions/mean_length": 39.9609375,
"completions/mean_terminated_length": 39.9609375,
"completions/min_length": 19.0,
"completions/min_terminated_length": 19.0,
"entropy": 1.053574800491333,
"epoch": 0.8863636363636364,
"frac_reward_zero_std": 0.875,
"grad_norm": 2.9315614700317383,
"learning_rate": 1.1616161616161615e-07,
"loss": 0.0,
"num_tokens": 36427534.0,
"reward": 0.7179687023162842,
"reward_std": 0.051721714437007904,
"rewards/video_r1_accuracy_reward/mean": 0.703125,
"rewards/video_r1_accuracy_reward/std": 0.45867621898651123,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 351
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 90.0,
"completions/max_terminated_length": 90.0,
"completions/mean_length": 41.765625,
"completions/mean_terminated_length": 41.765625,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 1.0955579280853271,
"epoch": 0.8888888888888888,
"frac_reward_zero_std": 0.875,
"grad_norm": 2.852755069732666,
"learning_rate": 1.1363636363636363e-07,
"loss": 0.0,
"num_tokens": 36517216.0,
"reward": 0.666015625,
"reward_std": 0.05272950232028961,
"rewards/video_r1_accuracy_reward/mean": 0.6484375,
"rewards/video_r1_accuracy_reward/std": 0.4793342351913452,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 352
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 100.0,
"completions/max_terminated_length": 100.0,
"completions/mean_length": 41.1484375,
"completions/mean_terminated_length": 41.1484375,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.0147664546966553,
"epoch": 0.8914141414141414,
"frac_reward_zero_std": 0.875,
"grad_norm": 3.0033349990844727,
"learning_rate": 1.111111111111111e-07,
"loss": -0.0,
"num_tokens": 36631027.0,
"reward": 0.4433593451976776,
"reward_std": 0.05272950232028961,
"rewards/video_r1_accuracy_reward/mean": 0.4140625,
"rewards/video_r1_accuracy_reward/std": 0.49449479579925537,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 353
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 92.0,
"completions/max_terminated_length": 92.0,
"completions/mean_length": 41.4140625,
"completions/mean_terminated_length": 41.4140625,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 1.0533812046051025,
"epoch": 0.8939393939393939,
"frac_reward_zero_std": 0.8125,
"grad_norm": 2.2410473823547363,
"learning_rate": 1.0858585858585859e-07,
"loss": -0.0,
"num_tokens": 36731992.0,
"reward": 0.666015625,
"reward_std": 0.06297669559717178,
"rewards/video_r1_accuracy_reward/mean": 0.6484375,
"rewards/video_r1_accuracy_reward/std": 0.4793342351913452,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 354
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 87.0,
"completions/max_terminated_length": 87.0,
"completions/mean_length": 43.734375,
"completions/mean_terminated_length": 43.734375,
"completions/min_length": 24.0,
"completions/min_terminated_length": 24.0,
"entropy": 1.12202787399292,
"epoch": 0.8964646464646465,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 1.0606060606060605e-07,
"loss": 0.0,
"num_tokens": 36848974.0,
"reward": 0.703125,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.6875,
"rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 355
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 128.0,
"completions/max_terminated_length": 128.0,
"completions/mean_length": 43.1796875,
"completions/mean_terminated_length": 43.1796875,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.0783835649490356,
"epoch": 0.898989898989899,
"frac_reward_zero_std": 0.9375,
"grad_norm": 4.910101890563965,
"learning_rate": 1.0353535353535353e-07,
"loss": 0.0,
"num_tokens": 36956045.0,
"reward": 0.7179687023162842,
"reward_std": 0.027485283091664314,
"rewards/video_r1_accuracy_reward/mean": 0.703125,
"rewards/video_r1_accuracy_reward/std": 0.45867621898651123,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 356
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 120.0,
"completions/max_terminated_length": 120.0,
"completions/mean_length": 38.59375,
"completions/mean_terminated_length": 38.59375,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.0902092456817627,
"epoch": 0.9015151515151515,
"frac_reward_zero_std": 0.9375,
"grad_norm": 1.2750098705291748,
"learning_rate": 1.01010101010101e-07,
"loss": -0.0,
"num_tokens": 37047657.0,
"reward": 0.6066405773162842,
"reward_std": 0.03072948195040226,
"rewards/video_r1_accuracy_reward/mean": 0.5859375,
"rewards/video_r1_accuracy_reward/std": 0.49449479579925537,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 357
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 107.0,
"completions/max_terminated_length": 107.0,
"completions/mean_length": 45.1796875,
"completions/mean_terminated_length": 45.1796875,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.1572962999343872,
"epoch": 0.9040404040404041,
"frac_reward_zero_std": 0.875,
"grad_norm": 2.8475706577301025,
"learning_rate": 9.848484848484848e-08,
"loss": 0.0,
"num_tokens": 37147744.0,
"reward": 0.829296886920929,
"reward_std": 0.04847751557826996,
"rewards/video_r1_accuracy_reward/mean": 0.8203125,
"rewards/video_r1_accuracy_reward/std": 0.3854354918003082,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 358
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 96.0,
"completions/max_terminated_length": 96.0,
"completions/mean_length": 42.375,
"completions/mean_terminated_length": 42.375,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 1.0927423238754272,
"epoch": 0.9065656565656566,
"frac_reward_zero_std": 0.8125,
"grad_norm": 2.16367244720459,
"learning_rate": 9.595959595959594e-08,
"loss": 0.0,
"num_tokens": 37259240.0,
"reward": 0.7921874523162842,
"reward_std": 0.0737217366695404,
"rewards/video_r1_accuracy_reward/mean": 0.78125,
"rewards/video_r1_accuracy_reward/std": 0.41502299904823303,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 359
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 119.0,
"completions/max_terminated_length": 119.0,
"completions/mean_length": 38.2265625,
"completions/mean_terminated_length": 38.2265625,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.0505998134613037,
"epoch": 0.9090909090909091,
"frac_reward_zero_std": 0.875,
"grad_norm": 2.0892300605773926,
"learning_rate": 9.343434343434344e-08,
"loss": -0.0,
"num_tokens": 37351021.0,
"reward": 0.5992187261581421,
"reward_std": 0.04198446497321129,
"rewards/video_r1_accuracy_reward/mean": 0.578125,
"rewards/video_r1_accuracy_reward/std": 0.4957992732524872,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 360
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0078125,
"completions/max_length": 384.0,
"completions/max_terminated_length": 95.0,
"completions/mean_length": 45.1015625,
"completions/mean_terminated_length": 42.433067321777344,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 0.9791218042373657,
"epoch": 0.9116161616161617,
"frac_reward_zero_std": 0.875,
"grad_norm": 3.24613881111145,
"learning_rate": 9.09090909090909e-08,
"loss": 0.0,
"num_tokens": 37463530.0,
"reward": 0.7101562023162842,
"reward_std": 0.04958236962556839,
"rewards/video_r1_accuracy_reward/mean": 0.6953125,
"rewards/video_r1_accuracy_reward/std": 0.46208351850509644,
"rewards/video_r1_format_reward/mean": 0.9921875,
"rewards/video_r1_format_reward/std": 0.0883883461356163,
"step": 361
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 107.0,
"completions/max_terminated_length": 107.0,
"completions/mean_length": 43.7890625,
"completions/mean_terminated_length": 43.7890625,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.1600117683410645,
"epoch": 0.9141414141414141,
"frac_reward_zero_std": 0.8125,
"grad_norm": 2.4082045555114746,
"learning_rate": 8.838383838383838e-08,
"loss": -0.0,
"num_tokens": 37546919.0,
"reward": 0.4507812559604645,
"reward_std": 0.090959832072258,
"rewards/video_r1_accuracy_reward/mean": 0.421875,
"rewards/video_r1_accuracy_reward/std": 0.4957992732524872,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 362
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 74.0,
"completions/max_terminated_length": 74.0,
"completions/mean_length": 40.6640625,
"completions/mean_terminated_length": 40.6640625,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 1.0563390254974365,
"epoch": 0.9166666666666666,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 8.585858585858585e-08,
"loss": 0.0,
"num_tokens": 37642260.0,
"reward": 0.5843749642372131,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.5625,
"rewards/video_r1_accuracy_reward/std": 0.49802759289741516,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 363
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 78.0,
"completions/max_terminated_length": 78.0,
"completions/mean_length": 40.3359375,
"completions/mean_terminated_length": 40.3359375,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.0542752742767334,
"epoch": 0.9191919191919192,
"frac_reward_zero_std": 0.75,
"grad_norm": 2.0764169692993164,
"learning_rate": 8.333333333333333e-08,
"loss": 0.0,
"num_tokens": 37752327.0,
"reward": 0.539843738079071,
"reward_std": 0.0937061756849289,
"rewards/video_r1_accuracy_reward/mean": 0.515625,
"rewards/video_r1_accuracy_reward/std": 0.5017194747924805,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 364
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 76.0,
"completions/max_terminated_length": 76.0,
"completions/mean_length": 39.75,
"completions/mean_terminated_length": 39.75,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.0389442443847656,
"epoch": 0.9217171717171717,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 8.080808080808082e-08,
"loss": 0.0,
"num_tokens": 37840775.0,
"reward": 0.703125,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.6875,
"rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 365
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 82.0,
"completions/max_terminated_length": 82.0,
"completions/mean_length": 43.8359375,
"completions/mean_terminated_length": 43.8359375,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.0553057193756104,
"epoch": 0.9242424242424242,
"frac_reward_zero_std": 0.8125,
"grad_norm": 1.5467336177825928,
"learning_rate": 7.828282828282828e-08,
"loss": 0.0,
"num_tokens": 37939746.0,
"reward": 0.6363281011581421,
"reward_std": 0.06297669559717178,
"rewards/video_r1_accuracy_reward/mean": 0.6171875,
"rewards/video_r1_accuracy_reward/std": 0.4879830479621887,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 366
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 93.0,
"completions/max_terminated_length": 93.0,
"completions/mean_length": 40.75,
"completions/mean_terminated_length": 40.75,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.1079405546188354,
"epoch": 0.9267676767676768,
"frac_reward_zero_std": 0.9375,
"grad_norm": 2.4892454147338867,
"learning_rate": 7.575757575757576e-08,
"loss": 0.0,
"num_tokens": 38040370.0,
"reward": 0.7550780773162842,
"reward_std": 0.020992232486605644,
"rewards/video_r1_accuracy_reward/mean": 0.7421875,
"rewards/video_r1_accuracy_reward/std": 0.43914905190467834,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 367
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 75.0,
"completions/max_terminated_length": 75.0,
"completions/mean_length": 39.0625,
"completions/mean_terminated_length": 39.0625,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.0692365169525146,
"epoch": 0.9292929292929293,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.5513192415237427,
"learning_rate": 7.323232323232323e-08,
"loss": 0.0,
"num_tokens": 38149562.0,
"reward": 0.5992187261581421,
"reward_std": 0.051721714437007904,
"rewards/video_r1_accuracy_reward/mean": 0.578125,
"rewards/video_r1_accuracy_reward/std": 0.4957992732524872,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 368
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 107.0,
"completions/max_terminated_length": 107.0,
"completions/mean_length": 38.9609375,
"completions/mean_terminated_length": 38.9609375,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 0.9736450910568237,
"epoch": 0.9318181818181818,
"frac_reward_zero_std": 0.75,
"grad_norm": 2.6586601734161377,
"learning_rate": 7.070707070707071e-08,
"loss": 0.0,
"num_tokens": 38255013.0,
"reward": 0.7550780773162842,
"reward_std": 0.09046198427677155,
"rewards/video_r1_accuracy_reward/mean": 0.7421875,
"rewards/video_r1_accuracy_reward/std": 0.43914905190467834,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 369
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 114.0,
"completions/max_terminated_length": 114.0,
"completions/mean_length": 42.0625,
"completions/mean_terminated_length": 42.0625,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 1.0568822622299194,
"epoch": 0.9343434343434344,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.516427993774414,
"learning_rate": 6.818181818181817e-08,
"loss": -0.0,
"num_tokens": 38344869.0,
"reward": 0.6214843392372131,
"reward_std": 0.05272950232028961,
"rewards/video_r1_accuracy_reward/mean": 0.6015625,
"rewards/video_r1_accuracy_reward/std": 0.4915000796318054,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 370
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 78.0,
"completions/max_terminated_length": 78.0,
"completions/mean_length": 41.4609375,
"completions/mean_terminated_length": 41.4609375,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 1.016709566116333,
"epoch": 0.9368686868686869,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 6.565656565656566e-08,
"loss": 0.0,
"num_tokens": 38439152.0,
"reward": 0.703125,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.6875,
"rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 371
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 96.0,
"completions/max_terminated_length": 96.0,
"completions/mean_length": 41.9921875,
"completions/mean_terminated_length": 41.9921875,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.039790391921997,
"epoch": 0.9393939393939394,
"frac_reward_zero_std": 0.9375,
"grad_norm": 1.421544075012207,
"learning_rate": 6.313131313131313e-08,
"loss": -0.0,
"num_tokens": 38528439.0,
"reward": 0.5472656488418579,
"reward_std": 0.03072948195040226,
"rewards/video_r1_accuracy_reward/mean": 0.5234375,
"rewards/video_r1_accuracy_reward/std": 0.5014128684997559,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 372
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 121.0,
"completions/max_terminated_length": 121.0,
"completions/mean_length": 39.5390625,
"completions/mean_terminated_length": 39.5390625,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 1.0156900882720947,
"epoch": 0.9419191919191919,
"frac_reward_zero_std": 0.9375,
"grad_norm": 1.0844476222991943,
"learning_rate": 6.060606060606061e-08,
"loss": -0.0,
"num_tokens": 38620764.0,
"reward": 0.8070312738418579,
"reward_std": 0.027485284954309464,
"rewards/video_r1_accuracy_reward/mean": 0.796875,
"rewards/video_r1_accuracy_reward/std": 0.40390563011169434,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 373
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 91.0,
"completions/max_terminated_length": 91.0,
"completions/mean_length": 41.515625,
"completions/mean_terminated_length": 41.515625,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.1009835004806519,
"epoch": 0.9444444444444444,
"frac_reward_zero_std": 0.875,
"grad_norm": 2.7400307655334473,
"learning_rate": 5.8080808080808076e-08,
"loss": 0.0,
"num_tokens": 38722894.0,
"reward": 0.443359375,
"reward_std": 0.058214765042066574,
"rewards/video_r1_accuracy_reward/mean": 0.4140625,
"rewards/video_r1_accuracy_reward/std": 0.49449479579925537,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 374
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 73.0,
"completions/max_terminated_length": 73.0,
"completions/mean_length": 39.25,
"completions/mean_terminated_length": 39.25,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 0.9999622106552124,
"epoch": 0.946969696969697,
"frac_reward_zero_std": 0.8125,
"grad_norm": 3.1559898853302,
"learning_rate": 5.555555555555555e-08,
"loss": -0.0,
"num_tokens": 38820150.0,
"reward": 0.740234375,
"reward_std": 0.08995203673839569,
"rewards/video_r1_accuracy_reward/mean": 0.7265625,
"rewards/video_r1_accuracy_reward/std": 0.447474867105484,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 375
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 75.0,
"completions/max_terminated_length": 75.0,
"completions/mean_length": 38.1484375,
"completions/mean_terminated_length": 38.1484375,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.0171918869018555,
"epoch": 0.9494949494949495,
"frac_reward_zero_std": 0.9375,
"grad_norm": 1.901557445526123,
"learning_rate": 5.303030303030303e-08,
"loss": 0.0,
"num_tokens": 38918177.0,
"reward": 0.814453125,
"reward_std": 0.020992232486605644,
"rewards/video_r1_accuracy_reward/mean": 0.8046875,
"rewards/video_r1_accuracy_reward/std": 0.3979988098144531,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 376
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 85.0,
"completions/max_terminated_length": 85.0,
"completions/mean_length": 40.1484375,
"completions/mean_terminated_length": 40.1484375,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.0424983501434326,
"epoch": 0.952020202020202,
"frac_reward_zero_std": 0.9375,
"grad_norm": 1.0092467069625854,
"learning_rate": 5.05050505050505e-08,
"loss": -0.0,
"num_tokens": 39023852.0,
"reward": 0.5101562738418579,
"reward_std": 0.027485284954309464,
"rewards/video_r1_accuracy_reward/mean": 0.484375,
"rewards/video_r1_accuracy_reward/std": 0.5017194747924805,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 377
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 99.0,
"completions/max_terminated_length": 99.0,
"completions/mean_length": 38.2890625,
"completions/mean_terminated_length": 38.2890625,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 0.9870564341545105,
"epoch": 0.9545454545454546,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 4.797979797979797e-08,
"loss": 0.0,
"num_tokens": 39119657.0,
"reward": 0.6437499523162842,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.625,
"rewards/video_r1_accuracy_reward/std": 0.4860251843929291,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 378
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 88.0,
"completions/max_terminated_length": 88.0,
"completions/mean_length": 41.375,
"completions/mean_terminated_length": 41.375,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.1362022161483765,
"epoch": 0.9570707070707071,
"frac_reward_zero_std": 0.75,
"grad_norm": 3.5773839950561523,
"learning_rate": 4.545454545454545e-08,
"loss": -0.0,
"num_tokens": 39225209.0,
"reward": 0.673046886920929,
"reward_std": 0.07057460397481918,
"rewards/video_r1_accuracy_reward/mean": 0.65625,
"rewards/video_r1_accuracy_reward/std": 0.47682511806488037,
"rewards/video_r1_format_reward/mean": 0.9921875,
"rewards/video_r1_format_reward/std": 0.0883883461356163,
"step": 379
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 72.0,
"completions/max_terminated_length": 72.0,
"completions/mean_length": 39.5703125,
"completions/mean_terminated_length": 39.5703125,
"completions/min_length": 24.0,
"completions/min_terminated_length": 24.0,
"entropy": 0.9890860319137573,
"epoch": 0.9595959595959596,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 4.292929292929292e-08,
"loss": 0.0,
"num_tokens": 39322978.0,
"reward": 0.7625000476837158,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.75,
"rewards/video_r1_accuracy_reward/std": 0.434714138507843,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 380
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 94.0,
"completions/max_terminated_length": 94.0,
"completions/mean_length": 42.4375,
"completions/mean_terminated_length": 42.4375,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.0359077453613281,
"epoch": 0.9621212121212122,
"frac_reward_zero_std": 0.8125,
"grad_norm": 1.6127235889434814,
"learning_rate": 4.040404040404041e-08,
"loss": 0.0,
"num_tokens": 39427570.0,
"reward": 0.7699218988418579,
"reward_std": 0.06297669559717178,
"rewards/video_r1_accuracy_reward/mean": 0.7578125,
"rewards/video_r1_accuracy_reward/std": 0.4300905168056488,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 381
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 85.0,
"completions/max_terminated_length": 85.0,
"completions/mean_length": 43.0,
"completions/mean_terminated_length": 43.0,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.0722706317901611,
"epoch": 0.9646464646464646,
"frac_reward_zero_std": 0.8125,
"grad_norm": 1.7954891920089722,
"learning_rate": 3.787878787878788e-08,
"loss": 0.0,
"num_tokens": 39525642.0,
"reward": 0.8218749761581421,
"reward_std": 0.07920700311660767,
"rewards/video_r1_accuracy_reward/mean": 0.8125,
"rewards/video_r1_accuracy_reward/std": 0.39184603095054626,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 382
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 83.0,
"completions/max_terminated_length": 83.0,
"completions/mean_length": 41.796875,
"completions/mean_terminated_length": 41.796875,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.0892385244369507,
"epoch": 0.9671717171717171,
"frac_reward_zero_std": 0.875,
"grad_norm": 2.684129238128662,
"learning_rate": 3.5353535353535353e-08,
"loss": -0.0,
"num_tokens": 39620936.0,
"reward": 0.717968761920929,
"reward_std": 0.05922255665063858,
"rewards/video_r1_accuracy_reward/mean": 0.703125,
"rewards/video_r1_accuracy_reward/std": 0.45867621898651123,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 383
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 67.0,
"completions/max_terminated_length": 67.0,
"completions/mean_length": 37.6328125,
"completions/mean_terminated_length": 37.6328125,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 0.9923283457756042,
"epoch": 0.9696969696969697,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 3.282828282828283e-08,
"loss": 0.0,
"num_tokens": 39719425.0,
"reward": 0.6437499523162842,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.625,
"rewards/video_r1_accuracy_reward/std": 0.4860251843929291,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 384
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 76.0,
"completions/max_terminated_length": 76.0,
"completions/mean_length": 40.5703125,
"completions/mean_terminated_length": 40.5703125,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 0.9553056955337524,
"epoch": 0.9722222222222222,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 3.0303030303030305e-08,
"loss": 0.0,
"num_tokens": 39825234.0,
"reward": 0.762499988079071,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.75,
"rewards/video_r1_accuracy_reward/std": 0.434714138507843,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 385
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 94.0,
"completions/max_terminated_length": 94.0,
"completions/mean_length": 41.765625,
"completions/mean_terminated_length": 41.765625,
"completions/min_length": 20.0,
"completions/min_terminated_length": 20.0,
"entropy": 1.0995062589645386,
"epoch": 0.9747474747474747,
"frac_reward_zero_std": 0.9375,
"grad_norm": 1.4076322317123413,
"learning_rate": 2.7777777777777774e-08,
"loss": 0.0,
"num_tokens": 39925308.0,
"reward": 0.7550780773162842,
"reward_std": 0.020992232486605644,
"rewards/video_r1_accuracy_reward/mean": 0.7421875,
"rewards/video_r1_accuracy_reward/std": 0.43914905190467834,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 386
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 80.0,
"completions/max_terminated_length": 80.0,
"completions/mean_length": 42.9453125,
"completions/mean_terminated_length": 42.9453125,
"completions/min_length": 25.0,
"completions/min_terminated_length": 25.0,
"entropy": 0.9817566275596619,
"epoch": 0.9772727272727273,
"frac_reward_zero_std": 0.9375,
"grad_norm": 0.9628924131393433,
"learning_rate": 2.525252525252525e-08,
"loss": 0.0,
"num_tokens": 40033437.0,
"reward": 0.9332031011581421,
"reward_std": 0.020992232486605644,
"rewards/video_r1_accuracy_reward/mean": 0.9296875,
"rewards/video_r1_accuracy_reward/std": 0.2566775679588318,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 387
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 87.0,
"completions/max_terminated_length": 87.0,
"completions/mean_length": 43.6328125,
"completions/mean_terminated_length": 43.6328125,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.0863628387451172,
"epoch": 0.9797979797979798,
"frac_reward_zero_std": 0.8125,
"grad_norm": 2.400270462036133,
"learning_rate": 2.2727272727272725e-08,
"loss": 0.0,
"num_tokens": 40137166.0,
"reward": 0.6734374761581421,
"reward_std": 0.06946974992752075,
"rewards/video_r1_accuracy_reward/mean": 0.65625,
"rewards/video_r1_accuracy_reward/std": 0.47682511806488037,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 388
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 76.0,
"completions/max_terminated_length": 76.0,
"completions/mean_length": 39.21875,
"completions/mean_terminated_length": 39.21875,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 0.9649280309677124,
"epoch": 0.9823232323232324,
"frac_reward_zero_std": 0.875,
"grad_norm": 1.8059344291687012,
"learning_rate": 2.0202020202020204e-08,
"loss": 0.0,
"num_tokens": 40238914.0,
"reward": 0.6808593273162842,
"reward_std": 0.04847751557826996,
"rewards/video_r1_accuracy_reward/mean": 0.6640625,
"rewards/video_r1_accuracy_reward/std": 0.47417303919792175,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 389
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 106.0,
"completions/max_terminated_length": 106.0,
"completions/mean_length": 41.9375,
"completions/mean_terminated_length": 41.9375,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 1.0557193756103516,
"epoch": 0.9848484848484849,
"frac_reward_zero_std": 0.875,
"grad_norm": 3.0474207401275635,
"learning_rate": 1.7676767676767677e-08,
"loss": -0.0,
"num_tokens": 40355434.0,
"reward": 0.7699218988418579,
"reward_std": 0.062466755509376526,
"rewards/video_r1_accuracy_reward/mean": 0.7578125,
"rewards/video_r1_accuracy_reward/std": 0.4300905168056488,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 390
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 102.0,
"completions/max_terminated_length": 102.0,
"completions/mean_length": 39.8359375,
"completions/mean_terminated_length": 39.8359375,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 0.9982803463935852,
"epoch": 0.9873737373737373,
"frac_reward_zero_std": 0.9375,
"grad_norm": 0.9107327461242676,
"learning_rate": 1.5151515151515152e-08,
"loss": -0.0,
"num_tokens": 40468109.0,
"reward": 0.651171863079071,
"reward_std": 0.020992232486605644,
"rewards/video_r1_accuracy_reward/mean": 0.6328125,
"rewards/video_r1_accuracy_reward/std": 0.4839322865009308,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 391
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 100.0,
"completions/max_terminated_length": 100.0,
"completions/mean_length": 44.515625,
"completions/mean_terminated_length": 44.515625,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.0974993705749512,
"epoch": 0.98989898989899,
"frac_reward_zero_std": 0.9375,
"grad_norm": 1.1084349155426025,
"learning_rate": 1.2626262626262625e-08,
"loss": 0.0,
"num_tokens": 40570487.0,
"reward": 0.6585937142372131,
"reward_std": 0.027485283091664314,
"rewards/video_r1_accuracy_reward/mean": 0.640625,
"rewards/video_r1_accuracy_reward/std": 0.481702595949173,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 392
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 81.0,
"completions/max_terminated_length": 81.0,
"completions/mean_length": 38.5,
"completions/mean_terminated_length": 38.5,
"completions/min_length": 21.0,
"completions/min_terminated_length": 21.0,
"entropy": 1.0579657554626465,
"epoch": 0.9924242424242424,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 1.0101010101010102e-08,
"loss": 0.0,
"num_tokens": 40667895.0,
"reward": 0.5843750238418579,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.5625,
"rewards/video_r1_accuracy_reward/std": 0.49802759289741516,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 393
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 90.0,
"completions/max_terminated_length": 90.0,
"completions/mean_length": 39.2890625,
"completions/mean_terminated_length": 39.2890625,
"completions/min_length": 22.0,
"completions/min_terminated_length": 22.0,
"entropy": 1.0520402193069458,
"epoch": 0.9949494949494949,
"frac_reward_zero_std": 0.8125,
"grad_norm": 1.8841626644134521,
"learning_rate": 7.575757575757576e-09,
"loss": -0.0,
"num_tokens": 40764580.0,
"reward": 0.6804687976837158,
"reward_std": 0.04958236962556839,
"rewards/video_r1_accuracy_reward/mean": 0.6640625,
"rewards/video_r1_accuracy_reward/std": 0.47417303919792175,
"rewards/video_r1_format_reward/mean": 0.9921875,
"rewards/video_r1_format_reward/std": 0.0883883461356163,
"step": 394
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 99.0,
"completions/max_terminated_length": 99.0,
"completions/mean_length": 41.4140625,
"completions/mean_terminated_length": 41.4140625,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 1.0266304016113281,
"epoch": 0.9974747474747475,
"frac_reward_zero_std": 0.8125,
"grad_norm": 4.306983470916748,
"learning_rate": 5.050505050505051e-09,
"loss": 0.0,
"num_tokens": 40861593.0,
"reward": 0.651171863079071,
"reward_std": 0.06297669559717178,
"rewards/video_r1_accuracy_reward/mean": 0.6328125,
"rewards/video_r1_accuracy_reward/std": 0.4839322865009308,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 395
},
{
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 74.0,
"completions/max_terminated_length": 74.0,
"completions/mean_length": 38.1640625,
"completions/mean_terminated_length": 38.1640625,
"completions/min_length": 23.0,
"completions/min_terminated_length": 23.0,
"entropy": 0.9408200979232788,
"epoch": 1.0,
"frac_reward_zero_std": 1.0,
"grad_norm": 0.0,
"learning_rate": 2.5252525252525255e-09,
"loss": 0.0,
"num_tokens": 40963742.0,
"reward": 0.762499988079071,
"reward_std": 0.0,
"rewards/video_r1_accuracy_reward/mean": 0.75,
"rewards/video_r1_accuracy_reward/std": 0.434714138507843,
"rewards/video_r1_format_reward/mean": 1.0,
"rewards/video_r1_format_reward/std": 0.0,
"step": 396
}
],
"logging_steps": 1.0,
"max_steps": 396,
"num_input_tokens_seen": 40963742,
"num_train_epochs": 1,
"save_steps": 159,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}