{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 396, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.0, "completions/max_terminated_length": 248.0, "completions/mean_length": 122.875, "completions/mean_terminated_length": 122.875, "completions/min_length": 51.0, "completions/min_terminated_length": 51.0, "entropy": 0.7635231614112854, "epoch": 0.0025252525252525255, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 105648.0, "reward": 0.0, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.0, "rewards/video_r1_accuracy_reward/std": 0.0, "rewards/video_r1_format_reward/mean": 0.0, "rewards/video_r1_format_reward/std": 0.0, "step": 1 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 384.0, "completions/max_terminated_length": 364.0, "completions/mean_length": 131.6328125, "completions/mean_terminated_length": 129.64566040039062, "completions/min_length": 61.0, "completions/min_terminated_length": 61.0, "entropy": 0.7612149715423584, "epoch": 0.005050505050505051, "frac_reward_zero_std": 0.9375, "grad_norm": 0.6195482015609741, "learning_rate": 9.974747474747475e-07, "loss": -0.0, "num_tokens": 223745.0, "reward": 0.0078125, "reward_std": 0.022097086533904076, "rewards/video_r1_accuracy_reward/mean": 0.0078125, "rewards/video_r1_accuracy_reward/std": 0.0883883461356163, "rewards/video_r1_format_reward/mean": 0.0078125, "rewards/video_r1_format_reward/std": 0.0883883461356163, "step": 2 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 286.0, "completions/max_terminated_length": 286.0, "completions/mean_length": 134.2265625, "completions/mean_terminated_length": 134.2265625, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "entropy": 0.7807673215866089, "epoch": 0.007575757575757576, "frac_reward_zero_std": 0.9375, "grad_norm": 0.5153651833534241, "learning_rate": 9.949494949494949e-07, "loss": 0.0, "num_tokens": 336902.0, "reward": 0.0015625000232830644, "reward_std": 0.0016703829169273376, "rewards/video_r1_accuracy_reward/mean": 0.0, "rewards/video_r1_accuracy_reward/std": 0.0, "rewards/video_r1_format_reward/mean": 0.03125, "rewards/video_r1_format_reward/std": 0.1746762990951538, "step": 3 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 384.0, "completions/max_terminated_length": 290.0, "completions/mean_length": 125.453125, "completions/mean_terminated_length": 123.41732025146484, "completions/min_length": 48.0, "completions/min_terminated_length": 48.0, "entropy": 0.7577059268951416, "epoch": 0.010101010101010102, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 9.924242424242425e-07, "loss": 0.0, "num_tokens": 450968.0, "reward": 0.0, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.0, "rewards/video_r1_accuracy_reward/std": 0.0, "rewards/video_r1_format_reward/mean": 0.0, "rewards/video_r1_format_reward/std": 0.0, "step": 4 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 291.0, "completions/max_terminated_length": 291.0, "completions/mean_length": 125.5546875, "completions/mean_terminated_length": 125.5546875, "completions/min_length": 52.0, "completions/min_terminated_length": 52.0, "entropy": 0.7316204309463501, "epoch": 0.012626262626262626, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 9.898989898989898e-07, "loss": 0.0, "num_tokens": 569063.0, "reward": 0.0, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.0, "rewards/video_r1_accuracy_reward/std": 0.0, "rewards/video_r1_format_reward/mean": 0.0, "rewards/video_r1_format_reward/std": 0.0, "step": 5 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 302.0, "completions/max_terminated_length": 302.0, "completions/mean_length": 134.765625, "completions/mean_terminated_length": 134.765625, "completions/min_length": 58.0, "completions/min_terminated_length": 58.0, "entropy": 0.7153864502906799, "epoch": 0.015151515151515152, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 9.873737373737374e-07, "loss": 0.0, "num_tokens": 677257.0, "reward": 0.0, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.0, "rewards/video_r1_accuracy_reward/std": 0.0, "rewards/video_r1_format_reward/mean": 0.0, "rewards/video_r1_format_reward/std": 0.0, "step": 6 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 347.0, "completions/max_terminated_length": 347.0, "completions/mean_length": 142.9140625, "completions/mean_terminated_length": 142.9140625, "completions/min_length": 47.0, "completions/min_terminated_length": 47.0, "entropy": 0.6941128373146057, "epoch": 0.017676767676767676, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 9.848484848484847e-07, "loss": 0.0, "num_tokens": 791206.0, "reward": 0.0, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.0, "rewards/video_r1_accuracy_reward/std": 0.0, "rewards/video_r1_format_reward/mean": 0.0, "rewards/video_r1_format_reward/std": 0.0, "step": 7 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 339.0, "completions/max_terminated_length": 339.0, "completions/mean_length": 153.609375, "completions/mean_terminated_length": 153.609375, "completions/min_length": 50.0, "completions/min_terminated_length": 50.0, "entropy": 0.7712859511375427, "epoch": 0.020202020202020204, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 9.823232323232323e-07, "loss": 0.0, "num_tokens": 906244.0, "reward": 0.0, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.0, "rewards/video_r1_accuracy_reward/std": 0.0, "rewards/video_r1_format_reward/mean": 0.0, "rewards/video_r1_format_reward/std": 0.0, "step": 8 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 308.0, "completions/max_terminated_length": 308.0, "completions/mean_length": 133.671875, "completions/mean_terminated_length": 133.671875, "completions/min_length": 63.0, "completions/min_terminated_length": 63.0, "entropy": 0.7461484670639038, "epoch": 0.022727272727272728, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 9.797979797979797e-07, "loss": 0.0, "num_tokens": 1022034.0, "reward": 0.0, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.0, "rewards/video_r1_accuracy_reward/std": 0.0, "rewards/video_r1_format_reward/mean": 0.0, "rewards/video_r1_format_reward/std": 0.0, "step": 9 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 274.0, "completions/max_terminated_length": 274.0, "completions/mean_length": 127.5390625, "completions/mean_terminated_length": 127.5390625, "completions/min_length": 55.0, "completions/min_terminated_length": 55.0, "entropy": 0.7210257649421692, "epoch": 0.025252525252525252, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 9.772727272727273e-07, "loss": 0.0, "num_tokens": 1149551.0, "reward": 0.0, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.0, "rewards/video_r1_accuracy_reward/std": 0.0, "rewards/video_r1_format_reward/mean": 0.0, "rewards/video_r1_format_reward/std": 0.0, "step": 10 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 297.0, "completions/max_terminated_length": 297.0, "completions/mean_length": 131.25, "completions/mean_terminated_length": 131.25, "completions/min_length": 49.0, "completions/min_terminated_length": 49.0, "entropy": 0.7806915640830994, "epoch": 0.027777777777777776, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 9.747474747474746e-07, "loss": 0.0, "num_tokens": 1255743.0, "reward": 0.0, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.0, "rewards/video_r1_accuracy_reward/std": 0.0, "rewards/video_r1_format_reward/mean": 0.0, "rewards/video_r1_format_reward/std": 0.0, "step": 11 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 323.0, "completions/max_terminated_length": 323.0, "completions/mean_length": 130.3203125, "completions/mean_terminated_length": 130.3203125, "completions/min_length": 42.0, "completions/min_terminated_length": 42.0, "entropy": 0.6973187923431396, "epoch": 0.030303030303030304, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 9.722222222222222e-07, "loss": 0.0, "num_tokens": 1366240.0, "reward": 0.0, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.0, "rewards/video_r1_accuracy_reward/std": 0.0, "rewards/video_r1_format_reward/mean": 0.0, "rewards/video_r1_format_reward/std": 0.0, "step": 12 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 384.0, "completions/max_terminated_length": 377.0, "completions/mean_length": 154.109375, "completions/mean_terminated_length": 152.29922485351562, "completions/min_length": 57.0, "completions/min_terminated_length": 57.0, "entropy": 0.6917202472686768, "epoch": 0.03282828282828283, "frac_reward_zero_std": 0.9375, "grad_norm": 0.3806738555431366, "learning_rate": 9.696969696969698e-07, "loss": 0.0, "num_tokens": 1474046.0, "reward": 0.0003906250058207661, "reward_std": 0.0011048543965443969, "rewards/video_r1_accuracy_reward/mean": 0.0, "rewards/video_r1_accuracy_reward/std": 0.0, "rewards/video_r1_format_reward/mean": 0.0078125, "rewards/video_r1_format_reward/std": 0.0883883461356163, "step": 13 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 288.0, "completions/max_terminated_length": 288.0, "completions/mean_length": 139.734375, "completions/mean_terminated_length": 139.734375, "completions/min_length": 37.0, "completions/min_terminated_length": 37.0, "entropy": 0.6320770978927612, "epoch": 0.03535353535353535, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 9.671717171717171e-07, "loss": 0.0, "num_tokens": 1585796.0, "reward": 0.0, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.0, "rewards/video_r1_accuracy_reward/std": 0.0, "rewards/video_r1_format_reward/mean": 0.0, "rewards/video_r1_format_reward/std": 0.0, "step": 14 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 380.0, "completions/max_terminated_length": 380.0, "completions/mean_length": 158.1953125, "completions/mean_terminated_length": 158.1953125, "completions/min_length": 50.0, "completions/min_terminated_length": 50.0, "entropy": 0.6951600313186646, "epoch": 0.03787878787878788, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 9.646464646464647e-07, "loss": 0.0, "num_tokens": 1710981.0, "reward": 0.0, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.0, "rewards/video_r1_accuracy_reward/std": 0.0, "rewards/video_r1_format_reward/mean": 0.0, "rewards/video_r1_format_reward/std": 0.0, "step": 15 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 276.0, "completions/max_terminated_length": 276.0, "completions/mean_length": 136.390625, "completions/mean_terminated_length": 136.390625, "completions/min_length": 57.0, "completions/min_terminated_length": 57.0, "entropy": 0.7261592149734497, "epoch": 0.04040404040404041, "frac_reward_zero_std": 0.9375, "grad_norm": 0.4154719114303589, "learning_rate": 9.62121212121212e-07, "loss": -0.0, "num_tokens": 1830455.0, "reward": 0.0078125, "reward_std": 0.022097086533904076, "rewards/video_r1_accuracy_reward/mean": 0.0078125, "rewards/video_r1_accuracy_reward/std": 0.0883883461356163, "rewards/video_r1_format_reward/mean": 0.0078125, "rewards/video_r1_format_reward/std": 0.0883883461356163, "step": 16 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 279.0, "completions/max_terminated_length": 279.0, "completions/mean_length": 135.8203125, "completions/mean_terminated_length": 135.8203125, "completions/min_length": 52.0, "completions/min_terminated_length": 52.0, "entropy": 0.7299904823303223, "epoch": 0.04292929292929293, "frac_reward_zero_std": 0.9375, "grad_norm": 0.41257989406585693, "learning_rate": 9.595959595959596e-07, "loss": -0.0, "num_tokens": 1935088.0, "reward": 0.0074218749068677425, "reward_std": 0.020992232486605644, "rewards/video_r1_accuracy_reward/mean": 0.0078125, "rewards/video_r1_accuracy_reward/std": 0.0883883461356163, "rewards/video_r1_format_reward/mean": 0.0, "rewards/video_r1_format_reward/std": 0.0, "step": 17 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 272.0, "completions/max_terminated_length": 272.0, "completions/mean_length": 143.9296875, "completions/mean_terminated_length": 143.9296875, "completions/min_length": 68.0, "completions/min_terminated_length": 68.0, "entropy": 0.7212563753128052, "epoch": 0.045454545454545456, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 9.57070707070707e-07, "loss": 0.0, "num_tokens": 2044351.0, "reward": 0.0, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.0, "rewards/video_r1_accuracy_reward/std": 0.0, "rewards/video_r1_format_reward/mean": 0.0, "rewards/video_r1_format_reward/std": 0.0, "step": 18 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 317.0, "completions/max_terminated_length": 317.0, "completions/mean_length": 133.453125, "completions/mean_terminated_length": 133.453125, "completions/min_length": 62.0, "completions/min_terminated_length": 62.0, "entropy": 0.7506937384605408, "epoch": 0.047979797979797977, "frac_reward_zero_std": 0.9375, "grad_norm": 0.6540532112121582, "learning_rate": 9.545454545454546e-07, "loss": 0.0, "num_tokens": 2156297.0, "reward": 0.0003906250058207661, "reward_std": 0.0011048543965443969, "rewards/video_r1_accuracy_reward/mean": 0.0, "rewards/video_r1_accuracy_reward/std": 0.0, "rewards/video_r1_format_reward/mean": 0.0078125, "rewards/video_r1_format_reward/std": 0.0883883461356163, "step": 19 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 353.0, "completions/max_terminated_length": 353.0, "completions/mean_length": 141.8515625, "completions/mean_terminated_length": 141.8515625, "completions/min_length": 60.0, "completions/min_terminated_length": 60.0, "entropy": 0.7586977481842041, "epoch": 0.050505050505050504, "frac_reward_zero_std": 0.9375, "grad_norm": 0.6235540509223938, "learning_rate": 9.520202020202019e-07, "loss": 0.0, "num_tokens": 2288918.0, "reward": 0.0074218749068677425, "reward_std": 0.020992232486605644, "rewards/video_r1_accuracy_reward/mean": 0.0078125, "rewards/video_r1_accuracy_reward/std": 0.0883883461356163, "rewards/video_r1_format_reward/mean": 0.0, "rewards/video_r1_format_reward/std": 0.0, "step": 20 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 380.0, "completions/max_terminated_length": 380.0, "completions/mean_length": 131.8359375, "completions/mean_terminated_length": 131.8359375, "completions/min_length": 54.0, "completions/min_terminated_length": 54.0, "entropy": 0.7679413557052612, "epoch": 0.05303030303030303, "frac_reward_zero_std": 0.9375, "grad_norm": 0.4877886474132538, "learning_rate": 9.494949494949495e-07, "loss": 0.0, "num_tokens": 2408449.0, "reward": 0.0011718750465661287, "reward_std": 0.0016173411859199405, "rewards/video_r1_accuracy_reward/mean": 0.0, "rewards/video_r1_accuracy_reward/std": 0.0, "rewards/video_r1_format_reward/mean": 0.0234375, "rewards/video_r1_format_reward/std": 0.15188287198543549, "step": 21 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 384.0, "completions/max_terminated_length": 355.0, "completions/mean_length": 146.3671875, "completions/mean_terminated_length": 142.59524536132812, "completions/min_length": 59.0, "completions/min_terminated_length": 59.0, "entropy": 0.7015185356140137, "epoch": 0.05555555555555555, "frac_reward_zero_std": 0.9375, "grad_norm": 0.34478825330734253, "learning_rate": 9.46969696969697e-07, "loss": -0.0, "num_tokens": 2526696.0, "reward": 0.0078125, "reward_std": 0.022097086533904076, "rewards/video_r1_accuracy_reward/mean": 0.0078125, "rewards/video_r1_accuracy_reward/std": 0.0883883461356163, "rewards/video_r1_format_reward/mean": 0.0078125, "rewards/video_r1_format_reward/std": 0.0883883461356163, "step": 22 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 306.0, "completions/max_terminated_length": 306.0, "completions/mean_length": 129.15625, "completions/mean_terminated_length": 129.15625, "completions/min_length": 56.0, "completions/min_terminated_length": 56.0, "entropy": 0.7712253332138062, "epoch": 0.05808080808080808, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 9.444444444444444e-07, "loss": 0.0, "num_tokens": 2640884.0, "reward": 0.0, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.0, "rewards/video_r1_accuracy_reward/std": 0.0, "rewards/video_r1_format_reward/mean": 0.0, "rewards/video_r1_format_reward/std": 0.0, "step": 23 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 294.0, "completions/max_terminated_length": 294.0, "completions/mean_length": 135.5078125, "completions/mean_terminated_length": 135.5078125, "completions/min_length": 61.0, "completions/min_terminated_length": 61.0, "entropy": 0.8073737621307373, "epoch": 0.06060606060606061, "frac_reward_zero_std": 0.8125, "grad_norm": 0.915313720703125, "learning_rate": 9.419191919191919e-07, "loss": -0.0, "num_tokens": 2747125.0, "reward": 0.00859374925494194, "reward_std": 0.02430679462850094, "rewards/video_r1_accuracy_reward/mean": 0.0078125, "rewards/video_r1_accuracy_reward/std": 0.0883883461356163, "rewards/video_r1_format_reward/mean": 0.0234375, "rewards/video_r1_format_reward/std": 0.15188287198543549, "step": 24 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 339.0, "completions/max_terminated_length": 339.0, "completions/mean_length": 131.3515625, "completions/mean_terminated_length": 131.3515625, "completions/min_length": 39.0, "completions/min_terminated_length": 39.0, "entropy": 0.7123849987983704, "epoch": 0.06313131313131314, "frac_reward_zero_std": 0.5, "grad_norm": 1.4686827659606934, "learning_rate": 9.393939393939395e-07, "loss": -0.0, "num_tokens": 2863482.0, "reward": 0.0859375, "reward_std": 0.17301878333091736, "rewards/video_r1_accuracy_reward/mean": 0.0859375, "rewards/video_r1_accuracy_reward/std": 0.2813730239868164, "rewards/video_r1_format_reward/mean": 0.0859375, "rewards/video_r1_format_reward/std": 0.2813730239868164, "step": 25 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.0, "completions/max_terminated_length": 248.0, "completions/mean_length": 126.0625, "completions/mean_terminated_length": 126.0625, "completions/min_length": 51.0, "completions/min_terminated_length": 51.0, "entropy": 0.7570379972457886, "epoch": 0.06565656565656566, "frac_reward_zero_std": 0.625, "grad_norm": 1.2738335132598877, "learning_rate": 9.368686868686868e-07, "loss": 0.0, "num_tokens": 2991954.0, "reward": 0.04218749701976776, "reward_std": 0.09613416343927383, "rewards/video_r1_accuracy_reward/mean": 0.0390625, "rewards/video_r1_accuracy_reward/std": 0.194504976272583, "rewards/video_r1_format_reward/mean": 0.1015625, "rewards/video_r1_format_reward/std": 0.3032590448856354, "step": 26 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 327.0, "completions/max_terminated_length": 327.0, "completions/mean_length": 144.0859375, "completions/mean_terminated_length": 144.0859375, "completions/min_length": 53.0, "completions/min_terminated_length": 53.0, "entropy": 0.7464326620101929, "epoch": 0.06818181818181818, "frac_reward_zero_std": 0.375, "grad_norm": 1.6063857078552246, "learning_rate": 9.343434343434343e-07, "loss": -0.0, "num_tokens": 3113973.0, "reward": 0.111328125, "reward_std": 0.20764078199863434, "rewards/video_r1_accuracy_reward/mean": 0.109375, "rewards/video_r1_accuracy_reward/std": 0.31333550810813904, "rewards/video_r1_format_reward/mean": 0.1484375, "rewards/video_r1_format_reward/std": 0.356930136680603, "step": 27 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 258.0, "completions/max_terminated_length": 258.0, "completions/mean_length": 135.25, "completions/mean_terminated_length": 135.25, "completions/min_length": 54.0, "completions/min_terminated_length": 54.0, "entropy": 0.6928939819335938, "epoch": 0.0707070707070707, "frac_reward_zero_std": 0.4375, "grad_norm": 1.5145729780197144, "learning_rate": 9.318181818181817e-07, "loss": 0.0, "num_tokens": 3228973.0, "reward": 0.03476562350988388, "reward_std": 0.09340079128742218, "rewards/video_r1_accuracy_reward/mean": 0.03125, "rewards/video_r1_accuracy_reward/std": 0.1746762990951538, "rewards/video_r1_format_reward/mean": 0.1015625, "rewards/video_r1_format_reward/std": 0.3032590448856354, "step": 28 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 378.0, "completions/max_terminated_length": 378.0, "completions/mean_length": 144.5, "completions/mean_terminated_length": 144.5, "completions/min_length": 59.0, "completions/min_terminated_length": 59.0, "entropy": 0.6991415023803711, "epoch": 0.07323232323232323, "frac_reward_zero_std": 0.0625, "grad_norm": 1.993449091911316, "learning_rate": 9.292929292929292e-07, "loss": -0.0, "num_tokens": 3338453.0, "reward": 0.31640625, "reward_std": 0.34012913703918457, "rewards/video_r1_accuracy_reward/mean": 0.3125, "rewards/video_r1_accuracy_reward/std": 0.4653336703777313, "rewards/video_r1_format_reward/mean": 0.390625, "rewards/video_r1_format_reward/std": 0.4898075461387634, "step": 29 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 349.0, "completions/max_terminated_length": 349.0, "completions/mean_length": 134.1015625, "completions/mean_terminated_length": 134.1015625, "completions/min_length": 59.0, "completions/min_terminated_length": 59.0, "entropy": 0.7583435773849487, "epoch": 0.07575757575757576, "frac_reward_zero_std": 0.0625, "grad_norm": 2.37294340133667, "learning_rate": 9.267676767676768e-07, "loss": -0.0, "num_tokens": 3450378.0, "reward": 0.3359375, "reward_std": 0.32593491673469543, "rewards/video_r1_accuracy_reward/mean": 0.328125, "rewards/video_r1_accuracy_reward/std": 0.4713755249977112, "rewards/video_r1_format_reward/mean": 0.484375, "rewards/video_r1_format_reward/std": 0.5017194747924805, "step": 30 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 384.0, "completions/max_terminated_length": 366.0, "completions/mean_length": 134.0546875, "completions/mean_terminated_length": 132.08660888671875, "completions/min_length": 44.0, "completions/min_terminated_length": 44.0, "entropy": 0.7964510917663574, "epoch": 0.07828282828282829, "frac_reward_zero_std": 0.1875, "grad_norm": 1.9095652103424072, "learning_rate": 9.242424242424241e-07, "loss": -0.0, "num_tokens": 3560025.0, "reward": 0.3550781011581421, "reward_std": 0.267575740814209, "rewards/video_r1_accuracy_reward/mean": 0.34375, "rewards/video_r1_accuracy_reward/std": 0.47682511806488037, "rewards/video_r1_format_reward/mean": 0.5703125, "rewards/video_r1_format_reward/std": 0.4969765841960907, "step": 31 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 289.0, "completions/max_terminated_length": 289.0, "completions/mean_length": 113.0625, "completions/mean_terminated_length": 113.0625, "completions/min_length": 46.0, "completions/min_terminated_length": 46.0, "entropy": 0.6799850463867188, "epoch": 0.08080808080808081, "frac_reward_zero_std": 0.0625, "grad_norm": 2.1304097175598145, "learning_rate": 9.217171717171717e-07, "loss": -0.0, "num_tokens": 3680297.0, "reward": 0.5066406726837158, "reward_std": 0.4047975242137909, "rewards/video_r1_accuracy_reward/mean": 0.4921875, "rewards/video_r1_accuracy_reward/std": 0.5019033551216125, "rewards/video_r1_format_reward/mean": 0.78125, "rewards/video_r1_format_reward/std": 0.41502299904823303, "step": 32 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 294.0, "completions/max_terminated_length": 294.0, "completions/mean_length": 108.75, "completions/mean_terminated_length": 108.75, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "entropy": 0.7177351713180542, "epoch": 0.08333333333333333, "frac_reward_zero_std": 0.25, "grad_norm": 2.0556108951568604, "learning_rate": 9.191919191919192e-07, "loss": -0.0, "num_tokens": 3797793.0, "reward": 0.47968751192092896, "reward_std": 0.27063843607902527, "rewards/video_r1_accuracy_reward/mean": 0.4609375, "rewards/video_r1_accuracy_reward/std": 0.5004304051399231, "rewards/video_r1_format_reward/mean": 0.8359375, "rewards/video_r1_format_reward/std": 0.371787428855896, "step": 33 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 270.0, "completions/max_terminated_length": 270.0, "completions/mean_length": 110.3046875, "completions/mean_terminated_length": 110.3046875, "completions/min_length": 54.0, "completions/min_terminated_length": 54.0, "entropy": 0.7837837934494019, "epoch": 0.08585858585858586, "frac_reward_zero_std": 0.25, "grad_norm": 1.8440849781036377, "learning_rate": 9.166666666666665e-07, "loss": -0.0, "num_tokens": 3912032.0, "reward": 0.661328136920929, "reward_std": 0.29765215516090393, "rewards/video_r1_accuracy_reward/mean": 0.6484375, "rewards/video_r1_accuracy_reward/std": 0.4793342351913452, "rewards/video_r1_format_reward/mean": 0.90625, "rewards/video_r1_format_reward/std": 0.29262590408325195, "step": 34 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 283.0, "completions/max_terminated_length": 283.0, "completions/mean_length": 99.5546875, "completions/mean_terminated_length": 99.5546875, "completions/min_length": 40.0, "completions/min_terminated_length": 40.0, "entropy": 0.8179616928100586, "epoch": 0.08838383838383838, "frac_reward_zero_std": 0.125, "grad_norm": 2.0900862216949463, "learning_rate": 9.141414141414141e-07, "loss": -0.0, "num_tokens": 4013039.0, "reward": 0.48281246423721313, "reward_std": 0.3318884074687958, "rewards/video_r1_accuracy_reward/mean": 0.4609375, "rewards/video_r1_accuracy_reward/std": 0.5004304051399231, "rewards/video_r1_format_reward/mean": 0.8984375, "rewards/video_r1_format_reward/std": 0.3032590448856354, "step": 35 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 262.0, "completions/max_terminated_length": 262.0, "completions/mean_length": 102.671875, "completions/mean_terminated_length": 102.671875, "completions/min_length": 49.0, "completions/min_terminated_length": 49.0, "entropy": 0.8404669761657715, "epoch": 0.09090909090909091, "frac_reward_zero_std": 0.25, "grad_norm": 1.946840524673462, "learning_rate": 9.116161616161616e-07, "loss": -0.0, "num_tokens": 4118005.0, "reward": 0.604296863079071, "reward_std": 0.27570241689682007, "rewards/video_r1_accuracy_reward/mean": 0.5859375, "rewards/video_r1_accuracy_reward/std": 0.49449479579925537, "rewards/video_r1_format_reward/mean": 0.953125, "rewards/video_r1_format_reward/std": 0.21220162510871887, "step": 36 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.0, "completions/max_terminated_length": 242.0, "completions/mean_length": 112.7265625, "completions/mean_terminated_length": 112.7265625, "completions/min_length": 40.0, "completions/min_terminated_length": 40.0, "entropy": 0.8344206809997559, "epoch": 0.09343434343434344, "frac_reward_zero_std": 0.25, "grad_norm": 1.875348687171936, "learning_rate": 9.09090909090909e-07, "loss": -0.0, "num_tokens": 4232098.0, "reward": 0.701171875, "reward_std": 0.3029305934906006, "rewards/video_r1_accuracy_reward/mean": 0.6875, "rewards/video_r1_accuracy_reward/std": 0.4653336703777313, "rewards/video_r1_format_reward/mean": 0.9609375, "rewards/video_r1_format_reward/std": 0.194504976272583, "step": 37 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 324.0, "completions/max_terminated_length": 324.0, "completions/mean_length": 98.640625, "completions/mean_terminated_length": 98.640625, "completions/min_length": 36.0, "completions/min_terminated_length": 36.0, "entropy": 0.8952550888061523, "epoch": 0.09595959595959595, "frac_reward_zero_std": 0.375, "grad_norm": 1.779735803604126, "learning_rate": 9.065656565656565e-07, "loss": 0.0, "num_tokens": 4337628.0, "reward": 0.7101562023162842, "reward_std": 0.2632066607475281, "rewards/video_r1_accuracy_reward/mean": 0.6953125, "rewards/video_r1_accuracy_reward/std": 0.46208351850509644, "rewards/video_r1_format_reward/mean": 0.9921875, "rewards/video_r1_format_reward/std": 0.0883883461356163, "step": 38 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 263.0, "completions/max_terminated_length": 263.0, "completions/mean_length": 102.375, "completions/mean_terminated_length": 102.375, "completions/min_length": 45.0, "completions/min_terminated_length": 45.0, "entropy": 0.903910756111145, "epoch": 0.09848484848484848, "frac_reward_zero_std": 0.1875, "grad_norm": 2.0209107398986816, "learning_rate": 9.040404040404041e-07, "loss": -0.0, "num_tokens": 4437268.0, "reward": 0.5621093511581421, "reward_std": 0.36356228590011597, "rewards/video_r1_accuracy_reward/mean": 0.5390625, "rewards/video_r1_accuracy_reward/std": 0.5004304051399231, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 39 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 384.0, "completions/max_terminated_length": 242.0, "completions/mean_length": 94.59375, "completions/mean_terminated_length": 92.31495666503906, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 0.87197345495224, "epoch": 0.10101010101010101, "frac_reward_zero_std": 0.375, "grad_norm": 1.7272930145263672, "learning_rate": 9.015151515151514e-07, "loss": 0.0, "num_tokens": 4552040.0, "reward": 0.708984375, "reward_std": 0.2568144202232361, "rewards/video_r1_accuracy_reward/mean": 0.6953125, "rewards/video_r1_accuracy_reward/std": 0.46208351850509644, "rewards/video_r1_format_reward/mean": 0.96875, "rewards/video_r1_format_reward/std": 0.1746762990951538, "step": 40 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.0, "completions/max_terminated_length": 195.0, "completions/mean_length": 94.8515625, "completions/mean_terminated_length": 94.8515625, "completions/min_length": 49.0, "completions/min_terminated_length": 49.0, "entropy": 0.9597364664077759, "epoch": 0.10353535353535354, "frac_reward_zero_std": 0.5, "grad_norm": 1.6093056201934814, "learning_rate": 8.98989898989899e-07, "loss": -0.0, "num_tokens": 4640853.0, "reward": 0.576953113079071, "reward_std": 0.216628760099411, "rewards/video_r1_accuracy_reward/mean": 0.5546875, "rewards/video_r1_accuracy_reward/std": 0.4989531338214874, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 41 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.0, "completions/max_terminated_length": 199.0, "completions/mean_length": 84.984375, "completions/mean_terminated_length": 84.984375, "completions/min_length": 43.0, "completions/min_terminated_length": 43.0, "entropy": 0.9007290601730347, "epoch": 0.10606060606060606, "frac_reward_zero_std": 0.25, "grad_norm": 2.0971033573150635, "learning_rate": 8.964646464646465e-07, "loss": -0.0, "num_tokens": 4748699.0, "reward": 0.516796886920929, "reward_std": 0.3205876350402832, "rewards/video_r1_accuracy_reward/mean": 0.4921875, "rewards/video_r1_accuracy_reward/std": 0.5019033551216125, "rewards/video_r1_format_reward/mean": 0.984375, "rewards/video_r1_format_reward/std": 0.12450689822435379, "step": 42 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.0, "completions/max_terminated_length": 173.0, "completions/mean_length": 89.2890625, "completions/mean_terminated_length": 89.2890625, "completions/min_length": 40.0, "completions/min_terminated_length": 40.0, "entropy": 0.9400933980941772, "epoch": 0.10858585858585859, "frac_reward_zero_std": 0.375, "grad_norm": 1.9215672016143799, "learning_rate": 8.939393939393938e-07, "loss": -0.0, "num_tokens": 4867288.0, "reward": 0.6199219226837158, "reward_std": 0.26100048422813416, "rewards/video_r1_accuracy_reward/mean": 0.6015625, "rewards/video_r1_accuracy_reward/std": 0.4915000796318054, "rewards/video_r1_format_reward/mean": 0.96875, "rewards/video_r1_format_reward/std": 0.1746762990951538, "step": 43 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.0, "completions/max_terminated_length": 233.0, "completions/mean_length": 86.1953125, "completions/mean_terminated_length": 86.1953125, "completions/min_length": 40.0, "completions/min_terminated_length": 40.0, "entropy": 0.9206110835075378, "epoch": 0.1111111111111111, "frac_reward_zero_std": 0.1875, "grad_norm": 2.1925864219665527, "learning_rate": 8.914141414141414e-07, "loss": -0.0, "num_tokens": 4986809.0, "reward": 0.671875, "reward_std": 0.2928203344345093, "rewards/video_r1_accuracy_reward/mean": 0.65625, "rewards/video_r1_accuracy_reward/std": 0.47682511806488037, "rewards/video_r1_format_reward/mean": 0.96875, "rewards/video_r1_format_reward/std": 0.1746762990951538, "step": 44 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.0, "completions/max_terminated_length": 217.0, "completions/mean_length": 90.3046875, "completions/mean_terminated_length": 90.3046875, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "entropy": 0.974540650844574, "epoch": 0.11363636363636363, "frac_reward_zero_std": 0.4375, "grad_norm": 1.7343508005142212, "learning_rate": 8.888888888888888e-07, "loss": 0.0, "num_tokens": 5092520.0, "reward": 0.725390613079071, "reward_std": 0.23314352333545685, "rewards/video_r1_accuracy_reward/mean": 0.7109375, "rewards/video_r1_accuracy_reward/std": 0.45510825514793396, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 45 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.0, "completions/max_terminated_length": 167.0, "completions/mean_length": 80.625, "completions/mean_terminated_length": 80.625, "completions/min_length": 38.0, "completions/min_terminated_length": 38.0, "entropy": 0.9657076597213745, "epoch": 0.11616161616161616, "frac_reward_zero_std": 0.375, "grad_norm": 1.9508693218231201, "learning_rate": 8.863636363636363e-07, "loss": -0.0, "num_tokens": 5205544.0, "reward": 0.746874988079071, "reward_std": 0.2970072031021118, "rewards/video_r1_accuracy_reward/mean": 0.734375, "rewards/video_r1_accuracy_reward/std": 0.44340085983276367, "rewards/video_r1_format_reward/mean": 0.984375, "rewards/video_r1_format_reward/std": 0.12450689822435379, "step": 46 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.0, "completions/max_terminated_length": 223.0, "completions/mean_length": 88.828125, "completions/mean_terminated_length": 88.828125, "completions/min_length": 37.0, "completions/min_terminated_length": 37.0, "entropy": 0.9713828563690186, "epoch": 0.11868686868686869, "frac_reward_zero_std": 0.4375, "grad_norm": 1.7221375703811646, "learning_rate": 8.838383838383838e-07, "loss": -0.0, "num_tokens": 5319394.0, "reward": 0.5695312023162842, "reward_std": 0.26034435629844666, "rewards/video_r1_accuracy_reward/mean": 0.546875, "rewards/video_r1_accuracy_reward/std": 0.4997538626194, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 47 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.0, "completions/max_terminated_length": 150.0, "completions/mean_length": 80.8515625, "completions/mean_terminated_length": 80.8515625, "completions/min_length": 40.0, "completions/min_terminated_length": 40.0, "entropy": 0.9206292629241943, "epoch": 0.12121212121212122, "frac_reward_zero_std": 0.375, "grad_norm": 1.957437515258789, "learning_rate": 8.813131313131313e-07, "loss": -0.0, "num_tokens": 5412327.0, "reward": 0.643750011920929, "reward_std": 0.27909553050994873, "rewards/video_r1_accuracy_reward/mean": 0.625, "rewards/video_r1_accuracy_reward/std": 0.4860251843929291, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 48 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.0, "completions/max_terminated_length": 152.0, "completions/mean_length": 82.21875, "completions/mean_terminated_length": 82.21875, "completions/min_length": 37.0, "completions/min_terminated_length": 37.0, "entropy": 0.9331451654434204, "epoch": 0.12373737373737374, "frac_reward_zero_std": 0.375, "grad_norm": 1.8745862245559692, "learning_rate": 8.787878787878787e-07, "loss": -0.0, "num_tokens": 5516315.0, "reward": 0.4429687559604645, "reward_std": 0.25346940755844116, "rewards/video_r1_accuracy_reward/mean": 0.4140625, "rewards/video_r1_accuracy_reward/std": 0.49449479579925537, "rewards/video_r1_format_reward/mean": 0.9921875, "rewards/video_r1_format_reward/std": 0.0883883461356163, "step": 49 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.0, "completions/max_terminated_length": 174.0, "completions/mean_length": 79.7890625, "completions/mean_terminated_length": 79.7890625, "completions/min_length": 43.0, "completions/min_terminated_length": 43.0, "entropy": 0.9588379859924316, "epoch": 0.12626262626262627, "frac_reward_zero_std": 0.25, "grad_norm": 2.0629332065582275, "learning_rate": 8.762626262626263e-07, "loss": -0.0, "num_tokens": 5615632.0, "reward": 0.5464843511581421, "reward_std": 0.34427377581596375, "rewards/video_r1_accuracy_reward/mean": 0.5234375, "rewards/video_r1_accuracy_reward/std": 0.5014128684997559, "rewards/video_r1_format_reward/mean": 0.984375, "rewards/video_r1_format_reward/std": 0.12450689822435379, "step": 50 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.0, "completions/max_terminated_length": 145.0, "completions/mean_length": 79.265625, "completions/mean_terminated_length": 79.265625, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "entropy": 0.9568088054656982, "epoch": 0.12878787878787878, "frac_reward_zero_std": 0.3125, "grad_norm": 2.221264123916626, "learning_rate": 8.737373737373737e-07, "loss": 0.0, "num_tokens": 5714786.0, "reward": 0.5914062261581421, "reward_std": 0.2784692645072937, "rewards/video_r1_accuracy_reward/mean": 0.5703125, "rewards/video_r1_accuracy_reward/std": 0.4969765841960907, "rewards/video_r1_format_reward/mean": 0.9921875, "rewards/video_r1_format_reward/std": 0.0883883461356163, "step": 51 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.0, "completions/max_terminated_length": 148.0, "completions/mean_length": 74.5625, "completions/mean_terminated_length": 74.5625, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "entropy": 0.9146069288253784, "epoch": 0.13131313131313133, "frac_reward_zero_std": 0.4375, "grad_norm": 1.8530735969543457, "learning_rate": 8.712121212121211e-07, "loss": -0.0, "num_tokens": 5827042.0, "reward": 0.598828136920929, "reward_std": 0.22847865521907806, "rewards/video_r1_accuracy_reward/mean": 0.578125, "rewards/video_r1_accuracy_reward/std": 0.4957992732524872, "rewards/video_r1_format_reward/mean": 0.9921875, "rewards/video_r1_format_reward/std": 0.0883883461356163, "step": 52 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.0, "completions/max_terminated_length": 180.0, "completions/mean_length": 76.890625, "completions/mean_terminated_length": 76.890625, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "entropy": 0.9405485987663269, "epoch": 0.13383838383838384, "frac_reward_zero_std": 0.375, "grad_norm": 1.9973477125167847, "learning_rate": 8.686868686868687e-07, "loss": -0.0, "num_tokens": 5927692.0, "reward": 0.666015625, "reward_std": 0.2661140561103821, "rewards/video_r1_accuracy_reward/mean": 0.6484375, "rewards/video_r1_accuracy_reward/std": 0.4793342351913452, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 53 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.0, "completions/max_terminated_length": 162.0, "completions/mean_length": 74.015625, "completions/mean_terminated_length": 74.015625, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "entropy": 1.0538530349731445, "epoch": 0.13636363636363635, "frac_reward_zero_std": 0.375, "grad_norm": 2.2168405055999756, "learning_rate": 8.661616161616161e-07, "loss": 0.0, "num_tokens": 6036374.0, "reward": 0.5398437976837158, "reward_std": 0.2713738679885864, "rewards/video_r1_accuracy_reward/mean": 0.515625, "rewards/video_r1_accuracy_reward/std": 0.5017194747924805, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 54 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 384.0, "completions/max_terminated_length": 144.0, "completions/mean_length": 79.8203125, "completions/mean_terminated_length": 77.42520141601562, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "entropy": 0.9944831132888794, "epoch": 0.1388888888888889, "frac_reward_zero_std": 0.5, "grad_norm": 1.8037936687469482, "learning_rate": 8.636363636363636e-07, "loss": 0.0, "num_tokens": 6138703.0, "reward": 0.7621093988418579, "reward_std": 0.2209778130054474, "rewards/video_r1_accuracy_reward/mean": 0.75, "rewards/video_r1_accuracy_reward/std": 0.434714138507843, "rewards/video_r1_format_reward/mean": 0.9921875, "rewards/video_r1_format_reward/std": 0.0883883461356163, "step": 55 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.0, "completions/max_terminated_length": 126.0, "completions/mean_length": 73.3359375, "completions/mean_terminated_length": 73.3359375, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 1.032738208770752, "epoch": 0.1414141414141414, "frac_reward_zero_std": 0.5625, "grad_norm": 1.762905478477478, "learning_rate": 8.611111111111111e-07, "loss": -0.0, "num_tokens": 6236354.0, "reward": 0.6285156011581421, "reward_std": 0.18521998822689056, "rewards/video_r1_accuracy_reward/mean": 0.609375, "rewards/video_r1_accuracy_reward/std": 0.4898075461387634, "rewards/video_r1_format_reward/mean": 0.9921875, "rewards/video_r1_format_reward/std": 0.0883883461356163, "step": 56 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.0, "completions/max_terminated_length": 135.0, "completions/mean_length": 75.171875, "completions/mean_terminated_length": 75.171875, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "entropy": 1.0041790008544922, "epoch": 0.14393939393939395, "frac_reward_zero_std": 0.4375, "grad_norm": 1.8384010791778564, "learning_rate": 8.585858585858586e-07, "loss": 0.0, "num_tokens": 6336768.0, "reward": 0.806640625, "reward_std": 0.2180173397064209, "rewards/video_r1_accuracy_reward/mean": 0.796875, "rewards/video_r1_accuracy_reward/std": 0.40390563011169434, "rewards/video_r1_format_reward/mean": 0.9921875, "rewards/video_r1_format_reward/std": 0.0883883461356163, "step": 57 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.0, "completions/max_terminated_length": 145.0, "completions/mean_length": 70.8828125, "completions/mean_terminated_length": 70.8828125, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "entropy": 1.0715279579162598, "epoch": 0.14646464646464646, "frac_reward_zero_std": 0.3125, "grad_norm": 2.389326810836792, "learning_rate": 8.56060606060606e-07, "loss": 0.0, "num_tokens": 6436537.0, "reward": 0.627734363079071, "reward_std": 0.25768929719924927, "rewards/video_r1_accuracy_reward/mean": 0.609375, "rewards/video_r1_accuracy_reward/std": 0.4898075461387634, "rewards/video_r1_format_reward/mean": 0.9765625, "rewards/video_r1_format_reward/std": 0.15188287198543549, "step": 58 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.0, "completions/max_terminated_length": 151.0, "completions/mean_length": 75.09375, "completions/mean_terminated_length": 75.09375, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "entropy": 1.02361261844635, "epoch": 0.14898989898989898, "frac_reward_zero_std": 0.3125, "grad_norm": 2.1776626110076904, "learning_rate": 8.535353535353534e-07, "loss": -0.0, "num_tokens": 6547989.0, "reward": 0.5546875, "reward_std": 0.3171003460884094, "rewards/video_r1_accuracy_reward/mean": 0.53125, "rewards/video_r1_accuracy_reward/std": 0.5009832978248596, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 59 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.0, "completions/max_terminated_length": 141.0, "completions/mean_length": 81.34375, "completions/mean_terminated_length": 81.34375, "completions/min_length": 38.0, "completions/min_terminated_length": 38.0, "entropy": 1.0184237957000732, "epoch": 0.15151515151515152, "frac_reward_zero_std": 0.5, "grad_norm": 1.7039854526519775, "learning_rate": 8.51010101010101e-07, "loss": -0.0, "num_tokens": 6661905.0, "reward": 0.703125, "reward_std": 0.2143877148628235, "rewards/video_r1_accuracy_reward/mean": 0.6875, "rewards/video_r1_accuracy_reward/std": 0.4653336703777313, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 60 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.0, "completions/max_terminated_length": 138.0, "completions/mean_length": 76.8125, "completions/mean_terminated_length": 76.8125, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "entropy": 1.012675404548645, "epoch": 0.15404040404040403, "frac_reward_zero_std": 0.4375, "grad_norm": 1.9084367752075195, "learning_rate": 8.484848484848484e-07, "loss": -0.0, "num_tokens": 6762833.0, "reward": 0.6585937738418579, "reward_std": 0.22463490068912506, "rewards/video_r1_accuracy_reward/mean": 0.640625, "rewards/video_r1_accuracy_reward/std": 0.481702595949173, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 61 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.0, "completions/max_terminated_length": 168.0, "completions/mean_length": 76.7890625, "completions/mean_terminated_length": 76.7890625, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "entropy": 0.9767247438430786, "epoch": 0.15656565656565657, "frac_reward_zero_std": 0.375, "grad_norm": 1.992623209953308, "learning_rate": 8.459595959595959e-07, "loss": -0.0, "num_tokens": 6864190.0, "reward": 0.666015625, "reward_std": 0.27260246872901917, "rewards/video_r1_accuracy_reward/mean": 0.6484375, "rewards/video_r1_accuracy_reward/std": 0.4793342351913452, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 62 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.0, "completions/max_terminated_length": 149.0, "completions/mean_length": 73.65625, "completions/mean_terminated_length": 73.65625, "completions/min_length": 35.0, "completions/min_terminated_length": 35.0, "entropy": 0.9239650368690491, "epoch": 0.1590909090909091, "frac_reward_zero_std": 0.4375, "grad_norm": 2.0338518619537354, "learning_rate": 8.434343434343434e-07, "loss": 0.0, "num_tokens": 6969522.0, "reward": 0.732421875, "reward_std": 0.22757862508296967, "rewards/video_r1_accuracy_reward/mean": 0.71875, "rewards/video_r1_accuracy_reward/std": 0.4513758420944214, "rewards/video_r1_format_reward/mean": 0.9921875, "rewards/video_r1_format_reward/std": 0.0883883461356163, "step": 63 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.0, "completions/max_terminated_length": 137.0, "completions/mean_length": 75.9609375, "completions/mean_terminated_length": 75.9609375, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "entropy": 0.8829290270805359, "epoch": 0.16161616161616163, "frac_reward_zero_std": 0.75, "grad_norm": 1.6126983165740967, "learning_rate": 8.409090909090909e-07, "loss": 0.0, "num_tokens": 7077061.0, "reward": 0.762499988079071, "reward_std": 0.09695503860712051, "rewards/video_r1_accuracy_reward/mean": 0.75, "rewards/video_r1_accuracy_reward/std": 0.434714138507843, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 64 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.0, "completions/max_terminated_length": 133.0, "completions/mean_length": 72.6171875, "completions/mean_terminated_length": 72.6171875, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "entropy": 0.8675624132156372, "epoch": 0.16414141414141414, "frac_reward_zero_std": 0.75, "grad_norm": 1.3695436716079712, "learning_rate": 8.383838383838383e-07, "loss": -0.0, "num_tokens": 7192844.0, "reward": 0.7476562261581421, "reward_std": 0.10669228434562683, "rewards/video_r1_accuracy_reward/mean": 0.734375, "rewards/video_r1_accuracy_reward/std": 0.44340085983276367, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 65 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.0, "completions/max_terminated_length": 147.0, "completions/mean_length": 75.734375, "completions/mean_terminated_length": 75.734375, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "entropy": 0.9987907409667969, "epoch": 0.16666666666666666, "frac_reward_zero_std": 0.625, "grad_norm": 1.6358082294464111, "learning_rate": 8.358585858585859e-07, "loss": -0.0, "num_tokens": 7303458.0, "reward": 0.7699218988418579, "reward_std": 0.16915903985500336, "rewards/video_r1_accuracy_reward/mean": 0.7578125, "rewards/video_r1_accuracy_reward/std": 0.4300905168056488, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 66 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.0, "completions/max_terminated_length": 156.0, "completions/mean_length": 74.1328125, "completions/mean_terminated_length": 74.1328125, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "entropy": 0.98237144947052, "epoch": 0.1691919191919192, "frac_reward_zero_std": 0.5625, "grad_norm": 1.6880619525909424, "learning_rate": 8.333333333333333e-07, "loss": -0.0, "num_tokens": 7419707.0, "reward": 0.6585937738418579, "reward_std": 0.1944032609462738, "rewards/video_r1_accuracy_reward/mean": 0.640625, "rewards/video_r1_accuracy_reward/std": 0.481702595949173, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 67 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.0, "completions/max_terminated_length": 136.0, "completions/mean_length": 71.609375, "completions/mean_terminated_length": 71.609375, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "entropy": 0.9826507568359375, "epoch": 0.1717171717171717, "frac_reward_zero_std": 0.4375, "grad_norm": 1.8394359350204468, "learning_rate": 8.308080808080807e-07, "loss": -0.0, "num_tokens": 7521593.0, "reward": 0.6808593273162842, "reward_std": 0.25810331106185913, "rewards/video_r1_accuracy_reward/mean": 0.6640625, "rewards/video_r1_accuracy_reward/std": 0.47417303919792175, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 68 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.0, "completions/max_terminated_length": 162.0, "completions/mean_length": 70.4609375, "completions/mean_terminated_length": 70.4609375, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "entropy": 0.9084649085998535, "epoch": 0.17424242424242425, "frac_reward_zero_std": 0.4375, "grad_norm": 2.100003480911255, "learning_rate": 8.282828282828283e-07, "loss": 0.0, "num_tokens": 7627260.0, "reward": 0.740234375, "reward_std": 0.23314350843429565, "rewards/video_r1_accuracy_reward/mean": 0.7265625, "rewards/video_r1_accuracy_reward/std": 0.447474867105484, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 69 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.0, "completions/max_terminated_length": 162.0, "completions/mean_length": 80.4453125, "completions/mean_terminated_length": 80.4453125, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 0.9652878046035767, "epoch": 0.17676767676767677, "frac_reward_zero_std": 0.625, "grad_norm": 1.6002858877182007, "learning_rate": 8.257575757575757e-07, "loss": -0.0, "num_tokens": 7720845.0, "reward": 0.6734374761581421, "reward_std": 0.15718072652816772, "rewards/video_r1_accuracy_reward/mean": 0.65625, "rewards/video_r1_accuracy_reward/std": 0.47682511806488037, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 70 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.0, "completions/max_terminated_length": 177.0, "completions/mean_length": 79.203125, "completions/mean_terminated_length": 79.203125, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "entropy": 1.0387096405029297, "epoch": 0.17929292929292928, "frac_reward_zero_std": 0.5, "grad_norm": 1.9980230331420898, "learning_rate": 8.232323232323232e-07, "loss": -0.0, "num_tokens": 7818975.0, "reward": 0.49531248211860657, "reward_std": 0.2143877148628235, "rewards/video_r1_accuracy_reward/mean": 0.46875, "rewards/video_r1_accuracy_reward/std": 0.5009832978248596, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 71 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.0, "completions/max_terminated_length": 139.0, "completions/mean_length": 73.6015625, "completions/mean_terminated_length": 73.6015625, "completions/min_length": 36.0, "completions/min_terminated_length": 36.0, "entropy": 1.002305269241333, "epoch": 0.18181818181818182, "frac_reward_zero_std": 0.5, "grad_norm": 1.7402448654174805, "learning_rate": 8.207070707070707e-07, "loss": -0.0, "num_tokens": 7928484.0, "reward": 0.606640636920929, "reward_std": 0.20465511083602905, "rewards/video_r1_accuracy_reward/mean": 0.5859375, "rewards/video_r1_accuracy_reward/std": 0.49449479579925537, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 72 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.0, "completions/max_terminated_length": 190.0, "completions/mean_length": 74.6953125, "completions/mean_terminated_length": 74.6953125, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "entropy": 0.903598427772522, "epoch": 0.18434343434343434, "frac_reward_zero_std": 0.5, "grad_norm": 1.6977179050445557, "learning_rate": 8.181818181818182e-07, "loss": -0.0, "num_tokens": 8043461.0, "reward": 0.5992187261581421, "reward_std": 0.21215128898620605, "rewards/video_r1_accuracy_reward/mean": 0.578125, "rewards/video_r1_accuracy_reward/std": 0.4957992732524872, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 73 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.0, "completions/max_terminated_length": 142.0, "completions/mean_length": 76.6328125, "completions/mean_terminated_length": 76.6328125, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "entropy": 0.9879953265190125, "epoch": 0.18686868686868688, "frac_reward_zero_std": 0.6875, "grad_norm": 1.3695952892303467, "learning_rate": 8.156565656565656e-07, "loss": -0.0, "num_tokens": 8141774.0, "reward": 0.569531261920929, "reward_std": 0.12444031983613968, "rewards/video_r1_accuracy_reward/mean": 0.546875, "rewards/video_r1_accuracy_reward/std": 0.4997538626194, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 74 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.0, "completions/max_terminated_length": 137.0, "completions/mean_length": 73.5625, "completions/mean_terminated_length": 73.5625, "completions/min_length": 40.0, "completions/min_terminated_length": 40.0, "entropy": 1.0036814212799072, "epoch": 0.1893939393939394, "frac_reward_zero_std": 0.375, "grad_norm": 2.0377132892608643, "learning_rate": 8.131313131313132e-07, "loss": 0.0, "num_tokens": 8238342.0, "reward": 0.673046886920929, "reward_std": 0.2601749897003174, "rewards/video_r1_accuracy_reward/mean": 0.65625, "rewards/video_r1_accuracy_reward/std": 0.47682511806488037, "rewards/video_r1_format_reward/mean": 0.9921875, "rewards/video_r1_format_reward/std": 0.0883883461356163, "step": 75 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.0, "completions/max_terminated_length": 227.0, "completions/mean_length": 81.3046875, "completions/mean_terminated_length": 81.3046875, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "entropy": 0.9508095383644104, "epoch": 0.1919191919191919, "frac_reward_zero_std": 0.75, "grad_norm": 1.1749444007873535, "learning_rate": 8.106060606060605e-07, "loss": -0.0, "num_tokens": 8350589.0, "reward": 0.6437499523162842, "reward_std": 0.12493351101875305, "rewards/video_r1_accuracy_reward/mean": 0.625, "rewards/video_r1_accuracy_reward/std": 0.4860251843929291, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 76 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.0, "completions/max_terminated_length": 168.0, "completions/mean_length": 71.0390625, "completions/mean_terminated_length": 71.0390625, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "entropy": 0.9800167083740234, "epoch": 0.19444444444444445, "frac_reward_zero_std": 0.625, "grad_norm": 1.6353753805160522, "learning_rate": 8.08080808080808e-07, "loss": 0.0, "num_tokens": 8454770.0, "reward": 0.7105468511581421, "reward_std": 0.1551697999238968, "rewards/video_r1_accuracy_reward/mean": 0.6953125, "rewards/video_r1_accuracy_reward/std": 0.46208351850509644, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 77 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.0, "completions/max_terminated_length": 134.0, "completions/mean_length": 65.9296875, "completions/mean_terminated_length": 65.9296875, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "entropy": 0.9639301300048828, "epoch": 0.19696969696969696, "frac_reward_zero_std": 0.375, "grad_norm": 2.4808032512664795, "learning_rate": 8.055555555555556e-07, "loss": -0.0, "num_tokens": 8552857.0, "reward": 0.5023437738418579, "reward_std": 0.23973365128040314, "rewards/video_r1_accuracy_reward/mean": 0.4765625, "rewards/video_r1_accuracy_reward/std": 0.5014128684997559, "rewards/video_r1_format_reward/mean": 0.9921875, "rewards/video_r1_format_reward/std": 0.0883883461356163, "step": 78 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.0, "completions/max_terminated_length": 164.0, "completions/mean_length": 69.28125, "completions/mean_terminated_length": 69.28125, "completions/min_length": 36.0, "completions/min_terminated_length": 36.0, "entropy": 0.9725464582443237, "epoch": 0.1994949494949495, "frac_reward_zero_std": 0.5, "grad_norm": 1.850342869758606, "learning_rate": 8.030303030303029e-07, "loss": -0.0, "num_tokens": 8649085.0, "reward": 0.5843750238418579, "reward_std": 0.21215128898620605, "rewards/video_r1_accuracy_reward/mean": 0.5625, "rewards/video_r1_accuracy_reward/std": 0.49802759289741516, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 79 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.0, "completions/max_terminated_length": 154.0, "completions/mean_length": 72.0078125, "completions/mean_terminated_length": 72.0078125, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "entropy": 0.9206636548042297, "epoch": 0.20202020202020202, "frac_reward_zero_std": 0.625, "grad_norm": 1.5013765096664429, "learning_rate": 8.005050505050505e-07, "loss": 0.0, "num_tokens": 8745462.0, "reward": 0.614062488079071, "reward_std": 0.15292873978614807, "rewards/video_r1_accuracy_reward/mean": 0.59375, "rewards/video_r1_accuracy_reward/std": 0.4930621087551117, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 80 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.0, "completions/max_terminated_length": 190.0, "completions/mean_length": 72.53125, "completions/mean_terminated_length": 72.53125, "completions/min_length": 38.0, "completions/min_terminated_length": 38.0, "entropy": 0.9708524942398071, "epoch": 0.20454545454545456, "frac_reward_zero_std": 0.6875, "grad_norm": 1.4737831354141235, "learning_rate": 7.97979797979798e-07, "loss": 0.0, "num_tokens": 8852826.0, "reward": 0.6734374761581421, "reward_std": 0.1426815390586853, "rewards/video_r1_accuracy_reward/mean": 0.65625, "rewards/video_r1_accuracy_reward/std": 0.47682511806488037, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 81 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.0, "completions/max_terminated_length": 136.0, "completions/mean_length": 73.265625, "completions/mean_terminated_length": 73.265625, "completions/min_length": 41.0, "completions/min_terminated_length": 41.0, "entropy": 0.9690735936164856, "epoch": 0.20707070707070707, "frac_reward_zero_std": 0.75, "grad_norm": 1.4358322620391846, "learning_rate": 7.954545454545454e-07, "loss": -0.0, "num_tokens": 8950708.0, "reward": 0.740234375, "reward_std": 0.10770007222890854, "rewards/video_r1_accuracy_reward/mean": 0.7265625, "rewards/video_r1_accuracy_reward/std": 0.447474867105484, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 82 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.0, "completions/max_terminated_length": 123.0, "completions/mean_length": 58.984375, "completions/mean_terminated_length": 58.984375, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.9185530543327332, "epoch": 0.20959595959595959, "frac_reward_zero_std": 0.5, "grad_norm": 2.4200658798217773, "learning_rate": 7.929292929292929e-07, "loss": -0.0, "num_tokens": 9045026.0, "reward": 0.517578125, "reward_std": 0.21439234912395477, "rewards/video_r1_accuracy_reward/mean": 0.4921875, "rewards/video_r1_accuracy_reward/std": 0.5019033551216125, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 83 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.0, "completions/max_terminated_length": 122.0, "completions/mean_length": 65.3203125, "completions/mean_terminated_length": 65.3203125, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "entropy": 0.9491331577301025, "epoch": 0.21212121212121213, "frac_reward_zero_std": 0.875, "grad_norm": 1.0213149785995483, "learning_rate": 7.904040404040404e-07, "loss": -0.0, "num_tokens": 9152019.0, "reward": 0.740234375, "reward_std": 0.058214765042066574, "rewards/video_r1_accuracy_reward/mean": 0.7265625, "rewards/video_r1_accuracy_reward/std": 0.447474867105484, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 84 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.0, "completions/max_terminated_length": 118.0, "completions/mean_length": 69.296875, "completions/mean_terminated_length": 69.296875, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "entropy": 0.9853086471557617, "epoch": 0.21464646464646464, "frac_reward_zero_std": 0.5625, "grad_norm": 1.8322724103927612, "learning_rate": 7.878787878787878e-07, "loss": 0.0, "num_tokens": 9262145.0, "reward": 0.666015625, "reward_std": 0.17940622568130493, "rewards/video_r1_accuracy_reward/mean": 0.6484375, "rewards/video_r1_accuracy_reward/std": 0.4793342351913452, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 85 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.0, "completions/max_terminated_length": 123.0, "completions/mean_length": 68.265625, "completions/mean_terminated_length": 68.265625, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "entropy": 0.9777708649635315, "epoch": 0.21717171717171718, "frac_reward_zero_std": 0.8125, "grad_norm": 1.12037193775177, "learning_rate": 7.853535353535353e-07, "loss": -0.0, "num_tokens": 9371435.0, "reward": 0.46562501788139343, "reward_std": 0.08345898985862732, "rewards/video_r1_accuracy_reward/mean": 0.4375, "rewards/video_r1_accuracy_reward/std": 0.49802759289741516, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 86 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.0, "completions/max_terminated_length": 155.0, "completions/mean_length": 70.34375, "completions/mean_terminated_length": 70.34375, "completions/min_length": 39.0, "completions/min_terminated_length": 39.0, "entropy": 0.9397503733634949, "epoch": 0.2196969696969697, "frac_reward_zero_std": 0.75, "grad_norm": 1.3667945861816406, "learning_rate": 7.828282828282829e-07, "loss": -0.0, "num_tokens": 9478271.0, "reward": 0.606640636920929, "reward_std": 0.1196737289428711, "rewards/video_r1_accuracy_reward/mean": 0.5859375, "rewards/video_r1_accuracy_reward/std": 0.49449479579925537, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 87 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.0, "completions/max_terminated_length": 181.0, "completions/mean_length": 66.671875, "completions/mean_terminated_length": 66.671875, "completions/min_length": 28.0, "completions/min_terminated_length": 28.0, "entropy": 0.9511775970458984, "epoch": 0.2222222222222222, "frac_reward_zero_std": 0.625, "grad_norm": 1.7731772661209106, "learning_rate": 7.803030303030302e-07, "loss": -0.0, "num_tokens": 9587653.0, "reward": 0.5992187261581421, "reward_std": 0.16266599297523499, "rewards/video_r1_accuracy_reward/mean": 0.578125, "rewards/video_r1_accuracy_reward/std": 0.4957992732524872, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 88 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.0, "completions/max_terminated_length": 145.0, "completions/mean_length": 64.3203125, "completions/mean_terminated_length": 64.3203125, "completions/min_length": 28.0, "completions/min_terminated_length": 28.0, "entropy": 0.9355500936508179, "epoch": 0.22474747474747475, "frac_reward_zero_std": 0.8125, "grad_norm": 1.7302086353302002, "learning_rate": 7.777777777777778e-07, "loss": -0.0, "num_tokens": 9691022.0, "reward": 0.799609363079071, "reward_std": 0.09218844771385193, "rewards/video_r1_accuracy_reward/mean": 0.7890625, "rewards/video_r1_accuracy_reward/std": 0.4095771610736847, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 89 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.0, "completions/max_terminated_length": 144.0, "completions/mean_length": 68.2109375, "completions/mean_terminated_length": 68.2109375, "completions/min_length": 35.0, "completions/min_terminated_length": 35.0, "entropy": 0.9529180526733398, "epoch": 0.22727272727272727, "frac_reward_zero_std": 0.8125, "grad_norm": 1.217525839805603, "learning_rate": 7.752525252525253e-07, "loss": -0.0, "num_tokens": 9797753.0, "reward": 0.651171863079071, "reward_std": 0.07596279680728912, "rewards/video_r1_accuracy_reward/mean": 0.6328125, "rewards/video_r1_accuracy_reward/std": 0.4839322865009308, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 90 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.0, "completions/max_terminated_length": 158.0, "completions/mean_length": 75.765625, "completions/mean_terminated_length": 75.765625, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "entropy": 0.9758607745170593, "epoch": 0.2297979797979798, "frac_reward_zero_std": 0.6875, "grad_norm": 1.5119833946228027, "learning_rate": 7.727272727272727e-07, "loss": -0.0, "num_tokens": 9904523.0, "reward": 0.6363281011581421, "reward_std": 0.1416737586259842, "rewards/video_r1_accuracy_reward/mean": 0.6171875, "rewards/video_r1_accuracy_reward/std": 0.4879830479621887, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 91 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.0, "completions/max_terminated_length": 131.0, "completions/mean_length": 68.4453125, "completions/mean_terminated_length": 68.4453125, "completions/min_length": 37.0, "completions/min_terminated_length": 37.0, "entropy": 1.0206053256988525, "epoch": 0.23232323232323232, "frac_reward_zero_std": 0.6875, "grad_norm": 1.599021315574646, "learning_rate": 7.702020202020202e-07, "loss": -0.0, "num_tokens": 10007580.0, "reward": 0.5695312023162842, "reward_std": 0.1406659632921219, "rewards/video_r1_accuracy_reward/mean": 0.546875, "rewards/video_r1_accuracy_reward/std": 0.4997538626194, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 92 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.0, "completions/max_terminated_length": 163.0, "completions/mean_length": 71.671875, "completions/mean_terminated_length": 71.671875, "completions/min_length": 36.0, "completions/min_terminated_length": 36.0, "entropy": 0.9673388600349426, "epoch": 0.23484848484848486, "frac_reward_zero_std": 0.75, "grad_norm": 1.1879236698150635, "learning_rate": 7.676767676767675e-07, "loss": 0.0, "num_tokens": 10129522.0, "reward": 0.5843749642372131, "reward_std": 0.10120701789855957, "rewards/video_r1_accuracy_reward/mean": 0.5625, "rewards/video_r1_accuracy_reward/std": 0.49802759289741516, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 93 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.0, "completions/max_terminated_length": 149.0, "completions/mean_length": 68.9609375, "completions/mean_terminated_length": 68.9609375, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 1.0109703540802002, "epoch": 0.23737373737373738, "frac_reward_zero_std": 0.9375, "grad_norm": 0.7206881046295166, "learning_rate": 7.651515151515151e-07, "loss": -0.0, "num_tokens": 10232605.0, "reward": 0.725390613079071, "reward_std": 0.03072948195040226, "rewards/video_r1_accuracy_reward/mean": 0.7109375, "rewards/video_r1_accuracy_reward/std": 0.45510825514793396, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 94 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.0, "completions/max_terminated_length": 158.0, "completions/mean_length": 69.828125, "completions/mean_terminated_length": 69.828125, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "entropy": 1.0361416339874268, "epoch": 0.2398989898989899, "frac_reward_zero_std": 0.625, "grad_norm": 1.9655052423477173, "learning_rate": 7.626262626262626e-07, "loss": -0.0, "num_tokens": 10343447.0, "reward": 0.7699218988418579, "reward_std": 0.1551697999238968, "rewards/video_r1_accuracy_reward/mean": 0.7578125, "rewards/video_r1_accuracy_reward/std": 0.4300905168056488, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 95 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.0, "completions/max_terminated_length": 139.0, "completions/mean_length": 70.5078125, "completions/mean_terminated_length": 70.5078125, "completions/min_length": 30.0, "completions/min_terminated_length": 30.0, "entropy": 1.0260932445526123, "epoch": 0.24242424242424243, "frac_reward_zero_std": 0.5625, "grad_norm": 1.967199683189392, "learning_rate": 7.6010101010101e-07, "loss": 0.0, "num_tokens": 10455376.0, "reward": 0.6511719226837158, "reward_std": 0.18265508115291595, "rewards/video_r1_accuracy_reward/mean": 0.6328125, "rewards/video_r1_accuracy_reward/std": 0.4839322865009308, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 96 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.0, "completions/max_terminated_length": 139.0, "completions/mean_length": 66.625, "completions/mean_terminated_length": 66.625, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 1.0024924278259277, "epoch": 0.24494949494949494, "frac_reward_zero_std": 0.875, "grad_norm": 0.9387697577476501, "learning_rate": 7.575757575757575e-07, "loss": -0.0, "num_tokens": 10563352.0, "reward": 0.6066405773162842, "reward_std": 0.05821476876735687, "rewards/video_r1_accuracy_reward/mean": 0.5859375, "rewards/video_r1_accuracy_reward/std": 0.49449479579925537, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 97 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.0, "completions/max_terminated_length": 191.0, "completions/mean_length": 66.890625, "completions/mean_terminated_length": 66.890625, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "entropy": 0.9643306732177734, "epoch": 0.2474747474747475, "frac_reward_zero_std": 0.8125, "grad_norm": 1.325685739517212, "learning_rate": 7.550505050505051e-07, "loss": 0.0, "num_tokens": 10671386.0, "reward": 0.5992187261581421, "reward_std": 0.06946974992752075, "rewards/video_r1_accuracy_reward/mean": 0.578125, "rewards/video_r1_accuracy_reward/std": 0.4957992732524872, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 98 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.0, "completions/max_terminated_length": 144.0, "completions/mean_length": 70.015625, "completions/mean_terminated_length": 70.015625, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "entropy": 1.0711150169372559, "epoch": 0.25, "frac_reward_zero_std": 0.5625, "grad_norm": 1.8700460195541382, "learning_rate": 7.525252525252524e-07, "loss": 0.0, "num_tokens": 10769676.0, "reward": 0.49531251192092896, "reward_std": 0.16317594051361084, "rewards/video_r1_accuracy_reward/mean": 0.46875, "rewards/video_r1_accuracy_reward/std": 0.5009832978248596, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 99 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.0, "completions/max_terminated_length": 141.0, "completions/mean_length": 66.1875, "completions/mean_terminated_length": 66.1875, "completions/min_length": 32.0, "completions/min_terminated_length": 32.0, "entropy": 1.032357096672058, "epoch": 0.25252525252525254, "frac_reward_zero_std": 0.8125, "grad_norm": 1.2700526714324951, "learning_rate": 7.5e-07, "loss": 0.0, "num_tokens": 10868492.0, "reward": 0.7328125238418579, "reward_std": 0.0737217366695404, "rewards/video_r1_accuracy_reward/mean": 0.71875, "rewards/video_r1_accuracy_reward/std": 0.4513758420944214, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 100 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.0, "completions/max_terminated_length": 133.0, "completions/mean_length": 68.2890625, "completions/mean_terminated_length": 68.2890625, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.9825550317764282, "epoch": 0.255050505050505, "frac_reward_zero_std": 0.9375, "grad_norm": 0.7565488219261169, "learning_rate": 7.474747474747475e-07, "loss": -0.0, "num_tokens": 10983977.0, "reward": 0.6585937738418579, "reward_std": 0.027485283091664314, "rewards/video_r1_accuracy_reward/mean": 0.640625, "rewards/video_r1_accuracy_reward/std": 0.481702595949173, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 101 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.0, "completions/max_terminated_length": 131.0, "completions/mean_length": 66.1484375, "completions/mean_terminated_length": 66.1484375, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "entropy": 1.0391050577163696, "epoch": 0.25757575757575757, "frac_reward_zero_std": 0.625, "grad_norm": 1.6984504461288452, "learning_rate": 7.449494949494948e-07, "loss": 0.0, "num_tokens": 11084348.0, "reward": 0.6363281011581421, "reward_std": 0.15942178666591644, "rewards/video_r1_accuracy_reward/mean": 0.6171875, "rewards/video_r1_accuracy_reward/std": 0.4879830479621887, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 102 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.0, "completions/max_terminated_length": 107.0, "completions/mean_length": 61.875, "completions/mean_terminated_length": 61.875, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 1.0670381784439087, "epoch": 0.2601010101010101, "frac_reward_zero_std": 0.4375, "grad_norm": 2.5665719509124756, "learning_rate": 7.424242424242424e-07, "loss": -0.0, "num_tokens": 11193508.0, "reward": 0.6585937738418579, "reward_std": 0.23437213897705078, "rewards/video_r1_accuracy_reward/mean": 0.640625, "rewards/video_r1_accuracy_reward/std": 0.481702595949173, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 103 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.0, "completions/max_terminated_length": 113.0, "completions/mean_length": 62.8984375, "completions/mean_terminated_length": 62.8984375, "completions/min_length": 33.0, "completions/min_terminated_length": 33.0, "entropy": 1.031665563583374, "epoch": 0.26262626262626265, "frac_reward_zero_std": 0.6875, "grad_norm": 1.8193395137786865, "learning_rate": 7.398989898989899e-07, "loss": -0.0, "num_tokens": 11299487.0, "reward": 0.688281238079071, "reward_std": 0.13842955231666565, "rewards/video_r1_accuracy_reward/mean": 0.671875, "rewards/video_r1_accuracy_reward/std": 0.4713755249977112, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 104 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 104.0, "completions/max_terminated_length": 104.0, "completions/mean_length": 55.46875, "completions/mean_terminated_length": 55.46875, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 1.050790786743164, "epoch": 0.26515151515151514, "frac_reward_zero_std": 0.75, "grad_norm": 1.4435532093048096, "learning_rate": 7.373737373737373e-07, "loss": -0.0, "num_tokens": 11408659.0, "reward": 0.6214843988418579, "reward_std": 0.10993648320436478, "rewards/video_r1_accuracy_reward/mean": 0.6015625, "rewards/video_r1_accuracy_reward/std": 0.4915000796318054, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 105 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.0, "completions/max_terminated_length": 109.0, "completions/mean_length": 61.3515625, "completions/mean_terminated_length": 61.3515625, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 1.0249364376068115, "epoch": 0.2676767676767677, "frac_reward_zero_std": 0.75, "grad_norm": 1.6191179752349854, "learning_rate": 7.348484848484848e-07, "loss": 0.0, "num_tokens": 11512456.0, "reward": 0.6066405773162842, "reward_std": 0.10019923746585846, "rewards/video_r1_accuracy_reward/mean": 0.5859375, "rewards/video_r1_accuracy_reward/std": 0.49449479579925537, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 106 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.0, "completions/max_terminated_length": 132.0, "completions/mean_length": 59.1484375, "completions/mean_terminated_length": 59.1484375, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 1.0390393733978271, "epoch": 0.2702020202020202, "frac_reward_zero_std": 0.625, "grad_norm": 1.6593204736709595, "learning_rate": 7.323232323232324e-07, "loss": -0.0, "num_tokens": 11622035.0, "reward": 0.606640636920929, "reward_std": 0.16165819764137268, "rewards/video_r1_accuracy_reward/mean": 0.5859375, "rewards/video_r1_accuracy_reward/std": 0.49449479579925537, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 107 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.0, "completions/max_terminated_length": 113.0, "completions/mean_length": 61.7890625, "completions/mean_terminated_length": 61.7890625, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 1.0370471477508545, "epoch": 0.2727272727272727, "frac_reward_zero_std": 0.6875, "grad_norm": 1.7022017240524292, "learning_rate": 7.297979797979797e-07, "loss": -0.0, "num_tokens": 11727192.0, "reward": 0.688281238079071, "reward_std": 0.13518071174621582, "rewards/video_r1_accuracy_reward/mean": 0.671875, "rewards/video_r1_accuracy_reward/std": 0.4713755249977112, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 108 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.0, "completions/max_terminated_length": 130.0, "completions/mean_length": 59.1484375, "completions/mean_terminated_length": 59.1484375, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 1.060609221458435, "epoch": 0.27525252525252525, "frac_reward_zero_std": 0.75, "grad_norm": 1.5183703899383545, "learning_rate": 7.272727272727272e-07, "loss": -0.0, "num_tokens": 11834915.0, "reward": 0.6957031488418579, "reward_std": 0.10993649065494537, "rewards/video_r1_accuracy_reward/mean": 0.6796875, "rewards/video_r1_accuracy_reward/std": 0.4684300124645233, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 109 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.0, "completions/max_terminated_length": 147.0, "completions/mean_length": 61.46875, "completions/mean_terminated_length": 61.46875, "completions/min_length": 28.0, "completions/min_terminated_length": 28.0, "entropy": 1.0464633703231812, "epoch": 0.2777777777777778, "frac_reward_zero_std": 0.8125, "grad_norm": 1.4522336721420288, "learning_rate": 7.247474747474747e-07, "loss": -0.0, "num_tokens": 11946295.0, "reward": 0.7105468511581421, "reward_std": 0.08570004999637604, "rewards/video_r1_accuracy_reward/mean": 0.6953125, "rewards/video_r1_accuracy_reward/std": 0.46208351850509644, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 110 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.0, "completions/max_terminated_length": 124.0, "completions/mean_length": 63.34375, "completions/mean_terminated_length": 63.34375, "completions/min_length": 31.0, "completions/min_terminated_length": 31.0, "entropy": 1.0706329345703125, "epoch": 0.2803030303030303, "frac_reward_zero_std": 0.75, "grad_norm": 1.7645903825759888, "learning_rate": 7.222222222222221e-07, "loss": -0.0, "num_tokens": 12049227.0, "reward": 0.8292968273162842, "reward_std": 0.11967373639345169, "rewards/video_r1_accuracy_reward/mean": 0.8203125, "rewards/video_r1_accuracy_reward/std": 0.3854354918003082, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 111 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.0, "completions/max_terminated_length": 107.0, "completions/mean_length": 58.9375, "completions/mean_terminated_length": 58.9375, "completions/min_length": 29.0, "completions/min_terminated_length": 29.0, "entropy": 1.1456776857376099, "epoch": 0.2828282828282828, "frac_reward_zero_std": 0.75, "grad_norm": 1.4835776090621948, "learning_rate": 7.196969696969697e-07, "loss": 0.0, "num_tokens": 12153771.0, "reward": 0.7105468511581421, "reward_std": 0.11418846249580383, "rewards/video_r1_accuracy_reward/mean": 0.6953125, "rewards/video_r1_accuracy_reward/std": 0.46208351850509644, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 112 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.0, "completions/max_terminated_length": 111.0, "completions/mean_length": 60.28125, "completions/mean_terminated_length": 60.28125, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 1.2109147310256958, "epoch": 0.28535353535353536, "frac_reward_zero_std": 0.75, "grad_norm": 1.5145522356033325, "learning_rate": 7.171717171717171e-07, "loss": -0.0, "num_tokens": 12257335.0, "reward": 0.45820310711860657, "reward_std": 0.10019923746585846, "rewards/video_r1_accuracy_reward/mean": 0.4296875, "rewards/video_r1_accuracy_reward/std": 0.4969765841960907, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 113 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.0, "completions/max_terminated_length": 109.0, "completions/mean_length": 57.6875, "completions/mean_terminated_length": 57.6875, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 1.1032978296279907, "epoch": 0.2878787878787879, "frac_reward_zero_std": 0.75, "grad_norm": 1.8097097873687744, "learning_rate": 7.146464646464646e-07, "loss": -0.0, "num_tokens": 12374687.0, "reward": 0.740234375, "reward_std": 0.10993648320436478, "rewards/video_r1_accuracy_reward/mean": 0.7265625, "rewards/video_r1_accuracy_reward/std": 0.447474867105484, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 114 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.0, "completions/max_terminated_length": 99.0, "completions/mean_length": 55.1953125, "completions/mean_terminated_length": 55.1953125, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.2030807733535767, "epoch": 0.2904040404040404, "frac_reward_zero_std": 0.625, "grad_norm": 2.118196487426758, "learning_rate": 7.121212121212121e-07, "loss": -0.0, "num_tokens": 12485544.0, "reward": 0.443359375, "reward_std": 0.1649070382118225, "rewards/video_r1_accuracy_reward/mean": 0.4140625, "rewards/video_r1_accuracy_reward/std": 0.49449479579925537, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 115 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 90.0, "completions/max_terminated_length": 90.0, "completions/mean_length": 52.40625, "completions/mean_terminated_length": 52.40625, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.1323903799057007, "epoch": 0.29292929292929293, "frac_reward_zero_std": 0.75, "grad_norm": 1.8301972150802612, "learning_rate": 7.095959595959596e-07, "loss": 0.0, "num_tokens": 12584092.0, "reward": 0.7476562261581421, "reward_std": 0.09695503115653992, "rewards/video_r1_accuracy_reward/mean": 0.734375, "rewards/video_r1_accuracy_reward/std": 0.44340085983276367, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 116 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.0, "completions/max_terminated_length": 126.0, "completions/mean_length": 63.2578125, "completions/mean_terminated_length": 63.2578125, "completions/min_length": 28.0, "completions/min_terminated_length": 28.0, "entropy": 1.2527742385864258, "epoch": 0.29545454545454547, "frac_reward_zero_std": 0.6875, "grad_norm": 1.677696943283081, "learning_rate": 7.07070707070707e-07, "loss": -0.0, "num_tokens": 12687173.0, "reward": 0.6511719226837158, "reward_std": 0.12768451869487762, "rewards/video_r1_accuracy_reward/mean": 0.6328125, "rewards/video_r1_accuracy_reward/std": 0.4839322865009308, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 117 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.0, "completions/max_terminated_length": 135.0, "completions/mean_length": 57.9140625, "completions/mean_terminated_length": 57.9140625, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 1.1632239818572998, "epoch": 0.29797979797979796, "frac_reward_zero_std": 0.8125, "grad_norm": 1.4128031730651855, "learning_rate": 7.045454545454545e-07, "loss": -0.0, "num_tokens": 12777050.0, "reward": 0.762499988079071, "reward_std": 0.08894424885511398, "rewards/video_r1_accuracy_reward/mean": 0.75, "rewards/video_r1_accuracy_reward/std": 0.434714138507843, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 118 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.0, "completions/max_terminated_length": 121.0, "completions/mean_length": 55.078125, "completions/mean_terminated_length": 55.078125, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 1.1989972591400146, "epoch": 0.3005050505050505, "frac_reward_zero_std": 0.6875, "grad_norm": 1.9097048044204712, "learning_rate": 7.02020202020202e-07, "loss": 0.0, "num_tokens": 12894444.0, "reward": 0.6734375357627869, "reward_std": 0.12869229912757874, "rewards/video_r1_accuracy_reward/mean": 0.65625, "rewards/video_r1_accuracy_reward/std": 0.47682511806488037, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 119 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.0, "completions/max_terminated_length": 139.0, "completions/mean_length": 59.7578125, "completions/mean_terminated_length": 59.7578125, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 1.3133735656738281, "epoch": 0.30303030303030304, "frac_reward_zero_std": 0.625, "grad_norm": 1.8875597715377808, "learning_rate": 6.994949494949494e-07, "loss": 0.0, "num_tokens": 12986525.0, "reward": 0.717578113079071, "reward_std": 0.1526612639427185, "rewards/video_r1_accuracy_reward/mean": 0.703125, "rewards/video_r1_accuracy_reward/std": 0.45867621898651123, "rewards/video_r1_format_reward/mean": 0.9921875, "rewards/video_r1_format_reward/std": 0.0883883461356163, "step": 120 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.0, "completions/max_terminated_length": 87.0, "completions/mean_length": 47.015625, "completions/mean_terminated_length": 47.015625, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.1269464492797852, "epoch": 0.3055555555555556, "frac_reward_zero_std": 0.875, "grad_norm": 1.4073408842086792, "learning_rate": 6.96969696969697e-07, "loss": -0.0, "num_tokens": 13101023.0, "reward": 0.799609363079071, "reward_std": 0.05272950232028961, "rewards/video_r1_accuracy_reward/mean": 0.7890625, "rewards/video_r1_accuracy_reward/std": 0.4095771610736847, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 121 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.0, "completions/max_terminated_length": 116.0, "completions/mean_length": 52.6640625, "completions/mean_terminated_length": 52.6640625, "completions/min_length": 27.0, "completions/min_terminated_length": 27.0, "entropy": 1.16685152053833, "epoch": 0.30808080808080807, "frac_reward_zero_std": 0.8125, "grad_norm": 1.5721867084503174, "learning_rate": 6.944444444444444e-07, "loss": 0.0, "num_tokens": 13208740.0, "reward": 0.6214843988418579, "reward_std": 0.08021478354930878, "rewards/video_r1_accuracy_reward/mean": 0.6015625, "rewards/video_r1_accuracy_reward/std": 0.4915000796318054, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 122 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 89.0, "completions/max_terminated_length": 89.0, "completions/mean_length": 47.6953125, "completions/mean_terminated_length": 47.6953125, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.2507858276367188, "epoch": 0.3106060606060606, "frac_reward_zero_std": 0.75, "grad_norm": 2.312094211578369, "learning_rate": 6.919191919191919e-07, "loss": -0.0, "num_tokens": 13313453.0, "reward": 0.5621094107627869, "reward_std": 0.10770007222890854, "rewards/video_r1_accuracy_reward/mean": 0.5390625, "rewards/video_r1_accuracy_reward/std": 0.5004304051399231, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 123 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 112.0, "completions/max_terminated_length": 112.0, "completions/mean_length": 53.2578125, "completions/mean_terminated_length": 53.2578125, "completions/min_length": 26.0, "completions/min_terminated_length": 26.0, "entropy": 1.216563105583191, "epoch": 0.31313131313131315, "frac_reward_zero_std": 0.8125, "grad_norm": 1.8229986429214478, "learning_rate": 6.893939393939394e-07, "loss": 0.0, "num_tokens": 13423294.0, "reward": 0.5843750238418579, "reward_std": 0.07920699566602707, "rewards/video_r1_accuracy_reward/mean": 0.5625, "rewards/video_r1_accuracy_reward/std": 0.49802759289741516, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 124 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 96.0, "completions/max_terminated_length": 96.0, "completions/mean_length": 49.171875, "completions/mean_terminated_length": 49.171875, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 1.2454454898834229, "epoch": 0.31565656565656564, "frac_reward_zero_std": 0.9375, "grad_norm": 1.0392420291900635, "learning_rate": 6.868686868686868e-07, "loss": -0.0, "num_tokens": 13520556.0, "reward": 0.725390613079071, "reward_std": 0.03072948195040226, "rewards/video_r1_accuracy_reward/mean": 0.7109375, "rewards/video_r1_accuracy_reward/std": 0.45510825514793396, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 125 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 90.0, "completions/max_terminated_length": 90.0, "completions/mean_length": 48.1640625, "completions/mean_terminated_length": 48.1640625, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 1.1565489768981934, "epoch": 0.3181818181818182, "frac_reward_zero_std": 0.875, "grad_norm": 1.4579185247421265, "learning_rate": 6.843434343434343e-07, "loss": -0.0, "num_tokens": 13634321.0, "reward": 0.688281238079071, "reward_std": 0.05922255665063858, "rewards/video_r1_accuracy_reward/mean": 0.671875, "rewards/video_r1_accuracy_reward/std": 0.4713755249977112, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 126 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.0, "completions/max_terminated_length": 99.0, "completions/mean_length": 51.28125, "completions/mean_terminated_length": 51.28125, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 1.2307085990905762, "epoch": 0.3207070707070707, "frac_reward_zero_std": 0.75, "grad_norm": 2.3030145168304443, "learning_rate": 6.818181818181817e-07, "loss": -0.0, "num_tokens": 13727629.0, "reward": 0.532421886920929, "reward_std": 0.1131853386759758, "rewards/video_r1_accuracy_reward/mean": 0.5078125, "rewards/video_r1_accuracy_reward/std": 0.5019033551216125, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 127 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.0, "completions/max_terminated_length": 147.0, "completions/mean_length": 47.484375, "completions/mean_terminated_length": 47.484375, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.1688158512115479, "epoch": 0.32323232323232326, "frac_reward_zero_std": 0.8125, "grad_norm": 1.8600326776504517, "learning_rate": 6.792929292929293e-07, "loss": 0.0, "num_tokens": 13837475.0, "reward": 0.666015625, "reward_std": 0.0727139487862587, "rewards/video_r1_accuracy_reward/mean": 0.6484375, "rewards/video_r1_accuracy_reward/std": 0.4793342351913452, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 128 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.0, "completions/max_terminated_length": 111.0, "completions/mean_length": 47.390625, "completions/mean_terminated_length": 47.390625, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 1.1925861835479736, "epoch": 0.32575757575757575, "frac_reward_zero_std": 0.8125, "grad_norm": 1.420927882194519, "learning_rate": 6.767676767676767e-07, "loss": -0.0, "num_tokens": 13950165.0, "reward": 0.8070312738418579, "reward_std": 0.0737217366695404, "rewards/video_r1_accuracy_reward/mean": 0.796875, "rewards/video_r1_accuracy_reward/std": 0.40390563011169434, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 129 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 94.0, "completions/max_terminated_length": 94.0, "completions/mean_length": 45.359375, "completions/mean_terminated_length": 45.359375, "completions/min_length": 13.0, "completions/min_terminated_length": 13.0, "entropy": 1.1712085008621216, "epoch": 0.3282828282828283, "frac_reward_zero_std": 0.9375, "grad_norm": 0.66424560546875, "learning_rate": 6.742424242424242e-07, "loss": 0.0, "num_tokens": 14042243.0, "reward": 0.6363281011581421, "reward_std": 0.020992232486605644, "rewards/video_r1_accuracy_reward/mean": 0.6171875, "rewards/video_r1_accuracy_reward/std": 0.4879830479621887, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 130 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 112.0, "completions/max_terminated_length": 112.0, "completions/mean_length": 43.6171875, "completions/mean_terminated_length": 43.6171875, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.274552583694458, "epoch": 0.33080808080808083, "frac_reward_zero_std": 0.6875, "grad_norm": 2.4996423721313477, "learning_rate": 6.717171717171717e-07, "loss": 0.0, "num_tokens": 14142794.0, "reward": 0.591796875, "reward_std": 0.12219925224781036, "rewards/video_r1_accuracy_reward/mean": 0.5703125, "rewards/video_r1_accuracy_reward/std": 0.4969765841960907, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 131 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 91.0, "completions/max_terminated_length": 91.0, "completions/mean_length": 42.3203125, "completions/mean_terminated_length": 42.3203125, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.1475682258605957, "epoch": 0.3333333333333333, "frac_reward_zero_std": 0.8125, "grad_norm": 1.9287738800048828, "learning_rate": 6.691919191919192e-07, "loss": 0.0, "num_tokens": 14232227.0, "reward": 0.6285156011581421, "reward_std": 0.06965583562850952, "rewards/video_r1_accuracy_reward/mean": 0.609375, "rewards/video_r1_accuracy_reward/std": 0.4898075461387634, "rewards/video_r1_format_reward/mean": 0.9921875, "rewards/video_r1_format_reward/std": 0.0883883461356163, "step": 132 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.0, "completions/max_terminated_length": 100.0, "completions/mean_length": 42.59375, "completions/mean_terminated_length": 42.59375, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.2122106552124023, "epoch": 0.33585858585858586, "frac_reward_zero_std": 0.75, "grad_norm": 3.023392915725708, "learning_rate": 6.666666666666666e-07, "loss": 0.0, "num_tokens": 14331927.0, "reward": 0.740234375, "reward_std": 0.10344808548688889, "rewards/video_r1_accuracy_reward/mean": 0.7265625, "rewards/video_r1_accuracy_reward/std": 0.447474867105484, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 133 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 78.0, "completions/max_terminated_length": 78.0, "completions/mean_length": 40.53125, "completions/mean_terminated_length": 40.53125, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.1993948221206665, "epoch": 0.3383838383838384, "frac_reward_zero_std": 0.8125, "grad_norm": 1.7871482372283936, "learning_rate": 6.641414141414141e-07, "loss": 0.0, "num_tokens": 14422979.0, "reward": 0.5472656488418579, "reward_std": 0.0727139487862587, "rewards/video_r1_accuracy_reward/mean": 0.5234375, "rewards/video_r1_accuracy_reward/std": 0.5014128684997559, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 134 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 80.0, "completions/max_terminated_length": 80.0, "completions/mean_length": 44.0859375, "completions/mean_terminated_length": 44.0859375, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.1800475120544434, "epoch": 0.3409090909090909, "frac_reward_zero_std": 0.75, "grad_norm": 2.805823564529419, "learning_rate": 6.616161616161616e-07, "loss": -0.0, "num_tokens": 14521638.0, "reward": 0.62109375, "reward_std": 0.08131963759660721, "rewards/video_r1_accuracy_reward/mean": 0.6015625, "rewards/video_r1_accuracy_reward/std": 0.4915000796318054, "rewards/video_r1_format_reward/mean": 0.9921875, "rewards/video_r1_format_reward/std": 0.0883883461356163, "step": 135 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 89.0, "completions/max_terminated_length": 89.0, "completions/mean_length": 39.5, "completions/mean_terminated_length": 39.5, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 1.1338804960250854, "epoch": 0.3434343434343434, "frac_reward_zero_std": 0.6875, "grad_norm": 2.844430446624756, "learning_rate": 6.59090909090909e-07, "loss": -0.0, "num_tokens": 14621022.0, "reward": 0.703125, "reward_std": 0.12119147181510925, "rewards/video_r1_accuracy_reward/mean": 0.6875, "rewards/video_r1_accuracy_reward/std": 0.4653336703777313, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 136 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 101.0, "completions/max_terminated_length": 101.0, "completions/mean_length": 37.484375, "completions/mean_terminated_length": 37.484375, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.0793958902359009, "epoch": 0.34595959595959597, "frac_reward_zero_std": 0.8125, "grad_norm": 2.934971332550049, "learning_rate": 6.565656565656566e-07, "loss": -0.0, "num_tokens": 14735044.0, "reward": 0.5621093511581421, "reward_std": 0.09420402348041534, "rewards/video_r1_accuracy_reward/mean": 0.5390625, "rewards/video_r1_accuracy_reward/std": 0.5004304051399231, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 137 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 83.0, "completions/max_terminated_length": 83.0, "completions/mean_length": 39.546875, "completions/mean_terminated_length": 39.546875, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.1664319038391113, "epoch": 0.3484848484848485, "frac_reward_zero_std": 0.625, "grad_norm": 2.60278058052063, "learning_rate": 6.54040404040404e-07, "loss": -0.0, "num_tokens": 14846898.0, "reward": 0.6140625476837158, "reward_std": 0.1454278975725174, "rewards/video_r1_accuracy_reward/mean": 0.59375, "rewards/video_r1_accuracy_reward/std": 0.4930621087551117, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 138 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 69.0, "completions/max_terminated_length": 69.0, "completions/mean_length": 34.8671875, "completions/mean_terminated_length": 34.8671875, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 1.1905630826950073, "epoch": 0.351010101010101, "frac_reward_zero_std": 0.6875, "grad_norm": 2.826327323913574, "learning_rate": 6.515151515151515e-07, "loss": -0.0, "num_tokens": 14937953.0, "reward": 0.680859386920929, "reward_std": 0.13193649053573608, "rewards/video_r1_accuracy_reward/mean": 0.6640625, "rewards/video_r1_accuracy_reward/std": 0.47417303919792175, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 139 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 85.0, "completions/max_terminated_length": 85.0, "completions/mean_length": 34.3203125, "completions/mean_terminated_length": 34.3203125, "completions/min_length": 13.0, "completions/min_terminated_length": 13.0, "entropy": 1.1751891374588013, "epoch": 0.35353535353535354, "frac_reward_zero_std": 0.8125, "grad_norm": 2.3793742656707764, "learning_rate": 6.48989898989899e-07, "loss": 0.0, "num_tokens": 15043954.0, "reward": 0.8070312738418579, "reward_std": 0.08345898985862732, "rewards/video_r1_accuracy_reward/mean": 0.796875, "rewards/video_r1_accuracy_reward/std": 0.40390563011169434, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 140 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 82.0, "completions/max_terminated_length": 82.0, "completions/mean_length": 35.5, "completions/mean_terminated_length": 35.5, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "entropy": 1.2330318689346313, "epoch": 0.3560606060606061, "frac_reward_zero_std": 0.875, "grad_norm": 1.5116065740585327, "learning_rate": 6.464646464646465e-07, "loss": -0.0, "num_tokens": 15128850.0, "reward": 0.5695312023162842, "reward_std": 0.051721714437007904, "rewards/video_r1_accuracy_reward/mean": 0.546875, "rewards/video_r1_accuracy_reward/std": 0.4997538626194, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 141 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 70.0, "completions/max_terminated_length": 70.0, "completions/mean_length": 32.6171875, "completions/mean_terminated_length": 32.6171875, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 1.1521470546722412, "epoch": 0.35858585858585856, "frac_reward_zero_std": 0.75, "grad_norm": 2.4774539470672607, "learning_rate": 6.439393939393939e-07, "loss": -0.0, "num_tokens": 15236257.0, "reward": 0.7105468511581421, "reward_std": 0.10019923001527786, "rewards/video_r1_accuracy_reward/mean": 0.6953125, "rewards/video_r1_accuracy_reward/std": 0.46208351850509644, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 142 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.0, "completions/max_terminated_length": 87.0, "completions/mean_length": 31.734375, "completions/mean_terminated_length": 31.734375, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "entropy": 1.1453057527542114, "epoch": 0.3611111111111111, "frac_reward_zero_std": 0.875, "grad_norm": 1.7422317266464233, "learning_rate": 6.414141414141414e-07, "loss": 0.0, "num_tokens": 15338215.0, "reward": 0.6437499523162842, "reward_std": 0.05497056990861893, "rewards/video_r1_accuracy_reward/mean": 0.625, "rewards/video_r1_accuracy_reward/std": 0.4860251843929291, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 143 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 81.0, "completions/max_terminated_length": 81.0, "completions/mean_length": 34.3671875, "completions/mean_terminated_length": 34.3671875, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "entropy": 1.2442141771316528, "epoch": 0.36363636363636365, "frac_reward_zero_std": 0.8125, "grad_norm": 2.5866944789886475, "learning_rate": 6.388888888888888e-07, "loss": 0.0, "num_tokens": 15450086.0, "reward": 0.4878906011581421, "reward_std": 0.0727139487862587, "rewards/video_r1_accuracy_reward/mean": 0.4609375, "rewards/video_r1_accuracy_reward/std": 0.5004304051399231, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 144 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.0, "completions/max_terminated_length": 106.0, "completions/mean_length": 33.3125, "completions/mean_terminated_length": 33.3125, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 1.2739644050598145, "epoch": 0.3661616161616162, "frac_reward_zero_std": 0.6875, "grad_norm": 2.671504259109497, "learning_rate": 6.363636363636363e-07, "loss": -0.0, "num_tokens": 15550070.0, "reward": 0.680859386920929, "reward_std": 0.12768451869487762, "rewards/video_r1_accuracy_reward/mean": 0.6640625, "rewards/video_r1_accuracy_reward/std": 0.47417303919792175, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 145 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 86.0, "completions/max_terminated_length": 86.0, "completions/mean_length": 30.2109375, "completions/mean_terminated_length": 30.2109375, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "entropy": 1.149760127067566, "epoch": 0.3686868686868687, "frac_reward_zero_std": 0.875, "grad_norm": 1.9195950031280518, "learning_rate": 6.338383838383839e-07, "loss": -0.0, "num_tokens": 15658265.0, "reward": 0.6734374761581421, "reward_std": 0.051721714437007904, "rewards/video_r1_accuracy_reward/mean": 0.65625, "rewards/video_r1_accuracy_reward/std": 0.47682511806488037, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 146 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 76.0, "completions/max_terminated_length": 76.0, "completions/mean_length": 31.5703125, "completions/mean_terminated_length": 31.5703125, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "entropy": 1.2297152280807495, "epoch": 0.3712121212121212, "frac_reward_zero_std": 0.8125, "grad_norm": 2.8299896717071533, "learning_rate": 6.313131313131312e-07, "loss": -0.0, "num_tokens": 15760138.0, "reward": 0.651171863079071, "reward_std": 0.08021478354930878, "rewards/video_r1_accuracy_reward/mean": 0.6328125, "rewards/video_r1_accuracy_reward/std": 0.4839322865009308, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 147 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 60.0, "completions/max_terminated_length": 60.0, "completions/mean_length": 29.1875, "completions/mean_terminated_length": 29.1875, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 1.1559841632843018, "epoch": 0.37373737373737376, "frac_reward_zero_std": 0.8125, "grad_norm": 2.4495553970336914, "learning_rate": 6.287878787878788e-07, "loss": -0.0, "num_tokens": 15859386.0, "reward": 0.6214843392372131, "reward_std": 0.08570004999637604, "rewards/video_r1_accuracy_reward/mean": 0.6015625, "rewards/video_r1_accuracy_reward/std": 0.4915000796318054, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 148 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 80.0, "completions/max_terminated_length": 80.0, "completions/mean_length": 29.859375, "completions/mean_terminated_length": 29.859375, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "entropy": 1.2058125734329224, "epoch": 0.37626262626262624, "frac_reward_zero_std": 0.875, "grad_norm": 1.5699256658554077, "learning_rate": 6.262626262626263e-07, "loss": -0.0, "num_tokens": 15961120.0, "reward": 0.703125, "reward_std": 0.06145896762609482, "rewards/video_r1_accuracy_reward/mean": 0.6875, "rewards/video_r1_accuracy_reward/std": 0.4653336703777313, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 149 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 76.0, "completions/max_terminated_length": 76.0, "completions/mean_length": 33.84375, "completions/mean_terminated_length": 33.84375, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "entropy": 1.2072830200195312, "epoch": 0.3787878787878788, "frac_reward_zero_std": 0.8125, "grad_norm": 2.6067988872528076, "learning_rate": 6.237373737373736e-07, "loss": -0.0, "num_tokens": 16063572.0, "reward": 0.4878906309604645, "reward_std": 0.06297669559717178, "rewards/video_r1_accuracy_reward/mean": 0.4609375, "rewards/video_r1_accuracy_reward/std": 0.5004304051399231, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 150 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 66.0, "completions/max_terminated_length": 66.0, "completions/mean_length": 32.734375, "completions/mean_terminated_length": 32.734375, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.239713191986084, "epoch": 0.3813131313131313, "frac_reward_zero_std": 0.75, "grad_norm": 3.205798387527466, "learning_rate": 6.212121212121212e-07, "loss": -0.0, "num_tokens": 16174394.0, "reward": 0.5914062261581421, "reward_std": 0.06408154964447021, "rewards/video_r1_accuracy_reward/mean": 0.5703125, "rewards/video_r1_accuracy_reward/std": 0.4969765841960907, "rewards/video_r1_format_reward/mean": 0.9921875, "rewards/video_r1_format_reward/std": 0.0883883461356163, "step": 151 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 70.0, "completions/max_terminated_length": 70.0, "completions/mean_length": 34.765625, "completions/mean_terminated_length": 34.765625, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 1.320723533630371, "epoch": 0.3838383838383838, "frac_reward_zero_std": 0.75, "grad_norm": 2.119114875793457, "learning_rate": 6.186868686868687e-07, "loss": 0.0, "num_tokens": 16276724.0, "reward": 0.635937511920929, "reward_std": 0.09064806997776031, "rewards/video_r1_accuracy_reward/mean": 0.6171875, "rewards/video_r1_accuracy_reward/std": 0.4879830479621887, "rewards/video_r1_format_reward/mean": 0.9921875, "rewards/video_r1_format_reward/std": 0.0883883461356163, "step": 152 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 84.0, "completions/max_terminated_length": 84.0, "completions/mean_length": 36.5390625, "completions/mean_terminated_length": 36.5390625, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "entropy": 1.2870080471038818, "epoch": 0.38636363636363635, "frac_reward_zero_std": 0.875, "grad_norm": 1.5376033782958984, "learning_rate": 6.161616161616161e-07, "loss": -0.0, "num_tokens": 16371001.0, "reward": 0.6214843988418579, "reward_std": 0.05272950232028961, "rewards/video_r1_accuracy_reward/mean": 0.6015625, "rewards/video_r1_accuracy_reward/std": 0.4915000796318054, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 153 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 89.0, "completions/max_terminated_length": 89.0, "completions/mean_length": 35.984375, "completions/mean_terminated_length": 35.984375, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 1.2718448638916016, "epoch": 0.3888888888888889, "frac_reward_zero_std": 0.875, "grad_norm": 1.5065395832061768, "learning_rate": 6.136363636363636e-07, "loss": -0.0, "num_tokens": 16482023.0, "reward": 0.7847656011581421, "reward_std": 0.05821476876735687, "rewards/video_r1_accuracy_reward/mean": 0.7734375, "rewards/video_r1_accuracy_reward/std": 0.4202519655227661, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 154 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 75.0, "completions/max_terminated_length": 75.0, "completions/mean_length": 34.0, "completions/mean_terminated_length": 34.0, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "entropy": 1.2422447204589844, "epoch": 0.39141414141414144, "frac_reward_zero_std": 0.8125, "grad_norm": 2.1209592819213867, "learning_rate": 6.111111111111112e-07, "loss": -0.0, "num_tokens": 16578319.0, "reward": 0.5621093511581421, "reward_std": 0.09218844771385193, "rewards/video_r1_accuracy_reward/mean": 0.5390625, "rewards/video_r1_accuracy_reward/std": 0.5004304051399231, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 155 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 68.0, "completions/max_terminated_length": 68.0, "completions/mean_length": 33.4375, "completions/mean_terminated_length": 33.4375, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 1.1495954990386963, "epoch": 0.3939393939393939, "frac_reward_zero_std": 0.8125, "grad_norm": 2.415402889251709, "learning_rate": 6.085858585858585e-07, "loss": 0.0, "num_tokens": 16677999.0, "reward": 0.6953125, "reward_std": 0.04958236962556839, "rewards/video_r1_accuracy_reward/mean": 0.6796875, "rewards/video_r1_accuracy_reward/std": 0.4684300124645233, "rewards/video_r1_format_reward/mean": 0.9921875, "rewards/video_r1_format_reward/std": 0.0883883461356163, "step": 156 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.0, "completions/max_terminated_length": 87.0, "completions/mean_length": 33.4375, "completions/mean_terminated_length": 33.4375, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "entropy": 1.2009010314941406, "epoch": 0.39646464646464646, "frac_reward_zero_std": 0.875, "grad_norm": 1.6719857454299927, "learning_rate": 6.060606060606061e-07, "loss": 0.0, "num_tokens": 16781351.0, "reward": 0.606640636920929, "reward_std": 0.05272950232028961, "rewards/video_r1_accuracy_reward/mean": 0.5859375, "rewards/video_r1_accuracy_reward/std": 0.49449479579925537, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 157 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 83.0, "completions/max_terminated_length": 83.0, "completions/mean_length": 33.9296875, "completions/mean_terminated_length": 33.9296875, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 1.2379920482635498, "epoch": 0.398989898989899, "frac_reward_zero_std": 0.6875, "grad_norm": 2.6537461280822754, "learning_rate": 6.035353535353535e-07, "loss": 0.0, "num_tokens": 16878030.0, "reward": 0.8292968273162842, "reward_std": 0.1244356632232666, "rewards/video_r1_accuracy_reward/mean": 0.8203125, "rewards/video_r1_accuracy_reward/std": 0.3854354918003082, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 158 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 79.0, "completions/max_terminated_length": 79.0, "completions/mean_length": 32.1953125, "completions/mean_terminated_length": 32.1953125, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "entropy": 1.1720532178878784, "epoch": 0.4015151515151515, "frac_reward_zero_std": 0.75, "grad_norm": 2.911094903945923, "learning_rate": 6.010101010101009e-07, "loss": 0.0, "num_tokens": 16982935.0, "reward": 0.569531261920929, "reward_std": 0.10120702534914017, "rewards/video_r1_accuracy_reward/mean": 0.546875, "rewards/video_r1_accuracy_reward/std": 0.4997538626194, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 159 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 74.0, "completions/max_terminated_length": 74.0, "completions/mean_length": 34.1484375, "completions/mean_terminated_length": 34.1484375, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 1.1654397249221802, "epoch": 0.40404040404040403, "frac_reward_zero_std": 0.875, "grad_norm": 1.369276762008667, "learning_rate": 5.984848484848485e-07, "loss": -0.0, "num_tokens": 17070618.0, "reward": 0.7328125238418579, "reward_std": 0.05497056990861893, "rewards/video_r1_accuracy_reward/mean": 0.71875, "rewards/video_r1_accuracy_reward/std": 0.4513758420944214, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 160 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 84.0, "completions/max_terminated_length": 84.0, "completions/mean_length": 34.7890625, "completions/mean_terminated_length": 34.7890625, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.1633415222167969, "epoch": 0.4065656565656566, "frac_reward_zero_std": 0.875, "grad_norm": 1.564042568206787, "learning_rate": 5.959595959595959e-07, "loss": 0.0, "num_tokens": 17175703.0, "reward": 0.7992187738418579, "reward_std": 0.04921317845582962, "rewards/video_r1_accuracy_reward/mean": 0.7890625, "rewards/video_r1_accuracy_reward/std": 0.4095771610736847, "rewards/video_r1_format_reward/mean": 0.9921875, "rewards/video_r1_format_reward/std": 0.0883883461356163, "step": 161 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 82.0, "completions/max_terminated_length": 82.0, "completions/mean_length": 35.1796875, "completions/mean_terminated_length": 35.1796875, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 1.3527387380599976, "epoch": 0.4090909090909091, "frac_reward_zero_std": 0.8125, "grad_norm": 2.0236308574676514, "learning_rate": 5.934343434343434e-07, "loss": -0.0, "num_tokens": 17283214.0, "reward": 0.6363281011581421, "reward_std": 0.08245119452476501, "rewards/video_r1_accuracy_reward/mean": 0.6171875, "rewards/video_r1_accuracy_reward/std": 0.4879830479621887, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 162 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.0, "completions/max_terminated_length": 106.0, "completions/mean_length": 34.8046875, "completions/mean_terminated_length": 34.8046875, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 1.2128992080688477, "epoch": 0.4116161616161616, "frac_reward_zero_std": 0.75, "grad_norm": 2.5096118450164795, "learning_rate": 5.909090909090909e-07, "loss": -0.0, "num_tokens": 17390165.0, "reward": 0.7550780773162842, "reward_std": 0.09046198427677155, "rewards/video_r1_accuracy_reward/mean": 0.7421875, "rewards/video_r1_accuracy_reward/std": 0.43914905190467834, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 163 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 69.0, "completions/max_terminated_length": 69.0, "completions/mean_length": 32.9140625, "completions/mean_terminated_length": 32.9140625, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 1.1064362525939941, "epoch": 0.41414141414141414, "frac_reward_zero_std": 0.6875, "grad_norm": 3.49741268157959, "learning_rate": 5.883838383838384e-07, "loss": -0.0, "num_tokens": 17484994.0, "reward": 0.6359374523162842, "reward_std": 0.10555607080459595, "rewards/video_r1_accuracy_reward/mean": 0.6171875, "rewards/video_r1_accuracy_reward/std": 0.4879830479621887, "rewards/video_r1_format_reward/mean": 0.9921875, "rewards/video_r1_format_reward/std": 0.0883883461356163, "step": 164 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.0, "completions/max_terminated_length": 108.0, "completions/mean_length": 34.1015625, "completions/mean_terminated_length": 34.1015625, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.1739277839660645, "epoch": 0.4166666666666667, "frac_reward_zero_std": 0.9375, "grad_norm": 1.649794578552246, "learning_rate": 5.858585858585858e-07, "loss": 0.0, "num_tokens": 17582335.0, "reward": 0.576953113079071, "reward_std": 0.020992232486605644, "rewards/video_r1_accuracy_reward/mean": 0.5546875, "rewards/video_r1_accuracy_reward/std": 0.4989531338214874, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 165 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 80.0, "completions/max_terminated_length": 80.0, "completions/mean_length": 38.046875, "completions/mean_terminated_length": 38.046875, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.2043449878692627, "epoch": 0.41919191919191917, "frac_reward_zero_std": 0.875, "grad_norm": 1.621335506439209, "learning_rate": 5.833333333333334e-07, "loss": -0.0, "num_tokens": 17673021.0, "reward": 0.5249999761581421, "reward_std": 0.06145896762609482, "rewards/video_r1_accuracy_reward/mean": 0.5, "rewards/video_r1_accuracy_reward/std": 0.5019646286964417, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 166 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 69.0, "completions/max_terminated_length": 69.0, "completions/mean_length": 35.953125, "completions/mean_terminated_length": 35.953125, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.2063016891479492, "epoch": 0.4217171717171717, "frac_reward_zero_std": 0.8125, "grad_norm": 2.0640013217926025, "learning_rate": 5.808080808080808e-07, "loss": 0.0, "num_tokens": 17767647.0, "reward": 0.7476562261581421, "reward_std": 0.06946974992752075, "rewards/video_r1_accuracy_reward/mean": 0.734375, "rewards/video_r1_accuracy_reward/std": 0.44340085983276367, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 167 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 73.0, "completions/max_terminated_length": 73.0, "completions/mean_length": 35.9765625, "completions/mean_terminated_length": 35.9765625, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.1426618099212646, "epoch": 0.42424242424242425, "frac_reward_zero_std": 0.8125, "grad_norm": 1.6504007577896118, "learning_rate": 5.782828282828282e-07, "loss": 0.0, "num_tokens": 17882004.0, "reward": 0.740234375, "reward_std": 0.08021478354930878, "rewards/video_r1_accuracy_reward/mean": 0.7265625, "rewards/video_r1_accuracy_reward/std": 0.447474867105484, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 168 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 75.0, "completions/max_terminated_length": 75.0, "completions/mean_length": 36.078125, "completions/mean_terminated_length": 36.078125, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.167665719985962, "epoch": 0.42676767676767674, "frac_reward_zero_std": 0.8125, "grad_norm": 1.703382968902588, "learning_rate": 5.757575757575758e-07, "loss": -0.0, "num_tokens": 17961174.0, "reward": 0.598828136920929, "reward_std": 0.07939308881759644, "rewards/video_r1_accuracy_reward/mean": 0.578125, "rewards/video_r1_accuracy_reward/std": 0.4957992732524872, "rewards/video_r1_format_reward/mean": 0.9921875, "rewards/video_r1_format_reward/std": 0.0883883461356163, "step": 169 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 93.0, "completions/max_terminated_length": 93.0, "completions/mean_length": 38.3671875, "completions/mean_terminated_length": 38.3671875, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.1713335514068604, "epoch": 0.4292929292929293, "frac_reward_zero_std": 0.875, "grad_norm": 2.324023962020874, "learning_rate": 5.732323232323232e-07, "loss": 0.0, "num_tokens": 18065077.0, "reward": 0.42851561307907104, "reward_std": 0.04847751557826996, "rewards/video_r1_accuracy_reward/mean": 0.3984375, "rewards/video_r1_accuracy_reward/std": 0.4915000796318054, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 170 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 64.0, "completions/max_terminated_length": 64.0, "completions/mean_length": 35.40625, "completions/mean_terminated_length": 35.40625, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "entropy": 1.166663646697998, "epoch": 0.4318181818181818, "frac_reward_zero_std": 0.75, "grad_norm": 2.060908317565918, "learning_rate": 5.707070707070707e-07, "loss": 0.0, "num_tokens": 18166713.0, "reward": 0.5992187261581421, "reward_std": 0.10120701789855957, "rewards/video_r1_accuracy_reward/mean": 0.578125, "rewards/video_r1_accuracy_reward/std": 0.4957992732524872, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 171 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 76.0, "completions/max_terminated_length": 76.0, "completions/mean_length": 34.8515625, "completions/mean_terminated_length": 34.8515625, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.0551965236663818, "epoch": 0.43434343434343436, "frac_reward_zero_std": 0.8125, "grad_norm": 2.146479606628418, "learning_rate": 5.681818181818182e-07, "loss": -0.0, "num_tokens": 18271646.0, "reward": 0.48046875, "reward_std": 0.07920699566602707, "rewards/video_r1_accuracy_reward/mean": 0.453125, "rewards/video_r1_accuracy_reward/std": 0.4997538626194, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 172 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 72.0, "completions/max_terminated_length": 72.0, "completions/mean_length": 36.4921875, "completions/mean_terminated_length": 36.4921875, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.0619423389434814, "epoch": 0.43686868686868685, "frac_reward_zero_std": 0.875, "grad_norm": 1.411008358001709, "learning_rate": 5.656565656565657e-07, "loss": 0.0, "num_tokens": 18372037.0, "reward": 0.5992187261581421, "reward_std": 0.051721714437007904, "rewards/video_r1_accuracy_reward/mean": 0.578125, "rewards/video_r1_accuracy_reward/std": 0.4957992732524872, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 173 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 90.0, "completions/max_terminated_length": 90.0, "completions/mean_length": 36.5625, "completions/mean_terminated_length": 36.5625, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.162034511566162, "epoch": 0.4393939393939394, "frac_reward_zero_std": 0.875, "grad_norm": 1.3831713199615479, "learning_rate": 5.631313131313131e-07, "loss": 0.0, "num_tokens": 18462141.0, "reward": 0.814453125, "reward_std": 0.058214765042066574, "rewards/video_r1_accuracy_reward/mean": 0.8046875, "rewards/video_r1_accuracy_reward/std": 0.3979988098144531, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 174 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 76.0, "completions/max_terminated_length": 76.0, "completions/mean_length": 38.4453125, "completions/mean_terminated_length": 38.4453125, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.2333970069885254, "epoch": 0.44191919191919193, "frac_reward_zero_std": 0.6875, "grad_norm": 2.2612643241882324, "learning_rate": 5.606060606060605e-07, "loss": 0.0, "num_tokens": 18564110.0, "reward": 0.829296886920929, "reward_std": 0.11469841748476028, "rewards/video_r1_accuracy_reward/mean": 0.8203125, "rewards/video_r1_accuracy_reward/std": 0.3854354918003082, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 175 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 76.0, "completions/max_terminated_length": 76.0, "completions/mean_length": 35.8125, "completions/mean_terminated_length": 35.8125, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.1255345344543457, "epoch": 0.4444444444444444, "frac_reward_zero_std": 0.875, "grad_norm": 1.9899846315383911, "learning_rate": 5.58080808080808e-07, "loss": 0.0, "num_tokens": 18665734.0, "reward": 0.8960937261581421, "reward_std": 0.051721714437007904, "rewards/video_r1_accuracy_reward/mean": 0.890625, "rewards/video_r1_accuracy_reward/std": 0.31333550810813904, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 176 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 65.0, "completions/max_terminated_length": 65.0, "completions/mean_length": 34.0, "completions/mean_terminated_length": 34.0, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.142645239830017, "epoch": 0.44696969696969696, "frac_reward_zero_std": 0.625, "grad_norm": 2.496107578277588, "learning_rate": 5.555555555555555e-07, "loss": 0.0, "num_tokens": 18759270.0, "reward": 0.532421886920929, "reward_std": 0.13244643807411194, "rewards/video_r1_accuracy_reward/mean": 0.5078125, "rewards/video_r1_accuracy_reward/std": 0.5019033551216125, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 177 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 82.0, "completions/max_terminated_length": 82.0, "completions/mean_length": 38.4609375, "completions/mean_terminated_length": 38.4609375, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.1225965023040771, "epoch": 0.4494949494949495, "frac_reward_zero_std": 0.6875, "grad_norm": 2.3409671783447266, "learning_rate": 5.53030303030303e-07, "loss": -0.0, "num_tokens": 18862553.0, "reward": 0.6585937738418579, "reward_std": 0.14693352580070496, "rewards/video_r1_accuracy_reward/mean": 0.640625, "rewards/video_r1_accuracy_reward/std": 0.481702595949173, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 178 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.0, "completions/max_terminated_length": 100.0, "completions/mean_length": 37.390625, "completions/mean_terminated_length": 37.390625, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.1464502811431885, "epoch": 0.45202020202020204, "frac_reward_zero_std": 0.8125, "grad_norm": 2.6066229343414307, "learning_rate": 5.505050505050505e-07, "loss": -0.0, "num_tokens": 18956595.0, "reward": 0.814453125, "reward_std": 0.08570004999637604, "rewards/video_r1_accuracy_reward/mean": 0.8046875, "rewards/video_r1_accuracy_reward/std": 0.3979988098144531, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 179 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 81.0, "completions/max_terminated_length": 81.0, "completions/mean_length": 39.1171875, "completions/mean_terminated_length": 39.1171875, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 1.1246540546417236, "epoch": 0.45454545454545453, "frac_reward_zero_std": 0.625, "grad_norm": 3.036703109741211, "learning_rate": 5.47979797979798e-07, "loss": 0.0, "num_tokens": 19059722.0, "reward": 0.7476562261581421, "reward_std": 0.14319148659706116, "rewards/video_r1_accuracy_reward/mean": 0.734375, "rewards/video_r1_accuracy_reward/std": 0.44340085983276367, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 180 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.0, "completions/max_terminated_length": 126.0, "completions/mean_length": 41.46875, "completions/mean_terminated_length": 41.46875, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.1428744792938232, "epoch": 0.45707070707070707, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 5.454545454545454e-07, "loss": 0.0, "num_tokens": 19158318.0, "reward": 0.703125, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.6875, "rewards/video_r1_accuracy_reward/std": 0.4653336703777313, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 181 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 96.0, "completions/max_terminated_length": 96.0, "completions/mean_length": 40.1796875, "completions/mean_terminated_length": 40.1796875, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.1367346048355103, "epoch": 0.4595959595959596, "frac_reward_zero_std": 0.875, "grad_norm": 2.153904676437378, "learning_rate": 5.42929292929293e-07, "loss": -0.0, "num_tokens": 19266637.0, "reward": 0.5621093511581421, "reward_std": 0.058214765042066574, "rewards/video_r1_accuracy_reward/mean": 0.5390625, "rewards/video_r1_accuracy_reward/std": 0.5004304051399231, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 182 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 93.0, "completions/max_terminated_length": 93.0, "completions/mean_length": 40.1875, "completions/mean_terminated_length": 40.1875, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.1929256916046143, "epoch": 0.4621212121212121, "frac_reward_zero_std": 0.75, "grad_norm": 2.8813252449035645, "learning_rate": 5.404040404040404e-07, "loss": -0.0, "num_tokens": 19361085.0, "reward": 0.7843749523162842, "reward_std": 0.07706765085458755, "rewards/video_r1_accuracy_reward/mean": 0.7734375, "rewards/video_r1_accuracy_reward/std": 0.4202519655227661, "rewards/video_r1_format_reward/mean": 0.9921875, "rewards/video_r1_format_reward/std": 0.0883883461356163, "step": 183 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 84.0, "completions/max_terminated_length": 84.0, "completions/mean_length": 39.515625, "completions/mean_terminated_length": 39.515625, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.1405987739562988, "epoch": 0.46464646464646464, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 5.378787878787878e-07, "loss": 0.0, "num_tokens": 19462775.0, "reward": 0.5249999761581421, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.5, "rewards/video_r1_accuracy_reward/std": 0.5019646286964417, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 184 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 94.0, "completions/max_terminated_length": 94.0, "completions/mean_length": 38.4296875, "completions/mean_terminated_length": 38.4296875, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.2080726623535156, "epoch": 0.4671717171717172, "frac_reward_zero_std": 0.6875, "grad_norm": 2.3265841007232666, "learning_rate": 5.353535353535354e-07, "loss": -0.0, "num_tokens": 19543830.0, "reward": 0.703125, "reward_std": 0.13842955231666565, "rewards/video_r1_accuracy_reward/mean": 0.6875, "rewards/video_r1_accuracy_reward/std": 0.4653336703777313, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 185 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 73.0, "completions/max_terminated_length": 73.0, "completions/mean_length": 40.359375, "completions/mean_terminated_length": 40.359375, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 1.1394485235214233, "epoch": 0.4696969696969697, "frac_reward_zero_std": 0.8125, "grad_norm": 2.003188371658325, "learning_rate": 5.328282828282828e-07, "loss": -0.0, "num_tokens": 19636020.0, "reward": 0.6734374761581421, "reward_std": 0.07920700311660767, "rewards/video_r1_accuracy_reward/mean": 0.65625, "rewards/video_r1_accuracy_reward/std": 0.47682511806488037, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 186 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.0, "completions/max_terminated_length": 116.0, "completions/mean_length": 42.0703125, "completions/mean_terminated_length": 42.0703125, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.2103469371795654, "epoch": 0.4722222222222222, "frac_reward_zero_std": 0.8125, "grad_norm": 1.9415223598480225, "learning_rate": 5.303030303030303e-07, "loss": 0.0, "num_tokens": 19746389.0, "reward": 0.717968761920929, "reward_std": 0.06946974992752075, "rewards/video_r1_accuracy_reward/mean": 0.703125, "rewards/video_r1_accuracy_reward/std": 0.45867621898651123, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 187 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.0, "completions/max_terminated_length": 115.0, "completions/mean_length": 39.9921875, "completions/mean_terminated_length": 39.9921875, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.0821011066436768, "epoch": 0.47474747474747475, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 5.277777777777777e-07, "loss": 0.0, "num_tokens": 19840548.0, "reward": 0.8218749761581421, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.8125, "rewards/video_r1_accuracy_reward/std": 0.39184603095054626, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 188 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.0, "completions/max_terminated_length": 109.0, "completions/mean_length": 39.6953125, "completions/mean_terminated_length": 39.6953125, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 1.1291919946670532, "epoch": 0.4772727272727273, "frac_reward_zero_std": 0.875, "grad_norm": 1.641886830329895, "learning_rate": 5.252525252525253e-07, "loss": 0.0, "num_tokens": 19931517.0, "reward": 0.762499988079071, "reward_std": 0.04198446497321129, "rewards/video_r1_accuracy_reward/mean": 0.75, "rewards/video_r1_accuracy_reward/std": 0.434714138507843, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 189 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.0, "completions/max_terminated_length": 108.0, "completions/mean_length": 43.8828125, "completions/mean_terminated_length": 43.8828125, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.197535753250122, "epoch": 0.4797979797979798, "frac_reward_zero_std": 0.6875, "grad_norm": 2.2403042316436768, "learning_rate": 5.227272727272727e-07, "loss": 0.0, "num_tokens": 20039918.0, "reward": 0.62890625, "reward_std": 0.13092872500419617, "rewards/video_r1_accuracy_reward/mean": 0.609375, "rewards/video_r1_accuracy_reward/std": 0.4898075461387634, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 190 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 88.0, "completions/max_terminated_length": 88.0, "completions/mean_length": 41.3671875, "completions/mean_terminated_length": 41.3671875, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.253387451171875, "epoch": 0.4823232323232323, "frac_reward_zero_std": 0.8125, "grad_norm": 1.8688163757324219, "learning_rate": 5.202020202020201e-07, "loss": 0.0, "num_tokens": 20148373.0, "reward": 0.4136718511581421, "reward_std": 0.08570004999637604, "rewards/video_r1_accuracy_reward/mean": 0.3828125, "rewards/video_r1_accuracy_reward/std": 0.4879830479621887, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 191 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 96.0, "completions/max_terminated_length": 96.0, "completions/mean_length": 36.578125, "completions/mean_terminated_length": 36.578125, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 1.1290706396102905, "epoch": 0.48484848484848486, "frac_reward_zero_std": 0.875, "grad_norm": 1.7567572593688965, "learning_rate": 5.176767676767676e-07, "loss": -0.0, "num_tokens": 20245007.0, "reward": 0.814453125, "reward_std": 0.04847751557826996, "rewards/video_r1_accuracy_reward/mean": 0.8046875, "rewards/video_r1_accuracy_reward/std": 0.3979988098144531, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 192 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 89.0, "completions/max_terminated_length": 89.0, "completions/mean_length": 43.1875, "completions/mean_terminated_length": 43.1875, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 1.1787712574005127, "epoch": 0.48737373737373735, "frac_reward_zero_std": 0.875, "grad_norm": 1.6133161783218384, "learning_rate": 5.151515151515151e-07, "loss": -0.0, "num_tokens": 20340607.0, "reward": 0.703125, "reward_std": 0.06145896762609482, "rewards/video_r1_accuracy_reward/mean": 0.6875, "rewards/video_r1_accuracy_reward/std": 0.4653336703777313, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 193 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 94.0, "completions/max_terminated_length": 94.0, "completions/mean_length": 41.0, "completions/mean_terminated_length": 41.0, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.215273141860962, "epoch": 0.4898989898989899, "frac_reward_zero_std": 0.75, "grad_norm": 2.7843291759490967, "learning_rate": 5.126262626262626e-07, "loss": 0.0, "num_tokens": 20446183.0, "reward": 0.5992187261581421, "reward_std": 0.09695503115653992, "rewards/video_r1_accuracy_reward/mean": 0.578125, "rewards/video_r1_accuracy_reward/std": 0.4957992732524872, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 194 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 95.0, "completions/max_terminated_length": 95.0, "completions/mean_length": 42.1640625, "completions/mean_terminated_length": 42.1640625, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.1246238946914673, "epoch": 0.49242424242424243, "frac_reward_zero_std": 0.875, "grad_norm": 1.0955389738082886, "learning_rate": 5.1010101010101e-07, "loss": 0.0, "num_tokens": 20552172.0, "reward": 0.6507812738418579, "reward_std": 0.04958236962556839, "rewards/video_r1_accuracy_reward/mean": 0.6328125, "rewards/video_r1_accuracy_reward/std": 0.4839322865009308, "rewards/video_r1_format_reward/mean": 0.9921875, "rewards/video_r1_format_reward/std": 0.0883883461356163, "step": 195 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 85.0, "completions/max_terminated_length": 85.0, "completions/mean_length": 42.5, "completions/mean_terminated_length": 42.5, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 1.1459453105926514, "epoch": 0.494949494949495, "frac_reward_zero_std": 0.875, "grad_norm": 1.4178069829940796, "learning_rate": 5.075757575757576e-07, "loss": 0.0, "num_tokens": 20654036.0, "reward": 0.591796875, "reward_std": 0.04847751557826996, "rewards/video_r1_accuracy_reward/mean": 0.5703125, "rewards/video_r1_accuracy_reward/std": 0.4969765841960907, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 196 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 83.0, "completions/max_terminated_length": 83.0, "completions/mean_length": 39.59375, "completions/mean_terminated_length": 39.59375, "completions/min_length": 16.0, "completions/min_terminated_length": 16.0, "entropy": 1.142756700515747, "epoch": 0.49747474747474746, "frac_reward_zero_std": 0.8125, "grad_norm": 2.4916136264801025, "learning_rate": 5.05050505050505e-07, "loss": -0.0, "num_tokens": 20759896.0, "reward": 0.4804687201976776, "reward_std": 0.08345898985862732, "rewards/video_r1_accuracy_reward/mean": 0.453125, "rewards/video_r1_accuracy_reward/std": 0.4997538626194, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 197 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 83.0, "completions/max_terminated_length": 83.0, "completions/mean_length": 40.7421875, "completions/mean_terminated_length": 40.7421875, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.1972713470458984, "epoch": 0.5, "frac_reward_zero_std": 0.6875, "grad_norm": 3.071634292602539, "learning_rate": 5.025252525252525e-07, "loss": -0.0, "num_tokens": 20869311.0, "reward": 0.703125, "reward_std": 0.1254434585571289, "rewards/video_r1_accuracy_reward/mean": 0.6875, "rewards/video_r1_accuracy_reward/std": 0.4653336703777313, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 198 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 91.0, "completions/max_terminated_length": 91.0, "completions/mean_length": 41.9609375, "completions/mean_terminated_length": 41.9609375, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 1.2184077501296997, "epoch": 0.5025252525252525, "frac_reward_zero_std": 0.8125, "grad_norm": 2.4103143215179443, "learning_rate": 5e-07, "loss": -0.0, "num_tokens": 20965562.0, "reward": 0.5992187857627869, "reward_std": 0.08345898985862732, "rewards/video_r1_accuracy_reward/mean": 0.578125, "rewards/video_r1_accuracy_reward/std": 0.4957992732524872, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 199 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 75.0, "completions/max_terminated_length": 75.0, "completions/mean_length": 37.8515625, "completions/mean_terminated_length": 37.8515625, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.0560106039047241, "epoch": 0.5050505050505051, "frac_reward_zero_std": 0.875, "grad_norm": 1.7731906175613403, "learning_rate": 4.974747474747474e-07, "loss": -0.0, "num_tokens": 21069135.0, "reward": 0.7328125238418579, "reward_std": 0.05497056990861893, "rewards/video_r1_accuracy_reward/mean": 0.71875, "rewards/video_r1_accuracy_reward/std": 0.4513758420944214, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 200 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 81.0, "completions/max_terminated_length": 81.0, "completions/mean_length": 40.6953125, "completions/mean_terminated_length": 40.6953125, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.1646404266357422, "epoch": 0.5075757575757576, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 4.949494949494949e-07, "loss": 0.0, "num_tokens": 21182136.0, "reward": 0.703125, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.6875, "rewards/video_r1_accuracy_reward/std": 0.4653336703777313, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 201 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 83.0, "completions/max_terminated_length": 83.0, "completions/mean_length": 39.3828125, "completions/mean_terminated_length": 39.3828125, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 1.173717975616455, "epoch": 0.51010101010101, "frac_reward_zero_std": 0.8125, "grad_norm": 1.3920958042144775, "learning_rate": 4.924242424242424e-07, "loss": 0.0, "num_tokens": 21284945.0, "reward": 0.6882812976837158, "reward_std": 0.06946974992752075, "rewards/video_r1_accuracy_reward/mean": 0.671875, "rewards/video_r1_accuracy_reward/std": 0.4713755249977112, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 202 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 95.0, "completions/max_terminated_length": 95.0, "completions/mean_length": 38.6875, "completions/mean_terminated_length": 38.6875, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.1436386108398438, "epoch": 0.5126262626262627, "frac_reward_zero_std": 0.875, "grad_norm": 1.3957781791687012, "learning_rate": 4.898989898989898e-07, "loss": 0.0, "num_tokens": 21389041.0, "reward": 0.762499988079071, "reward_std": 0.04198446497321129, "rewards/video_r1_accuracy_reward/mean": 0.75, "rewards/video_r1_accuracy_reward/std": 0.434714138507843, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 203 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 84.0, "completions/max_terminated_length": 84.0, "completions/mean_length": 37.1171875, "completions/mean_terminated_length": 37.1171875, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.0701844692230225, "epoch": 0.5151515151515151, "frac_reward_zero_std": 0.75, "grad_norm": 6.448522090911865, "learning_rate": 4.873737373737373e-07, "loss": -0.0, "num_tokens": 21490960.0, "reward": 0.5249999761581421, "reward_std": 0.11094427108764648, "rewards/video_r1_accuracy_reward/mean": 0.5, "rewards/video_r1_accuracy_reward/std": 0.5019646286964417, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 204 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 81.0, "completions/max_terminated_length": 81.0, "completions/mean_length": 36.6484375, "completions/mean_terminated_length": 36.6484375, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.1014225482940674, "epoch": 0.5176767676767676, "frac_reward_zero_std": 0.9375, "grad_norm": 1.2016708850860596, "learning_rate": 4.848484848484849e-07, "loss": -0.0, "num_tokens": 21589139.0, "reward": 0.7699218988418579, "reward_std": 0.020992232486605644, "rewards/video_r1_accuracy_reward/mean": 0.7578125, "rewards/video_r1_accuracy_reward/std": 0.4300905168056488, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 205 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.0, "completions/max_terminated_length": 103.0, "completions/mean_length": 42.125, "completions/mean_terminated_length": 42.125, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 1.1724364757537842, "epoch": 0.5202020202020202, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 4.823232323232323e-07, "loss": 0.0, "num_tokens": 21683795.0, "reward": 0.762499988079071, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.75, "rewards/video_r1_accuracy_reward/std": 0.434714138507843, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 206 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 79.0, "completions/max_terminated_length": 79.0, "completions/mean_length": 38.78125, "completions/mean_terminated_length": 38.78125, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.0875164270401, "epoch": 0.5227272727272727, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 4.797979797979798e-07, "loss": 0.0, "num_tokens": 21786679.0, "reward": 0.6437499523162842, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.625, "rewards/video_r1_accuracy_reward/std": 0.4860251843929291, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 207 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 83.0, "completions/max_terminated_length": 83.0, "completions/mean_length": 38.7734375, "completions/mean_terminated_length": 38.7734375, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.1065541505813599, "epoch": 0.5252525252525253, "frac_reward_zero_std": 0.875, "grad_norm": 2.205068588256836, "learning_rate": 4.772727272727273e-07, "loss": 0.0, "num_tokens": 21881450.0, "reward": 0.7847656607627869, "reward_std": 0.04847751557826996, "rewards/video_r1_accuracy_reward/mean": 0.7734375, "rewards/video_r1_accuracy_reward/std": 0.4202519655227661, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 208 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 75.0, "completions/max_terminated_length": 75.0, "completions/mean_length": 36.2109375, "completions/mean_terminated_length": 36.2109375, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.0509859323501587, "epoch": 0.5277777777777778, "frac_reward_zero_std": 0.875, "grad_norm": 1.425723671913147, "learning_rate": 4.7474747474747474e-07, "loss": -0.0, "num_tokens": 21985117.0, "reward": 0.7699218988418579, "reward_std": 0.05821476876735687, "rewards/video_r1_accuracy_reward/mean": 0.7578125, "rewards/video_r1_accuracy_reward/std": 0.4300905168056488, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 209 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 83.0, "completions/max_terminated_length": 83.0, "completions/mean_length": 38.3671875, "completions/mean_terminated_length": 38.3671875, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.0938265323638916, "epoch": 0.5303030303030303, "frac_reward_zero_std": 0.75, "grad_norm": 2.6129071712493896, "learning_rate": 4.722222222222222e-07, "loss": -0.0, "num_tokens": 22083148.0, "reward": 0.8070312738418579, "reward_std": 0.11094427108764648, "rewards/video_r1_accuracy_reward/mean": 0.796875, "rewards/video_r1_accuracy_reward/std": 0.40390563011169434, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 210 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 105.0, "completions/max_terminated_length": 105.0, "completions/mean_length": 38.5390625, "completions/mean_terminated_length": 38.5390625, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.1249573230743408, "epoch": 0.5328282828282829, "frac_reward_zero_std": 0.9375, "grad_norm": 1.0192317962646484, "learning_rate": 4.696969696969697e-07, "loss": 0.0, "num_tokens": 22182537.0, "reward": 0.651171863079071, "reward_std": 0.020992232486605644, "rewards/video_r1_accuracy_reward/mean": 0.6328125, "rewards/video_r1_accuracy_reward/std": 0.4839322865009308, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 211 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 79.0, "completions/max_terminated_length": 79.0, "completions/mean_length": 37.875, "completions/mean_terminated_length": 37.875, "completions/min_length": 14.0, "completions/min_terminated_length": 14.0, "entropy": 1.1420645713806152, "epoch": 0.5353535353535354, "frac_reward_zero_std": 0.9375, "grad_norm": 1.2227110862731934, "learning_rate": 4.6717171717171714e-07, "loss": 0.0, "num_tokens": 22284025.0, "reward": 0.755078136920929, "reward_std": 0.020992232486605644, "rewards/video_r1_accuracy_reward/mean": 0.7421875, "rewards/video_r1_accuracy_reward/std": 0.43914905190467834, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 212 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 105.0, "completions/max_terminated_length": 105.0, "completions/mean_length": 39.8828125, "completions/mean_terminated_length": 39.8828125, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.0884701013565063, "epoch": 0.5378787878787878, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 4.646464646464646e-07, "loss": 0.0, "num_tokens": 22386154.0, "reward": 0.8812500238418579, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.875, "rewards/video_r1_accuracy_reward/std": 0.3320184051990509, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 213 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 93.0, "completions/max_terminated_length": 93.0, "completions/mean_length": 39.0625, "completions/mean_terminated_length": 39.0625, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.0740153789520264, "epoch": 0.5404040404040404, "frac_reward_zero_std": 0.8125, "grad_norm": 2.3575637340545654, "learning_rate": 4.6212121212121207e-07, "loss": -0.0, "num_tokens": 22478426.0, "reward": 0.7105468511581421, "reward_std": 0.07596279680728912, "rewards/video_r1_accuracy_reward/mean": 0.6953125, "rewards/video_r1_accuracy_reward/std": 0.46208351850509644, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 214 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 80.0, "completions/max_terminated_length": 80.0, "completions/mean_length": 36.9765625, "completions/mean_terminated_length": 36.9765625, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 1.0606670379638672, "epoch": 0.5429292929292929, "frac_reward_zero_std": 0.9375, "grad_norm": 0.6686134338378906, "learning_rate": 4.595959595959596e-07, "loss": 0.0, "num_tokens": 22570903.0, "reward": 0.6957031488418579, "reward_std": 0.020992232486605644, "rewards/video_r1_accuracy_reward/mean": 0.6796875, "rewards/video_r1_accuracy_reward/std": 0.4684300124645233, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 215 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 88.0, "completions/max_terminated_length": 88.0, "completions/mean_length": 39.2109375, "completions/mean_terminated_length": 39.2109375, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.117194414138794, "epoch": 0.5454545454545454, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 4.5707070707070705e-07, "loss": 0.0, "num_tokens": 22660930.0, "reward": 0.703125, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.6875, "rewards/video_r1_accuracy_reward/std": 0.4653336703777313, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 216 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 86.0, "completions/max_terminated_length": 86.0, "completions/mean_length": 38.84375, "completions/mean_terminated_length": 38.84375, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.1242808103561401, "epoch": 0.547979797979798, "frac_reward_zero_std": 0.9375, "grad_norm": 1.227063536643982, "learning_rate": 4.545454545454545e-07, "loss": 0.0, "num_tokens": 22753702.0, "reward": 0.814453125, "reward_std": 0.020992232486605644, "rewards/video_r1_accuracy_reward/mean": 0.8046875, "rewards/video_r1_accuracy_reward/std": 0.3979988098144531, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 217 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 68.0, "completions/max_terminated_length": 68.0, "completions/mean_length": 36.9453125, "completions/mean_terminated_length": 36.9453125, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.083737850189209, "epoch": 0.5505050505050505, "frac_reward_zero_std": 0.8125, "grad_norm": 3.3731985092163086, "learning_rate": 4.5202020202020204e-07, "loss": 0.0, "num_tokens": 22856391.0, "reward": 0.7996094226837158, "reward_std": 0.08245119452476501, "rewards/video_r1_accuracy_reward/mean": 0.7890625, "rewards/video_r1_accuracy_reward/std": 0.4095771610736847, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 218 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 92.0, "completions/max_terminated_length": 92.0, "completions/mean_length": 39.375, "completions/mean_terminated_length": 39.375, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.153437614440918, "epoch": 0.553030303030303, "frac_reward_zero_std": 0.8125, "grad_norm": 3.4006295204162598, "learning_rate": 4.494949494949495e-07, "loss": 0.0, "num_tokens": 22955055.0, "reward": 0.651171863079071, "reward_std": 0.0727139487862587, "rewards/video_r1_accuracy_reward/mean": 0.6328125, "rewards/video_r1_accuracy_reward/std": 0.4839322865009308, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 219 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 71.0, "completions/max_terminated_length": 71.0, "completions/mean_length": 38.5703125, "completions/mean_terminated_length": 38.5703125, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.126805305480957, "epoch": 0.5555555555555556, "frac_reward_zero_std": 0.625, "grad_norm": 2.5174307823181152, "learning_rate": 4.469696969696969e-07, "loss": -0.0, "num_tokens": 23062816.0, "reward": 0.666015625, "reward_std": 0.1464356929063797, "rewards/video_r1_accuracy_reward/mean": 0.6484375, "rewards/video_r1_accuracy_reward/std": 0.4793342351913452, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 220 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 93.0, "completions/max_terminated_length": 93.0, "completions/mean_length": 37.0625, "completions/mean_terminated_length": 37.0625, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.0423493385314941, "epoch": 0.5580808080808081, "frac_reward_zero_std": 0.875, "grad_norm": 1.49821937084198, "learning_rate": 4.444444444444444e-07, "loss": -0.0, "num_tokens": 23170032.0, "reward": 0.7398437857627869, "reward_std": 0.05291558802127838, "rewards/video_r1_accuracy_reward/mean": 0.7265625, "rewards/video_r1_accuracy_reward/std": 0.447474867105484, "rewards/video_r1_format_reward/mean": 0.9921875, "rewards/video_r1_format_reward/std": 0.0883883461356163, "step": 221 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 70.0, "completions/max_terminated_length": 70.0, "completions/mean_length": 34.9765625, "completions/mean_terminated_length": 34.9765625, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.0868003368377686, "epoch": 0.5606060606060606, "frac_reward_zero_std": 0.9375, "grad_norm": 1.3574455976486206, "learning_rate": 4.419191919191919e-07, "loss": 0.0, "num_tokens": 23272485.0, "reward": 0.6363281011581421, "reward_std": 0.020992232486605644, "rewards/video_r1_accuracy_reward/mean": 0.6171875, "rewards/video_r1_accuracy_reward/std": 0.4879830479621887, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 222 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 71.0, "completions/max_terminated_length": 71.0, "completions/mean_length": 37.90625, "completions/mean_terminated_length": 37.90625, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.0563578605651855, "epoch": 0.5631313131313131, "frac_reward_zero_std": 0.9375, "grad_norm": 0.896294355392456, "learning_rate": 4.3939393939393937e-07, "loss": -0.0, "num_tokens": 23366809.0, "reward": 0.799609363079071, "reward_std": 0.03072948381304741, "rewards/video_r1_accuracy_reward/mean": 0.7890625, "rewards/video_r1_accuracy_reward/std": 0.4095771610736847, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 223 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 78.0, "completions/max_terminated_length": 78.0, "completions/mean_length": 37.359375, "completions/mean_terminated_length": 37.359375, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.0919694900512695, "epoch": 0.5656565656565656, "frac_reward_zero_std": 0.875, "grad_norm": 1.2921442985534668, "learning_rate": 4.3686868686868683e-07, "loss": -0.0, "num_tokens": 23468999.0, "reward": 0.591796875, "reward_std": 0.05821476876735687, "rewards/video_r1_accuracy_reward/mean": 0.5703125, "rewards/video_r1_accuracy_reward/std": 0.4969765841960907, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 224 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 96.0, "completions/max_terminated_length": 96.0, "completions/mean_length": 39.7734375, "completions/mean_terminated_length": 39.7734375, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.1395623683929443, "epoch": 0.5681818181818182, "frac_reward_zero_std": 0.75, "grad_norm": 3.0386202335357666, "learning_rate": 4.3434343434343435e-07, "loss": -0.0, "num_tokens": 23574730.0, "reward": 0.7476562261581421, "reward_std": 0.11519625782966614, "rewards/video_r1_accuracy_reward/mean": 0.734375, "rewards/video_r1_accuracy_reward/std": 0.44340085983276367, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 225 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 72.0, "completions/max_terminated_length": 72.0, "completions/mean_length": 36.4453125, "completions/mean_terminated_length": 36.4453125, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.0388587713241577, "epoch": 0.5707070707070707, "frac_reward_zero_std": 0.75, "grad_norm": 3.514066219329834, "learning_rate": 4.318181818181818e-07, "loss": -0.0, "num_tokens": 23671659.0, "reward": 0.6957030892372131, "reward_std": 0.0947139710187912, "rewards/video_r1_accuracy_reward/mean": 0.6796875, "rewards/video_r1_accuracy_reward/std": 0.4684300124645233, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 226 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.0, "completions/max_terminated_length": 97.0, "completions/mean_length": 37.9375, "completions/mean_terminated_length": 37.9375, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.0328450202941895, "epoch": 0.5732323232323232, "frac_reward_zero_std": 0.875, "grad_norm": 1.3931517601013184, "learning_rate": 4.292929292929293e-07, "loss": -0.0, "num_tokens": 23776019.0, "reward": 0.688281238079071, "reward_std": 0.051721714437007904, "rewards/video_r1_accuracy_reward/mean": 0.671875, "rewards/video_r1_accuracy_reward/std": 0.4713755249977112, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 227 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 384.0, "completions/max_terminated_length": 80.0, "completions/mean_length": 43.9921875, "completions/mean_terminated_length": 41.31496047973633, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.0174750089645386, "epoch": 0.5757575757575758, "frac_reward_zero_std": 0.875, "grad_norm": 1.0087193250656128, "learning_rate": 4.267676767676767e-07, "loss": 0.0, "num_tokens": 23879906.0, "reward": 0.784375011920929, "reward_std": 0.03183433786034584, "rewards/video_r1_accuracy_reward/mean": 0.7734375, "rewards/video_r1_accuracy_reward/std": 0.4202519655227661, "rewards/video_r1_format_reward/mean": 0.9921875, "rewards/video_r1_format_reward/std": 0.0883883461356163, "step": 228 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 85.0, "completions/max_terminated_length": 85.0, "completions/mean_length": 39.2734375, "completions/mean_terminated_length": 39.2734375, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.9948429465293884, "epoch": 0.5782828282828283, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 4.242424242424242e-07, "loss": 0.0, "num_tokens": 23975525.0, "reward": 0.5843749642372131, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.5625, "rewards/video_r1_accuracy_reward/std": 0.49802759289741516, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 229 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 91.0, "completions/max_terminated_length": 91.0, "completions/mean_length": 39.765625, "completions/mean_terminated_length": 39.765625, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.0840778350830078, "epoch": 0.5808080808080808, "frac_reward_zero_std": 0.9375, "grad_norm": 1.692561388015747, "learning_rate": 4.217171717171717e-07, "loss": 0.0, "num_tokens": 24076743.0, "reward": 0.7550780773162842, "reward_std": 0.020992232486605644, "rewards/video_r1_accuracy_reward/mean": 0.7421875, "rewards/video_r1_accuracy_reward/std": 0.43914905190467834, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 230 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 88.0, "completions/max_terminated_length": 88.0, "completions/mean_length": 39.5546875, "completions/mean_terminated_length": 39.5546875, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 1.0627765655517578, "epoch": 0.5833333333333334, "frac_reward_zero_std": 0.9375, "grad_norm": 1.6762282848358154, "learning_rate": 4.1919191919191915e-07, "loss": 0.0, "num_tokens": 24168014.0, "reward": 0.6585937738418579, "reward_std": 0.027485283091664314, "rewards/video_r1_accuracy_reward/mean": 0.640625, "rewards/video_r1_accuracy_reward/std": 0.481702595949173, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 231 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 80.0, "completions/max_terminated_length": 80.0, "completions/mean_length": 42.03125, "completions/mean_terminated_length": 42.03125, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 1.1012775897979736, "epoch": 0.5858585858585859, "frac_reward_zero_std": 0.875, "grad_norm": 2.1221230030059814, "learning_rate": 4.1666666666666667e-07, "loss": 0.0, "num_tokens": 24264154.0, "reward": 0.4359374940395355, "reward_std": 0.05497056618332863, "rewards/video_r1_accuracy_reward/mean": 0.40625, "rewards/video_r1_accuracy_reward/std": 0.4930621087551117, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 232 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 92.0, "completions/max_terminated_length": 92.0, "completions/mean_length": 41.5625, "completions/mean_terminated_length": 41.5625, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 1.053170919418335, "epoch": 0.5883838383838383, "frac_reward_zero_std": 0.9375, "grad_norm": 1.132128357887268, "learning_rate": 4.1414141414141413e-07, "loss": -0.0, "num_tokens": 24373658.0, "reward": 0.7328125238418579, "reward_std": 0.03173727169632912, "rewards/video_r1_accuracy_reward/mean": 0.71875, "rewards/video_r1_accuracy_reward/std": 0.4513758420944214, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 233 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.0, "completions/max_terminated_length": 87.0, "completions/mean_length": 41.6484375, "completions/mean_terminated_length": 41.6484375, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.1390644311904907, "epoch": 0.5909090909090909, "frac_reward_zero_std": 0.8125, "grad_norm": 1.8743163347244263, "learning_rate": 4.116161616161616e-07, "loss": 0.0, "num_tokens": 24482197.0, "reward": 0.6363281607627869, "reward_std": 0.07596279680728912, "rewards/video_r1_accuracy_reward/mean": 0.6171875, "rewards/video_r1_accuracy_reward/std": 0.4879830479621887, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 234 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 91.0, "completions/max_terminated_length": 91.0, "completions/mean_length": 41.4921875, "completions/mean_terminated_length": 41.4921875, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.1158329248428345, "epoch": 0.5934343434343434, "frac_reward_zero_std": 0.9375, "grad_norm": 0.9406242966651917, "learning_rate": 4.090909090909091e-07, "loss": 0.0, "num_tokens": 24573972.0, "reward": 0.576953113079071, "reward_std": 0.020992232486605644, "rewards/video_r1_accuracy_reward/mean": 0.5546875, "rewards/video_r1_accuracy_reward/std": 0.4989531338214874, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 235 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 84.0, "completions/max_terminated_length": 84.0, "completions/mean_length": 38.7734375, "completions/mean_terminated_length": 38.7734375, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.0798749923706055, "epoch": 0.5959595959595959, "frac_reward_zero_std": 0.9375, "grad_norm": 1.1411508321762085, "learning_rate": 4.065656565656566e-07, "loss": -0.0, "num_tokens": 24676151.0, "reward": 0.7027343511581421, "reward_std": 0.0011048546293750405, "rewards/video_r1_accuracy_reward/mean": 0.6875, "rewards/video_r1_accuracy_reward/std": 0.4653336703777313, "rewards/video_r1_format_reward/mean": 0.9921875, "rewards/video_r1_format_reward/std": 0.0883883461356163, "step": 236 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.0, "completions/max_terminated_length": 97.0, "completions/mean_length": 43.1640625, "completions/mean_terminated_length": 43.1640625, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 1.0829260349273682, "epoch": 0.5984848484848485, "frac_reward_zero_std": 0.9375, "grad_norm": 1.0807678699493408, "learning_rate": 4.04040404040404e-07, "loss": -0.0, "num_tokens": 24789604.0, "reward": 0.7847656011581421, "reward_std": 0.03072948195040226, "rewards/video_r1_accuracy_reward/mean": 0.7734375, "rewards/video_r1_accuracy_reward/std": 0.4202519655227661, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 237 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 81.0, "completions/max_terminated_length": 81.0, "completions/mean_length": 42.8828125, "completions/mean_terminated_length": 42.8828125, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.1113200187683105, "epoch": 0.601010101010101, "frac_reward_zero_std": 0.8125, "grad_norm": 1.7582932710647583, "learning_rate": 4.0151515151515146e-07, "loss": 0.0, "num_tokens": 24890173.0, "reward": 0.7105469107627869, "reward_std": 0.06297669559717178, "rewards/video_r1_accuracy_reward/mean": 0.6953125, "rewards/video_r1_accuracy_reward/std": 0.46208351850509644, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 238 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 75.0, "completions/max_terminated_length": 75.0, "completions/mean_length": 38.59375, "completions/mean_terminated_length": 38.59375, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.0123108625411987, "epoch": 0.6035353535353535, "frac_reward_zero_std": 0.75, "grad_norm": 2.2676467895507812, "learning_rate": 3.98989898989899e-07, "loss": -0.0, "num_tokens": 24995257.0, "reward": 0.606640636920929, "reward_std": 0.1131853312253952, "rewards/video_r1_accuracy_reward/mean": 0.5859375, "rewards/video_r1_accuracy_reward/std": 0.49449479579925537, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 239 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.0, "completions/max_terminated_length": 118.0, "completions/mean_length": 41.9765625, "completions/mean_terminated_length": 41.9765625, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 1.0618376731872559, "epoch": 0.6060606060606061, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 3.9646464646464644e-07, "loss": 0.0, "num_tokens": 25087574.0, "reward": 0.5843749642372131, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.5625, "rewards/video_r1_accuracy_reward/std": 0.49802759289741516, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 240 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 70.0, "completions/max_terminated_length": 70.0, "completions/mean_length": 39.2421875, "completions/mean_terminated_length": 39.2421875, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 1.0472171306610107, "epoch": 0.6085858585858586, "frac_reward_zero_std": 0.875, "grad_norm": 1.201456069946289, "learning_rate": 3.939393939393939e-07, "loss": 0.0, "num_tokens": 25177613.0, "reward": 0.7105468511581421, "reward_std": 0.04847751557826996, "rewards/video_r1_accuracy_reward/mean": 0.6953125, "rewards/video_r1_accuracy_reward/std": 0.46208351850509644, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 241 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.0, "completions/max_terminated_length": 87.0, "completions/mean_length": 41.1953125, "completions/mean_terminated_length": 41.1953125, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.0318273305892944, "epoch": 0.6111111111111112, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 3.9141414141414143e-07, "loss": 0.0, "num_tokens": 25275414.0, "reward": 0.5843749642372131, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.5625, "rewards/video_r1_accuracy_reward/std": 0.49802759289741516, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 242 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.0, "completions/max_terminated_length": 114.0, "completions/mean_length": 40.625, "completions/mean_terminated_length": 40.625, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.0426480770111084, "epoch": 0.6136363636363636, "frac_reward_zero_std": 0.8125, "grad_norm": 1.9526947736740112, "learning_rate": 3.888888888888889e-07, "loss": 0.0, "num_tokens": 25373318.0, "reward": 0.614062488079071, "reward_std": 0.06946974992752075, "rewards/video_r1_accuracy_reward/mean": 0.59375, "rewards/video_r1_accuracy_reward/std": 0.4930621087551117, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 243 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.0, "completions/max_terminated_length": 87.0, "completions/mean_length": 41.3515625, "completions/mean_terminated_length": 41.3515625, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 1.0614213943481445, "epoch": 0.6161616161616161, "frac_reward_zero_std": 0.8125, "grad_norm": 2.547713279724121, "learning_rate": 3.8636363636363636e-07, "loss": 0.0, "num_tokens": 25481939.0, "reward": 0.7476562261581421, "reward_std": 0.08345898985862732, "rewards/video_r1_accuracy_reward/mean": 0.734375, "rewards/video_r1_accuracy_reward/std": 0.44340085983276367, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 244 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 83.0, "completions/max_terminated_length": 83.0, "completions/mean_length": 42.1015625, "completions/mean_terminated_length": 42.1015625, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.1329345703125, "epoch": 0.6186868686868687, "frac_reward_zero_std": 0.9375, "grad_norm": 1.2779253721237183, "learning_rate": 3.8383838383838377e-07, "loss": -0.0, "num_tokens": 25588960.0, "reward": 0.7328124642372131, "reward_std": 0.03173727169632912, "rewards/video_r1_accuracy_reward/mean": 0.71875, "rewards/video_r1_accuracy_reward/std": 0.4513758420944214, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 245 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.0, "completions/max_terminated_length": 103.0, "completions/mean_length": 43.3203125, "completions/mean_terminated_length": 43.3203125, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 1.0123302936553955, "epoch": 0.6212121212121212, "frac_reward_zero_std": 0.875, "grad_norm": 1.0937881469726562, "learning_rate": 3.813131313131313e-07, "loss": 0.0, "num_tokens": 25690657.0, "reward": 0.725390613079071, "reward_std": 0.04847751557826996, "rewards/video_r1_accuracy_reward/mean": 0.7109375, "rewards/video_r1_accuracy_reward/std": 0.45510825514793396, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 246 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 82.0, "completions/max_terminated_length": 82.0, "completions/mean_length": 41.9765625, "completions/mean_terminated_length": 41.9765625, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 1.1118111610412598, "epoch": 0.6237373737373737, "frac_reward_zero_std": 0.875, "grad_norm": 1.778397560119629, "learning_rate": 3.7878787878787876e-07, "loss": 0.0, "num_tokens": 25789478.0, "reward": 0.6140625476837158, "reward_std": 0.05497056618332863, "rewards/video_r1_accuracy_reward/mean": 0.59375, "rewards/video_r1_accuracy_reward/std": 0.4930621087551117, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 247 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.0, "completions/max_terminated_length": 106.0, "completions/mean_length": 43.578125, "completions/mean_terminated_length": 43.578125, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.018457055091858, "epoch": 0.6262626262626263, "frac_reward_zero_std": 0.875, "grad_norm": 1.7696324586868286, "learning_rate": 3.762626262626262e-07, "loss": -0.0, "num_tokens": 25888824.0, "reward": 0.539843738079071, "reward_std": 0.05922255665063858, "rewards/video_r1_accuracy_reward/mean": 0.515625, "rewards/video_r1_accuracy_reward/std": 0.5017194747924805, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 248 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 101.0, "completions/max_terminated_length": 101.0, "completions/mean_length": 43.9453125, "completions/mean_terminated_length": 43.9453125, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.1335992813110352, "epoch": 0.6287878787878788, "frac_reward_zero_std": 0.875, "grad_norm": 2.2729897499084473, "learning_rate": 3.7373737373737374e-07, "loss": -0.0, "num_tokens": 25983369.0, "reward": 0.8960937261581421, "reward_std": 0.06145896762609482, "rewards/video_r1_accuracy_reward/mean": 0.890625, "rewards/video_r1_accuracy_reward/std": 0.31333550810813904, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 249 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 76.0, "completions/max_terminated_length": 76.0, "completions/mean_length": 41.171875, "completions/mean_terminated_length": 41.171875, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 1.062988519668579, "epoch": 0.6313131313131313, "frac_reward_zero_std": 0.875, "grad_norm": 1.235278844833374, "learning_rate": 3.712121212121212e-07, "loss": -0.0, "num_tokens": 26080023.0, "reward": 0.7105468511581421, "reward_std": 0.05821476876735687, "rewards/video_r1_accuracy_reward/mean": 0.6953125, "rewards/video_r1_accuracy_reward/std": 0.46208351850509644, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 250 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.0, "completions/max_terminated_length": 106.0, "completions/mean_length": 39.1328125, "completions/mean_terminated_length": 39.1328125, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 1.0980217456817627, "epoch": 0.6338383838383839, "frac_reward_zero_std": 0.9375, "grad_norm": 1.177210807800293, "learning_rate": 3.686868686868687e-07, "loss": -0.0, "num_tokens": 26168704.0, "reward": 0.7476562261581421, "reward_std": 0.027485284954309464, "rewards/video_r1_accuracy_reward/mean": 0.734375, "rewards/video_r1_accuracy_reward/std": 0.44340085983276367, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 251 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 79.0, "completions/max_terminated_length": 79.0, "completions/mean_length": 40.4453125, "completions/mean_terminated_length": 40.4453125, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 0.98891681432724, "epoch": 0.6363636363636364, "frac_reward_zero_std": 0.875, "grad_norm": 2.9645073413848877, "learning_rate": 3.661616161616162e-07, "loss": -0.0, "num_tokens": 26265033.0, "reward": 0.7921874523162842, "reward_std": 0.051721714437007904, "rewards/video_r1_accuracy_reward/mean": 0.78125, "rewards/video_r1_accuracy_reward/std": 0.41502299904823303, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 252 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.0, "completions/max_terminated_length": 115.0, "completions/mean_length": 41.390625, "completions/mean_terminated_length": 41.390625, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.0786819458007812, "epoch": 0.6388888888888888, "frac_reward_zero_std": 0.8125, "grad_norm": 1.4744101762771606, "learning_rate": 3.636363636363636e-07, "loss": -0.0, "num_tokens": 26367979.0, "reward": 0.6734374761581421, "reward_std": 0.09319624304771423, "rewards/video_r1_accuracy_reward/mean": 0.65625, "rewards/video_r1_accuracy_reward/std": 0.47682511806488037, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 253 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.0, "completions/max_terminated_length": 128.0, "completions/mean_length": 42.15625, "completions/mean_terminated_length": 42.15625, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.0700483322143555, "epoch": 0.6414141414141414, "frac_reward_zero_std": 0.5625, "grad_norm": 4.3232808113098145, "learning_rate": 3.6111111111111107e-07, "loss": 0.0, "num_tokens": 26467855.0, "reward": 0.7328125238418579, "reward_std": 0.18041402101516724, "rewards/video_r1_accuracy_reward/mean": 0.71875, "rewards/video_r1_accuracy_reward/std": 0.4513758420944214, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 254 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 92.0, "completions/max_terminated_length": 92.0, "completions/mean_length": 41.0625, "completions/mean_terminated_length": 41.0625, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.0175724029541016, "epoch": 0.6439393939393939, "frac_reward_zero_std": 0.9375, "grad_norm": 0.968988835811615, "learning_rate": 3.5858585858585854e-07, "loss": -0.0, "num_tokens": 26581903.0, "reward": 0.651171863079071, "reward_std": 0.020992232486605644, "rewards/video_r1_accuracy_reward/mean": 0.6328125, "rewards/video_r1_accuracy_reward/std": 0.4839322865009308, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 255 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 71.0, "completions/max_terminated_length": 71.0, "completions/mean_length": 38.3359375, "completions/mean_terminated_length": 38.3359375, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.042513132095337, "epoch": 0.6464646464646465, "frac_reward_zero_std": 0.875, "grad_norm": 3.0125222206115723, "learning_rate": 3.5606060606060606e-07, "loss": -0.0, "num_tokens": 26688290.0, "reward": 0.7625000476837158, "reward_std": 0.06347454339265823, "rewards/video_r1_accuracy_reward/mean": 0.75, "rewards/video_r1_accuracy_reward/std": 0.434714138507843, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 256 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 89.0, "completions/max_terminated_length": 89.0, "completions/mean_length": 41.1953125, "completions/mean_terminated_length": 41.1953125, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.0545010566711426, "epoch": 0.648989898989899, "frac_reward_zero_std": 0.9375, "grad_norm": 1.0394936800003052, "learning_rate": 3.535353535353535e-07, "loss": 0.0, "num_tokens": 26781019.0, "reward": 0.725390613079071, "reward_std": 0.03072948195040226, "rewards/video_r1_accuracy_reward/mean": 0.7109375, "rewards/video_r1_accuracy_reward/std": 0.45510825514793396, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 257 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.0, "completions/max_terminated_length": 110.0, "completions/mean_length": 39.71875, "completions/mean_terminated_length": 39.71875, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.084099292755127, "epoch": 0.6515151515151515, "frac_reward_zero_std": 0.9375, "grad_norm": 0.8688188791275024, "learning_rate": 3.51010101010101e-07, "loss": 0.0, "num_tokens": 26885175.0, "reward": 0.5472655892372131, "reward_std": 0.03072948195040226, "rewards/video_r1_accuracy_reward/mean": 0.5234375, "rewards/video_r1_accuracy_reward/std": 0.5014128684997559, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 258 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.0, "completions/max_terminated_length": 87.0, "completions/mean_length": 40.6875, "completions/mean_terminated_length": 40.6875, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 1.0880751609802246, "epoch": 0.6540404040404041, "frac_reward_zero_std": 0.875, "grad_norm": 1.7870049476623535, "learning_rate": 3.484848484848485e-07, "loss": 0.0, "num_tokens": 26989175.0, "reward": 0.539843738079071, "reward_std": 0.051721714437007904, "rewards/video_r1_accuracy_reward/mean": 0.515625, "rewards/video_r1_accuracy_reward/std": 0.5017194747924805, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 259 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 85.0, "completions/max_terminated_length": 85.0, "completions/mean_length": 43.2734375, "completions/mean_terminated_length": 43.2734375, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 1.10056734085083, "epoch": 0.6565656565656566, "frac_reward_zero_std": 0.8125, "grad_norm": 2.1377336978912354, "learning_rate": 3.4595959595959597e-07, "loss": -0.0, "num_tokens": 27096962.0, "reward": 0.6585937738418579, "reward_std": 0.07920700311660767, "rewards/video_r1_accuracy_reward/mean": 0.640625, "rewards/video_r1_accuracy_reward/std": 0.481702595949173, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 260 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 72.0, "completions/max_terminated_length": 72.0, "completions/mean_length": 40.546875, "completions/mean_terminated_length": 40.546875, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.0717473030090332, "epoch": 0.6590909090909091, "frac_reward_zero_std": 0.8125, "grad_norm": 1.8865928649902344, "learning_rate": 3.434343434343434e-07, "loss": 0.0, "num_tokens": 27190608.0, "reward": 0.703125, "reward_std": 0.06946974992752075, "rewards/video_r1_accuracy_reward/mean": 0.6875, "rewards/video_r1_accuracy_reward/std": 0.4653336703777313, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 261 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 77.0, "completions/max_terminated_length": 77.0, "completions/mean_length": 41.6328125, "completions/mean_terminated_length": 41.6328125, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 1.0417635440826416, "epoch": 0.6616161616161617, "frac_reward_zero_std": 0.8125, "grad_norm": 2.4803223609924316, "learning_rate": 3.4090909090909085e-07, "loss": -0.0, "num_tokens": 27297481.0, "reward": 0.7328125238418579, "reward_std": 0.08670784533023834, "rewards/video_r1_accuracy_reward/mean": 0.71875, "rewards/video_r1_accuracy_reward/std": 0.4513758420944214, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 262 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 92.0, "completions/max_terminated_length": 92.0, "completions/mean_length": 41.2578125, "completions/mean_terminated_length": 41.2578125, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.068854570388794, "epoch": 0.6641414141414141, "frac_reward_zero_std": 0.8125, "grad_norm": 2.475841522216797, "learning_rate": 3.3838383838383837e-07, "loss": -0.0, "num_tokens": 27403098.0, "reward": 0.688281238079071, "reward_std": 0.07920700311660767, "rewards/video_r1_accuracy_reward/mean": 0.671875, "rewards/video_r1_accuracy_reward/std": 0.4713755249977112, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 263 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 104.0, "completions/max_terminated_length": 104.0, "completions/mean_length": 40.9765625, "completions/mean_terminated_length": 40.9765625, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 1.073037028312683, "epoch": 0.6666666666666666, "frac_reward_zero_std": 0.75, "grad_norm": 3.0775296688079834, "learning_rate": 3.3585858585858583e-07, "loss": -0.0, "num_tokens": 27499639.0, "reward": 0.6734374761581421, "reward_std": 0.11844511330127716, "rewards/video_r1_accuracy_reward/mean": 0.65625, "rewards/video_r1_accuracy_reward/std": 0.47682511806488037, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 264 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 83.0, "completions/max_terminated_length": 83.0, "completions/mean_length": 41.203125, "completions/mean_terminated_length": 41.203125, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.0243158340454102, "epoch": 0.6691919191919192, "frac_reward_zero_std": 0.8125, "grad_norm": 2.3727004528045654, "learning_rate": 3.333333333333333e-07, "loss": -0.0, "num_tokens": 27602497.0, "reward": 0.666015625, "reward_std": 0.08995203673839569, "rewards/video_r1_accuracy_reward/mean": 0.6484375, "rewards/video_r1_accuracy_reward/std": 0.4793342351913452, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 265 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 112.0, "completions/max_terminated_length": 112.0, "completions/mean_length": 40.1875, "completions/mean_terminated_length": 40.1875, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 1.0149409770965576, "epoch": 0.6717171717171717, "frac_reward_zero_std": 0.8125, "grad_norm": 1.7323969602584839, "learning_rate": 3.308080808080808e-07, "loss": -0.0, "num_tokens": 27704273.0, "reward": 0.606640636920929, "reward_std": 0.08021478354930878, "rewards/video_r1_accuracy_reward/mean": 0.5859375, "rewards/video_r1_accuracy_reward/std": 0.49449479579925537, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 266 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 76.0, "completions/max_terminated_length": 76.0, "completions/mean_length": 39.5390625, "completions/mean_terminated_length": 39.5390625, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 1.094785451889038, "epoch": 0.6742424242424242, "frac_reward_zero_std": 0.9375, "grad_norm": 1.412737250328064, "learning_rate": 3.282828282828283e-07, "loss": -0.0, "num_tokens": 27806486.0, "reward": 0.5472656488418579, "reward_std": 0.03072948195040226, "rewards/video_r1_accuracy_reward/mean": 0.5234375, "rewards/video_r1_accuracy_reward/std": 0.5014128684997559, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 267 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 94.0, "completions/max_terminated_length": 94.0, "completions/mean_length": 40.75, "completions/mean_terminated_length": 40.75, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 1.0335110425949097, "epoch": 0.6767676767676768, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 3.2575757575757575e-07, "loss": 0.0, "num_tokens": 27915974.0, "reward": 0.762499988079071, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.75, "rewards/video_r1_accuracy_reward/std": 0.434714138507843, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 268 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 76.0, "completions/max_terminated_length": 76.0, "completions/mean_length": 39.34375, "completions/mean_terminated_length": 39.34375, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.0604133605957031, "epoch": 0.6792929292929293, "frac_reward_zero_std": 0.75, "grad_norm": 2.308901786804199, "learning_rate": 3.2323232323232327e-07, "loss": -0.0, "num_tokens": 28020154.0, "reward": 0.6363281011581421, "reward_std": 0.10993647575378418, "rewards/video_r1_accuracy_reward/mean": 0.6171875, "rewards/video_r1_accuracy_reward/std": 0.4879830479621887, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 269 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 88.0, "completions/max_terminated_length": 88.0, "completions/mean_length": 41.546875, "completions/mean_terminated_length": 41.546875, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.1338186264038086, "epoch": 0.6818181818181818, "frac_reward_zero_std": 0.9375, "grad_norm": 1.2709474563598633, "learning_rate": 3.207070707070707e-07, "loss": -0.0, "num_tokens": 28127328.0, "reward": 0.532421886920929, "reward_std": 0.020992232486605644, "rewards/video_r1_accuracy_reward/mean": 0.5078125, "rewards/video_r1_accuracy_reward/std": 0.5019033551216125, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 270 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 86.0, "completions/max_terminated_length": 86.0, "completions/mean_length": 40.3515625, "completions/mean_terminated_length": 40.3515625, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 1.0553174018859863, "epoch": 0.6843434343434344, "frac_reward_zero_std": 0.875, "grad_norm": 2.8711135387420654, "learning_rate": 3.1818181818181815e-07, "loss": -0.0, "num_tokens": 28229925.0, "reward": 0.7625000476837158, "reward_std": 0.06145896762609482, "rewards/video_r1_accuracy_reward/mean": 0.75, "rewards/video_r1_accuracy_reward/std": 0.434714138507843, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 271 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 90.0, "completions/max_terminated_length": 90.0, "completions/mean_length": 37.5625, "completions/mean_terminated_length": 37.5625, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.9569792151451111, "epoch": 0.6868686868686869, "frac_reward_zero_std": 0.875, "grad_norm": 1.4626072645187378, "learning_rate": 3.156565656565656e-07, "loss": 0.0, "num_tokens": 28331109.0, "reward": 0.6066405773162842, "reward_std": 0.05272950232028961, "rewards/video_r1_accuracy_reward/mean": 0.5859375, "rewards/video_r1_accuracy_reward/std": 0.49449479579925537, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 272 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 86.0, "completions/max_terminated_length": 86.0, "completions/mean_length": 41.34375, "completions/mean_terminated_length": 41.34375, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.1569232940673828, "epoch": 0.6893939393939394, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 3.1313131313131313e-07, "loss": 0.0, "num_tokens": 28436025.0, "reward": 0.5843749642372131, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.5625, "rewards/video_r1_accuracy_reward/std": 0.49802759289741516, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 273 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 84.0, "completions/max_terminated_length": 84.0, "completions/mean_length": 38.3828125, "completions/mean_terminated_length": 38.3828125, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 0.9568972587585449, "epoch": 0.6919191919191919, "frac_reward_zero_std": 0.8125, "grad_norm": 2.252964735031128, "learning_rate": 3.106060606060606e-07, "loss": -0.0, "num_tokens": 28538722.0, "reward": 0.6808593273162842, "reward_std": 0.0727139487862587, "rewards/video_r1_accuracy_reward/mean": 0.6640625, "rewards/video_r1_accuracy_reward/std": 0.47417303919792175, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 274 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.0, "completions/max_terminated_length": 87.0, "completions/mean_length": 42.71875, "completions/mean_terminated_length": 42.71875, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.112146258354187, "epoch": 0.6944444444444444, "frac_reward_zero_std": 0.875, "grad_norm": 2.2951831817626953, "learning_rate": 3.0808080808080806e-07, "loss": 0.0, "num_tokens": 28637126.0, "reward": 0.5394531488418579, "reward_std": 0.028590137138962746, "rewards/video_r1_accuracy_reward/mean": 0.515625, "rewards/video_r1_accuracy_reward/std": 0.5017194747924805, "rewards/video_r1_format_reward/mean": 0.9921875, "rewards/video_r1_format_reward/std": 0.0883883461356163, "step": 275 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.0, "completions/max_terminated_length": 115.0, "completions/mean_length": 39.984375, "completions/mean_terminated_length": 39.984375, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.1185648441314697, "epoch": 0.696969696969697, "frac_reward_zero_std": 0.6875, "grad_norm": 3.1425654888153076, "learning_rate": 3.055555555555556e-07, "loss": 0.0, "num_tokens": 28740852.0, "reward": 0.77734375, "reward_std": 0.12119146436452866, "rewards/video_r1_accuracy_reward/mean": 0.765625, "rewards/video_r1_accuracy_reward/std": 0.42527204751968384, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 276 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 77.0, "completions/max_terminated_length": 77.0, "completions/mean_length": 39.4609375, "completions/mean_terminated_length": 39.4609375, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.014233112335205, "epoch": 0.6994949494949495, "frac_reward_zero_std": 0.75, "grad_norm": 1.9896016120910645, "learning_rate": 3.0303030303030305e-07, "loss": 0.0, "num_tokens": 28848535.0, "reward": 0.666015625, "reward_std": 0.09046198427677155, "rewards/video_r1_accuracy_reward/mean": 0.6484375, "rewards/video_r1_accuracy_reward/std": 0.4793342351913452, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 277 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 81.0, "completions/max_terminated_length": 81.0, "completions/mean_length": 40.828125, "completions/mean_terminated_length": 40.828125, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.0652706623077393, "epoch": 0.702020202020202, "frac_reward_zero_std": 0.875, "grad_norm": 2.8897011280059814, "learning_rate": 3.0050505050505046e-07, "loss": 0.0, "num_tokens": 28951817.0, "reward": 0.5027344226837158, "reward_std": 0.04847751557826996, "rewards/video_r1_accuracy_reward/mean": 0.4765625, "rewards/video_r1_accuracy_reward/std": 0.5014128684997559, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 278 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.0, "completions/max_terminated_length": 99.0, "completions/mean_length": 40.046875, "completions/mean_terminated_length": 40.046875, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 1.0515589714050293, "epoch": 0.7045454545454546, "frac_reward_zero_std": 0.9375, "grad_norm": 1.2125859260559082, "learning_rate": 2.9797979797979793e-07, "loss": 0.0, "num_tokens": 29046855.0, "reward": 0.576953113079071, "reward_std": 0.020992232486605644, "rewards/video_r1_accuracy_reward/mean": 0.5546875, "rewards/video_r1_accuracy_reward/std": 0.4989531338214874, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 279 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.0, "completions/max_terminated_length": 99.0, "completions/mean_length": 41.078125, "completions/mean_terminated_length": 41.078125, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.0689277648925781, "epoch": 0.7070707070707071, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 2.9545454545454545e-07, "loss": 0.0, "num_tokens": 29154393.0, "reward": 0.8218749761581421, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.8125, "rewards/video_r1_accuracy_reward/std": 0.39184603095054626, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 280 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 78.0, "completions/max_terminated_length": 78.0, "completions/mean_length": 39.515625, "completions/mean_terminated_length": 39.515625, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 1.11039137840271, "epoch": 0.7095959595959596, "frac_reward_zero_std": 0.9375, "grad_norm": 1.8850462436676025, "learning_rate": 2.929292929292929e-07, "loss": -0.0, "num_tokens": 29251595.0, "reward": 0.9183593988418579, "reward_std": 0.03072948381304741, "rewards/video_r1_accuracy_reward/mean": 0.9140625, "rewards/video_r1_accuracy_reward/std": 0.2813730239868164, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 281 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 86.0, "completions/max_terminated_length": 86.0, "completions/mean_length": 39.34375, "completions/mean_terminated_length": 39.34375, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 1.1349459886550903, "epoch": 0.7121212121212122, "frac_reward_zero_std": 0.875, "grad_norm": 1.9888182878494263, "learning_rate": 2.904040404040404e-07, "loss": -0.0, "num_tokens": 29345079.0, "reward": 0.46562498807907104, "reward_std": 0.05497056990861893, "rewards/video_r1_accuracy_reward/mean": 0.4375, "rewards/video_r1_accuracy_reward/std": 0.49802759289741516, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 282 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 73.0, "completions/max_terminated_length": 73.0, "completions/mean_length": 38.6328125, "completions/mean_terminated_length": 38.6328125, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.9957581162452698, "epoch": 0.7146464646464646, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 2.878787878787879e-07, "loss": 0.0, "num_tokens": 29436368.0, "reward": 0.703125, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.6875, "rewards/video_r1_accuracy_reward/std": 0.4653336703777313, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 283 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 77.0, "completions/max_terminated_length": 77.0, "completions/mean_length": 38.5390625, "completions/mean_terminated_length": 38.5390625, "completions/min_length": 18.0, "completions/min_terminated_length": 18.0, "entropy": 1.0308442115783691, "epoch": 0.7171717171717171, "frac_reward_zero_std": 0.8125, "grad_norm": 2.872098684310913, "learning_rate": 2.8535353535353536e-07, "loss": 0.0, "num_tokens": 29536589.0, "reward": 0.6585937142372131, "reward_std": 0.06946974992752075, "rewards/video_r1_accuracy_reward/mean": 0.640625, "rewards/video_r1_accuracy_reward/std": 0.481702595949173, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 284 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 101.0, "completions/max_terminated_length": 101.0, "completions/mean_length": 41.53125, "completions/mean_terminated_length": 41.53125, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 1.080606460571289, "epoch": 0.7196969696969697, "frac_reward_zero_std": 0.875, "grad_norm": 1.6087223291397095, "learning_rate": 2.8282828282828283e-07, "loss": -0.0, "num_tokens": 29646105.0, "reward": 0.762499988079071, "reward_std": 0.06145896390080452, "rewards/video_r1_accuracy_reward/mean": 0.75, "rewards/video_r1_accuracy_reward/std": 0.434714138507843, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 285 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.0, "completions/max_terminated_length": 116.0, "completions/mean_length": 39.109375, "completions/mean_terminated_length": 39.109375, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.9553197622299194, "epoch": 0.7222222222222222, "frac_reward_zero_std": 0.9375, "grad_norm": 1.0064791440963745, "learning_rate": 2.8030303030303024e-07, "loss": 0.0, "num_tokens": 29756375.0, "reward": 0.7550780773162842, "reward_std": 0.020992232486605644, "rewards/video_r1_accuracy_reward/mean": 0.7421875, "rewards/video_r1_accuracy_reward/std": 0.43914905190467834, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 286 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 91.0, "completions/max_terminated_length": 91.0, "completions/mean_length": 42.5234375, "completions/mean_terminated_length": 42.5234375, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.1082098484039307, "epoch": 0.7247474747474747, "frac_reward_zero_std": 0.875, "grad_norm": 1.7488712072372437, "learning_rate": 2.7777777777777776e-07, "loss": 0.0, "num_tokens": 29862402.0, "reward": 0.703125, "reward_std": 0.04198446497321129, "rewards/video_r1_accuracy_reward/mean": 0.6875, "rewards/video_r1_accuracy_reward/std": 0.4653336703777313, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 287 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.0, "completions/max_terminated_length": 106.0, "completions/mean_length": 42.578125, "completions/mean_terminated_length": 42.578125, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.0919723510742188, "epoch": 0.7272727272727273, "frac_reward_zero_std": 0.875, "grad_norm": 2.1887331008911133, "learning_rate": 2.752525252525252e-07, "loss": 0.0, "num_tokens": 29962308.0, "reward": 0.6214843392372131, "reward_std": 0.058214765042066574, "rewards/video_r1_accuracy_reward/mean": 0.6015625, "rewards/video_r1_accuracy_reward/std": 0.4915000796318054, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 288 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 105.0, "completions/max_terminated_length": 105.0, "completions/mean_length": 44.5390625, "completions/mean_terminated_length": 44.5390625, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 1.0653269290924072, "epoch": 0.7297979797979798, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 2.727272727272727e-07, "loss": 0.0, "num_tokens": 30067217.0, "reward": 0.762499988079071, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.75, "rewards/video_r1_accuracy_reward/std": 0.434714138507843, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 289 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.0, "completions/max_terminated_length": 106.0, "completions/mean_length": 41.0703125, "completions/mean_terminated_length": 41.0703125, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.1131395101547241, "epoch": 0.7323232323232324, "frac_reward_zero_std": 0.875, "grad_norm": 1.3746576309204102, "learning_rate": 2.702020202020202e-07, "loss": -0.0, "num_tokens": 30166226.0, "reward": 0.688281238079071, "reward_std": 0.051721714437007904, "rewards/video_r1_accuracy_reward/mean": 0.671875, "rewards/video_r1_accuracy_reward/std": 0.4713755249977112, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 290 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.0, "completions/max_terminated_length": 139.0, "completions/mean_length": 45.3203125, "completions/mean_terminated_length": 45.3203125, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 1.1243813037872314, "epoch": 0.7348484848484849, "frac_reward_zero_std": 0.8125, "grad_norm": 2.915048360824585, "learning_rate": 2.676767676767677e-07, "loss": -0.0, "num_tokens": 30259571.0, "reward": 0.7847656011581421, "reward_std": 0.08245119452476501, "rewards/video_r1_accuracy_reward/mean": 0.7734375, "rewards/video_r1_accuracy_reward/std": 0.4202519655227661, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 291 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 79.0, "completions/max_terminated_length": 79.0, "completions/mean_length": 42.1875, "completions/mean_terminated_length": 42.1875, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 1.050102949142456, "epoch": 0.7373737373737373, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 2.6515151515151514e-07, "loss": 0.0, "num_tokens": 30374483.0, "reward": 0.703125, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.6875, "rewards/video_r1_accuracy_reward/std": 0.4653336703777313, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 292 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.0, "completions/max_terminated_length": 121.0, "completions/mean_length": 41.5234375, "completions/mean_terminated_length": 41.5234375, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.032031536102295, "epoch": 0.73989898989899, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 2.6262626262626266e-07, "loss": 0.0, "num_tokens": 30471046.0, "reward": 0.5249999761581421, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.5, "rewards/video_r1_accuracy_reward/std": 0.5019646286964417, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 293 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 75.0, "completions/max_terminated_length": 75.0, "completions/mean_length": 41.453125, "completions/mean_terminated_length": 41.453125, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 1.0804343223571777, "epoch": 0.7424242424242424, "frac_reward_zero_std": 0.75, "grad_norm": 2.5472633838653564, "learning_rate": 2.6010101010101007e-07, "loss": -0.0, "num_tokens": 30581344.0, "reward": 0.5249999761581421, "reward_std": 0.10344342887401581, "rewards/video_r1_accuracy_reward/mean": 0.5, "rewards/video_r1_accuracy_reward/std": 0.5019646286964417, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 294 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 384.0, "completions/max_terminated_length": 103.0, "completions/mean_length": 45.3359375, "completions/mean_terminated_length": 42.669288635253906, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.9924347400665283, "epoch": 0.7449494949494949, "frac_reward_zero_std": 0.875, "grad_norm": 3.591127395629883, "learning_rate": 2.5757575757575754e-07, "loss": 0.0, "num_tokens": 30692403.0, "reward": 0.821484386920929, "reward_std": 0.04308931902050972, "rewards/video_r1_accuracy_reward/mean": 0.8125, "rewards/video_r1_accuracy_reward/std": 0.39184603095054626, "rewards/video_r1_format_reward/mean": 0.9921875, "rewards/video_r1_format_reward/std": 0.0883883461356163, "step": 295 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 93.0, "completions/max_terminated_length": 93.0, "completions/mean_length": 43.6015625, "completions/mean_terminated_length": 43.6015625, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.1211515665054321, "epoch": 0.7474747474747475, "frac_reward_zero_std": 0.8125, "grad_norm": 1.6249940395355225, "learning_rate": 2.55050505050505e-07, "loss": -0.0, "num_tokens": 30794800.0, "reward": 0.62890625, "reward_std": 0.07920699566602707, "rewards/video_r1_accuracy_reward/mean": 0.609375, "rewards/video_r1_accuracy_reward/std": 0.4898075461387634, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 296 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 112.0, "completions/max_terminated_length": 112.0, "completions/mean_length": 39.15625, "completions/mean_terminated_length": 39.15625, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.1058385372161865, "epoch": 0.75, "frac_reward_zero_std": 0.625, "grad_norm": 3.0298686027526855, "learning_rate": 2.525252525252525e-07, "loss": -0.0, "num_tokens": 30891148.0, "reward": 0.6734375357627869, "reward_std": 0.17140009999275208, "rewards/video_r1_accuracy_reward/mean": 0.65625, "rewards/video_r1_accuracy_reward/std": 0.47682511806488037, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 297 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.0, "completions/max_terminated_length": 152.0, "completions/mean_length": 42.15625, "completions/mean_terminated_length": 42.15625, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.0815918445587158, "epoch": 0.7525252525252525, "frac_reward_zero_std": 0.875, "grad_norm": 2.4456374645233154, "learning_rate": 2.5e-07, "loss": 0.0, "num_tokens": 30999208.0, "reward": 0.5250000357627869, "reward_std": 0.04198446497321129, "rewards/video_r1_accuracy_reward/mean": 0.5, "rewards/video_r1_accuracy_reward/std": 0.5019646286964417, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 298 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.0, "completions/max_terminated_length": 98.0, "completions/mean_length": 40.9453125, "completions/mean_terminated_length": 40.9453125, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.1108628511428833, "epoch": 0.7550505050505051, "frac_reward_zero_std": 0.75, "grad_norm": 2.825129508972168, "learning_rate": 2.4747474747474745e-07, "loss": 0.0, "num_tokens": 31100649.0, "reward": 0.591796875, "reward_std": 0.11195206642150879, "rewards/video_r1_accuracy_reward/mean": 0.5703125, "rewards/video_r1_accuracy_reward/std": 0.4969765841960907, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 299 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.0, "completions/max_terminated_length": 100.0, "completions/mean_length": 44.2578125, "completions/mean_terminated_length": 44.2578125, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 1.1574559211730957, "epoch": 0.7575757575757576, "frac_reward_zero_std": 0.75, "grad_norm": 1.9903881549835205, "learning_rate": 2.449494949494949e-07, "loss": -0.0, "num_tokens": 31207226.0, "reward": 0.62890625, "reward_std": 0.11642953008413315, "rewards/video_r1_accuracy_reward/mean": 0.609375, "rewards/video_r1_accuracy_reward/std": 0.4898075461387634, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 300 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 82.0, "completions/max_terminated_length": 82.0, "completions/mean_length": 37.9921875, "completions/mean_terminated_length": 37.9921875, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.0374675989151, "epoch": 0.76010101010101, "frac_reward_zero_std": 0.75, "grad_norm": 3.569753885269165, "learning_rate": 2.4242424242424244e-07, "loss": 0.0, "num_tokens": 31304289.0, "reward": 0.6953125, "reward_std": 0.07706765830516815, "rewards/video_r1_accuracy_reward/mean": 0.6796875, "rewards/video_r1_accuracy_reward/std": 0.4684300124645233, "rewards/video_r1_format_reward/mean": 0.9921875, "rewards/video_r1_format_reward/std": 0.0883883461356163, "step": 301 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 85.0, "completions/max_terminated_length": 85.0, "completions/mean_length": 41.484375, "completions/mean_terminated_length": 41.484375, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.127359390258789, "epoch": 0.7626262626262627, "frac_reward_zero_std": 0.875, "grad_norm": 1.5562926530838013, "learning_rate": 2.398989898989899e-07, "loss": -0.0, "num_tokens": 31400983.0, "reward": 0.5621093511581421, "reward_std": 0.05272950232028961, "rewards/video_r1_accuracy_reward/mean": 0.5390625, "rewards/video_r1_accuracy_reward/std": 0.5004304051399231, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 302 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.0, "completions/max_terminated_length": 107.0, "completions/mean_length": 42.6328125, "completions/mean_terminated_length": 42.6328125, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 1.028510332107544, "epoch": 0.7651515151515151, "frac_reward_zero_std": 0.875, "grad_norm": 2.891268253326416, "learning_rate": 2.3737373737373737e-07, "loss": -0.0, "num_tokens": 31511296.0, "reward": 0.688281238079071, "reward_std": 0.051721714437007904, "rewards/video_r1_accuracy_reward/mean": 0.671875, "rewards/video_r1_accuracy_reward/std": 0.4713755249977112, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 303 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 67.0, "completions/max_terminated_length": 67.0, "completions/mean_length": 38.3984375, "completions/mean_terminated_length": 38.3984375, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.018940806388855, "epoch": 0.7676767676767676, "frac_reward_zero_std": 0.875, "grad_norm": 1.7596899271011353, "learning_rate": 2.3484848484848486e-07, "loss": -0.0, "num_tokens": 31624579.0, "reward": 0.725390613079071, "reward_std": 0.04847751557826996, "rewards/video_r1_accuracy_reward/mean": 0.7109375, "rewards/video_r1_accuracy_reward/std": 0.45510825514793396, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 304 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 81.0, "completions/max_terminated_length": 81.0, "completions/mean_length": 40.8359375, "completions/mean_terminated_length": 40.8359375, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.066922664642334, "epoch": 0.7702020202020202, "frac_reward_zero_std": 0.9375, "grad_norm": 0.9264626502990723, "learning_rate": 2.323232323232323e-07, "loss": -0.0, "num_tokens": 31733166.0, "reward": 0.5695312023162842, "reward_std": 0.027485284954309464, "rewards/video_r1_accuracy_reward/mean": 0.546875, "rewards/video_r1_accuracy_reward/std": 0.4997538626194, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 305 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.0, "completions/max_terminated_length": 100.0, "completions/mean_length": 39.703125, "completions/mean_terminated_length": 39.703125, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.1334049701690674, "epoch": 0.7727272727272727, "frac_reward_zero_std": 0.8125, "grad_norm": 2.070505142211914, "learning_rate": 2.297979797979798e-07, "loss": 0.0, "num_tokens": 31838536.0, "reward": 0.62890625, "reward_std": 0.06946974992752075, "rewards/video_r1_accuracy_reward/mean": 0.609375, "rewards/video_r1_accuracy_reward/std": 0.4898075461387634, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 306 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.0, "completions/max_terminated_length": 98.0, "completions/mean_length": 41.46875, "completions/mean_terminated_length": 41.46875, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.073242425918579, "epoch": 0.7752525252525253, "frac_reward_zero_std": 0.9375, "grad_norm": 0.7374638319015503, "learning_rate": 2.2727272727272726e-07, "loss": -0.0, "num_tokens": 31948676.0, "reward": 0.7105468511581421, "reward_std": 0.020992232486605644, "rewards/video_r1_accuracy_reward/mean": 0.6953125, "rewards/video_r1_accuracy_reward/std": 0.46208351850509644, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 307 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 112.0, "completions/max_terminated_length": 112.0, "completions/mean_length": 43.734375, "completions/mean_terminated_length": 43.734375, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.1168217658996582, "epoch": 0.7777777777777778, "frac_reward_zero_std": 0.75, "grad_norm": 3.1745333671569824, "learning_rate": 2.2474747474747475e-07, "loss": -0.0, "num_tokens": 32042010.0, "reward": 0.7996094226837158, "reward_std": 0.10019923746585846, "rewards/video_r1_accuracy_reward/mean": 0.7890625, "rewards/video_r1_accuracy_reward/std": 0.4095771610736847, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 308 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.0, "completions/max_terminated_length": 87.0, "completions/mean_length": 41.578125, "completions/mean_terminated_length": 41.578125, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 1.1183019876480103, "epoch": 0.7803030303030303, "frac_reward_zero_std": 0.8125, "grad_norm": 2.8185198307037354, "learning_rate": 2.222222222222222e-07, "loss": -0.0, "num_tokens": 32146732.0, "reward": 0.591796875, "reward_std": 0.08245119452476501, "rewards/video_r1_accuracy_reward/mean": 0.5703125, "rewards/video_r1_accuracy_reward/std": 0.4969765841960907, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 309 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 84.0, "completions/max_terminated_length": 84.0, "completions/mean_length": 42.5078125, "completions/mean_terminated_length": 42.5078125, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.156842589378357, "epoch": 0.7828282828282829, "frac_reward_zero_std": 0.8125, "grad_norm": 2.6074113845825195, "learning_rate": 2.1969696969696968e-07, "loss": -0.0, "num_tokens": 32241525.0, "reward": 0.703125, "reward_std": 0.08670784533023834, "rewards/video_r1_accuracy_reward/mean": 0.6875, "rewards/video_r1_accuracy_reward/std": 0.4653336703777313, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 310 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.0, "completions/max_terminated_length": 87.0, "completions/mean_length": 42.2421875, "completions/mean_terminated_length": 42.2421875, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.058318853378296, "epoch": 0.7853535353535354, "frac_reward_zero_std": 0.9375, "grad_norm": 1.0678150653839111, "learning_rate": 2.1717171717171718e-07, "loss": -0.0, "num_tokens": 32347036.0, "reward": 0.910937488079071, "reward_std": 0.03173727169632912, "rewards/video_r1_accuracy_reward/mean": 0.90625, "rewards/video_r1_accuracy_reward/std": 0.29262590408325195, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 311 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.0, "completions/max_terminated_length": 97.0, "completions/mean_length": 42.40625, "completions/mean_terminated_length": 42.40625, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.1335667371749878, "epoch": 0.7878787878787878, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 2.1464646464646464e-07, "loss": 0.0, "num_tokens": 32443480.0, "reward": 0.703125, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.6875, "rewards/video_r1_accuracy_reward/std": 0.4653336703777313, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 312 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 88.0, "completions/max_terminated_length": 88.0, "completions/mean_length": 41.234375, "completions/mean_terminated_length": 41.234375, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 1.0559073686599731, "epoch": 0.7904040404040404, "frac_reward_zero_std": 0.875, "grad_norm": 1.3612805604934692, "learning_rate": 2.121212121212121e-07, "loss": 0.0, "num_tokens": 32548430.0, "reward": 0.8367187976837158, "reward_std": 0.051721714437007904, "rewards/video_r1_accuracy_reward/mean": 0.828125, "rewards/video_r1_accuracy_reward/std": 0.378754198551178, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 313 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 90.0, "completions/max_terminated_length": 90.0, "completions/mean_length": 42.0390625, "completions/mean_terminated_length": 42.0390625, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 1.055532693862915, "epoch": 0.7929292929292929, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 2.0959595959595957e-07, "loss": 0.0, "num_tokens": 32640787.0, "reward": 0.5843749642372131, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.5625, "rewards/video_r1_accuracy_reward/std": 0.49802759289741516, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 314 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 78.0, "completions/max_terminated_length": 78.0, "completions/mean_length": 39.9375, "completions/mean_terminated_length": 39.9375, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.1142704486846924, "epoch": 0.7954545454545454, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 2.0707070707070707e-07, "loss": 0.0, "num_tokens": 32744659.0, "reward": 0.6437499523162842, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.625, "rewards/video_r1_accuracy_reward/std": 0.4860251843929291, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 315 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 76.0, "completions/max_terminated_length": 76.0, "completions/mean_length": 40.3671875, "completions/mean_terminated_length": 40.3671875, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.0762892961502075, "epoch": 0.797979797979798, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 2.0454545454545456e-07, "loss": 0.0, "num_tokens": 32838018.0, "reward": 0.762499988079071, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.75, "rewards/video_r1_accuracy_reward/std": 0.434714138507843, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 316 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.0, "completions/max_terminated_length": 99.0, "completions/mean_length": 41.578125, "completions/mean_terminated_length": 41.578125, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 1.0358078479766846, "epoch": 0.8005050505050505, "frac_reward_zero_std": 0.9375, "grad_norm": 0.9207355380058289, "learning_rate": 2.02020202020202e-07, "loss": 0.0, "num_tokens": 32945708.0, "reward": 0.6957031488418579, "reward_std": 0.020992232486605644, "rewards/video_r1_accuracy_reward/mean": 0.6796875, "rewards/video_r1_accuracy_reward/std": 0.4684300124645233, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 317 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 80.0, "completions/max_terminated_length": 80.0, "completions/mean_length": 39.0625, "completions/mean_terminated_length": 39.0625, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.0110433101654053, "epoch": 0.803030303030303, "frac_reward_zero_std": 0.8125, "grad_norm": 3.387294054031372, "learning_rate": 1.994949494949495e-07, "loss": 0.0, "num_tokens": 33047076.0, "reward": 0.703125, "reward_std": 0.07920699566602707, "rewards/video_r1_accuracy_reward/mean": 0.6875, "rewards/video_r1_accuracy_reward/std": 0.4653336703777313, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 318 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 105.0, "completions/max_terminated_length": 105.0, "completions/mean_length": 41.5078125, "completions/mean_terminated_length": 41.5078125, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 1.0886144638061523, "epoch": 0.8055555555555556, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.9696969696969696e-07, "loss": 0.0, "num_tokens": 33161533.0, "reward": 0.703125, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.6875, "rewards/video_r1_accuracy_reward/std": 0.4653336703777313, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 319 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 83.0, "completions/max_terminated_length": 83.0, "completions/mean_length": 39.40625, "completions/mean_terminated_length": 39.40625, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.0247807502746582, "epoch": 0.8080808080808081, "frac_reward_zero_std": 0.9375, "grad_norm": 2.800685167312622, "learning_rate": 1.9444444444444445e-07, "loss": 0.0, "num_tokens": 33261601.0, "reward": 0.45820313692092896, "reward_std": 0.020992232486605644, "rewards/video_r1_accuracy_reward/mean": 0.4296875, "rewards/video_r1_accuracy_reward/std": 0.4969765841960907, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 320 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 88.0, "completions/max_terminated_length": 88.0, "completions/mean_length": 41.890625, "completions/mean_terminated_length": 41.890625, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.0938504934310913, "epoch": 0.8106060606060606, "frac_reward_zero_std": 0.9375, "grad_norm": 1.4721579551696777, "learning_rate": 1.9191919191919189e-07, "loss": 0.0, "num_tokens": 33362019.0, "reward": 0.7179687023162842, "reward_std": 0.027485283091664314, "rewards/video_r1_accuracy_reward/mean": 0.703125, "rewards/video_r1_accuracy_reward/std": 0.45867621898651123, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 321 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.0, "completions/max_terminated_length": 120.0, "completions/mean_length": 42.2265625, "completions/mean_terminated_length": 42.2265625, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.0467185974121094, "epoch": 0.8131313131313131, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.8939393939393938e-07, "loss": 0.0, "num_tokens": 33472384.0, "reward": 0.6437499523162842, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.625, "rewards/video_r1_accuracy_reward/std": 0.4860251843929291, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 322 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.0, "completions/max_terminated_length": 139.0, "completions/mean_length": 46.7421875, "completions/mean_terminated_length": 46.7421875, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.163482427597046, "epoch": 0.8156565656565656, "frac_reward_zero_std": 0.875, "grad_norm": 1.9332994222640991, "learning_rate": 1.8686868686868687e-07, "loss": -0.0, "num_tokens": 33566471.0, "reward": 0.8070312738418579, "reward_std": 0.051721714437007904, "rewards/video_r1_accuracy_reward/mean": 0.796875, "rewards/video_r1_accuracy_reward/std": 0.40390563011169434, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 323 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.0, "completions/max_terminated_length": 111.0, "completions/mean_length": 41.0546875, "completions/mean_terminated_length": 41.0546875, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.9953482151031494, "epoch": 0.8181818181818182, "frac_reward_zero_std": 0.75, "grad_norm": 2.8781511783599854, "learning_rate": 1.8434343434343434e-07, "loss": -0.0, "num_tokens": 33663286.0, "reward": 0.814453125, "reward_std": 0.10993649065494537, "rewards/video_r1_accuracy_reward/mean": 0.8046875, "rewards/video_r1_accuracy_reward/std": 0.3979988098144531, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 324 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 85.0, "completions/max_terminated_length": 85.0, "completions/mean_length": 42.5, "completions/mean_terminated_length": 42.5, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.0891491174697876, "epoch": 0.8207070707070707, "frac_reward_zero_std": 0.9375, "grad_norm": 2.3374204635620117, "learning_rate": 1.818181818181818e-07, "loss": 0.0, "num_tokens": 33767966.0, "reward": 0.814453125, "reward_std": 0.020992232486605644, "rewards/video_r1_accuracy_reward/mean": 0.8046875, "rewards/video_r1_accuracy_reward/std": 0.3979988098144531, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 325 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 84.0, "completions/max_terminated_length": 84.0, "completions/mean_length": 41.984375, "completions/mean_terminated_length": 41.984375, "completions/min_length": 7.0, "completions/min_terminated_length": 7.0, "entropy": 1.0346364974975586, "epoch": 0.8232323232323232, "frac_reward_zero_std": 0.8125, "grad_norm": 4.739593029022217, "learning_rate": 1.7929292929292927e-07, "loss": -0.0, "num_tokens": 33880908.0, "reward": 0.7250000238418579, "reward_std": 0.059319622814655304, "rewards/video_r1_accuracy_reward/mean": 0.7109375, "rewards/video_r1_accuracy_reward/std": 0.45510825514793396, "rewards/video_r1_format_reward/mean": 0.9921875, "rewards/video_r1_format_reward/std": 0.0883883461356163, "step": 326 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.0, "completions/max_terminated_length": 99.0, "completions/mean_length": 41.0078125, "completions/mean_terminated_length": 41.0078125, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 1.019489049911499, "epoch": 0.8257575757575758, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.7676767676767676e-07, "loss": 0.0, "num_tokens": 33992085.0, "reward": 0.8218749761581421, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.8125, "rewards/video_r1_accuracy_reward/std": 0.39184603095054626, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 327 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 81.0, "completions/max_terminated_length": 81.0, "completions/mean_length": 39.9140625, "completions/mean_terminated_length": 39.9140625, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.0503275394439697, "epoch": 0.8282828282828283, "frac_reward_zero_std": 0.9375, "grad_norm": 1.8522095680236816, "learning_rate": 1.7424242424242425e-07, "loss": 0.0, "num_tokens": 34098090.0, "reward": 0.814453125, "reward_std": 0.020992232486605644, "rewards/video_r1_accuracy_reward/mean": 0.8046875, "rewards/video_r1_accuracy_reward/std": 0.3979988098144531, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 328 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 96.0, "completions/max_terminated_length": 96.0, "completions/mean_length": 39.390625, "completions/mean_terminated_length": 39.390625, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.0890402793884277, "epoch": 0.8308080808080808, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.717171717171717e-07, "loss": 0.0, "num_tokens": 34197460.0, "reward": 0.5843750238418579, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.5625, "rewards/video_r1_accuracy_reward/std": 0.49802759289741516, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 329 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 85.0, "completions/max_terminated_length": 85.0, "completions/mean_length": 40.515625, "completions/mean_terminated_length": 40.515625, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.0883162021636963, "epoch": 0.8333333333333334, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.6919191919191918e-07, "loss": 0.0, "num_tokens": 34295606.0, "reward": 0.762499988079071, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.75, "rewards/video_r1_accuracy_reward/std": 0.434714138507843, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 330 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 85.0, "completions/max_terminated_length": 85.0, "completions/mean_length": 38.5546875, "completions/mean_terminated_length": 38.5546875, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 0.9863436222076416, "epoch": 0.8358585858585859, "frac_reward_zero_std": 0.9375, "grad_norm": 0.8784723281860352, "learning_rate": 1.6666666666666665e-07, "loss": -0.0, "num_tokens": 34403301.0, "reward": 0.7699218988418579, "reward_std": 0.020992232486605644, "rewards/video_r1_accuracy_reward/mean": 0.7578125, "rewards/video_r1_accuracy_reward/std": 0.4300905168056488, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 331 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.0, "completions/max_terminated_length": 98.0, "completions/mean_length": 39.4296875, "completions/mean_terminated_length": 39.4296875, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.0463258028030396, "epoch": 0.8383838383838383, "frac_reward_zero_std": 0.875, "grad_norm": 2.3829569816589355, "learning_rate": 1.6414141414141414e-07, "loss": -0.0, "num_tokens": 34498212.0, "reward": 0.8515625, "reward_std": 0.05497056990861893, "rewards/video_r1_accuracy_reward/mean": 0.84375, "rewards/video_r1_accuracy_reward/std": 0.3645188808441162, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 332 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 71.0, "completions/max_terminated_length": 71.0, "completions/mean_length": 39.7734375, "completions/mean_terminated_length": 39.7734375, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.0662963390350342, "epoch": 0.8409090909090909, "frac_reward_zero_std": 0.875, "grad_norm": 1.7944689989089966, "learning_rate": 1.6161616161616163e-07, "loss": 0.0, "num_tokens": 34595415.0, "reward": 0.666015625, "reward_std": 0.04847751557826996, "rewards/video_r1_accuracy_reward/mean": 0.6484375, "rewards/video_r1_accuracy_reward/std": 0.4793342351913452, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 333 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 92.0, "completions/max_terminated_length": 92.0, "completions/mean_length": 41.2421875, "completions/mean_terminated_length": 41.2421875, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.035414695739746, "epoch": 0.8434343434343434, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.5909090909090907e-07, "loss": 0.0, "num_tokens": 34693454.0, "reward": 0.762499988079071, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.75, "rewards/video_r1_accuracy_reward/std": 0.434714138507843, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 334 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 74.0, "completions/max_terminated_length": 74.0, "completions/mean_length": 40.5625, "completions/mean_terminated_length": 40.5625, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 0.992094874382019, "epoch": 0.8459595959595959, "frac_reward_zero_std": 0.875, "grad_norm": 4.349198341369629, "learning_rate": 1.5656565656565657e-07, "loss": -0.0, "num_tokens": 34788646.0, "reward": 0.740234375, "reward_std": 0.05272950232028961, "rewards/video_r1_accuracy_reward/mean": 0.7265625, "rewards/video_r1_accuracy_reward/std": 0.447474867105484, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 335 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 93.0, "completions/max_terminated_length": 93.0, "completions/mean_length": 38.90625, "completions/mean_terminated_length": 38.90625, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 1.0111061334609985, "epoch": 0.8484848484848485, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.5404040404040403e-07, "loss": 0.0, "num_tokens": 34894258.0, "reward": 0.762499988079071, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.75, "rewards/video_r1_accuracy_reward/std": 0.434714138507843, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 336 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 89.0, "completions/max_terminated_length": 89.0, "completions/mean_length": 41.484375, "completions/mean_terminated_length": 41.484375, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.1294646263122559, "epoch": 0.851010101010101, "frac_reward_zero_std": 0.875, "grad_norm": 1.3885397911071777, "learning_rate": 1.5151515151515152e-07, "loss": -0.0, "num_tokens": 34997048.0, "reward": 0.6214843988418579, "reward_std": 0.05272950232028961, "rewards/video_r1_accuracy_reward/mean": 0.6015625, "rewards/video_r1_accuracy_reward/std": 0.4915000796318054, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 337 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 384.0, "completions/max_terminated_length": 83.0, "completions/mean_length": 44.3671875, "completions/mean_terminated_length": 41.69291305541992, "completions/min_length": 17.0, "completions/min_terminated_length": 17.0, "entropy": 1.0213119983673096, "epoch": 0.8535353535353535, "frac_reward_zero_std": 0.9375, "grad_norm": 0.31857022643089294, "learning_rate": 1.4898989898989896e-07, "loss": 0.0, "num_tokens": 35089799.0, "reward": 0.7621093988418579, "reward_std": 0.0011048543965443969, "rewards/video_r1_accuracy_reward/mean": 0.75, "rewards/video_r1_accuracy_reward/std": 0.434714138507843, "rewards/video_r1_format_reward/mean": 0.9921875, "rewards/video_r1_format_reward/std": 0.0883883461356163, "step": 338 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 95.0, "completions/max_terminated_length": 95.0, "completions/mean_length": 43.2265625, "completions/mean_terminated_length": 43.2265625, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.0955770015716553, "epoch": 0.8560606060606061, "frac_reward_zero_std": 0.9375, "grad_norm": 1.10260009765625, "learning_rate": 1.4646464646464646e-07, "loss": -0.0, "num_tokens": 35189916.0, "reward": 0.688281238079071, "reward_std": 0.027485284954309464, "rewards/video_r1_accuracy_reward/mean": 0.671875, "rewards/video_r1_accuracy_reward/std": 0.4713755249977112, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 339 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 72.0, "completions/max_terminated_length": 72.0, "completions/mean_length": 39.0078125, "completions/mean_terminated_length": 39.0078125, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 1.0868955850601196, "epoch": 0.8585858585858586, "frac_reward_zero_std": 0.8125, "grad_norm": 2.8857719898223877, "learning_rate": 1.4393939393939395e-07, "loss": 0.0, "num_tokens": 35299301.0, "reward": 0.5992187261581421, "reward_std": 0.06946974992752075, "rewards/video_r1_accuracy_reward/mean": 0.578125, "rewards/video_r1_accuracy_reward/std": 0.4957992732524872, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 340 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.0, "completions/max_terminated_length": 106.0, "completions/mean_length": 39.328125, "completions/mean_terminated_length": 39.328125, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.0072779655456543, "epoch": 0.8611111111111112, "frac_reward_zero_std": 0.875, "grad_norm": 2.1714649200439453, "learning_rate": 1.4141414141414141e-07, "loss": -0.0, "num_tokens": 35401055.0, "reward": 0.643750011920929, "reward_std": 0.05497056990861893, "rewards/video_r1_accuracy_reward/mean": 0.625, "rewards/video_r1_accuracy_reward/std": 0.4860251843929291, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 341 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.0, "completions/max_terminated_length": 87.0, "completions/mean_length": 38.5, "completions/mean_terminated_length": 38.5, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.0291508436203003, "epoch": 0.8636363636363636, "frac_reward_zero_std": 0.875, "grad_norm": 5.02390718460083, "learning_rate": 1.3888888888888888e-07, "loss": -0.0, "num_tokens": 35502143.0, "reward": 0.651171863079071, "reward_std": 0.062466755509376526, "rewards/video_r1_accuracy_reward/mean": 0.6328125, "rewards/video_r1_accuracy_reward/std": 0.4839322865009308, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 342 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.0, "completions/max_terminated_length": 113.0, "completions/mean_length": 40.9453125, "completions/mean_terminated_length": 40.9453125, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 1.0368475914001465, "epoch": 0.8661616161616161, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3636363636363635e-07, "loss": 0.0, "num_tokens": 35610304.0, "reward": 0.6437499523162842, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.625, "rewards/video_r1_accuracy_reward/std": 0.4860251843929291, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 343 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 77.0, "completions/max_terminated_length": 77.0, "completions/mean_length": 40.2734375, "completions/mean_terminated_length": 40.2734375, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.0711452960968018, "epoch": 0.8686868686868687, "frac_reward_zero_std": 0.875, "grad_norm": 2.956529140472412, "learning_rate": 1.3383838383838384e-07, "loss": -0.0, "num_tokens": 35720067.0, "reward": 0.5992187261581421, "reward_std": 0.05922255665063858, "rewards/video_r1_accuracy_reward/mean": 0.578125, "rewards/video_r1_accuracy_reward/std": 0.4957992732524872, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 344 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 93.0, "completions/max_terminated_length": 93.0, "completions/mean_length": 42.9140625, "completions/mean_terminated_length": 42.9140625, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 1.1545445919036865, "epoch": 0.8712121212121212, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.3131313131313133e-07, "loss": 0.0, "num_tokens": 35819584.0, "reward": 0.5249999761581421, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.5, "rewards/video_r1_accuracy_reward/std": 0.5019646286964417, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 345 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 79.0, "completions/max_terminated_length": 79.0, "completions/mean_length": 43.2421875, "completions/mean_terminated_length": 43.2421875, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.1233935356140137, "epoch": 0.8737373737373737, "frac_reward_zero_std": 0.875, "grad_norm": 1.0883266925811768, "learning_rate": 1.2878787878787877e-07, "loss": 0.0, "num_tokens": 35926447.0, "reward": 0.6066405773162842, "reward_std": 0.04847751557826996, "rewards/video_r1_accuracy_reward/mean": 0.5859375, "rewards/video_r1_accuracy_reward/std": 0.49449479579925537, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 346 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 94.0, "completions/max_terminated_length": 94.0, "completions/mean_length": 44.890625, "completions/mean_terminated_length": 44.890625, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.1265748739242554, "epoch": 0.8762626262626263, "frac_reward_zero_std": 0.9375, "grad_norm": 1.2674680948257446, "learning_rate": 1.2626262626262626e-07, "loss": 0.0, "num_tokens": 36026345.0, "reward": 0.814453125, "reward_std": 0.020992232486605644, "rewards/video_r1_accuracy_reward/mean": 0.8046875, "rewards/video_r1_accuracy_reward/std": 0.3979988098144531, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 347 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 93.0, "completions/max_terminated_length": 93.0, "completions/mean_length": 39.609375, "completions/mean_terminated_length": 39.609375, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.024735450744629, "epoch": 0.8787878787878788, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2373737373737373e-07, "loss": 0.0, "num_tokens": 36128031.0, "reward": 0.5249999761581421, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.5, "rewards/video_r1_accuracy_reward/std": 0.5019646286964417, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 348 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 78.0, "completions/max_terminated_length": 78.0, "completions/mean_length": 44.5546875, "completions/mean_terminated_length": 44.5546875, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 1.0595622062683105, "epoch": 0.8813131313131313, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.2121212121212122e-07, "loss": 0.0, "num_tokens": 36225950.0, "reward": 0.762499988079071, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.75, "rewards/video_r1_accuracy_reward/std": 0.434714138507843, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 349 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 90.0, "completions/max_terminated_length": 90.0, "completions/mean_length": 39.4765625, "completions/mean_terminated_length": 39.4765625, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.0925328731536865, "epoch": 0.8838383838383839, "frac_reward_zero_std": 0.75, "grad_norm": 2.037301778793335, "learning_rate": 1.1868686868686869e-07, "loss": 0.0, "num_tokens": 36321339.0, "reward": 0.614062488079071, "reward_std": 0.0937061756849289, "rewards/video_r1_accuracy_reward/mean": 0.59375, "rewards/video_r1_accuracy_reward/std": 0.4930621087551117, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 350 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 77.0, "completions/max_terminated_length": 77.0, "completions/mean_length": 39.9609375, "completions/mean_terminated_length": 39.9609375, "completions/min_length": 19.0, "completions/min_terminated_length": 19.0, "entropy": 1.053574800491333, "epoch": 0.8863636363636364, "frac_reward_zero_std": 0.875, "grad_norm": 2.9315614700317383, "learning_rate": 1.1616161616161615e-07, "loss": 0.0, "num_tokens": 36427534.0, "reward": 0.7179687023162842, "reward_std": 0.051721714437007904, "rewards/video_r1_accuracy_reward/mean": 0.703125, "rewards/video_r1_accuracy_reward/std": 0.45867621898651123, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 351 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 90.0, "completions/max_terminated_length": 90.0, "completions/mean_length": 41.765625, "completions/mean_terminated_length": 41.765625, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 1.0955579280853271, "epoch": 0.8888888888888888, "frac_reward_zero_std": 0.875, "grad_norm": 2.852755069732666, "learning_rate": 1.1363636363636363e-07, "loss": 0.0, "num_tokens": 36517216.0, "reward": 0.666015625, "reward_std": 0.05272950232028961, "rewards/video_r1_accuracy_reward/mean": 0.6484375, "rewards/video_r1_accuracy_reward/std": 0.4793342351913452, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 352 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.0, "completions/max_terminated_length": 100.0, "completions/mean_length": 41.1484375, "completions/mean_terminated_length": 41.1484375, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.0147664546966553, "epoch": 0.8914141414141414, "frac_reward_zero_std": 0.875, "grad_norm": 3.0033349990844727, "learning_rate": 1.111111111111111e-07, "loss": -0.0, "num_tokens": 36631027.0, "reward": 0.4433593451976776, "reward_std": 0.05272950232028961, "rewards/video_r1_accuracy_reward/mean": 0.4140625, "rewards/video_r1_accuracy_reward/std": 0.49449479579925537, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 353 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 92.0, "completions/max_terminated_length": 92.0, "completions/mean_length": 41.4140625, "completions/mean_terminated_length": 41.4140625, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 1.0533812046051025, "epoch": 0.8939393939393939, "frac_reward_zero_std": 0.8125, "grad_norm": 2.2410473823547363, "learning_rate": 1.0858585858585859e-07, "loss": -0.0, "num_tokens": 36731992.0, "reward": 0.666015625, "reward_std": 0.06297669559717178, "rewards/video_r1_accuracy_reward/mean": 0.6484375, "rewards/video_r1_accuracy_reward/std": 0.4793342351913452, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 354 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.0, "completions/max_terminated_length": 87.0, "completions/mean_length": 43.734375, "completions/mean_terminated_length": 43.734375, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 1.12202787399292, "epoch": 0.8964646464646465, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.0606060606060605e-07, "loss": 0.0, "num_tokens": 36848974.0, "reward": 0.703125, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.6875, "rewards/video_r1_accuracy_reward/std": 0.4653336703777313, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 355 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.0, "completions/max_terminated_length": 128.0, "completions/mean_length": 43.1796875, "completions/mean_terminated_length": 43.1796875, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.0783835649490356, "epoch": 0.898989898989899, "frac_reward_zero_std": 0.9375, "grad_norm": 4.910101890563965, "learning_rate": 1.0353535353535353e-07, "loss": 0.0, "num_tokens": 36956045.0, "reward": 0.7179687023162842, "reward_std": 0.027485283091664314, "rewards/video_r1_accuracy_reward/mean": 0.703125, "rewards/video_r1_accuracy_reward/std": 0.45867621898651123, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 356 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.0, "completions/max_terminated_length": 120.0, "completions/mean_length": 38.59375, "completions/mean_terminated_length": 38.59375, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.0902092456817627, "epoch": 0.9015151515151515, "frac_reward_zero_std": 0.9375, "grad_norm": 1.2750098705291748, "learning_rate": 1.01010101010101e-07, "loss": -0.0, "num_tokens": 37047657.0, "reward": 0.6066405773162842, "reward_std": 0.03072948195040226, "rewards/video_r1_accuracy_reward/mean": 0.5859375, "rewards/video_r1_accuracy_reward/std": 0.49449479579925537, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 357 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.0, "completions/max_terminated_length": 107.0, "completions/mean_length": 45.1796875, "completions/mean_terminated_length": 45.1796875, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.1572962999343872, "epoch": 0.9040404040404041, "frac_reward_zero_std": 0.875, "grad_norm": 2.8475706577301025, "learning_rate": 9.848484848484848e-08, "loss": 0.0, "num_tokens": 37147744.0, "reward": 0.829296886920929, "reward_std": 0.04847751557826996, "rewards/video_r1_accuracy_reward/mean": 0.8203125, "rewards/video_r1_accuracy_reward/std": 0.3854354918003082, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 358 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 96.0, "completions/max_terminated_length": 96.0, "completions/mean_length": 42.375, "completions/mean_terminated_length": 42.375, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 1.0927423238754272, "epoch": 0.9065656565656566, "frac_reward_zero_std": 0.8125, "grad_norm": 2.16367244720459, "learning_rate": 9.595959595959594e-08, "loss": 0.0, "num_tokens": 37259240.0, "reward": 0.7921874523162842, "reward_std": 0.0737217366695404, "rewards/video_r1_accuracy_reward/mean": 0.78125, "rewards/video_r1_accuracy_reward/std": 0.41502299904823303, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 359 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.0, "completions/max_terminated_length": 119.0, "completions/mean_length": 38.2265625, "completions/mean_terminated_length": 38.2265625, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.0505998134613037, "epoch": 0.9090909090909091, "frac_reward_zero_std": 0.875, "grad_norm": 2.0892300605773926, "learning_rate": 9.343434343434344e-08, "loss": -0.0, "num_tokens": 37351021.0, "reward": 0.5992187261581421, "reward_std": 0.04198446497321129, "rewards/video_r1_accuracy_reward/mean": 0.578125, "rewards/video_r1_accuracy_reward/std": 0.4957992732524872, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 360 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 384.0, "completions/max_terminated_length": 95.0, "completions/mean_length": 45.1015625, "completions/mean_terminated_length": 42.433067321777344, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.9791218042373657, "epoch": 0.9116161616161617, "frac_reward_zero_std": 0.875, "grad_norm": 3.24613881111145, "learning_rate": 9.09090909090909e-08, "loss": 0.0, "num_tokens": 37463530.0, "reward": 0.7101562023162842, "reward_std": 0.04958236962556839, "rewards/video_r1_accuracy_reward/mean": 0.6953125, "rewards/video_r1_accuracy_reward/std": 0.46208351850509644, "rewards/video_r1_format_reward/mean": 0.9921875, "rewards/video_r1_format_reward/std": 0.0883883461356163, "step": 361 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.0, "completions/max_terminated_length": 107.0, "completions/mean_length": 43.7890625, "completions/mean_terminated_length": 43.7890625, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.1600117683410645, "epoch": 0.9141414141414141, "frac_reward_zero_std": 0.8125, "grad_norm": 2.4082045555114746, "learning_rate": 8.838383838383838e-08, "loss": -0.0, "num_tokens": 37546919.0, "reward": 0.4507812559604645, "reward_std": 0.090959832072258, "rewards/video_r1_accuracy_reward/mean": 0.421875, "rewards/video_r1_accuracy_reward/std": 0.4957992732524872, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 362 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 74.0, "completions/max_terminated_length": 74.0, "completions/mean_length": 40.6640625, "completions/mean_terminated_length": 40.6640625, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 1.0563390254974365, "epoch": 0.9166666666666666, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 8.585858585858585e-08, "loss": 0.0, "num_tokens": 37642260.0, "reward": 0.5843749642372131, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.5625, "rewards/video_r1_accuracy_reward/std": 0.49802759289741516, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 363 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 78.0, "completions/max_terminated_length": 78.0, "completions/mean_length": 40.3359375, "completions/mean_terminated_length": 40.3359375, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.0542752742767334, "epoch": 0.9191919191919192, "frac_reward_zero_std": 0.75, "grad_norm": 2.0764169692993164, "learning_rate": 8.333333333333333e-08, "loss": 0.0, "num_tokens": 37752327.0, "reward": 0.539843738079071, "reward_std": 0.0937061756849289, "rewards/video_r1_accuracy_reward/mean": 0.515625, "rewards/video_r1_accuracy_reward/std": 0.5017194747924805, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 364 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 76.0, "completions/max_terminated_length": 76.0, "completions/mean_length": 39.75, "completions/mean_terminated_length": 39.75, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.0389442443847656, "epoch": 0.9217171717171717, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 8.080808080808082e-08, "loss": 0.0, "num_tokens": 37840775.0, "reward": 0.703125, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.6875, "rewards/video_r1_accuracy_reward/std": 0.4653336703777313, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 365 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 82.0, "completions/max_terminated_length": 82.0, "completions/mean_length": 43.8359375, "completions/mean_terminated_length": 43.8359375, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.0553057193756104, "epoch": 0.9242424242424242, "frac_reward_zero_std": 0.8125, "grad_norm": 1.5467336177825928, "learning_rate": 7.828282828282828e-08, "loss": 0.0, "num_tokens": 37939746.0, "reward": 0.6363281011581421, "reward_std": 0.06297669559717178, "rewards/video_r1_accuracy_reward/mean": 0.6171875, "rewards/video_r1_accuracy_reward/std": 0.4879830479621887, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 366 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 93.0, "completions/max_terminated_length": 93.0, "completions/mean_length": 40.75, "completions/mean_terminated_length": 40.75, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.1079405546188354, "epoch": 0.9267676767676768, "frac_reward_zero_std": 0.9375, "grad_norm": 2.4892454147338867, "learning_rate": 7.575757575757576e-08, "loss": 0.0, "num_tokens": 38040370.0, "reward": 0.7550780773162842, "reward_std": 0.020992232486605644, "rewards/video_r1_accuracy_reward/mean": 0.7421875, "rewards/video_r1_accuracy_reward/std": 0.43914905190467834, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 367 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 75.0, "completions/max_terminated_length": 75.0, "completions/mean_length": 39.0625, "completions/mean_terminated_length": 39.0625, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.0692365169525146, "epoch": 0.9292929292929293, "frac_reward_zero_std": 0.875, "grad_norm": 1.5513192415237427, "learning_rate": 7.323232323232323e-08, "loss": 0.0, "num_tokens": 38149562.0, "reward": 0.5992187261581421, "reward_std": 0.051721714437007904, "rewards/video_r1_accuracy_reward/mean": 0.578125, "rewards/video_r1_accuracy_reward/std": 0.4957992732524872, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 368 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.0, "completions/max_terminated_length": 107.0, "completions/mean_length": 38.9609375, "completions/mean_terminated_length": 38.9609375, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 0.9736450910568237, "epoch": 0.9318181818181818, "frac_reward_zero_std": 0.75, "grad_norm": 2.6586601734161377, "learning_rate": 7.070707070707071e-08, "loss": 0.0, "num_tokens": 38255013.0, "reward": 0.7550780773162842, "reward_std": 0.09046198427677155, "rewards/video_r1_accuracy_reward/mean": 0.7421875, "rewards/video_r1_accuracy_reward/std": 0.43914905190467834, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 369 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.0, "completions/max_terminated_length": 114.0, "completions/mean_length": 42.0625, "completions/mean_terminated_length": 42.0625, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 1.0568822622299194, "epoch": 0.9343434343434344, "frac_reward_zero_std": 0.875, "grad_norm": 1.516427993774414, "learning_rate": 6.818181818181817e-08, "loss": -0.0, "num_tokens": 38344869.0, "reward": 0.6214843392372131, "reward_std": 0.05272950232028961, "rewards/video_r1_accuracy_reward/mean": 0.6015625, "rewards/video_r1_accuracy_reward/std": 0.4915000796318054, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 370 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 78.0, "completions/max_terminated_length": 78.0, "completions/mean_length": 41.4609375, "completions/mean_terminated_length": 41.4609375, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 1.016709566116333, "epoch": 0.9368686868686869, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 6.565656565656566e-08, "loss": 0.0, "num_tokens": 38439152.0, "reward": 0.703125, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.6875, "rewards/video_r1_accuracy_reward/std": 0.4653336703777313, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 371 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 96.0, "completions/max_terminated_length": 96.0, "completions/mean_length": 41.9921875, "completions/mean_terminated_length": 41.9921875, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.039790391921997, "epoch": 0.9393939393939394, "frac_reward_zero_std": 0.9375, "grad_norm": 1.421544075012207, "learning_rate": 6.313131313131313e-08, "loss": -0.0, "num_tokens": 38528439.0, "reward": 0.5472656488418579, "reward_std": 0.03072948195040226, "rewards/video_r1_accuracy_reward/mean": 0.5234375, "rewards/video_r1_accuracy_reward/std": 0.5014128684997559, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 372 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.0, "completions/max_terminated_length": 121.0, "completions/mean_length": 39.5390625, "completions/mean_terminated_length": 39.5390625, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 1.0156900882720947, "epoch": 0.9419191919191919, "frac_reward_zero_std": 0.9375, "grad_norm": 1.0844476222991943, "learning_rate": 6.060606060606061e-08, "loss": -0.0, "num_tokens": 38620764.0, "reward": 0.8070312738418579, "reward_std": 0.027485284954309464, "rewards/video_r1_accuracy_reward/mean": 0.796875, "rewards/video_r1_accuracy_reward/std": 0.40390563011169434, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 373 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 91.0, "completions/max_terminated_length": 91.0, "completions/mean_length": 41.515625, "completions/mean_terminated_length": 41.515625, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.1009835004806519, "epoch": 0.9444444444444444, "frac_reward_zero_std": 0.875, "grad_norm": 2.7400307655334473, "learning_rate": 5.8080808080808076e-08, "loss": 0.0, "num_tokens": 38722894.0, "reward": 0.443359375, "reward_std": 0.058214765042066574, "rewards/video_r1_accuracy_reward/mean": 0.4140625, "rewards/video_r1_accuracy_reward/std": 0.49449479579925537, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 374 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 73.0, "completions/max_terminated_length": 73.0, "completions/mean_length": 39.25, "completions/mean_terminated_length": 39.25, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.9999622106552124, "epoch": 0.946969696969697, "frac_reward_zero_std": 0.8125, "grad_norm": 3.1559898853302, "learning_rate": 5.555555555555555e-08, "loss": -0.0, "num_tokens": 38820150.0, "reward": 0.740234375, "reward_std": 0.08995203673839569, "rewards/video_r1_accuracy_reward/mean": 0.7265625, "rewards/video_r1_accuracy_reward/std": 0.447474867105484, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 375 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 75.0, "completions/max_terminated_length": 75.0, "completions/mean_length": 38.1484375, "completions/mean_terminated_length": 38.1484375, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.0171918869018555, "epoch": 0.9494949494949495, "frac_reward_zero_std": 0.9375, "grad_norm": 1.901557445526123, "learning_rate": 5.303030303030303e-08, "loss": 0.0, "num_tokens": 38918177.0, "reward": 0.814453125, "reward_std": 0.020992232486605644, "rewards/video_r1_accuracy_reward/mean": 0.8046875, "rewards/video_r1_accuracy_reward/std": 0.3979988098144531, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 376 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 85.0, "completions/max_terminated_length": 85.0, "completions/mean_length": 40.1484375, "completions/mean_terminated_length": 40.1484375, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.0424983501434326, "epoch": 0.952020202020202, "frac_reward_zero_std": 0.9375, "grad_norm": 1.0092467069625854, "learning_rate": 5.05050505050505e-08, "loss": -0.0, "num_tokens": 39023852.0, "reward": 0.5101562738418579, "reward_std": 0.027485284954309464, "rewards/video_r1_accuracy_reward/mean": 0.484375, "rewards/video_r1_accuracy_reward/std": 0.5017194747924805, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 377 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.0, "completions/max_terminated_length": 99.0, "completions/mean_length": 38.2890625, "completions/mean_terminated_length": 38.2890625, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.9870564341545105, "epoch": 0.9545454545454546, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 4.797979797979797e-08, "loss": 0.0, "num_tokens": 39119657.0, "reward": 0.6437499523162842, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.625, "rewards/video_r1_accuracy_reward/std": 0.4860251843929291, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 378 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 88.0, "completions/max_terminated_length": 88.0, "completions/mean_length": 41.375, "completions/mean_terminated_length": 41.375, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.1362022161483765, "epoch": 0.9570707070707071, "frac_reward_zero_std": 0.75, "grad_norm": 3.5773839950561523, "learning_rate": 4.545454545454545e-08, "loss": -0.0, "num_tokens": 39225209.0, "reward": 0.673046886920929, "reward_std": 0.07057460397481918, "rewards/video_r1_accuracy_reward/mean": 0.65625, "rewards/video_r1_accuracy_reward/std": 0.47682511806488037, "rewards/video_r1_format_reward/mean": 0.9921875, "rewards/video_r1_format_reward/std": 0.0883883461356163, "step": 379 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 72.0, "completions/max_terminated_length": 72.0, "completions/mean_length": 39.5703125, "completions/mean_terminated_length": 39.5703125, "completions/min_length": 24.0, "completions/min_terminated_length": 24.0, "entropy": 0.9890860319137573, "epoch": 0.9595959595959596, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 4.292929292929292e-08, "loss": 0.0, "num_tokens": 39322978.0, "reward": 0.7625000476837158, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.75, "rewards/video_r1_accuracy_reward/std": 0.434714138507843, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 380 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 94.0, "completions/max_terminated_length": 94.0, "completions/mean_length": 42.4375, "completions/mean_terminated_length": 42.4375, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.0359077453613281, "epoch": 0.9621212121212122, "frac_reward_zero_std": 0.8125, "grad_norm": 1.6127235889434814, "learning_rate": 4.040404040404041e-08, "loss": 0.0, "num_tokens": 39427570.0, "reward": 0.7699218988418579, "reward_std": 0.06297669559717178, "rewards/video_r1_accuracy_reward/mean": 0.7578125, "rewards/video_r1_accuracy_reward/std": 0.4300905168056488, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 381 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 85.0, "completions/max_terminated_length": 85.0, "completions/mean_length": 43.0, "completions/mean_terminated_length": 43.0, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.0722706317901611, "epoch": 0.9646464646464646, "frac_reward_zero_std": 0.8125, "grad_norm": 1.7954891920089722, "learning_rate": 3.787878787878788e-08, "loss": 0.0, "num_tokens": 39525642.0, "reward": 0.8218749761581421, "reward_std": 0.07920700311660767, "rewards/video_r1_accuracy_reward/mean": 0.8125, "rewards/video_r1_accuracy_reward/std": 0.39184603095054626, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 382 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 83.0, "completions/max_terminated_length": 83.0, "completions/mean_length": 41.796875, "completions/mean_terminated_length": 41.796875, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.0892385244369507, "epoch": 0.9671717171717171, "frac_reward_zero_std": 0.875, "grad_norm": 2.684129238128662, "learning_rate": 3.5353535353535353e-08, "loss": -0.0, "num_tokens": 39620936.0, "reward": 0.717968761920929, "reward_std": 0.05922255665063858, "rewards/video_r1_accuracy_reward/mean": 0.703125, "rewards/video_r1_accuracy_reward/std": 0.45867621898651123, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 383 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 67.0, "completions/max_terminated_length": 67.0, "completions/mean_length": 37.6328125, "completions/mean_terminated_length": 37.6328125, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 0.9923283457756042, "epoch": 0.9696969696969697, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 3.282828282828283e-08, "loss": 0.0, "num_tokens": 39719425.0, "reward": 0.6437499523162842, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.625, "rewards/video_r1_accuracy_reward/std": 0.4860251843929291, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 384 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 76.0, "completions/max_terminated_length": 76.0, "completions/mean_length": 40.5703125, "completions/mean_terminated_length": 40.5703125, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 0.9553056955337524, "epoch": 0.9722222222222222, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 3.0303030303030305e-08, "loss": 0.0, "num_tokens": 39825234.0, "reward": 0.762499988079071, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.75, "rewards/video_r1_accuracy_reward/std": 0.434714138507843, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 385 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 94.0, "completions/max_terminated_length": 94.0, "completions/mean_length": 41.765625, "completions/mean_terminated_length": 41.765625, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "entropy": 1.0995062589645386, "epoch": 0.9747474747474747, "frac_reward_zero_std": 0.9375, "grad_norm": 1.4076322317123413, "learning_rate": 2.7777777777777774e-08, "loss": 0.0, "num_tokens": 39925308.0, "reward": 0.7550780773162842, "reward_std": 0.020992232486605644, "rewards/video_r1_accuracy_reward/mean": 0.7421875, "rewards/video_r1_accuracy_reward/std": 0.43914905190467834, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 386 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 80.0, "completions/max_terminated_length": 80.0, "completions/mean_length": 42.9453125, "completions/mean_terminated_length": 42.9453125, "completions/min_length": 25.0, "completions/min_terminated_length": 25.0, "entropy": 0.9817566275596619, "epoch": 0.9772727272727273, "frac_reward_zero_std": 0.9375, "grad_norm": 0.9628924131393433, "learning_rate": 2.525252525252525e-08, "loss": 0.0, "num_tokens": 40033437.0, "reward": 0.9332031011581421, "reward_std": 0.020992232486605644, "rewards/video_r1_accuracy_reward/mean": 0.9296875, "rewards/video_r1_accuracy_reward/std": 0.2566775679588318, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 387 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.0, "completions/max_terminated_length": 87.0, "completions/mean_length": 43.6328125, "completions/mean_terminated_length": 43.6328125, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.0863628387451172, "epoch": 0.9797979797979798, "frac_reward_zero_std": 0.8125, "grad_norm": 2.400270462036133, "learning_rate": 2.2727272727272725e-08, "loss": 0.0, "num_tokens": 40137166.0, "reward": 0.6734374761581421, "reward_std": 0.06946974992752075, "rewards/video_r1_accuracy_reward/mean": 0.65625, "rewards/video_r1_accuracy_reward/std": 0.47682511806488037, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 388 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 76.0, "completions/max_terminated_length": 76.0, "completions/mean_length": 39.21875, "completions/mean_terminated_length": 39.21875, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 0.9649280309677124, "epoch": 0.9823232323232324, "frac_reward_zero_std": 0.875, "grad_norm": 1.8059344291687012, "learning_rate": 2.0202020202020204e-08, "loss": 0.0, "num_tokens": 40238914.0, "reward": 0.6808593273162842, "reward_std": 0.04847751557826996, "rewards/video_r1_accuracy_reward/mean": 0.6640625, "rewards/video_r1_accuracy_reward/std": 0.47417303919792175, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 389 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.0, "completions/max_terminated_length": 106.0, "completions/mean_length": 41.9375, "completions/mean_terminated_length": 41.9375, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 1.0557193756103516, "epoch": 0.9848484848484849, "frac_reward_zero_std": 0.875, "grad_norm": 3.0474207401275635, "learning_rate": 1.7676767676767677e-08, "loss": -0.0, "num_tokens": 40355434.0, "reward": 0.7699218988418579, "reward_std": 0.062466755509376526, "rewards/video_r1_accuracy_reward/mean": 0.7578125, "rewards/video_r1_accuracy_reward/std": 0.4300905168056488, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 390 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 102.0, "completions/max_terminated_length": 102.0, "completions/mean_length": 39.8359375, "completions/mean_terminated_length": 39.8359375, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 0.9982803463935852, "epoch": 0.9873737373737373, "frac_reward_zero_std": 0.9375, "grad_norm": 0.9107327461242676, "learning_rate": 1.5151515151515152e-08, "loss": -0.0, "num_tokens": 40468109.0, "reward": 0.651171863079071, "reward_std": 0.020992232486605644, "rewards/video_r1_accuracy_reward/mean": 0.6328125, "rewards/video_r1_accuracy_reward/std": 0.4839322865009308, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 391 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.0, "completions/max_terminated_length": 100.0, "completions/mean_length": 44.515625, "completions/mean_terminated_length": 44.515625, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.0974993705749512, "epoch": 0.98989898989899, "frac_reward_zero_std": 0.9375, "grad_norm": 1.1084349155426025, "learning_rate": 1.2626262626262625e-08, "loss": 0.0, "num_tokens": 40570487.0, "reward": 0.6585937142372131, "reward_std": 0.027485283091664314, "rewards/video_r1_accuracy_reward/mean": 0.640625, "rewards/video_r1_accuracy_reward/std": 0.481702595949173, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 392 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 81.0, "completions/max_terminated_length": 81.0, "completions/mean_length": 38.5, "completions/mean_terminated_length": 38.5, "completions/min_length": 21.0, "completions/min_terminated_length": 21.0, "entropy": 1.0579657554626465, "epoch": 0.9924242424242424, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 1.0101010101010102e-08, "loss": 0.0, "num_tokens": 40667895.0, "reward": 0.5843750238418579, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.5625, "rewards/video_r1_accuracy_reward/std": 0.49802759289741516, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 393 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 90.0, "completions/max_terminated_length": 90.0, "completions/mean_length": 39.2890625, "completions/mean_terminated_length": 39.2890625, "completions/min_length": 22.0, "completions/min_terminated_length": 22.0, "entropy": 1.0520402193069458, "epoch": 0.9949494949494949, "frac_reward_zero_std": 0.8125, "grad_norm": 1.8841626644134521, "learning_rate": 7.575757575757576e-09, "loss": -0.0, "num_tokens": 40764580.0, "reward": 0.6804687976837158, "reward_std": 0.04958236962556839, "rewards/video_r1_accuracy_reward/mean": 0.6640625, "rewards/video_r1_accuracy_reward/std": 0.47417303919792175, "rewards/video_r1_format_reward/mean": 0.9921875, "rewards/video_r1_format_reward/std": 0.0883883461356163, "step": 394 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.0, "completions/max_terminated_length": 99.0, "completions/mean_length": 41.4140625, "completions/mean_terminated_length": 41.4140625, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 1.0266304016113281, "epoch": 0.9974747474747475, "frac_reward_zero_std": 0.8125, "grad_norm": 4.306983470916748, "learning_rate": 5.050505050505051e-09, "loss": 0.0, "num_tokens": 40861593.0, "reward": 0.651171863079071, "reward_std": 0.06297669559717178, "rewards/video_r1_accuracy_reward/mean": 0.6328125, "rewards/video_r1_accuracy_reward/std": 0.4839322865009308, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 395 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 74.0, "completions/max_terminated_length": 74.0, "completions/mean_length": 38.1640625, "completions/mean_terminated_length": 38.1640625, "completions/min_length": 23.0, "completions/min_terminated_length": 23.0, "entropy": 0.9408200979232788, "epoch": 1.0, "frac_reward_zero_std": 1.0, "grad_norm": 0.0, "learning_rate": 2.5252525252525255e-09, "loss": 0.0, "num_tokens": 40963742.0, "reward": 0.762499988079071, "reward_std": 0.0, "rewards/video_r1_accuracy_reward/mean": 0.75, "rewards/video_r1_accuracy_reward/std": 0.434714138507843, "rewards/video_r1_format_reward/mean": 1.0, "rewards/video_r1_format_reward/std": 0.0, "step": 396 } ], "logging_steps": 1.0, "max_steps": 396, "num_input_tokens_seen": 40963742, "num_train_epochs": 1, "save_steps": 159, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }