{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 396,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 248.0,
      "completions/max_terminated_length": 248.0,
      "completions/mean_length": 122.875,
      "completions/mean_terminated_length": 122.875,
      "completions/min_length": 51.0,
      "completions/min_terminated_length": 51.0,
      "entropy": 0.7635231614112854,
      "epoch": 0.0025252525252525255,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1e-06,
      "loss": 0.0,
      "num_tokens": 105648.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.0,
      "rewards/video_r1_accuracy_reward/std": 0.0,
      "rewards/video_r1_format_reward/mean": 0.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 1
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 384.0,
      "completions/max_terminated_length": 364.0,
      "completions/mean_length": 131.6328125,
      "completions/mean_terminated_length": 129.64566040039062,
      "completions/min_length": 61.0,
      "completions/min_terminated_length": 61.0,
      "entropy": 0.7612149715423584,
      "epoch": 0.005050505050505051,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 0.6195482015609741,
      "learning_rate": 9.974747474747475e-07,
      "loss": -0.0,
      "num_tokens": 223745.0,
      "reward": 0.0078125,
      "reward_std": 0.022097086533904076,
      "rewards/video_r1_accuracy_reward/mean": 0.0078125,
      "rewards/video_r1_accuracy_reward/std": 0.0883883461356163,
      "rewards/video_r1_format_reward/mean": 0.0078125,
      "rewards/video_r1_format_reward/std": 0.0883883461356163,
      "step": 2
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 286.0,
      "completions/max_terminated_length": 286.0,
      "completions/mean_length": 134.2265625,
      "completions/mean_terminated_length": 134.2265625,
      "completions/min_length": 44.0,
      "completions/min_terminated_length": 44.0,
      "entropy": 0.7807673215866089,
      "epoch": 0.007575757575757576,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 0.5153651833534241,
      "learning_rate": 9.949494949494949e-07,
      "loss": 0.0,
      "num_tokens": 336902.0,
      "reward": 0.0015625000232830644,
      "reward_std": 0.0016703829169273376,
      "rewards/video_r1_accuracy_reward/mean": 0.0,
      "rewards/video_r1_accuracy_reward/std": 0.0,
      "rewards/video_r1_format_reward/mean": 0.03125,
      "rewards/video_r1_format_reward/std": 0.1746762990951538,
      "step": 3
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 384.0,
      "completions/max_terminated_length": 290.0,
      "completions/mean_length": 125.453125,
      "completions/mean_terminated_length": 123.41732025146484,
      "completions/min_length": 48.0,
      "completions/min_terminated_length": 48.0,
      "entropy": 0.7577059268951416,
      "epoch": 0.010101010101010102,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 9.924242424242425e-07,
      "loss": 0.0,
      "num_tokens": 450968.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.0,
      "rewards/video_r1_accuracy_reward/std": 0.0,
      "rewards/video_r1_format_reward/mean": 0.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 4
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 291.0,
      "completions/max_terminated_length": 291.0,
      "completions/mean_length": 125.5546875,
      "completions/mean_terminated_length": 125.5546875,
      "completions/min_length": 52.0,
      "completions/min_terminated_length": 52.0,
      "entropy": 0.7316204309463501,
      "epoch": 0.012626262626262626,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 9.898989898989898e-07,
      "loss": 0.0,
      "num_tokens": 569063.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.0,
      "rewards/video_r1_accuracy_reward/std": 0.0,
      "rewards/video_r1_format_reward/mean": 0.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 5
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 302.0,
      "completions/max_terminated_length": 302.0,
      "completions/mean_length": 134.765625,
      "completions/mean_terminated_length": 134.765625,
      "completions/min_length": 58.0,
      "completions/min_terminated_length": 58.0,
      "entropy": 0.7153864502906799,
      "epoch": 0.015151515151515152,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 9.873737373737374e-07,
      "loss": 0.0,
      "num_tokens": 677257.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.0,
      "rewards/video_r1_accuracy_reward/std": 0.0,
      "rewards/video_r1_format_reward/mean": 0.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 6
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 347.0,
      "completions/max_terminated_length": 347.0,
      "completions/mean_length": 142.9140625,
      "completions/mean_terminated_length": 142.9140625,
      "completions/min_length": 47.0,
      "completions/min_terminated_length": 47.0,
      "entropy": 0.6941128373146057,
      "epoch": 0.017676767676767676,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 9.848484848484847e-07,
      "loss": 0.0,
      "num_tokens": 791206.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.0,
      "rewards/video_r1_accuracy_reward/std": 0.0,
      "rewards/video_r1_format_reward/mean": 0.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 7
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 339.0,
      "completions/max_terminated_length": 339.0,
      "completions/mean_length": 153.609375,
      "completions/mean_terminated_length": 153.609375,
      "completions/min_length": 50.0,
      "completions/min_terminated_length": 50.0,
      "entropy": 0.7712859511375427,
      "epoch": 0.020202020202020204,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 9.823232323232323e-07,
      "loss": 0.0,
      "num_tokens": 906244.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.0,
      "rewards/video_r1_accuracy_reward/std": 0.0,
      "rewards/video_r1_format_reward/mean": 0.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 8
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 308.0,
      "completions/max_terminated_length": 308.0,
      "completions/mean_length": 133.671875,
      "completions/mean_terminated_length": 133.671875,
      "completions/min_length": 63.0,
      "completions/min_terminated_length": 63.0,
      "entropy": 0.7461484670639038,
      "epoch": 0.022727272727272728,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 9.797979797979797e-07,
      "loss": 0.0,
      "num_tokens": 1022034.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.0,
      "rewards/video_r1_accuracy_reward/std": 0.0,
      "rewards/video_r1_format_reward/mean": 0.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 9
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 274.0,
      "completions/max_terminated_length": 274.0,
      "completions/mean_length": 127.5390625,
      "completions/mean_terminated_length": 127.5390625,
      "completions/min_length": 55.0,
      "completions/min_terminated_length": 55.0,
      "entropy": 0.7210257649421692,
      "epoch": 0.025252525252525252,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 9.772727272727273e-07,
      "loss": 0.0,
      "num_tokens": 1149551.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.0,
      "rewards/video_r1_accuracy_reward/std": 0.0,
      "rewards/video_r1_format_reward/mean": 0.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 10
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 297.0,
      "completions/max_terminated_length": 297.0,
      "completions/mean_length": 131.25,
      "completions/mean_terminated_length": 131.25,
      "completions/min_length": 49.0,
      "completions/min_terminated_length": 49.0,
      "entropy": 0.7806915640830994,
      "epoch": 0.027777777777777776,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 9.747474747474746e-07,
      "loss": 0.0,
      "num_tokens": 1255743.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.0,
      "rewards/video_r1_accuracy_reward/std": 0.0,
      "rewards/video_r1_format_reward/mean": 0.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 11
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 323.0,
      "completions/max_terminated_length": 323.0,
      "completions/mean_length": 130.3203125,
      "completions/mean_terminated_length": 130.3203125,
      "completions/min_length": 42.0,
      "completions/min_terminated_length": 42.0,
      "entropy": 0.6973187923431396,
      "epoch": 0.030303030303030304,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 9.722222222222222e-07,
      "loss": 0.0,
      "num_tokens": 1366240.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.0,
      "rewards/video_r1_accuracy_reward/std": 0.0,
      "rewards/video_r1_format_reward/mean": 0.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 12
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 384.0,
      "completions/max_terminated_length": 377.0,
      "completions/mean_length": 154.109375,
      "completions/mean_terminated_length": 152.29922485351562,
      "completions/min_length": 57.0,
      "completions/min_terminated_length": 57.0,
      "entropy": 0.6917202472686768,
      "epoch": 0.03282828282828283,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 0.3806738555431366,
      "learning_rate": 9.696969696969698e-07,
      "loss": 0.0,
      "num_tokens": 1474046.0,
      "reward": 0.0003906250058207661,
      "reward_std": 0.0011048543965443969,
      "rewards/video_r1_accuracy_reward/mean": 0.0,
      "rewards/video_r1_accuracy_reward/std": 0.0,
      "rewards/video_r1_format_reward/mean": 0.0078125,
      "rewards/video_r1_format_reward/std": 0.0883883461356163,
      "step": 13
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 288.0,
      "completions/max_terminated_length": 288.0,
      "completions/mean_length": 139.734375,
      "completions/mean_terminated_length": 139.734375,
      "completions/min_length": 37.0,
      "completions/min_terminated_length": 37.0,
      "entropy": 0.6320770978927612,
      "epoch": 0.03535353535353535,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 9.671717171717171e-07,
      "loss": 0.0,
      "num_tokens": 1585796.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.0,
      "rewards/video_r1_accuracy_reward/std": 0.0,
      "rewards/video_r1_format_reward/mean": 0.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 14
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 380.0,
      "completions/max_terminated_length": 380.0,
      "completions/mean_length": 158.1953125,
      "completions/mean_terminated_length": 158.1953125,
      "completions/min_length": 50.0,
      "completions/min_terminated_length": 50.0,
      "entropy": 0.6951600313186646,
      "epoch": 0.03787878787878788,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 9.646464646464647e-07,
      "loss": 0.0,
      "num_tokens": 1710981.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.0,
      "rewards/video_r1_accuracy_reward/std": 0.0,
      "rewards/video_r1_format_reward/mean": 0.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 15
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 276.0,
      "completions/max_terminated_length": 276.0,
      "completions/mean_length": 136.390625,
      "completions/mean_terminated_length": 136.390625,
      "completions/min_length": 57.0,
      "completions/min_terminated_length": 57.0,
      "entropy": 0.7261592149734497,
      "epoch": 0.04040404040404041,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 0.4154719114303589,
      "learning_rate": 9.62121212121212e-07,
      "loss": -0.0,
      "num_tokens": 1830455.0,
      "reward": 0.0078125,
      "reward_std": 0.022097086533904076,
      "rewards/video_r1_accuracy_reward/mean": 0.0078125,
      "rewards/video_r1_accuracy_reward/std": 0.0883883461356163,
      "rewards/video_r1_format_reward/mean": 0.0078125,
      "rewards/video_r1_format_reward/std": 0.0883883461356163,
      "step": 16
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 279.0,
      "completions/max_terminated_length": 279.0,
      "completions/mean_length": 135.8203125,
      "completions/mean_terminated_length": 135.8203125,
      "completions/min_length": 52.0,
      "completions/min_terminated_length": 52.0,
      "entropy": 0.7299904823303223,
      "epoch": 0.04292929292929293,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 0.41257989406585693,
      "learning_rate": 9.595959595959596e-07,
      "loss": -0.0,
      "num_tokens": 1935088.0,
      "reward": 0.0074218749068677425,
      "reward_std": 0.020992232486605644,
      "rewards/video_r1_accuracy_reward/mean": 0.0078125,
      "rewards/video_r1_accuracy_reward/std": 0.0883883461356163,
      "rewards/video_r1_format_reward/mean": 0.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 17
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 272.0,
      "completions/max_terminated_length": 272.0,
      "completions/mean_length": 143.9296875,
      "completions/mean_terminated_length": 143.9296875,
      "completions/min_length": 68.0,
      "completions/min_terminated_length": 68.0,
      "entropy": 0.7212563753128052,
      "epoch": 0.045454545454545456,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 9.57070707070707e-07,
      "loss": 0.0,
      "num_tokens": 2044351.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.0,
      "rewards/video_r1_accuracy_reward/std": 0.0,
      "rewards/video_r1_format_reward/mean": 0.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 18
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 317.0,
      "completions/max_terminated_length": 317.0,
      "completions/mean_length": 133.453125,
      "completions/mean_terminated_length": 133.453125,
      "completions/min_length": 62.0,
      "completions/min_terminated_length": 62.0,
      "entropy": 0.7506937384605408,
      "epoch": 0.047979797979797977,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 0.6540532112121582,
      "learning_rate": 9.545454545454546e-07,
      "loss": 0.0,
      "num_tokens": 2156297.0,
      "reward": 0.0003906250058207661,
      "reward_std": 0.0011048543965443969,
      "rewards/video_r1_accuracy_reward/mean": 0.0,
      "rewards/video_r1_accuracy_reward/std": 0.0,
      "rewards/video_r1_format_reward/mean": 0.0078125,
      "rewards/video_r1_format_reward/std": 0.0883883461356163,
      "step": 19
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 353.0,
      "completions/max_terminated_length": 353.0,
      "completions/mean_length": 141.8515625,
      "completions/mean_terminated_length": 141.8515625,
      "completions/min_length": 60.0,
      "completions/min_terminated_length": 60.0,
      "entropy": 0.7586977481842041,
      "epoch": 0.050505050505050504,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 0.6235540509223938,
      "learning_rate": 9.520202020202019e-07,
      "loss": 0.0,
      "num_tokens": 2288918.0,
      "reward": 0.0074218749068677425,
      "reward_std": 0.020992232486605644,
      "rewards/video_r1_accuracy_reward/mean": 0.0078125,
      "rewards/video_r1_accuracy_reward/std": 0.0883883461356163,
      "rewards/video_r1_format_reward/mean": 0.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 20
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 380.0,
      "completions/max_terminated_length": 380.0,
      "completions/mean_length": 131.8359375,
      "completions/mean_terminated_length": 131.8359375,
      "completions/min_length": 54.0,
      "completions/min_terminated_length": 54.0,
      "entropy": 0.7679413557052612,
      "epoch": 0.05303030303030303,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 0.4877886474132538,
      "learning_rate": 9.494949494949495e-07,
      "loss": 0.0,
      "num_tokens": 2408449.0,
      "reward": 0.0011718750465661287,
      "reward_std": 0.0016173411859199405,
      "rewards/video_r1_accuracy_reward/mean": 0.0,
      "rewards/video_r1_accuracy_reward/std": 0.0,
      "rewards/video_r1_format_reward/mean": 0.0234375,
      "rewards/video_r1_format_reward/std": 0.15188287198543549,
      "step": 21
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.015625,
      "completions/max_length": 384.0,
      "completions/max_terminated_length": 355.0,
      "completions/mean_length": 146.3671875,
      "completions/mean_terminated_length": 142.59524536132812,
      "completions/min_length": 59.0,
      "completions/min_terminated_length": 59.0,
      "entropy": 0.7015185356140137,
      "epoch": 0.05555555555555555,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 0.34478825330734253,
      "learning_rate": 9.46969696969697e-07,
      "loss": -0.0,
      "num_tokens": 2526696.0,
      "reward": 0.0078125,
      "reward_std": 0.022097086533904076,
      "rewards/video_r1_accuracy_reward/mean": 0.0078125,
      "rewards/video_r1_accuracy_reward/std": 0.0883883461356163,
      "rewards/video_r1_format_reward/mean": 0.0078125,
      "rewards/video_r1_format_reward/std": 0.0883883461356163,
      "step": 22
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 306.0,
      "completions/max_terminated_length": 306.0,
      "completions/mean_length": 129.15625,
      "completions/mean_terminated_length": 129.15625,
      "completions/min_length": 56.0,
      "completions/min_terminated_length": 56.0,
      "entropy": 0.7712253332138062,
      "epoch": 0.05808080808080808,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 9.444444444444444e-07,
      "loss": 0.0,
      "num_tokens": 2640884.0,
      "reward": 0.0,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.0,
      "rewards/video_r1_accuracy_reward/std": 0.0,
      "rewards/video_r1_format_reward/mean": 0.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 23
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 294.0,
      "completions/max_terminated_length": 294.0,
      "completions/mean_length": 135.5078125,
      "completions/mean_terminated_length": 135.5078125,
      "completions/min_length": 61.0,
      "completions/min_terminated_length": 61.0,
      "entropy": 0.8073737621307373,
      "epoch": 0.06060606060606061,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 0.915313720703125,
      "learning_rate": 9.419191919191919e-07,
      "loss": -0.0,
      "num_tokens": 2747125.0,
      "reward": 0.00859374925494194,
      "reward_std": 0.02430679462850094,
      "rewards/video_r1_accuracy_reward/mean": 0.0078125,
      "rewards/video_r1_accuracy_reward/std": 0.0883883461356163,
      "rewards/video_r1_format_reward/mean": 0.0234375,
      "rewards/video_r1_format_reward/std": 0.15188287198543549,
      "step": 24
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 339.0,
      "completions/max_terminated_length": 339.0,
      "completions/mean_length": 131.3515625,
      "completions/mean_terminated_length": 131.3515625,
      "completions/min_length": 39.0,
      "completions/min_terminated_length": 39.0,
      "entropy": 0.7123849987983704,
      "epoch": 0.06313131313131314,
      "frac_reward_zero_std": 0.5,
      "grad_norm": 1.4686827659606934,
      "learning_rate": 9.393939393939395e-07,
      "loss": -0.0,
      "num_tokens": 2863482.0,
      "reward": 0.0859375,
      "reward_std": 0.17301878333091736,
      "rewards/video_r1_accuracy_reward/mean": 0.0859375,
      "rewards/video_r1_accuracy_reward/std": 0.2813730239868164,
      "rewards/video_r1_format_reward/mean": 0.0859375,
      "rewards/video_r1_format_reward/std": 0.2813730239868164,
      "step": 25
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 248.0,
      "completions/max_terminated_length": 248.0,
      "completions/mean_length": 126.0625,
      "completions/mean_terminated_length": 126.0625,
      "completions/min_length": 51.0,
      "completions/min_terminated_length": 51.0,
      "entropy": 0.7570379972457886,
      "epoch": 0.06565656565656566,
      "frac_reward_zero_std": 0.625,
      "grad_norm": 1.2738335132598877,
      "learning_rate": 9.368686868686868e-07,
      "loss": 0.0,
      "num_tokens": 2991954.0,
      "reward": 0.04218749701976776,
      "reward_std": 0.09613416343927383,
      "rewards/video_r1_accuracy_reward/mean": 0.0390625,
      "rewards/video_r1_accuracy_reward/std": 0.194504976272583,
      "rewards/video_r1_format_reward/mean": 0.1015625,
      "rewards/video_r1_format_reward/std": 0.3032590448856354,
      "step": 26
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 327.0,
      "completions/max_terminated_length": 327.0,
      "completions/mean_length": 144.0859375,
      "completions/mean_terminated_length": 144.0859375,
      "completions/min_length": 53.0,
      "completions/min_terminated_length": 53.0,
      "entropy": 0.7464326620101929,
      "epoch": 0.06818181818181818,
      "frac_reward_zero_std": 0.375,
      "grad_norm": 1.6063857078552246,
      "learning_rate": 9.343434343434343e-07,
      "loss": -0.0,
      "num_tokens": 3113973.0,
      "reward": 0.111328125,
      "reward_std": 0.20764078199863434,
      "rewards/video_r1_accuracy_reward/mean": 0.109375,
      "rewards/video_r1_accuracy_reward/std": 0.31333550810813904,
      "rewards/video_r1_format_reward/mean": 0.1484375,
      "rewards/video_r1_format_reward/std": 0.356930136680603,
      "step": 27
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 258.0,
      "completions/max_terminated_length": 258.0,
      "completions/mean_length": 135.25,
      "completions/mean_terminated_length": 135.25,
      "completions/min_length": 54.0,
      "completions/min_terminated_length": 54.0,
      "entropy": 0.6928939819335938,
      "epoch": 0.0707070707070707,
      "frac_reward_zero_std": 0.4375,
      "grad_norm": 1.5145729780197144,
      "learning_rate": 9.318181818181817e-07,
      "loss": 0.0,
      "num_tokens": 3228973.0,
      "reward": 0.03476562350988388,
      "reward_std": 0.09340079128742218,
      "rewards/video_r1_accuracy_reward/mean": 0.03125,
      "rewards/video_r1_accuracy_reward/std": 0.1746762990951538,
      "rewards/video_r1_format_reward/mean": 0.1015625,
      "rewards/video_r1_format_reward/std": 0.3032590448856354,
      "step": 28
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 378.0,
      "completions/max_terminated_length": 378.0,
      "completions/mean_length": 144.5,
      "completions/mean_terminated_length": 144.5,
      "completions/min_length": 59.0,
      "completions/min_terminated_length": 59.0,
      "entropy": 0.6991415023803711,
      "epoch": 0.07323232323232323,
      "frac_reward_zero_std": 0.0625,
      "grad_norm": 1.993449091911316,
      "learning_rate": 9.292929292929292e-07,
      "loss": -0.0,
      "num_tokens": 3338453.0,
      "reward": 0.31640625,
      "reward_std": 0.34012913703918457,
      "rewards/video_r1_accuracy_reward/mean": 0.3125,
      "rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
      "rewards/video_r1_format_reward/mean": 0.390625,
      "rewards/video_r1_format_reward/std": 0.4898075461387634,
      "step": 29
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 349.0,
      "completions/max_terminated_length": 349.0,
      "completions/mean_length": 134.1015625,
      "completions/mean_terminated_length": 134.1015625,
      "completions/min_length": 59.0,
      "completions/min_terminated_length": 59.0,
      "entropy": 0.7583435773849487,
      "epoch": 0.07575757575757576,
      "frac_reward_zero_std": 0.0625,
      "grad_norm": 2.37294340133667,
      "learning_rate": 9.267676767676768e-07,
      "loss": -0.0,
      "num_tokens": 3450378.0,
      "reward": 0.3359375,
      "reward_std": 0.32593491673469543,
      "rewards/video_r1_accuracy_reward/mean": 0.328125,
      "rewards/video_r1_accuracy_reward/std": 0.4713755249977112,
      "rewards/video_r1_format_reward/mean": 0.484375,
      "rewards/video_r1_format_reward/std": 0.5017194747924805,
      "step": 30
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 384.0,
      "completions/max_terminated_length": 366.0,
      "completions/mean_length": 134.0546875,
      "completions/mean_terminated_length": 132.08660888671875,
      "completions/min_length": 44.0,
      "completions/min_terminated_length": 44.0,
      "entropy": 0.7964510917663574,
      "epoch": 0.07828282828282829,
      "frac_reward_zero_std": 0.1875,
      "grad_norm": 1.9095652103424072,
      "learning_rate": 9.242424242424241e-07,
      "loss": -0.0,
      "num_tokens": 3560025.0,
      "reward": 0.3550781011581421,
      "reward_std": 0.267575740814209,
      "rewards/video_r1_accuracy_reward/mean": 0.34375,
      "rewards/video_r1_accuracy_reward/std": 0.47682511806488037,
      "rewards/video_r1_format_reward/mean": 0.5703125,
      "rewards/video_r1_format_reward/std": 0.4969765841960907,
      "step": 31
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 289.0,
      "completions/max_terminated_length": 289.0,
      "completions/mean_length": 113.0625,
      "completions/mean_terminated_length": 113.0625,
      "completions/min_length": 46.0,
      "completions/min_terminated_length": 46.0,
      "entropy": 0.6799850463867188,
      "epoch": 0.08080808080808081,
      "frac_reward_zero_std": 0.0625,
      "grad_norm": 2.1304097175598145,
      "learning_rate": 9.217171717171717e-07,
      "loss": -0.0,
      "num_tokens": 3680297.0,
      "reward": 0.5066406726837158,
      "reward_std": 0.4047975242137909,
      "rewards/video_r1_accuracy_reward/mean": 0.4921875,
      "rewards/video_r1_accuracy_reward/std": 0.5019033551216125,
      "rewards/video_r1_format_reward/mean": 0.78125,
      "rewards/video_r1_format_reward/std": 0.41502299904823303,
      "step": 32
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 294.0,
      "completions/max_terminated_length": 294.0,
      "completions/mean_length": 108.75,
      "completions/mean_terminated_length": 108.75,
      "completions/min_length": 41.0,
      "completions/min_terminated_length": 41.0,
      "entropy": 0.7177351713180542,
      "epoch": 0.08333333333333333,
      "frac_reward_zero_std": 0.25,
      "grad_norm": 2.0556108951568604,
      "learning_rate": 9.191919191919192e-07,
      "loss": -0.0,
      "num_tokens": 3797793.0,
      "reward": 0.47968751192092896,
      "reward_std": 0.27063843607902527,
      "rewards/video_r1_accuracy_reward/mean": 0.4609375,
      "rewards/video_r1_accuracy_reward/std": 0.5004304051399231,
      "rewards/video_r1_format_reward/mean": 0.8359375,
      "rewards/video_r1_format_reward/std": 0.371787428855896,
      "step": 33
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 270.0,
      "completions/max_terminated_length": 270.0,
      "completions/mean_length": 110.3046875,
      "completions/mean_terminated_length": 110.3046875,
      "completions/min_length": 54.0,
      "completions/min_terminated_length": 54.0,
      "entropy": 0.7837837934494019,
      "epoch": 0.08585858585858586,
      "frac_reward_zero_std": 0.25,
      "grad_norm": 1.8440849781036377,
      "learning_rate": 9.166666666666665e-07,
      "loss": -0.0,
      "num_tokens": 3912032.0,
      "reward": 0.661328136920929,
      "reward_std": 0.29765215516090393,
      "rewards/video_r1_accuracy_reward/mean": 0.6484375,
      "rewards/video_r1_accuracy_reward/std": 0.4793342351913452,
      "rewards/video_r1_format_reward/mean": 0.90625,
      "rewards/video_r1_format_reward/std": 0.29262590408325195,
      "step": 34
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 283.0,
      "completions/max_terminated_length": 283.0,
      "completions/mean_length": 99.5546875,
      "completions/mean_terminated_length": 99.5546875,
      "completions/min_length": 40.0,
      "completions/min_terminated_length": 40.0,
      "entropy": 0.8179616928100586,
      "epoch": 0.08838383838383838,
      "frac_reward_zero_std": 0.125,
      "grad_norm": 2.0900862216949463,
      "learning_rate": 9.141414141414141e-07,
      "loss": -0.0,
      "num_tokens": 4013039.0,
      "reward": 0.48281246423721313,
      "reward_std": 0.3318884074687958,
      "rewards/video_r1_accuracy_reward/mean": 0.4609375,
      "rewards/video_r1_accuracy_reward/std": 0.5004304051399231,
      "rewards/video_r1_format_reward/mean": 0.8984375,
      "rewards/video_r1_format_reward/std": 0.3032590448856354,
      "step": 35
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 262.0,
      "completions/max_terminated_length": 262.0,
      "completions/mean_length": 102.671875,
      "completions/mean_terminated_length": 102.671875,
      "completions/min_length": 49.0,
      "completions/min_terminated_length": 49.0,
      "entropy": 0.8404669761657715,
      "epoch": 0.09090909090909091,
      "frac_reward_zero_std": 0.25,
      "grad_norm": 1.946840524673462,
      "learning_rate": 9.116161616161616e-07,
      "loss": -0.0,
      "num_tokens": 4118005.0,
      "reward": 0.604296863079071,
      "reward_std": 0.27570241689682007,
      "rewards/video_r1_accuracy_reward/mean": 0.5859375,
      "rewards/video_r1_accuracy_reward/std": 0.49449479579925537,
      "rewards/video_r1_format_reward/mean": 0.953125,
      "rewards/video_r1_format_reward/std": 0.21220162510871887,
      "step": 36
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 242.0,
      "completions/max_terminated_length": 242.0,
      "completions/mean_length": 112.7265625,
      "completions/mean_terminated_length": 112.7265625,
      "completions/min_length": 40.0,
      "completions/min_terminated_length": 40.0,
      "entropy": 0.8344206809997559,
      "epoch": 0.09343434343434344,
      "frac_reward_zero_std": 0.25,
      "grad_norm": 1.875348687171936,
      "learning_rate": 9.09090909090909e-07,
      "loss": -0.0,
      "num_tokens": 4232098.0,
      "reward": 0.701171875,
      "reward_std": 0.3029305934906006,
      "rewards/video_r1_accuracy_reward/mean": 0.6875,
      "rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
      "rewards/video_r1_format_reward/mean": 0.9609375,
      "rewards/video_r1_format_reward/std": 0.194504976272583,
      "step": 37
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 324.0,
      "completions/max_terminated_length": 324.0,
      "completions/mean_length": 98.640625,
      "completions/mean_terminated_length": 98.640625,
      "completions/min_length": 36.0,
      "completions/min_terminated_length": 36.0,
      "entropy": 0.8952550888061523,
      "epoch": 0.09595959595959595,
      "frac_reward_zero_std": 0.375,
      "grad_norm": 1.779735803604126,
      "learning_rate": 9.065656565656565e-07,
      "loss": 0.0,
      "num_tokens": 4337628.0,
      "reward": 0.7101562023162842,
      "reward_std": 0.2632066607475281,
      "rewards/video_r1_accuracy_reward/mean": 0.6953125,
      "rewards/video_r1_accuracy_reward/std": 0.46208351850509644,
      "rewards/video_r1_format_reward/mean": 0.9921875,
      "rewards/video_r1_format_reward/std": 0.0883883461356163,
      "step": 38
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 263.0,
      "completions/max_terminated_length": 263.0,
      "completions/mean_length": 102.375,
      "completions/mean_terminated_length": 102.375,
      "completions/min_length": 45.0,
      "completions/min_terminated_length": 45.0,
      "entropy": 0.903910756111145,
      "epoch": 0.09848484848484848,
      "frac_reward_zero_std": 0.1875,
      "grad_norm": 2.0209107398986816,
      "learning_rate": 9.040404040404041e-07,
      "loss": -0.0,
      "num_tokens": 4437268.0,
      "reward": 0.5621093511581421,
      "reward_std": 0.36356228590011597,
      "rewards/video_r1_accuracy_reward/mean": 0.5390625,
      "rewards/video_r1_accuracy_reward/std": 0.5004304051399231,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 39
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 384.0,
      "completions/max_terminated_length": 242.0,
      "completions/mean_length": 94.59375,
      "completions/mean_terminated_length": 92.31495666503906,
      "completions/min_length": 25.0,
      "completions/min_terminated_length": 25.0,
      "entropy": 0.87197345495224,
      "epoch": 0.10101010101010101,
      "frac_reward_zero_std": 0.375,
      "grad_norm": 1.7272930145263672,
      "learning_rate": 9.015151515151514e-07,
      "loss": 0.0,
      "num_tokens": 4552040.0,
      "reward": 0.708984375,
      "reward_std": 0.2568144202232361,
      "rewards/video_r1_accuracy_reward/mean": 0.6953125,
      "rewards/video_r1_accuracy_reward/std": 0.46208351850509644,
      "rewards/video_r1_format_reward/mean": 0.96875,
      "rewards/video_r1_format_reward/std": 0.1746762990951538,
      "step": 40
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 195.0,
      "completions/max_terminated_length": 195.0,
      "completions/mean_length": 94.8515625,
      "completions/mean_terminated_length": 94.8515625,
      "completions/min_length": 49.0,
      "completions/min_terminated_length": 49.0,
      "entropy": 0.9597364664077759,
      "epoch": 0.10353535353535354,
      "frac_reward_zero_std": 0.5,
      "grad_norm": 1.6093056201934814,
      "learning_rate": 8.98989898989899e-07,
      "loss": -0.0,
      "num_tokens": 4640853.0,
      "reward": 0.576953113079071,
      "reward_std": 0.216628760099411,
      "rewards/video_r1_accuracy_reward/mean": 0.5546875,
      "rewards/video_r1_accuracy_reward/std": 0.4989531338214874,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 41
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 199.0,
      "completions/max_terminated_length": 199.0,
      "completions/mean_length": 84.984375,
      "completions/mean_terminated_length": 84.984375,
      "completions/min_length": 43.0,
      "completions/min_terminated_length": 43.0,
      "entropy": 0.9007290601730347,
      "epoch": 0.10606060606060606,
      "frac_reward_zero_std": 0.25,
      "grad_norm": 2.0971033573150635,
      "learning_rate": 8.964646464646465e-07,
      "loss": -0.0,
      "num_tokens": 4748699.0,
      "reward": 0.516796886920929,
      "reward_std": 0.3205876350402832,
      "rewards/video_r1_accuracy_reward/mean": 0.4921875,
      "rewards/video_r1_accuracy_reward/std": 0.5019033551216125,
      "rewards/video_r1_format_reward/mean": 0.984375,
      "rewards/video_r1_format_reward/std": 0.12450689822435379,
      "step": 42
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 173.0,
      "completions/max_terminated_length": 173.0,
      "completions/mean_length": 89.2890625,
      "completions/mean_terminated_length": 89.2890625,
      "completions/min_length": 40.0,
      "completions/min_terminated_length": 40.0,
      "entropy": 0.9400933980941772,
      "epoch": 0.10858585858585859,
      "frac_reward_zero_std": 0.375,
      "grad_norm": 1.9215672016143799,
      "learning_rate": 8.939393939393938e-07,
      "loss": -0.0,
      "num_tokens": 4867288.0,
      "reward": 0.6199219226837158,
      "reward_std": 0.26100048422813416,
      "rewards/video_r1_accuracy_reward/mean": 0.6015625,
      "rewards/video_r1_accuracy_reward/std": 0.4915000796318054,
      "rewards/video_r1_format_reward/mean": 0.96875,
      "rewards/video_r1_format_reward/std": 0.1746762990951538,
      "step": 43
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 233.0,
      "completions/max_terminated_length": 233.0,
      "completions/mean_length": 86.1953125,
      "completions/mean_terminated_length": 86.1953125,
      "completions/min_length": 40.0,
      "completions/min_terminated_length": 40.0,
      "entropy": 0.9206110835075378,
      "epoch": 0.1111111111111111,
      "frac_reward_zero_std": 0.1875,
      "grad_norm": 2.1925864219665527,
      "learning_rate": 8.914141414141414e-07,
      "loss": -0.0,
      "num_tokens": 4986809.0,
      "reward": 0.671875,
      "reward_std": 0.2928203344345093,
      "rewards/video_r1_accuracy_reward/mean": 0.65625,
      "rewards/video_r1_accuracy_reward/std": 0.47682511806488037,
      "rewards/video_r1_format_reward/mean": 0.96875,
      "rewards/video_r1_format_reward/std": 0.1746762990951538,
      "step": 44
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 217.0,
      "completions/max_terminated_length": 217.0,
      "completions/mean_length": 90.3046875,
      "completions/mean_terminated_length": 90.3046875,
      "completions/min_length": 41.0,
      "completions/min_terminated_length": 41.0,
      "entropy": 0.974540650844574,
      "epoch": 0.11363636363636363,
      "frac_reward_zero_std": 0.4375,
      "grad_norm": 1.7343508005142212,
      "learning_rate": 8.888888888888888e-07,
      "loss": 0.0,
      "num_tokens": 5092520.0,
      "reward": 0.725390613079071,
      "reward_std": 0.23314352333545685,
      "rewards/video_r1_accuracy_reward/mean": 0.7109375,
      "rewards/video_r1_accuracy_reward/std": 0.45510825514793396,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 45
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 167.0,
      "completions/max_terminated_length": 167.0,
      "completions/mean_length": 80.625,
      "completions/mean_terminated_length": 80.625,
      "completions/min_length": 38.0,
      "completions/min_terminated_length": 38.0,
      "entropy": 0.9657076597213745,
      "epoch": 0.11616161616161616,
      "frac_reward_zero_std": 0.375,
      "grad_norm": 1.9508693218231201,
      "learning_rate": 8.863636363636363e-07,
      "loss": -0.0,
      "num_tokens": 5205544.0,
      "reward": 0.746874988079071,
      "reward_std": 0.2970072031021118,
      "rewards/video_r1_accuracy_reward/mean": 0.734375,
      "rewards/video_r1_accuracy_reward/std": 0.44340085983276367,
      "rewards/video_r1_format_reward/mean": 0.984375,
      "rewards/video_r1_format_reward/std": 0.12450689822435379,
      "step": 46
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 223.0,
      "completions/max_terminated_length": 223.0,
      "completions/mean_length": 88.828125,
      "completions/mean_terminated_length": 88.828125,
      "completions/min_length": 37.0,
      "completions/min_terminated_length": 37.0,
      "entropy": 0.9713828563690186,
      "epoch": 0.11868686868686869,
      "frac_reward_zero_std": 0.4375,
      "grad_norm": 1.7221375703811646,
      "learning_rate": 8.838383838383838e-07,
      "loss": -0.0,
      "num_tokens": 5319394.0,
      "reward": 0.5695312023162842,
      "reward_std": 0.26034435629844666,
      "rewards/video_r1_accuracy_reward/mean": 0.546875,
      "rewards/video_r1_accuracy_reward/std": 0.4997538626194,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 47
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 150.0,
      "completions/max_terminated_length": 150.0,
      "completions/mean_length": 80.8515625,
      "completions/mean_terminated_length": 80.8515625,
      "completions/min_length": 40.0,
      "completions/min_terminated_length": 40.0,
      "entropy": 0.9206292629241943,
      "epoch": 0.12121212121212122,
      "frac_reward_zero_std": 0.375,
      "grad_norm": 1.957437515258789,
      "learning_rate": 8.813131313131313e-07,
      "loss": -0.0,
      "num_tokens": 5412327.0,
      "reward": 0.643750011920929,
      "reward_std": 0.27909553050994873,
      "rewards/video_r1_accuracy_reward/mean": 0.625,
      "rewards/video_r1_accuracy_reward/std": 0.4860251843929291,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 48
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 152.0,
      "completions/max_terminated_length": 152.0,
      "completions/mean_length": 82.21875,
      "completions/mean_terminated_length": 82.21875,
      "completions/min_length": 37.0,
      "completions/min_terminated_length": 37.0,
      "entropy": 0.9331451654434204,
      "epoch": 0.12373737373737374,
      "frac_reward_zero_std": 0.375,
      "grad_norm": 1.8745862245559692,
      "learning_rate": 8.787878787878787e-07,
      "loss": -0.0,
      "num_tokens": 5516315.0,
      "reward": 0.4429687559604645,
      "reward_std": 0.25346940755844116,
      "rewards/video_r1_accuracy_reward/mean": 0.4140625,
      "rewards/video_r1_accuracy_reward/std": 0.49449479579925537,
      "rewards/video_r1_format_reward/mean": 0.9921875,
      "rewards/video_r1_format_reward/std": 0.0883883461356163,
      "step": 49
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 174.0,
      "completions/max_terminated_length": 174.0,
      "completions/mean_length": 79.7890625,
      "completions/mean_terminated_length": 79.7890625,
      "completions/min_length": 43.0,
      "completions/min_terminated_length": 43.0,
      "entropy": 0.9588379859924316,
      "epoch": 0.12626262626262627,
      "frac_reward_zero_std": 0.25,
      "grad_norm": 2.0629332065582275,
      "learning_rate": 8.762626262626263e-07,
      "loss": -0.0,
      "num_tokens": 5615632.0,
      "reward": 0.5464843511581421,
      "reward_std": 0.34427377581596375,
      "rewards/video_r1_accuracy_reward/mean": 0.5234375,
      "rewards/video_r1_accuracy_reward/std": 0.5014128684997559,
      "rewards/video_r1_format_reward/mean": 0.984375,
      "rewards/video_r1_format_reward/std": 0.12450689822435379,
      "step": 50
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 145.0,
      "completions/max_terminated_length": 145.0,
      "completions/mean_length": 79.265625,
      "completions/mean_terminated_length": 79.265625,
      "completions/min_length": 41.0,
      "completions/min_terminated_length": 41.0,
      "entropy": 0.9568088054656982,
      "epoch": 0.12878787878787878,
      "frac_reward_zero_std": 0.3125,
      "grad_norm": 2.221264123916626,
      "learning_rate": 8.737373737373737e-07,
      "loss": 0.0,
      "num_tokens": 5714786.0,
      "reward": 0.5914062261581421,
      "reward_std": 0.2784692645072937,
      "rewards/video_r1_accuracy_reward/mean": 0.5703125,
      "rewards/video_r1_accuracy_reward/std": 0.4969765841960907,
      "rewards/video_r1_format_reward/mean": 0.9921875,
      "rewards/video_r1_format_reward/std": 0.0883883461356163,
      "step": 51
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 148.0,
      "completions/max_terminated_length": 148.0,
      "completions/mean_length": 74.5625,
      "completions/mean_terminated_length": 74.5625,
      "completions/min_length": 41.0,
      "completions/min_terminated_length": 41.0,
      "entropy": 0.9146069288253784,
      "epoch": 0.13131313131313133,
      "frac_reward_zero_std": 0.4375,
      "grad_norm": 1.8530735969543457,
      "learning_rate": 8.712121212121211e-07,
      "loss": -0.0,
      "num_tokens": 5827042.0,
      "reward": 0.598828136920929,
      "reward_std": 0.22847865521907806,
      "rewards/video_r1_accuracy_reward/mean": 0.578125,
      "rewards/video_r1_accuracy_reward/std": 0.4957992732524872,
      "rewards/video_r1_format_reward/mean": 0.9921875,
      "rewards/video_r1_format_reward/std": 0.0883883461356163,
      "step": 52
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 180.0,
      "completions/max_terminated_length": 180.0,
      "completions/mean_length": 76.890625,
      "completions/mean_terminated_length": 76.890625,
      "completions/min_length": 34.0,
      "completions/min_terminated_length": 34.0,
      "entropy": 0.9405485987663269,
      "epoch": 0.13383838383838384,
      "frac_reward_zero_std": 0.375,
      "grad_norm": 1.9973477125167847,
      "learning_rate": 8.686868686868687e-07,
      "loss": -0.0,
      "num_tokens": 5927692.0,
      "reward": 0.666015625,
      "reward_std": 0.2661140561103821,
      "rewards/video_r1_accuracy_reward/mean": 0.6484375,
      "rewards/video_r1_accuracy_reward/std": 0.4793342351913452,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 53
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 162.0,
      "completions/max_terminated_length": 162.0,
      "completions/mean_length": 74.015625,
      "completions/mean_terminated_length": 74.015625,
      "completions/min_length": 27.0,
      "completions/min_terminated_length": 27.0,
      "entropy": 1.0538530349731445,
      "epoch": 0.13636363636363635,
      "frac_reward_zero_std": 0.375,
      "grad_norm": 2.2168405055999756,
      "learning_rate": 8.661616161616161e-07,
      "loss": 0.0,
      "num_tokens": 6036374.0,
      "reward": 0.5398437976837158,
      "reward_std": 0.2713738679885864,
      "rewards/video_r1_accuracy_reward/mean": 0.515625,
      "rewards/video_r1_accuracy_reward/std": 0.5017194747924805,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 54
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 384.0,
      "completions/max_terminated_length": 144.0,
      "completions/mean_length": 79.8203125,
      "completions/mean_terminated_length": 77.42520141601562,
      "completions/min_length": 33.0,
      "completions/min_terminated_length": 33.0,
      "entropy": 0.9944831132888794,
      "epoch": 0.1388888888888889,
      "frac_reward_zero_std": 0.5,
      "grad_norm": 1.8037936687469482,
      "learning_rate": 8.636363636363636e-07,
      "loss": 0.0,
      "num_tokens": 6138703.0,
      "reward": 0.7621093988418579,
      "reward_std": 0.2209778130054474,
      "rewards/video_r1_accuracy_reward/mean": 0.75,
      "rewards/video_r1_accuracy_reward/std": 0.434714138507843,
      "rewards/video_r1_format_reward/mean": 0.9921875,
      "rewards/video_r1_format_reward/std": 0.0883883461356163,
      "step": 55
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 126.0,
      "completions/max_terminated_length": 126.0,
      "completions/mean_length": 73.3359375,
      "completions/mean_terminated_length": 73.3359375,
      "completions/min_length": 25.0,
      "completions/min_terminated_length": 25.0,
      "entropy": 1.032738208770752,
      "epoch": 0.1414141414141414,
      "frac_reward_zero_std": 0.5625,
      "grad_norm": 1.762905478477478,
      "learning_rate": 8.611111111111111e-07,
      "loss": -0.0,
      "num_tokens": 6236354.0,
      "reward": 0.6285156011581421,
      "reward_std": 0.18521998822689056,
      "rewards/video_r1_accuracy_reward/mean": 0.609375,
      "rewards/video_r1_accuracy_reward/std": 0.4898075461387634,
      "rewards/video_r1_format_reward/mean": 0.9921875,
      "rewards/video_r1_format_reward/std": 0.0883883461356163,
      "step": 56
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 135.0,
      "completions/max_terminated_length": 135.0,
      "completions/mean_length": 75.171875,
      "completions/mean_terminated_length": 75.171875,
      "completions/min_length": 30.0,
      "completions/min_terminated_length": 30.0,
      "entropy": 1.0041790008544922,
      "epoch": 0.14393939393939395,
      "frac_reward_zero_std": 0.4375,
      "grad_norm": 1.8384010791778564,
      "learning_rate": 8.585858585858586e-07,
      "loss": 0.0,
      "num_tokens": 6336768.0,
      "reward": 0.806640625,
      "reward_std": 0.2180173397064209,
      "rewards/video_r1_accuracy_reward/mean": 0.796875,
      "rewards/video_r1_accuracy_reward/std": 0.40390563011169434,
      "rewards/video_r1_format_reward/mean": 0.9921875,
      "rewards/video_r1_format_reward/std": 0.0883883461356163,
      "step": 57
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 145.0,
      "completions/max_terminated_length": 145.0,
      "completions/mean_length": 70.8828125,
      "completions/mean_terminated_length": 70.8828125,
      "completions/min_length": 41.0,
      "completions/min_terminated_length": 41.0,
      "entropy": 1.0715279579162598,
      "epoch": 0.14646464646464646,
      "frac_reward_zero_std": 0.3125,
      "grad_norm": 2.389326810836792,
      "learning_rate": 8.56060606060606e-07,
      "loss": 0.0,
      "num_tokens": 6436537.0,
      "reward": 0.627734363079071,
      "reward_std": 0.25768929719924927,
      "rewards/video_r1_accuracy_reward/mean": 0.609375,
      "rewards/video_r1_accuracy_reward/std": 0.4898075461387634,
      "rewards/video_r1_format_reward/mean": 0.9765625,
      "rewards/video_r1_format_reward/std": 0.15188287198543549,
      "step": 58
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 151.0,
      "completions/max_terminated_length": 151.0,
      "completions/mean_length": 75.09375,
      "completions/mean_terminated_length": 75.09375,
      "completions/min_length": 34.0,
      "completions/min_terminated_length": 34.0,
      "entropy": 1.02361261844635,
      "epoch": 0.14898989898989898,
      "frac_reward_zero_std": 0.3125,
      "grad_norm": 2.1776626110076904,
      "learning_rate": 8.535353535353534e-07,
      "loss": -0.0,
      "num_tokens": 6547989.0,
      "reward": 0.5546875,
      "reward_std": 0.3171003460884094,
      "rewards/video_r1_accuracy_reward/mean": 0.53125,
      "rewards/video_r1_accuracy_reward/std": 0.5009832978248596,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 59
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 141.0,
      "completions/max_terminated_length": 141.0,
      "completions/mean_length": 81.34375,
      "completions/mean_terminated_length": 81.34375,
      "completions/min_length": 38.0,
      "completions/min_terminated_length": 38.0,
      "entropy": 1.0184237957000732,
      "epoch": 0.15151515151515152,
      "frac_reward_zero_std": 0.5,
      "grad_norm": 1.7039854526519775,
      "learning_rate": 8.51010101010101e-07,
      "loss": -0.0,
      "num_tokens": 6661905.0,
      "reward": 0.703125,
      "reward_std": 0.2143877148628235,
      "rewards/video_r1_accuracy_reward/mean": 0.6875,
      "rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 60
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 138.0,
      "completions/max_terminated_length": 138.0,
      "completions/mean_length": 76.8125,
      "completions/mean_terminated_length": 76.8125,
      "completions/min_length": 31.0,
      "completions/min_terminated_length": 31.0,
      "entropy": 1.012675404548645,
      "epoch": 0.15404040404040403,
      "frac_reward_zero_std": 0.4375,
      "grad_norm": 1.9084367752075195,
      "learning_rate": 8.484848484848484e-07,
      "loss": -0.0,
      "num_tokens": 6762833.0,
      "reward": 0.6585937738418579,
      "reward_std": 0.22463490068912506,
      "rewards/video_r1_accuracy_reward/mean": 0.640625,
      "rewards/video_r1_accuracy_reward/std": 0.481702595949173,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 61
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 168.0,
      "completions/max_terminated_length": 168.0,
      "completions/mean_length": 76.7890625,
      "completions/mean_terminated_length": 76.7890625,
      "completions/min_length": 34.0,
      "completions/min_terminated_length": 34.0,
      "entropy": 0.9767247438430786,
      "epoch": 0.15656565656565657,
      "frac_reward_zero_std": 0.375,
      "grad_norm": 1.992623209953308,
      "learning_rate": 8.459595959595959e-07,
      "loss": -0.0,
      "num_tokens": 6864190.0,
      "reward": 0.666015625,
      "reward_std": 0.27260246872901917,
      "rewards/video_r1_accuracy_reward/mean": 0.6484375,
      "rewards/video_r1_accuracy_reward/std": 0.4793342351913452,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 62
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 149.0,
      "completions/max_terminated_length": 149.0,
      "completions/mean_length": 73.65625,
      "completions/mean_terminated_length": 73.65625,
      "completions/min_length": 35.0,
      "completions/min_terminated_length": 35.0,
      "entropy": 0.9239650368690491,
      "epoch": 0.1590909090909091,
      "frac_reward_zero_std": 0.4375,
      "grad_norm": 2.0338518619537354,
      "learning_rate": 8.434343434343434e-07,
      "loss": 0.0,
      "num_tokens": 6969522.0,
      "reward": 0.732421875,
      "reward_std": 0.22757862508296967,
      "rewards/video_r1_accuracy_reward/mean": 0.71875,
      "rewards/video_r1_accuracy_reward/std": 0.4513758420944214,
      "rewards/video_r1_format_reward/mean": 0.9921875,
      "rewards/video_r1_format_reward/std": 0.0883883461356163,
      "step": 63
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 137.0,
      "completions/max_terminated_length": 137.0,
      "completions/mean_length": 75.9609375,
      "completions/mean_terminated_length": 75.9609375,
      "completions/min_length": 34.0,
      "completions/min_terminated_length": 34.0,
      "entropy": 0.8829290270805359,
      "epoch": 0.16161616161616163,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 1.6126983165740967,
      "learning_rate": 8.409090909090909e-07,
      "loss": 0.0,
      "num_tokens": 7077061.0,
      "reward": 0.762499988079071,
      "reward_std": 0.09695503860712051,
      "rewards/video_r1_accuracy_reward/mean": 0.75,
      "rewards/video_r1_accuracy_reward/std": 0.434714138507843,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 64
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 133.0,
      "completions/max_terminated_length": 133.0,
      "completions/mean_length": 72.6171875,
      "completions/mean_terminated_length": 72.6171875,
      "completions/min_length": 32.0,
      "completions/min_terminated_length": 32.0,
      "entropy": 0.8675624132156372,
      "epoch": 0.16414141414141414,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 1.3695436716079712,
      "learning_rate": 8.383838383838383e-07,
      "loss": -0.0,
      "num_tokens": 7192844.0,
      "reward": 0.7476562261581421,
      "reward_std": 0.10669228434562683,
      "rewards/video_r1_accuracy_reward/mean": 0.734375,
      "rewards/video_r1_accuracy_reward/std": 0.44340085983276367,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 65
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 147.0,
      "completions/max_terminated_length": 147.0,
      "completions/mean_length": 75.734375,
      "completions/mean_terminated_length": 75.734375,
      "completions/min_length": 30.0,
      "completions/min_terminated_length": 30.0,
      "entropy": 0.9987907409667969,
      "epoch": 0.16666666666666666,
      "frac_reward_zero_std": 0.625,
      "grad_norm": 1.6358082294464111,
      "learning_rate": 8.358585858585859e-07,
      "loss": -0.0,
      "num_tokens": 7303458.0,
      "reward": 0.7699218988418579,
      "reward_std": 0.16915903985500336,
      "rewards/video_r1_accuracy_reward/mean": 0.7578125,
      "rewards/video_r1_accuracy_reward/std": 0.4300905168056488,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 66
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 156.0,
      "completions/max_terminated_length": 156.0,
      "completions/mean_length": 74.1328125,
      "completions/mean_terminated_length": 74.1328125,
      "completions/min_length": 27.0,
      "completions/min_terminated_length": 27.0,
      "entropy": 0.98237144947052,
      "epoch": 0.1691919191919192,
      "frac_reward_zero_std": 0.5625,
      "grad_norm": 1.6880619525909424,
      "learning_rate": 8.333333333333333e-07,
      "loss": -0.0,
      "num_tokens": 7419707.0,
      "reward": 0.6585937738418579,
      "reward_std": 0.1944032609462738,
      "rewards/video_r1_accuracy_reward/mean": 0.640625,
      "rewards/video_r1_accuracy_reward/std": 0.481702595949173,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 67
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 136.0,
      "completions/max_terminated_length": 136.0,
      "completions/mean_length": 71.609375,
      "completions/mean_terminated_length": 71.609375,
      "completions/min_length": 29.0,
      "completions/min_terminated_length": 29.0,
      "entropy": 0.9826507568359375,
      "epoch": 0.1717171717171717,
      "frac_reward_zero_std": 0.4375,
      "grad_norm": 1.8394359350204468,
      "learning_rate": 8.308080808080807e-07,
      "loss": -0.0,
      "num_tokens": 7521593.0,
      "reward": 0.6808593273162842,
      "reward_std": 0.25810331106185913,
      "rewards/video_r1_accuracy_reward/mean": 0.6640625,
      "rewards/video_r1_accuracy_reward/std": 0.47417303919792175,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 68
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 162.0,
      "completions/max_terminated_length": 162.0,
      "completions/mean_length": 70.4609375,
      "completions/mean_terminated_length": 70.4609375,
      "completions/min_length": 32.0,
      "completions/min_terminated_length": 32.0,
      "entropy": 0.9084649085998535,
      "epoch": 0.17424242424242425,
      "frac_reward_zero_std": 0.4375,
      "grad_norm": 2.100003480911255,
      "learning_rate": 8.282828282828283e-07,
      "loss": 0.0,
      "num_tokens": 7627260.0,
      "reward": 0.740234375,
      "reward_std": 0.23314350843429565,
      "rewards/video_r1_accuracy_reward/mean": 0.7265625,
      "rewards/video_r1_accuracy_reward/std": 0.447474867105484,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 69
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 162.0,
      "completions/max_terminated_length": 162.0,
      "completions/mean_length": 80.4453125,
      "completions/mean_terminated_length": 80.4453125,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 0.9652878046035767,
      "epoch": 0.17676767676767677,
      "frac_reward_zero_std": 0.625,
      "grad_norm": 1.6002858877182007,
      "learning_rate": 8.257575757575757e-07,
      "loss": -0.0,
      "num_tokens": 7720845.0,
      "reward": 0.6734374761581421,
      "reward_std": 0.15718072652816772,
      "rewards/video_r1_accuracy_reward/mean": 0.65625,
      "rewards/video_r1_accuracy_reward/std": 0.47682511806488037,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 70
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 177.0,
      "completions/max_terminated_length": 177.0,
      "completions/mean_length": 79.203125,
      "completions/mean_terminated_length": 79.203125,
      "completions/min_length": 33.0,
      "completions/min_terminated_length": 33.0,
      "entropy": 1.0387096405029297,
      "epoch": 0.17929292929292928,
      "frac_reward_zero_std": 0.5,
      "grad_norm": 1.9980230331420898,
      "learning_rate": 8.232323232323232e-07,
      "loss": -0.0,
      "num_tokens": 7818975.0,
      "reward": 0.49531248211860657,
      "reward_std": 0.2143877148628235,
      "rewards/video_r1_accuracy_reward/mean": 0.46875,
      "rewards/video_r1_accuracy_reward/std": 0.5009832978248596,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 71
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 139.0,
      "completions/max_terminated_length": 139.0,
      "completions/mean_length": 73.6015625,
      "completions/mean_terminated_length": 73.6015625,
      "completions/min_length": 36.0,
      "completions/min_terminated_length": 36.0,
      "entropy": 1.002305269241333,
      "epoch": 0.18181818181818182,
      "frac_reward_zero_std": 0.5,
      "grad_norm": 1.7402448654174805,
      "learning_rate": 8.207070707070707e-07,
      "loss": -0.0,
      "num_tokens": 7928484.0,
      "reward": 0.606640636920929,
      "reward_std": 0.20465511083602905,
      "rewards/video_r1_accuracy_reward/mean": 0.5859375,
      "rewards/video_r1_accuracy_reward/std": 0.49449479579925537,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 72
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 190.0,
      "completions/max_terminated_length": 190.0,
      "completions/mean_length": 74.6953125,
      "completions/mean_terminated_length": 74.6953125,
      "completions/min_length": 29.0,
      "completions/min_terminated_length": 29.0,
      "entropy": 0.903598427772522,
      "epoch": 0.18434343434343434,
      "frac_reward_zero_std": 0.5,
      "grad_norm": 1.6977179050445557,
      "learning_rate": 8.181818181818182e-07,
      "loss": -0.0,
      "num_tokens": 8043461.0,
      "reward": 0.5992187261581421,
      "reward_std": 0.21215128898620605,
      "rewards/video_r1_accuracy_reward/mean": 0.578125,
      "rewards/video_r1_accuracy_reward/std": 0.4957992732524872,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 73
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 142.0,
      "completions/max_terminated_length": 142.0,
      "completions/mean_length": 76.6328125,
      "completions/mean_terminated_length": 76.6328125,
      "completions/min_length": 41.0,
      "completions/min_terminated_length": 41.0,
      "entropy": 0.9879953265190125,
      "epoch": 0.18686868686868688,
      "frac_reward_zero_std": 0.6875,
      "grad_norm": 1.3695952892303467,
      "learning_rate": 8.156565656565656e-07,
      "loss": -0.0,
      "num_tokens": 8141774.0,
      "reward": 0.569531261920929,
      "reward_std": 0.12444031983613968,
      "rewards/video_r1_accuracy_reward/mean": 0.546875,
      "rewards/video_r1_accuracy_reward/std": 0.4997538626194,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 74
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 137.0,
      "completions/max_terminated_length": 137.0,
      "completions/mean_length": 73.5625,
      "completions/mean_terminated_length": 73.5625,
      "completions/min_length": 40.0,
      "completions/min_terminated_length": 40.0,
      "entropy": 1.0036814212799072,
      "epoch": 0.1893939393939394,
      "frac_reward_zero_std": 0.375,
      "grad_norm": 2.0377132892608643,
      "learning_rate": 8.131313131313132e-07,
      "loss": 0.0,
      "num_tokens": 8238342.0,
      "reward": 0.673046886920929,
      "reward_std": 0.2601749897003174,
      "rewards/video_r1_accuracy_reward/mean": 0.65625,
      "rewards/video_r1_accuracy_reward/std": 0.47682511806488037,
      "rewards/video_r1_format_reward/mean": 0.9921875,
      "rewards/video_r1_format_reward/std": 0.0883883461356163,
      "step": 75
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 227.0,
      "completions/max_terminated_length": 227.0,
      "completions/mean_length": 81.3046875,
      "completions/mean_terminated_length": 81.3046875,
      "completions/min_length": 33.0,
      "completions/min_terminated_length": 33.0,
      "entropy": 0.9508095383644104,
      "epoch": 0.1919191919191919,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 1.1749444007873535,
      "learning_rate": 8.106060606060605e-07,
      "loss": -0.0,
      "num_tokens": 8350589.0,
      "reward": 0.6437499523162842,
      "reward_std": 0.12493351101875305,
      "rewards/video_r1_accuracy_reward/mean": 0.625,
      "rewards/video_r1_accuracy_reward/std": 0.4860251843929291,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 76
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 168.0,
      "completions/max_terminated_length": 168.0,
      "completions/mean_length": 71.0390625,
      "completions/mean_terminated_length": 71.0390625,
      "completions/min_length": 27.0,
      "completions/min_terminated_length": 27.0,
      "entropy": 0.9800167083740234,
      "epoch": 0.19444444444444445,
      "frac_reward_zero_std": 0.625,
      "grad_norm": 1.6353753805160522,
      "learning_rate": 8.08080808080808e-07,
      "loss": 0.0,
      "num_tokens": 8454770.0,
      "reward": 0.7105468511581421,
      "reward_std": 0.1551697999238968,
      "rewards/video_r1_accuracy_reward/mean": 0.6953125,
      "rewards/video_r1_accuracy_reward/std": 0.46208351850509644,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 77
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 134.0,
      "completions/max_terminated_length": 134.0,
      "completions/mean_length": 65.9296875,
      "completions/mean_terminated_length": 65.9296875,
      "completions/min_length": 30.0,
      "completions/min_terminated_length": 30.0,
      "entropy": 0.9639301300048828,
      "epoch": 0.19696969696969696,
      "frac_reward_zero_std": 0.375,
      "grad_norm": 2.4808032512664795,
      "learning_rate": 8.055555555555556e-07,
      "loss": -0.0,
      "num_tokens": 8552857.0,
      "reward": 0.5023437738418579,
      "reward_std": 0.23973365128040314,
      "rewards/video_r1_accuracy_reward/mean": 0.4765625,
      "rewards/video_r1_accuracy_reward/std": 0.5014128684997559,
      "rewards/video_r1_format_reward/mean": 0.9921875,
      "rewards/video_r1_format_reward/std": 0.0883883461356163,
      "step": 78
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 164.0,
      "completions/max_terminated_length": 164.0,
      "completions/mean_length": 69.28125,
      "completions/mean_terminated_length": 69.28125,
      "completions/min_length": 36.0,
      "completions/min_terminated_length": 36.0,
      "entropy": 0.9725464582443237,
      "epoch": 0.1994949494949495,
      "frac_reward_zero_std": 0.5,
      "grad_norm": 1.850342869758606,
      "learning_rate": 8.030303030303029e-07,
      "loss": -0.0,
      "num_tokens": 8649085.0,
      "reward": 0.5843750238418579,
      "reward_std": 0.21215128898620605,
      "rewards/video_r1_accuracy_reward/mean": 0.5625,
      "rewards/video_r1_accuracy_reward/std": 0.49802759289741516,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 79
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 154.0,
      "completions/max_terminated_length": 154.0,
      "completions/mean_length": 72.0078125,
      "completions/mean_terminated_length": 72.0078125,
      "completions/min_length": 32.0,
      "completions/min_terminated_length": 32.0,
      "entropy": 0.9206636548042297,
      "epoch": 0.20202020202020202,
      "frac_reward_zero_std": 0.625,
      "grad_norm": 1.5013765096664429,
      "learning_rate": 8.005050505050505e-07,
      "loss": 0.0,
      "num_tokens": 8745462.0,
      "reward": 0.614062488079071,
      "reward_std": 0.15292873978614807,
      "rewards/video_r1_accuracy_reward/mean": 0.59375,
      "rewards/video_r1_accuracy_reward/std": 0.4930621087551117,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 80
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 190.0,
      "completions/max_terminated_length": 190.0,
      "completions/mean_length": 72.53125,
      "completions/mean_terminated_length": 72.53125,
      "completions/min_length": 38.0,
      "completions/min_terminated_length": 38.0,
      "entropy": 0.9708524942398071,
      "epoch": 0.20454545454545456,
      "frac_reward_zero_std": 0.6875,
      "grad_norm": 1.4737831354141235,
      "learning_rate": 7.97979797979798e-07,
      "loss": 0.0,
      "num_tokens": 8852826.0,
      "reward": 0.6734374761581421,
      "reward_std": 0.1426815390586853,
      "rewards/video_r1_accuracy_reward/mean": 0.65625,
      "rewards/video_r1_accuracy_reward/std": 0.47682511806488037,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 81
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 136.0,
      "completions/max_terminated_length": 136.0,
      "completions/mean_length": 73.265625,
      "completions/mean_terminated_length": 73.265625,
      "completions/min_length": 41.0,
      "completions/min_terminated_length": 41.0,
      "entropy": 0.9690735936164856,
      "epoch": 0.20707070707070707,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 1.4358322620391846,
      "learning_rate": 7.954545454545454e-07,
      "loss": -0.0,
      "num_tokens": 8950708.0,
      "reward": 0.740234375,
      "reward_std": 0.10770007222890854,
      "rewards/video_r1_accuracy_reward/mean": 0.7265625,
      "rewards/video_r1_accuracy_reward/std": 0.447474867105484,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 82
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 123.0,
      "completions/max_terminated_length": 123.0,
      "completions/mean_length": 58.984375,
      "completions/mean_terminated_length": 58.984375,
      "completions/min_length": 24.0,
      "completions/min_terminated_length": 24.0,
      "entropy": 0.9185530543327332,
      "epoch": 0.20959595959595959,
      "frac_reward_zero_std": 0.5,
      "grad_norm": 2.4200658798217773,
      "learning_rate": 7.929292929292929e-07,
      "loss": -0.0,
      "num_tokens": 9045026.0,
      "reward": 0.517578125,
      "reward_std": 0.21439234912395477,
      "rewards/video_r1_accuracy_reward/mean": 0.4921875,
      "rewards/video_r1_accuracy_reward/std": 0.5019033551216125,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 83
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 122.0,
      "completions/max_terminated_length": 122.0,
      "completions/mean_length": 65.3203125,
      "completions/mean_terminated_length": 65.3203125,
      "completions/min_length": 31.0,
      "completions/min_terminated_length": 31.0,
      "entropy": 0.9491331577301025,
      "epoch": 0.21212121212121213,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.0213149785995483,
      "learning_rate": 7.904040404040404e-07,
      "loss": -0.0,
      "num_tokens": 9152019.0,
      "reward": 0.740234375,
      "reward_std": 0.058214765042066574,
      "rewards/video_r1_accuracy_reward/mean": 0.7265625,
      "rewards/video_r1_accuracy_reward/std": 0.447474867105484,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 84
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 118.0,
      "completions/max_terminated_length": 118.0,
      "completions/mean_length": 69.296875,
      "completions/mean_terminated_length": 69.296875,
      "completions/min_length": 30.0,
      "completions/min_terminated_length": 30.0,
      "entropy": 0.9853086471557617,
      "epoch": 0.21464646464646464,
      "frac_reward_zero_std": 0.5625,
      "grad_norm": 1.8322724103927612,
      "learning_rate": 7.878787878787878e-07,
      "loss": 0.0,
      "num_tokens": 9262145.0,
      "reward": 0.666015625,
      "reward_std": 0.17940622568130493,
      "rewards/video_r1_accuracy_reward/mean": 0.6484375,
      "rewards/video_r1_accuracy_reward/std": 0.4793342351913452,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 85
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 123.0,
      "completions/max_terminated_length": 123.0,
      "completions/mean_length": 68.265625,
      "completions/mean_terminated_length": 68.265625,
      "completions/min_length": 31.0,
      "completions/min_terminated_length": 31.0,
      "entropy": 0.9777708649635315,
      "epoch": 0.21717171717171718,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 1.12037193775177,
      "learning_rate": 7.853535353535353e-07,
      "loss": -0.0,
      "num_tokens": 9371435.0,
      "reward": 0.46562501788139343,
      "reward_std": 0.08345898985862732,
      "rewards/video_r1_accuracy_reward/mean": 0.4375,
      "rewards/video_r1_accuracy_reward/std": 0.49802759289741516,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 86
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 155.0,
      "completions/max_terminated_length": 155.0,
      "completions/mean_length": 70.34375,
      "completions/mean_terminated_length": 70.34375,
      "completions/min_length": 39.0,
      "completions/min_terminated_length": 39.0,
      "entropy": 0.9397503733634949,
      "epoch": 0.2196969696969697,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 1.3667945861816406,
      "learning_rate": 7.828282828282829e-07,
      "loss": -0.0,
      "num_tokens": 9478271.0,
      "reward": 0.606640636920929,
      "reward_std": 0.1196737289428711,
      "rewards/video_r1_accuracy_reward/mean": 0.5859375,
      "rewards/video_r1_accuracy_reward/std": 0.49449479579925537,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 87
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 181.0,
      "completions/max_terminated_length": 181.0,
      "completions/mean_length": 66.671875,
      "completions/mean_terminated_length": 66.671875,
      "completions/min_length": 28.0,
      "completions/min_terminated_length": 28.0,
      "entropy": 0.9511775970458984,
      "epoch": 0.2222222222222222,
      "frac_reward_zero_std": 0.625,
      "grad_norm": 1.7731772661209106,
      "learning_rate": 7.803030303030302e-07,
      "loss": -0.0,
      "num_tokens": 9587653.0,
      "reward": 0.5992187261581421,
      "reward_std": 0.16266599297523499,
      "rewards/video_r1_accuracy_reward/mean": 0.578125,
      "rewards/video_r1_accuracy_reward/std": 0.4957992732524872,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 88
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 145.0,
      "completions/max_terminated_length": 145.0,
      "completions/mean_length": 64.3203125,
      "completions/mean_terminated_length": 64.3203125,
      "completions/min_length": 28.0,
      "completions/min_terminated_length": 28.0,
      "entropy": 0.9355500936508179,
      "epoch": 0.22474747474747475,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 1.7302086353302002,
      "learning_rate": 7.777777777777778e-07,
      "loss": -0.0,
      "num_tokens": 9691022.0,
      "reward": 0.799609363079071,
      "reward_std": 0.09218844771385193,
      "rewards/video_r1_accuracy_reward/mean": 0.7890625,
      "rewards/video_r1_accuracy_reward/std": 0.4095771610736847,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 89
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 144.0,
      "completions/max_terminated_length": 144.0,
      "completions/mean_length": 68.2109375,
      "completions/mean_terminated_length": 68.2109375,
      "completions/min_length": 35.0,
      "completions/min_terminated_length": 35.0,
      "entropy": 0.9529180526733398,
      "epoch": 0.22727272727272727,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 1.217525839805603,
      "learning_rate": 7.752525252525253e-07,
      "loss": -0.0,
      "num_tokens": 9797753.0,
      "reward": 0.651171863079071,
      "reward_std": 0.07596279680728912,
      "rewards/video_r1_accuracy_reward/mean": 0.6328125,
      "rewards/video_r1_accuracy_reward/std": 0.4839322865009308,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 90
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 158.0,
      "completions/max_terminated_length": 158.0,
      "completions/mean_length": 75.765625,
      "completions/mean_terminated_length": 75.765625,
      "completions/min_length": 34.0,
      "completions/min_terminated_length": 34.0,
      "entropy": 0.9758607745170593,
      "epoch": 0.2297979797979798,
      "frac_reward_zero_std": 0.6875,
      "grad_norm": 1.5119833946228027,
      "learning_rate": 7.727272727272727e-07,
      "loss": -0.0,
      "num_tokens": 9904523.0,
      "reward": 0.6363281011581421,
      "reward_std": 0.1416737586259842,
      "rewards/video_r1_accuracy_reward/mean": 0.6171875,
      "rewards/video_r1_accuracy_reward/std": 0.4879830479621887,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 91
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 131.0,
      "completions/max_terminated_length": 131.0,
      "completions/mean_length": 68.4453125,
      "completions/mean_terminated_length": 68.4453125,
      "completions/min_length": 37.0,
      "completions/min_terminated_length": 37.0,
      "entropy": 1.0206053256988525,
      "epoch": 0.23232323232323232,
      "frac_reward_zero_std": 0.6875,
      "grad_norm": 1.599021315574646,
      "learning_rate": 7.702020202020202e-07,
      "loss": -0.0,
      "num_tokens": 10007580.0,
      "reward": 0.5695312023162842,
      "reward_std": 0.1406659632921219,
      "rewards/video_r1_accuracy_reward/mean": 0.546875,
      "rewards/video_r1_accuracy_reward/std": 0.4997538626194,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 92
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 163.0,
      "completions/max_terminated_length": 163.0,
      "completions/mean_length": 71.671875,
      "completions/mean_terminated_length": 71.671875,
      "completions/min_length": 36.0,
      "completions/min_terminated_length": 36.0,
      "entropy": 0.9673388600349426,
      "epoch": 0.23484848484848486,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 1.1879236698150635,
      "learning_rate": 7.676767676767675e-07,
      "loss": 0.0,
      "num_tokens": 10129522.0,
      "reward": 0.5843749642372131,
      "reward_std": 0.10120701789855957,
      "rewards/video_r1_accuracy_reward/mean": 0.5625,
      "rewards/video_r1_accuracy_reward/std": 0.49802759289741516,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 93
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 149.0,
      "completions/max_terminated_length": 149.0,
      "completions/mean_length": 68.9609375,
      "completions/mean_terminated_length": 68.9609375,
      "completions/min_length": 26.0,
      "completions/min_terminated_length": 26.0,
      "entropy": 1.0109703540802002,
      "epoch": 0.23737373737373738,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 0.7206881046295166,
      "learning_rate": 7.651515151515151e-07,
      "loss": -0.0,
      "num_tokens": 10232605.0,
      "reward": 0.725390613079071,
      "reward_std": 0.03072948195040226,
      "rewards/video_r1_accuracy_reward/mean": 0.7109375,
      "rewards/video_r1_accuracy_reward/std": 0.45510825514793396,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 94
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 158.0,
      "completions/max_terminated_length": 158.0,
      "completions/mean_length": 69.828125,
      "completions/mean_terminated_length": 69.828125,
      "completions/min_length": 31.0,
      "completions/min_terminated_length": 31.0,
      "entropy": 1.0361416339874268,
      "epoch": 0.2398989898989899,
      "frac_reward_zero_std": 0.625,
      "grad_norm": 1.9655052423477173,
      "learning_rate": 7.626262626262626e-07,
      "loss": -0.0,
      "num_tokens": 10343447.0,
      "reward": 0.7699218988418579,
      "reward_std": 0.1551697999238968,
      "rewards/video_r1_accuracy_reward/mean": 0.7578125,
      "rewards/video_r1_accuracy_reward/std": 0.4300905168056488,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 95
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 139.0,
      "completions/max_terminated_length": 139.0,
      "completions/mean_length": 70.5078125,
      "completions/mean_terminated_length": 70.5078125,
      "completions/min_length": 30.0,
      "completions/min_terminated_length": 30.0,
      "entropy": 1.0260932445526123,
      "epoch": 0.24242424242424243,
      "frac_reward_zero_std": 0.5625,
      "grad_norm": 1.967199683189392,
      "learning_rate": 7.6010101010101e-07,
      "loss": 0.0,
      "num_tokens": 10455376.0,
      "reward": 0.6511719226837158,
      "reward_std": 0.18265508115291595,
      "rewards/video_r1_accuracy_reward/mean": 0.6328125,
      "rewards/video_r1_accuracy_reward/std": 0.4839322865009308,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 96
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 139.0,
      "completions/max_terminated_length": 139.0,
      "completions/mean_length": 66.625,
      "completions/mean_terminated_length": 66.625,
      "completions/min_length": 26.0,
      "completions/min_terminated_length": 26.0,
      "entropy": 1.0024924278259277,
      "epoch": 0.24494949494949494,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 0.9387697577476501,
      "learning_rate": 7.575757575757575e-07,
      "loss": -0.0,
      "num_tokens": 10563352.0,
      "reward": 0.6066405773162842,
      "reward_std": 0.05821476876735687,
      "rewards/video_r1_accuracy_reward/mean": 0.5859375,
      "rewards/video_r1_accuracy_reward/std": 0.49449479579925537,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 97
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 191.0,
      "completions/max_terminated_length": 191.0,
      "completions/mean_length": 66.890625,
      "completions/mean_terminated_length": 66.890625,
      "completions/min_length": 31.0,
      "completions/min_terminated_length": 31.0,
      "entropy": 0.9643306732177734,
      "epoch": 0.2474747474747475,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 1.325685739517212,
      "learning_rate": 7.550505050505051e-07,
      "loss": 0.0,
      "num_tokens": 10671386.0,
      "reward": 0.5992187261581421,
      "reward_std": 0.06946974992752075,
      "rewards/video_r1_accuracy_reward/mean": 0.578125,
      "rewards/video_r1_accuracy_reward/std": 0.4957992732524872,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 98
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 144.0,
      "completions/max_terminated_length": 144.0,
      "completions/mean_length": 70.015625,
      "completions/mean_terminated_length": 70.015625,
      "completions/min_length": 34.0,
      "completions/min_terminated_length": 34.0,
      "entropy": 1.0711150169372559,
      "epoch": 0.25,
      "frac_reward_zero_std": 0.5625,
      "grad_norm": 1.8700460195541382,
      "learning_rate": 7.525252525252524e-07,
      "loss": 0.0,
      "num_tokens": 10769676.0,
      "reward": 0.49531251192092896,
      "reward_std": 0.16317594051361084,
      "rewards/video_r1_accuracy_reward/mean": 0.46875,
      "rewards/video_r1_accuracy_reward/std": 0.5009832978248596,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 99
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 141.0,
      "completions/max_terminated_length": 141.0,
      "completions/mean_length": 66.1875,
      "completions/mean_terminated_length": 66.1875,
      "completions/min_length": 32.0,
      "completions/min_terminated_length": 32.0,
      "entropy": 1.032357096672058,
      "epoch": 0.25252525252525254,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 1.2700526714324951,
      "learning_rate": 7.5e-07,
      "loss": 0.0,
      "num_tokens": 10868492.0,
      "reward": 0.7328125238418579,
      "reward_std": 0.0737217366695404,
      "rewards/video_r1_accuracy_reward/mean": 0.71875,
      "rewards/video_r1_accuracy_reward/std": 0.4513758420944214,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 100
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 133.0,
      "completions/max_terminated_length": 133.0,
      "completions/mean_length": 68.2890625,
      "completions/mean_terminated_length": 68.2890625,
      "completions/min_length": 24.0,
      "completions/min_terminated_length": 24.0,
      "entropy": 0.9825550317764282,
      "epoch": 0.255050505050505,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 0.7565488219261169,
      "learning_rate": 7.474747474747475e-07,
      "loss": -0.0,
      "num_tokens": 10983977.0,
      "reward": 0.6585937738418579,
      "reward_std": 0.027485283091664314,
      "rewards/video_r1_accuracy_reward/mean": 0.640625,
      "rewards/video_r1_accuracy_reward/std": 0.481702595949173,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 101
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 131.0,
      "completions/max_terminated_length": 131.0,
      "completions/mean_length": 66.1484375,
      "completions/mean_terminated_length": 66.1484375,
      "completions/min_length": 33.0,
      "completions/min_terminated_length": 33.0,
      "entropy": 1.0391050577163696,
      "epoch": 0.25757575757575757,
      "frac_reward_zero_std": 0.625,
      "grad_norm": 1.6984504461288452,
      "learning_rate": 7.449494949494948e-07,
      "loss": 0.0,
      "num_tokens": 11084348.0,
      "reward": 0.6363281011581421,
      "reward_std": 0.15942178666591644,
      "rewards/video_r1_accuracy_reward/mean": 0.6171875,
      "rewards/video_r1_accuracy_reward/std": 0.4879830479621887,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 102
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 107.0,
      "completions/max_terminated_length": 107.0,
      "completions/mean_length": 61.875,
      "completions/mean_terminated_length": 61.875,
      "completions/min_length": 25.0,
      "completions/min_terminated_length": 25.0,
      "entropy": 1.0670381784439087,
      "epoch": 0.2601010101010101,
      "frac_reward_zero_std": 0.4375,
      "grad_norm": 2.5665719509124756,
      "learning_rate": 7.424242424242424e-07,
      "loss": -0.0,
      "num_tokens": 11193508.0,
      "reward": 0.6585937738418579,
      "reward_std": 0.23437213897705078,
      "rewards/video_r1_accuracy_reward/mean": 0.640625,
      "rewards/video_r1_accuracy_reward/std": 0.481702595949173,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 103
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 113.0,
      "completions/max_terminated_length": 113.0,
      "completions/mean_length": 62.8984375,
      "completions/mean_terminated_length": 62.8984375,
      "completions/min_length": 33.0,
      "completions/min_terminated_length": 33.0,
      "entropy": 1.031665563583374,
      "epoch": 0.26262626262626265,
      "frac_reward_zero_std": 0.6875,
      "grad_norm": 1.8193395137786865,
      "learning_rate": 7.398989898989899e-07,
      "loss": -0.0,
      "num_tokens": 11299487.0,
      "reward": 0.688281238079071,
      "reward_std": 0.13842955231666565,
      "rewards/video_r1_accuracy_reward/mean": 0.671875,
      "rewards/video_r1_accuracy_reward/std": 0.4713755249977112,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 104
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 104.0,
      "completions/max_terminated_length": 104.0,
      "completions/mean_length": 55.46875,
      "completions/mean_terminated_length": 55.46875,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 1.050790786743164,
      "epoch": 0.26515151515151514,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 1.4435532093048096,
      "learning_rate": 7.373737373737373e-07,
      "loss": -0.0,
      "num_tokens": 11408659.0,
      "reward": 0.6214843988418579,
      "reward_std": 0.10993648320436478,
      "rewards/video_r1_accuracy_reward/mean": 0.6015625,
      "rewards/video_r1_accuracy_reward/std": 0.4915000796318054,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 105
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 109.0,
      "completions/max_terminated_length": 109.0,
      "completions/mean_length": 61.3515625,
      "completions/mean_terminated_length": 61.3515625,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 1.0249364376068115,
      "epoch": 0.2676767676767677,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 1.6191179752349854,
      "learning_rate": 7.348484848484848e-07,
      "loss": 0.0,
      "num_tokens": 11512456.0,
      "reward": 0.6066405773162842,
      "reward_std": 0.10019923746585846,
      "rewards/video_r1_accuracy_reward/mean": 0.5859375,
      "rewards/video_r1_accuracy_reward/std": 0.49449479579925537,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 106
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 132.0,
      "completions/max_terminated_length": 132.0,
      "completions/mean_length": 59.1484375,
      "completions/mean_terminated_length": 59.1484375,
      "completions/min_length": 26.0,
      "completions/min_terminated_length": 26.0,
      "entropy": 1.0390393733978271,
      "epoch": 0.2702020202020202,
      "frac_reward_zero_std": 0.625,
      "grad_norm": 1.6593204736709595,
      "learning_rate": 7.323232323232324e-07,
      "loss": -0.0,
      "num_tokens": 11622035.0,
      "reward": 0.606640636920929,
      "reward_std": 0.16165819764137268,
      "rewards/video_r1_accuracy_reward/mean": 0.5859375,
      "rewards/video_r1_accuracy_reward/std": 0.49449479579925537,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 107
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 113.0,
      "completions/max_terminated_length": 113.0,
      "completions/mean_length": 61.7890625,
      "completions/mean_terminated_length": 61.7890625,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 1.0370471477508545,
      "epoch": 0.2727272727272727,
      "frac_reward_zero_std": 0.6875,
      "grad_norm": 1.7022017240524292,
      "learning_rate": 7.297979797979797e-07,
      "loss": -0.0,
      "num_tokens": 11727192.0,
      "reward": 0.688281238079071,
      "reward_std": 0.13518071174621582,
      "rewards/video_r1_accuracy_reward/mean": 0.671875,
      "rewards/video_r1_accuracy_reward/std": 0.4713755249977112,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 108
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 130.0,
      "completions/max_terminated_length": 130.0,
      "completions/mean_length": 59.1484375,
      "completions/mean_terminated_length": 59.1484375,
      "completions/min_length": 24.0,
      "completions/min_terminated_length": 24.0,
      "entropy": 1.060609221458435,
      "epoch": 0.27525252525252525,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 1.5183703899383545,
      "learning_rate": 7.272727272727272e-07,
      "loss": -0.0,
      "num_tokens": 11834915.0,
      "reward": 0.6957031488418579,
      "reward_std": 0.10993649065494537,
      "rewards/video_r1_accuracy_reward/mean": 0.6796875,
      "rewards/video_r1_accuracy_reward/std": 0.4684300124645233,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 109
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 147.0,
      "completions/max_terminated_length": 147.0,
      "completions/mean_length": 61.46875,
      "completions/mean_terminated_length": 61.46875,
      "completions/min_length": 28.0,
      "completions/min_terminated_length": 28.0,
      "entropy": 1.0464633703231812,
      "epoch": 0.2777777777777778,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 1.4522336721420288,
      "learning_rate": 7.247474747474747e-07,
      "loss": -0.0,
      "num_tokens": 11946295.0,
      "reward": 0.7105468511581421,
      "reward_std": 0.08570004999637604,
      "rewards/video_r1_accuracy_reward/mean": 0.6953125,
      "rewards/video_r1_accuracy_reward/std": 0.46208351850509644,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 110
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 124.0,
      "completions/max_terminated_length": 124.0,
      "completions/mean_length": 63.34375,
      "completions/mean_terminated_length": 63.34375,
      "completions/min_length": 31.0,
      "completions/min_terminated_length": 31.0,
      "entropy": 1.0706329345703125,
      "epoch": 0.2803030303030303,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 1.7645903825759888,
      "learning_rate": 7.222222222222221e-07,
      "loss": -0.0,
      "num_tokens": 12049227.0,
      "reward": 0.8292968273162842,
      "reward_std": 0.11967373639345169,
      "rewards/video_r1_accuracy_reward/mean": 0.8203125,
      "rewards/video_r1_accuracy_reward/std": 0.3854354918003082,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 111
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 107.0,
      "completions/max_terminated_length": 107.0,
      "completions/mean_length": 58.9375,
      "completions/mean_terminated_length": 58.9375,
      "completions/min_length": 29.0,
      "completions/min_terminated_length": 29.0,
      "entropy": 1.1456776857376099,
      "epoch": 0.2828282828282828,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 1.4835776090621948,
      "learning_rate": 7.196969696969697e-07,
      "loss": 0.0,
      "num_tokens": 12153771.0,
      "reward": 0.7105468511581421,
      "reward_std": 0.11418846249580383,
      "rewards/video_r1_accuracy_reward/mean": 0.6953125,
      "rewards/video_r1_accuracy_reward/std": 0.46208351850509644,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 112
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 111.0,
      "completions/max_terminated_length": 111.0,
      "completions/mean_length": 60.28125,
      "completions/mean_terminated_length": 60.28125,
      "completions/min_length": 26.0,
      "completions/min_terminated_length": 26.0,
      "entropy": 1.2109147310256958,
      "epoch": 0.28535353535353536,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 1.5145522356033325,
      "learning_rate": 7.171717171717171e-07,
      "loss": -0.0,
      "num_tokens": 12257335.0,
      "reward": 0.45820310711860657,
      "reward_std": 0.10019923746585846,
      "rewards/video_r1_accuracy_reward/mean": 0.4296875,
      "rewards/video_r1_accuracy_reward/std": 0.4969765841960907,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 113
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 109.0,
      "completions/max_terminated_length": 109.0,
      "completions/mean_length": 57.6875,
      "completions/mean_terminated_length": 57.6875,
      "completions/min_length": 26.0,
      "completions/min_terminated_length": 26.0,
      "entropy": 1.1032978296279907,
      "epoch": 0.2878787878787879,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 1.8097097873687744,
      "learning_rate": 7.146464646464646e-07,
      "loss": -0.0,
      "num_tokens": 12374687.0,
      "reward": 0.740234375,
      "reward_std": 0.10993648320436478,
      "rewards/video_r1_accuracy_reward/mean": 0.7265625,
      "rewards/video_r1_accuracy_reward/std": 0.447474867105484,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 114
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 99.0,
      "completions/max_terminated_length": 99.0,
      "completions/mean_length": 55.1953125,
      "completions/mean_terminated_length": 55.1953125,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.2030807733535767,
      "epoch": 0.2904040404040404,
      "frac_reward_zero_std": 0.625,
      "grad_norm": 2.118196487426758,
      "learning_rate": 7.121212121212121e-07,
      "loss": -0.0,
      "num_tokens": 12485544.0,
      "reward": 0.443359375,
      "reward_std": 0.1649070382118225,
      "rewards/video_r1_accuracy_reward/mean": 0.4140625,
      "rewards/video_r1_accuracy_reward/std": 0.49449479579925537,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 115
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 90.0,
      "completions/max_terminated_length": 90.0,
      "completions/mean_length": 52.40625,
      "completions/mean_terminated_length": 52.40625,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.1323903799057007,
      "epoch": 0.29292929292929293,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 1.8301972150802612,
      "learning_rate": 7.095959595959596e-07,
      "loss": 0.0,
      "num_tokens": 12584092.0,
      "reward": 0.7476562261581421,
      "reward_std": 0.09695503115653992,
      "rewards/video_r1_accuracy_reward/mean": 0.734375,
      "rewards/video_r1_accuracy_reward/std": 0.44340085983276367,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 116
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 126.0,
      "completions/max_terminated_length": 126.0,
      "completions/mean_length": 63.2578125,
      "completions/mean_terminated_length": 63.2578125,
      "completions/min_length": 28.0,
      "completions/min_terminated_length": 28.0,
      "entropy": 1.2527742385864258,
      "epoch": 0.29545454545454547,
      "frac_reward_zero_std": 0.6875,
      "grad_norm": 1.677696943283081,
      "learning_rate": 7.07070707070707e-07,
      "loss": -0.0,
      "num_tokens": 12687173.0,
      "reward": 0.6511719226837158,
      "reward_std": 0.12768451869487762,
      "rewards/video_r1_accuracy_reward/mean": 0.6328125,
      "rewards/video_r1_accuracy_reward/std": 0.4839322865009308,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 117
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 135.0,
      "completions/max_terminated_length": 135.0,
      "completions/mean_length": 57.9140625,
      "completions/mean_terminated_length": 57.9140625,
      "completions/min_length": 25.0,
      "completions/min_terminated_length": 25.0,
      "entropy": 1.1632239818572998,
      "epoch": 0.29797979797979796,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 1.4128031730651855,
      "learning_rate": 7.045454545454545e-07,
      "loss": -0.0,
      "num_tokens": 12777050.0,
      "reward": 0.762499988079071,
      "reward_std": 0.08894424885511398,
      "rewards/video_r1_accuracy_reward/mean": 0.75,
      "rewards/video_r1_accuracy_reward/std": 0.434714138507843,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 118
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 121.0,
      "completions/max_terminated_length": 121.0,
      "completions/mean_length": 55.078125,
      "completions/mean_terminated_length": 55.078125,
      "completions/min_length": 24.0,
      "completions/min_terminated_length": 24.0,
      "entropy": 1.1989972591400146,
      "epoch": 0.3005050505050505,
      "frac_reward_zero_std": 0.6875,
      "grad_norm": 1.9097048044204712,
      "learning_rate": 7.02020202020202e-07,
      "loss": 0.0,
      "num_tokens": 12894444.0,
      "reward": 0.6734375357627869,
      "reward_std": 0.12869229912757874,
      "rewards/video_r1_accuracy_reward/mean": 0.65625,
      "rewards/video_r1_accuracy_reward/std": 0.47682511806488037,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 119
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 139.0,
      "completions/max_terminated_length": 139.0,
      "completions/mean_length": 59.7578125,
      "completions/mean_terminated_length": 59.7578125,
      "completions/min_length": 24.0,
      "completions/min_terminated_length": 24.0,
      "entropy": 1.3133735656738281,
      "epoch": 0.30303030303030304,
      "frac_reward_zero_std": 0.625,
      "grad_norm": 1.8875597715377808,
      "learning_rate": 6.994949494949494e-07,
      "loss": 0.0,
      "num_tokens": 12986525.0,
      "reward": 0.717578113079071,
      "reward_std": 0.1526612639427185,
      "rewards/video_r1_accuracy_reward/mean": 0.703125,
      "rewards/video_r1_accuracy_reward/std": 0.45867621898651123,
      "rewards/video_r1_format_reward/mean": 0.9921875,
      "rewards/video_r1_format_reward/std": 0.0883883461356163,
      "step": 120
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 87.0,
      "completions/max_terminated_length": 87.0,
      "completions/mean_length": 47.015625,
      "completions/mean_terminated_length": 47.015625,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.1269464492797852,
      "epoch": 0.3055555555555556,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.4073408842086792,
      "learning_rate": 6.96969696969697e-07,
      "loss": -0.0,
      "num_tokens": 13101023.0,
      "reward": 0.799609363079071,
      "reward_std": 0.05272950232028961,
      "rewards/video_r1_accuracy_reward/mean": 0.7890625,
      "rewards/video_r1_accuracy_reward/std": 0.4095771610736847,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 121
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 116.0,
      "completions/max_terminated_length": 116.0,
      "completions/mean_length": 52.6640625,
      "completions/mean_terminated_length": 52.6640625,
      "completions/min_length": 27.0,
      "completions/min_terminated_length": 27.0,
      "entropy": 1.16685152053833,
      "epoch": 0.30808080808080807,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 1.5721867084503174,
      "learning_rate": 6.944444444444444e-07,
      "loss": 0.0,
      "num_tokens": 13208740.0,
      "reward": 0.6214843988418579,
      "reward_std": 0.08021478354930878,
      "rewards/video_r1_accuracy_reward/mean": 0.6015625,
      "rewards/video_r1_accuracy_reward/std": 0.4915000796318054,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 122
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 89.0,
      "completions/max_terminated_length": 89.0,
      "completions/mean_length": 47.6953125,
      "completions/mean_terminated_length": 47.6953125,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.2507858276367188,
      "epoch": 0.3106060606060606,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 2.312094211578369,
      "learning_rate": 6.919191919191919e-07,
      "loss": -0.0,
      "num_tokens": 13313453.0,
      "reward": 0.5621094107627869,
      "reward_std": 0.10770007222890854,
      "rewards/video_r1_accuracy_reward/mean": 0.5390625,
      "rewards/video_r1_accuracy_reward/std": 0.5004304051399231,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 123
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 112.0,
      "completions/max_terminated_length": 112.0,
      "completions/mean_length": 53.2578125,
      "completions/mean_terminated_length": 53.2578125,
      "completions/min_length": 26.0,
      "completions/min_terminated_length": 26.0,
      "entropy": 1.216563105583191,
      "epoch": 0.31313131313131315,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 1.8229986429214478,
      "learning_rate": 6.893939393939394e-07,
      "loss": 0.0,
      "num_tokens": 13423294.0,
      "reward": 0.5843750238418579,
      "reward_std": 0.07920699566602707,
      "rewards/video_r1_accuracy_reward/mean": 0.5625,
      "rewards/video_r1_accuracy_reward/std": 0.49802759289741516,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 124
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 96.0,
      "completions/max_terminated_length": 96.0,
      "completions/mean_length": 49.171875,
      "completions/mean_terminated_length": 49.171875,
      "completions/min_length": 19.0,
      "completions/min_terminated_length": 19.0,
      "entropy": 1.2454454898834229,
      "epoch": 0.31565656565656564,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 1.0392420291900635,
      "learning_rate": 6.868686868686868e-07,
      "loss": -0.0,
      "num_tokens": 13520556.0,
      "reward": 0.725390613079071,
      "reward_std": 0.03072948195040226,
      "rewards/video_r1_accuracy_reward/mean": 0.7109375,
      "rewards/video_r1_accuracy_reward/std": 0.45510825514793396,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 125
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 90.0,
      "completions/max_terminated_length": 90.0,
      "completions/mean_length": 48.1640625,
      "completions/mean_terminated_length": 48.1640625,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 1.1565489768981934,
      "epoch": 0.3181818181818182,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.4579185247421265,
      "learning_rate": 6.843434343434343e-07,
      "loss": -0.0,
      "num_tokens": 13634321.0,
      "reward": 0.688281238079071,
      "reward_std": 0.05922255665063858,
      "rewards/video_r1_accuracy_reward/mean": 0.671875,
      "rewards/video_r1_accuracy_reward/std": 0.4713755249977112,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 126
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 99.0,
      "completions/max_terminated_length": 99.0,
      "completions/mean_length": 51.28125,
      "completions/mean_terminated_length": 51.28125,
      "completions/min_length": 24.0,
      "completions/min_terminated_length": 24.0,
      "entropy": 1.2307085990905762,
      "epoch": 0.3207070707070707,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 2.3030145168304443,
      "learning_rate": 6.818181818181817e-07,
      "loss": -0.0,
      "num_tokens": 13727629.0,
      "reward": 0.532421886920929,
      "reward_std": 0.1131853386759758,
      "rewards/video_r1_accuracy_reward/mean": 0.5078125,
      "rewards/video_r1_accuracy_reward/std": 0.5019033551216125,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 127
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 147.0,
      "completions/max_terminated_length": 147.0,
      "completions/mean_length": 47.484375,
      "completions/mean_terminated_length": 47.484375,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.1688158512115479,
      "epoch": 0.32323232323232326,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 1.8600326776504517,
      "learning_rate": 6.792929292929293e-07,
      "loss": 0.0,
      "num_tokens": 13837475.0,
      "reward": 0.666015625,
      "reward_std": 0.0727139487862587,
      "rewards/video_r1_accuracy_reward/mean": 0.6484375,
      "rewards/video_r1_accuracy_reward/std": 0.4793342351913452,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 128
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 111.0,
      "completions/max_terminated_length": 111.0,
      "completions/mean_length": 47.390625,
      "completions/mean_terminated_length": 47.390625,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 1.1925861835479736,
      "epoch": 0.32575757575757575,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 1.420927882194519,
      "learning_rate": 6.767676767676767e-07,
      "loss": -0.0,
      "num_tokens": 13950165.0,
      "reward": 0.8070312738418579,
      "reward_std": 0.0737217366695404,
      "rewards/video_r1_accuracy_reward/mean": 0.796875,
      "rewards/video_r1_accuracy_reward/std": 0.40390563011169434,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 129
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 94.0,
      "completions/max_terminated_length": 94.0,
      "completions/mean_length": 45.359375,
      "completions/mean_terminated_length": 45.359375,
      "completions/min_length": 13.0,
      "completions/min_terminated_length": 13.0,
      "entropy": 1.1712085008621216,
      "epoch": 0.3282828282828283,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 0.66424560546875,
      "learning_rate": 6.742424242424242e-07,
      "loss": 0.0,
      "num_tokens": 14042243.0,
      "reward": 0.6363281011581421,
      "reward_std": 0.020992232486605644,
      "rewards/video_r1_accuracy_reward/mean": 0.6171875,
      "rewards/video_r1_accuracy_reward/std": 0.4879830479621887,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 130
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 112.0,
      "completions/max_terminated_length": 112.0,
      "completions/mean_length": 43.6171875,
      "completions/mean_terminated_length": 43.6171875,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.274552583694458,
      "epoch": 0.33080808080808083,
      "frac_reward_zero_std": 0.6875,
      "grad_norm": 2.4996423721313477,
      "learning_rate": 6.717171717171717e-07,
      "loss": 0.0,
      "num_tokens": 14142794.0,
      "reward": 0.591796875,
      "reward_std": 0.12219925224781036,
      "rewards/video_r1_accuracy_reward/mean": 0.5703125,
      "rewards/video_r1_accuracy_reward/std": 0.4969765841960907,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 131
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 91.0,
      "completions/max_terminated_length": 91.0,
      "completions/mean_length": 42.3203125,
      "completions/mean_terminated_length": 42.3203125,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.1475682258605957,
      "epoch": 0.3333333333333333,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 1.9287738800048828,
      "learning_rate": 6.691919191919192e-07,
      "loss": 0.0,
      "num_tokens": 14232227.0,
      "reward": 0.6285156011581421,
      "reward_std": 0.06965583562850952,
      "rewards/video_r1_accuracy_reward/mean": 0.609375,
      "rewards/video_r1_accuracy_reward/std": 0.4898075461387634,
      "rewards/video_r1_format_reward/mean": 0.9921875,
      "rewards/video_r1_format_reward/std": 0.0883883461356163,
      "step": 132
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 100.0,
      "completions/max_terminated_length": 100.0,
      "completions/mean_length": 42.59375,
      "completions/mean_terminated_length": 42.59375,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.2122106552124023,
      "epoch": 0.33585858585858586,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 3.023392915725708,
      "learning_rate": 6.666666666666666e-07,
      "loss": 0.0,
      "num_tokens": 14331927.0,
      "reward": 0.740234375,
      "reward_std": 0.10344808548688889,
      "rewards/video_r1_accuracy_reward/mean": 0.7265625,
      "rewards/video_r1_accuracy_reward/std": 0.447474867105484,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 133
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 78.0,
      "completions/max_terminated_length": 78.0,
      "completions/mean_length": 40.53125,
      "completions/mean_terminated_length": 40.53125,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.1993948221206665,
      "epoch": 0.3383838383838384,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 1.7871482372283936,
      "learning_rate": 6.641414141414141e-07,
      "loss": 0.0,
      "num_tokens": 14422979.0,
      "reward": 0.5472656488418579,
      "reward_std": 0.0727139487862587,
      "rewards/video_r1_accuracy_reward/mean": 0.5234375,
      "rewards/video_r1_accuracy_reward/std": 0.5014128684997559,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 134
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 80.0,
      "completions/max_terminated_length": 80.0,
      "completions/mean_length": 44.0859375,
      "completions/mean_terminated_length": 44.0859375,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.1800475120544434,
      "epoch": 0.3409090909090909,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 2.805823564529419,
      "learning_rate": 6.616161616161616e-07,
      "loss": -0.0,
      "num_tokens": 14521638.0,
      "reward": 0.62109375,
      "reward_std": 0.08131963759660721,
      "rewards/video_r1_accuracy_reward/mean": 0.6015625,
      "rewards/video_r1_accuracy_reward/std": 0.4915000796318054,
      "rewards/video_r1_format_reward/mean": 0.9921875,
      "rewards/video_r1_format_reward/std": 0.0883883461356163,
      "step": 135
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 89.0,
      "completions/max_terminated_length": 89.0,
      "completions/mean_length": 39.5,
      "completions/mean_terminated_length": 39.5,
      "completions/min_length": 19.0,
      "completions/min_terminated_length": 19.0,
      "entropy": 1.1338804960250854,
      "epoch": 0.3434343434343434,
      "frac_reward_zero_std": 0.6875,
      "grad_norm": 2.844430446624756,
      "learning_rate": 6.59090909090909e-07,
      "loss": -0.0,
      "num_tokens": 14621022.0,
      "reward": 0.703125,
      "reward_std": 0.12119147181510925,
      "rewards/video_r1_accuracy_reward/mean": 0.6875,
      "rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 136
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 101.0,
      "completions/max_terminated_length": 101.0,
      "completions/mean_length": 37.484375,
      "completions/mean_terminated_length": 37.484375,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.0793958902359009,
      "epoch": 0.34595959595959597,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 2.934971332550049,
      "learning_rate": 6.565656565656566e-07,
      "loss": -0.0,
      "num_tokens": 14735044.0,
      "reward": 0.5621093511581421,
      "reward_std": 0.09420402348041534,
      "rewards/video_r1_accuracy_reward/mean": 0.5390625,
      "rewards/video_r1_accuracy_reward/std": 0.5004304051399231,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 137
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 83.0,
      "completions/max_terminated_length": 83.0,
      "completions/mean_length": 39.546875,
      "completions/mean_terminated_length": 39.546875,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.1664319038391113,
      "epoch": 0.3484848484848485,
      "frac_reward_zero_std": 0.625,
      "grad_norm": 2.60278058052063,
      "learning_rate": 6.54040404040404e-07,
      "loss": -0.0,
      "num_tokens": 14846898.0,
      "reward": 0.6140625476837158,
      "reward_std": 0.1454278975725174,
      "rewards/video_r1_accuracy_reward/mean": 0.59375,
      "rewards/video_r1_accuracy_reward/std": 0.4930621087551117,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 138
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 69.0,
      "completions/max_terminated_length": 69.0,
      "completions/mean_length": 34.8671875,
      "completions/mean_terminated_length": 34.8671875,
      "completions/min_length": 19.0,
      "completions/min_terminated_length": 19.0,
      "entropy": 1.1905630826950073,
      "epoch": 0.351010101010101,
      "frac_reward_zero_std": 0.6875,
      "grad_norm": 2.826327323913574,
      "learning_rate": 6.515151515151515e-07,
      "loss": -0.0,
      "num_tokens": 14937953.0,
      "reward": 0.680859386920929,
      "reward_std": 0.13193649053573608,
      "rewards/video_r1_accuracy_reward/mean": 0.6640625,
      "rewards/video_r1_accuracy_reward/std": 0.47417303919792175,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 139
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 85.0,
      "completions/max_terminated_length": 85.0,
      "completions/mean_length": 34.3203125,
      "completions/mean_terminated_length": 34.3203125,
      "completions/min_length": 13.0,
      "completions/min_terminated_length": 13.0,
      "entropy": 1.1751891374588013,
      "epoch": 0.35353535353535354,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 2.3793742656707764,
      "learning_rate": 6.48989898989899e-07,
      "loss": 0.0,
      "num_tokens": 15043954.0,
      "reward": 0.8070312738418579,
      "reward_std": 0.08345898985862732,
      "rewards/video_r1_accuracy_reward/mean": 0.796875,
      "rewards/video_r1_accuracy_reward/std": 0.40390563011169434,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 140
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 82.0,
      "completions/max_terminated_length": 82.0,
      "completions/mean_length": 35.5,
      "completions/mean_terminated_length": 35.5,
      "completions/min_length": 17.0,
      "completions/min_terminated_length": 17.0,
      "entropy": 1.2330318689346313,
      "epoch": 0.3560606060606061,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.5116065740585327,
      "learning_rate": 6.464646464646465e-07,
      "loss": -0.0,
      "num_tokens": 15128850.0,
      "reward": 0.5695312023162842,
      "reward_std": 0.051721714437007904,
      "rewards/video_r1_accuracy_reward/mean": 0.546875,
      "rewards/video_r1_accuracy_reward/std": 0.4997538626194,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 141
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 70.0,
      "completions/max_terminated_length": 70.0,
      "completions/mean_length": 32.6171875,
      "completions/mean_terminated_length": 32.6171875,
      "completions/min_length": 18.0,
      "completions/min_terminated_length": 18.0,
      "entropy": 1.1521470546722412,
      "epoch": 0.35858585858585856,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 2.4774539470672607,
      "learning_rate": 6.439393939393939e-07,
      "loss": -0.0,
      "num_tokens": 15236257.0,
      "reward": 0.7105468511581421,
      "reward_std": 0.10019923001527786,
      "rewards/video_r1_accuracy_reward/mean": 0.6953125,
      "rewards/video_r1_accuracy_reward/std": 0.46208351850509644,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 142
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 87.0,
      "completions/max_terminated_length": 87.0,
      "completions/mean_length": 31.734375,
      "completions/mean_terminated_length": 31.734375,
      "completions/min_length": 17.0,
      "completions/min_terminated_length": 17.0,
      "entropy": 1.1453057527542114,
      "epoch": 0.3611111111111111,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.7422317266464233,
      "learning_rate": 6.414141414141414e-07,
      "loss": 0.0,
      "num_tokens": 15338215.0,
      "reward": 0.6437499523162842,
      "reward_std": 0.05497056990861893,
      "rewards/video_r1_accuracy_reward/mean": 0.625,
      "rewards/video_r1_accuracy_reward/std": 0.4860251843929291,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 143
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 81.0,
      "completions/max_terminated_length": 81.0,
      "completions/mean_length": 34.3671875,
      "completions/mean_terminated_length": 34.3671875,
      "completions/min_length": 17.0,
      "completions/min_terminated_length": 17.0,
      "entropy": 1.2442141771316528,
      "epoch": 0.36363636363636365,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 2.5866944789886475,
      "learning_rate": 6.388888888888888e-07,
      "loss": 0.0,
      "num_tokens": 15450086.0,
      "reward": 0.4878906011581421,
      "reward_std": 0.0727139487862587,
      "rewards/video_r1_accuracy_reward/mean": 0.4609375,
      "rewards/video_r1_accuracy_reward/std": 0.5004304051399231,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 144
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 106.0,
      "completions/max_terminated_length": 106.0,
      "completions/mean_length": 33.3125,
      "completions/mean_terminated_length": 33.3125,
      "completions/min_length": 18.0,
      "completions/min_terminated_length": 18.0,
      "entropy": 1.2739644050598145,
      "epoch": 0.3661616161616162,
      "frac_reward_zero_std": 0.6875,
      "grad_norm": 2.671504259109497,
      "learning_rate": 6.363636363636363e-07,
      "loss": -0.0,
      "num_tokens": 15550070.0,
      "reward": 0.680859386920929,
      "reward_std": 0.12768451869487762,
      "rewards/video_r1_accuracy_reward/mean": 0.6640625,
      "rewards/video_r1_accuracy_reward/std": 0.47417303919792175,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 145
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 86.0,
      "completions/max_terminated_length": 86.0,
      "completions/mean_length": 30.2109375,
      "completions/mean_terminated_length": 30.2109375,
      "completions/min_length": 17.0,
      "completions/min_terminated_length": 17.0,
      "entropy": 1.149760127067566,
      "epoch": 0.3686868686868687,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.9195950031280518,
      "learning_rate": 6.338383838383839e-07,
      "loss": -0.0,
      "num_tokens": 15658265.0,
      "reward": 0.6734374761581421,
      "reward_std": 0.051721714437007904,
      "rewards/video_r1_accuracy_reward/mean": 0.65625,
      "rewards/video_r1_accuracy_reward/std": 0.47682511806488037,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 146
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 76.0,
      "completions/max_terminated_length": 76.0,
      "completions/mean_length": 31.5703125,
      "completions/mean_terminated_length": 31.5703125,
      "completions/min_length": 16.0,
      "completions/min_terminated_length": 16.0,
      "entropy": 1.2297152280807495,
      "epoch": 0.3712121212121212,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 2.8299896717071533,
      "learning_rate": 6.313131313131312e-07,
      "loss": -0.0,
      "num_tokens": 15760138.0,
      "reward": 0.651171863079071,
      "reward_std": 0.08021478354930878,
      "rewards/video_r1_accuracy_reward/mean": 0.6328125,
      "rewards/video_r1_accuracy_reward/std": 0.4839322865009308,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 147
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 60.0,
      "completions/max_terminated_length": 60.0,
      "completions/mean_length": 29.1875,
      "completions/mean_terminated_length": 29.1875,
      "completions/min_length": 18.0,
      "completions/min_terminated_length": 18.0,
      "entropy": 1.1559841632843018,
      "epoch": 0.37373737373737376,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 2.4495553970336914,
      "learning_rate": 6.287878787878788e-07,
      "loss": -0.0,
      "num_tokens": 15859386.0,
      "reward": 0.6214843392372131,
      "reward_std": 0.08570004999637604,
      "rewards/video_r1_accuracy_reward/mean": 0.6015625,
      "rewards/video_r1_accuracy_reward/std": 0.4915000796318054,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 148
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 80.0,
      "completions/max_terminated_length": 80.0,
      "completions/mean_length": 29.859375,
      "completions/mean_terminated_length": 29.859375,
      "completions/min_length": 16.0,
      "completions/min_terminated_length": 16.0,
      "entropy": 1.2058125734329224,
      "epoch": 0.37626262626262624,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.5699256658554077,
      "learning_rate": 6.262626262626263e-07,
      "loss": -0.0,
      "num_tokens": 15961120.0,
      "reward": 0.703125,
      "reward_std": 0.06145896762609482,
      "rewards/video_r1_accuracy_reward/mean": 0.6875,
      "rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 149
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 76.0,
      "completions/max_terminated_length": 76.0,
      "completions/mean_length": 33.84375,
      "completions/mean_terminated_length": 33.84375,
      "completions/min_length": 16.0,
      "completions/min_terminated_length": 16.0,
      "entropy": 1.2072830200195312,
      "epoch": 0.3787878787878788,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 2.6067988872528076,
      "learning_rate": 6.237373737373736e-07,
      "loss": -0.0,
      "num_tokens": 16063572.0,
      "reward": 0.4878906309604645,
      "reward_std": 0.06297669559717178,
      "rewards/video_r1_accuracy_reward/mean": 0.4609375,
      "rewards/video_r1_accuracy_reward/std": 0.5004304051399231,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 150
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 66.0,
      "completions/max_terminated_length": 66.0,
      "completions/mean_length": 32.734375,
      "completions/mean_terminated_length": 32.734375,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.239713191986084,
      "epoch": 0.3813131313131313,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 3.205798387527466,
      "learning_rate": 6.212121212121212e-07,
      "loss": -0.0,
      "num_tokens": 16174394.0,
      "reward": 0.5914062261581421,
      "reward_std": 0.06408154964447021,
      "rewards/video_r1_accuracy_reward/mean": 0.5703125,
      "rewards/video_r1_accuracy_reward/std": 0.4969765841960907,
      "rewards/video_r1_format_reward/mean": 0.9921875,
      "rewards/video_r1_format_reward/std": 0.0883883461356163,
      "step": 151
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 70.0,
      "completions/max_terminated_length": 70.0,
      "completions/mean_length": 34.765625,
      "completions/mean_terminated_length": 34.765625,
      "completions/min_length": 18.0,
      "completions/min_terminated_length": 18.0,
      "entropy": 1.320723533630371,
      "epoch": 0.3838383838383838,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 2.119114875793457,
      "learning_rate": 6.186868686868687e-07,
      "loss": 0.0,
      "num_tokens": 16276724.0,
      "reward": 0.635937511920929,
      "reward_std": 0.09064806997776031,
      "rewards/video_r1_accuracy_reward/mean": 0.6171875,
      "rewards/video_r1_accuracy_reward/std": 0.4879830479621887,
      "rewards/video_r1_format_reward/mean": 0.9921875,
      "rewards/video_r1_format_reward/std": 0.0883883461356163,
      "step": 152
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 84.0,
      "completions/max_terminated_length": 84.0,
      "completions/mean_length": 36.5390625,
      "completions/mean_terminated_length": 36.5390625,
      "completions/min_length": 17.0,
      "completions/min_terminated_length": 17.0,
      "entropy": 1.2870080471038818,
      "epoch": 0.38636363636363635,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.5376033782958984,
      "learning_rate": 6.161616161616161e-07,
      "loss": -0.0,
      "num_tokens": 16371001.0,
      "reward": 0.6214843988418579,
      "reward_std": 0.05272950232028961,
      "rewards/video_r1_accuracy_reward/mean": 0.6015625,
      "rewards/video_r1_accuracy_reward/std": 0.4915000796318054,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 153
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 89.0,
      "completions/max_terminated_length": 89.0,
      "completions/mean_length": 35.984375,
      "completions/mean_terminated_length": 35.984375,
      "completions/min_length": 19.0,
      "completions/min_terminated_length": 19.0,
      "entropy": 1.2718448638916016,
      "epoch": 0.3888888888888889,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.5065395832061768,
      "learning_rate": 6.136363636363636e-07,
      "loss": -0.0,
      "num_tokens": 16482023.0,
      "reward": 0.7847656011581421,
      "reward_std": 0.05821476876735687,
      "rewards/video_r1_accuracy_reward/mean": 0.7734375,
      "rewards/video_r1_accuracy_reward/std": 0.4202519655227661,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 154
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 75.0,
      "completions/max_terminated_length": 75.0,
      "completions/mean_length": 34.0,
      "completions/mean_terminated_length": 34.0,
      "completions/min_length": 17.0,
      "completions/min_terminated_length": 17.0,
      "entropy": 1.2422447204589844,
      "epoch": 0.39141414141414144,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 2.1209592819213867,
      "learning_rate": 6.111111111111112e-07,
      "loss": -0.0,
      "num_tokens": 16578319.0,
      "reward": 0.5621093511581421,
      "reward_std": 0.09218844771385193,
      "rewards/video_r1_accuracy_reward/mean": 0.5390625,
      "rewards/video_r1_accuracy_reward/std": 0.5004304051399231,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 155
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 68.0,
      "completions/max_terminated_length": 68.0,
      "completions/mean_length": 33.4375,
      "completions/mean_terminated_length": 33.4375,
      "completions/min_length": 18.0,
      "completions/min_terminated_length": 18.0,
      "entropy": 1.1495954990386963,
      "epoch": 0.3939393939393939,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 2.415402889251709,
      "learning_rate": 6.085858585858585e-07,
      "loss": 0.0,
      "num_tokens": 16677999.0,
      "reward": 0.6953125,
      "reward_std": 0.04958236962556839,
      "rewards/video_r1_accuracy_reward/mean": 0.6796875,
      "rewards/video_r1_accuracy_reward/std": 0.4684300124645233,
      "rewards/video_r1_format_reward/mean": 0.9921875,
      "rewards/video_r1_format_reward/std": 0.0883883461356163,
      "step": 156
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 87.0,
      "completions/max_terminated_length": 87.0,
      "completions/mean_length": 33.4375,
      "completions/mean_terminated_length": 33.4375,
      "completions/min_length": 17.0,
      "completions/min_terminated_length": 17.0,
      "entropy": 1.2009010314941406,
      "epoch": 0.39646464646464646,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.6719857454299927,
      "learning_rate": 6.060606060606061e-07,
      "loss": 0.0,
      "num_tokens": 16781351.0,
      "reward": 0.606640636920929,
      "reward_std": 0.05272950232028961,
      "rewards/video_r1_accuracy_reward/mean": 0.5859375,
      "rewards/video_r1_accuracy_reward/std": 0.49449479579925537,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 157
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 83.0,
      "completions/max_terminated_length": 83.0,
      "completions/mean_length": 33.9296875,
      "completions/mean_terminated_length": 33.9296875,
      "completions/min_length": 18.0,
      "completions/min_terminated_length": 18.0,
      "entropy": 1.2379920482635498,
      "epoch": 0.398989898989899,
      "frac_reward_zero_std": 0.6875,
      "grad_norm": 2.6537461280822754,
      "learning_rate": 6.035353535353535e-07,
      "loss": 0.0,
      "num_tokens": 16878030.0,
      "reward": 0.8292968273162842,
      "reward_std": 0.1244356632232666,
      "rewards/video_r1_accuracy_reward/mean": 0.8203125,
      "rewards/video_r1_accuracy_reward/std": 0.3854354918003082,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 158
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 79.0,
      "completions/max_terminated_length": 79.0,
      "completions/mean_length": 32.1953125,
      "completions/mean_terminated_length": 32.1953125,
      "completions/min_length": 17.0,
      "completions/min_terminated_length": 17.0,
      "entropy": 1.1720532178878784,
      "epoch": 0.4015151515151515,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 2.911094903945923,
      "learning_rate": 6.010101010101009e-07,
      "loss": 0.0,
      "num_tokens": 16982935.0,
      "reward": 0.569531261920929,
      "reward_std": 0.10120702534914017,
      "rewards/video_r1_accuracy_reward/mean": 0.546875,
      "rewards/video_r1_accuracy_reward/std": 0.4997538626194,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 159
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 74.0,
      "completions/max_terminated_length": 74.0,
      "completions/mean_length": 34.1484375,
      "completions/mean_terminated_length": 34.1484375,
      "completions/min_length": 18.0,
      "completions/min_terminated_length": 18.0,
      "entropy": 1.1654397249221802,
      "epoch": 0.40404040404040403,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.369276762008667,
      "learning_rate": 5.984848484848485e-07,
      "loss": -0.0,
      "num_tokens": 17070618.0,
      "reward": 0.7328125238418579,
      "reward_std": 0.05497056990861893,
      "rewards/video_r1_accuracy_reward/mean": 0.71875,
      "rewards/video_r1_accuracy_reward/std": 0.4513758420944214,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 160
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 84.0,
      "completions/max_terminated_length": 84.0,
      "completions/mean_length": 34.7890625,
      "completions/mean_terminated_length": 34.7890625,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.1633415222167969,
      "epoch": 0.4065656565656566,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.564042568206787,
      "learning_rate": 5.959595959595959e-07,
      "loss": 0.0,
      "num_tokens": 17175703.0,
      "reward": 0.7992187738418579,
      "reward_std": 0.04921317845582962,
      "rewards/video_r1_accuracy_reward/mean": 0.7890625,
      "rewards/video_r1_accuracy_reward/std": 0.4095771610736847,
      "rewards/video_r1_format_reward/mean": 0.9921875,
      "rewards/video_r1_format_reward/std": 0.0883883461356163,
      "step": 161
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 82.0,
      "completions/max_terminated_length": 82.0,
      "completions/mean_length": 35.1796875,
      "completions/mean_terminated_length": 35.1796875,
      "completions/min_length": 18.0,
      "completions/min_terminated_length": 18.0,
      "entropy": 1.3527387380599976,
      "epoch": 0.4090909090909091,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 2.0236308574676514,
      "learning_rate": 5.934343434343434e-07,
      "loss": -0.0,
      "num_tokens": 17283214.0,
      "reward": 0.6363281011581421,
      "reward_std": 0.08245119452476501,
      "rewards/video_r1_accuracy_reward/mean": 0.6171875,
      "rewards/video_r1_accuracy_reward/std": 0.4879830479621887,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 162
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 106.0,
      "completions/max_terminated_length": 106.0,
      "completions/mean_length": 34.8046875,
      "completions/mean_terminated_length": 34.8046875,
      "completions/min_length": 18.0,
      "completions/min_terminated_length": 18.0,
      "entropy": 1.2128992080688477,
      "epoch": 0.4116161616161616,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 2.5096118450164795,
      "learning_rate": 5.909090909090909e-07,
      "loss": -0.0,
      "num_tokens": 17390165.0,
      "reward": 0.7550780773162842,
      "reward_std": 0.09046198427677155,
      "rewards/video_r1_accuracy_reward/mean": 0.7421875,
      "rewards/video_r1_accuracy_reward/std": 0.43914905190467834,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 163
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 69.0,
      "completions/max_terminated_length": 69.0,
      "completions/mean_length": 32.9140625,
      "completions/mean_terminated_length": 32.9140625,
      "completions/min_length": 18.0,
      "completions/min_terminated_length": 18.0,
      "entropy": 1.1064362525939941,
      "epoch": 0.41414141414141414,
      "frac_reward_zero_std": 0.6875,
      "grad_norm": 3.49741268157959,
      "learning_rate": 5.883838383838384e-07,
      "loss": -0.0,
      "num_tokens": 17484994.0,
      "reward": 0.6359374523162842,
      "reward_std": 0.10555607080459595,
      "rewards/video_r1_accuracy_reward/mean": 0.6171875,
      "rewards/video_r1_accuracy_reward/std": 0.4879830479621887,
      "rewards/video_r1_format_reward/mean": 0.9921875,
      "rewards/video_r1_format_reward/std": 0.0883883461356163,
      "step": 164
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 108.0,
      "completions/max_terminated_length": 108.0,
      "completions/mean_length": 34.1015625,
      "completions/mean_terminated_length": 34.1015625,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.1739277839660645,
      "epoch": 0.4166666666666667,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 1.649794578552246,
      "learning_rate": 5.858585858585858e-07,
      "loss": 0.0,
      "num_tokens": 17582335.0,
      "reward": 0.576953113079071,
      "reward_std": 0.020992232486605644,
      "rewards/video_r1_accuracy_reward/mean": 0.5546875,
      "rewards/video_r1_accuracy_reward/std": 0.4989531338214874,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 165
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 80.0,
      "completions/max_terminated_length": 80.0,
      "completions/mean_length": 38.046875,
      "completions/mean_terminated_length": 38.046875,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.2043449878692627,
      "epoch": 0.41919191919191917,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.621335506439209,
      "learning_rate": 5.833333333333334e-07,
      "loss": -0.0,
      "num_tokens": 17673021.0,
      "reward": 0.5249999761581421,
      "reward_std": 0.06145896762609482,
      "rewards/video_r1_accuracy_reward/mean": 0.5,
      "rewards/video_r1_accuracy_reward/std": 0.5019646286964417,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 166
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 69.0,
      "completions/max_terminated_length": 69.0,
      "completions/mean_length": 35.953125,
      "completions/mean_terminated_length": 35.953125,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.2063016891479492,
      "epoch": 0.4217171717171717,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 2.0640013217926025,
      "learning_rate": 5.808080808080808e-07,
      "loss": 0.0,
      "num_tokens": 17767647.0,
      "reward": 0.7476562261581421,
      "reward_std": 0.06946974992752075,
      "rewards/video_r1_accuracy_reward/mean": 0.734375,
      "rewards/video_r1_accuracy_reward/std": 0.44340085983276367,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 167
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 73.0,
      "completions/max_terminated_length": 73.0,
      "completions/mean_length": 35.9765625,
      "completions/mean_terminated_length": 35.9765625,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.1426618099212646,
      "epoch": 0.42424242424242425,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 1.6504007577896118,
      "learning_rate": 5.782828282828282e-07,
      "loss": 0.0,
      "num_tokens": 17882004.0,
      "reward": 0.740234375,
      "reward_std": 0.08021478354930878,
      "rewards/video_r1_accuracy_reward/mean": 0.7265625,
      "rewards/video_r1_accuracy_reward/std": 0.447474867105484,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 168
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 75.0,
      "completions/max_terminated_length": 75.0,
      "completions/mean_length": 36.078125,
      "completions/mean_terminated_length": 36.078125,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.167665719985962,
      "epoch": 0.42676767676767674,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 1.703382968902588,
      "learning_rate": 5.757575757575758e-07,
      "loss": -0.0,
      "num_tokens": 17961174.0,
      "reward": 0.598828136920929,
      "reward_std": 0.07939308881759644,
      "rewards/video_r1_accuracy_reward/mean": 0.578125,
      "rewards/video_r1_accuracy_reward/std": 0.4957992732524872,
      "rewards/video_r1_format_reward/mean": 0.9921875,
      "rewards/video_r1_format_reward/std": 0.0883883461356163,
      "step": 169
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 93.0,
      "completions/max_terminated_length": 93.0,
      "completions/mean_length": 38.3671875,
      "completions/mean_terminated_length": 38.3671875,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.1713335514068604,
      "epoch": 0.4292929292929293,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 2.324023962020874,
      "learning_rate": 5.732323232323232e-07,
      "loss": 0.0,
      "num_tokens": 18065077.0,
      "reward": 0.42851561307907104,
      "reward_std": 0.04847751557826996,
      "rewards/video_r1_accuracy_reward/mean": 0.3984375,
      "rewards/video_r1_accuracy_reward/std": 0.4915000796318054,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 170
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 64.0,
      "completions/max_terminated_length": 64.0,
      "completions/mean_length": 35.40625,
      "completions/mean_terminated_length": 35.40625,
      "completions/min_length": 17.0,
      "completions/min_terminated_length": 17.0,
      "entropy": 1.166663646697998,
      "epoch": 0.4318181818181818,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 2.060908317565918,
      "learning_rate": 5.707070707070707e-07,
      "loss": 0.0,
      "num_tokens": 18166713.0,
      "reward": 0.5992187261581421,
      "reward_std": 0.10120701789855957,
      "rewards/video_r1_accuracy_reward/mean": 0.578125,
      "rewards/video_r1_accuracy_reward/std": 0.4957992732524872,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 171
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 76.0,
      "completions/max_terminated_length": 76.0,
      "completions/mean_length": 34.8515625,
      "completions/mean_terminated_length": 34.8515625,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.0551965236663818,
      "epoch": 0.43434343434343436,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 2.146479606628418,
      "learning_rate": 5.681818181818182e-07,
      "loss": -0.0,
      "num_tokens": 18271646.0,
      "reward": 0.48046875,
      "reward_std": 0.07920699566602707,
      "rewards/video_r1_accuracy_reward/mean": 0.453125,
      "rewards/video_r1_accuracy_reward/std": 0.4997538626194,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 172
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 72.0,
      "completions/max_terminated_length": 72.0,
      "completions/mean_length": 36.4921875,
      "completions/mean_terminated_length": 36.4921875,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.0619423389434814,
      "epoch": 0.43686868686868685,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.411008358001709,
      "learning_rate": 5.656565656565657e-07,
      "loss": 0.0,
      "num_tokens": 18372037.0,
      "reward": 0.5992187261581421,
      "reward_std": 0.051721714437007904,
      "rewards/video_r1_accuracy_reward/mean": 0.578125,
      "rewards/video_r1_accuracy_reward/std": 0.4957992732524872,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 173
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 90.0,
      "completions/max_terminated_length": 90.0,
      "completions/mean_length": 36.5625,
      "completions/mean_terminated_length": 36.5625,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.162034511566162,
      "epoch": 0.4393939393939394,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.3831713199615479,
      "learning_rate": 5.631313131313131e-07,
      "loss": 0.0,
      "num_tokens": 18462141.0,
      "reward": 0.814453125,
      "reward_std": 0.058214765042066574,
      "rewards/video_r1_accuracy_reward/mean": 0.8046875,
      "rewards/video_r1_accuracy_reward/std": 0.3979988098144531,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 174
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 76.0,
      "completions/max_terminated_length": 76.0,
      "completions/mean_length": 38.4453125,
      "completions/mean_terminated_length": 38.4453125,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.2333970069885254,
      "epoch": 0.44191919191919193,
      "frac_reward_zero_std": 0.6875,
      "grad_norm": 2.2612643241882324,
      "learning_rate": 5.606060606060605e-07,
      "loss": 0.0,
      "num_tokens": 18564110.0,
      "reward": 0.829296886920929,
      "reward_std": 0.11469841748476028,
      "rewards/video_r1_accuracy_reward/mean": 0.8203125,
      "rewards/video_r1_accuracy_reward/std": 0.3854354918003082,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 175
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 76.0,
      "completions/max_terminated_length": 76.0,
      "completions/mean_length": 35.8125,
      "completions/mean_terminated_length": 35.8125,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.1255345344543457,
      "epoch": 0.4444444444444444,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.9899846315383911,
      "learning_rate": 5.58080808080808e-07,
      "loss": 0.0,
      "num_tokens": 18665734.0,
      "reward": 0.8960937261581421,
      "reward_std": 0.051721714437007904,
      "rewards/video_r1_accuracy_reward/mean": 0.890625,
      "rewards/video_r1_accuracy_reward/std": 0.31333550810813904,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 176
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 65.0,
      "completions/max_terminated_length": 65.0,
      "completions/mean_length": 34.0,
      "completions/mean_terminated_length": 34.0,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.142645239830017,
      "epoch": 0.44696969696969696,
      "frac_reward_zero_std": 0.625,
      "grad_norm": 2.496107578277588,
      "learning_rate": 5.555555555555555e-07,
      "loss": 0.0,
      "num_tokens": 18759270.0,
      "reward": 0.532421886920929,
      "reward_std": 0.13244643807411194,
      "rewards/video_r1_accuracy_reward/mean": 0.5078125,
      "rewards/video_r1_accuracy_reward/std": 0.5019033551216125,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 177
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 82.0,
      "completions/max_terminated_length": 82.0,
      "completions/mean_length": 38.4609375,
      "completions/mean_terminated_length": 38.4609375,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.1225965023040771,
      "epoch": 0.4494949494949495,
      "frac_reward_zero_std": 0.6875,
      "grad_norm": 2.3409671783447266,
      "learning_rate": 5.53030303030303e-07,
      "loss": -0.0,
      "num_tokens": 18862553.0,
      "reward": 0.6585937738418579,
      "reward_std": 0.14693352580070496,
      "rewards/video_r1_accuracy_reward/mean": 0.640625,
      "rewards/video_r1_accuracy_reward/std": 0.481702595949173,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 178
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 100.0,
      "completions/max_terminated_length": 100.0,
      "completions/mean_length": 37.390625,
      "completions/mean_terminated_length": 37.390625,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.1464502811431885,
      "epoch": 0.45202020202020204,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 2.6066229343414307,
      "learning_rate": 5.505050505050505e-07,
      "loss": -0.0,
      "num_tokens": 18956595.0,
      "reward": 0.814453125,
      "reward_std": 0.08570004999637604,
      "rewards/video_r1_accuracy_reward/mean": 0.8046875,
      "rewards/video_r1_accuracy_reward/std": 0.3979988098144531,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 179
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 81.0,
      "completions/max_terminated_length": 81.0,
      "completions/mean_length": 39.1171875,
      "completions/mean_terminated_length": 39.1171875,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 1.1246540546417236,
      "epoch": 0.45454545454545453,
      "frac_reward_zero_std": 0.625,
      "grad_norm": 3.036703109741211,
      "learning_rate": 5.47979797979798e-07,
      "loss": 0.0,
      "num_tokens": 19059722.0,
      "reward": 0.7476562261581421,
      "reward_std": 0.14319148659706116,
      "rewards/video_r1_accuracy_reward/mean": 0.734375,
      "rewards/video_r1_accuracy_reward/std": 0.44340085983276367,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 180
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 126.0,
      "completions/max_terminated_length": 126.0,
      "completions/mean_length": 41.46875,
      "completions/mean_terminated_length": 41.46875,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.1428744792938232,
      "epoch": 0.45707070707070707,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 5.454545454545454e-07,
      "loss": 0.0,
      "num_tokens": 19158318.0,
      "reward": 0.703125,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.6875,
      "rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 181
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 96.0,
      "completions/max_terminated_length": 96.0,
      "completions/mean_length": 40.1796875,
      "completions/mean_terminated_length": 40.1796875,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.1367346048355103,
      "epoch": 0.4595959595959596,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 2.153904676437378,
      "learning_rate": 5.42929292929293e-07,
      "loss": -0.0,
      "num_tokens": 19266637.0,
      "reward": 0.5621093511581421,
      "reward_std": 0.058214765042066574,
      "rewards/video_r1_accuracy_reward/mean": 0.5390625,
      "rewards/video_r1_accuracy_reward/std": 0.5004304051399231,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 182
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 93.0,
      "completions/max_terminated_length": 93.0,
      "completions/mean_length": 40.1875,
      "completions/mean_terminated_length": 40.1875,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.1929256916046143,
      "epoch": 0.4621212121212121,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 2.8813252449035645,
      "learning_rate": 5.404040404040404e-07,
      "loss": -0.0,
      "num_tokens": 19361085.0,
      "reward": 0.7843749523162842,
      "reward_std": 0.07706765085458755,
      "rewards/video_r1_accuracy_reward/mean": 0.7734375,
      "rewards/video_r1_accuracy_reward/std": 0.4202519655227661,
      "rewards/video_r1_format_reward/mean": 0.9921875,
      "rewards/video_r1_format_reward/std": 0.0883883461356163,
      "step": 183
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 84.0,
      "completions/max_terminated_length": 84.0,
      "completions/mean_length": 39.515625,
      "completions/mean_terminated_length": 39.515625,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.1405987739562988,
      "epoch": 0.46464646464646464,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 5.378787878787878e-07,
      "loss": 0.0,
      "num_tokens": 19462775.0,
      "reward": 0.5249999761581421,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.5,
      "rewards/video_r1_accuracy_reward/std": 0.5019646286964417,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 184
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 94.0,
      "completions/max_terminated_length": 94.0,
      "completions/mean_length": 38.4296875,
      "completions/mean_terminated_length": 38.4296875,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.2080726623535156,
      "epoch": 0.4671717171717172,
      "frac_reward_zero_std": 0.6875,
      "grad_norm": 2.3265841007232666,
      "learning_rate": 5.353535353535354e-07,
      "loss": -0.0,
      "num_tokens": 19543830.0,
      "reward": 0.703125,
      "reward_std": 0.13842955231666565,
      "rewards/video_r1_accuracy_reward/mean": 0.6875,
      "rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 185
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 73.0,
      "completions/max_terminated_length": 73.0,
      "completions/mean_length": 40.359375,
      "completions/mean_terminated_length": 40.359375,
      "completions/min_length": 19.0,
      "completions/min_terminated_length": 19.0,
      "entropy": 1.1394485235214233,
      "epoch": 0.4696969696969697,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 2.003188371658325,
      "learning_rate": 5.328282828282828e-07,
      "loss": -0.0,
      "num_tokens": 19636020.0,
      "reward": 0.6734374761581421,
      "reward_std": 0.07920700311660767,
      "rewards/video_r1_accuracy_reward/mean": 0.65625,
      "rewards/video_r1_accuracy_reward/std": 0.47682511806488037,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 186
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 116.0,
      "completions/max_terminated_length": 116.0,
      "completions/mean_length": 42.0703125,
      "completions/mean_terminated_length": 42.0703125,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.2103469371795654,
      "epoch": 0.4722222222222222,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 1.9415223598480225,
      "learning_rate": 5.303030303030303e-07,
      "loss": 0.0,
      "num_tokens": 19746389.0,
      "reward": 0.717968761920929,
      "reward_std": 0.06946974992752075,
      "rewards/video_r1_accuracy_reward/mean": 0.703125,
      "rewards/video_r1_accuracy_reward/std": 0.45867621898651123,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 187
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 115.0,
      "completions/max_terminated_length": 115.0,
      "completions/mean_length": 39.9921875,
      "completions/mean_terminated_length": 39.9921875,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.0821011066436768,
      "epoch": 0.47474747474747475,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 5.277777777777777e-07,
      "loss": 0.0,
      "num_tokens": 19840548.0,
      "reward": 0.8218749761581421,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.8125,
      "rewards/video_r1_accuracy_reward/std": 0.39184603095054626,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 188
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 109.0,
      "completions/max_terminated_length": 109.0,
      "completions/mean_length": 39.6953125,
      "completions/mean_terminated_length": 39.6953125,
      "completions/min_length": 24.0,
      "completions/min_terminated_length": 24.0,
      "entropy": 1.1291919946670532,
      "epoch": 0.4772727272727273,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.641886830329895,
      "learning_rate": 5.252525252525253e-07,
      "loss": 0.0,
      "num_tokens": 19931517.0,
      "reward": 0.762499988079071,
      "reward_std": 0.04198446497321129,
      "rewards/video_r1_accuracy_reward/mean": 0.75,
      "rewards/video_r1_accuracy_reward/std": 0.434714138507843,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 189
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 108.0,
      "completions/max_terminated_length": 108.0,
      "completions/mean_length": 43.8828125,
      "completions/mean_terminated_length": 43.8828125,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.197535753250122,
      "epoch": 0.4797979797979798,
      "frac_reward_zero_std": 0.6875,
      "grad_norm": 2.2403042316436768,
      "learning_rate": 5.227272727272727e-07,
      "loss": 0.0,
      "num_tokens": 20039918.0,
      "reward": 0.62890625,
      "reward_std": 0.13092872500419617,
      "rewards/video_r1_accuracy_reward/mean": 0.609375,
      "rewards/video_r1_accuracy_reward/std": 0.4898075461387634,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 190
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 88.0,
      "completions/max_terminated_length": 88.0,
      "completions/mean_length": 41.3671875,
      "completions/mean_terminated_length": 41.3671875,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.253387451171875,
      "epoch": 0.4823232323232323,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 1.8688163757324219,
      "learning_rate": 5.202020202020201e-07,
      "loss": 0.0,
      "num_tokens": 20148373.0,
      "reward": 0.4136718511581421,
      "reward_std": 0.08570004999637604,
      "rewards/video_r1_accuracy_reward/mean": 0.3828125,
      "rewards/video_r1_accuracy_reward/std": 0.4879830479621887,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 191
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 96.0,
      "completions/max_terminated_length": 96.0,
      "completions/mean_length": 36.578125,
      "completions/mean_terminated_length": 36.578125,
      "completions/min_length": 19.0,
      "completions/min_terminated_length": 19.0,
      "entropy": 1.1290706396102905,
      "epoch": 0.48484848484848486,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.7567572593688965,
      "learning_rate": 5.176767676767676e-07,
      "loss": -0.0,
      "num_tokens": 20245007.0,
      "reward": 0.814453125,
      "reward_std": 0.04847751557826996,
      "rewards/video_r1_accuracy_reward/mean": 0.8046875,
      "rewards/video_r1_accuracy_reward/std": 0.3979988098144531,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 192
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 89.0,
      "completions/max_terminated_length": 89.0,
      "completions/mean_length": 43.1875,
      "completions/mean_terminated_length": 43.1875,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 1.1787712574005127,
      "epoch": 0.48737373737373735,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.6133161783218384,
      "learning_rate": 5.151515151515151e-07,
      "loss": -0.0,
      "num_tokens": 20340607.0,
      "reward": 0.703125,
      "reward_std": 0.06145896762609482,
      "rewards/video_r1_accuracy_reward/mean": 0.6875,
      "rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 193
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 94.0,
      "completions/max_terminated_length": 94.0,
      "completions/mean_length": 41.0,
      "completions/mean_terminated_length": 41.0,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.215273141860962,
      "epoch": 0.4898989898989899,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 2.7843291759490967,
      "learning_rate": 5.126262626262626e-07,
      "loss": 0.0,
      "num_tokens": 20446183.0,
      "reward": 0.5992187261581421,
      "reward_std": 0.09695503115653992,
      "rewards/video_r1_accuracy_reward/mean": 0.578125,
      "rewards/video_r1_accuracy_reward/std": 0.4957992732524872,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 194
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 95.0,
      "completions/max_terminated_length": 95.0,
      "completions/mean_length": 42.1640625,
      "completions/mean_terminated_length": 42.1640625,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.1246238946914673,
      "epoch": 0.49242424242424243,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.0955389738082886,
      "learning_rate": 5.1010101010101e-07,
      "loss": 0.0,
      "num_tokens": 20552172.0,
      "reward": 0.6507812738418579,
      "reward_std": 0.04958236962556839,
      "rewards/video_r1_accuracy_reward/mean": 0.6328125,
      "rewards/video_r1_accuracy_reward/std": 0.4839322865009308,
      "rewards/video_r1_format_reward/mean": 0.9921875,
      "rewards/video_r1_format_reward/std": 0.0883883461356163,
      "step": 195
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 85.0,
      "completions/max_terminated_length": 85.0,
      "completions/mean_length": 42.5,
      "completions/mean_terminated_length": 42.5,
      "completions/min_length": 24.0,
      "completions/min_terminated_length": 24.0,
      "entropy": 1.1459453105926514,
      "epoch": 0.494949494949495,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.4178069829940796,
      "learning_rate": 5.075757575757576e-07,
      "loss": 0.0,
      "num_tokens": 20654036.0,
      "reward": 0.591796875,
      "reward_std": 0.04847751557826996,
      "rewards/video_r1_accuracy_reward/mean": 0.5703125,
      "rewards/video_r1_accuracy_reward/std": 0.4969765841960907,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 196
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 83.0,
      "completions/max_terminated_length": 83.0,
      "completions/mean_length": 39.59375,
      "completions/mean_terminated_length": 39.59375,
      "completions/min_length": 16.0,
      "completions/min_terminated_length": 16.0,
      "entropy": 1.142756700515747,
      "epoch": 0.49747474747474746,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 2.4916136264801025,
      "learning_rate": 5.05050505050505e-07,
      "loss": -0.0,
      "num_tokens": 20759896.0,
      "reward": 0.4804687201976776,
      "reward_std": 0.08345898985862732,
      "rewards/video_r1_accuracy_reward/mean": 0.453125,
      "rewards/video_r1_accuracy_reward/std": 0.4997538626194,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 197
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 83.0,
      "completions/max_terminated_length": 83.0,
      "completions/mean_length": 40.7421875,
      "completions/mean_terminated_length": 40.7421875,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.1972713470458984,
      "epoch": 0.5,
      "frac_reward_zero_std": 0.6875,
      "grad_norm": 3.071634292602539,
      "learning_rate": 5.025252525252525e-07,
      "loss": -0.0,
      "num_tokens": 20869311.0,
      "reward": 0.703125,
      "reward_std": 0.1254434585571289,
      "rewards/video_r1_accuracy_reward/mean": 0.6875,
      "rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 198
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 91.0,
      "completions/max_terminated_length": 91.0,
      "completions/mean_length": 41.9609375,
      "completions/mean_terminated_length": 41.9609375,
      "completions/min_length": 19.0,
      "completions/min_terminated_length": 19.0,
      "entropy": 1.2184077501296997,
      "epoch": 0.5025252525252525,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 2.4103143215179443,
      "learning_rate": 5e-07,
      "loss": -0.0,
      "num_tokens": 20965562.0,
      "reward": 0.5992187857627869,
      "reward_std": 0.08345898985862732,
      "rewards/video_r1_accuracy_reward/mean": 0.578125,
      "rewards/video_r1_accuracy_reward/std": 0.4957992732524872,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 199
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 75.0,
      "completions/max_terminated_length": 75.0,
      "completions/mean_length": 37.8515625,
      "completions/mean_terminated_length": 37.8515625,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.0560106039047241,
      "epoch": 0.5050505050505051,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.7731906175613403,
      "learning_rate": 4.974747474747474e-07,
      "loss": -0.0,
      "num_tokens": 21069135.0,
      "reward": 0.7328125238418579,
      "reward_std": 0.05497056990861893,
      "rewards/video_r1_accuracy_reward/mean": 0.71875,
      "rewards/video_r1_accuracy_reward/std": 0.4513758420944214,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 200
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 81.0,
      "completions/max_terminated_length": 81.0,
      "completions/mean_length": 40.6953125,
      "completions/mean_terminated_length": 40.6953125,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.1646404266357422,
      "epoch": 0.5075757575757576,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 4.949494949494949e-07,
      "loss": 0.0,
      "num_tokens": 21182136.0,
      "reward": 0.703125,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.6875,
      "rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 201
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 83.0,
      "completions/max_terminated_length": 83.0,
      "completions/mean_length": 39.3828125,
      "completions/mean_terminated_length": 39.3828125,
      "completions/min_length": 24.0,
      "completions/min_terminated_length": 24.0,
      "entropy": 1.173717975616455,
      "epoch": 0.51010101010101,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 1.3920958042144775,
      "learning_rate": 4.924242424242424e-07,
      "loss": 0.0,
      "num_tokens": 21284945.0,
      "reward": 0.6882812976837158,
      "reward_std": 0.06946974992752075,
      "rewards/video_r1_accuracy_reward/mean": 0.671875,
      "rewards/video_r1_accuracy_reward/std": 0.4713755249977112,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 202
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 95.0,
      "completions/max_terminated_length": 95.0,
      "completions/mean_length": 38.6875,
      "completions/mean_terminated_length": 38.6875,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.1436386108398438,
      "epoch": 0.5126262626262627,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.3957781791687012,
      "learning_rate": 4.898989898989898e-07,
      "loss": 0.0,
      "num_tokens": 21389041.0,
      "reward": 0.762499988079071,
      "reward_std": 0.04198446497321129,
      "rewards/video_r1_accuracy_reward/mean": 0.75,
      "rewards/video_r1_accuracy_reward/std": 0.434714138507843,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 203
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 84.0,
      "completions/max_terminated_length": 84.0,
      "completions/mean_length": 37.1171875,
      "completions/mean_terminated_length": 37.1171875,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.0701844692230225,
      "epoch": 0.5151515151515151,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 6.448522090911865,
      "learning_rate": 4.873737373737373e-07,
      "loss": -0.0,
      "num_tokens": 21490960.0,
      "reward": 0.5249999761581421,
      "reward_std": 0.11094427108764648,
      "rewards/video_r1_accuracy_reward/mean": 0.5,
      "rewards/video_r1_accuracy_reward/std": 0.5019646286964417,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 204
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 81.0,
      "completions/max_terminated_length": 81.0,
      "completions/mean_length": 36.6484375,
      "completions/mean_terminated_length": 36.6484375,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.1014225482940674,
      "epoch": 0.5176767676767676,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 1.2016708850860596,
      "learning_rate": 4.848484848484849e-07,
      "loss": -0.0,
      "num_tokens": 21589139.0,
      "reward": 0.7699218988418579,
      "reward_std": 0.020992232486605644,
      "rewards/video_r1_accuracy_reward/mean": 0.7578125,
      "rewards/video_r1_accuracy_reward/std": 0.4300905168056488,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 205
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 103.0,
      "completions/max_terminated_length": 103.0,
      "completions/mean_length": 42.125,
      "completions/mean_terminated_length": 42.125,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 1.1724364757537842,
      "epoch": 0.5202020202020202,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 4.823232323232323e-07,
      "loss": 0.0,
      "num_tokens": 21683795.0,
      "reward": 0.762499988079071,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.75,
      "rewards/video_r1_accuracy_reward/std": 0.434714138507843,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 206
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 79.0,
      "completions/max_terminated_length": 79.0,
      "completions/mean_length": 38.78125,
      "completions/mean_terminated_length": 38.78125,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.0875164270401,
      "epoch": 0.5227272727272727,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 4.797979797979798e-07,
      "loss": 0.0,
      "num_tokens": 21786679.0,
      "reward": 0.6437499523162842,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.625,
      "rewards/video_r1_accuracy_reward/std": 0.4860251843929291,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 207
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 83.0,
      "completions/max_terminated_length": 83.0,
      "completions/mean_length": 38.7734375,
      "completions/mean_terminated_length": 38.7734375,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.1065541505813599,
      "epoch": 0.5252525252525253,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 2.205068588256836,
      "learning_rate": 4.772727272727273e-07,
      "loss": 0.0,
      "num_tokens": 21881450.0,
      "reward": 0.7847656607627869,
      "reward_std": 0.04847751557826996,
      "rewards/video_r1_accuracy_reward/mean": 0.7734375,
      "rewards/video_r1_accuracy_reward/std": 0.4202519655227661,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 208
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 75.0,
      "completions/max_terminated_length": 75.0,
      "completions/mean_length": 36.2109375,
      "completions/mean_terminated_length": 36.2109375,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.0509859323501587,
      "epoch": 0.5277777777777778,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.425723671913147,
      "learning_rate": 4.7474747474747474e-07,
      "loss": -0.0,
      "num_tokens": 21985117.0,
      "reward": 0.7699218988418579,
      "reward_std": 0.05821476876735687,
      "rewards/video_r1_accuracy_reward/mean": 0.7578125,
      "rewards/video_r1_accuracy_reward/std": 0.4300905168056488,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 209
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 83.0,
      "completions/max_terminated_length": 83.0,
      "completions/mean_length": 38.3671875,
      "completions/mean_terminated_length": 38.3671875,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.0938265323638916,
      "epoch": 0.5303030303030303,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 2.6129071712493896,
      "learning_rate": 4.722222222222222e-07,
      "loss": -0.0,
      "num_tokens": 22083148.0,
      "reward": 0.8070312738418579,
      "reward_std": 0.11094427108764648,
      "rewards/video_r1_accuracy_reward/mean": 0.796875,
      "rewards/video_r1_accuracy_reward/std": 0.40390563011169434,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 210
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 105.0,
      "completions/max_terminated_length": 105.0,
      "completions/mean_length": 38.5390625,
      "completions/mean_terminated_length": 38.5390625,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.1249573230743408,
      "epoch": 0.5328282828282829,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 1.0192317962646484,
      "learning_rate": 4.696969696969697e-07,
      "loss": 0.0,
      "num_tokens": 22182537.0,
      "reward": 0.651171863079071,
      "reward_std": 0.020992232486605644,
      "rewards/video_r1_accuracy_reward/mean": 0.6328125,
      "rewards/video_r1_accuracy_reward/std": 0.4839322865009308,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 211
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 79.0,
      "completions/max_terminated_length": 79.0,
      "completions/mean_length": 37.875,
      "completions/mean_terminated_length": 37.875,
      "completions/min_length": 14.0,
      "completions/min_terminated_length": 14.0,
      "entropy": 1.1420645713806152,
      "epoch": 0.5353535353535354,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 1.2227110862731934,
      "learning_rate": 4.6717171717171714e-07,
      "loss": 0.0,
      "num_tokens": 22284025.0,
      "reward": 0.755078136920929,
      "reward_std": 0.020992232486605644,
      "rewards/video_r1_accuracy_reward/mean": 0.7421875,
      "rewards/video_r1_accuracy_reward/std": 0.43914905190467834,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 212
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 105.0,
      "completions/max_terminated_length": 105.0,
      "completions/mean_length": 39.8828125,
      "completions/mean_terminated_length": 39.8828125,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.0884701013565063,
      "epoch": 0.5378787878787878,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 4.646464646464646e-07,
      "loss": 0.0,
      "num_tokens": 22386154.0,
      "reward": 0.8812500238418579,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.875,
      "rewards/video_r1_accuracy_reward/std": 0.3320184051990509,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 213
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 93.0,
      "completions/max_terminated_length": 93.0,
      "completions/mean_length": 39.0625,
      "completions/mean_terminated_length": 39.0625,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.0740153789520264,
      "epoch": 0.5404040404040404,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 2.3575637340545654,
      "learning_rate": 4.6212121212121207e-07,
      "loss": -0.0,
      "num_tokens": 22478426.0,
      "reward": 0.7105468511581421,
      "reward_std": 0.07596279680728912,
      "rewards/video_r1_accuracy_reward/mean": 0.6953125,
      "rewards/video_r1_accuracy_reward/std": 0.46208351850509644,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 214
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 80.0,
      "completions/max_terminated_length": 80.0,
      "completions/mean_length": 36.9765625,
      "completions/mean_terminated_length": 36.9765625,
      "completions/min_length": 18.0,
      "completions/min_terminated_length": 18.0,
      "entropy": 1.0606670379638672,
      "epoch": 0.5429292929292929,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 0.6686134338378906,
      "learning_rate": 4.595959595959596e-07,
      "loss": 0.0,
      "num_tokens": 22570903.0,
      "reward": 0.6957031488418579,
      "reward_std": 0.020992232486605644,
      "rewards/video_r1_accuracy_reward/mean": 0.6796875,
      "rewards/video_r1_accuracy_reward/std": 0.4684300124645233,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 215
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 88.0,
      "completions/max_terminated_length": 88.0,
      "completions/mean_length": 39.2109375,
      "completions/mean_terminated_length": 39.2109375,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.117194414138794,
      "epoch": 0.5454545454545454,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 4.5707070707070705e-07,
      "loss": 0.0,
      "num_tokens": 22660930.0,
      "reward": 0.703125,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.6875,
      "rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 216
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 86.0,
      "completions/max_terminated_length": 86.0,
      "completions/mean_length": 38.84375,
      "completions/mean_terminated_length": 38.84375,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.1242808103561401,
      "epoch": 0.547979797979798,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 1.227063536643982,
      "learning_rate": 4.545454545454545e-07,
      "loss": 0.0,
      "num_tokens": 22753702.0,
      "reward": 0.814453125,
      "reward_std": 0.020992232486605644,
      "rewards/video_r1_accuracy_reward/mean": 0.8046875,
      "rewards/video_r1_accuracy_reward/std": 0.3979988098144531,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 217
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 68.0,
      "completions/max_terminated_length": 68.0,
      "completions/mean_length": 36.9453125,
      "completions/mean_terminated_length": 36.9453125,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.083737850189209,
      "epoch": 0.5505050505050505,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 3.3731985092163086,
      "learning_rate": 4.5202020202020204e-07,
      "loss": 0.0,
      "num_tokens": 22856391.0,
      "reward": 0.7996094226837158,
      "reward_std": 0.08245119452476501,
      "rewards/video_r1_accuracy_reward/mean": 0.7890625,
      "rewards/video_r1_accuracy_reward/std": 0.4095771610736847,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 218
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 92.0,
      "completions/max_terminated_length": 92.0,
      "completions/mean_length": 39.375,
      "completions/mean_terminated_length": 39.375,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.153437614440918,
      "epoch": 0.553030303030303,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 3.4006295204162598,
      "learning_rate": 4.494949494949495e-07,
      "loss": 0.0,
      "num_tokens": 22955055.0,
      "reward": 0.651171863079071,
      "reward_std": 0.0727139487862587,
      "rewards/video_r1_accuracy_reward/mean": 0.6328125,
      "rewards/video_r1_accuracy_reward/std": 0.4839322865009308,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 219
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 71.0,
      "completions/max_terminated_length": 71.0,
      "completions/mean_length": 38.5703125,
      "completions/mean_terminated_length": 38.5703125,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.126805305480957,
      "epoch": 0.5555555555555556,
      "frac_reward_zero_std": 0.625,
      "grad_norm": 2.5174307823181152,
      "learning_rate": 4.469696969696969e-07,
      "loss": -0.0,
      "num_tokens": 23062816.0,
      "reward": 0.666015625,
      "reward_std": 0.1464356929063797,
      "rewards/video_r1_accuracy_reward/mean": 0.6484375,
      "rewards/video_r1_accuracy_reward/std": 0.4793342351913452,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 220
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 93.0,
      "completions/max_terminated_length": 93.0,
      "completions/mean_length": 37.0625,
      "completions/mean_terminated_length": 37.0625,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.0423493385314941,
      "epoch": 0.5580808080808081,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.49821937084198,
      "learning_rate": 4.444444444444444e-07,
      "loss": -0.0,
      "num_tokens": 23170032.0,
      "reward": 0.7398437857627869,
      "reward_std": 0.05291558802127838,
      "rewards/video_r1_accuracy_reward/mean": 0.7265625,
      "rewards/video_r1_accuracy_reward/std": 0.447474867105484,
      "rewards/video_r1_format_reward/mean": 0.9921875,
      "rewards/video_r1_format_reward/std": 0.0883883461356163,
      "step": 221
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 70.0,
      "completions/max_terminated_length": 70.0,
      "completions/mean_length": 34.9765625,
      "completions/mean_terminated_length": 34.9765625,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.0868003368377686,
      "epoch": 0.5606060606060606,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 1.3574455976486206,
      "learning_rate": 4.419191919191919e-07,
      "loss": 0.0,
      "num_tokens": 23272485.0,
      "reward": 0.6363281011581421,
      "reward_std": 0.020992232486605644,
      "rewards/video_r1_accuracy_reward/mean": 0.6171875,
      "rewards/video_r1_accuracy_reward/std": 0.4879830479621887,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 222
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 71.0,
      "completions/max_terminated_length": 71.0,
      "completions/mean_length": 37.90625,
      "completions/mean_terminated_length": 37.90625,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.0563578605651855,
      "epoch": 0.5631313131313131,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 0.896294355392456,
      "learning_rate": 4.3939393939393937e-07,
      "loss": -0.0,
      "num_tokens": 23366809.0,
      "reward": 0.799609363079071,
      "reward_std": 0.03072948381304741,
      "rewards/video_r1_accuracy_reward/mean": 0.7890625,
      "rewards/video_r1_accuracy_reward/std": 0.4095771610736847,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 223
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 78.0,
      "completions/max_terminated_length": 78.0,
      "completions/mean_length": 37.359375,
      "completions/mean_terminated_length": 37.359375,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.0919694900512695,
      "epoch": 0.5656565656565656,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.2921442985534668,
      "learning_rate": 4.3686868686868683e-07,
      "loss": -0.0,
      "num_tokens": 23468999.0,
      "reward": 0.591796875,
      "reward_std": 0.05821476876735687,
      "rewards/video_r1_accuracy_reward/mean": 0.5703125,
      "rewards/video_r1_accuracy_reward/std": 0.4969765841960907,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 224
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 96.0,
      "completions/max_terminated_length": 96.0,
      "completions/mean_length": 39.7734375,
      "completions/mean_terminated_length": 39.7734375,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.1395623683929443,
      "epoch": 0.5681818181818182,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 3.0386202335357666,
      "learning_rate": 4.3434343434343435e-07,
      "loss": -0.0,
      "num_tokens": 23574730.0,
      "reward": 0.7476562261581421,
      "reward_std": 0.11519625782966614,
      "rewards/video_r1_accuracy_reward/mean": 0.734375,
      "rewards/video_r1_accuracy_reward/std": 0.44340085983276367,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 225
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 72.0,
      "completions/max_terminated_length": 72.0,
      "completions/mean_length": 36.4453125,
      "completions/mean_terminated_length": 36.4453125,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.0388587713241577,
      "epoch": 0.5707070707070707,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 3.514066219329834,
      "learning_rate": 4.318181818181818e-07,
      "loss": -0.0,
      "num_tokens": 23671659.0,
      "reward": 0.6957030892372131,
      "reward_std": 0.0947139710187912,
      "rewards/video_r1_accuracy_reward/mean": 0.6796875,
      "rewards/video_r1_accuracy_reward/std": 0.4684300124645233,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 226
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 97.0,
      "completions/max_terminated_length": 97.0,
      "completions/mean_length": 37.9375,
      "completions/mean_terminated_length": 37.9375,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.0328450202941895,
      "epoch": 0.5732323232323232,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.3931517601013184,
      "learning_rate": 4.292929292929293e-07,
      "loss": -0.0,
      "num_tokens": 23776019.0,
      "reward": 0.688281238079071,
      "reward_std": 0.051721714437007904,
      "rewards/video_r1_accuracy_reward/mean": 0.671875,
      "rewards/video_r1_accuracy_reward/std": 0.4713755249977112,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 227
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 384.0,
      "completions/max_terminated_length": 80.0,
      "completions/mean_length": 43.9921875,
      "completions/mean_terminated_length": 41.31496047973633,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.0174750089645386,
      "epoch": 0.5757575757575758,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.0087193250656128,
      "learning_rate": 4.267676767676767e-07,
      "loss": 0.0,
      "num_tokens": 23879906.0,
      "reward": 0.784375011920929,
      "reward_std": 0.03183433786034584,
      "rewards/video_r1_accuracy_reward/mean": 0.7734375,
      "rewards/video_r1_accuracy_reward/std": 0.4202519655227661,
      "rewards/video_r1_format_reward/mean": 0.9921875,
      "rewards/video_r1_format_reward/std": 0.0883883461356163,
      "step": 228
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 85.0,
      "completions/max_terminated_length": 85.0,
      "completions/mean_length": 39.2734375,
      "completions/mean_terminated_length": 39.2734375,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 0.9948429465293884,
      "epoch": 0.5782828282828283,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 4.242424242424242e-07,
      "loss": 0.0,
      "num_tokens": 23975525.0,
      "reward": 0.5843749642372131,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.5625,
      "rewards/video_r1_accuracy_reward/std": 0.49802759289741516,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 229
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 91.0,
      "completions/max_terminated_length": 91.0,
      "completions/mean_length": 39.765625,
      "completions/mean_terminated_length": 39.765625,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.0840778350830078,
      "epoch": 0.5808080808080808,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 1.692561388015747,
      "learning_rate": 4.217171717171717e-07,
      "loss": 0.0,
      "num_tokens": 24076743.0,
      "reward": 0.7550780773162842,
      "reward_std": 0.020992232486605644,
      "rewards/video_r1_accuracy_reward/mean": 0.7421875,
      "rewards/video_r1_accuracy_reward/std": 0.43914905190467834,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 230
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 88.0,
      "completions/max_terminated_length": 88.0,
      "completions/mean_length": 39.5546875,
      "completions/mean_terminated_length": 39.5546875,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 1.0627765655517578,
      "epoch": 0.5833333333333334,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 1.6762282848358154,
      "learning_rate": 4.1919191919191915e-07,
      "loss": 0.0,
      "num_tokens": 24168014.0,
      "reward": 0.6585937738418579,
      "reward_std": 0.027485283091664314,
      "rewards/video_r1_accuracy_reward/mean": 0.640625,
      "rewards/video_r1_accuracy_reward/std": 0.481702595949173,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 231
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 80.0,
      "completions/max_terminated_length": 80.0,
      "completions/mean_length": 42.03125,
      "completions/mean_terminated_length": 42.03125,
      "completions/min_length": 19.0,
      "completions/min_terminated_length": 19.0,
      "entropy": 1.1012775897979736,
      "epoch": 0.5858585858585859,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 2.1221230030059814,
      "learning_rate": 4.1666666666666667e-07,
      "loss": 0.0,
      "num_tokens": 24264154.0,
      "reward": 0.4359374940395355,
      "reward_std": 0.05497056618332863,
      "rewards/video_r1_accuracy_reward/mean": 0.40625,
      "rewards/video_r1_accuracy_reward/std": 0.4930621087551117,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 232
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 92.0,
      "completions/max_terminated_length": 92.0,
      "completions/mean_length": 41.5625,
      "completions/mean_terminated_length": 41.5625,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 1.053170919418335,
      "epoch": 0.5883838383838383,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 1.132128357887268,
      "learning_rate": 4.1414141414141413e-07,
      "loss": -0.0,
      "num_tokens": 24373658.0,
      "reward": 0.7328125238418579,
      "reward_std": 0.03173727169632912,
      "rewards/video_r1_accuracy_reward/mean": 0.71875,
      "rewards/video_r1_accuracy_reward/std": 0.4513758420944214,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 233
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 87.0,
      "completions/max_terminated_length": 87.0,
      "completions/mean_length": 41.6484375,
      "completions/mean_terminated_length": 41.6484375,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.1390644311904907,
      "epoch": 0.5909090909090909,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 1.8743163347244263,
      "learning_rate": 4.116161616161616e-07,
      "loss": 0.0,
      "num_tokens": 24482197.0,
      "reward": 0.6363281607627869,
      "reward_std": 0.07596279680728912,
      "rewards/video_r1_accuracy_reward/mean": 0.6171875,
      "rewards/video_r1_accuracy_reward/std": 0.4879830479621887,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 234
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 91.0,
      "completions/max_terminated_length": 91.0,
      "completions/mean_length": 41.4921875,
      "completions/mean_terminated_length": 41.4921875,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.1158329248428345,
      "epoch": 0.5934343434343434,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 0.9406242966651917,
      "learning_rate": 4.090909090909091e-07,
      "loss": 0.0,
      "num_tokens": 24573972.0,
      "reward": 0.576953113079071,
      "reward_std": 0.020992232486605644,
      "rewards/video_r1_accuracy_reward/mean": 0.5546875,
      "rewards/video_r1_accuracy_reward/std": 0.4989531338214874,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 235
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 84.0,
      "completions/max_terminated_length": 84.0,
      "completions/mean_length": 38.7734375,
      "completions/mean_terminated_length": 38.7734375,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.0798749923706055,
      "epoch": 0.5959595959595959,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 1.1411508321762085,
      "learning_rate": 4.065656565656566e-07,
      "loss": -0.0,
      "num_tokens": 24676151.0,
      "reward": 0.7027343511581421,
      "reward_std": 0.0011048546293750405,
      "rewards/video_r1_accuracy_reward/mean": 0.6875,
      "rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
      "rewards/video_r1_format_reward/mean": 0.9921875,
      "rewards/video_r1_format_reward/std": 0.0883883461356163,
      "step": 236
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 97.0,
      "completions/max_terminated_length": 97.0,
      "completions/mean_length": 43.1640625,
      "completions/mean_terminated_length": 43.1640625,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 1.0829260349273682,
      "epoch": 0.5984848484848485,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 1.0807678699493408,
      "learning_rate": 4.04040404040404e-07,
      "loss": -0.0,
      "num_tokens": 24789604.0,
      "reward": 0.7847656011581421,
      "reward_std": 0.03072948195040226,
      "rewards/video_r1_accuracy_reward/mean": 0.7734375,
      "rewards/video_r1_accuracy_reward/std": 0.4202519655227661,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 237
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 81.0,
      "completions/max_terminated_length": 81.0,
      "completions/mean_length": 42.8828125,
      "completions/mean_terminated_length": 42.8828125,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.1113200187683105,
      "epoch": 0.601010101010101,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 1.7582932710647583,
      "learning_rate": 4.0151515151515146e-07,
      "loss": 0.0,
      "num_tokens": 24890173.0,
      "reward": 0.7105469107627869,
      "reward_std": 0.06297669559717178,
      "rewards/video_r1_accuracy_reward/mean": 0.6953125,
      "rewards/video_r1_accuracy_reward/std": 0.46208351850509644,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 238
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 75.0,
      "completions/max_terminated_length": 75.0,
      "completions/mean_length": 38.59375,
      "completions/mean_terminated_length": 38.59375,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.0123108625411987,
      "epoch": 0.6035353535353535,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 2.2676467895507812,
      "learning_rate": 3.98989898989899e-07,
      "loss": -0.0,
      "num_tokens": 24995257.0,
      "reward": 0.606640636920929,
      "reward_std": 0.1131853312253952,
      "rewards/video_r1_accuracy_reward/mean": 0.5859375,
      "rewards/video_r1_accuracy_reward/std": 0.49449479579925537,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 239
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 118.0,
      "completions/max_terminated_length": 118.0,
      "completions/mean_length": 41.9765625,
      "completions/mean_terminated_length": 41.9765625,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 1.0618376731872559,
      "epoch": 0.6060606060606061,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 3.9646464646464644e-07,
      "loss": 0.0,
      "num_tokens": 25087574.0,
      "reward": 0.5843749642372131,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.5625,
      "rewards/video_r1_accuracy_reward/std": 0.49802759289741516,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 240
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 70.0,
      "completions/max_terminated_length": 70.0,
      "completions/mean_length": 39.2421875,
      "completions/mean_terminated_length": 39.2421875,
      "completions/min_length": 24.0,
      "completions/min_terminated_length": 24.0,
      "entropy": 1.0472171306610107,
      "epoch": 0.6085858585858586,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.201456069946289,
      "learning_rate": 3.939393939393939e-07,
      "loss": 0.0,
      "num_tokens": 25177613.0,
      "reward": 0.7105468511581421,
      "reward_std": 0.04847751557826996,
      "rewards/video_r1_accuracy_reward/mean": 0.6953125,
      "rewards/video_r1_accuracy_reward/std": 0.46208351850509644,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 241
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 87.0,
      "completions/max_terminated_length": 87.0,
      "completions/mean_length": 41.1953125,
      "completions/mean_terminated_length": 41.1953125,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.0318273305892944,
      "epoch": 0.6111111111111112,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 3.9141414141414143e-07,
      "loss": 0.0,
      "num_tokens": 25275414.0,
      "reward": 0.5843749642372131,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.5625,
      "rewards/video_r1_accuracy_reward/std": 0.49802759289741516,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 242
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 114.0,
      "completions/max_terminated_length": 114.0,
      "completions/mean_length": 40.625,
      "completions/mean_terminated_length": 40.625,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.0426480770111084,
      "epoch": 0.6136363636363636,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 1.9526947736740112,
      "learning_rate": 3.888888888888889e-07,
      "loss": 0.0,
      "num_tokens": 25373318.0,
      "reward": 0.614062488079071,
      "reward_std": 0.06946974992752075,
      "rewards/video_r1_accuracy_reward/mean": 0.59375,
      "rewards/video_r1_accuracy_reward/std": 0.4930621087551117,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 243
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 87.0,
      "completions/max_terminated_length": 87.0,
      "completions/mean_length": 41.3515625,
      "completions/mean_terminated_length": 41.3515625,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 1.0614213943481445,
      "epoch": 0.6161616161616161,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 2.547713279724121,
      "learning_rate": 3.8636363636363636e-07,
      "loss": 0.0,
      "num_tokens": 25481939.0,
      "reward": 0.7476562261581421,
      "reward_std": 0.08345898985862732,
      "rewards/video_r1_accuracy_reward/mean": 0.734375,
      "rewards/video_r1_accuracy_reward/std": 0.44340085983276367,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 244
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 83.0,
      "completions/max_terminated_length": 83.0,
      "completions/mean_length": 42.1015625,
      "completions/mean_terminated_length": 42.1015625,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.1329345703125,
      "epoch": 0.6186868686868687,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 1.2779253721237183,
      "learning_rate": 3.8383838383838377e-07,
      "loss": -0.0,
      "num_tokens": 25588960.0,
      "reward": 0.7328124642372131,
      "reward_std": 0.03173727169632912,
      "rewards/video_r1_accuracy_reward/mean": 0.71875,
      "rewards/video_r1_accuracy_reward/std": 0.4513758420944214,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 245
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 103.0,
      "completions/max_terminated_length": 103.0,
      "completions/mean_length": 43.3203125,
      "completions/mean_terminated_length": 43.3203125,
      "completions/min_length": 25.0,
      "completions/min_terminated_length": 25.0,
      "entropy": 1.0123302936553955,
      "epoch": 0.6212121212121212,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.0937881469726562,
      "learning_rate": 3.813131313131313e-07,
      "loss": 0.0,
      "num_tokens": 25690657.0,
      "reward": 0.725390613079071,
      "reward_std": 0.04847751557826996,
      "rewards/video_r1_accuracy_reward/mean": 0.7109375,
      "rewards/video_r1_accuracy_reward/std": 0.45510825514793396,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 246
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 82.0,
      "completions/max_terminated_length": 82.0,
      "completions/mean_length": 41.9765625,
      "completions/mean_terminated_length": 41.9765625,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 1.1118111610412598,
      "epoch": 0.6237373737373737,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.778397560119629,
      "learning_rate": 3.7878787878787876e-07,
      "loss": 0.0,
      "num_tokens": 25789478.0,
      "reward": 0.6140625476837158,
      "reward_std": 0.05497056618332863,
      "rewards/video_r1_accuracy_reward/mean": 0.59375,
      "rewards/video_r1_accuracy_reward/std": 0.4930621087551117,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 247
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 106.0,
      "completions/max_terminated_length": 106.0,
      "completions/mean_length": 43.578125,
      "completions/mean_terminated_length": 43.578125,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.018457055091858,
      "epoch": 0.6262626262626263,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.7696324586868286,
      "learning_rate": 3.762626262626262e-07,
      "loss": -0.0,
      "num_tokens": 25888824.0,
      "reward": 0.539843738079071,
      "reward_std": 0.05922255665063858,
      "rewards/video_r1_accuracy_reward/mean": 0.515625,
      "rewards/video_r1_accuracy_reward/std": 0.5017194747924805,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 248
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 101.0,
      "completions/max_terminated_length": 101.0,
      "completions/mean_length": 43.9453125,
      "completions/mean_terminated_length": 43.9453125,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.1335992813110352,
      "epoch": 0.6287878787878788,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 2.2729897499084473,
      "learning_rate": 3.7373737373737374e-07,
      "loss": -0.0,
      "num_tokens": 25983369.0,
      "reward": 0.8960937261581421,
      "reward_std": 0.06145896762609482,
      "rewards/video_r1_accuracy_reward/mean": 0.890625,
      "rewards/video_r1_accuracy_reward/std": 0.31333550810813904,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 249
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 76.0,
      "completions/max_terminated_length": 76.0,
      "completions/mean_length": 41.171875,
      "completions/mean_terminated_length": 41.171875,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 1.062988519668579,
      "epoch": 0.6313131313131313,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.235278844833374,
      "learning_rate": 3.712121212121212e-07,
      "loss": -0.0,
      "num_tokens": 26080023.0,
      "reward": 0.7105468511581421,
      "reward_std": 0.05821476876735687,
      "rewards/video_r1_accuracy_reward/mean": 0.6953125,
      "rewards/video_r1_accuracy_reward/std": 0.46208351850509644,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 250
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 106.0,
      "completions/max_terminated_length": 106.0,
      "completions/mean_length": 39.1328125,
      "completions/mean_terminated_length": 39.1328125,
      "completions/min_length": 18.0,
      "completions/min_terminated_length": 18.0,
      "entropy": 1.0980217456817627,
      "epoch": 0.6338383838383839,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 1.177210807800293,
      "learning_rate": 3.686868686868687e-07,
      "loss": -0.0,
      "num_tokens": 26168704.0,
      "reward": 0.7476562261581421,
      "reward_std": 0.027485284954309464,
      "rewards/video_r1_accuracy_reward/mean": 0.734375,
      "rewards/video_r1_accuracy_reward/std": 0.44340085983276367,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 251
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 79.0,
      "completions/max_terminated_length": 79.0,
      "completions/mean_length": 40.4453125,
      "completions/mean_terminated_length": 40.4453125,
      "completions/min_length": 25.0,
      "completions/min_terminated_length": 25.0,
      "entropy": 0.98891681432724,
      "epoch": 0.6363636363636364,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 2.9645073413848877,
      "learning_rate": 3.661616161616162e-07,
      "loss": -0.0,
      "num_tokens": 26265033.0,
      "reward": 0.7921874523162842,
      "reward_std": 0.051721714437007904,
      "rewards/video_r1_accuracy_reward/mean": 0.78125,
      "rewards/video_r1_accuracy_reward/std": 0.41502299904823303,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 252
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 115.0,
      "completions/max_terminated_length": 115.0,
      "completions/mean_length": 41.390625,
      "completions/mean_terminated_length": 41.390625,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.0786819458007812,
      "epoch": 0.6388888888888888,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 1.4744101762771606,
      "learning_rate": 3.636363636363636e-07,
      "loss": -0.0,
      "num_tokens": 26367979.0,
      "reward": 0.6734374761581421,
      "reward_std": 0.09319624304771423,
      "rewards/video_r1_accuracy_reward/mean": 0.65625,
      "rewards/video_r1_accuracy_reward/std": 0.47682511806488037,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 253
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 128.0,
      "completions/max_terminated_length": 128.0,
      "completions/mean_length": 42.15625,
      "completions/mean_terminated_length": 42.15625,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.0700483322143555,
      "epoch": 0.6414141414141414,
      "frac_reward_zero_std": 0.5625,
      "grad_norm": 4.3232808113098145,
      "learning_rate": 3.6111111111111107e-07,
      "loss": 0.0,
      "num_tokens": 26467855.0,
      "reward": 0.7328125238418579,
      "reward_std": 0.18041402101516724,
      "rewards/video_r1_accuracy_reward/mean": 0.71875,
      "rewards/video_r1_accuracy_reward/std": 0.4513758420944214,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 254
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 92.0,
      "completions/max_terminated_length": 92.0,
      "completions/mean_length": 41.0625,
      "completions/mean_terminated_length": 41.0625,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.0175724029541016,
      "epoch": 0.6439393939393939,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 0.968988835811615,
      "learning_rate": 3.5858585858585854e-07,
      "loss": -0.0,
      "num_tokens": 26581903.0,
      "reward": 0.651171863079071,
      "reward_std": 0.020992232486605644,
      "rewards/video_r1_accuracy_reward/mean": 0.6328125,
      "rewards/video_r1_accuracy_reward/std": 0.4839322865009308,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 255
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 71.0,
      "completions/max_terminated_length": 71.0,
      "completions/mean_length": 38.3359375,
      "completions/mean_terminated_length": 38.3359375,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.042513132095337,
      "epoch": 0.6464646464646465,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 3.0125222206115723,
      "learning_rate": 3.5606060606060606e-07,
      "loss": -0.0,
      "num_tokens": 26688290.0,
      "reward": 0.7625000476837158,
      "reward_std": 0.06347454339265823,
      "rewards/video_r1_accuracy_reward/mean": 0.75,
      "rewards/video_r1_accuracy_reward/std": 0.434714138507843,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 256
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 89.0,
      "completions/max_terminated_length": 89.0,
      "completions/mean_length": 41.1953125,
      "completions/mean_terminated_length": 41.1953125,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.0545010566711426,
      "epoch": 0.648989898989899,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 1.0394936800003052,
      "learning_rate": 3.535353535353535e-07,
      "loss": 0.0,
      "num_tokens": 26781019.0,
      "reward": 0.725390613079071,
      "reward_std": 0.03072948195040226,
      "rewards/video_r1_accuracy_reward/mean": 0.7109375,
      "rewards/video_r1_accuracy_reward/std": 0.45510825514793396,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 257
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 110.0,
      "completions/max_terminated_length": 110.0,
      "completions/mean_length": 39.71875,
      "completions/mean_terminated_length": 39.71875,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.084099292755127,
      "epoch": 0.6515151515151515,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 0.8688188791275024,
      "learning_rate": 3.51010101010101e-07,
      "loss": 0.0,
      "num_tokens": 26885175.0,
      "reward": 0.5472655892372131,
      "reward_std": 0.03072948195040226,
      "rewards/video_r1_accuracy_reward/mean": 0.5234375,
      "rewards/video_r1_accuracy_reward/std": 0.5014128684997559,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 258
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 87.0,
      "completions/max_terminated_length": 87.0,
      "completions/mean_length": 40.6875,
      "completions/mean_terminated_length": 40.6875,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 1.0880751609802246,
      "epoch": 0.6540404040404041,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.7870049476623535,
      "learning_rate": 3.484848484848485e-07,
      "loss": 0.0,
      "num_tokens": 26989175.0,
      "reward": 0.539843738079071,
      "reward_std": 0.051721714437007904,
      "rewards/video_r1_accuracy_reward/mean": 0.515625,
      "rewards/video_r1_accuracy_reward/std": 0.5017194747924805,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 259
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 85.0,
      "completions/max_terminated_length": 85.0,
      "completions/mean_length": 43.2734375,
      "completions/mean_terminated_length": 43.2734375,
      "completions/min_length": 25.0,
      "completions/min_terminated_length": 25.0,
      "entropy": 1.10056734085083,
      "epoch": 0.6565656565656566,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 2.1377336978912354,
      "learning_rate": 3.4595959595959597e-07,
      "loss": -0.0,
      "num_tokens": 27096962.0,
      "reward": 0.6585937738418579,
      "reward_std": 0.07920700311660767,
      "rewards/video_r1_accuracy_reward/mean": 0.640625,
      "rewards/video_r1_accuracy_reward/std": 0.481702595949173,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 260
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 72.0,
      "completions/max_terminated_length": 72.0,
      "completions/mean_length": 40.546875,
      "completions/mean_terminated_length": 40.546875,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.0717473030090332,
      "epoch": 0.6590909090909091,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 1.8865928649902344,
      "learning_rate": 3.434343434343434e-07,
      "loss": 0.0,
      "num_tokens": 27190608.0,
      "reward": 0.703125,
      "reward_std": 0.06946974992752075,
      "rewards/video_r1_accuracy_reward/mean": 0.6875,
      "rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 261
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 77.0,
      "completions/max_terminated_length": 77.0,
      "completions/mean_length": 41.6328125,
      "completions/mean_terminated_length": 41.6328125,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 1.0417635440826416,
      "epoch": 0.6616161616161617,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 2.4803223609924316,
      "learning_rate": 3.4090909090909085e-07,
      "loss": -0.0,
      "num_tokens": 27297481.0,
      "reward": 0.7328125238418579,
      "reward_std": 0.08670784533023834,
      "rewards/video_r1_accuracy_reward/mean": 0.71875,
      "rewards/video_r1_accuracy_reward/std": 0.4513758420944214,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 262
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 92.0,
      "completions/max_terminated_length": 92.0,
      "completions/mean_length": 41.2578125,
      "completions/mean_terminated_length": 41.2578125,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.068854570388794,
      "epoch": 0.6641414141414141,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 2.475841522216797,
      "learning_rate": 3.3838383838383837e-07,
      "loss": -0.0,
      "num_tokens": 27403098.0,
      "reward": 0.688281238079071,
      "reward_std": 0.07920700311660767,
      "rewards/video_r1_accuracy_reward/mean": 0.671875,
      "rewards/video_r1_accuracy_reward/std": 0.4713755249977112,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 263
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 104.0,
      "completions/max_terminated_length": 104.0,
      "completions/mean_length": 40.9765625,
      "completions/mean_terminated_length": 40.9765625,
      "completions/min_length": 18.0,
      "completions/min_terminated_length": 18.0,
      "entropy": 1.073037028312683,
      "epoch": 0.6666666666666666,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 3.0775296688079834,
      "learning_rate": 3.3585858585858583e-07,
      "loss": -0.0,
      "num_tokens": 27499639.0,
      "reward": 0.6734374761581421,
      "reward_std": 0.11844511330127716,
      "rewards/video_r1_accuracy_reward/mean": 0.65625,
      "rewards/video_r1_accuracy_reward/std": 0.47682511806488037,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 264
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 83.0,
      "completions/max_terminated_length": 83.0,
      "completions/mean_length": 41.203125,
      "completions/mean_terminated_length": 41.203125,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.0243158340454102,
      "epoch": 0.6691919191919192,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 2.3727004528045654,
      "learning_rate": 3.333333333333333e-07,
      "loss": -0.0,
      "num_tokens": 27602497.0,
      "reward": 0.666015625,
      "reward_std": 0.08995203673839569,
      "rewards/video_r1_accuracy_reward/mean": 0.6484375,
      "rewards/video_r1_accuracy_reward/std": 0.4793342351913452,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 265
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 112.0,
      "completions/max_terminated_length": 112.0,
      "completions/mean_length": 40.1875,
      "completions/mean_terminated_length": 40.1875,
      "completions/min_length": 19.0,
      "completions/min_terminated_length": 19.0,
      "entropy": 1.0149409770965576,
      "epoch": 0.6717171717171717,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 1.7323969602584839,
      "learning_rate": 3.308080808080808e-07,
      "loss": -0.0,
      "num_tokens": 27704273.0,
      "reward": 0.606640636920929,
      "reward_std": 0.08021478354930878,
      "rewards/video_r1_accuracy_reward/mean": 0.5859375,
      "rewards/video_r1_accuracy_reward/std": 0.49449479579925537,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 266
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 76.0,
      "completions/max_terminated_length": 76.0,
      "completions/mean_length": 39.5390625,
      "completions/mean_terminated_length": 39.5390625,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 1.094785451889038,
      "epoch": 0.6742424242424242,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 1.412737250328064,
      "learning_rate": 3.282828282828283e-07,
      "loss": -0.0,
      "num_tokens": 27806486.0,
      "reward": 0.5472656488418579,
      "reward_std": 0.03072948195040226,
      "rewards/video_r1_accuracy_reward/mean": 0.5234375,
      "rewards/video_r1_accuracy_reward/std": 0.5014128684997559,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 267
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 94.0,
      "completions/max_terminated_length": 94.0,
      "completions/mean_length": 40.75,
      "completions/mean_terminated_length": 40.75,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 1.0335110425949097,
      "epoch": 0.6767676767676768,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 3.2575757575757575e-07,
      "loss": 0.0,
      "num_tokens": 27915974.0,
      "reward": 0.762499988079071,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.75,
      "rewards/video_r1_accuracy_reward/std": 0.434714138507843,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 268
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 76.0,
      "completions/max_terminated_length": 76.0,
      "completions/mean_length": 39.34375,
      "completions/mean_terminated_length": 39.34375,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.0604133605957031,
      "epoch": 0.6792929292929293,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 2.308901786804199,
      "learning_rate": 3.2323232323232327e-07,
      "loss": -0.0,
      "num_tokens": 28020154.0,
      "reward": 0.6363281011581421,
      "reward_std": 0.10993647575378418,
      "rewards/video_r1_accuracy_reward/mean": 0.6171875,
      "rewards/video_r1_accuracy_reward/std": 0.4879830479621887,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 269
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 88.0,
      "completions/max_terminated_length": 88.0,
      "completions/mean_length": 41.546875,
      "completions/mean_terminated_length": 41.546875,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.1338186264038086,
      "epoch": 0.6818181818181818,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 1.2709474563598633,
      "learning_rate": 3.207070707070707e-07,
      "loss": -0.0,
      "num_tokens": 28127328.0,
      "reward": 0.532421886920929,
      "reward_std": 0.020992232486605644,
      "rewards/video_r1_accuracy_reward/mean": 0.5078125,
      "rewards/video_r1_accuracy_reward/std": 0.5019033551216125,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 270
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 86.0,
      "completions/max_terminated_length": 86.0,
      "completions/mean_length": 40.3515625,
      "completions/mean_terminated_length": 40.3515625,
      "completions/min_length": 19.0,
      "completions/min_terminated_length": 19.0,
      "entropy": 1.0553174018859863,
      "epoch": 0.6843434343434344,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 2.8711135387420654,
      "learning_rate": 3.1818181818181815e-07,
      "loss": -0.0,
      "num_tokens": 28229925.0,
      "reward": 0.7625000476837158,
      "reward_std": 0.06145896762609482,
      "rewards/video_r1_accuracy_reward/mean": 0.75,
      "rewards/video_r1_accuracy_reward/std": 0.434714138507843,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 271
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 90.0,
      "completions/max_terminated_length": 90.0,
      "completions/mean_length": 37.5625,
      "completions/mean_terminated_length": 37.5625,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 0.9569792151451111,
      "epoch": 0.6868686868686869,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.4626072645187378,
      "learning_rate": 3.156565656565656e-07,
      "loss": 0.0,
      "num_tokens": 28331109.0,
      "reward": 0.6066405773162842,
      "reward_std": 0.05272950232028961,
      "rewards/video_r1_accuracy_reward/mean": 0.5859375,
      "rewards/video_r1_accuracy_reward/std": 0.49449479579925537,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 272
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 86.0,
      "completions/max_terminated_length": 86.0,
      "completions/mean_length": 41.34375,
      "completions/mean_terminated_length": 41.34375,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.1569232940673828,
      "epoch": 0.6893939393939394,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 3.1313131313131313e-07,
      "loss": 0.0,
      "num_tokens": 28436025.0,
      "reward": 0.5843749642372131,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.5625,
      "rewards/video_r1_accuracy_reward/std": 0.49802759289741516,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 273
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 84.0,
      "completions/max_terminated_length": 84.0,
      "completions/mean_length": 38.3828125,
      "completions/mean_terminated_length": 38.3828125,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 0.9568972587585449,
      "epoch": 0.6919191919191919,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 2.252964735031128,
      "learning_rate": 3.106060606060606e-07,
      "loss": -0.0,
      "num_tokens": 28538722.0,
      "reward": 0.6808593273162842,
      "reward_std": 0.0727139487862587,
      "rewards/video_r1_accuracy_reward/mean": 0.6640625,
      "rewards/video_r1_accuracy_reward/std": 0.47417303919792175,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 274
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 87.0,
      "completions/max_terminated_length": 87.0,
      "completions/mean_length": 42.71875,
      "completions/mean_terminated_length": 42.71875,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.112146258354187,
      "epoch": 0.6944444444444444,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 2.2951831817626953,
      "learning_rate": 3.0808080808080806e-07,
      "loss": 0.0,
      "num_tokens": 28637126.0,
      "reward": 0.5394531488418579,
      "reward_std": 0.028590137138962746,
      "rewards/video_r1_accuracy_reward/mean": 0.515625,
      "rewards/video_r1_accuracy_reward/std": 0.5017194747924805,
      "rewards/video_r1_format_reward/mean": 0.9921875,
      "rewards/video_r1_format_reward/std": 0.0883883461356163,
      "step": 275
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 115.0,
      "completions/max_terminated_length": 115.0,
      "completions/mean_length": 39.984375,
      "completions/mean_terminated_length": 39.984375,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.1185648441314697,
      "epoch": 0.696969696969697,
      "frac_reward_zero_std": 0.6875,
      "grad_norm": 3.1425654888153076,
      "learning_rate": 3.055555555555556e-07,
      "loss": 0.0,
      "num_tokens": 28740852.0,
      "reward": 0.77734375,
      "reward_std": 0.12119146436452866,
      "rewards/video_r1_accuracy_reward/mean": 0.765625,
      "rewards/video_r1_accuracy_reward/std": 0.42527204751968384,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 276
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 77.0,
      "completions/max_terminated_length": 77.0,
      "completions/mean_length": 39.4609375,
      "completions/mean_terminated_length": 39.4609375,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.014233112335205,
      "epoch": 0.6994949494949495,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 1.9896016120910645,
      "learning_rate": 3.0303030303030305e-07,
      "loss": 0.0,
      "num_tokens": 28848535.0,
      "reward": 0.666015625,
      "reward_std": 0.09046198427677155,
      "rewards/video_r1_accuracy_reward/mean": 0.6484375,
      "rewards/video_r1_accuracy_reward/std": 0.4793342351913452,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 277
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 81.0,
      "completions/max_terminated_length": 81.0,
      "completions/mean_length": 40.828125,
      "completions/mean_terminated_length": 40.828125,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.0652706623077393,
      "epoch": 0.702020202020202,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 2.8897011280059814,
      "learning_rate": 3.0050505050505046e-07,
      "loss": 0.0,
      "num_tokens": 28951817.0,
      "reward": 0.5027344226837158,
      "reward_std": 0.04847751557826996,
      "rewards/video_r1_accuracy_reward/mean": 0.4765625,
      "rewards/video_r1_accuracy_reward/std": 0.5014128684997559,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 278
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 99.0,
      "completions/max_terminated_length": 99.0,
      "completions/mean_length": 40.046875,
      "completions/mean_terminated_length": 40.046875,
      "completions/min_length": 19.0,
      "completions/min_terminated_length": 19.0,
      "entropy": 1.0515589714050293,
      "epoch": 0.7045454545454546,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 1.2125859260559082,
      "learning_rate": 2.9797979797979793e-07,
      "loss": 0.0,
      "num_tokens": 29046855.0,
      "reward": 0.576953113079071,
      "reward_std": 0.020992232486605644,
      "rewards/video_r1_accuracy_reward/mean": 0.5546875,
      "rewards/video_r1_accuracy_reward/std": 0.4989531338214874,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 279
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 99.0,
      "completions/max_terminated_length": 99.0,
      "completions/mean_length": 41.078125,
      "completions/mean_terminated_length": 41.078125,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.0689277648925781,
      "epoch": 0.7070707070707071,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 2.9545454545454545e-07,
      "loss": 0.0,
      "num_tokens": 29154393.0,
      "reward": 0.8218749761581421,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.8125,
      "rewards/video_r1_accuracy_reward/std": 0.39184603095054626,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 280
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 78.0,
      "completions/max_terminated_length": 78.0,
      "completions/mean_length": 39.515625,
      "completions/mean_terminated_length": 39.515625,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 1.11039137840271,
      "epoch": 0.7095959595959596,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 1.8850462436676025,
      "learning_rate": 2.929292929292929e-07,
      "loss": -0.0,
      "num_tokens": 29251595.0,
      "reward": 0.9183593988418579,
      "reward_std": 0.03072948381304741,
      "rewards/video_r1_accuracy_reward/mean": 0.9140625,
      "rewards/video_r1_accuracy_reward/std": 0.2813730239868164,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 281
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 86.0,
      "completions/max_terminated_length": 86.0,
      "completions/mean_length": 39.34375,
      "completions/mean_terminated_length": 39.34375,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 1.1349459886550903,
      "epoch": 0.7121212121212122,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.9888182878494263,
      "learning_rate": 2.904040404040404e-07,
      "loss": -0.0,
      "num_tokens": 29345079.0,
      "reward": 0.46562498807907104,
      "reward_std": 0.05497056990861893,
      "rewards/video_r1_accuracy_reward/mean": 0.4375,
      "rewards/video_r1_accuracy_reward/std": 0.49802759289741516,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 282
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 73.0,
      "completions/max_terminated_length": 73.0,
      "completions/mean_length": 38.6328125,
      "completions/mean_terminated_length": 38.6328125,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 0.9957581162452698,
      "epoch": 0.7146464646464646,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 2.878787878787879e-07,
      "loss": 0.0,
      "num_tokens": 29436368.0,
      "reward": 0.703125,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.6875,
      "rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 283
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 77.0,
      "completions/max_terminated_length": 77.0,
      "completions/mean_length": 38.5390625,
      "completions/mean_terminated_length": 38.5390625,
      "completions/min_length": 18.0,
      "completions/min_terminated_length": 18.0,
      "entropy": 1.0308442115783691,
      "epoch": 0.7171717171717171,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 2.872098684310913,
      "learning_rate": 2.8535353535353536e-07,
      "loss": 0.0,
      "num_tokens": 29536589.0,
      "reward": 0.6585937142372131,
      "reward_std": 0.06946974992752075,
      "rewards/video_r1_accuracy_reward/mean": 0.640625,
      "rewards/video_r1_accuracy_reward/std": 0.481702595949173,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 284
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 101.0,
      "completions/max_terminated_length": 101.0,
      "completions/mean_length": 41.53125,
      "completions/mean_terminated_length": 41.53125,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 1.080606460571289,
      "epoch": 0.7196969696969697,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.6087223291397095,
      "learning_rate": 2.8282828282828283e-07,
      "loss": -0.0,
      "num_tokens": 29646105.0,
      "reward": 0.762499988079071,
      "reward_std": 0.06145896390080452,
      "rewards/video_r1_accuracy_reward/mean": 0.75,
      "rewards/video_r1_accuracy_reward/std": 0.434714138507843,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 285
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 116.0,
      "completions/max_terminated_length": 116.0,
      "completions/mean_length": 39.109375,
      "completions/mean_terminated_length": 39.109375,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 0.9553197622299194,
      "epoch": 0.7222222222222222,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 1.0064791440963745,
      "learning_rate": 2.8030303030303024e-07,
      "loss": 0.0,
      "num_tokens": 29756375.0,
      "reward": 0.7550780773162842,
      "reward_std": 0.020992232486605644,
      "rewards/video_r1_accuracy_reward/mean": 0.7421875,
      "rewards/video_r1_accuracy_reward/std": 0.43914905190467834,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 286
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 91.0,
      "completions/max_terminated_length": 91.0,
      "completions/mean_length": 42.5234375,
      "completions/mean_terminated_length": 42.5234375,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.1082098484039307,
      "epoch": 0.7247474747474747,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.7488712072372437,
      "learning_rate": 2.7777777777777776e-07,
      "loss": 0.0,
      "num_tokens": 29862402.0,
      "reward": 0.703125,
      "reward_std": 0.04198446497321129,
      "rewards/video_r1_accuracy_reward/mean": 0.6875,
      "rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 287
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 106.0,
      "completions/max_terminated_length": 106.0,
      "completions/mean_length": 42.578125,
      "completions/mean_terminated_length": 42.578125,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.0919723510742188,
      "epoch": 0.7272727272727273,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 2.1887331008911133,
      "learning_rate": 2.752525252525252e-07,
      "loss": 0.0,
      "num_tokens": 29962308.0,
      "reward": 0.6214843392372131,
      "reward_std": 0.058214765042066574,
      "rewards/video_r1_accuracy_reward/mean": 0.6015625,
      "rewards/video_r1_accuracy_reward/std": 0.4915000796318054,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 288
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 105.0,
      "completions/max_terminated_length": 105.0,
      "completions/mean_length": 44.5390625,
      "completions/mean_terminated_length": 44.5390625,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 1.0653269290924072,
      "epoch": 0.7297979797979798,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 2.727272727272727e-07,
      "loss": 0.0,
      "num_tokens": 30067217.0,
      "reward": 0.762499988079071,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.75,
      "rewards/video_r1_accuracy_reward/std": 0.434714138507843,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 289
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 106.0,
      "completions/max_terminated_length": 106.0,
      "completions/mean_length": 41.0703125,
      "completions/mean_terminated_length": 41.0703125,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.1131395101547241,
      "epoch": 0.7323232323232324,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.3746576309204102,
      "learning_rate": 2.702020202020202e-07,
      "loss": -0.0,
      "num_tokens": 30166226.0,
      "reward": 0.688281238079071,
      "reward_std": 0.051721714437007904,
      "rewards/video_r1_accuracy_reward/mean": 0.671875,
      "rewards/video_r1_accuracy_reward/std": 0.4713755249977112,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 290
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 139.0,
      "completions/max_terminated_length": 139.0,
      "completions/mean_length": 45.3203125,
      "completions/mean_terminated_length": 45.3203125,
      "completions/min_length": 19.0,
      "completions/min_terminated_length": 19.0,
      "entropy": 1.1243813037872314,
      "epoch": 0.7348484848484849,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 2.915048360824585,
      "learning_rate": 2.676767676767677e-07,
      "loss": -0.0,
      "num_tokens": 30259571.0,
      "reward": 0.7847656011581421,
      "reward_std": 0.08245119452476501,
      "rewards/video_r1_accuracy_reward/mean": 0.7734375,
      "rewards/video_r1_accuracy_reward/std": 0.4202519655227661,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 291
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 79.0,
      "completions/max_terminated_length": 79.0,
      "completions/mean_length": 42.1875,
      "completions/mean_terminated_length": 42.1875,
      "completions/min_length": 24.0,
      "completions/min_terminated_length": 24.0,
      "entropy": 1.050102949142456,
      "epoch": 0.7373737373737373,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 2.6515151515151514e-07,
      "loss": 0.0,
      "num_tokens": 30374483.0,
      "reward": 0.703125,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.6875,
      "rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 292
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 121.0,
      "completions/max_terminated_length": 121.0,
      "completions/mean_length": 41.5234375,
      "completions/mean_terminated_length": 41.5234375,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.032031536102295,
      "epoch": 0.73989898989899,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 2.6262626262626266e-07,
      "loss": 0.0,
      "num_tokens": 30471046.0,
      "reward": 0.5249999761581421,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.5,
      "rewards/video_r1_accuracy_reward/std": 0.5019646286964417,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 293
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 75.0,
      "completions/max_terminated_length": 75.0,
      "completions/mean_length": 41.453125,
      "completions/mean_terminated_length": 41.453125,
      "completions/min_length": 25.0,
      "completions/min_terminated_length": 25.0,
      "entropy": 1.0804343223571777,
      "epoch": 0.7424242424242424,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 2.5472633838653564,
      "learning_rate": 2.6010101010101007e-07,
      "loss": -0.0,
      "num_tokens": 30581344.0,
      "reward": 0.5249999761581421,
      "reward_std": 0.10344342887401581,
      "rewards/video_r1_accuracy_reward/mean": 0.5,
      "rewards/video_r1_accuracy_reward/std": 0.5019646286964417,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 294
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 384.0,
      "completions/max_terminated_length": 103.0,
      "completions/mean_length": 45.3359375,
      "completions/mean_terminated_length": 42.669288635253906,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 0.9924347400665283,
      "epoch": 0.7449494949494949,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 3.591127395629883,
      "learning_rate": 2.5757575757575754e-07,
      "loss": 0.0,
      "num_tokens": 30692403.0,
      "reward": 0.821484386920929,
      "reward_std": 0.04308931902050972,
      "rewards/video_r1_accuracy_reward/mean": 0.8125,
      "rewards/video_r1_accuracy_reward/std": 0.39184603095054626,
      "rewards/video_r1_format_reward/mean": 0.9921875,
      "rewards/video_r1_format_reward/std": 0.0883883461356163,
      "step": 295
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 93.0,
      "completions/max_terminated_length": 93.0,
      "completions/mean_length": 43.6015625,
      "completions/mean_terminated_length": 43.6015625,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.1211515665054321,
      "epoch": 0.7474747474747475,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 1.6249940395355225,
      "learning_rate": 2.55050505050505e-07,
      "loss": -0.0,
      "num_tokens": 30794800.0,
      "reward": 0.62890625,
      "reward_std": 0.07920699566602707,
      "rewards/video_r1_accuracy_reward/mean": 0.609375,
      "rewards/video_r1_accuracy_reward/std": 0.4898075461387634,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 296
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 112.0,
      "completions/max_terminated_length": 112.0,
      "completions/mean_length": 39.15625,
      "completions/mean_terminated_length": 39.15625,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.1058385372161865,
      "epoch": 0.75,
      "frac_reward_zero_std": 0.625,
      "grad_norm": 3.0298686027526855,
      "learning_rate": 2.525252525252525e-07,
      "loss": -0.0,
      "num_tokens": 30891148.0,
      "reward": 0.6734375357627869,
      "reward_std": 0.17140009999275208,
      "rewards/video_r1_accuracy_reward/mean": 0.65625,
      "rewards/video_r1_accuracy_reward/std": 0.47682511806488037,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 297
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 152.0,
      "completions/max_terminated_length": 152.0,
      "completions/mean_length": 42.15625,
      "completions/mean_terminated_length": 42.15625,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.0815918445587158,
      "epoch": 0.7525252525252525,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 2.4456374645233154,
      "learning_rate": 2.5e-07,
      "loss": 0.0,
      "num_tokens": 30999208.0,
      "reward": 0.5250000357627869,
      "reward_std": 0.04198446497321129,
      "rewards/video_r1_accuracy_reward/mean": 0.5,
      "rewards/video_r1_accuracy_reward/std": 0.5019646286964417,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 298
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 98.0,
      "completions/max_terminated_length": 98.0,
      "completions/mean_length": 40.9453125,
      "completions/mean_terminated_length": 40.9453125,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.1108628511428833,
      "epoch": 0.7550505050505051,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 2.825129508972168,
      "learning_rate": 2.4747474747474745e-07,
      "loss": 0.0,
      "num_tokens": 31100649.0,
      "reward": 0.591796875,
      "reward_std": 0.11195206642150879,
      "rewards/video_r1_accuracy_reward/mean": 0.5703125,
      "rewards/video_r1_accuracy_reward/std": 0.4969765841960907,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 299
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 100.0,
      "completions/max_terminated_length": 100.0,
      "completions/mean_length": 44.2578125,
      "completions/mean_terminated_length": 44.2578125,
      "completions/min_length": 19.0,
      "completions/min_terminated_length": 19.0,
      "entropy": 1.1574559211730957,
      "epoch": 0.7575757575757576,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 1.9903881549835205,
      "learning_rate": 2.449494949494949e-07,
      "loss": -0.0,
      "num_tokens": 31207226.0,
      "reward": 0.62890625,
      "reward_std": 0.11642953008413315,
      "rewards/video_r1_accuracy_reward/mean": 0.609375,
      "rewards/video_r1_accuracy_reward/std": 0.4898075461387634,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 300
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 82.0,
      "completions/max_terminated_length": 82.0,
      "completions/mean_length": 37.9921875,
      "completions/mean_terminated_length": 37.9921875,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.0374675989151,
      "epoch": 0.76010101010101,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 3.569753885269165,
      "learning_rate": 2.4242424242424244e-07,
      "loss": 0.0,
      "num_tokens": 31304289.0,
      "reward": 0.6953125,
      "reward_std": 0.07706765830516815,
      "rewards/video_r1_accuracy_reward/mean": 0.6796875,
      "rewards/video_r1_accuracy_reward/std": 0.4684300124645233,
      "rewards/video_r1_format_reward/mean": 0.9921875,
      "rewards/video_r1_format_reward/std": 0.0883883461356163,
      "step": 301
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 85.0,
      "completions/max_terminated_length": 85.0,
      "completions/mean_length": 41.484375,
      "completions/mean_terminated_length": 41.484375,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.127359390258789,
      "epoch": 0.7626262626262627,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.5562926530838013,
      "learning_rate": 2.398989898989899e-07,
      "loss": -0.0,
      "num_tokens": 31400983.0,
      "reward": 0.5621093511581421,
      "reward_std": 0.05272950232028961,
      "rewards/video_r1_accuracy_reward/mean": 0.5390625,
      "rewards/video_r1_accuracy_reward/std": 0.5004304051399231,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 302
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 107.0,
      "completions/max_terminated_length": 107.0,
      "completions/mean_length": 42.6328125,
      "completions/mean_terminated_length": 42.6328125,
      "completions/min_length": 19.0,
      "completions/min_terminated_length": 19.0,
      "entropy": 1.028510332107544,
      "epoch": 0.7651515151515151,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 2.891268253326416,
      "learning_rate": 2.3737373737373737e-07,
      "loss": -0.0,
      "num_tokens": 31511296.0,
      "reward": 0.688281238079071,
      "reward_std": 0.051721714437007904,
      "rewards/video_r1_accuracy_reward/mean": 0.671875,
      "rewards/video_r1_accuracy_reward/std": 0.4713755249977112,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 303
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 67.0,
      "completions/max_terminated_length": 67.0,
      "completions/mean_length": 38.3984375,
      "completions/mean_terminated_length": 38.3984375,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.018940806388855,
      "epoch": 0.7676767676767676,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.7596899271011353,
      "learning_rate": 2.3484848484848486e-07,
      "loss": -0.0,
      "num_tokens": 31624579.0,
      "reward": 0.725390613079071,
      "reward_std": 0.04847751557826996,
      "rewards/video_r1_accuracy_reward/mean": 0.7109375,
      "rewards/video_r1_accuracy_reward/std": 0.45510825514793396,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 304
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 81.0,
      "completions/max_terminated_length": 81.0,
      "completions/mean_length": 40.8359375,
      "completions/mean_terminated_length": 40.8359375,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.066922664642334,
      "epoch": 0.7702020202020202,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 0.9264626502990723,
      "learning_rate": 2.323232323232323e-07,
      "loss": -0.0,
      "num_tokens": 31733166.0,
      "reward": 0.5695312023162842,
      "reward_std": 0.027485284954309464,
      "rewards/video_r1_accuracy_reward/mean": 0.546875,
      "rewards/video_r1_accuracy_reward/std": 0.4997538626194,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 305
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 100.0,
      "completions/max_terminated_length": 100.0,
      "completions/mean_length": 39.703125,
      "completions/mean_terminated_length": 39.703125,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.1334049701690674,
      "epoch": 0.7727272727272727,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 2.070505142211914,
      "learning_rate": 2.297979797979798e-07,
      "loss": 0.0,
      "num_tokens": 31838536.0,
      "reward": 0.62890625,
      "reward_std": 0.06946974992752075,
      "rewards/video_r1_accuracy_reward/mean": 0.609375,
      "rewards/video_r1_accuracy_reward/std": 0.4898075461387634,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 306
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 98.0,
      "completions/max_terminated_length": 98.0,
      "completions/mean_length": 41.46875,
      "completions/mean_terminated_length": 41.46875,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.073242425918579,
      "epoch": 0.7752525252525253,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 0.7374638319015503,
      "learning_rate": 2.2727272727272726e-07,
      "loss": -0.0,
      "num_tokens": 31948676.0,
      "reward": 0.7105468511581421,
      "reward_std": 0.020992232486605644,
      "rewards/video_r1_accuracy_reward/mean": 0.6953125,
      "rewards/video_r1_accuracy_reward/std": 0.46208351850509644,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 307
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 112.0,
      "completions/max_terminated_length": 112.0,
      "completions/mean_length": 43.734375,
      "completions/mean_terminated_length": 43.734375,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.1168217658996582,
      "epoch": 0.7777777777777778,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 3.1745333671569824,
      "learning_rate": 2.2474747474747475e-07,
      "loss": -0.0,
      "num_tokens": 32042010.0,
      "reward": 0.7996094226837158,
      "reward_std": 0.10019923746585846,
      "rewards/video_r1_accuracy_reward/mean": 0.7890625,
      "rewards/video_r1_accuracy_reward/std": 0.4095771610736847,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 308
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 87.0,
      "completions/max_terminated_length": 87.0,
      "completions/mean_length": 41.578125,
      "completions/mean_terminated_length": 41.578125,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 1.1183019876480103,
      "epoch": 0.7803030303030303,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 2.8185198307037354,
      "learning_rate": 2.222222222222222e-07,
      "loss": -0.0,
      "num_tokens": 32146732.0,
      "reward": 0.591796875,
      "reward_std": 0.08245119452476501,
      "rewards/video_r1_accuracy_reward/mean": 0.5703125,
      "rewards/video_r1_accuracy_reward/std": 0.4969765841960907,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 309
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 84.0,
      "completions/max_terminated_length": 84.0,
      "completions/mean_length": 42.5078125,
      "completions/mean_terminated_length": 42.5078125,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.156842589378357,
      "epoch": 0.7828282828282829,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 2.6074113845825195,
      "learning_rate": 2.1969696969696968e-07,
      "loss": -0.0,
      "num_tokens": 32241525.0,
      "reward": 0.703125,
      "reward_std": 0.08670784533023834,
      "rewards/video_r1_accuracy_reward/mean": 0.6875,
      "rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 310
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 87.0,
      "completions/max_terminated_length": 87.0,
      "completions/mean_length": 42.2421875,
      "completions/mean_terminated_length": 42.2421875,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.058318853378296,
      "epoch": 0.7853535353535354,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 1.0678150653839111,
      "learning_rate": 2.1717171717171718e-07,
      "loss": -0.0,
      "num_tokens": 32347036.0,
      "reward": 0.910937488079071,
      "reward_std": 0.03173727169632912,
      "rewards/video_r1_accuracy_reward/mean": 0.90625,
      "rewards/video_r1_accuracy_reward/std": 0.29262590408325195,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 311
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 97.0,
      "completions/max_terminated_length": 97.0,
      "completions/mean_length": 42.40625,
      "completions/mean_terminated_length": 42.40625,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.1335667371749878,
      "epoch": 0.7878787878787878,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 2.1464646464646464e-07,
      "loss": 0.0,
      "num_tokens": 32443480.0,
      "reward": 0.703125,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.6875,
      "rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 312
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 88.0,
      "completions/max_terminated_length": 88.0,
      "completions/mean_length": 41.234375,
      "completions/mean_terminated_length": 41.234375,
      "completions/min_length": 24.0,
      "completions/min_terminated_length": 24.0,
      "entropy": 1.0559073686599731,
      "epoch": 0.7904040404040404,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.3612805604934692,
      "learning_rate": 2.121212121212121e-07,
      "loss": 0.0,
      "num_tokens": 32548430.0,
      "reward": 0.8367187976837158,
      "reward_std": 0.051721714437007904,
      "rewards/video_r1_accuracy_reward/mean": 0.828125,
      "rewards/video_r1_accuracy_reward/std": 0.378754198551178,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 313
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 90.0,
      "completions/max_terminated_length": 90.0,
      "completions/mean_length": 42.0390625,
      "completions/mean_terminated_length": 42.0390625,
      "completions/min_length": 24.0,
      "completions/min_terminated_length": 24.0,
      "entropy": 1.055532693862915,
      "epoch": 0.7929292929292929,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 2.0959595959595957e-07,
      "loss": 0.0,
      "num_tokens": 32640787.0,
      "reward": 0.5843749642372131,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.5625,
      "rewards/video_r1_accuracy_reward/std": 0.49802759289741516,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 314
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 78.0,
      "completions/max_terminated_length": 78.0,
      "completions/mean_length": 39.9375,
      "completions/mean_terminated_length": 39.9375,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.1142704486846924,
      "epoch": 0.7954545454545454,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 2.0707070707070707e-07,
      "loss": 0.0,
      "num_tokens": 32744659.0,
      "reward": 0.6437499523162842,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.625,
      "rewards/video_r1_accuracy_reward/std": 0.4860251843929291,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 315
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 76.0,
      "completions/max_terminated_length": 76.0,
      "completions/mean_length": 40.3671875,
      "completions/mean_terminated_length": 40.3671875,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.0762892961502075,
      "epoch": 0.797979797979798,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 2.0454545454545456e-07,
      "loss": 0.0,
      "num_tokens": 32838018.0,
      "reward": 0.762499988079071,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.75,
      "rewards/video_r1_accuracy_reward/std": 0.434714138507843,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 316
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 99.0,
      "completions/max_terminated_length": 99.0,
      "completions/mean_length": 41.578125,
      "completions/mean_terminated_length": 41.578125,
      "completions/min_length": 19.0,
      "completions/min_terminated_length": 19.0,
      "entropy": 1.0358078479766846,
      "epoch": 0.8005050505050505,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 0.9207355380058289,
      "learning_rate": 2.02020202020202e-07,
      "loss": 0.0,
      "num_tokens": 32945708.0,
      "reward": 0.6957031488418579,
      "reward_std": 0.020992232486605644,
      "rewards/video_r1_accuracy_reward/mean": 0.6796875,
      "rewards/video_r1_accuracy_reward/std": 0.4684300124645233,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 317
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 80.0,
      "completions/max_terminated_length": 80.0,
      "completions/mean_length": 39.0625,
      "completions/mean_terminated_length": 39.0625,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.0110433101654053,
      "epoch": 0.803030303030303,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 3.387294054031372,
      "learning_rate": 1.994949494949495e-07,
      "loss": 0.0,
      "num_tokens": 33047076.0,
      "reward": 0.703125,
      "reward_std": 0.07920699566602707,
      "rewards/video_r1_accuracy_reward/mean": 0.6875,
      "rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 318
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 105.0,
      "completions/max_terminated_length": 105.0,
      "completions/mean_length": 41.5078125,
      "completions/mean_terminated_length": 41.5078125,
      "completions/min_length": 24.0,
      "completions/min_terminated_length": 24.0,
      "entropy": 1.0886144638061523,
      "epoch": 0.8055555555555556,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.9696969696969696e-07,
      "loss": 0.0,
      "num_tokens": 33161533.0,
      "reward": 0.703125,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.6875,
      "rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 319
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 83.0,
      "completions/max_terminated_length": 83.0,
      "completions/mean_length": 39.40625,
      "completions/mean_terminated_length": 39.40625,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.0247807502746582,
      "epoch": 0.8080808080808081,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 2.800685167312622,
      "learning_rate": 1.9444444444444445e-07,
      "loss": 0.0,
      "num_tokens": 33261601.0,
      "reward": 0.45820313692092896,
      "reward_std": 0.020992232486605644,
      "rewards/video_r1_accuracy_reward/mean": 0.4296875,
      "rewards/video_r1_accuracy_reward/std": 0.4969765841960907,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 320
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 88.0,
      "completions/max_terminated_length": 88.0,
      "completions/mean_length": 41.890625,
      "completions/mean_terminated_length": 41.890625,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.0938504934310913,
      "epoch": 0.8106060606060606,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 1.4721579551696777,
      "learning_rate": 1.9191919191919189e-07,
      "loss": 0.0,
      "num_tokens": 33362019.0,
      "reward": 0.7179687023162842,
      "reward_std": 0.027485283091664314,
      "rewards/video_r1_accuracy_reward/mean": 0.703125,
      "rewards/video_r1_accuracy_reward/std": 0.45867621898651123,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 321
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 120.0,
      "completions/max_terminated_length": 120.0,
      "completions/mean_length": 42.2265625,
      "completions/mean_terminated_length": 42.2265625,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.0467185974121094,
      "epoch": 0.8131313131313131,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.8939393939393938e-07,
      "loss": 0.0,
      "num_tokens": 33472384.0,
      "reward": 0.6437499523162842,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.625,
      "rewards/video_r1_accuracy_reward/std": 0.4860251843929291,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 322
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 139.0,
      "completions/max_terminated_length": 139.0,
      "completions/mean_length": 46.7421875,
      "completions/mean_terminated_length": 46.7421875,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.163482427597046,
      "epoch": 0.8156565656565656,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.9332994222640991,
      "learning_rate": 1.8686868686868687e-07,
      "loss": -0.0,
      "num_tokens": 33566471.0,
      "reward": 0.8070312738418579,
      "reward_std": 0.051721714437007904,
      "rewards/video_r1_accuracy_reward/mean": 0.796875,
      "rewards/video_r1_accuracy_reward/std": 0.40390563011169434,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 323
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 111.0,
      "completions/max_terminated_length": 111.0,
      "completions/mean_length": 41.0546875,
      "completions/mean_terminated_length": 41.0546875,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 0.9953482151031494,
      "epoch": 0.8181818181818182,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 2.8781511783599854,
      "learning_rate": 1.8434343434343434e-07,
      "loss": -0.0,
      "num_tokens": 33663286.0,
      "reward": 0.814453125,
      "reward_std": 0.10993649065494537,
      "rewards/video_r1_accuracy_reward/mean": 0.8046875,
      "rewards/video_r1_accuracy_reward/std": 0.3979988098144531,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 324
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 85.0,
      "completions/max_terminated_length": 85.0,
      "completions/mean_length": 42.5,
      "completions/mean_terminated_length": 42.5,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.0891491174697876,
      "epoch": 0.8207070707070707,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 2.3374204635620117,
      "learning_rate": 1.818181818181818e-07,
      "loss": 0.0,
      "num_tokens": 33767966.0,
      "reward": 0.814453125,
      "reward_std": 0.020992232486605644,
      "rewards/video_r1_accuracy_reward/mean": 0.8046875,
      "rewards/video_r1_accuracy_reward/std": 0.3979988098144531,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 325
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 84.0,
      "completions/max_terminated_length": 84.0,
      "completions/mean_length": 41.984375,
      "completions/mean_terminated_length": 41.984375,
      "completions/min_length": 7.0,
      "completions/min_terminated_length": 7.0,
      "entropy": 1.0346364974975586,
      "epoch": 0.8232323232323232,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 4.739593029022217,
      "learning_rate": 1.7929292929292927e-07,
      "loss": -0.0,
      "num_tokens": 33880908.0,
      "reward": 0.7250000238418579,
      "reward_std": 0.059319622814655304,
      "rewards/video_r1_accuracy_reward/mean": 0.7109375,
      "rewards/video_r1_accuracy_reward/std": 0.45510825514793396,
      "rewards/video_r1_format_reward/mean": 0.9921875,
      "rewards/video_r1_format_reward/std": 0.0883883461356163,
      "step": 326
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 99.0,
      "completions/max_terminated_length": 99.0,
      "completions/mean_length": 41.0078125,
      "completions/mean_terminated_length": 41.0078125,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 1.019489049911499,
      "epoch": 0.8257575757575758,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.7676767676767676e-07,
      "loss": 0.0,
      "num_tokens": 33992085.0,
      "reward": 0.8218749761581421,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.8125,
      "rewards/video_r1_accuracy_reward/std": 0.39184603095054626,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 327
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 81.0,
      "completions/max_terminated_length": 81.0,
      "completions/mean_length": 39.9140625,
      "completions/mean_terminated_length": 39.9140625,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.0503275394439697,
      "epoch": 0.8282828282828283,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 1.8522095680236816,
      "learning_rate": 1.7424242424242425e-07,
      "loss": 0.0,
      "num_tokens": 34098090.0,
      "reward": 0.814453125,
      "reward_std": 0.020992232486605644,
      "rewards/video_r1_accuracy_reward/mean": 0.8046875,
      "rewards/video_r1_accuracy_reward/std": 0.3979988098144531,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 328
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 96.0,
      "completions/max_terminated_length": 96.0,
      "completions/mean_length": 39.390625,
      "completions/mean_terminated_length": 39.390625,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.0890402793884277,
      "epoch": 0.8308080808080808,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.717171717171717e-07,
      "loss": 0.0,
      "num_tokens": 34197460.0,
      "reward": 0.5843750238418579,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.5625,
      "rewards/video_r1_accuracy_reward/std": 0.49802759289741516,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 329
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 85.0,
      "completions/max_terminated_length": 85.0,
      "completions/mean_length": 40.515625,
      "completions/mean_terminated_length": 40.515625,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.0883162021636963,
      "epoch": 0.8333333333333334,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.6919191919191918e-07,
      "loss": 0.0,
      "num_tokens": 34295606.0,
      "reward": 0.762499988079071,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.75,
      "rewards/video_r1_accuracy_reward/std": 0.434714138507843,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 330
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 85.0,
      "completions/max_terminated_length": 85.0,
      "completions/mean_length": 38.5546875,
      "completions/mean_terminated_length": 38.5546875,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 0.9863436222076416,
      "epoch": 0.8358585858585859,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 0.8784723281860352,
      "learning_rate": 1.6666666666666665e-07,
      "loss": -0.0,
      "num_tokens": 34403301.0,
      "reward": 0.7699218988418579,
      "reward_std": 0.020992232486605644,
      "rewards/video_r1_accuracy_reward/mean": 0.7578125,
      "rewards/video_r1_accuracy_reward/std": 0.4300905168056488,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 331
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 98.0,
      "completions/max_terminated_length": 98.0,
      "completions/mean_length": 39.4296875,
      "completions/mean_terminated_length": 39.4296875,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.0463258028030396,
      "epoch": 0.8383838383838383,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 2.3829569816589355,
      "learning_rate": 1.6414141414141414e-07,
      "loss": -0.0,
      "num_tokens": 34498212.0,
      "reward": 0.8515625,
      "reward_std": 0.05497056990861893,
      "rewards/video_r1_accuracy_reward/mean": 0.84375,
      "rewards/video_r1_accuracy_reward/std": 0.3645188808441162,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 332
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 71.0,
      "completions/max_terminated_length": 71.0,
      "completions/mean_length": 39.7734375,
      "completions/mean_terminated_length": 39.7734375,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.0662963390350342,
      "epoch": 0.8409090909090909,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.7944689989089966,
      "learning_rate": 1.6161616161616163e-07,
      "loss": 0.0,
      "num_tokens": 34595415.0,
      "reward": 0.666015625,
      "reward_std": 0.04847751557826996,
      "rewards/video_r1_accuracy_reward/mean": 0.6484375,
      "rewards/video_r1_accuracy_reward/std": 0.4793342351913452,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 333
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 92.0,
      "completions/max_terminated_length": 92.0,
      "completions/mean_length": 41.2421875,
      "completions/mean_terminated_length": 41.2421875,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.035414695739746,
      "epoch": 0.8434343434343434,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.5909090909090907e-07,
      "loss": 0.0,
      "num_tokens": 34693454.0,
      "reward": 0.762499988079071,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.75,
      "rewards/video_r1_accuracy_reward/std": 0.434714138507843,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 334
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 74.0,
      "completions/max_terminated_length": 74.0,
      "completions/mean_length": 40.5625,
      "completions/mean_terminated_length": 40.5625,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 0.992094874382019,
      "epoch": 0.8459595959595959,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 4.349198341369629,
      "learning_rate": 1.5656565656565657e-07,
      "loss": -0.0,
      "num_tokens": 34788646.0,
      "reward": 0.740234375,
      "reward_std": 0.05272950232028961,
      "rewards/video_r1_accuracy_reward/mean": 0.7265625,
      "rewards/video_r1_accuracy_reward/std": 0.447474867105484,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 335
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 93.0,
      "completions/max_terminated_length": 93.0,
      "completions/mean_length": 38.90625,
      "completions/mean_terminated_length": 38.90625,
      "completions/min_length": 19.0,
      "completions/min_terminated_length": 19.0,
      "entropy": 1.0111061334609985,
      "epoch": 0.8484848484848485,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.5404040404040403e-07,
      "loss": 0.0,
      "num_tokens": 34894258.0,
      "reward": 0.762499988079071,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.75,
      "rewards/video_r1_accuracy_reward/std": 0.434714138507843,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 336
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 89.0,
      "completions/max_terminated_length": 89.0,
      "completions/mean_length": 41.484375,
      "completions/mean_terminated_length": 41.484375,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.1294646263122559,
      "epoch": 0.851010101010101,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.3885397911071777,
      "learning_rate": 1.5151515151515152e-07,
      "loss": -0.0,
      "num_tokens": 34997048.0,
      "reward": 0.6214843988418579,
      "reward_std": 0.05272950232028961,
      "rewards/video_r1_accuracy_reward/mean": 0.6015625,
      "rewards/video_r1_accuracy_reward/std": 0.4915000796318054,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 337
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 384.0,
      "completions/max_terminated_length": 83.0,
      "completions/mean_length": 44.3671875,
      "completions/mean_terminated_length": 41.69291305541992,
      "completions/min_length": 17.0,
      "completions/min_terminated_length": 17.0,
      "entropy": 1.0213119983673096,
      "epoch": 0.8535353535353535,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 0.31857022643089294,
      "learning_rate": 1.4898989898989896e-07,
      "loss": 0.0,
      "num_tokens": 35089799.0,
      "reward": 0.7621093988418579,
      "reward_std": 0.0011048543965443969,
      "rewards/video_r1_accuracy_reward/mean": 0.75,
      "rewards/video_r1_accuracy_reward/std": 0.434714138507843,
      "rewards/video_r1_format_reward/mean": 0.9921875,
      "rewards/video_r1_format_reward/std": 0.0883883461356163,
      "step": 338
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 95.0,
      "completions/max_terminated_length": 95.0,
      "completions/mean_length": 43.2265625,
      "completions/mean_terminated_length": 43.2265625,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.0955770015716553,
      "epoch": 0.8560606060606061,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 1.10260009765625,
      "learning_rate": 1.4646464646464646e-07,
      "loss": -0.0,
      "num_tokens": 35189916.0,
      "reward": 0.688281238079071,
      "reward_std": 0.027485284954309464,
      "rewards/video_r1_accuracy_reward/mean": 0.671875,
      "rewards/video_r1_accuracy_reward/std": 0.4713755249977112,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 339
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 72.0,
      "completions/max_terminated_length": 72.0,
      "completions/mean_length": 39.0078125,
      "completions/mean_terminated_length": 39.0078125,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 1.0868955850601196,
      "epoch": 0.8585858585858586,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 2.8857719898223877,
      "learning_rate": 1.4393939393939395e-07,
      "loss": 0.0,
      "num_tokens": 35299301.0,
      "reward": 0.5992187261581421,
      "reward_std": 0.06946974992752075,
      "rewards/video_r1_accuracy_reward/mean": 0.578125,
      "rewards/video_r1_accuracy_reward/std": 0.4957992732524872,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 340
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 106.0,
      "completions/max_terminated_length": 106.0,
      "completions/mean_length": 39.328125,
      "completions/mean_terminated_length": 39.328125,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.0072779655456543,
      "epoch": 0.8611111111111112,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 2.1714649200439453,
      "learning_rate": 1.4141414141414141e-07,
      "loss": -0.0,
      "num_tokens": 35401055.0,
      "reward": 0.643750011920929,
      "reward_std": 0.05497056990861893,
      "rewards/video_r1_accuracy_reward/mean": 0.625,
      "rewards/video_r1_accuracy_reward/std": 0.4860251843929291,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 341
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 87.0,
      "completions/max_terminated_length": 87.0,
      "completions/mean_length": 38.5,
      "completions/mean_terminated_length": 38.5,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.0291508436203003,
      "epoch": 0.8636363636363636,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 5.02390718460083,
      "learning_rate": 1.3888888888888888e-07,
      "loss": -0.0,
      "num_tokens": 35502143.0,
      "reward": 0.651171863079071,
      "reward_std": 0.062466755509376526,
      "rewards/video_r1_accuracy_reward/mean": 0.6328125,
      "rewards/video_r1_accuracy_reward/std": 0.4839322865009308,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 342
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 113.0,
      "completions/max_terminated_length": 113.0,
      "completions/mean_length": 40.9453125,
      "completions/mean_terminated_length": 40.9453125,
      "completions/min_length": 19.0,
      "completions/min_terminated_length": 19.0,
      "entropy": 1.0368475914001465,
      "epoch": 0.8661616161616161,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3636363636363635e-07,
      "loss": 0.0,
      "num_tokens": 35610304.0,
      "reward": 0.6437499523162842,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.625,
      "rewards/video_r1_accuracy_reward/std": 0.4860251843929291,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 343
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 77.0,
      "completions/max_terminated_length": 77.0,
      "completions/mean_length": 40.2734375,
      "completions/mean_terminated_length": 40.2734375,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.0711452960968018,
      "epoch": 0.8686868686868687,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 2.956529140472412,
      "learning_rate": 1.3383838383838384e-07,
      "loss": -0.0,
      "num_tokens": 35720067.0,
      "reward": 0.5992187261581421,
      "reward_std": 0.05922255665063858,
      "rewards/video_r1_accuracy_reward/mean": 0.578125,
      "rewards/video_r1_accuracy_reward/std": 0.4957992732524872,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 344
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 93.0,
      "completions/max_terminated_length": 93.0,
      "completions/mean_length": 42.9140625,
      "completions/mean_terminated_length": 42.9140625,
      "completions/min_length": 24.0,
      "completions/min_terminated_length": 24.0,
      "entropy": 1.1545445919036865,
      "epoch": 0.8712121212121212,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.3131313131313133e-07,
      "loss": 0.0,
      "num_tokens": 35819584.0,
      "reward": 0.5249999761581421,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.5,
      "rewards/video_r1_accuracy_reward/std": 0.5019646286964417,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 345
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 79.0,
      "completions/max_terminated_length": 79.0,
      "completions/mean_length": 43.2421875,
      "completions/mean_terminated_length": 43.2421875,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.1233935356140137,
      "epoch": 0.8737373737373737,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.0883266925811768,
      "learning_rate": 1.2878787878787877e-07,
      "loss": 0.0,
      "num_tokens": 35926447.0,
      "reward": 0.6066405773162842,
      "reward_std": 0.04847751557826996,
      "rewards/video_r1_accuracy_reward/mean": 0.5859375,
      "rewards/video_r1_accuracy_reward/std": 0.49449479579925537,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 346
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 94.0,
      "completions/max_terminated_length": 94.0,
      "completions/mean_length": 44.890625,
      "completions/mean_terminated_length": 44.890625,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.1265748739242554,
      "epoch": 0.8762626262626263,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 1.2674680948257446,
      "learning_rate": 1.2626262626262626e-07,
      "loss": 0.0,
      "num_tokens": 36026345.0,
      "reward": 0.814453125,
      "reward_std": 0.020992232486605644,
      "rewards/video_r1_accuracy_reward/mean": 0.8046875,
      "rewards/video_r1_accuracy_reward/std": 0.3979988098144531,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 347
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 93.0,
      "completions/max_terminated_length": 93.0,
      "completions/mean_length": 39.609375,
      "completions/mean_terminated_length": 39.609375,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.024735450744629,
      "epoch": 0.8787878787878788,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2373737373737373e-07,
      "loss": 0.0,
      "num_tokens": 36128031.0,
      "reward": 0.5249999761581421,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.5,
      "rewards/video_r1_accuracy_reward/std": 0.5019646286964417,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 348
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 78.0,
      "completions/max_terminated_length": 78.0,
      "completions/mean_length": 44.5546875,
      "completions/mean_terminated_length": 44.5546875,
      "completions/min_length": 24.0,
      "completions/min_terminated_length": 24.0,
      "entropy": 1.0595622062683105,
      "epoch": 0.8813131313131313,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.2121212121212122e-07,
      "loss": 0.0,
      "num_tokens": 36225950.0,
      "reward": 0.762499988079071,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.75,
      "rewards/video_r1_accuracy_reward/std": 0.434714138507843,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 349
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 90.0,
      "completions/max_terminated_length": 90.0,
      "completions/mean_length": 39.4765625,
      "completions/mean_terminated_length": 39.4765625,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.0925328731536865,
      "epoch": 0.8838383838383839,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 2.037301778793335,
      "learning_rate": 1.1868686868686869e-07,
      "loss": 0.0,
      "num_tokens": 36321339.0,
      "reward": 0.614062488079071,
      "reward_std": 0.0937061756849289,
      "rewards/video_r1_accuracy_reward/mean": 0.59375,
      "rewards/video_r1_accuracy_reward/std": 0.4930621087551117,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 350
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 77.0,
      "completions/max_terminated_length": 77.0,
      "completions/mean_length": 39.9609375,
      "completions/mean_terminated_length": 39.9609375,
      "completions/min_length": 19.0,
      "completions/min_terminated_length": 19.0,
      "entropy": 1.053574800491333,
      "epoch": 0.8863636363636364,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 2.9315614700317383,
      "learning_rate": 1.1616161616161615e-07,
      "loss": 0.0,
      "num_tokens": 36427534.0,
      "reward": 0.7179687023162842,
      "reward_std": 0.051721714437007904,
      "rewards/video_r1_accuracy_reward/mean": 0.703125,
      "rewards/video_r1_accuracy_reward/std": 0.45867621898651123,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 351
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 90.0,
      "completions/max_terminated_length": 90.0,
      "completions/mean_length": 41.765625,
      "completions/mean_terminated_length": 41.765625,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 1.0955579280853271,
      "epoch": 0.8888888888888888,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 2.852755069732666,
      "learning_rate": 1.1363636363636363e-07,
      "loss": 0.0,
      "num_tokens": 36517216.0,
      "reward": 0.666015625,
      "reward_std": 0.05272950232028961,
      "rewards/video_r1_accuracy_reward/mean": 0.6484375,
      "rewards/video_r1_accuracy_reward/std": 0.4793342351913452,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 352
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 100.0,
      "completions/max_terminated_length": 100.0,
      "completions/mean_length": 41.1484375,
      "completions/mean_terminated_length": 41.1484375,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.0147664546966553,
      "epoch": 0.8914141414141414,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 3.0033349990844727,
      "learning_rate": 1.111111111111111e-07,
      "loss": -0.0,
      "num_tokens": 36631027.0,
      "reward": 0.4433593451976776,
      "reward_std": 0.05272950232028961,
      "rewards/video_r1_accuracy_reward/mean": 0.4140625,
      "rewards/video_r1_accuracy_reward/std": 0.49449479579925537,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 353
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 92.0,
      "completions/max_terminated_length": 92.0,
      "completions/mean_length": 41.4140625,
      "completions/mean_terminated_length": 41.4140625,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 1.0533812046051025,
      "epoch": 0.8939393939393939,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 2.2410473823547363,
      "learning_rate": 1.0858585858585859e-07,
      "loss": -0.0,
      "num_tokens": 36731992.0,
      "reward": 0.666015625,
      "reward_std": 0.06297669559717178,
      "rewards/video_r1_accuracy_reward/mean": 0.6484375,
      "rewards/video_r1_accuracy_reward/std": 0.4793342351913452,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 354
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 87.0,
      "completions/max_terminated_length": 87.0,
      "completions/mean_length": 43.734375,
      "completions/mean_terminated_length": 43.734375,
      "completions/min_length": 24.0,
      "completions/min_terminated_length": 24.0,
      "entropy": 1.12202787399292,
      "epoch": 0.8964646464646465,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.0606060606060605e-07,
      "loss": 0.0,
      "num_tokens": 36848974.0,
      "reward": 0.703125,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.6875,
      "rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 355
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 128.0,
      "completions/max_terminated_length": 128.0,
      "completions/mean_length": 43.1796875,
      "completions/mean_terminated_length": 43.1796875,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.0783835649490356,
      "epoch": 0.898989898989899,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 4.910101890563965,
      "learning_rate": 1.0353535353535353e-07,
      "loss": 0.0,
      "num_tokens": 36956045.0,
      "reward": 0.7179687023162842,
      "reward_std": 0.027485283091664314,
      "rewards/video_r1_accuracy_reward/mean": 0.703125,
      "rewards/video_r1_accuracy_reward/std": 0.45867621898651123,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 356
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 120.0,
      "completions/max_terminated_length": 120.0,
      "completions/mean_length": 38.59375,
      "completions/mean_terminated_length": 38.59375,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.0902092456817627,
      "epoch": 0.9015151515151515,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 1.2750098705291748,
      "learning_rate": 1.01010101010101e-07,
      "loss": -0.0,
      "num_tokens": 37047657.0,
      "reward": 0.6066405773162842,
      "reward_std": 0.03072948195040226,
      "rewards/video_r1_accuracy_reward/mean": 0.5859375,
      "rewards/video_r1_accuracy_reward/std": 0.49449479579925537,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 357
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 107.0,
      "completions/max_terminated_length": 107.0,
      "completions/mean_length": 45.1796875,
      "completions/mean_terminated_length": 45.1796875,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.1572962999343872,
      "epoch": 0.9040404040404041,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 2.8475706577301025,
      "learning_rate": 9.848484848484848e-08,
      "loss": 0.0,
      "num_tokens": 37147744.0,
      "reward": 0.829296886920929,
      "reward_std": 0.04847751557826996,
      "rewards/video_r1_accuracy_reward/mean": 0.8203125,
      "rewards/video_r1_accuracy_reward/std": 0.3854354918003082,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 358
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 96.0,
      "completions/max_terminated_length": 96.0,
      "completions/mean_length": 42.375,
      "completions/mean_terminated_length": 42.375,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 1.0927423238754272,
      "epoch": 0.9065656565656566,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 2.16367244720459,
      "learning_rate": 9.595959595959594e-08,
      "loss": 0.0,
      "num_tokens": 37259240.0,
      "reward": 0.7921874523162842,
      "reward_std": 0.0737217366695404,
      "rewards/video_r1_accuracy_reward/mean": 0.78125,
      "rewards/video_r1_accuracy_reward/std": 0.41502299904823303,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 359
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 119.0,
      "completions/max_terminated_length": 119.0,
      "completions/mean_length": 38.2265625,
      "completions/mean_terminated_length": 38.2265625,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.0505998134613037,
      "epoch": 0.9090909090909091,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 2.0892300605773926,
      "learning_rate": 9.343434343434344e-08,
      "loss": -0.0,
      "num_tokens": 37351021.0,
      "reward": 0.5992187261581421,
      "reward_std": 0.04198446497321129,
      "rewards/video_r1_accuracy_reward/mean": 0.578125,
      "rewards/video_r1_accuracy_reward/std": 0.4957992732524872,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 360
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0078125,
      "completions/max_length": 384.0,
      "completions/max_terminated_length": 95.0,
      "completions/mean_length": 45.1015625,
      "completions/mean_terminated_length": 42.433067321777344,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 0.9791218042373657,
      "epoch": 0.9116161616161617,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 3.24613881111145,
      "learning_rate": 9.09090909090909e-08,
      "loss": 0.0,
      "num_tokens": 37463530.0,
      "reward": 0.7101562023162842,
      "reward_std": 0.04958236962556839,
      "rewards/video_r1_accuracy_reward/mean": 0.6953125,
      "rewards/video_r1_accuracy_reward/std": 0.46208351850509644,
      "rewards/video_r1_format_reward/mean": 0.9921875,
      "rewards/video_r1_format_reward/std": 0.0883883461356163,
      "step": 361
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 107.0,
      "completions/max_terminated_length": 107.0,
      "completions/mean_length": 43.7890625,
      "completions/mean_terminated_length": 43.7890625,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.1600117683410645,
      "epoch": 0.9141414141414141,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 2.4082045555114746,
      "learning_rate": 8.838383838383838e-08,
      "loss": -0.0,
      "num_tokens": 37546919.0,
      "reward": 0.4507812559604645,
      "reward_std": 0.090959832072258,
      "rewards/video_r1_accuracy_reward/mean": 0.421875,
      "rewards/video_r1_accuracy_reward/std": 0.4957992732524872,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 362
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 74.0,
      "completions/max_terminated_length": 74.0,
      "completions/mean_length": 40.6640625,
      "completions/mean_terminated_length": 40.6640625,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 1.0563390254974365,
      "epoch": 0.9166666666666666,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 8.585858585858585e-08,
      "loss": 0.0,
      "num_tokens": 37642260.0,
      "reward": 0.5843749642372131,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.5625,
      "rewards/video_r1_accuracy_reward/std": 0.49802759289741516,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 363
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 78.0,
      "completions/max_terminated_length": 78.0,
      "completions/mean_length": 40.3359375,
      "completions/mean_terminated_length": 40.3359375,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.0542752742767334,
      "epoch": 0.9191919191919192,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 2.0764169692993164,
      "learning_rate": 8.333333333333333e-08,
      "loss": 0.0,
      "num_tokens": 37752327.0,
      "reward": 0.539843738079071,
      "reward_std": 0.0937061756849289,
      "rewards/video_r1_accuracy_reward/mean": 0.515625,
      "rewards/video_r1_accuracy_reward/std": 0.5017194747924805,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 364
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 76.0,
      "completions/max_terminated_length": 76.0,
      "completions/mean_length": 39.75,
      "completions/mean_terminated_length": 39.75,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.0389442443847656,
      "epoch": 0.9217171717171717,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 8.080808080808082e-08,
      "loss": 0.0,
      "num_tokens": 37840775.0,
      "reward": 0.703125,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.6875,
      "rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 365
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 82.0,
      "completions/max_terminated_length": 82.0,
      "completions/mean_length": 43.8359375,
      "completions/mean_terminated_length": 43.8359375,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.0553057193756104,
      "epoch": 0.9242424242424242,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 1.5467336177825928,
      "learning_rate": 7.828282828282828e-08,
      "loss": 0.0,
      "num_tokens": 37939746.0,
      "reward": 0.6363281011581421,
      "reward_std": 0.06297669559717178,
      "rewards/video_r1_accuracy_reward/mean": 0.6171875,
      "rewards/video_r1_accuracy_reward/std": 0.4879830479621887,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 366
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 93.0,
      "completions/max_terminated_length": 93.0,
      "completions/mean_length": 40.75,
      "completions/mean_terminated_length": 40.75,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.1079405546188354,
      "epoch": 0.9267676767676768,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 2.4892454147338867,
      "learning_rate": 7.575757575757576e-08,
      "loss": 0.0,
      "num_tokens": 38040370.0,
      "reward": 0.7550780773162842,
      "reward_std": 0.020992232486605644,
      "rewards/video_r1_accuracy_reward/mean": 0.7421875,
      "rewards/video_r1_accuracy_reward/std": 0.43914905190467834,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 367
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 75.0,
      "completions/max_terminated_length": 75.0,
      "completions/mean_length": 39.0625,
      "completions/mean_terminated_length": 39.0625,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.0692365169525146,
      "epoch": 0.9292929292929293,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.5513192415237427,
      "learning_rate": 7.323232323232323e-08,
      "loss": 0.0,
      "num_tokens": 38149562.0,
      "reward": 0.5992187261581421,
      "reward_std": 0.051721714437007904,
      "rewards/video_r1_accuracy_reward/mean": 0.578125,
      "rewards/video_r1_accuracy_reward/std": 0.4957992732524872,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 368
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 107.0,
      "completions/max_terminated_length": 107.0,
      "completions/mean_length": 38.9609375,
      "completions/mean_terminated_length": 38.9609375,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 0.9736450910568237,
      "epoch": 0.9318181818181818,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 2.6586601734161377,
      "learning_rate": 7.070707070707071e-08,
      "loss": 0.0,
      "num_tokens": 38255013.0,
      "reward": 0.7550780773162842,
      "reward_std": 0.09046198427677155,
      "rewards/video_r1_accuracy_reward/mean": 0.7421875,
      "rewards/video_r1_accuracy_reward/std": 0.43914905190467834,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 369
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 114.0,
      "completions/max_terminated_length": 114.0,
      "completions/mean_length": 42.0625,
      "completions/mean_terminated_length": 42.0625,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 1.0568822622299194,
      "epoch": 0.9343434343434344,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.516427993774414,
      "learning_rate": 6.818181818181817e-08,
      "loss": -0.0,
      "num_tokens": 38344869.0,
      "reward": 0.6214843392372131,
      "reward_std": 0.05272950232028961,
      "rewards/video_r1_accuracy_reward/mean": 0.6015625,
      "rewards/video_r1_accuracy_reward/std": 0.4915000796318054,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 370
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 78.0,
      "completions/max_terminated_length": 78.0,
      "completions/mean_length": 41.4609375,
      "completions/mean_terminated_length": 41.4609375,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 1.016709566116333,
      "epoch": 0.9368686868686869,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 6.565656565656566e-08,
      "loss": 0.0,
      "num_tokens": 38439152.0,
      "reward": 0.703125,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.6875,
      "rewards/video_r1_accuracy_reward/std": 0.4653336703777313,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 371
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 96.0,
      "completions/max_terminated_length": 96.0,
      "completions/mean_length": 41.9921875,
      "completions/mean_terminated_length": 41.9921875,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.039790391921997,
      "epoch": 0.9393939393939394,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 1.421544075012207,
      "learning_rate": 6.313131313131313e-08,
      "loss": -0.0,
      "num_tokens": 38528439.0,
      "reward": 0.5472656488418579,
      "reward_std": 0.03072948195040226,
      "rewards/video_r1_accuracy_reward/mean": 0.5234375,
      "rewards/video_r1_accuracy_reward/std": 0.5014128684997559,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 372
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 121.0,
      "completions/max_terminated_length": 121.0,
      "completions/mean_length": 39.5390625,
      "completions/mean_terminated_length": 39.5390625,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 1.0156900882720947,
      "epoch": 0.9419191919191919,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 1.0844476222991943,
      "learning_rate": 6.060606060606061e-08,
      "loss": -0.0,
      "num_tokens": 38620764.0,
      "reward": 0.8070312738418579,
      "reward_std": 0.027485284954309464,
      "rewards/video_r1_accuracy_reward/mean": 0.796875,
      "rewards/video_r1_accuracy_reward/std": 0.40390563011169434,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 373
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 91.0,
      "completions/max_terminated_length": 91.0,
      "completions/mean_length": 41.515625,
      "completions/mean_terminated_length": 41.515625,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.1009835004806519,
      "epoch": 0.9444444444444444,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 2.7400307655334473,
      "learning_rate": 5.8080808080808076e-08,
      "loss": 0.0,
      "num_tokens": 38722894.0,
      "reward": 0.443359375,
      "reward_std": 0.058214765042066574,
      "rewards/video_r1_accuracy_reward/mean": 0.4140625,
      "rewards/video_r1_accuracy_reward/std": 0.49449479579925537,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 374
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 73.0,
      "completions/max_terminated_length": 73.0,
      "completions/mean_length": 39.25,
      "completions/mean_terminated_length": 39.25,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 0.9999622106552124,
      "epoch": 0.946969696969697,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 3.1559898853302,
      "learning_rate": 5.555555555555555e-08,
      "loss": -0.0,
      "num_tokens": 38820150.0,
      "reward": 0.740234375,
      "reward_std": 0.08995203673839569,
      "rewards/video_r1_accuracy_reward/mean": 0.7265625,
      "rewards/video_r1_accuracy_reward/std": 0.447474867105484,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 375
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 75.0,
      "completions/max_terminated_length": 75.0,
      "completions/mean_length": 38.1484375,
      "completions/mean_terminated_length": 38.1484375,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.0171918869018555,
      "epoch": 0.9494949494949495,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 1.901557445526123,
      "learning_rate": 5.303030303030303e-08,
      "loss": 0.0,
      "num_tokens": 38918177.0,
      "reward": 0.814453125,
      "reward_std": 0.020992232486605644,
      "rewards/video_r1_accuracy_reward/mean": 0.8046875,
      "rewards/video_r1_accuracy_reward/std": 0.3979988098144531,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 376
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 85.0,
      "completions/max_terminated_length": 85.0,
      "completions/mean_length": 40.1484375,
      "completions/mean_terminated_length": 40.1484375,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.0424983501434326,
      "epoch": 0.952020202020202,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 1.0092467069625854,
      "learning_rate": 5.05050505050505e-08,
      "loss": -0.0,
      "num_tokens": 39023852.0,
      "reward": 0.5101562738418579,
      "reward_std": 0.027485284954309464,
      "rewards/video_r1_accuracy_reward/mean": 0.484375,
      "rewards/video_r1_accuracy_reward/std": 0.5017194747924805,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 377
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 99.0,
      "completions/max_terminated_length": 99.0,
      "completions/mean_length": 38.2890625,
      "completions/mean_terminated_length": 38.2890625,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 0.9870564341545105,
      "epoch": 0.9545454545454546,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 4.797979797979797e-08,
      "loss": 0.0,
      "num_tokens": 39119657.0,
      "reward": 0.6437499523162842,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.625,
      "rewards/video_r1_accuracy_reward/std": 0.4860251843929291,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 378
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 88.0,
      "completions/max_terminated_length": 88.0,
      "completions/mean_length": 41.375,
      "completions/mean_terminated_length": 41.375,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.1362022161483765,
      "epoch": 0.9570707070707071,
      "frac_reward_zero_std": 0.75,
      "grad_norm": 3.5773839950561523,
      "learning_rate": 4.545454545454545e-08,
      "loss": -0.0,
      "num_tokens": 39225209.0,
      "reward": 0.673046886920929,
      "reward_std": 0.07057460397481918,
      "rewards/video_r1_accuracy_reward/mean": 0.65625,
      "rewards/video_r1_accuracy_reward/std": 0.47682511806488037,
      "rewards/video_r1_format_reward/mean": 0.9921875,
      "rewards/video_r1_format_reward/std": 0.0883883461356163,
      "step": 379
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 72.0,
      "completions/max_terminated_length": 72.0,
      "completions/mean_length": 39.5703125,
      "completions/mean_terminated_length": 39.5703125,
      "completions/min_length": 24.0,
      "completions/min_terminated_length": 24.0,
      "entropy": 0.9890860319137573,
      "epoch": 0.9595959595959596,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 4.292929292929292e-08,
      "loss": 0.0,
      "num_tokens": 39322978.0,
      "reward": 0.7625000476837158,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.75,
      "rewards/video_r1_accuracy_reward/std": 0.434714138507843,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 380
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 94.0,
      "completions/max_terminated_length": 94.0,
      "completions/mean_length": 42.4375,
      "completions/mean_terminated_length": 42.4375,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.0359077453613281,
      "epoch": 0.9621212121212122,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 1.6127235889434814,
      "learning_rate": 4.040404040404041e-08,
      "loss": 0.0,
      "num_tokens": 39427570.0,
      "reward": 0.7699218988418579,
      "reward_std": 0.06297669559717178,
      "rewards/video_r1_accuracy_reward/mean": 0.7578125,
      "rewards/video_r1_accuracy_reward/std": 0.4300905168056488,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 381
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 85.0,
      "completions/max_terminated_length": 85.0,
      "completions/mean_length": 43.0,
      "completions/mean_terminated_length": 43.0,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.0722706317901611,
      "epoch": 0.9646464646464646,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 1.7954891920089722,
      "learning_rate": 3.787878787878788e-08,
      "loss": 0.0,
      "num_tokens": 39525642.0,
      "reward": 0.8218749761581421,
      "reward_std": 0.07920700311660767,
      "rewards/video_r1_accuracy_reward/mean": 0.8125,
      "rewards/video_r1_accuracy_reward/std": 0.39184603095054626,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 382
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 83.0,
      "completions/max_terminated_length": 83.0,
      "completions/mean_length": 41.796875,
      "completions/mean_terminated_length": 41.796875,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.0892385244369507,
      "epoch": 0.9671717171717171,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 2.684129238128662,
      "learning_rate": 3.5353535353535353e-08,
      "loss": -0.0,
      "num_tokens": 39620936.0,
      "reward": 0.717968761920929,
      "reward_std": 0.05922255665063858,
      "rewards/video_r1_accuracy_reward/mean": 0.703125,
      "rewards/video_r1_accuracy_reward/std": 0.45867621898651123,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 383
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 67.0,
      "completions/max_terminated_length": 67.0,
      "completions/mean_length": 37.6328125,
      "completions/mean_terminated_length": 37.6328125,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 0.9923283457756042,
      "epoch": 0.9696969696969697,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 3.282828282828283e-08,
      "loss": 0.0,
      "num_tokens": 39719425.0,
      "reward": 0.6437499523162842,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.625,
      "rewards/video_r1_accuracy_reward/std": 0.4860251843929291,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 384
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 76.0,
      "completions/max_terminated_length": 76.0,
      "completions/mean_length": 40.5703125,
      "completions/mean_terminated_length": 40.5703125,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 0.9553056955337524,
      "epoch": 0.9722222222222222,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 3.0303030303030305e-08,
      "loss": 0.0,
      "num_tokens": 39825234.0,
      "reward": 0.762499988079071,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.75,
      "rewards/video_r1_accuracy_reward/std": 0.434714138507843,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 385
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 94.0,
      "completions/max_terminated_length": 94.0,
      "completions/mean_length": 41.765625,
      "completions/mean_terminated_length": 41.765625,
      "completions/min_length": 20.0,
      "completions/min_terminated_length": 20.0,
      "entropy": 1.0995062589645386,
      "epoch": 0.9747474747474747,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 1.4076322317123413,
      "learning_rate": 2.7777777777777774e-08,
      "loss": 0.0,
      "num_tokens": 39925308.0,
      "reward": 0.7550780773162842,
      "reward_std": 0.020992232486605644,
      "rewards/video_r1_accuracy_reward/mean": 0.7421875,
      "rewards/video_r1_accuracy_reward/std": 0.43914905190467834,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 386
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 80.0,
      "completions/max_terminated_length": 80.0,
      "completions/mean_length": 42.9453125,
      "completions/mean_terminated_length": 42.9453125,
      "completions/min_length": 25.0,
      "completions/min_terminated_length": 25.0,
      "entropy": 0.9817566275596619,
      "epoch": 0.9772727272727273,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 0.9628924131393433,
      "learning_rate": 2.525252525252525e-08,
      "loss": 0.0,
      "num_tokens": 40033437.0,
      "reward": 0.9332031011581421,
      "reward_std": 0.020992232486605644,
      "rewards/video_r1_accuracy_reward/mean": 0.9296875,
      "rewards/video_r1_accuracy_reward/std": 0.2566775679588318,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 387
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 87.0,
      "completions/max_terminated_length": 87.0,
      "completions/mean_length": 43.6328125,
      "completions/mean_terminated_length": 43.6328125,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.0863628387451172,
      "epoch": 0.9797979797979798,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 2.400270462036133,
      "learning_rate": 2.2727272727272725e-08,
      "loss": 0.0,
      "num_tokens": 40137166.0,
      "reward": 0.6734374761581421,
      "reward_std": 0.06946974992752075,
      "rewards/video_r1_accuracy_reward/mean": 0.65625,
      "rewards/video_r1_accuracy_reward/std": 0.47682511806488037,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 388
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 76.0,
      "completions/max_terminated_length": 76.0,
      "completions/mean_length": 39.21875,
      "completions/mean_terminated_length": 39.21875,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 0.9649280309677124,
      "epoch": 0.9823232323232324,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 1.8059344291687012,
      "learning_rate": 2.0202020202020204e-08,
      "loss": 0.0,
      "num_tokens": 40238914.0,
      "reward": 0.6808593273162842,
      "reward_std": 0.04847751557826996,
      "rewards/video_r1_accuracy_reward/mean": 0.6640625,
      "rewards/video_r1_accuracy_reward/std": 0.47417303919792175,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 389
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 106.0,
      "completions/max_terminated_length": 106.0,
      "completions/mean_length": 41.9375,
      "completions/mean_terminated_length": 41.9375,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 1.0557193756103516,
      "epoch": 0.9848484848484849,
      "frac_reward_zero_std": 0.875,
      "grad_norm": 3.0474207401275635,
      "learning_rate": 1.7676767676767677e-08,
      "loss": -0.0,
      "num_tokens": 40355434.0,
      "reward": 0.7699218988418579,
      "reward_std": 0.062466755509376526,
      "rewards/video_r1_accuracy_reward/mean": 0.7578125,
      "rewards/video_r1_accuracy_reward/std": 0.4300905168056488,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 390
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 102.0,
      "completions/max_terminated_length": 102.0,
      "completions/mean_length": 39.8359375,
      "completions/mean_terminated_length": 39.8359375,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 0.9982803463935852,
      "epoch": 0.9873737373737373,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 0.9107327461242676,
      "learning_rate": 1.5151515151515152e-08,
      "loss": -0.0,
      "num_tokens": 40468109.0,
      "reward": 0.651171863079071,
      "reward_std": 0.020992232486605644,
      "rewards/video_r1_accuracy_reward/mean": 0.6328125,
      "rewards/video_r1_accuracy_reward/std": 0.4839322865009308,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 391
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 100.0,
      "completions/max_terminated_length": 100.0,
      "completions/mean_length": 44.515625,
      "completions/mean_terminated_length": 44.515625,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.0974993705749512,
      "epoch": 0.98989898989899,
      "frac_reward_zero_std": 0.9375,
      "grad_norm": 1.1084349155426025,
      "learning_rate": 1.2626262626262625e-08,
      "loss": 0.0,
      "num_tokens": 40570487.0,
      "reward": 0.6585937142372131,
      "reward_std": 0.027485283091664314,
      "rewards/video_r1_accuracy_reward/mean": 0.640625,
      "rewards/video_r1_accuracy_reward/std": 0.481702595949173,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 392
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 81.0,
      "completions/max_terminated_length": 81.0,
      "completions/mean_length": 38.5,
      "completions/mean_terminated_length": 38.5,
      "completions/min_length": 21.0,
      "completions/min_terminated_length": 21.0,
      "entropy": 1.0579657554626465,
      "epoch": 0.9924242424242424,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 1.0101010101010102e-08,
      "loss": 0.0,
      "num_tokens": 40667895.0,
      "reward": 0.5843750238418579,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.5625,
      "rewards/video_r1_accuracy_reward/std": 0.49802759289741516,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 393
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 90.0,
      "completions/max_terminated_length": 90.0,
      "completions/mean_length": 39.2890625,
      "completions/mean_terminated_length": 39.2890625,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "entropy": 1.0520402193069458,
      "epoch": 0.9949494949494949,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 1.8841626644134521,
      "learning_rate": 7.575757575757576e-09,
      "loss": -0.0,
      "num_tokens": 40764580.0,
      "reward": 0.6804687976837158,
      "reward_std": 0.04958236962556839,
      "rewards/video_r1_accuracy_reward/mean": 0.6640625,
      "rewards/video_r1_accuracy_reward/std": 0.47417303919792175,
      "rewards/video_r1_format_reward/mean": 0.9921875,
      "rewards/video_r1_format_reward/std": 0.0883883461356163,
      "step": 394
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 99.0,
      "completions/max_terminated_length": 99.0,
      "completions/mean_length": 41.4140625,
      "completions/mean_terminated_length": 41.4140625,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 1.0266304016113281,
      "epoch": 0.9974747474747475,
      "frac_reward_zero_std": 0.8125,
      "grad_norm": 4.306983470916748,
      "learning_rate": 5.050505050505051e-09,
      "loss": 0.0,
      "num_tokens": 40861593.0,
      "reward": 0.651171863079071,
      "reward_std": 0.06297669559717178,
      "rewards/video_r1_accuracy_reward/mean": 0.6328125,
      "rewards/video_r1_accuracy_reward/std": 0.4839322865009308,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 395
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 74.0,
      "completions/max_terminated_length": 74.0,
      "completions/mean_length": 38.1640625,
      "completions/mean_terminated_length": 38.1640625,
      "completions/min_length": 23.0,
      "completions/min_terminated_length": 23.0,
      "entropy": 0.9408200979232788,
      "epoch": 1.0,
      "frac_reward_zero_std": 1.0,
      "grad_norm": 0.0,
      "learning_rate": 2.5252525252525255e-09,
      "loss": 0.0,
      "num_tokens": 40963742.0,
      "reward": 0.762499988079071,
      "reward_std": 0.0,
      "rewards/video_r1_accuracy_reward/mean": 0.75,
      "rewards/video_r1_accuracy_reward/std": 0.434714138507843,
      "rewards/video_r1_format_reward/mean": 1.0,
      "rewards/video_r1_format_reward/std": 0.0,
      "step": 396
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 396,
  "num_input_tokens_seen": 40963742,
  "num_train_epochs": 1,
  "save_steps": 159,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}