# Training configuration with four top-level sections: data, algorithm,
# worker (actor / rollout / ref / reward) and trainer.
#
# NOTE(review): this file had lost all line breaks and indentation, which made
# it unparseable. The nesting below was reconstructed from key order; values
# and key order are unchanged. Confirm the layout against the consumer's
# config schema.
#
# Empty-string values ("") are placeholders in this file — presumably supplied
# via command-line overrides at launch; verify against the launch script.

data:
  train_files: ""
  val_files: ""
  prompt_key: problem
  answer_key: answer
  image_key: images
  video_key: videos
  image_dir: null
  video_fps: 2.0
  max_prompt_length: 16384
  max_response_length: 4096
  rollout_batch_size: 128
  mini_rollout_batch_size: null
  val_batch_size: 1024
  format_prompt: ""
  override_chat_template: null
  shuffle: true
  seed: 1
  min_pixels: 3136  # 56 * 56
  max_pixels: 1048576  # 1024 * 1024
  resize_size: 336
  filter_overlong_prompts: false

algorithm:
  adv_estimator: grpo
  # NOTE(review): disable_kl is true while use_kl_loss is also true and
  # kl_coef is non-zero. These look contradictory — confirm which one the
  # trainer honors.
  disable_kl: true
  use_kl_loss: true
  kl_penalty: low_var_kl
  kl_coef: 1.0e-2
  online_filtering: false
  # The filter_* keys presumably only matter when online_filtering is true —
  # TODO confirm.
  filter_key: accuracy
  filter_low: 0.0
  filter_high: 1.0

worker:
  actor:
    global_batch_size: 32
    micro_batch_size_per_device_for_update: 1
    micro_batch_size_per_device_for_experience: 1
    max_grad_norm: 1.0
    padding_free: true
    dynamic_batching: true
    ulysses_size: 1
    model:
      model_path: ""
      enable_gradient_checkpointing: true
      trust_remote_code: false
      freeze_vision_tower: true
    optim:
      lr: 5.0e-6
      weight_decay: 1.0e-2
      strategy: adamw
      lr_warmup_ratio: 0.0
    fsdp:
      enable_full_shard: true
      enable_cpu_offload: false
      enable_rank0_init: true
    offload:
      offload_params: false
      offload_optimizer: false

  rollout:
    n: 8
    temperature: 1.0
    top_p: 1.0
    limit_images: 0
    gpu_memory_utilization: 0.7
    enforce_eager: false
    enable_chunked_prefill: false
    # NOTE(review): tensor_parallel_size (4) is larger than the GPU count
    # declared under trainer (nnodes 1 x n_gpus_per_node 2). Confirm one of
    # them is overridden at launch.
    tensor_parallel_size: 4
    disable_tqdm: true
    # Equals data.max_prompt_length + data.max_response_length (16384 + 4096).
    max_num_batched_tokens: 20480
    # Sampling settings listed separately from the ones above; by its name,
    # applied during validation — TODO confirm.
    val_override_config:
      temperature: 0.7
      top_p: 0.95
      n: 1

  ref:
    fsdp:
      enable_full_shard: true
      enable_cpu_offload: false
      enable_rank0_init: true
    offload:
      offload_params: false

  reward:
    reward_type: batch
    # Format is "<path to .py file>:<function name>".
    reward_function: EasyR1/verl/reward_function/onethinker_reward.py:compute_score

trainer:
  total_epochs: 1
  max_steps: null
  project_name: easy_r1
  experiment_name: ""
  logger: ["file", "wandb"]
  nnodes: 1
  n_gpus_per_node: 2
  max_try_make_batch: 20
  # NOTE(review): -1 presumably disables periodic validation — confirm.
  val_freq: -1
  val_before_train: false
  val_only: false
  val_generations_to_log: 3
  # NOTE(review): save_freq 1 with save_limit 100000 looks like "checkpoint
  # every step and keep effectively all of them" — confirm disk budget.
  save_freq: 1
  save_limit: 100000
  save_model_only: false
  save_checkpoint_path: ""
  load_checkpoint_path: null
  find_last_checkpoint: false