| data: | |
| train_files: "" | |
| val_files: "" | |
| prompt_key: problem | |
| answer_key: answer | |
| image_key: images | |
| video_key: videos | |
| image_dir: null | |
| video_fps: 2.0 | |
| max_prompt_length: 16384 | |
| max_response_length: 4096 | |
| rollout_batch_size: 128 | |
| mini_rollout_batch_size: null | |
| val_batch_size: 1024 | |
| format_prompt: "" | |
| override_chat_template: null | |
| shuffle: true | |
| seed: 1 | |
| min_pixels: 3136 | |
| max_pixels: 1048576 | |
| resize_size: 336 | |
| filter_overlong_prompts: false | |
| algorithm: | |
| adv_estimator: grpo | |
| disable_kl: true | |
| use_kl_loss: true | |
| kl_penalty: low_var_kl | |
| kl_coef: 1.0e-2 | |
| online_filtering: false | |
| filter_key: accuracy | |
| filter_low: 0.0 | |
| filter_high: 1.0 | |
| worker: | |
| actor: | |
| global_batch_size: 32 | |
| micro_batch_size_per_device_for_update: 1 | |
| micro_batch_size_per_device_for_experience: 1 | |
| max_grad_norm: 1.0 | |
| padding_free: true | |
| dynamic_batching: true | |
| ulysses_size: 1 | |
| model: | |
| model_path: "" | |
| enable_gradient_checkpointing: true | |
| trust_remote_code: false | |
| freeze_vision_tower: true | |
| optim: | |
| lr: 5.0e-6 | |
| weight_decay: 1.0e-2 | |
| strategy: adamw | |
| lr_warmup_ratio: 0.0 | |
| fsdp: | |
| enable_full_shard: true | |
| enable_cpu_offload: false | |
| enable_rank0_init: true | |
| offload: | |
| offload_params: false | |
| offload_optimizer: false | |
| rollout: | |
| n: 8 | |
| temperature: 1.0 | |
| top_p: 1.0 | |
| limit_images: 0 | |
| gpu_memory_utilization: 0.7 | |
| enforce_eager: false | |
| enable_chunked_prefill: false | |
| tensor_parallel_size: 4 | |
| disable_tqdm: true | |
| max_num_batched_tokens: 20480 | |
| val_override_config: | |
| temperature: 0.7 | |
| top_p: 0.95 | |
| n: 1 | |
| ref: | |
| fsdp: | |
| enable_full_shard: true | |
| enable_cpu_offload: false | |
| enable_rank0_init: true | |
| offload: | |
| offload_params: false | |
| reward: | |
| reward_type: batch | |
| reward_function: EasyR1/verl/reward_function/onethinker_reward.py:compute_score | |
| trainer: | |
| total_epochs: 1 | |
| max_steps: null | |
| project_name: easy_r1 | |
| experiment_name: "" | |
| logger: ["file", "wandb"] | |
| nnodes: 1 | |
| n_gpus_per_node: 2 | |
| max_try_make_batch: 20 | |
| val_freq: -1 | |
| val_before_train: false | |
| val_only: false | |
| val_generations_to_log: 3 | |
| save_freq: 1 | |
| save_limit: 100000 | |
| save_model_only: false | |
| save_checkpoint_path: "" | |
| load_checkpoint_path: null | |
| find_last_checkpoint: false | |