# RL/model/EasyR1/examples/config_grpo.yaml
# WangYe007: "Upload folder using huggingface_hub" (commit d65b589, verified)
# Dataset and prompt settings.
# NOTE(review): the nesting below was reconstructed; the flattened form left
# `data:` as null with every setting as a stray top-level key.
data:
  # Empty strings here are presumably overridden at launch time - confirm.
  train_files: ""
  val_files: ""
  # Names of the dataset fields to read.
  prompt_key: problem
  answer_key: answer
  image_key: images
  video_key: videos
  image_dir: null
  video_fps: 2.0
  # Token budgets; their sum (20480) matches rollout.max_num_batched_tokens.
  max_prompt_length: 16384
  max_response_length: 4096
  rollout_batch_size: 128
  mini_rollout_batch_size: null
  val_batch_size: 1024
  format_prompt: ""
  override_chat_template: null
  shuffle: true
  seed: 1
  # 3136 = 56 * 56 and 1048576 = 1024 * 1024.
  min_pixels: 3136
  max_pixels: 1048576
  resize_size: 336
  filter_overlong_prompts: false

# Advantage estimation, KL regularisation and online filtering settings.
# NOTE(review): the nesting below was reconstructed; the flattened form left
# `algorithm:` as null with every setting as a stray top-level key.
algorithm:
  adv_estimator: grpo
  # NOTE(review): disable_kl is true while use_kl_loss is also true and a
  # kl_coef is set. Presumably disable_kl takes precedence and the KL
  # settings below are unused - confirm this is intentional.
  disable_kl: true
  use_kl_loss: true
  kl_penalty: low_var_kl
  kl_coef: 1.0e-2
  # Filtering is off, so filter_key / filter_low / filter_high are
  # presumably unused in this configuration.
  online_filtering: false
  filter_key: accuracy
  filter_low: 0.0
  filter_high: 1.0

# Per-role worker settings: actor (training), rollout (generation),
# ref (reference policy) and reward.
# NOTE(review): the nesting below was reconstructed from the repeated key
# groups (fsdp/offload appear twice; n/temperature/top_p appear twice).
# The flattened form held these as duplicate keys in a single mapping, which
# most parsers resolve silently as last-value-wins. Verify against the
# consumer's schema.
worker:
  actor:
    global_batch_size: 32
    micro_batch_size_per_device_for_update: 1
    micro_batch_size_per_device_for_experience: 1
    max_grad_norm: 1.0
    padding_free: true
    dynamic_batching: true
    ulysses_size: 1
    model:
      # Empty string is presumably overridden at launch time - confirm.
      model_path: ""
      enable_gradient_checkpointing: true
      trust_remote_code: false
      freeze_vision_tower: true
    optim:
      lr: 5.0e-6
      weight_decay: 1.0e-2
      strategy: adamw
      lr_warmup_ratio: 0.0
    fsdp:
      enable_full_shard: true
      enable_cpu_offload: false
      enable_rank0_init: true
    offload:
      offload_params: false
      offload_optimizer: false

  rollout:
    # Sampling settings used for training rollouts.
    n: 8
    temperature: 1.0
    top_p: 1.0
    limit_images: 0
    gpu_memory_utilization: 0.7
    enforce_eager: false
    enable_chunked_prefill: false
    # NOTE(review): 4 exceeds trainer.nnodes * trainer.n_gpus_per_node
    # (1 * 2) as written in this file - confirm the GPU count is overridden
    # at launch time.
    tensor_parallel_size: 4
    disable_tqdm: true
    # 20480 = data.max_prompt_length (16384) + data.max_response_length (4096).
    max_num_batched_tokens: 20480
    # Sampling overrides, presumably applied during validation only.
    val_override_config:
      temperature: 0.7
      top_p: 0.95
      n: 1

  ref:
    fsdp:
      enable_full_shard: true
      enable_cpu_offload: false
      enable_rank0_init: true
    offload:
      offload_params: false

  reward:
    reward_type: batch
    # Format appears to be <path to python file>:<function name>.
    reward_function: EasyR1/verl/reward_function/onethinker_reward.py:compute_score

# Training loop, logging, validation and checkpointing settings.
# NOTE(review): the nesting below was reconstructed; the flattened form left
# `trainer:` as null with every setting as a stray top-level key.
trainer:
  total_epochs: 1
  max_steps: null
  project_name: easy_r1
  # Empty string is presumably overridden at launch time - confirm.
  experiment_name: ""
  logger: ["file", "wandb"]
  # 1 node * 2 GPUs as written; see worker.rollout.tensor_parallel_size (4).
  nnodes: 1
  n_gpus_per_node: 2
  max_try_make_batch: 20
  # Presumably -1 disables periodic validation - confirm.
  val_freq: -1
  val_before_train: false
  val_only: false
  val_generations_to_log: 3
  # NOTE(review): save_freq 1 with save_limit 100000 presumably keeps a
  # checkpoint for every step - confirm disk usage is acceptable.
  save_freq: 1
  save_limit: 100000
  save_model_only: false
  # Empty string is presumably overridden at launch time - confirm.
  save_checkpoint_path: ""
  load_checkpoint_path: null
  find_last_checkpoint: false