WangYe007
/

RL

Model card Files Files and versions

RL / model /EasyR1 /examples /config_grpo.yaml

WangYe007's picture

Upload folder using huggingface_hub

d65b589 verified 20 days ago

history blame contribute delete

2.33 kB

	# Dataset, prompt and batching options.
	# NOTE(review): every line of this capture starts with one tab, which YAML
	# does not allow as indentation; strip that leading tab before loading.
	# The options below are nested with two spaces so they are children of
	# `data:` instead of siblings of it (flattened, `data` itself was null).
	data:
	  # Empty placeholders; presumably supplied per run (e.g. CLI override) - confirm.
	  train_files: ""
	  val_files: ""

	  # Names of the dataset fields to read; verify against the actual dataset.
	  prompt_key: problem
	  answer_key: answer
	  image_key: images
	  video_key: videos
	  image_dir: null
	  video_fps: 2.0

	  # 16384 + 4096 = 20480, the same number used for
	  # worker.rollout.max_num_batched_tokens in this file.
	  max_prompt_length: 16384
	  max_response_length: 4096

	  rollout_batch_size: 128
	  mini_rollout_batch_size: null
	  val_batch_size: 1024

	  # Empty string / null: presumably "use the built-in default" - confirm.
	  format_prompt: ""
	  override_chat_template: null

	  shuffle: true
	  seed: 1

	  # 3136 = 56 * 56 and 1048576 = 1024 * 1024.
	  min_pixels: 3136
	  max_pixels: 1048576
	  # NOTE(review): resize_size is not among the options I recall from upstream
	  # EasyR1; looks like a fork-specific key - confirm the loader accepts it.
	  resize_size: 336
	  filter_overlong_prompts: false

	# Advantage estimation, KL and sample-filtering options.
	# The options are nested with two spaces so they are children of
	# `algorithm:` instead of siblings of it (flattened, `algorithm` was null).
	algorithm:
	  adv_estimator: grpo

	  # NOTE(review): disable_kl is true while use_kl_loss is also true and a
	  # non-zero kl_coef is given; these look contradictory - confirm which
	  # setting the trainer actually honors.
	  disable_kl: true
	  use_kl_loss: true
	  kl_penalty: low_var_kl
	  # Written with a decimal point and a signed exponent so that YAML 1.1
	  # loaders also read it as a float rather than a string.
	  kl_coef: 1.0e-2

	  # Filtering is switched off; filter_key / filter_low / filter_high
	  # presumably only matter when online_filtering is true - confirm.
	  online_filtering: false
	  filter_key: accuracy
	  filter_low: 0.0
	  filter_high: 1.0

	# Per-role worker settings: actor, rollout, ref and reward.
	# Nesting is restored with two-space indentation. Flattened, the keys `fsdp`,
	# `offload`, `offload_params`, `n`, `temperature` and `top_p` each appeared
	# twice at one level, i.e. as duplicate keys that most parsers resolve
	# silently as last-wins.
	# NOTE(review): the hierarchy follows the upstream EasyR1 layout as I recall
	# it; verify against the schema used by the loader.
	worker:
	  actor:
	    global_batch_size: 32
	    micro_batch_size_per_device_for_update: 1
	    micro_batch_size_per_device_for_experience: 1
	    max_grad_norm: 1.0
	    padding_free: true
	    dynamic_batching: true
	    ulysses_size: 1
	    model:
	      # Empty placeholder; presumably supplied per run - confirm.
	      model_path: ""
	      enable_gradient_checkpointing: true
	      trust_remote_code: false
	      freeze_vision_tower: true
	    optim:
	      lr: 5.0e-6
	      weight_decay: 1.0e-2
	      strategy: adamw
	      lr_warmup_ratio: 0.0
	    fsdp:
	      enable_full_shard: true
	      enable_cpu_offload: false
	      enable_rank0_init: true
	    offload:
	      offload_params: false
	      offload_optimizer: false

	  rollout:
	    n: 8
	    temperature: 1.0
	    top_p: 1.0
	    limit_images: 0
	    gpu_memory_utilization: 0.7
	    enforce_eager: false
	    enable_chunked_prefill: false
	    # NOTE(review): trainer.n_gpus_per_node is 2 and trainer.nnodes is 1 in
	    # this file, fewer than the 4 requested here - confirm an override exists.
	    tensor_parallel_size: 4
	    disable_tqdm: true
	    # 20480 = data.max_prompt_length (16384) + data.max_response_length (4096).
	    max_num_batched_tokens: 20480
	    # Sampling overrides; by the key name these presumably apply during
	    # validation only - confirm.
	    val_override_config:
	      temperature: 0.7
	      top_p: 0.95
	      n: 1

	  ref:
	    fsdp:
	      enable_full_shard: true
	      enable_cpu_offload: false
	      enable_rank0_init: true
	    offload:
	      offload_params: false

	  reward:
	    reward_type: batch
	    # Format appears to be <path to .py file>:<function name>; the path is
	    # relative, so it presumably depends on the working directory - confirm.
	    reward_function: EasyR1/verl/reward_function/onethinker_reward.py:compute_score

	# Run-level options: schedule, logging, cluster shape, validation, checkpoints.
	# The options are nested with two spaces so they are children of
	# `trainer:` instead of siblings of it (flattened, `trainer` was null).
	trainer:
	  total_epochs: 1
	  max_steps: null

	  project_name: easy_r1
	  # Empty placeholder; presumably supplied per run - confirm.
	  experiment_name: ""
	  logger: ["file", "wandb"]

	  # NOTE(review): nnodes * n_gpus_per_node = 2, while
	  # worker.rollout.tensor_parallel_size is 4 in this file - confirm the
	  # launch command overrides one of them.
	  nnodes: 1
	  n_gpus_per_node: 2
	  max_try_make_batch: 20

	  # -1 presumably disables periodic validation - confirm.
	  val_freq: -1
	  val_before_train: false
	  val_only: false
	  val_generations_to_log: 3

	  # NOTE(review): save_freq 1 with save_limit 100000 looks like "save at
	  # every interval and keep them all"; check available disk space.
	  save_freq: 1
	  save_limit: 100000
	  save_model_only: false
	  # Empty placeholder; presumably supplied per run - confirm.
	  save_checkpoint_path: ""
	  load_checkpoint_path: null
	  find_last_checkpoint: false