model_id: /home/ec2-user/_Zhengyue/download/models/Meta-Llama-3.1-8B-Instruct
model_family: llama-3.1
LoRA:
  r: 0
  alpha: 128
  dropout: 0.05
used_policy: true
used_strategy: true
step_token: <step>,</step>
answer_token: <answer>,</answer>
loss_type: grad_ascent
tune_vision_tower: false
tune_mm_projector: true
tune_language_model: true
data_path: /home/ec2-user/_Zhengyue/workspace/Step-DPO/sft/Safety-Reasoning/safety_reasoning-mixed_benign4k_beh7k_jailbreak6k_vanilla_6k_helpful23k.json
split: with_strategy_policy
batch_size: 1
gradient_accumulation_steps: 32
max_grad_norm: 1.0
num_epochs: 3
max_length: 4096
save_dir: models/llama8b_${num_epochs}_epochs_lr${lr}_${model_family}_${split}
save_steps: 100
lr: 5.0e-06
weight_decay: 0.01
seed: 233
workers: 6
lr_scheduler_type: cosine
warmup_ratio: 0.0
max_train_steps: -1
report_to: wandb
resume_from_checkpoint: ''