# Launch script for Wan2.1 LoRA training (scripts/wan2.1/train_lora.py).
#
# The first section is a disabled reference command for the T2V 14B model;
# the second section is the active I2V command. Paths below are absolute
# paths on the training machine; edit them before running elsewhere.

# ---------------------------------------------------------------------------
# Reference command for T2V (disabled)
# ---------------------------------------------------------------------------
# export MODEL_NAME="models/Diffusion_Transformer/Wan2.1-T2V-14B"
# export DATASET_NAME="datasets/internal_datasets/"
# export DATASET_META_NAME="datasets/internal_datasets/metadata.json"
# # NCCL_IB_DISABLE=1 and NCCL_P2P_DISABLE=1 are used in multi nodes without RDMA.
# # export NCCL_IB_DISABLE=1
# # export NCCL_P2P_DISABLE=1
# NCCL_DEBUG=INFO
#
# accelerate launch --mixed_precision="bf16" scripts/wan2.1/train_lora.py \
#   --config_path="config/wan2.1/wan_civitai.yaml" \
#   --pretrained_model_name_or_path=$MODEL_NAME \
#   --train_data_dir=$DATASET_NAME \
#   --train_data_meta=$DATASET_META_NAME \
#   --image_sample_size=1024 \
#   --video_sample_size=256 \
#   --token_sample_size=512 \
#   --video_sample_stride=2 \
#   --video_sample_n_frames=81 \
#   --train_batch_size=1 \
#   --video_repeat=1 \
#   --gradient_accumulation_steps=1 \
#   --dataloader_num_workers=8 \
#   --num_train_epochs=100 \
#   --checkpointing_steps=50 \
#   --learning_rate=1e-04 \
#   --seed=42 \
#   --output_dir="output_dir" \
#   --gradient_checkpointing \
#   --mixed_precision="bf16" \
#   --adam_weight_decay=3e-2 \
#   --adam_epsilon=1e-10 \
#   --vae_mini_batch=1 \
#   --max_grad_norm=0.05 \
#   --random_hw_adapt \
#   --training_with_video_token_length \
#   --enable_bucket \
#   --uniform_sampling \
#   --low_vram

# ---------------------------------------------------------------------------
# Training command for I2V
# ---------------------------------------------------------------------------
export MODEL_NAME="/mnt/data/public_ckpt/videogen_public/Wan-AI/Wan2.1-I2V-14B-480P"
# Intentionally empty in this configuration.
# NOTE(review): presumably the metadata file then has to reference clips by
# absolute path -- confirm against the dataset loader.
export DATASET_NAME=""
export DATASET_META_NAME="/mnt/data/2002-ssd/filtered_clips_from_static/lora_train_combined_datasets/combined_captions_1021.json"
# export CHECKPOINT="checkpoint-800"

# NCCL_IB_DISABLE=1 and NCCL_P2P_DISABLE=1 are used in multi nodes without RDMA.
# export NCCL_IB_DISABLE=1
# export NCCL_P2P_DISABLE=1

# NCCL_DEBUG is given as a per-command environment assignment so that it is
# placed in the environment of the launched process. The variable expansions
# are double-quoted so that paths containing whitespace stay a single argument
# (an empty DATASET_NAME still yields the single word "--train_data_dir=").
#
# NOTE(review): the output directory name mentions "8batchsize" while
# --train_batch_size is 1; presumably the remainder comes from the number of
# processes -- confirm before relying on the name.
NCCL_DEBUG=INFO accelerate launch \
  --use_deepspeed \
  --deepspeed_config_file config/zero_stage2_config.json \
  --deepspeed_multinode_launcher standard \
  scripts/wan2.1/train_lora.py \
  --config_path="config/wan2.1/wan_civitai.yaml" \
  --pretrained_model_name_or_path="$MODEL_NAME" \
  --train_data_dir="$DATASET_NAME" \
  --train_data_meta="$DATASET_META_NAME" \
  --image_sample_size=512 \
  --video_sample_size=632 \
  --token_sample_size=632 \
  --video_sample_stride=1 \
  --video_sample_n_frames=81 \
  --train_batch_size=1 \
  --video_repeat=1 \
  --gradient_accumulation_steps=1 \
  --dataloader_num_workers=8 \
  --num_train_epochs=1000 \
  --checkpointing_steps=500 \
  --learning_rate=2e-05 \
  --seed=42 \
  --output_dir="output_dir_14b_1022_7directions_8batchsize_2e-5lr" \
  --gradient_checkpointing \
  --mixed_precision="bf16" \
  --adam_weight_decay=3e-2 \
  --adam_epsilon=1e-10 \
  --vae_mini_batch=1 \
  --max_grad_norm=0.05 \
  --random_hw_adapt \
  --training_with_video_token_length \
  --enable_bucket \
  --low_vram \
  --train_mode="i2v" \
  --save_state

# Optional flags, currently disabled. To enable one, append " \" to the
# "--save_state" line above and move the flag directly beneath it; a "#"
# comment line inside the continuation chain would end the command early.
#   --resume_from_checkpoint="latest"
#   --uniform_sampling