| #!/bin/bash |
|
|
| export LOWRES_RESIZE=384x32 |
| export VIDEO_RESIZE="0x32" |
| export HIGHRES_BASE="0x32" |
| export MAXRES=1536 |
| export MINRES=0 |
| export VIDEO_MAXRES=448 |
| export VIDEO_MINRES=288 |
| export PAD2STRIDE=1 |
| export FORCE_NO_DOWNSAMPLE=1 |
| export LOAD_VISION_EARLY=1 |
|
|
| export PYTHONPATH=/path/to/Ola:$PYTHONPATH |
|
|
| EXP_NAME="ola_7b_stage2" |
| DATA='/path/to/data.json' |
|
|
| CHECKPOINT='/path/to/Ola_Image' |
|
|
| echo $MASTER_ADDR; echo $nnode; echo $nrank |
|
|
| torchrun --nproc_per_node 8 --nnodes=$nnode --node_rank=$nrank --master_addr=$MASTER_ADDR --master_port=12324 \ |
| ola/train/train.py \ |
| --deepspeed ./scripts/zero2.json \ |
| --run_name $EXP_NAME \ |
| --model_name_or_path $CHECKPOINT \ |
| --pretrain_speech_projector $CHECKPOINT/speech_projector.bin \ |
| --vision_tower $VISION_TOWER \ |
| --mm_projector_type ola_mlp \ |
| --speech_projector_type "linear" \ |
| --mm_vision_select_layer -1 \ |
| --mm_use_im_patch_token False \ |
| --tune_speech_adapter False \ |
| --version qwen_1_5 \ |
| --data_path $DATA \ |
| --bf16 True \ |
| --output_dir ./checkpoints/$EXP_NAME \ |
| --sample_independently True \ |
| --fix_speech_encoder True \ |
| --freeze_mm_vision_tower True \ |
| --speech_encoder "./pretrained/large-v3.pt" \ |
| --music_encoder "./pretrained/BEATs_iter3_plus_AS2M_finetuned_on_AS2M_cpt2.pt" \ |
| --speech_encoder_type "dual" \ |
| --speech_encoder_hidden_size 2048 \ |
| --speech_encoder_ds_rate 10 \ |
| --num_train_epochs 1 \ |
| --per_device_train_batch_size 2 \ |
| --per_device_eval_batch_size 1 \ |
| --gradient_accumulation_steps 1 \ |
| --evaluation_strategy "no" \ |
| --save_strategy "steps" \ |
| --save_steps 1000 \ |
| --save_total_limit 1 \ |
| --learning_rate 1e-5 \ |
| --weight_decay 0.0 \ |
| --warmup_ratio 0.05 \ |
| --min_lr_ratio 0.01 \ |
| --lr_scheduler_type "cosine" \ |
| --logging_steps 1 \ |
| --tf32 True \ |
| --model_max_length 16384 \ |
| --gradient_checkpointing True \ |
| --dataloader_num_workers 8 \ |
| --frames_upbound 64 \ |
| --lazy_preprocess True \ |
| --report_to none |