jsflow / REG /train_resume_tc_velocity.sh
xiangzai's picture
Add files using upload-large-folder tool
b65e56d verified
#!/usr/bin/env bash
# Resume training from a given checkpoint while adding a velocity-field
# supervision term at t_c (intended to strengthen single-step behaviour).
#
# The training job is started in the background under nohup; its stdout and
# stderr are written to LOG_FILE. train.py is referenced by a relative path,
# so run this script from the directory that contains it.
#
# Every setting below can be overridden from the environment, e.g.
#   NUM_GPUS=8 TC_VEL_COEFF=1 bash train_resume_tc_velocity.sh
set -euo pipefail

# Print an error message to stderr and abort.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Number of processes handed to `accelerate launch --num_processes`.
NUM_GPUS="${NUM_GPUS:-4}"
DATA_DIR="${DATA_DIR:-/gemini/space/zhaozy/dataset/Imagenet/imagenet_256}"
SEMANTIC_FEATURES_DIR="${SEMANTIC_FEATURES_DIR:-/gemini/space/zhaozy/dataset/Imagenet/imagenet_256/imagenet_256_features/dinov2-vit-b_tmp/gpu0}"
# User-specified checkpoint to resume training from.
RESUME_CKPT="${RESUME_CKPT:-/gemini/space/zhaozy/guzhenyu/UAVFlow/UAV_Flow_base/exps/jsflow-experiment/samples/REG/exps/jsflow-experiment-0.75-0.01-one-step/checkpoints/1920000.pt}"
# Weight of the newly added t_c velocity-field loss (tune up or down as needed).
TC_VEL_COEFF="${TC_VEL_COEFF:-2}"
# File that receives stdout and stderr of the background job.
LOG_FILE="${LOG_FILE:-jsflow-experiment-0.75-0.01-tcvel.log}"

# Fail fast on the terminal. Once the job is detached, any of these problems
# would only be visible inside the log file.
command -v accelerate >/dev/null || die "'accelerate' not found in PATH"
[[ -f train.py ]] || die "train.py not found in ${PWD}"
[[ -d "${DATA_DIR}" ]] || die "data directory not found: ${DATA_DIR}"
[[ -d "${SEMANTIC_FEATURES_DIR}" ]] \
  || die "semantic features directory not found: ${SEMANTIC_FEATURES_DIR}"
[[ -f "${RESUME_CKPT}" ]] || die "resume checkpoint not found: ${RESUME_CKPT}"

# Arguments forwarded to train.py, kept in an array so that no value is
# subject to word splitting and no line-continuation backslash can go missing.
train_args=(
  --report-to wandb
  --allow-tf32
  --mixed-precision bf16
  --seed 0
  --path-type linear
  --prediction v
  --weighting uniform
  --model SiT-XL/2
  --enc-type dinov2-vit-b
  --encoder-depth 8
  --proj-coeff 0.5
  --output-dir exps
  --exp-name jsflow-experiment-0.75-0.01-one-step
  --batch-size 256
  --data-dir "${DATA_DIR}"
  --semantic-features-dir "${SEMANTIC_FEATURES_DIR}"
  --learning-rate 0.00005
  --t-c 0.75
  --cls 0.005
  --ot-cls
  --resume-from-ckpt "${RESUME_CKPT}"
  --tc-velocity-loss-coeff "${TC_VEL_COEFF}"
)

nohup accelerate launch \
  --multi_gpu \
  --num_processes "${NUM_GPUS}" \
  --mixed_precision bf16 \
  train.py "${train_args[@]}" \
  > "${LOG_FILE}" 2>&1 &
# Keep the PID of the detached job so it can be monitored or stopped later.
train_pid=$!

echo "Launched resume training with tc velocity loss."
printf 'PID: %s\nLog: %s\n' "${train_pid}" "${LOG_FILE}"