#!/usr/bin/env bash
# Resume training from a given checkpoint and additionally apply a
# velocity-field supervision term at t_c (intended to strengthen one-step
# behavior).
#
# The training job is started in the background under nohup; its stdout and
# stderr are written to ${LOG_FILE} in the current working directory.
set -euo pipefail

NUM_GPUS=4
DATA_DIR="/gemini/space/zhaozy/dataset/Imagenet/imagenet_256"
SEMANTIC_FEATURES_DIR="/gemini/space/zhaozy/dataset/Imagenet/imagenet_256/imagenet_256_features/dinov2-vit-b_tmp/gpu0"
# User-specified checkpoint to resume training from.
RESUME_CKPT="/gemini/space/zhaozy/guzhenyu/UAVFlow/UAV_Flow_base/exps/jsflow-experiment/samples/REG/exps/jsflow-experiment-0.75-0.01-one-step/checkpoints/1920000.pt"
# Weight of the newly added t_c velocity-field loss (raise or lower as needed).
TC_VEL_COEFF=2
LOG_FILE="jsflow-experiment-0.75-0.01-tcvel.log"

# Fail fast on a missing checkpoint. The launch below is backgrounded, so
# `set -e` cannot observe its failure and the script would otherwise still
# report a successful launch.
if [[ ! -e "${RESUME_CKPT}" ]]; then
  printf 'error: resume checkpoint not found: %s\n' "${RESUME_CKPT}" >&2
  exit 1
fi

# Options placed before train.py belong to `accelerate launch`; everything
# after train.py is passed through to the training script.
nohup accelerate launch --multi_gpu --num_processes "${NUM_GPUS}" --mixed_precision bf16 train.py \
  --report-to wandb \
  --allow-tf32 \
  --mixed-precision bf16 \
  --seed 0 \
  --path-type linear \
  --prediction v \
  --weighting uniform \
  --model SiT-XL/2 \
  --enc-type dinov2-vit-b \
  --encoder-depth 8 \
  --proj-coeff 0.5 \
  --output-dir exps \
  --exp-name jsflow-experiment-0.75-0.01-one-step \
  --batch-size 256 \
  --data-dir "${DATA_DIR}" \
  --semantic-features-dir "${SEMANTIC_FEATURES_DIR}" \
  --learning-rate 0.00005 \
  --t-c 0.75 \
  --cls 0.005 \
  --ot-cls \
  --resume-from-ckpt "${RESUME_CKPT}" \
  --tc-velocity-loss-coeff "${TC_VEL_COEFF}" \
  > "${LOG_FILE}" 2>&1 &
# Keep the PID of the background job so it can be monitored or stopped later.
TRAIN_PID=$!

echo "Launched resume training with tc velocity loss."
printf 'PID: %s, log: %s\n' "${TRAIN_PID}" "${LOG_FILE}"