xiangzai
/

jsflow

Model card Files Files and versions

jsflow / REG /train_resume_tc_velocity.sh

xiangzai's picture

Add files using upload-large-folder tool

b65e56d verified 9 days ago

history blame contribute delete

1.45 kB

	#!/usr/bin/env bash
	# Resume training from a given checkpoint and additionally enable a
	# velocity-field supervision term at t_c (meant to strengthen one-step
	# behaviour).
	#
	# Usage: run from the directory that contains train.py. Training is started
	# in the background under nohup; stdout and stderr go to ${LOG_FILE} in the
	# current directory.
	set -euo pipefail

	# Print an error message to stderr and abort the script.
	die() {
	  printf 'error: %s\n' "$*" >&2
	  exit 1
	}

	# Number of processes handed to `accelerate launch --num_processes`.
	readonly NUM_GPUS=4

	readonly DATA_DIR="/gemini/space/zhaozy/dataset/Imagenet/imagenet_256"
	readonly SEMANTIC_FEATURES_DIR="/gemini/space/zhaozy/dataset/Imagenet/imagenet_256/imagenet_256_features/dinov2-vit-b_tmp/gpu0"

	# User-specified checkpoint to resume from.
	readonly RESUME_CKPT="/gemini/space/zhaozy/guzhenyu/UAVFlow/UAV_Flow_base/exps/jsflow-experiment/samples/REG/exps/jsflow-experiment-0.75-0.01-one-step/checkpoints/1920000.pt"

	# Weight of the added t_c velocity-field loss (raise or lower as needed).
	readonly TC_VEL_COEFF=2

	# File that receives all output of the background training job.
	readonly LOG_FILE="jsflow-experiment-0.75-0.01-tcvel.log"

	# `set -e` cannot observe failures inside a backgrounded job, so validate the
	# inputs up front instead of announcing a launch that dies immediately.
	command -v accelerate >/dev/null 2>&1 || die "accelerate not found in PATH"
	[[ -f train.py ]] || die "train.py not found in ${PWD}; run from the directory containing it"
	[[ -d "${DATA_DIR}" ]] || die "data directory not found: ${DATA_DIR}"
	[[ -d "${SEMANTIC_FEATURES_DIR}" ]] \
	  || die "semantic features directory not found: ${SEMANTIC_FEATURES_DIR}"
	[[ -e "${RESUME_CKPT}" ]] || die "resume checkpoint not found: ${RESUME_CKPT}"

	# Options consumed by `accelerate launch` itself.
	launch_args=(
	  --multi_gpu
	  --num_processes "${NUM_GPUS}"
	  --mixed_precision bf16
	)

	# Options forwarded to train.py.
	train_args=(
	  --report-to wandb
	  --allow-tf32
	  --mixed-precision bf16
	  --seed 0
	  --path-type linear
	  --prediction v
	  --weighting uniform
	  --model SiT-XL/2
	  --enc-type dinov2-vit-b
	  --encoder-depth 8
	  --proj-coeff 0.5
	  --output-dir exps
	  # NOTE(review): same experiment name as the run RESUME_CKPT comes from —
	  # presumably intentional so outputs continue in that experiment; confirm.
	  --exp-name jsflow-experiment-0.75-0.01-one-step
	  --batch-size 256
	  --data-dir "${DATA_DIR}"
	  --semantic-features-dir "${SEMANTIC_FEATURES_DIR}"
	  --learning-rate 0.00005
	  --t-c 0.75
	  --cls 0.005
	  --ot-cls
	  --resume-from-ckpt "${RESUME_CKPT}"
	  --tc-velocity-loss-coeff "${TC_VEL_COEFF}"
	)

	# nohup + & keeps training alive after the invoking shell exits.
	nohup accelerate launch "${launch_args[@]}" train.py "${train_args[@]}" \
	  > "${LOG_FILE}" 2>&1 &
	train_pid=$!

	echo "Launched resume training with tc velocity loss."
	# Report how to follow or stop the job; its exit status is not visible here.
	printf 'PID: %s, log: %s\n' "${train_pid}" "${LOG_FILE}"