lingbot-va / script /run_launch_va_server_sync.sh
bazaar-research's picture
Upload folder using huggingface_hub
0a7036f verified
#!/usr/bin/bash
set -x
umask 007
NGPU=${NGPU:-"8"}
MASTER_PORT=${MASTER_PORT:-"29501"}
PORT=${PORT:-"1106"}
LOG_RANK=${LOG_RANK:-"0"}
TORCHFT_LIGHTHOUSE=${TORCHFT_LIGHTHOUSE:-"http://localhost:29510"}
CONFIG_NAME=${CONFIG_NAME:-"robotwin"}
overrides=""
if [ $# -ne 0 ]; then
overrides="$*"
fi
## node setting
num_gpu=${NGPU}
master_port=${MASTER_PORT}
log_rank=${LOG_RANK}
torchft_lighthouse=${TORCHFT_LIGHTHOUSE}
config_name=${CONFIG_NAME}
## cmd setting
export TOKENIZERS_PARALLELISM=false
PYTORCH_CUDA_ALLOC_CONF="expandable_segments:True" TORCHFT_LIGHTHOUSE=${torchft_lighthouse} \
python -m torch.distributed.run \
--nproc_per_node=${num_gpu} \
--local-ranks-filter=${log_rank} \
--master_port ${master_port} \
--tee 3 \
-m wan_va.wan_va_server --config-name ${config_name} $overrides