#!/usr/bin/env bash
# Distributed training launcher.
#
# Usage:
#   dist_train.sh CONFIG GPUS [CFG_OPTIONS...]
#
# Environment (all optional):
#   MASTER_ADDR  rendezvous address of rank-0 node (default 127.0.0.1)
#   MASTER_PORT  rendezvous port                   (default 28567)
#   RANK         rank of this node                 (default 0)
set -euo pipefail

# Fail fast with a readable message instead of a cryptic arithmetic
# error (the original died with "division by 0" when args were missing).
if [ "$#" -lt 2 ]; then
  echo "Usage: $0 CONFIG GPUS [CFG_OPTIONS...]" >&2
  exit 1
fi

# Timestamp (MMDDhhmm) used to tag this run's log file.
T=$(date +%m%d%H%M)

CFG=$1
GPUS=$2

# A single node hosts at most 8 GPUs; spread any remainder across nodes.
GPUS_PER_NODE=$(( GPUS < 8 ? GPUS : 8 ))
NNODES=$(( GPUS / GPUS_PER_NODE ))

MASTER_PORT=${MASTER_PORT:-28567}
MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}
RANK=${RANK:-0}
|
|
| |
| |
|
|
# Derive the work directory from the config path:
# strip the file extension, then map configs/ -> work_dirs/.
# (native parameter expansion instead of echo|sed; note the 'configs'
# sed pattern contained no regex metacharacters, so this is equivalent)
WORK_DIR="${CFG%.*}"
WORK_DIR="${WORK_DIR//configs/work_dirs}/"

# mkdir -p is idempotent; no need to test for existence first.
mkdir -p "${WORK_DIR}logs"

# Build PYTHONPATH in the original precedence order.
# ${PYTHONPATH:+:$PYTHONPATH} avoids a trailing empty entry when
# PYTHONPATH is unset — an empty sys.path entry means the CWD in Python,
# which the original silently injected.
PYTHONPATH="/cpfs04/user/litianyu/projects/paradrive/external${PYTHONPATH:+:${PYTHONPATH}}"
PYTHONPATH="${PYTHONPATH}:/cpfs04/user/litianyu/projects/paradrive/external/toolbox"
PYTHONPATH="$(dirname "$0")/..:${PYTHONPATH}"
PYTHONPATH="${PYTHONPATH}:/cpfs01/shared/opendrivelab/sii/wangcaojun/repo-wcj/AlgEngine/navsim"
# Without export, the launched python would not see PYTHONPATH unless the
# caller's environment already exported it.
export PYTHONPATH
|
|
| |
|
|
# Report the resolved run configuration before launching.
# Expansions are quoted: an unquoted ${WORK_DIR} would be word-split and
# glob-expanded by echo. (This also drops the accidental double space the
# original printed after each label.)
echo "WORK_DIR: ${WORK_DIR}"
echo "GPUS_PER_NODE: ${GPUS_PER_NODE}"
echo "NNODES: ${NNODES}"
echo "RANK: ${RANK}"
| |
|
|
| |
| |
|
|
| |
| |
| |
| |
| |
| |
|
|
# Launch one training process per local GPU via torch.distributed.
# NOTE(review): torch.distributed.launch has been deprecated since
# PyTorch 1.9 in favor of torchrun — consider migrating once train.py's
# --launcher handling supports it.
LAUNCH_ARGS=(
  --nproc_per_node="${GPUS_PER_NODE}"
  --master_addr="${MASTER_ADDR}"
  --master_port="${MASTER_PORT}"
  --nnodes="${NNODES}"
  --node_rank="${RANK}"
)
TRAIN_ARGS=(
  "$CFG"
  --launcher pytorch
  --deterministic
  --work-dir "${WORK_DIR}"
)
# Only pass --cfg-options when extra options were actually supplied; an
# empty --cfg-options makes an argparse nargs='+' option fail. "${@:3}"
# is quoted so options containing spaces survive as single arguments.
if [ "$#" -gt 2 ]; then
  TRAIN_ARGS+=( --cfg-options "${@:3}" )
fi

# Mirror all output (stdout and stderr) into a timestamped log file.
# Under `set -o pipefail` (enabled at the top of this script) a training
# failure is not masked by tee's exit status.
python -m torch.distributed.launch \
  "${LAUNCH_ARGS[@]}" \
  "$(dirname "$0")/train.py" \
  "${TRAIN_ARGS[@]}" \
  2>&1 | tee "${WORK_DIR}logs/train.$T"
| |