File size: 1,641 Bytes
c6dfc69 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 | #!/bin/bash
# Configuration for the ref-avs training launcher: strict-mode settings,
# paths resolved relative to this script, and the fixed hyper-parameters.

# Abort on a failing command, on use of an unset variable, and on a failure in
# any stage of a pipeline.
set -euo pipefail

# Absolute directory containing this script. The assignments are kept separate
# from `readonly` so that a failing `cd` inside the command substitution is not
# masked by the builtin's own (successful) exit status.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Parent directory of the script directory.
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
# Directory expected to contain the training entry point.
CODE_DIR="${REPO_ROOT}/ref-avs.code"
readonly SCRIPT_DIR REPO_ROOT CODE_DIR

# Work from the script directory so relative paths resolve there regardless of
# where the script was invoked from.
cd "${SCRIPT_DIR}"

# Fixed run settings; marked readonly so later code cannot change them by
# accident.
readonly DEFAULT_GPUS=4
readonly DEFAULT_EPOCHS=50
readonly DEFAULT_LR=1e-4
readonly OMP_THREADS=8
#######################################
# Print the hyper-parameter summary table.
# Globals:   DEFAULT_EPOCHS, DEFAULT_LR, DEFAULT_GPUS (read)
# Arguments: none
# Outputs:   a bordered two-column table on stdout
#######################################
print_table() {
  local -r border="+-------------+----------------+"

  printf '%s\n' "${border}"
  # The header uses the same format as the data rows so its cells are padded
  # to the column widths and line up with the border.
  printf '| %-11s | %-14s |\n' "hyper-param" "ref-avs"
  printf '%s\n' "${border}"
  # printf re-applies the format to each successive pair of arguments, giving
  # one row per (name, value) pair.
  printf '| %-11s | %-14s |\n' \
    "epoch" "${DEFAULT_EPOCHS}" \
    "lr" "${DEFAULT_LR}" \
    "gpus(def)" "${DEFAULT_GPUS}"
  printf '%s\n' "${border}"
}
#######################################
# Print the command synopsis and two example invocations.
# Arguments: none
# Outputs:   three lines on stdout, each naming the script via $0
#######################################
usage() {
  printf '%s\n' \
    "Usage: $0 [gpus]" \
    "Example: $0" \
    "Example: $0 8"
}
# ---- Argument handling ------------------------------------------------------
# At most one positional argument (the GPU count) is accepted.
if (( $# > 1 )); then
  # Diagnostics go to stderr so they are not mixed into captured stdout.
  usage >&2
  print_table >&2
  exit 1
fi

# An omitted or empty argument falls back to the default GPU count.
GPUS="${1:-${DEFAULT_GPUS}}"

# Validate by pattern only: all digits, with at least one non-zero digit.
# An arithmetic comparison is deliberately avoided, because bash treats a
# leading 0 as octal (so "08" raises an evaluation error) and very long digit
# strings overflow its integer arithmetic.
if ! [[ "${GPUS}" =~ ^0*[1-9][0-9]*$ ]]; then
  echo "Error: gpus must be a positive integer, got: ${GPUS}" >&2
  exit 1
fi

# ---- Pre-flight checks ------------------------------------------------------
if [[ ! -f "${CODE_DIR}/main.py" ]]; then
  echo "Error: training entry not found: ${CODE_DIR}/main.py" >&2
  exit 1
fi

# The job is started in the background, so a missing interpreter would only
# show up in the log file while this script still reported success.
if ! command -v python3 > /dev/null 2>&1; then
  echo "Error: python3 not found in PATH" >&2
  exit 1
fi

# ---- Launch -----------------------------------------------------------------
export OMP_NUM_THREADS="${OMP_THREADS}"
LOG_FILE="train_ref_avs.log"

# Kept as an array so every argument stays a single word when executed.
CMD=(
  python3 "${CODE_DIR}/main.py"
  --epochs="${DEFAULT_EPOCHS}"
  --gpus="${GPUS}"
  --lr="${DEFAULT_LR}"
)

echo "Training job is about to start:"
echo " dataset: ref-avs (REFAVS)"
echo " code: ${CODE_DIR}/main.py"
echo " epochs: ${DEFAULT_EPOCHS}"
echo " lr: ${DEFAULT_LR}"
echo " gpus: ${GPUS}"
echo " log: ${SCRIPT_DIR}/${LOG_FILE}"
echo
print_table
echo

# %q shell-quotes each word, so the echoed command can be copy-pasted even when
# a path contains spaces; plain words are printed unchanged. Each word is
# followed by one space, hence no extra space before the redirection.
printf -v CMD_DISPLAY '%q ' "${CMD[@]}"
echo "Command: nohup ${CMD_DISPLAY}> ${LOG_FILE} 2>&1 &"

# Run detached from the terminal; stdout and stderr both go to the log file,
# which is truncated on every run.
nohup "${CMD[@]}" > "${LOG_FILE}" 2>&1 &
TRAIN_PID=$!
echo "Training started in background, PID: ${TRAIN_PID}"
|