#!/bin/bash
#
# Launch the ref-avs training job in the background.
#
# Usage: <script> [gpus]
#   gpus  Optional positive integer; defaults to DEFAULT_GPUS.
#
# Runs "${CODE_DIR}/main.py" with python3 under nohup, using the fixed epoch
# count and learning rate below, and redirects the job's stdout/stderr to
# train_ref_avs.log inside this script's directory.
#
# Exit status: 1 on bad arguments or when main.py is missing; otherwise the
# script returns right after the background job has been spawned.

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
CODE_DIR="${REPO_ROOT}/ref-avs.code"
readonly SCRIPT_DIR REPO_ROOT CODE_DIR

# Work from the script directory so the relative log path resolves next to
# this script regardless of where it was invoked from.
cd "${SCRIPT_DIR}"

readonly DEFAULT_GPUS=4
readonly DEFAULT_EPOCHS=50
readonly DEFAULT_LR=1e-4
readonly OMP_THREADS=8
readonly LOG_FILE="train_ref_avs.log"

#######################################
# Print the hyper-parameter summary table.
# Globals:   DEFAULT_EPOCHS, DEFAULT_LR, DEFAULT_GPUS (read)
# Outputs:   table on stdout
#######################################
print_table() {
  echo "+-------------+----------------+"
  echo "| hyper-param | ref-avs |"
  echo "+-------------+----------------+"
  printf "| %-11s | %-14s |\n" "epoch" "${DEFAULT_EPOCHS}"
  printf "| %-11s | %-14s |\n" "lr" "${DEFAULT_LR}"
  printf "| %-11s | %-14s |\n" "gpus(def)" "${DEFAULT_GPUS}"
  echo "+-------------+----------------+"
}

#######################################
# Print command-line usage.
# Outputs:   usage text on stdout (callers redirect as needed)
#######################################
usage() {
  echo "Usage: $0 [gpus]"
  echo "Example: $0"
  echo "Example: $0 8"
}

#######################################
# Print an error message to stderr and exit with status 1.
# Arguments: $* - message text
#######################################
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Validate arguments and start the training job in the background.
# Globals:   CODE_DIR, SCRIPT_DIR, LOG_FILE, DEFAULT_*, OMP_THREADS (read);
#            OMP_NUM_THREADS (exported)
# Arguments: $1 - optional GPU count (positive integer)
# Outputs:   job summary on stdout; diagnostics on stderr
#######################################
main() {
  if [[ $# -gt 1 ]]; then
    # Wrong invocation: usage and table are diagnostics, so send to stderr.
    {
      usage
      print_table
    } >&2
    exit 1
  fi

  local gpus_arg="${1:-${DEFAULT_GPUS}}"
  if ! [[ "${gpus_arg}" =~ ^[0-9]+$ ]]; then
    die "Error: gpus must be a positive integer, got: ${gpus_arg}"
  fi

  # Force base 10: a digits-only value with a leading zero (e.g. "08") would
  # otherwise be parsed as octal by shell arithmetic and raise an error
  # instead of being compared.
  local gpus=$((10#${gpus_arg}))
  if ((gpus <= 0)); then
    die "Error: gpus must be a positive integer, got: ${gpus_arg}"
  fi

  if [[ ! -f "${CODE_DIR}/main.py" ]]; then
    die "Error: training entry not found: ${CODE_DIR}/main.py"
  fi

  export OMP_NUM_THREADS="${OMP_THREADS}"

  # Build the command as an array so each argument stays a single word.
  local -a cmd=(
    python3 "${CODE_DIR}/main.py"
    --epochs="${DEFAULT_EPOCHS}"
    --gpus="${gpus}"
    --lr="${DEFAULT_LR}"
  )

  echo "Training job is about to start:"
  echo " dataset: ref-avs (REFAVS)"
  echo " code: ${CODE_DIR}/main.py"
  echo " epochs: ${DEFAULT_EPOCHS}"
  echo " lr: ${DEFAULT_LR}"
  echo " gpus: ${gpus}"
  echo " log: ${SCRIPT_DIR}/${LOG_FILE}"
  echo
  print_table
  echo
  echo "Command: nohup ${cmd[*]} > ${LOG_FILE} 2>&1 &"

  # nohup keeps the job alive after the launching shell exits; both output
  # streams go to the log file. set -e does not observe failures of the
  # background job itself, so check the log if training does not appear.
  nohup "${cmd[@]}" > "${LOG_FILE}" 2>&1 &
  echo "Training started in background, PID: $!"
}

main "$@"