File size: 2,324 Bytes
c6dfc69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#!/bin/bash

# Strict mode: abort on command failure, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# ---- Launch defaults --------------------------------------------------------
# GPU count used when the caller does not pass one.
DEFAULT_GPUS=4
# Thread count; exported later as OMP_NUM_THREADS for the training process.
OMP_THREADS=8

# ---- Reference hyper-parameter table (for quick view) -----------------------
# Epoch count per dataset variant.
EPOCH_V1S=140
EPOCH_V1M=140
EPOCH_V2=90

# Weight per dataset variant (displayed in the reference table).
WEIGHT_V1S=3.0
WEIGHT_V1M=3.0
WEIGHT_V2=3.0

# ---- Paths ------------------------------------------------------------------
# Absolute directory holding this script, its parent (the repository root),
# and the code tree below the root.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
CODE_BASE="${REPO_ROOT}/avs.code"

# Run from the script directory so relative paths resolve next to the script.
cd "${SCRIPT_DIR}"

# Print the reference hyper-parameter table to stdout as a fixed-width ASCII
# grid with one column per dataset variant (v1s, v1m, v2).
# Globals (read): EPOCH_V1S, EPOCH_V1M, EPOCH_V2,
#                 WEIGHT_V1S, WEIGHT_V1M, WEIGHT_V2, DEFAULT_GPUS
print_table() {
  local rule='+-------------+------------+------------+------------+'

  # Header: rule, column titles, rule.
  printf '%s\n' \
    "${rule}" \
    '| hyper-param |    v1s     |    v1m     |     v2     |' \
    "${rule}"

  # printf re-applies the format to every group of four arguments, so each
  # continuation line below becomes one table row.
  printf '| %-11s | %-10s | %-10s | %-10s |\n' \
    'epoch' "${EPOCH_V1S}" "${EPOCH_V1M}" "${EPOCH_V2}" \
    'weight' "${WEIGHT_V1S}" "${WEIGHT_V1M}" "${WEIGHT_V2}" \
    'gpus(def)' "${DEFAULT_GPUS}" "${DEFAULT_GPUS}" "${DEFAULT_GPUS}"

  printf '%s\n' "${rule}"
}

# Print the command-line synopsis and two example invocations to stdout.
# $0 is shown exactly as the script was invoked.
usage() {
  printf '%s\n' \
    "Usage: $0 <v1s|v1m|v2> [gpus]" \
    "Example: $0 v1s" \
    "Example: $0 v2 8"
}

# -----------------------------------------------------------------------------
# Main flow: validate the arguments, select the per-dataset settings, and
# launch the training entry point in the background under nohup.
#
# Arguments:
#   $1 - dataset variant: v1s, v1m or v2
#   $2 - optional GPU count (positive integer); defaults to DEFAULT_GPUS
# Outputs:
#   stdout - launch summary, reference table, and the background PID
#   stderr - usage text and error messages
#   train_<dataset>.log (relative to the working directory) - everything the
#     training process writes to stdout/stderr; truncated on each launch
# Exit status:
#   1 on any validation failure.
# -----------------------------------------------------------------------------

# Print the given message to stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

if [[ $# -lt 1 || $# -gt 2 ]]; then
  {
    usage
    print_table
  } >&2
  exit 1
fi

DATASET="$1"
GPUS="${2:-${DEFAULT_GPUS}}"

# Map the dataset variant to its code directory and epoch count.
case "${DATASET}" in
  v1s)
    CODE_DIR="v1s.code"
    EPOCHS="${EPOCH_V1S}"
    ;;
  v1m)
    CODE_DIR="v1m.code"
    EPOCHS="${EPOCH_V1M}"
    ;;
  v2)
    CODE_DIR="v2.code"
    EPOCHS="${EPOCH_V2}"
    ;;
  *)
    {
      echo "Error: dataset must be one of v1s / v1m / v2, got: ${DATASET}"
      echo
      print_table
    } >&2
    exit 1
    ;;
esac

# The regex runs first so only digit strings ever reach arithmetic. The 10#
# prefix forces base 10: without it a leading zero ("08", "09") is parsed as
# an invalid octal literal, the comparison errors out, and the bad value
# slips through validation.
if ! [[ "${GPUS}" =~ ^[0-9]+$ ]] || (( 10#${GPUS} <= 0 )); then
  die "Error: gpus must be a positive integer, got: ${GPUS}"
fi
# Normalise so the value forwarded to the trainer has no leading zeros.
GPUS="$(( 10#${GPUS} ))"

if [[ ! -f "${CODE_BASE}/${CODE_DIR}/main.py" ]]; then
  die "Error: training entry not found: ${CODE_BASE}/${CODE_DIR}/main.py"
fi

# Fail here rather than inside the detached job, where a missing interpreter
# would only show up in the log after "Training started" was already printed.
command -v python3 >/dev/null 2>&1 || die "Error: python3 not found in PATH"

# Exported so the training process inherits the thread setting.
export OMP_NUM_THREADS="${OMP_THREADS}"

LOG_FILE="train_${DATASET}.log"
# Kept as an array so each argument stays a single word when executed.
CMD=(python3 "${CODE_BASE}/${CODE_DIR}/main.py" --epochs="${EPOCHS}" --gpus="${GPUS}")

echo "Training job is about to start:"
echo "  dataset: ${DATASET}"
echo "  code:    ${CODE_BASE}/${CODE_DIR}/main.py"
echo "  epochs:  ${EPOCHS}"
echo "  gpus:    ${GPUS}"
echo "  log:     ${SCRIPT_DIR}/${LOG_FILE}"
echo
print_table
echo
echo "Command: nohup ${CMD[*]} > ${LOG_FILE} 2>&1 &"

# Detach from the terminal; stdout and stderr of the job go to the log file.
nohup "${CMD[@]}" > "${LOG_FILE}" 2>&1 &
echo "Training started in background, PID: $!"