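#!/bin/bash
#
# Launcher for train_hyper.py: configures OpenMP/MKL threading, preloads
# tcmalloc and libiomp5 when they can be located, and pins the run to NUMA
# node 0 when numactl is available. All arguments are forwarded to
# train_hyper.py.
#
# Environment:
#   CHIMERA_PCORE_THREADS  thread count used for OMP/MKL (default: 8)
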
# Strict mode: abort on any unhandled command failure, on expansion of an
# unset variable, and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Report where the script died. The single quotes defer expansion, so $LINENO
# and $? are evaluated when the trap fires (the failing line and its status),
# not when the trap is installed.
trap 'echo "[TURBO] β Failed at line $LINENO (exit $?)" >&2' ERR

# ---------------------------------------------------------------------------
# CPU topology and OpenMP/MKL thread configuration
# ---------------------------------------------------------------------------

# Print the number of online physical cores, or "unknown" when it cannot be
# determined (e.g. lscpu is not installed). Cores are counted as unique
# (core, socket) pairs because core ids repeat across sockets.
# The value is informational only, so failure here must not abort the launch.
turbo_count_physical_cores() {
  local count
  count=$(lscpu -p=Core,Socket 2>/dev/null | grep -v '^#' | sort -u | wc -l) \
    || count=0
  if (( count > 0 )); then
    printf '%s\n' "$count"
  else
    printf 'unknown\n'
  fi
}

# Print the comma-separated logical CPU ids 0..N-1.
# Arguments: $1 - number of threads (positive integer)
turbo_proclist() {
  local -i count=$1 cpu
  local list=""
  for (( cpu = 0; cpu < count; cpu++ )); do
    list+="${list:+,}${cpu}"
  done
  printf '%s\n' "$list"
}

PHYS_CORES=$(turbo_count_physical_cores)
PCORE_THREADS=${CHIMERA_PCORE_THREADS:-8}

# Reject a malformed override early instead of handing garbage to OpenMP/MKL.
if [[ ! "$PCORE_THREADS" =~ ^[1-9][0-9]*$ ]]; then
  echo "[TURBO] CHIMERA_PCORE_THREADS must be a positive integer (got '$PCORE_THREADS')" >&2
  exit 1
fi
echo "[TURBO] Physical cores: $PHYS_CORES β P-core threads: $PCORE_THREADS"

export OMP_NUM_THREADS=$PCORE_THREADS
export MKL_NUM_THREADS=$PCORE_THREADS

# Pin one thread per logical CPU, starting at CPU 0. The list is derived from
# PCORE_THREADS so the affinity mask always matches the thread count; with the
# default of 8 it is exactly 0,1,2,3,4,5,6,7.
# NOTE(review): assumes the P-core logical CPUs are numbered from 0 upwards -
# confirm against the target machine's topology.
kmp_proclist=$(turbo_proclist "$PCORE_THREADS")
export KMP_AFFINITY="granularity=fine,proclist=[${kmp_proclist}],explicit"
export KMP_BLOCKTIME=0

# ---------------------------------------------------------------------------
# tcmalloc: preload a non-debug build when the dynamic linker cache has one
# ---------------------------------------------------------------------------

# Read candidate paths from stdin (one per line) and print the first one that
# is an existing regular file.
# Returns: 0 if a file was found, 1 otherwise.
# Reading line by line keeps paths intact even if they contain whitespace or
# glob characters.
turbo_first_existing_file() {
  local path
  while IFS= read -r path || [[ -n "$path" ]]; do
    if [[ -f "$path" ]]; then
      printf '%s\n' "$path"
      return 0
    fi
  done
  return 1
}

# Best-effort lookup: a missing ldconfig, no match, or an early exit of the
# reader (SIGPIPE upstream under pipefail) must all leave us with "not found"
# or the path already printed, never a fatal error - hence the `|| true`.
TCMALLOC_LIB=""
TCMALLOC_LIB=$(
  ldconfig -p 2>/dev/null \
    | grep -oP '/\S*libtcmalloc(|_minimal)\.so\S*' \
    | grep -v debug \
    | turbo_first_existing_file
) || true

if [[ -n "$TCMALLOC_LIB" ]]; then
  echo "[TURBO] tcmalloc: $TCMALLOC_LIB"
  # Prepend, keeping whatever the caller already had in LD_PRELOAD.
  export LD_PRELOAD="$TCMALLOC_LIB${LD_PRELOAD:+:$LD_PRELOAD}"
else
  echo "[TURBO] β tcmalloc (non-debug) not found β expect 5-8% throughput loss."
  echo "[TURBO] Install: sudo apt install libgoogle-perftools4"
fi

# ---------------------------------------------------------------------------
# libiomp5: preload Intel's OpenMP runtime when it can be located
# ---------------------------------------------------------------------------

# Ask Python for the directory that contains the intel_extension_for_pytorch
# package and look for libiomp5.so next to it (one level above the package).
# Best-effort: if python3 or the package is missing, stderr is discarded and
# IOMP_LIB is left empty.
# NOTE(review): only this single location is probed - confirm it matches where
# libiomp5.so is actually installed in the target environment.
IOMP_LIB=$(python3 -c "
import intel_extension_for_pytorch, os
print(os.path.join(os.path.dirname(intel_extension_for_pytorch.__file__), '..', 'libiomp5.so'))
" 2>/dev/null) || true

# Only preload a path that really exists; otherwise skip silently.
if [[ -n "$IOMP_LIB" && -f "$IOMP_LIB" ]]; then
  echo "[TURBO] libiomp5: $IOMP_LIB"
  # Prepend, keeping whatever is already in LD_PRELOAD.
  export LD_PRELOAD="$IOMP_LIB${LD_PRELOAD:+:$LD_PRELOAD}"
fi

# ---------------------------------------------------------------------------
# NUMA pinning and launch
# ---------------------------------------------------------------------------

# Return 0 only when numactl is installed AND binding CPU and memory to node 0
# actually works. The probe runs `true` under the same binding used for the
# real launch, so a numactl that cannot bind here is detected up front.
turbo_numa_usable() {
  command -v numactl &>/dev/null \
    && numactl --cpunodebind=0 --membind=0 true &>/dev/null
}

# The command line is kept in arrays so every argument - including user
# arguments containing spaces or glob characters - reaches python3 intact.
numa_args=(numactl --cpunodebind=0 --membind=0)
launch_cmd=(python3 train_hyper.py "$@")

NUMA_PREFIX=""
if turbo_numa_usable; then
  echo "[TURBO] NUMA: pinning to node 0"
  NUMA_PREFIX="${numa_args[*]}"
  launch_cmd=("${numa_args[@]}" "${launch_cmd[@]}")
elif command -v numactl &>/dev/null; then
  # Like the preload steps, pinning is an optimisation: fall back to an
  # unpinned run rather than failing the whole launch.
  echo "[TURBO] NUMA: numactl cannot bind to node 0; running unpinned" >&2
fi

echo "[TURBO] Launching: python3 train_hyper.py $*"
echo "βββββββββββββββββββββββββββββββββββββββββββββββββββ"

# Run at top level (not inside a function) so a failing run still triggers the
# script-level ERR trap and its exit status becomes the script's exit status.
"${launch_cmd[@]}"