#!/usr/bin/env bash
set -euo pipefail
# Evaluate a completed or checkpointed run under nohup.
# Resumable: existing eval/<split>/predictions.json rows are skipped.
# Usage:
# Adapter mode (default): EVAL_BATCH_SIZE=4 bash scripts/nohup_eval.sh <RUN_DIR> [adapter_path]
# Merged mode: EVAL_BATCH_SIZE=8 bash scripts/nohup_eval.sh <RUN_DIR> <merged_model_dir>
#
# If the second path contains adapter_config.json, it is treated as a PEFT adapter.
# Otherwise, it is treated as a merged/full model directory.
# Validate the command line: RUN_DIR is mandatory; the model/adapter path is
# optional and falls back to the adapter directory inside the run.
if (( $# < 1 )); then
  printf '%s\n' "Usage: $0 <RUN_DIR> [ADAPTER_OR_MERGED_MODEL_PATH]" >&2
  exit 1
fi

RUN_DIR="$1"
if [[ -n "${2:-}" ]]; then
  MODEL_OR_ADAPTER="$2"
else
  MODEL_OR_ADAPTER="$RUN_DIR/outputs/adapter"
fi

# Stop early when the chosen path is not an existing directory.
[[ -d "$MODEL_OR_ADAPTER" ]] || {
  printf '%s\n' "ERROR: path not found: $MODEL_OR_ADAPTER" >&2
  exit 1
}
# Activate the project virtualenv (path is relative to the current directory).
. .venv/bin/activate

# Keep the caller's GPU selection; fall back to device 0 when none is given.
: "${CUDA_VISIBLE_DEVICES:=0}"
export CUDA_VISIBLE_DEVICES
export TOKENIZERS_PARALLELISM=false
export PYTHONPATH="$PWD/src:${PYTHONPATH:-}"

# Run the GPU check script before any run directories are created.
python scripts/check_gpu.py

# Create the output directories and name the log after the launch time.
mkdir -p "$RUN_DIR"/{logs,eval}
log_stamp="$(date +%Y%m%d-%H%M%S)"
LOG="$RUN_DIR/logs/eval_${log_stamp}.log"
# Decide how the model is loaded and set MODE, MODEL_ARG and ADAPTER_ARGS.
#
# Globals read:    MODEL_OR_ADAPTER - directory chosen on the command line.
# Globals written: MODE         - "adapter" or "merged".
#                  MODEL_ARG    - value passed to --model.
#                  ADAPTER_ARGS - extra arguments as a shell-quoted fragment
#                                 (empty in merged mode).
#
# A directory containing adapter_config.json is treated as a PEFT adapter on
# top of the base model; anything else is used as the model itself.
#
# ADAPTER_ARGS is pasted into shell source that a second bash re-parses, so
# the path is escaped with printf %q rather than wrapped in single quotes: a
# path containing a quote would otherwise terminate the quoting early and the
# remainder would be interpreted as shell code.
select_eval_mode() {
  local quoted_adapter

  if [[ -f "$MODEL_OR_ADAPTER/adapter_config.json" ]]; then
    MODE="adapter"
    MODEL_ARG="Qwen/Qwen3-8B"
    printf -v quoted_adapter '%q' "$MODEL_OR_ADAPTER"
    ADAPTER_ARGS="--adapter $quoted_adapter --load_in_4bit"
  else
    MODE="merged"
    MODEL_ARG="$MODEL_OR_ADAPTER"
    ADAPTER_ARGS=""
  fi
}
select_eval_mode
# Print the shell source executed by the detached evaluation shell.
#
# Globals read: MODEL_ARG, ADAPTER_ARGS, RUN_DIR, CUDA_VISIBLE_DEVICES, PWD and
#               the optional knobs EVAL_BATCH_SIZE (default 4),
#               EVAL_MAX_NEW_TOKENS (1536), EVAL_GOLD_LENGTH_BUFFER (96) and
#               EVAL_SAVE_EVERY (25).
# Outputs:      the script text on stdout.
#
# Every interpolated value is escaped with printf %q, so paths containing
# spaces, quotes or other shell metacharacters reach evaluate_model.py as a
# single argument instead of breaking (or injecting into) the generated code.
# ADAPTER_ARGS is the one exception: it is already a shell-quoted fragment
# (or empty) and is therefore inserted verbatim.
build_eval_script() {
  local -a cmd_prefix=(python scripts/evaluate_model.py --model "$MODEL_ARG")
  local -a cmd_suffix=(
    --dataset nraptisss/TMF921-intent-to-config-research-sota
    --output_dir "$RUN_DIR/eval"
    --batch_size "${EVAL_BATCH_SIZE:-4}"
    --max_new_tokens "${EVAL_MAX_NEW_TOKENS:-1536}"
    --gold_length_buffer "${EVAL_GOLD_LENGTH_BUFFER:-96}"
    --save_every "${EVAL_SAVE_EVERY:-25}"
  )

  printf 'source .venv/bin/activate\n'
  printf 'export CUDA_VISIBLE_DEVICES=%q\n' "${CUDA_VISIBLE_DEVICES:-}"
  # PYTHONPATH is resolved now, in the launching shell, to $PWD/src only.
  printf 'export PYTHONPATH=%q\n' "$PWD/src"
  printf 'export TOKENIZERS_PARALLELISM=false\n'
  printf '%q ' "${cmd_prefix[@]}"
  printf '%s ' "${ADAPTER_ARGS:-}"
  printf '%q ' "${cmd_suffix[@]}"
  printf '\n'
}

# Build the script first so a failure here is not hidden inside the
# background job.
eval_script=$(build_eval_script)

# Launch detached from the terminal in a login shell (-l); the script
# re-activates the virtualenv and re-exports the environment itself,
# presumably because login profile files may alter it.
nohup bash -lc "$eval_script" > "$LOG" 2>&1 &

# Record the PID of the background job so it can be monitored or stopped.
printf '%s\n' "$!" > "$RUN_DIR/EVAL_PID.txt"
# Print a human-readable summary of the evaluation that was just launched.
#
# Globals read: RUN_DIR, MODE, MODEL_OR_ADAPTER, LOG and the optional EVAL_*
#               tuning knobs (shown with the same defaults used at launch).
# Outputs:      the summary on stdout.
#
# The PID is read back from $RUN_DIR/EVAL_PID.txt; if that file is missing or
# unreadable the PID field is left empty instead of emitting a cat error in
# the middle of the summary. EVAL_SAVE_EVERY is listed with the other knobs
# because the launched command honours it as well.
print_eval_summary() {
  local pid_file="$RUN_DIR/EVAL_PID.txt"
  local pid=""

  if [[ -r "$pid_file" ]]; then
    pid=$(<"$pid_file")
  fi

  cat <<EOF
Started nohup evaluation.
RUN_DIR=$RUN_DIR
MODE=$MODE
MODEL_OR_ADAPTER=$MODEL_OR_ADAPTER
PID=$pid
LOG=$LOG
RESULTS=$RUN_DIR/eval/all_metrics.json
Monitor:
tail -f "$LOG"
Tuning knobs:
EVAL_BATCH_SIZE=${EVAL_BATCH_SIZE:-4}
EVAL_MAX_NEW_TOKENS=${EVAL_MAX_NEW_TOKENS:-1536}
EVAL_GOLD_LENGTH_BUFFER=${EVAL_GOLD_LENGTH_BUFFER:-96}
EVAL_SAVE_EVERY=${EVAL_SAVE_EVERY:-25}
EOF
}
print_eval_summary