#!/usr/bin/env bash
set -euo pipefail

# Evaluate a completed or checkpointed run under nohup.
# Resumable: existing eval/<run>/predictions.json rows are skipped.
#
# Usage:
#   Adapter mode (default): EVAL_BATCH_SIZE=4 bash scripts/nohup_eval.sh RUN_DIR [adapter_path]
#   Merged mode:            EVAL_BATCH_SIZE=8 bash scripts/nohup_eval.sh RUN_DIR MERGED_MODEL_PATH
#
# If the second path contains adapter_config.json, it is treated as a PEFT adapter.
# Otherwise, it is treated as a merged/full model directory.

if [ $# -lt 1 ]; then
  # FIX: original usage string omitted the mandatory RUN_DIR argument.
  echo "Usage: $0 RUN_DIR [ADAPTER_OR_MERGED_MODEL_PATH]" >&2
  exit 1
fi

RUN_DIR="$1"
# Default to the conventional adapter output location inside the run dir.
MODEL_OR_ADAPTER="${2:-$RUN_DIR/outputs/adapter}"

if [ ! -d "$MODEL_OR_ADAPTER" ]; then
  echo "ERROR: path not found: $MODEL_OR_ADAPTER" >&2
  exit 1
fi

# Activate the project venv in THIS shell (needed for check_gpu.py below);
# the nohup'd subshell re-activates it independently.
source .venv/bin/activate
export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0}"
export PYTHONPATH="$PWD/src:${PYTHONPATH:-}"
export TOKENIZERS_PARALLELISM=false

# Fail fast (set -e) if no usable GPU before launching the background job.
python scripts/check_gpu.py

mkdir -p "$RUN_DIR/logs" "$RUN_DIR/eval"
LOG="$RUN_DIR/logs/eval_$(date +%Y%m%d-%H%M%S).log"

# Detect PEFT adapter vs merged/full model by presence of adapter_config.json.
if [ -f "$MODEL_OR_ADAPTER/adapter_config.json" ]; then
  MODE="adapter"
  MODEL_ARG="Qwen/Qwen3-8B"
  # printf %q shell-quotes the path so it survives the bash -lc string below
  # even with spaces or quotes in it.
  ADAPTER_ARGS="--adapter $(printf '%q' "$MODEL_OR_ADAPTER") --load_in_4bit"
else
  MODE="merged"
  MODEL_ARG="$MODEL_OR_ADAPTER"
  ADAPTER_ARGS=""
fi

# NOTE(review): the inner script is built by string interpolation; paths
# containing single quotes would break the '$MODEL_ARG'/'$RUN_DIR' embeddings.
nohup bash -lc "
source .venv/bin/activate
export CUDA_VISIBLE_DEVICES='$CUDA_VISIBLE_DEVICES'
export PYTHONPATH='$PWD/src'
export TOKENIZERS_PARALLELISM=false
python scripts/evaluate_model.py \\
  --model '$MODEL_ARG' \\
  $ADAPTER_ARGS \\
  --dataset nraptisss/TMF921-intent-to-config-research-sota \\
  --output_dir '$RUN_DIR/eval' \\
  --batch_size '${EVAL_BATCH_SIZE:-4}' \\
  --max_new_tokens '${EVAL_MAX_NEW_TOKENS:-1536}' \\
  --gold_length_buffer '${EVAL_GOLD_LENGTH_BUFFER:-96}' \\
  --save_every '${EVAL_SAVE_EVERY:-25}' \\
" > "$LOG" 2>&1 &

# Record the background PID so the run can be monitored/killed later.
EVAL_PID=$!
echo "$EVAL_PID" > "$RUN_DIR/EVAL_PID.txt"

# FIX: original file ended with a truncated 'cat <'; completed as the
# intended status summary here-doc.
cat <<EOF
Started $MODE eval in background (PID $EVAL_PID)
  model:   $MODEL_ARG
  log:     $LOG
  pidfile: $RUN_DIR/EVAL_PID.txt
Follow with: tail -f "$LOG"
EOF