#!/usr/bin/env bash
#
# Launch a model evaluation as a detached (nohup) background job, then print
# where its PID, log and results live.
#
# Usage:
#   <this script> <RUN_DIR> [ADAPTER_OR_MERGED_MODEL_PATH]
#
# Arguments:
#   RUN_DIR                       Run directory. logs/ and eval/ are created
#                                 inside it and EVAL_PID.txt is written to it.
#   ADAPTER_OR_MERGED_MODEL_PATH  Directory to evaluate; defaults to
#                                 $RUN_DIR/outputs/adapter. When it contains
#                                 adapter_config.json it is passed as --adapter
#                                 on top of Qwen/Qwen3-8B with --load_in_4bit;
#                                 otherwise it is passed directly as --model.
#
# Environment (all optional):
#   CUDA_VISIBLE_DEVICES      default: 0
#   EVAL_BATCH_SIZE           default: 4
#   EVAL_MAX_NEW_TOKENS       default: 1536
#   EVAL_GOLD_LENGTH_BUFFER   default: 96
#   EVAL_SAVE_EVERY           default: 25
#
# All helper paths (.venv/, src/, scripts/) are resolved relative to the
# current working directory, so run this from the directory containing them.
set -euo pipefail

if [[ $# -lt 1 ]]; then
  echo "Usage: $0 <RUN_DIR> [ADAPTER_OR_MERGED_MODEL_PATH]" >&2
  exit 1
fi

RUN_DIR="$1"
MODEL_OR_ADAPTER="${2:-$RUN_DIR/outputs/adapter}"

if [[ ! -d "$MODEL_OR_ADAPTER" ]]; then
  echo "ERROR: path not found: $MODEL_OR_ADAPTER" >&2
  exit 1
fi

source .venv/bin/activate
export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0}"
# Append any pre-existing PYTHONPATH only when it is non-empty, so no trailing
# ':' (an empty path entry) is left behind.
export PYTHONPATH="$PWD/src${PYTHONPATH:+:$PYTHONPATH}"
export TOKENIZERS_PARALLELISM=false

# Fail fast (set -e) before detaching if the GPU check does not pass.
python scripts/check_gpu.py

mkdir -p "$RUN_DIR/logs" "$RUN_DIR/eval"
LOG="$RUN_DIR/logs/eval_$(date +%Y%m%d-%H%M%S).log"

# Build the evaluator command line as an array so paths and knob values are
# passed verbatim, whatever characters (spaces, quotes, ...) they contain.
eval_args=()
if [[ -f "$MODEL_OR_ADAPTER/adapter_config.json" ]]; then
  MODE="adapter"
  eval_args+=(--model "Qwen/Qwen3-8B" --adapter "$MODEL_OR_ADAPTER" --load_in_4bit)
else
  MODE="merged"
  eval_args+=(--model "$MODEL_OR_ADAPTER")
fi
eval_args+=(
  --dataset nraptisss/TMF921-intent-to-config-research-sota
  --output_dir "$RUN_DIR/eval"
  --batch_size "${EVAL_BATCH_SIZE:-4}"
  --max_new_tokens "${EVAL_MAX_NEW_TOKENS:-1536}"
  --gold_length_buffer "${EVAL_GOLD_LENGTH_BUFFER:-96}"
  --save_every "${EVAL_SAVE_EVERY:-25}"
)

# The job still runs under a login shell, as before. The inner script is a
# fixed single-quoted string; every dynamic value reaches it as a positional
# parameter instead of being spliced into shell source. The environment is
# re-exported after the login profile has run so the job sees the same values
# as this shell, and exec makes the recorded PID the Python process itself.
nohup bash -lc '
  source .venv/bin/activate
  export CUDA_VISIBLE_DEVICES="$1"
  export PYTHONPATH="$2"
  export TOKENIZERS_PARALLELISM=false
  shift 2
  exec python scripts/evaluate_model.py "$@"
' eval-launcher "$CUDA_VISIBLE_DEVICES" "$PYTHONPATH" "${eval_args[@]}" \
  > "$LOG" 2>&1 &

EVAL_PID=$!
printf '%s\n' "$EVAL_PID" > "$RUN_DIR/EVAL_PID.txt"

cat <<EOF
Started nohup evaluation.
RUN_DIR=$RUN_DIR
MODE=$MODE
MODEL_OR_ADAPTER=$MODEL_OR_ADAPTER
PID=$EVAL_PID
LOG=$LOG
RESULTS=$RUN_DIR/eval/all_metrics.json

Monitor:
tail -f "$LOG"

Tuning knobs:
EVAL_BATCH_SIZE=${EVAL_BATCH_SIZE:-4}
EVAL_MAX_NEW_TOKENS=${EVAL_MAX_NEW_TOKENS:-1536}
EVAL_GOLD_LENGTH_BUFFER=${EVAL_GOLD_LENGTH_BUFFER:-96}
EVAL_SAVE_EVERY=${EVAL_SAVE_EVERY:-25}
EOF