Hanrui / test /run_bench.sh
Lekr0's picture
Add files using upload-large-folder tool
7c50656 verified
#!/bin/bash
# Step 3: run the HumanEval / MT-Bench / GSM8K benchmarks.
# The server started by start_server.sh must already be up.
#
# Usage:
#   bash run_bench.sh                  # all three benches, full dataset
#   bash run_bench.sh humaneval        # only humaneval
#   bash run_bench.sh mtbench gsm8k    # pick any subset

# Exit on the first unhandled command failure.
set -e

# Host and port of the already-running server that gets benchmarked.
INTRANET_IP="10.1.1.72"
PORT="30000"

# Base model and merged draft model paths passed to the benchmark driver.
BASE_MODEL="/workspace/models/Qwen3-8B"
MERGED="/workspace/hanrui/syxin_old/Specforge/outputs/qwen3-8b-sft-32gpu-v2-merged"

# Directory holding the benchmark scripts; results are written beneath it.
BENCH_DIR="/workspace/hanrui/syxin_old/Specforge/benchmarks"
RESULT_DIR="${BENCH_DIR}/results"
# ---- sanity check ----
# Probe the server's /v1/models endpoint before doing any work. -s silences
# curl's progress output and -f makes it return non-zero on HTTP error
# responses, so either a refused connection or an error status aborts here.
SERVER_URL="http://${INTRANET_IP}:${PORT}"
echo "Checking server at $SERVER_URL ..."
if ! curl -sf "${SERVER_URL}/v1/models" > /dev/null; then
  # Diagnostics go to stderr so they are not mixed into captured stdout.
  echo "[ERROR] Server not reachable. Start it first: bash start_server.sh" >&2
  exit 1
fi
echo "Server OK."

# Make sure the results directory exists, then work from BENCH_DIR because the
# benchmark driver is invoked by a relative path.
mkdir -p -- "$RESULT_DIR"
cd -- "$BENCH_DIR" || exit 1

# Prepend the Specforge checkout to PYTHONPATH. The ${VAR:+...} form adds the
# ':' separator only when PYTHONPATH already has a value, so an unset or empty
# variable does not leave a trailing empty entry on the module search path.
export PYTHONPATH="/workspace/hanrui/syxin_old/Specforge${PYTHONPATH:+:${PYTHONPATH}}"
# ---- decide which benches to run ----
# Benchmark names come from the command line; with no arguments all three run.
if (( $# > 0 )); then
  TARGETS=("$@")
else
  TARGETS=(humaneval mtbench gsm8k)
fi

# Turn every requested name into a " name:count" token appended to BENCH_ARGS
# (later handed to --benchmark-list). An unrecognised name aborts the script.
# NOTE(review): the counts are presumably the full dataset sizes mentioned in
# the usage notes -- confirm against the benchmark driver.
BENCH_ARGS=""
for bench in "${TARGETS[@]}"; do
  case "$bench" in
    humaneval) count=164 ;;
    mtbench)   count=80 ;;
    gsm8k)     count=1319 ;;
    *)
      echo "[ERROR] Unknown bench: $bench (choices: humaneval mtbench gsm8k)"
      exit 1
      ;;
  esac
  BENCH_ARGS+=" ${bench}:${count}"
done
# One timestamp is shared by the run name and the log file name so the two can
# be matched up afterwards.
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
LOG_FILE="$RESULT_DIR/bench_${TIMESTAMP}.log"

echo "Running: $BENCH_ARGS"
echo "Results -> $RESULT_DIR"
echo ""

# BENCH_ARGS is a whitespace-separated string of tokens. Split it into an array
# so each token reaches python3 as its own argument without being subjected to
# pathname expansion.
read -r -a BENCH_LIST <<< "$BENCH_ARGS"

# Run the benchmark driver against the already-running server
# (--skip-launch-server), mirroring stdout and stderr into the log file.
python3 bench_eagle3.py \
  --model-path "$BASE_MODEL" \
  --speculative-draft-model-path "$MERGED" \
  --host "$INTRANET_IP" \
  --port "$PORT" \
  --config-list "16,4,1,4" \
  --benchmark-list "${BENCH_LIST[@]}" \
  --output-dir "$RESULT_DIR" \
  --name "dflash_lora_${TIMESTAMP}" \
  --skip-launch-server \
  2>&1 | tee "$LOG_FILE"
# The pipeline's exit status is tee's, so a failing benchmark would otherwise
# go unnoticed. Read python3's own status from PIPESTATUS immediately, before
# any other command overwrites it.
BENCH_STATUS=${PIPESTATUS[0]}
if (( BENCH_STATUS != 0 )); then
  echo "[ERROR] bench_eagle3.py exited with status $BENCH_STATUS (log: $LOG_FILE)" >&2
  exit "$BENCH_STATUS"
fi

echo ""
echo "Done. Latest result files:"
# Informational listing only: errors (e.g. no *.jsonl files present) are
# discarded and must not affect the script's exit status.
ls -lht "$RESULT_DIR"/*.jsonl 2>/dev/null | head -5 || true