#!/bin/bash
#
# Launches the vLLM OpenAI-compatible API server
# (python module vllm.entrypoints.openai.api_server) for the model that
# lives in the same directory as this script.
#
# Usage:    run the script directly; it reads no positional arguments.
# Requires: a `python` on PATH that can import the `vllm` package.

set -euo pipefail

# Absolute path of the directory containing this script; handed to the
# server as --model. `--` guards against a script path starting with '-'.
MODEL_DIR="$(cd -- "$(dirname -- "$0")" && pwd)"
readonly MODEL_DIR

# TCP port handed to the server via --port.
readonly PORT=8004

# One array element per word so each value reaches python intact, even if
# MODEL_DIR contains spaces. Flag meanings are defined by vLLM's engine
# arguments; the values are kept exactly as originally configured.
server_args=(
  --model "$MODEL_DIR"
  --tensor-parallel-size 1
  --port "$PORT"
  --max-num-seqs 1
  --gpu-memory-utilization 0.95
)

# exec replaces this shell with the server process, so signals sent to the
# script's PID (e.g. by a process supervisor) go straight to the server and
# the server's exit status becomes the script's exit status.
exec python -m vllm.entrypoints.openai.api_server "${server_args[@]}"