collabllm / build_vllm.sh
hour1's picture
Upload folder using huggingface_hub
9114cf2 verified
# Serve a local Qwen2.5-14B-Instruct checkpoint through vLLM's OpenAI API
# server entrypoint. Settings are named up front so they are easy to spot
# and edit in one place.
model_dir=/home/hector5/models/Qwen2.5-14B-Instruct/
served_name=Qwen2.5-14B-Instruct
listen_port=8000
max_len=10000   # value passed to --max-model-len
tp_size=1       # value passed to --tensor-parallel-size

# CUDA_VISIBLE_DEVICES is set for this one command only, exposing CUDA device 0.
CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \
  --model "$model_dir" \
  --served-model-name "$served_name" \
  --dtype auto \
  --max-model-len "$max_len" \
  --tensor-parallel-size "$tp_size" \
  --port "$listen_port"
# Alternative flags and invocations kept for reference (not executed):
# --served-model-name Llama-2-7b-chat
# --host 0.0.0.0 \
# CUDA_VISIBLE_DEVICES=3 vllm serve /data1/public/models/Qwen2.5-7B-Instruct --port 8000 --served-model-name Qwen2.5-7B-Instruct --dtype auto --max-model-len 10000 --tensor-parallel-size 1 --enable_prefix_caching