#!/usr/bin/env bash
#
# Launch a vLLM server with LMCache support inside Docker on GPU 0.
#
# Required environment variables:
#   HF_CACHE_DIR  - path to your local Hugging Face cache directory
#   HF_TOKEN      - your Hugging Face access token
# Optional:
#   MODEL         - model to serve (defaults to Mistral-7B-Instruct-v0.2)
set -euo pipefail

# Model served by vLLM; override with: MODEL=... ./run.sh
model=${MODEL:-mistralai/Mistral-7B-Instruct-v0.2}

# Fail fast with a clear message instead of passing empty values to docker.
: "${HF_CACHE_DIR:?set HF_CACHE_DIR to your local Hugging Face cache directory}"
: "${HF_TOKEN:?set HF_TOKEN to your Hugging Face access token}"

# NOTE(review): with --network=host, the -p 8000:8000 publish is ignored by
# Docker (host networking exposes container ports directly); it is kept so the
# intended port remains documented if host networking is ever removed.
sudo docker run --runtime nvidia --gpus '"device=0"' \
  -v "${HF_CACHE_DIR}:/root/.cache/huggingface" \
  -p 8000:8000 \
  --env "HF_TOKEN=${HF_TOKEN}" \
  --ipc=host \
  --network=host \
  apostacyh/vllm:lmcache-0.1.0 \
  --model "$model" --gpu-memory-utilization 0.6 --port 8000 \
  --lmcache-config-file /lmcache/LMCache/examples/example-local.yaml