Spaces:
Runtime error
Runtime error
| --- a/Dockerfile | |
| +++ b/Dockerfile | |
| -FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04 | |
| +FROM rocm/vllm:latest | |
| WORKDIR /app | |
| COPY requirements.txt . | |
| RUN pip install --no-cache-dir -r requirements.txt | |
| COPY . . | |
| -ENV NVIDIA_VISIBLE_DEVICES=all | |
| +# HIP exposes all GPUs by default; HIP_VISIBLE_DEVICES takes numeric indices | |
| +# (e.g. "0,1") to restrict visibility — "all" is not a valid value. | |
| CMD ["python", "infer.py"] | |
| --- a/infer.py | |
| +++ b/infer.py | |
| import torch | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| +# ROCm PyTorch exposes AMD GPUs through the torch.cuda namespace. | |
| +_rocmport_device = torch.device("cuda" if torch.cuda.is_available() else "cpu") | |
| + | |
| MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct" | |
| -device = torch.device("cuda") | |
| +device = _rocmport_device | |
| def main(): | |
| tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) | |
| - model = AutoModelForCausalLM.from_pretrained(MODEL_ID).cuda() | |
| - inputs = tokenizer("Explain ROCm in one sentence.", return_tensors="pt").to("cuda") | |
| + model = AutoModelForCausalLM.from_pretrained(MODEL_ID).to(_rocmport_device) | |
| + inputs = tokenizer("Explain ROCm in one sentence.", return_tensors="pt").to(_rocmport_device) | |
| with torch.no_grad(): | |
| outputs = model.generate(**inputs, max_new_tokens=64) | |
| print(tokenizer.decode(outputs[0], skip_special_tokens=True)) | |
| --- a/scripts/serve_vllm.sh | |
| +++ b/scripts/serve_vllm.sh | |
| set -euo pipefail | |
| -export CUDA_VISIBLE_DEVICES=0 | |
| +export HIP_VISIBLE_DEVICES=0 | |
| -nvidia-smi | |
| +rocm-smi | |
| vllm serve Qwen/Qwen2.5-0.5B-Instruct --tensor-parallel-size 1 | |