@@ -1,10 +1,10 @@
-FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04
+FROM rocm/vllm:latest
 
 WORKDIR /app
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 COPY . .
 
-ENV NVIDIA_VISIBLE_DEVICES=all
+ENV HIP_VISIBLE_DEVICES=all
 
 CMD ["python", "infer.py"]
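
Unlike the NVIDIA stack, ROCm containers do not go through a special container runtime: the GPU is exposed by passing the kernel device nodes into the container. A minimal sketch of building and running this image, assuming it is tagged rocm-infer (an illustrative name, not one defined by the repo):

    # Build the image from this Dockerfile; "rocm-infer" is an illustrative tag.
    docker build -t rocm-infer .

    # /dev/kfd is the ROCm compute interface and /dev/dri holds the render nodes;
    # --group-add video covers distros that restrict those nodes to the video group.
    docker run --rm --device=/dev/kfd --device=/dev/dri --group-add video rocm-infer
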
@@ -1,15 +1,18 @@
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
+# ROCm PyTorch exposes AMD GPUs through the torch.cuda namespace.
+_rocmport_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
 
 MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"
-device = torch.device("cuda")
+device = _rocmport_device
 
 
 def main():
     tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
-    model = AutoModelForCausalLM.from_pretrained(MODEL_ID).cuda()
-    inputs = tokenizer("Explain ROCm in one sentence.", return_tensors="pt").to("cuda")
+    model = AutoModelForCausalLM.from_pretrained(MODEL_ID).to(_rocmport_device)
+    inputs = tokenizer("Explain ROCm in one sentence.", return_tensors="pt").to(_rocmport_device)
     with torch.no_grad():
         outputs = model.generate(**inputs, max_new_tokens=64)
     print(tokenizer.decode(outputs[0], skip_special_tokens=True))
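
Because the same torch.cuda calls now have two possible backends, it is worth confirming at runtime that the interpreter really has the ROCm build of PyTorch. One way to check, relying on torch.version.hip being None on CUDA builds and a version string on ROCm builds:

    # Prints the HIP version (or None on a CUDA build) and GPU availability.
    python3 -c "import torch; print(torch.version.hip, torch.cuda.is_available())"
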
@@ -2,5 +2,5 @@
 set -euo pipefail
 
-export CUDA_VISIBLE_DEVICES=0
-nvidia-smi
+export HIP_VISIBLE_DEVICES=0
+rocm-smi
 vllm serve Qwen/Qwen2.5-0.5B-Instruct --tensor-parallel-size 1
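
On ROCm, vllm serve exposes the same OpenAI-compatible HTTP API as on CUDA, listening on port 8000 by default, so a quick smoke test against the running server might look like:

    # Ask the served model for a one-sentence completion via the
    # OpenAI-compatible chat completions endpoint (default port 8000).
    curl -s http://localhost:8000/v1/chat/completions \
      -H "Content-Type: application/json" \
      -d '{
            "model": "Qwen/Qwen2.5-0.5B-Instruct",
            "messages": [{"role": "user", "content": "Explain ROCm in one sentence."}],
            "max_tokens": 64
          }'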