# Apptainer/Singularity definition file: builds a ROCm image containing a
# source-built vllm v0.6.3, the Python dependencies listed below, an editable
# install of verl, and torch_memory_saver.
Bootstrap: docker

# Base image selection.
# Previous base (supports training: fsdp; inference: vllm):
#   rocm/vllm:rocm6.2_mi300_ubuntu20.04_py3.9_vllm_0.6.4
# Current base (supports training: fsdp; inference: vllm, sglang):
From: lmsysorg/sglang:v0.4.5-rocm630

%environment
    # Variables exported here are set when the container runs.
    # GPU architectures to target, and the HIP platform define for C/C++ builds.
    export PYTORCH_ROCM_ARCH="gfx90a;gfx942"
    export HIPCC_COMPILE_FLAGS_APPEND="--amdgpu-target=gfx90a;gfx942 -D__HIP_PLATFORM_AMD__"
    export CFLAGS="-D__HIP_PLATFORM_AMD__"
    export CXXFLAGS="-D__HIP_PLATFORM_AMD__"

%post
    # %environment is not applied while %post runs, so the same settings are
    # exported here to make them visible to the vllm source build below.
    export PYTORCH_ROCM_ARCH="gfx90a;gfx942"
    export HIPCC_COMPILE_FLAGS_APPEND="--amdgpu-target=gfx90a;gfx942 -D__HIP_PLATFORM_AMD__"
    export CFLAGS="-D__HIP_PLATFORM_AMD__"
    export CXXFLAGS="-D__HIP_PLATFORM_AMD__"

    # Create source directory
    mkdir -p /opt/src

    # Replace the vllm shipped in the base image with a source build of v0.6.3.
    pip uninstall -y vllm
    cd /opt/src
    git clone -b v0.6.3 https://github.com/vllm-project/vllm.git
    cd vllm
    # Use one compile job per available CPU.
    MAX_JOBS=$(nproc) python3 setup.py install
    cd /opt
    # The checkout is only needed for the build; remove it afterwards.
    rm -rf /opt/src/vllm

    # Install dependencies.
    # tensordict is installed with --no-deps so that pip does not pull in or
    # change any of its dependencies.
    pip install --no-cache-dir "tensordict<0.6" --no-deps
    pip install --no-cache-dir \
        accelerate \
        codetiming \
        datasets \
        dill \
        hydra-core \
        liger-kernel \
        numpy \
        orjson \
        pandas \
        peft \
        "pyarrow>=15.0.0" \
        pybind11 \
        pylatexenc \
        "ray[data,train,tune,serve]" \
        torchdata \
        transformers \
        wandb

    # Clone and install verl from GitHub (editable install, default branch).
    cd /opt
    git clone https://github.com/volcengine/verl.git
    cd verl
    # Uncomment to use a specific version
    # git checkout v0.3.0.post0
    pip install --no-cache-dir -e . --no-deps

    # Install torch_memory_saver
    pip install --no-cache-dir git+https://github.com/ExtremeViscent/torch_memory_saver.git --no-deps