| |
| |
# Base image: NVIDIA NGC PyTorch container, pinned to the 24.08-py3 tag.
FROM nvcr.io/nvidia/pytorch:24.08-py3

# Environment for the build steps below. ENV values also persist into
# containers started from the resulting image.
#   MAX_JOBS                      - presumably caps parallel compile jobs for the
#                                   source builds later in this file; confirm.
#   VLLM_WORKER_MULTIPROC_METHOD  - worker start method handed to vLLM.
#   DEBIAN_FRONTEND               - keeps apt/dpkg from prompting during the build.
#   NODE_OPTIONS                  - explicitly set to the empty string.
#   PIP_ROOT_USER_ACTION          - silences pip's running-as-root warning.
#   HF_HUB_ENABLE_HF_TRANSFER     - opts Hugging Face Hub downloads into hf-transfer.
ENV MAX_JOBS=32 \
    VLLM_WORKER_MULTIPROC_METHOD=spawn \
    DEBIAN_FRONTEND=noninteractive \
    NODE_OPTIONS="" \
    PIP_ROOT_USER_ACTION=ignore \
    HF_HUB_ENABLE_HF_TRANSFER="1"
|
|
| |
# Package mirrors, overridable with --build-arg. The defaults point at the
# mirrors.tuna.tsinghua.edu.cn mirrors for Ubuntu and PyPI.
ARG APT_SOURCE=https://mirrors.tuna.tsinghua.edu.cn/ubuntu/
ARG PIP_INDEX=https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

# Back up the stock APT sources, then regenerate the file so that each jammy
# suite (release, updates, backports, security) is served from ${APT_SOURCE}.
RUN cp /etc/apt/sources.list /etc/apt/sources.list.bak && \
    for suite in jammy jammy-updates jammy-backports jammy-security; do \
        echo "deb ${APT_SOURCE} ${suite} main restricted universe multiverse"; \
    done > /etc/apt/sources.list
|
|
| |
# Install systemd. --force-confdef tells dpkg to take the default action on
# config-file conflicts instead of stopping to ask.
# `apt-get clean` only empties the .deb cache, so the package index under
# /var/lib/apt/lists is removed in the same layer as well. Every later APT
# step in this file runs its own `apt-get update`, so nothing depends on it.
RUN apt-get update && \
    apt-get install -y -o Dpkg::Options::="--force-confdef" systemd && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Install tini and aria2. Later steps download NVIDIA installers with aria2c.
RUN apt-get update && \
    apt-get install -y tini aria2 && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
|
|
| |
# Point pip's primary and extra index at the ${PIP_INDEX} mirror for the rest
# of the build, then upgrade pip itself.
# NOTE(review): later steps pass --resume-retries, which older pip releases do
# not accept, so this upgrade is presumably required rather than cosmetic.
# --no-cache-dir keeps the downloaded pip wheel out of the layer.
RUN pip config set global.index-url "${PIP_INDEX}" && \
    pip config set global.extra-index-url "${PIP_INDEX}" && \
    python -m pip install --no-cache-dir --upgrade pip
|
|
| |
# Remove packages by name (most are reinstalled at pinned versions further
# down) so the later installs start from a clean slate.
# One package per line, sorted, to keep diffs small.
RUN pip uninstall -y \
        apex \
        flash_attn \
        grpcio \
        megatron-core \
        pytorch-quantization \
        pytorch-triton \
        torch \
        torch-tensorrt \
        torchaudio \
        torchvision \
        transformer_engine \
        xgboost
|
|
| |
# Fetch NVIDIA's APT pin file for the CUDA repository and install it as
# /etc/apt/preferences.d/cuda-repository-pin-600.
RUN aria2c https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-ubuntu2204.pin && \
    mv cuda-ubuntu2204.pin /etc/apt/preferences.d/cuda-repository-pin-600

# Install CUDA toolkit 12.4 from NVIDIA's local-repository .deb and make it the
# active `cuda` alternative.
# Cleanup happens in the same layer, since deleting files in a later layer
# would not shrink the image:
#   - the downloaded installer .deb is removed,
#   - the local-repo package installed by `dpkg -i` is purged, which drops the
#     package archive it unpacked under /var/ together with its APT source
#     entry, so later `apt-get update` runs do not reference a missing repo,
#   - the APT package cache and index are removed.
# /usr/local/cuda-12.6 is deleted last; presumably it is the toolkit bundled
# with the base image (confirm).
RUN aria2c --always-resume=true --max-tries=99999 https://developer.download.nvidia.com/compute/cuda/12.4.1/local_installers/cuda-repo-ubuntu2204-12-4-local_12.4.1-550.54.15-1_amd64.deb && \
    dpkg -i cuda-repo-ubuntu2204-12-4-local_12.4.1-550.54.15-1_amd64.deb && \
    cp /var/cuda-repo-ubuntu2204-12-4-local/cuda-*-keyring.gpg /usr/share/keyrings/ && \
    apt-get update && \
    apt-get -y install cuda-toolkit-12-4 && \
    rm cuda-repo-ubuntu2204-12-4-local_12.4.1-550.54.15-1_amd64.deb && \
    apt-get purge -y cuda-repo-ubuntu2204-12-4-local && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* && \
    update-alternatives --set cuda /usr/local/cuda-12.4 && \
    rm -rf /usr/local/cuda-12.6
|
|
| |
| |
| |
| |
| |
# Install the pinned SGLang release with all extras. The --find-links page
# supplies FlashInfer wheels for the cu124 / torch2.6 combination.
# torch-memory-saver is installed afterwards in the same layer.
RUN pip install --no-cache-dir --resume-retries 999 \
        --find-links https://flashinfer.ai/whl/cu124/torch2.6/flashinfer-python \
        "sglang[all]==0.4.6.post5" && \
    pip install --no-cache-dir --resume-retries 999 torch-memory-saver
|
|
# Install vLLM together with explicit torch / torchvision / torchaudio and
# tensordict pins in one resolver run. torchdata is left unpinned.
RUN pip install --no-cache-dir --resume-retries 999 \
        "vllm==0.8.5.post1" \
        "torch==2.6.0" \
        "torchvision==0.21.0" \
        "torchaudio==2.6.0" \
        "tensordict==0.6.2" \
        torchdata
|
|
# General Python dependencies, grouped as: Hugging Face stack, data libraries,
# runtime and utility packages, developer tooling.
# "ray[default]" is quoted because an unquoted [...] is a shell glob and would
# be rewritten if a matching file name existed in the working directory.
RUN pip install --resume-retries 999 --no-cache-dir \
        "transformers[hf_xet]>=4.51.0" accelerate datasets peft hf-transfer \
        "numpy<2.0.0" "pyarrow>=15.0.0" pandas \
        "ray[default]" codetiming hydra-core pylatexenc qwen-vl-utils wandb dill pybind11 liger-kernel mathruler blobfile \
        pytest py-spy pyext pre-commit ruff
|
|
| |
# Install the prebuilt flash-attn 2.7.4.post1 wheel. The file name encodes the
# build it targets: cu12, torch2.6, cxx11abiFALSE, cp310, linux_x86_64.
# The wheel is deleted in the same layer once installed, so the download does
# not stay in the image.
RUN wget -nv https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl && \
    pip install --no-cache-dir flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl && \
    rm -f flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
|
|
| |
# Remove both pynvml and nvidia-ml-py, then install or upgrade nvidia-ml-py
# along with fastapi, optree, pydantic and grpcio to the minimum versions
# listed here.
RUN pip uninstall -y pynvml nvidia-ml-py && \
    pip install --no-cache-dir --resume-retries 999 --upgrade \
        "nvidia-ml-py>=12.560.30" \
        "fastapi[standard]>=0.115.0" \
        "optree>=0.13.0" \
        "pydantic>=2.9" \
        "grpcio>=1.62.1"
|
|
| |
# Install cuDNN 9.8.0 for CUDA 12 from NVIDIA's local-repository .deb.
# Cleanup happens in the same layer so none of it persists in the image:
#   - the downloaded installer .deb is removed,
#   - the local-repo package installed by `dpkg -i` is purged, which drops the
#     package archive it unpacked under /var/ together with its APT source
#     entry,
#   - the APT package cache and index are removed.
RUN aria2c --max-tries=9999 https://developer.download.nvidia.com/compute/cudnn/9.8.0/local_installers/cudnn-local-repo-ubuntu2204-9.8.0_1.0-1_amd64.deb && \
    dpkg -i cudnn-local-repo-ubuntu2204-9.8.0_1.0-1_amd64.deb && \
    cp /var/cudnn-local-repo-ubuntu2204-9.8.0/cudnn-*-keyring.gpg /usr/share/keyrings/ && \
    apt-get update && \
    apt-get -y install cudnn-cuda-12 && \
    rm cudnn-local-repo-ubuntu2204-9.8.0_1.0-1_amd64.deb && \
    apt-get purge -y cudnn-local-repo-ubuntu2204-9.8.0 && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Install the pip-distributed cuDNN at the matching 9.8.0 version.
RUN pip install --resume-retries 999 --no-cache-dir nvidia-cudnn-cu12==9.8.0.87
|
|
| |
# Build and install NVIDIA apex from source with its C++ and CUDA extensions.
#   - --depth 1: only the current tree is needed for the build, not history.
#   - the checkout is passed to pip as a path instead of using `cd`.
#   - the checkout is deleted in the same layer; pip has already installed the
#     built package (this is not an editable install).
# --no-build-isolation makes the build use the torch already installed in the
# image rather than a fresh one pulled into an isolated environment.
# TODO(review): the clone tracks the default branch; pin a tag or commit for
# reproducible builds.
RUN git clone --depth 1 https://github.com/NVIDIA/apex.git && \
    pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation \
        --config-settings "--build-option=--cpp_ext" \
        --config-settings "--build-option=--cuda_ext" \
        ./apex && \
    rm -rf apex
|
|
| |
# Install TransformerEngine v2.3 from its git tag. NVTE_FRAMEWORK=pytorch is
# set only for this pip invocation, and --no-deps leaves the packages
# installed above untouched.
RUN NVTE_FRAMEWORK=pytorch pip3 install --no-deps --no-cache-dir \
        git+https://github.com/NVIDIA/TransformerEngine.git@v2.3
|
|
| |
# Install Megatron-LM from the core_v0.12.2 git ref, without letting pip
# resolve or change any dependencies (--no-deps).
RUN pip3 install \
        --no-cache-dir \
        --no-deps \
        git+https://github.com/NVIDIA/Megatron-LM.git@core_v0.12.2
|
|
| |
# Install OpenCV's Python bindings. --no-cache-dir matches the other installs
# in this file and keeps pip's download cache out of the layer.
RUN pip install --no-cache-dir opencv-python

# Install opencv-fixer and run its AutoFix() entry point once at build time.
# NOTE(review): presumably this repairs a conflicting OpenCV install inherited
# from the base image; confirm it is still needed.
RUN pip install --no-cache-dir opencv-fixer && \
    python -c "from opencv_fixer import AutoFix; AutoFix()"
|
|
| |
|
|
| |
# Drop the build-time mirror settings so the finished image falls back to
# pip's default index configuration.
RUN pip config unset global.extra-index-url && \
    pip config unset global.index-url
|
|
# Install FreeImage (runtime library and headers) and zlib.
# aria2 is listed again here; it was already installed by an earlier step, so
# this is a no-op unless a newer candidate version is available.
# The package cache and index are removed in the same layer so they do not
# end up in the final image.
RUN apt-get update && \
    apt-get install -y aria2 libfreeimage3 libfreeimage-dev zlib1g && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*