# drugenv-trainer / Dockerfile
# anugrahteesdollar's picture
# space: add root Dockerfile for trainer Space
# 7e81b32 verified
# DrugEnv trainer Space (Docker, single H200 GPU)
# Serves the FastAPI control panel (space.training.app:app) on port 8000,
# matched by README YAML app_port: 8000.
# Base image: CUDA 12.4.1 with cuDNN ("devel" variant) on Ubuntu 22.04,
# referenced by an explicit tag.
FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04

# Build-time only: stops apt-get from prompting during the image build.
# Declared as ARG rather than ENV so it is visible to the RUN steps of this
# stage but is not baked into the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Environment that persists into the running container.
#   PYTHONUNBUFFERED   - unbuffered stdout/stderr for the Python process
#   PIP_NO_CACHE_DIR   - pip keeps no download cache inside the image
#   HF_HOME            - Hugging Face cache root under /home/user
#   TRANSFORMERS_CACHE - transformers cache directory under HF_HOME
#                        NOTE(review): newer transformers releases appear to
#                        deprecate this variable in favour of HF_HOME - confirm
#                        against the pinned transformers version before removing.
#   PYTHONPATH         - /home/user/app, the directory the repo is copied into
#   PORT               - 8000; note the CMD passes "--port 8000" literally and
#                        does not read this variable
ENV PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    HF_HOME=/home/user/.cache/huggingface \
    TRANSFORMERS_CACHE=/home/user/.cache/huggingface/transformers \
    PYTHONPATH=/home/user/app \
    PORT=8000
# System layer: Python 3.11 (interpreter, venv module, headers), pip, and the
# basic fetch/build tooling. The apt package lists are deleted in the same
# layer so they never reach the image, and both `python` and `python3` under
# /usr/local/bin are symlinked to the 3.11 interpreter.
RUN apt-get update \
 && apt-get install --yes --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        git \
        python3-pip \
        python3.11 \
        python3.11-dev \
        python3.11-venv \
 && rm -rf /var/lib/apt/lists/* \
 && for name in python python3; do \
        ln -sf /usr/bin/python3.11 "/usr/local/bin/${name}"; \
    done
# Unprivileged runtime account with a home directory and bash as login shell.
# The UID is pinned to 1000 instead of being left to useradd's "next free UID"
# choice: Hugging Face Docker Spaces run the container as UID 1000, so file
# ownership inside the image has to match that ID regardless of what accounts
# the base image may ship.
RUN useradd --create-home --shell /bin/bash --uid 1000 user

# Everything from here on (pip install, the server process) runs as `user`.
USER user

# `pip install --user` places console scripts in ~/.local/bin; put it first on PATH.
ENV PATH="/home/user/.local/bin:${PATH}"

# Application root; also the directory listed in PYTHONPATH.
WORKDIR /home/user/app
# The full repository is copied in before any Python dependency is installed:
# the trainer requirements file pulls in other files through relative `-r`
# includes (e.g. -r ../../requirements-train.txt), and those paths only
# resolve once the whole tree exists on disk. Files are owned by `user`.
COPY --chown=user:user . /home/user/app

# Upgrade pip itself, then install the trainer requirements into the user's
# site-packages (~/.local).
RUN python -m pip install --upgrade pip \
 && python -m pip install --user \
        --requirement /home/user/app/space/training/requirements.txt
# Documents the port the server listens on; it matches the --port argument below.
EXPOSE 8000
# Exec-form (JSON array) command: run uvicorn as a module, serving the
# `app` object from space.training.app on all interfaces (0.0.0.0), port 8000.
CMD ["python", "-m", "uvicorn", "space.training.app:app", "--host", "0.0.0.0", "--port", "8000"]