# syntax=docker/dockerfile:1
# deploy: sync all changes from main at 6904684 (commit b9a10ad)
# Riprap Models — droplet inference service.
#
# Self-contained ROCm + PyTorch image that runs every GPU-accelerable
# specialist Riprap consumes (Prithvi-NYC-Pluvial, TerraMind LULC +
# Buildings, Granite TTM r2, Granite Embedding 278M, GLiNER).
#
# Base: AMD's public ROCm 7.2.3 + Python 3.12 + PyTorch 2.9.1 release
# image. Same minor torch version as the bespoke MI300X image the
# bootstrap droplet was hand-built with (`torch==2.9.1+git8907517`),
# but pulled from a public registry so any fresh droplet can recreate
# the env without internal AMD wheels. The released 2.9.1 has the
# kernels we need — none of riprap-models calls into vLLM-specific
# attention paths, so the dev-build vs release-build delta is
# inconsequential for our forward passes.
#
# Build: docker build -t riprap-models:latest -f Dockerfile ../..
# Layout: the build context is the project root so the COPY lines
# below can reach `services/riprap-models/`.
# Use the vLLM ROCm image as base — it ships torch 2.9.1+git8907517
# (the actual AMD bespoke build) and is already cached on DigitalOcean
# AMD GPU droplets, so no download is needed during bring-up.
# The public rocm/pytorch release image is a fallback if this image is
# not available; see the comment block above for background.
FROM vllm/vllm-openai-rocm:v0.17.1

# Build-time only: keeps apt non-interactive during `docker build` without
# leaking DEBIAN_FRONTEND into the runtime environment (an ENV would
# silently suppress debconf prompts for anyone running apt in the
# container later).
ARG DEBIAN_FRONTEND=noninteractive

ENV PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    # HF_HOME is the current Hugging Face cache knob; TRANSFORMERS_CACHE
    # is kept alongside it for older transformers releases that still
    # read the legacy variable.
    HF_HOME=/root/.cache/huggingface \
    TRANSFORMERS_CACHE=/root/.cache/huggingface \
    # MI300X tuning the running container uses; baking them in so a
    # bring-up doesn't require remembering the env-set incantation.
    HIP_FORCE_DEV_KERNARG=1 \
    HSA_NO_SCRATCH_RECLAIM=1 \
    PYTORCH_ROCM_ARCH=gfx942
# git is needed by some HF model-card downloads (terratorch yaml repos
# pull via the git protocol). curl for healthcheck. libgl1 for
# rasterio's Pillow path. The base ROCm image is Ubuntu 24.04, and
# already includes most build-time deps we need.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        curl \
        git \
        libgl1 \
        libglib2.0-0 \
    && rm -rf /var/lib/apt/lists/*
# Absolute app home for the service; Docker creates it if missing.
WORKDIR /workspace/riprap-models
# Install deps in two layers so a code-only change doesn't bust the
# heavy ML wheel cache. requirements.txt holds runtime-narrow
# packages that the service imports; requirements-full.txt is the
# super-set the FSM specialists pull in transitively (terratorch's
# kornia / albumentations chain, granite-tsfm's tsfm_public, etc.).
COPY services/riprap-models/requirements-full.txt /tmp/req-full.txt
# Pin the ROCm torch/torchvision/torchaudio the vLLM base image ships as
# a pip constraint, so transitive deps (peft, torchgeo, etc.) cannot pull
# a CUDA wheel from PyPI and swap out the ROCm build. The grep doubles as
# a guard: if the base image ever ships no torch, the pipeline's exit
# status is non-zero and the build fails here instead of later.
RUN set -eux; \
    pip install --upgrade pip; \
    pip freeze | grep -E '^torch(vision|audio)?==' > /tmp/torch-lock.txt; \
    cat /tmp/torch-lock.txt; \
    pip install -r /tmp/req-full.txt --constraint /tmp/torch-lock.txt
# Service code itself — the most frequently changing input, so it lands
# last and invalidates nothing but this final layer. Destination `./` is
# the WORKDIR (/workspace/riprap-models).
COPY services/riprap-models/main.py \
     services/riprap-models/requirements.txt \
     ./
EXPOSE 7860

# The apt layer installs curl explicitly "for healthcheck", and /healthz
# is unauthenticated by design — wire the two together so Docker (and any
# orchestrator reading container health) can detect a wedged service.
# Generous start period: the specialists load model weights at boot.
HEALTHCHECK --interval=30s --timeout=5s --start-period=120s --retries=3 \
  CMD curl -fsS http://localhost:7860/healthz || exit 1

# Exec-form CMD so uvicorn is PID 1 and receives SIGTERM from
# `docker stop`. `--proxy-headers` so a future LB sees the right client
# IP. The /healthz route is unauthenticated by design (operators want
# readiness probes to work without secrets); /v1/* requires the bearer
# token via RIPRAP_MODELS_API_KEY.
# NOTE(review): no USER directive — the container runs as root,
# presumably required for /dev/kfd + /dev/dri GPU access on the droplet
# (HF_HOME also points at /root). Confirm before hardening.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860", \
     "--log-level", "info", "--proxy-headers"]