# syntax=docker/dockerfile:1

# Riprap Models — droplet inference service.
#
# Self-contained ROCm + PyTorch image that runs every GPU-accelerable
# specialist Riprap consumes (Prithvi-NYC-Pluvial, TerraMind LULC +
# Buildings, Granite TTM r2, Granite Embedding 278M, GLiNER).
#
# Base: AMD's public ROCm 7.2.3 + Python 3.12 + PyTorch 2.9.1 release
# image. Same minor torch version as the bespoke MI300X image the
# bootstrap droplet was hand-built with (`torch==2.9.1+git8907517`),
# but pulled from a public registry so any fresh droplet can recreate
# the env without internal AMD wheels. The released 2.9.1 has the
# kernels we need — none of riprap-models calls into vLLM-specific
# attention paths, so the dev-build vs release-build delta is
# inconsequential for our forward passes.
#
# Build:  docker build -t riprap-models:latest -f Dockerfile ../..
# Layout: the build context is the project root so the COPY lines
#         below can reach `services/riprap-models/`.

# Use the vLLM ROCm image as base — it ships torch 2.9.1+git8907517
# (the actual AMD bespoke build) and is already cached on DigitalOcean
# AMD GPU droplets, so no download is needed during bring-up.
# The public rocm/pytorch release image is a fallback if this image is
# not available; see the comment block above for background.
FROM vllm/vllm-openai-rocm:v0.17.1

# NOTE(review): DEBIAN_FRONTEND is baked into the runtime env here;
# hadolint (DL3009 guidance) prefers setting it per-RUN so it does not
# leak into the running container. Kept as-is for now since nothing in
# this image shells out to apt at runtime — confirm before tightening.
# NOTE(review): TRANSFORMERS_CACHE is deprecated in recent transformers
# releases in favor of HF_HOME; both point at the same path so either
# lookup resolves identically. Drop the alias once the pinned
# transformers version no longer reads it.
ENV DEBIAN_FRONTEND=noninteractive \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    HF_HOME=/root/.cache/huggingface \
    TRANSFORMERS_CACHE=/root/.cache/huggingface \
    # MI300X tuning the running container uses; baking them in so a
    # bring-up doesn't require remembering the env-set incantation.
    HIP_FORCE_DEV_KERNARG=1 \
    HSA_NO_SCRATCH_RECLAIM=1 \
    PYTORCH_ROCM_ARCH=gfx942

# git is needed by some HF model-card downloads (terratorch yaml repos
# pull via the git protocol). curl for healthcheck. libgl1 for
# rasterio's Pillow path. The base ROCm image is Ubuntu 24.04, and
# already includes most build-time deps we need.
# apt update + install in one layer (stale-cache rule, DL3009), list
# cleaned in the same layer so it never lands in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
      curl \
      git \
      libgl1 \
      libglib2.0-0 \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /workspace/riprap-models

# The pip-freeze step below pipes through grep; under the default
# `/bin/sh -c` a failing `pip freeze` would be masked by grep's exit
# status (hadolint DL4006). Switch all following RUNs to bash with
# pipefail so pipeline failures fail the build.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Install deps in two layers so a code-only change doesn't bust the
# heavy ML wheel cache. requirements.txt holds runtime-narrow
# packages that the service imports; requirements-full.txt is the
# super-set the FSM specialists pull in transitively (terratorch's
# kornia / albumentations chain, granite-tsfm's tsfm_public, etc.).
COPY services/riprap-models/requirements-full.txt /tmp/req-full.txt
RUN pip install --upgrade pip && \
    # Freeze the ROCm torch/torchvision/torchaudio at whatever version
    # the vLLM base image ships, so transitive deps (peft, torchgeo, etc.)
    # don't pull a CUDA build from PyPI and replace the ROCm one.
    pip freeze | grep -E "^(torch|torchvision|torchaudio)==" > /tmp/torch-lock.txt && \
    cat /tmp/torch-lock.txt && \
    pip install -r /tmp/req-full.txt --constraint /tmp/torch-lock.txt

# Service code itself. Cheap to invalidate; lands last.
# NOTE(review): requirements.txt is copied into the image but never
# pip-installed — only requirements-full.txt above is installed.
# Looks intentional (shipped as a manifest of the runtime-narrow set);
# confirm.
COPY services/riprap-models/main.py /workspace/riprap-models/main.py
COPY services/riprap-models/requirements.txt /workspace/riprap-models/requirements.txt

EXPOSE 7860

# curl was installed above explicitly "for healthcheck", but no
# HEALTHCHECK was ever declared — add one against the unauthenticated
# /healthz route so orchestrators can detect a wedged container.
# start-period is deliberately generous in case service bring-up
# (model/weight loading) is slow — tune once boot time is measured.
HEALTHCHECK --interval=30s --timeout=5s --start-period=120s --retries=3 \
  CMD curl -fsS http://localhost:7860/healthz || exit 1

# `--proxy-headers` so a future LB sees the right client IP. The
# /healthz route is unauthenticated by design (operators want
# readiness probes to work without secrets); /v1/* requires the
# bearer token via RIPRAP_MODELS_API_KEY.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860", \
     "--log-level", "info", "--proxy-headers"]