# syntax=docker/dockerfile:1
# Riprap Models — droplet inference service.
#
# Self-contained ROCm + PyTorch image that runs every GPU-accelerable
# specialist Riprap consumes (Prithvi-NYC-Pluvial, TerraMind LULC +
# Buildings, Granite TTM r2, Granite Embedding 278M, GLiNER).
#
# Base: AMD's public ROCm 7.2.3 + Python 3.12 + PyTorch 2.9.1 release
# image. Same minor torch version as the bespoke MI300X image the
# bootstrap droplet was hand-built with (`torch==2.9.1+git8907517`),
# but pulled from a public registry so any fresh droplet can recreate
# the env without internal AMD wheels. The released 2.9.1 has the
# kernels we need — none of riprap-models calls into vLLM-specific
# attention paths, so the dev-build vs release-build delta is
# inconsequential for our forward passes.
#
# Build:  docker build -t riprap-models:latest -f Dockerfile ../..
# Layout: the build context is the project root so the COPY lines
# below can reach `services/riprap-models/`.

# Use the vLLM ROCm image as base — it ships torch 2.9.1+git8907517
# (the actual AMD bespoke build) and is already cached on DigitalOcean
# AMD GPU droplets, so no download is needed during bring-up.
# The public rocm/pytorch release image is a fallback if this image is
# not available; see the comment block above for background.
FROM vllm/vllm-openai-rocm:v0.17.1

# pipefail so the `pip freeze | grep` pipeline below fails the build
# if `pip freeze` itself errors — the default /bin/sh -c would let
# grep's exit status mask it (hadolint DL4006).
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Runtime environment. DEBIAN_FRONTEND is deliberately NOT set here —
# it is a build-time-only knob and is passed inline on the apt-get
# layer instead, so it doesn't leak into the running container.
ENV PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    HF_HOME=/root/.cache/huggingface \
    # TRANSFORMERS_CACHE is superseded by HF_HOME in recent
    # transformers releases but kept for older ones that still read it.
    TRANSFORMERS_CACHE=/root/.cache/huggingface \
    # MI300X tuning the running container uses; baking them in so a
    # bring-up doesn't require remembering the env-set incantation.
    HIP_FORCE_DEV_KERNARG=1 \
    HSA_NO_SCRATCH_RECLAIM=1 \
    PYTORCH_ROCM_ARCH=gfx942

# git is needed by some HF model-card downloads (terratorch yaml repos
# pull via the git protocol). curl for the HEALTHCHECK below. libgl1 /
# libglib2.0-0 for rasterio's Pillow path. The base ROCm image is
# Ubuntu 24.04 and already includes most build-time deps we need.
RUN DEBIAN_FRONTEND=noninteractive apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        curl \
        git \
        libgl1 \
        libglib2.0-0 \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /workspace/riprap-models

# Heavy ML wheels get their own layer so a code-only change doesn't
# bust the cache. requirements-full.txt is the super-set the FSM
# specialists pull in transitively (terratorch's kornia /
# albumentations chain, granite-tsfm's tsfm_public, etc.).
# requirements.txt (copied alongside the code below) lists the
# runtime-narrow packages the service imports directly; it is covered
# by the super-set, so it is not pip-installed separately here.
COPY services/riprap-models/requirements-full.txt /tmp/req-full.txt
RUN pip install --upgrade pip && \
    # Freeze the ROCm torch/torchvision/torchaudio at whatever version
    # the vLLM base image ships, so transitive deps (peft, torchgeo,
    # etc.) don't pull a CUDA build from PyPI and replace the ROCm one.
    pip freeze | grep -E "^(torch|torchvision|torchaudio)==" > /tmp/torch-lock.txt && \
    cat /tmp/torch-lock.txt && \
    pip install -r /tmp/req-full.txt --constraint /tmp/torch-lock.txt

# Service code itself. Cheap to invalidate; lands last.
COPY services/riprap-models/main.py /workspace/riprap-models/main.py
COPY services/riprap-models/requirements.txt /workspace/riprap-models/requirements.txt

EXPOSE 7860

# /healthz is unauthenticated (see the CMD note below), so the probe
# needs no secrets. Generous start-period: model weights load into GPU
# memory at startup before the route goes healthy.
HEALTHCHECK --interval=30s --timeout=5s --start-period=180s --retries=3 \
    CMD curl -fsS http://localhost:7860/healthz || exit 1

# NOTE(review): the container currently runs as root — HF_HOME lives
# under /root/.cache and ROCm device-node access is simplest as root
# on the droplet image. Consider a dedicated user in the video/render
# groups; confirm against the droplet's /dev/kfd permissions first.

# `--proxy-headers` so a future LB sees the right client IP. The
# /healthz route is unauthenticated by design (operators want
# readiness probes to work without secrets); /v1/* requires the
# bearer token via RIPRAP_MODELS_API_KEY.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860", \
     "--log-level", "info", "--proxy-headers"]