# syntax=docker/dockerfile:1
# deploy: sync all changes from main at 6904684 (commit b9a10ad)
# Riprap Models — droplet inference service.
#
# Self-contained ROCm + PyTorch image that runs every GPU-accelerable
# specialist Riprap consumes (Prithvi-NYC-Pluvial, TerraMind LULC +
# Buildings, Granite TTM r2, Granite Embedding 278M, GLiNER).
#
# Base: AMD's public ROCm 7.2.3 + Python 3.12 + PyTorch 2.9.1 release
# image. Same minor torch version as the bespoke MI300X image the
# bootstrap droplet was hand-built with (`torch==2.9.1+git8907517`),
# but pulled from a public registry so any fresh droplet can recreate
# the env without internal AMD wheels. The released 2.9.1 has the
# kernels we need — none of riprap-models calls into vLLM-specific
# attention paths, so the dev-build vs release-build delta is
# inconsequential for our forward passes.
#
# Build: docker build -t riprap-models:latest -f Dockerfile ../..
# Layout: the build context is the project root so the COPY lines
# below can reach `services/riprap-models/`.
# Use the vLLM ROCm image as base — it ships torch 2.9.1+git8907517
# (the actual AMD bespoke build) and is already cached on DigitalOcean
# AMD GPU droplets, so no download is needed during bring-up.
# The public rocm/pytorch release image is a fallback if this image is
# not available; see the comment block above for background.
FROM vllm/vllm-openai-rocm:v0.17.1

# Build-time only: keeps apt non-interactive during `docker build` without
# leaking DEBIAN_FRONTEND into the runtime environment (an ENV would
# silently suppress debconf prompts for anyone running apt in the
# container later).
ARG DEBIAN_FRONTEND=noninteractive

ENV PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    # HF_HOME is the current Hugging Face cache knob; TRANSFORMERS_CACHE
    # is kept alongside it for older transformers releases that still
    # read the legacy variable.
    HF_HOME=/root/.cache/huggingface \
    TRANSFORMERS_CACHE=/root/.cache/huggingface \
    # MI300X tuning the running container uses; baking them in so a
    # bring-up doesn't require remembering the env-set incantation.
    HIP_FORCE_DEV_KERNARG=1 \
    HSA_NO_SCRATCH_RECLAIM=1 \
    PYTORCH_ROCM_ARCH=gfx942
# git is needed by some HF model-card downloads (terratorch yaml repos
# pull via the git protocol). curl for healthcheck. libgl1 for
# rasterio's Pillow path. The base ROCm image is Ubuntu 24.04, and
# already includes most build-time deps we need.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        curl \
        git \
        libgl1 \
        libglib2.0-0 \
    && rm -rf /var/lib/apt/lists/*
# Absolute app home for the service; Docker creates it if missing.
WORKDIR /workspace/riprap-models
# Install deps in two layers so a code-only change doesn't bust the
# heavy ML wheel cache. requirements.txt holds runtime-narrow
# packages that the service imports; requirements-full.txt is the
# super-set the FSM specialists pull in transitively (terratorch's
# kornia / albumentations chain, granite-tsfm's tsfm_public, etc.).
COPY services/riprap-models/requirements-full.txt /tmp/req-full.txt
# Pin the ROCm torch/torchvision/torchaudio the vLLM base image ships as
# a pip constraint, so transitive deps (peft, torchgeo, etc.) cannot pull
# a CUDA wheel from PyPI and swap out the ROCm build. The grep doubles as
# a guard: if the base image ever ships no torch, the pipeline's exit
# status is non-zero and the build fails here instead of later.
RUN set -eux; \
    pip install --upgrade pip; \
    pip freeze | grep -E '^torch(vision|audio)?==' > /tmp/torch-lock.txt; \
    cat /tmp/torch-lock.txt; \
    pip install -r /tmp/req-full.txt --constraint /tmp/torch-lock.txt
# Service code itself — the most frequently changing input, so it lands
# last and invalidates nothing but this final layer. Destination `./` is
# the WORKDIR (/workspace/riprap-models).
COPY services/riprap-models/main.py \
     services/riprap-models/requirements.txt \
     ./
EXPOSE 7860

# The apt layer installs curl explicitly "for healthcheck", and /healthz
# is unauthenticated by design — wire the two together so Docker (and any
# orchestrator reading container health) can detect a wedged service.
# Generous start period: the specialists load model weights at boot.
HEALTHCHECK --interval=30s --timeout=5s --start-period=120s --retries=3 \
  CMD curl -fsS http://localhost:7860/healthz || exit 1

# Exec-form CMD so uvicorn is PID 1 and receives SIGTERM from
# `docker stop`. `--proxy-headers` so a future LB sees the right client
# IP. The /healthz route is unauthenticated by design (operators want
# readiness probes to work without secrets); /v1/* requires the bearer
# token via RIPRAP_MODELS_API_KEY.
# NOTE(review): no USER directive — the container runs as root,
# presumably required for /dev/kfd + /dev/dri GPU access on the droplet
# (HF_HOME also points at /root). Confirm before hardening.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860", \
     "--log-level", "info", "--proxy-headers"]