# Riprap: Hugging Face Spaces deployment (Docker SDK, GPU).
#
# Base: NVIDIA CUDA 12.4 runtime + cuDNN on Ubuntu 22.04. Ollama's
# install ships the CUDA runtime libs, so granite4.1:3b inference drops
# from ~60-180s on CPU Basic to ~2-4s on nvidia-t4-small.
#
# Bakes:
# - Python 3.10 (default on 22.04) + pip deps (~2.5 GB once torch is in)
# - Ollama itself; Granite weights (~2 GB for :3b, ~5 GB for :8b) pull
#   at first container start (see the entrypoint note below)
# - All pre-computed fixtures in data/ + corpus/
#
# Runtime:
# - Ollama daemon serves Granite 4.1 via CUDA
# - Granite Embedding 278M auto-downloads via sentence-transformers
#   on first FastAPI startup (~280 MB)
# - uvicorn serves the FastAPI app on port 7860 (HF Spaces default)
FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04 AS base
# OS deps: Python 3.10 + geo libs + Ollama install dependencies.
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y --no-install-recommends \
        python3 python3-pip python3-venv python-is-python3 \
        curl ca-certificates zstd procps \
        gdal-bin libgdal-dev libgeos-dev libproj-dev \
    && rm -rf /var/lib/apt/lists/*
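# Optional sanity check for the geo toolchain (illustrative, kept out of
# the build; gdalinfo comes from gdal-bin above):
# RUN gdalinfo --version && python3 --version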
# HF Spaces convention: run as a non-root "user" account at /home/user/app.
RUN useradd -m -u 1000 user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:/usr/local/bin:/usr/bin:/bin \
    PYTHONUNBUFFERED=1 \
    HF_HOME=/home/user/.cache/huggingface \
    OLLAMA_HOST=127.0.0.1:11434 \
    OLLAMA_NUM_PARALLEL=1 \
    OLLAMA_KEEP_ALIVE=24h \
    OLLAMA_MAX_LOADED_MODELS=2 \
    OLLAMA_FLASH_ATTENTION=1 \
    OLLAMA_KV_CACHE_TYPE=q8_0 \
    OLLAMA_DEBUG=1
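# Quick check of the tuning above (illustrative): once the daemon is up,
# Ollama's /api/ps endpoint lists loaded models and their VRAM use.
#   curl -s http://127.0.0.1:11434/api/ps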
# Install Ollama. install.sh ships the cuda_v12 dispatcher libs
# unconditionally; the GPU detection at the tail of the script only gates
# host-driver install (a no-op inside a container). So this works fine
# on a CPU builder for a GPU-attached runtime.
RUN curl -fsSL https://ollama.com/install.sh | sh
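# For reproducible builds, the installer also honors OLLAMA_VERSION
# (per Ollama's install docs); the version number here is illustrative:
#   curl -fsSL https://ollama.com/install.sh | OLLAMA_VERSION=0.5.7 sh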
WORKDIR /home/user/app
# Python deps. No custom wheel index needed for torch: the default PyPI
# wheels bundle CUDA 12.x, matching this base image, so the plain pip
# install below gets GPU-capable torch when sentence-transformers asks
# for it.
COPY --chown=user:user requirements.txt ./
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt
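# If a different CUDA build were ever needed, PyTorch publishes per-CUDA
# wheel indexes (shown for reference, not used in this build):
#   pip install torch --index-url https://download.pytorch.org/whl/cu124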
# --- Earth-observation toolchain (Phase 1 Prithvi live + Phase 4
# TerraMind synthesis) ---------------------------------------------------
#
# Tried four times to land terratorch on HF's Py3.10 image alongside
# our pinned stack (transformers<5, hf_hub<1, granite-tsfm<0.3.4,
# mellea<0.4). Each attempt failed at the same point, a `mkdir`
# immediately after the --no-deps install, with no actionable error
# in HF's build log. The failure pattern is consistent with build-
# sandbox disk exhaustion; even a narrow 4-package install
# (terratorch + einops + diffusers + timm with --no-deps) hits it.
#
# Accepting this: TerraMind synthesis + Prithvi-live remain
# local-/AMD-only on this deployment. The lazy-import pattern in
# app/context/terramind_synthesis.py + app/flood_layers/prithvi_live.py
# returns a clean `skipped: deps unavailable on this deployment` on HF;
# the trace card and the map legend make that visible. The other 14
# specialists run normally.
#
# Re-enable on a deployment with more build disk (Docker SDK on a
# self-hosted machine, AMD droplet, etc.) by adding the EO --no-deps
# install back here (a commented-out sketch follows).
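# Commented out so it stays out of the HF build; this is the shape the
# re-enabled install would take (exact pins should match the attempts
# described above):
# RUN pip install --no-cache-dir --no-deps terratorch einops diffusers timm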
# Both Granite 4.1 variants are used at runtime:
#   :3b -> fast routing (planner) + live_now reconciler (short outputs)
#   :8b -> synthesis reconciler for single_address / neighborhood / dev_check
# Both fit warm on the T4 with OLLAMA_MAX_LOADED_MODELS=2 (~10 GB total
# VRAM out of 16). If the pulls ever move back into a build layer (start
# ollama in the background, poll its HTTP endpoint, pull, let the layer
# exit), don't pkill the daemon afterwards: Docker reaps it when the
# layer exits, and pkill would match the RUN's own cmdline and kill the
# layer with exit code 143.
ENV OLLAMA_MODELS=/home/user/.ollama/models \
    RIPRAP_OLLAMA_3B_TAG=granite4.1:3b
# Granite weights are pulled at *container start* (see entrypoint.sh)
# instead of at build time. HF's build sandbox can't fit the EO
# toolchain + Granite 8B (~5 GB) simultaneously, but the runtime
# rootfs is larger and persists between container starts within an
# image lifetime. Cold start on first launch is ~2 min for the 8B pull;
# subsequent restarts are fast since Ollama's cache survives.
RUN mkdir -p $OLLAMA_MODELS
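# Minimal sketch of the entrypoint flow (the real logic lives in
# entrypoint.sh; the uvicorn target app.main:app is an assumption):
#   ollama serve &
#   until curl -sf http://127.0.0.1:11434/api/tags >/dev/null; do sleep 1; done
#   ollama pull "$RIPRAP_OLLAMA_3B_TAG"
#   ollama pull granite4.1:8b
#   exec uvicorn app.main:app --host 0.0.0.0 --port 7860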
# App code + fixtures
COPY --chown=user:user app/ ./app/
COPY --chown=user:user web/ ./web/
COPY --chown=user:user scripts/ ./scripts/
COPY --chown=user:user data/ ./data/
COPY --chown=user:user corpus/ ./corpus/
COPY --chown=user:user agent.py riprap.py ./
COPY --chown=user:user entrypoint.sh ./
RUN chmod +x ./entrypoint.sh
# Hand off to the non-root user the way HF Spaces expects.
RUN chown -R user:user /home/user
USER user
EXPOSE 7860
CMD ["./entrypoint.sh"]