# Riprap — local + self-hosted orchestration.
#
# Default `docker compose up` starts only the app container, which
# expects RIPRAP_LLM_BASE_URL / RIPRAP_ML_BASE_URL to point at an
# external inference backend (the live HF Space, your own
# self-hosted instance, etc. — see .env.example).
#
# Full self-host (requires an AMD ROCm or NVIDIA CUDA GPU):
#
# docker compose --profile with-models up
#
# This adds the riprap-models GPU specialist service; you still need
# a separate vLLM serving Granite 4.1 8B for the Capstone reconciler
# (see docs/DROPLET-RUNBOOK.md for the canonical bring-up).
services:
  # Front-end app container. Runs standalone by default and talks to an
  # external inference backend via RIPRAP_LLM_BASE_URL / RIPRAP_ML_BASE_URL
  # (see .env.example and the header comment above).
  riprap-app:
    image: msradam/riprap-nyc:v0.5.0
    build:
      context: .
      dockerfile: Dockerfile.app
    ports:
      - "7860:7860"
    environment:
      # Vars without a :- default expand to empty strings when unset;
      # run with a populated .env for a working deployment.
      - RIPRAP_LLM_PRIMARY=${RIPRAP_LLM_PRIMARY:-vllm}
      - RIPRAP_LLM_BASE_URL=${RIPRAP_LLM_BASE_URL}
      - RIPRAP_LLM_API_KEY=${RIPRAP_LLM_API_KEY}
      - RIPRAP_ML_BASE_URL=${RIPRAP_ML_BASE_URL}
      - RIPRAP_ML_API_KEY=${RIPRAP_ML_API_KEY}
      - RIPRAP_HARDWARE_LABEL=${RIPRAP_HARDWARE_LABEL:-Self-hosted}
      - RIPRAP_ENGINE_LABEL=${RIPRAP_ENGINE_LABEL:-Granite 4.1 / vLLM}
    restart: unless-stopped

  # GPU specialist service; only started when the `with-models` profile
  # is active (`docker compose --profile with-models up`).
  riprap-models:
    image: msradam/riprap-models:v0.5.0
    build:
      context: .
      dockerfile: services/riprap-models/Dockerfile
    ports:
      - "7861:7860"  # host 7861 -> container 7860 (app already owns 7860)
    environment:
      # Deliberately reuses the app-side ML key so app and models agree.
      - RIPRAP_MODELS_API_KEY=${RIPRAP_ML_API_KEY}
    deploy:
      resources:
        reservations:
          devices:
            # NOTE(review): `driver: amd` is not a device driver Docker
            # ships (the documented one is `nvidia`; ROCm setups usually
            # pass /dev/kfd and /dev/dri instead). Confirm the target
            # daemon actually resolves this reservation before relying
            # on it.
            - driver: amd
              count: 1
              capabilities: [gpu]
    profiles:
      - with-models
    restart: unless-stopped