# ============================================================================
# Swarm-OS — Hugging Face Space (Docker SDK, GPU)
#
# Single-image stack:
#   1. Stage `web` — node:20 builds the React frontend into frontend/dist
#   2. Stage `app` — CUDA 12.1 runtime (pre-built llama-cpp-python wheel)
#      - llama-cpp-python[server] (CUDA wheel)  -> 127.0.0.1:1234
#      - backend/main.py uvicorn                -> 0.0.0.0:7860
#      - frontend/dist served at /
#      - inference.py runnable via `python inference.py`
# ============================================================================

# -------- Stage 1: frontend build --------
FROM node:20-slim AS web
WORKDIR /web
COPY frontend/package.json frontend/package-lock.json ./
RUN npm ci --no-audit --no-fund
COPY frontend/ ./
RUN npm run build

# -------- Stage 2: runtime --------
FROM nvidia/cuda:12.1.1-cudnn8-runtime-ubuntu22.04 AS app

ENV DEBIAN_FRONTEND=noninteractive \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PORT=7860 \
    HF_HOME=/data/hf \
    HUGGINGFACE_HUB_CACHE=/data/hf \
    LOCAL_OPENAI_BASE_URL=http://127.0.0.1:1234 \
    LOCAL_OPENAI_API_KEY=lm-studio \
    LLM_PROVIDER=local

# System deps:
#   python3.11 + pip        application runtime
#   curl + ca-certificates  start.sh readiness probe
#   libgomp1                OpenMP runtime required by llama-cpp-python
#   bsdutils                provides `script` (PTY allocator) — REQUIRED to
#                           defeat Docker stdout buffering so HF Space's
#                           Container tab shows live logs in real time
#                           (note: `script` ships in bsdutils, not bsdmainutils)
#   coreutils               ships `stdbuf` for line-buffered subprocess output
RUN apt-get update && apt-get install -y --no-install-recommends \
        python3.11 python3-pip \
        curl ca-certificates libgomp1 \
        bsdutils coreutils \
    && ln -sf /usr/bin/python3.11 /usr/bin/python \
    && ln -sf /usr/bin/python3.11 /usr/bin/python3 \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Python deps (cached layer)
# Install llama-cpp-python from the pre-built CUDA 12.1 wheel index (no compilation)
COPY requirements.txt /app/requirements.txt
RUN pip install --upgrade pip setuptools wheel \
    && pip install -r /app/requirements.txt \
    && pip install "llama-cpp-python[server]>=0.2.90" \
        --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu121

# App code
COPY pyproject.toml README.md openenv.yaml inference.py start.sh /app/
COPY server /app/server
COPY swarm_openenv_env /app/swarm_openenv_env
COPY backend /app/backend
COPY outputs /app/outputs

# Built frontend from stage 1
COPY --from=web /web/dist /app/frontend/dist

# Persistent cache dirs for the GGUF and HF hub (HF Spaces mounts /data as
# persistent storage)
RUN mkdir -p /data/models /data/hf && chmod -R 777 /data

RUN chmod +x /app/start.sh

EXPOSE 7860
CMD ["bash", "/app/start.sh"]
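
# -------- Reference: the `script` + `stdbuf` live-log trick --------
# A minimal sketch of how the two tools named in the "System deps" comment are
# typically combined inside start.sh. The exact invocation lives in start.sh;
# the wrapped command shown here is an assumption, not the repo's:
#
#   script -qefc 'stdbuf -oL -eL python /app/backend/main.py' /dev/null
#
# `script` allocates a pseudo-TTY (-q quiet, -e propagate the child's exit
# code, -f flush after each write, -c run the given command), and `stdbuf`
# forces line-buffered stdout/stderr, so log lines reach the Space's
# Container tab as soon as they are written.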
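
# -------- Reference: launching the OpenAI-compatible server --------
# A hedged sketch of how the llama-cpp-python server installed above is
# usually started; the model path and GPU layer count are illustrative
# assumptions, not values taken from this repo:
#
#   python -m llama_cpp.server \
#       --model /data/models/model.gguf \
#       --host 127.0.0.1 --port 1234 \
#       --n_gpu_layers -1
#
# This is the process that LOCAL_OPENAI_BASE_URL=http://127.0.0.1:1234 points
# the backend at.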
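
# -------- Reference: start.sh startup sequence --------
# The comments above imply the following orchestration: llama server first, a
# curl readiness probe, then uvicorn on $PORT. A minimal sketch under those
# assumptions (MODEL_PATH and the `backend.main:app` target are hypothetical,
# not read from the actual start.sh):
#
#   #!/usr/bin/env bash
#   set -euo pipefail
#   # 1. launch the llama-cpp server in the background (see sketch above)
#   python -m llama_cpp.server --model "$MODEL_PATH" --host 127.0.0.1 --port 1234 &
#   # 2. block until the OpenAI-compatible /v1/models endpoint answers
#   until curl -sf http://127.0.0.1:1234/v1/models >/dev/null; do sleep 2; done
#   # 3. hand off as PID 1 to uvicorn serving the backend and frontend/dist
#   exec uvicorn backend.main:app --host 0.0.0.0 --port "${PORT:-7860}"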