# swarm-os / Dockerfile
# Provenance: commit a714fdc — "fix: allocate PTY via script for HF Space
# live container logs" (author: aryxn323)
# ============================================================================
# Swarm-OS — Hugging Face Space (Docker SDK, GPU)
#
# Single-image stack:
# 1. Stage `web` — node:20 builds the React frontend into frontend/dist
# 2. Stage `app` — CUDA 12.1 runtime (pre-built llama-cpp-python wheel)
# - llama-cpp-python[server] (CUDA wheel) -> 127.0.0.1:1234
# - backend/main.py uvicorn -> 0.0.0.0:7860
# - frontend/dist mounted at /
# - inference.py reachable from `python inference.py`
# ============================================================================
# -------- Stage 1: frontend build --------
FROM node:20-slim AS web
WORKDIR /web
# Copy only the manifests first so the npm layer stays cached until the
# dependency set actually changes.
COPY frontend/package.json frontend/package-lock.json ./
RUN npm ci --no-fund --no-audit
# Now bring in the full frontend source and emit the static bundle into
# /web/dist, which the runtime stage copies out.
COPY frontend/ ./
RUN npm run build
# -------- Stage 2: runtime --------
FROM nvidia/cuda:12.1.1-cudnn8-runtime-ubuntu22.04 AS app

# Python/pip hygiene: no .pyc litter, unbuffered stdout (live Space logs),
# no pip cache baked into layers, no pip self-update nag.
# NOTE(review): DEBIAN_FRONTEND=noninteractive is a build-time knob that leaks
# into the runtime environment here; conventionally it is set inline on the
# apt-get RUN instead — confirm nothing at runtime reads it before moving it.
ENV DEBIAN_FRONTEND=noninteractive \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1

# HF Space contract: the web app listens on $PORT (7860, see EXPOSE/CMD);
# /data is the persistent volume, so Hugging Face Hub downloads cache there
# across container restarts.
ENV PORT=7860 \
    HF_HOME=/data/hf \
    HUGGINGFACE_HUB_CACHE=/data/hf

# Point the backend's OpenAI-compatible client at the local llama.cpp server
# started on 127.0.0.1:1234 (see header comment); the key is a placeholder.
ENV LOCAL_OPENAI_BASE_URL=http://127.0.0.1:1234 \
    LOCAL_OPENAI_API_KEY=lm-studio \
    LLM_PROVIDER=local
# System deps:
#   python3.11 + python3-pip  application runtime
#   curl + ca-certificates    start.sh readiness probe
#   libgomp1                  OpenMP runtime required by llama-cpp-python
#   bsdutils                  provides `script` (PTY allocator) — REQUIRED to
#                             defeat docker stdout buffering so HF Space's
#                             Container tab shows live logs in real time.
#                             (FIX: `script` lives in bsdutils, not
#                             bsdmainutils. bsdutils and coreutils are both
#                             Essential on Ubuntu, so these installs are
#                             no-ops kept only to document the dependency.)
#   coreutils                 ships `stdbuf` for line-buffered subprocess output
RUN apt-get update && apt-get install -y --no-install-recommends \
        bsdutils \
        ca-certificates \
        coreutils \
        curl \
        libgomp1 \
        python3.11 \
        python3-pip \
    && ln -sf /usr/bin/python3.11 /usr/bin/python \
    # NOTE(review): this overrides Ubuntu's python3 -> python3.10 symlink;
    # the pip layer below tolerates it, but confirm nothing else in the image
    # depends on the distro default interpreter.
    && ln -sf /usr/bin/python3.11 /usr/bin/python3 \
    && rm -rf /var/lib/apt/lists/*
WORKDIR /app

# Python deps (cached layer — rebuilt only when requirements.txt changes).
# llama-cpp-python comes from abetlen's pre-built CUDA 12.1 wheel index, so
# no NVCC compilation happens at image-build time.
COPY requirements.txt /app/requirements.txt
# `python -m pip` guarantees packages install into the python3.11 interpreter
# symlinked above, regardless of which `pip` shim is first on PATH.
RUN python -m pip install --upgrade pip setuptools wheel \
    && python -m pip install -r /app/requirements.txt \
    && python -m pip install "llama-cpp-python[server]>=0.2.90" \
        --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu121
# App code — each component is its own COPY layer, so editing one directory
# only invalidates that layer and the ones below it.
COPY pyproject.toml README.md openenv.yaml inference.py start.sh /app/
COPY server /app/server
COPY swarm_openenv_env /app/swarm_openenv_env
COPY backend /app/backend
# NOTE(review): baking `outputs/` into the image is unusual — confirm these
# are required seed artifacts and not stale build products that belong on the
# persistent /data volume instead.
COPY outputs /app/outputs
# Built frontend from stage 1 (header says the backend serves it at `/`).
COPY --from=web /web/dist /app/frontend/dist
# Persistent cache dir for the GGUF (HF Spaces mount /data as persistent
# storage). World-writable because HF Spaces may run the container under an
# arbitrary non-root UID; tighten to a dedicated user/group if the runtime
# UID is ever pinned. Both chmods merged into one layer.
RUN mkdir -p /data/models /data/hf \
    && chmod -R 777 /data \
    && chmod +x /app/start.sh
EXPOSE 7860
# Cheap liveness probe against uvicorn on 7860 (it serves frontend/dist at /).
# Generous start-period leaves room for the GGUF download + model load before
# the container can be marked unhealthy. curl is installed above.
HEALTHCHECK --interval=30s --timeout=5s --start-period=300s --retries=3 \
    CMD curl -fsS http://127.0.0.1:7860/ || exit 1
CMD ["bash", "/app/start.sh"]