# swarm-os / Dockerfile
# Provenance: commit a714fdc — "fix: allocate PTY via script for HF Space
# live container logs" (author: aryxn323)
# ============================================================================
# Swarm-OS — Hugging Face Space (Docker SDK, GPU)
#
# Single-image stack:
# 1. Stage `web` — node:20 builds the React frontend into frontend/dist
# 2. Stage `app` — CUDA 12.1 runtime (pre-built llama-cpp-python wheel)
# - llama-cpp-python[server] (CUDA wheel) -> 127.0.0.1:1234
# - backend/main.py uvicorn -> 0.0.0.0:7860
# - frontend/dist mounted at /
# - inference.py reachable from `python inference.py`
# ============================================================================
# -------- Stage 1: frontend build --------
FROM node:20-slim AS web
WORKDIR /web
# Copy only the manifests first so the npm layer stays cached until the
# dependency set actually changes.
COPY frontend/package.json frontend/package-lock.json ./
RUN npm ci --no-fund --no-audit
# Now bring in the full frontend source and emit the static bundle into
# /web/dist, which the runtime stage copies out.
COPY frontend/ ./
RUN npm run build
# -------- Stage 2: runtime --------
FROM nvidia/cuda:12.1.1-cudnn8-runtime-ubuntu22.04 AS app

# Python/pip hygiene: no .pyc litter, unbuffered stdout (live Space logs),
# no pip cache baked into layers, no pip self-update nag.
# NOTE(review): DEBIAN_FRONTEND=noninteractive is a build-time knob that leaks
# into the runtime environment here; conventionally it is set inline on the
# apt-get RUN instead — confirm nothing at runtime reads it before moving it.
ENV DEBIAN_FRONTEND=noninteractive \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1

# HF Space contract: the web app listens on $PORT (7860, see EXPOSE/CMD);
# /data is the persistent volume, so Hugging Face Hub downloads cache there
# across container restarts.
ENV PORT=7860 \
    HF_HOME=/data/hf \
    HUGGINGFACE_HUB_CACHE=/data/hf

# Point the backend's OpenAI-compatible client at the local llama.cpp server
# started on 127.0.0.1:1234 (see header comment); the key is a placeholder.
ENV LOCAL_OPENAI_BASE_URL=http://127.0.0.1:1234 \
    LOCAL_OPENAI_API_KEY=lm-studio \
    LLM_PROVIDER=local
# System deps:
#   python3.11 + python3-pip  application runtime
#   curl + ca-certificates    start.sh readiness probe
#   libgomp1                  OpenMP runtime required by llama-cpp-python
#   bsdutils                  provides `script` (PTY allocator) — REQUIRED to
#                             defeat docker stdout buffering so HF Space's
#                             Container tab shows live logs in real time.
#                             (FIX: `script` lives in bsdutils, not
#                             bsdmainutils. bsdutils and coreutils are both
#                             Essential on Ubuntu, so these installs are
#                             no-ops kept only to document the dependency.)
#   coreutils                 ships `stdbuf` for line-buffered subprocess output
RUN apt-get update && apt-get install -y --no-install-recommends \
        bsdutils \
        ca-certificates \
        coreutils \
        curl \
        libgomp1 \
        python3.11 \
        python3-pip \
    && ln -sf /usr/bin/python3.11 /usr/bin/python \
    # NOTE(review): this overrides Ubuntu's python3 -> python3.10 symlink;
    # the pip layer below tolerates it, but confirm nothing else in the image
    # depends on the distro default interpreter.
    && ln -sf /usr/bin/python3.11 /usr/bin/python3 \
    && rm -rf /var/lib/apt/lists/*
WORKDIR /app

# Python deps (cached layer — rebuilt only when requirements.txt changes).
# llama-cpp-python comes from abetlen's pre-built CUDA 12.1 wheel index, so
# no NVCC compilation happens at image-build time.
COPY requirements.txt /app/requirements.txt
# `python -m pip` guarantees packages install into the python3.11 interpreter
# symlinked above, regardless of which `pip` shim is first on PATH.
RUN python -m pip install --upgrade pip setuptools wheel \
    && python -m pip install -r /app/requirements.txt \
    && python -m pip install "llama-cpp-python[server]>=0.2.90" \
        --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu121
# App code — each component is its own COPY layer, so editing one directory
# only invalidates that layer and the ones below it.
COPY pyproject.toml README.md openenv.yaml inference.py start.sh /app/
COPY server /app/server
COPY swarm_openenv_env /app/swarm_openenv_env
COPY backend /app/backend
# NOTE(review): baking `outputs/` into the image is unusual — confirm these
# are required seed artifacts and not stale build products that belong on the
# persistent /data volume instead.
COPY outputs /app/outputs
# Built frontend from stage 1 (header says the backend serves it at `/`).
COPY --from=web /web/dist /app/frontend/dist
# Persistent cache dir for the GGUF (HF Spaces mount /data as persistent
# storage). World-writable because HF Spaces may run the container under an
# arbitrary non-root UID; tighten to a dedicated user/group if the runtime
# UID is ever pinned. Both chmods merged into one layer.
RUN mkdir -p /data/models /data/hf \
    && chmod -R 777 /data \
    && chmod +x /app/start.sh
EXPOSE 7860
# Cheap liveness probe against uvicorn on 7860 (it serves frontend/dist at /).
# Generous start-period leaves room for the GGUF download + model load before
# the container can be marked unhealthy. curl is installed above.
HEALTHCHECK --interval=30s --timeout=5s --start-period=300s --retries=3 \
    CMD curl -fsS http://127.0.0.1:7860/ || exit 1
CMD ["bash", "/app/start.sh"]