Spaces:

digifreely
/

brain

Sleeping

App Files Files Community

brain / Dockerfile

digifreely

Update Dockerfile

a27072c verified 10 days ago

raw

history blame contribute delete

4.4 kB

	# ──────────────────────────────────────────────────────────────────────────
	# Dockerfile – Children's Learning Router Service
	# Target: Hugging Face Spaces (CPU-only, Docker SDK)
	# Port: 7860 (required by HF Spaces)
	#
	# Model delivery: via `preload_from_hub` in README.md
	# HF Spaces downloads Qwen/Qwen2.5-1.5B-Instruct before container start
	# and places it under /repo-cache (HF_HOME=/repo-cache).
	# No in-build model download is needed. (The build itself does use the
	# network: the apt-get and pip steps below fetch their packages remotely.)
	#
	# OOM mitigation: packages are installed in small isolated groups so pip's
	# dependency resolver never spikes RAM. --no-cache-dir and --no-compile
	# keep peak memory low throughout the build.
	# ──────────────────────────────────────────────────────────────────────────

	# ── Base image ────────────────────────────────────────────────────────────
	# Python 3.10, "slim" image variant. The tag carries no digest, so a rebuild
	# may resolve to a newer build of the same tag.
	FROM python:3.10-slim

	# ── OS-level tooling ──────────────────────────────────────────────────────
	# Index refresh, install and list cleanup share a single layer so the apt
	# lists never persist in the image. Packages are listed alphabetically.
	RUN apt-get update \
	    && apt-get install --yes --no-install-recommends \
	        build-essential \
	        curl \
	        git \
	    && rm -rf /var/lib/apt/lists/*

	# ── Working directory ─────────────────────────────────────────────────────
	# Later relative paths resolve against /app: the destination of
	# `COPY app.py .` and the directory the start-up command runs in.
	WORKDIR /app

	# ── Packaging tools: refresh pip and wheel ahead of the larger installs ──
	RUN pip install --upgrade --no-compile --no-cache-dir \
	    pip \
	    wheel

	# ── Group 1/4 · PyTorch, CPU build ────────────────────────────────────────
	# Resolved against the PyTorch CPU wheel index and installed in a layer of
	# its own, being the largest wheel of the four groups.
	RUN pip install --no-compile --no-cache-dir \
	    --index-url https://download.pytorch.org/whl/cpu \
	    torch==2.3.1

	# ── Group 2/4 · Hugging Face libraries ────────────────────────────────────
	# transformers brings tokenizers and its other dependencies along.
	RUN pip install --no-compile --no-cache-dir \
	    accelerate==1.1.1 \
	    transformers==4.46.3

	# ── Group 3/4 · Serialisation libraries ───────────────────────────────────
	RUN pip install --no-compile --no-cache-dir \
	    protobuf==5.28.3 \
	    sentencepiece==0.2.0

	# ── 4 of 4 · Async HTTP client + Web framework + ASGI server ─────────────
	# Requirement specifiers are quoted: unquoted, the `[standard]` extra is a
	# shell bracket pattern and could be glob-expanded against file names in
	# the working directory before pip ever sees it.
	RUN pip install --no-cache-dir --no-compile \
	    "httpx==0.27.2" \
	    "fastapi==0.115.0" \
	    "uvicorn[standard]==0.30.6"

	# ── HuggingFace Spaces: run as non-root user (UID 1000) ──────────────────
	# The user is created before the application code is copied, so editing
	# app.py no longer invalidates this layer. mkdir -p /repo-cache/hub makes
	# sure the cache path exists; /app and /repo-cache are handed to hfuser so
	# both stay writable at runtime.
	RUN useradd -m -u 1000 hfuser \
	    && mkdir -p /repo-cache/hub \
	    && chown -R hfuser:hfuser /app /repo-cache

	# ── Application code ──────────────────────────────────────────────────────
	# --chown assigns ownership at copy time, so no follow-up chown layer is
	# needed for the file.
	COPY --chown=hfuser:hfuser app.py .

	# Everything from here on, including the start-up command, runs as hfuser.
	USER hfuser

	# ── Runtime environment and port ──────────────────────────────────────────
	# HF_HOME points the Hugging Face cache root at /repo-cache, where (per the
	# header of this file) HF Spaces is expected to place preload_from_hub
	# models before the container starts. TRANSFORMERS_CACHE is deliberately
	# left unset: it is deprecated in favour of HF_HOME and slated for removal
	# in transformers v5.
	# The two PYTHON* switches turn off output buffering and .pyc writing.
	ENV HF_HOME=/repo-cache \
	    PYTHONDONTWRITEBYTECODE=1 \
	    PYTHONUNBUFFERED=1

	# Port the start-up command binds to (required by HF Spaces).
	EXPOSE 7860

	# ── Start-up command ──────────────────────────────────────────────────────
	# uvicorn is launched directly, without a gunicorn shim; the author removed
	# the shim to eliminate a gunicorn health-check race that was killing the
	# worker mid-response and causing 502s.
	# --timeout-keep-alive 300 keeps idle keep-alive connections open for up to
	# 300 s; it was chosen with the slow CPU inference of the
	# Qwen2.5-1.5B-Instruct model in mind.
	# NOTE(review): this flag governs idle connections, not how long a single
	# request may run; confirm it addresses the intended timeout.
	CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", \
	     "--timeout-keep-alive", "300", "--log-level", "info"]