# brain / Dockerfile
# digifreely's picture
# Update Dockerfile
# a27072c verified
# ──────────────────────────────────────────────────────────────────────────
# Dockerfile – Children's Learning Router Service
# Target: Hugging Face Spaces (CPU-only, Docker SDK)
# Port: 7860 (required by HF Spaces)
#
# Model delivery: via `preload_from_hub` in README.md
# HF Spaces downloads Qwen/Qwen2.5-1.5B-Instruct before container start
# and places it under /repo-cache (HF_HOME=/repo-cache).
# No in-build model download is performed. (The build itself does use the
# network: the apt-get and pip steps below fetch packages.)
#
# OOM mitigation: packages are installed in small isolated groups so pip's
# dependency resolver never spikes RAM. --no-cache-dir and --no-compile
# keep peak memory low throughout the build.
# ──────────────────────────────────────────────────────────────────────────
# Base image: the slim variant of the Python 3.10 image. The apt-get step
# that follows relies on this base being apt-based.
FROM python:3.10-slim
# ── System packages ───────────────────────────────────────────────────────
# Refresh the apt index, install the toolchain and fetch utilities without
# recommended extras, then remove the index files in the same layer.
RUN apt-get update \
    && apt-get install --yes --no-install-recommends \
        build-essential \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*
# ── Working directory ─────────────────────────────────────────────────────
# Later relative paths (e.g. the "." destination of COPY app.py) resolve
# against /app.
WORKDIR /app
# ── Pip hygiene: bring pip and wheel up to date first (small, fast) ──────
RUN pip install \
    --upgrade \
    --no-cache-dir \
    --no-compile \
    pip \
    wheel
# ── 1 of 4 · CPU-only PyTorch (largest wheel – installed on its own) ──────
# The package index is switched to the PyTorch CPU wheel index for this step.
RUN pip install \
    --index-url https://download.pytorch.org/whl/cpu \
    --no-cache-dir \
    --no-compile \
    torch==2.3.1
# ── 2 of 4 · HuggingFace stack (transformers pulls in tokenizers etc.) ───
RUN pip install \
    --no-cache-dir \
    --no-compile \
    accelerate==1.1.1 \
    transformers==4.46.3
# ── 3 of 4 · Serialisation libs ──────────────────────────────────────────
RUN pip install \
    --no-cache-dir \
    --no-compile \
    protobuf==5.28.3 \
    sentencepiece==0.2.0
# ── 4 of 4 · Async HTTP client + Web framework + ASGI server ─────────────
# The uvicorn requirement is quoted because "[standard]" is a glob bracket
# expression to the shell: left unquoted, it would be rewritten if a file
# with a matching name ever existed in the working directory.
RUN pip install --no-cache-dir --no-compile \
    httpx==0.27.2 \
    fastapi==0.115.0 \
    "uvicorn[standard]==0.30.6"
# ── HuggingFace Spaces: run as non-root user (UID 1000) ──────────────────
# The account is created before the application code is copied, so this
# layer does not depend on app.py and stays cached when only app.py changes.
# mkdir -p /repo-cache/hub ensures the cache path exists and is writable
# by hfuser whether HF Spaces pre-populates it or the model downloads fresh.
# /app (created by WORKDIR while still root) is handed to hfuser as well.
RUN useradd -m -u 1000 hfuser \
    && mkdir -p /repo-cache/hub \
    && chown -R hfuser:hfuser /app /repo-cache
# ── Application code ──────────────────────────────────────────────────────
# --chown assigns ownership at copy time, so no recursive chown has to run
# after app.py is added.
COPY --chown=hfuser:hfuser app.py .
# Everything from here on, including the container process, runs as hfuser.
USER hfuser
# ── Runtime config ────────────────────────────────────────────────────────
# HF_HOME: HF Spaces sets HF_HOME=/repo-cache and places preload_from_hub
# models there before the container starts. HF_HOME alone is sufficient;
# TRANSFORMERS_CACHE is deprecated since transformers v4 and removed in v5.
# PYTHONDONTWRITEBYTECODE / PYTHONUNBUFFERED: no .pyc files are written and
# stdout/stderr are not buffered.
ENV HF_HOME=/repo-cache \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1
# Documents the service port (7860).
EXPOSE 7860
# ── Start-up command ──────────────────────────────────────────────────────
# Plain uvicorn — no gunicorn shim. Eliminates the gunicorn health-check
# race that was killing the worker mid-response and causing 502s.
# --timeout-keep-alive 300 is intended to cover the full CPU inference time
# of the model (Qwen/Qwen2.5-1.5B-Instruct according to the file header).
# NOTE(review): uvicorn documents this flag as the idle keep-alive timeout
# between requests — confirm it is the right knob for long-running requests.
CMD [ \
    "uvicorn", "app:app", \
    "--host", "0.0.0.0", \
    "--port", "7860", \
    "--timeout-keep-alive", "300", \
    "--log-level", "info" \
]