Spaces:
Sleeping
Sleeping
# ──────────────────────────────────────────────────────────────────────────
# Dockerfile — Children's Learning Router Service
# Target: Hugging Face Spaces (CPU-only, Docker SDK)
# Port: 7860 (required by HF Spaces)
#
# Model delivery: via `preload_from_hub` in README.md
#   Qwen/Qwen2.5-1.5B-Instruct is expected to be fetched by HF Spaces and
#   made available under /repo-cache (HF_HOME=/repo-cache), so this
#   Dockerfile performs no model download of its own.
#   NOTE(review): an earlier revision of this comment said the build
#   environment has no internet, but the apt-get and pip steps below need
#   network access at build time — confirm the actual constraint.
#
# OOM mitigation: packages are installed in small isolated groups so pip's
#   dependency resolver never spikes RAM. --no-cache-dir and --no-compile
#   keep peak memory low throughout the build.
# ──────────────────────────────────────────────────────────────────────────
FROM python:3.10-slim

# ── System packages ───────────────────────────────────────────────────────
# The apt package lists are removed in the same layer so they never end up
# in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        git \
        curl \
    && rm -rf /var/lib/apt/lists/*

# ── Working directory ─────────────────────────────────────────────────────
WORKDIR /app

# ── Pip hygiene: upgrade pip/wheel first (small, fast) ────────────────────
RUN pip install --no-cache-dir --no-compile --upgrade pip wheel

# ── 1 of 4 · CPU-only PyTorch (largest wheel — install alone) ─────────────
# The PyTorch CPU wheel index is used instead of PyPI for this step.
RUN pip install --no-cache-dir --no-compile \
        torch==2.3.1 \
        --index-url https://download.pytorch.org/whl/cpu

# ── 2 of 4 · HuggingFace stack (transformers pulls in tokenizers etc.) ────
RUN pip install --no-cache-dir --no-compile \
        transformers==4.46.3 \
        accelerate==1.1.1

# ── 3 of 4 · Serialisation libs ───────────────────────────────────────────
RUN pip install --no-cache-dir --no-compile \
        sentencepiece==0.2.0 \
        protobuf==5.28.3

# ── 4 of 4 · Async HTTP client + Web framework + ASGI server ──────────────
# The uvicorn requirement is quoted: "[standard]" is a glob character class
# to the shell that runs this RUN instruction and must reach pip verbatim.
RUN pip install --no-cache-dir --no-compile \
        httpx==0.27.2 \
        fastapi==0.115.0 \
        "uvicorn[standard]==0.30.6"

# ── HuggingFace Spaces: run as non-root user (UID 1000) ───────────────────
# mkdir -p /repo-cache/hub ensures the cache path exists and is writable
# by hfuser whether HF Spaces pre-populates it or the model downloads fresh.
# The user is created before the application code is copied so that editing
# app.py does not invalidate this layer.
RUN useradd -m -u 1000 hfuser \
    && mkdir -p /repo-cache/hub \
    && chown -R hfuser:hfuser /app /repo-cache
USER hfuser

# ── Application code ──────────────────────────────────────────────────────
# --chown gives hfuser ownership at copy time; COPY otherwise creates files
# owned by root regardless of the active USER.
COPY --chown=hfuser:hfuser app.py .

# ── Runtime config ────────────────────────────────────────────────────────
# HF_HOME is set to /repo-cache here, the location where preload_from_hub
# models are expected to be found when the container starts. HF_HOME alone
# is sufficient; TRANSFORMERS_CACHE is deprecated in transformers v4 and
# slated for removal in v5.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    HF_HOME=/repo-cache

EXPOSE 7860

# ── Start-up command ──────────────────────────────────────────────────────
# Plain uvicorn — no gunicorn shim. Eliminates the gunicorn health-check
# race that was killing the worker mid-response and causing 502s.
# --timeout-keep-alive 300 keeps idle keep-alive connections open for up to
# 300 s between requests (uvicorn's default is 5 s). It is NOT a limit on
# how long a single request may take to process.
# NOTE(review): an earlier comment here mentioned a "3B model", while the
# header names Qwen/Qwen2.5-1.5B-Instruct — confirm which model app.py loads.
CMD ["uvicorn", "app:app", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "--timeout-keep-alive", "300", \
     "--log-level", "info"]