Spaces:
Running
Running
| # Dockerfile for HuggingFace Spaces (Docker SDK) | |
| # | |
| # HOW HF SPACES WORKS | |
| # βββββββββββββββββββ | |
| # HF Spaces runs your Docker container on their infrastructure. | |
| # - Port must be 7860 (hard requirement β HF proxies this to HTTPS) | |
| # - Container runs as a non-root user (uid 1000) for security | |
| # - Env vars set in Space Settings β Variables are injected at runtime | |
| # - The image is rebuilt on every push to the Space repo | |
| # | |
| # WHY WE PRE-DOWNLOAD THE RERANKER | |
| # βββββββββββββββββββββββββββββββββ | |
| # The cross-encoder re-ranker (~80MB) downloads from HuggingFace Hub on first use. | |
| # If we leave it lazy, the first search after a cold start takes 30+ seconds. | |
| # By downloading it during the Docker build, it's baked into the image layer. | |
| # Subsequent starts are instant β the model is already on disk. | |
| # | |
| # The embedding model is NOT downloaded here β Voyage/Gemini/Nomic run via API | |
| # (no local file needed). That's how we stay under the RAM limit. | |
| # | |
| # ARCHITECTURE | |
| # ββββββββββββ | |
| # This Dockerfile only runs the FastAPI backend. | |
| # The React frontend is deployed separately on Vercel (free). | |
| # They communicate via: frontend β VITE_API_URL β this Space β Qdrant Cloud | |
| FROM python:3.11-slim | |
| # HF Spaces requires a non-root user with uid 1000 | |
| RUN useradd -m -u 1000 user | |
| USER user | |
| ENV HOME=/home/user \ | |
| PATH=/home/user/.local/bin:$PATH \ | |
| # Silence pip version warnings | |
| PIP_DISABLE_PIP_VERSION_CHECK=1 \ | |
| # Prevent Python from buffering stdout (so logs appear in real time) | |
| PYTHONUNBUFFERED=1 \ | |
| # Store HuggingFace model cache in a writable location | |
| HF_HOME=/home/user/.cache/huggingface | |
| WORKDIR $HOME/app | |
| # Install dependencies first (Docker layer cache β only re-runs if requirements.txt changes) | |
| COPY --chown=user requirements.txt . | |
| RUN pip install --user --no-cache-dir -r requirements.txt | |
| # Pre-download the re-ranker model into the image layer. | |
| # This bakes the ~80MB model into the image so cold starts don't download it. | |
| # The embedding model is NOT downloaded here β it lives behind a hosted API. | |
| RUN python -c "\ | |
| from sentence_transformers import CrossEncoder; \ | |
| print('Pre-downloading re-ranker...'); \ | |
| CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2'); \ | |
| print('Re-ranker cached.')" | |
| # Copy source code (after pip install so code changes don't re-run pip) | |
| COPY --chown=user . . | |
| # HF Spaces proxies port 7860 to HTTPS β this is non-negotiable | |
| EXPOSE 7860 | |
| CMD ["uvicorn", "backend.main:app", "--host", "0.0.0.0", "--port", "7860"] | |