File size: 2,615 Bytes
3ad88a4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
be5e148
 
3ad88a4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
be5e148
3ad88a4
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# Dockerfile for HuggingFace Spaces (Docker SDK)
#
# HOW HF SPACES WORKS
# ───────────────────
# HF Spaces runs your Docker container on their infrastructure.
# - Port must be 7860 (hard requirement — HF proxies this to HTTPS)
# - Container runs as a non-root user (uid 1000) for security
# - Env vars set in Space Settings → Variables are injected at runtime
# - The image is rebuilt on every push to the Space repo
#
# WHY WE PRE-DOWNLOAD THE RERANKER
# ────────────────────────────────
# The cross-encoder re-ranker (~80MB) downloads from HuggingFace Hub on first use.
# If we leave it lazy, the first search after a cold start takes 30+ seconds.
# By downloading it during the Docker build, it's baked into the image layer.
# Subsequent starts are instant — the model is already on disk.
#
# The embedding model is NOT downloaded here — Voyage/Gemini/Nomic run via API
# (no local file needed). That's how we stay under the RAM limit.
#
# ARCHITECTURE
# ────────────
# This Dockerfile only runs the FastAPI backend.
# The React frontend is deployed separately on Vercel (free).
# They communicate via: frontend → VITE_API_URL → this Space → Qdrant Cloud

FROM python:3.11-slim

# Create the uid-1000 non-root account that HF Spaces requires, then drop
# privileges so every later instruction (and the running container) uses it.
RUN useradd --create-home --uid 1000 user
USER user

# Runtime environment, all set in a single instruction:
#   HOME / PATH                    — user's home, with ~/.local/bin on PATH so
#                                    executables from `pip install --user` resolve
#   PIP_DISABLE_PIP_VERSION_CHECK  — silence pip version warnings
#   PYTHONUNBUFFERED               — no stdout buffering, so logs appear in real time
#   HF_HOME                        — HuggingFace model cache in a writable location
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PYTHONUNBUFFERED=1 \
    HF_HOME=/home/user/.cache/huggingface

WORKDIR ${HOME}/app

# Dependency layer: copy only the manifest first so Docker's layer cache
# re-runs pip solely when requirements.txt changes.
COPY --chown=user requirements.txt ./
RUN pip install --user --no-cache-dir --requirement requirements.txt

# Model layer: instantiate the cross-encoder once at build time so its
# ~80MB of weights land in HF_HOME inside the image and cold starts skip
# the download. The embedding model is NOT fetched here — it lives behind
# a hosted API.
# NOTE: continuation lines stay at column 0 on purpose; the line breaks are
# joined into a single `python -c` string.
RUN python -c "from sentence_transformers import CrossEncoder; \
print('Pre-downloading re-ranker...'); \
CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2'); \
print('Re-ranker cached.')"

# Application layer: source is copied last so code-only changes never
# invalidate the pip or model layers above.
COPY --chown=user . ./

# HF Spaces proxies port 7860 to HTTPS — this is non-negotiable.
EXPOSE 7860

# Exec-form CMD: uvicorn serves the FastAPI app on all interfaces at 7860.
CMD ["uvicorn", "backend.main:app", "--host", "0.0.0.0", "--port", "7860"]