# CanLex MCP server -- remote (streamable-HTTP) image.
#
# Builds anywhere: Hugging Face Spaces, Google Cloud Run, Fly.io, plain Docker.
# Retrieval is fully local and key-free; the optional CanLII case citator reads
# its key from the CANLII_API_KEY environment variable (supplied as a host
# secret -- the key is never copied into the image).
FROM python:3.12-slim

# onnxruntime (used by the reranker) links against the OpenMP runtime, which
# Debian packages as libgomp1. The apt package lists are deleted in the same
# layer so they never end up in the image.
RUN apt-get update && \
    apt-get install --yes --no-install-recommends \
        libgomp1 && \
    rm -rf /var/lib/apt/lists/*

# Hugging Face Spaces requires the container to run as a non-root user
# (UID 1000). The account's home directory is /app, which is also the working
# directory for every instruction that follows.
#   -m  create the home directory    -d  home directory path    -u  numeric UID
RUN useradd -m -d /app -u 1000 app
WORKDIR /app

# Install the Python dependencies before any application code is copied, so
# this layer is reused from cache until requirements.txt itself changes.
COPY requirements.txt ./
RUN python -m pip install --no-cache-dir --requirement requirements.txt

# The application package and the processed corpus (section-chunk JSON files),
# both handed to the unprivileged app user at copy time.
COPY --chown=app:app canlex/ /app/canlex/
COPY --chown=app:app data/processed/*.json /app/data/processed/

# Drop privileges: the remaining build steps and the server itself run as app.
USER app

# Home directory and Hugging Face cache location, both under /app.
ENV HOME=/app \
    HF_HOME=/app/.hf_cache

# Settings for the server process. PYTHONUNBUFFERED=1 makes Python write
# stdout/stderr without buffering. CANLEX_HTTP and PORT are read by the
# application (presumably: enable the HTTP transport and choose the listening
# port -- confirm against canlex.server).
ENV PYTHONUNBUFFERED=1 \
    PORT=7860 \
    CANLEX_HTTP=1

# Bake the model artefacts into the image so the first request needs no
# network access:
#   1. canlex.embed builds the semantic embeddings;
#   2. instantiating Reranker pre-fetches the cross-encoder model into the
#      model cache.
# `set -e` aborts the layer as soon as either command fails.
RUN set -e; \
    python -m canlex.embed; \
    python -c "from canlex.rerank import Reranker; Reranker()"

# The model cache was populated during the build; from this point model files
# are served from that baked cache and never fetched.
ENV HF_HUB_OFFLINE="1"

# Port the image is documented to listen on (same value as the PORT variable
# set earlier in this file). EXPOSE is documentation only; it does not publish
# the port.
EXPOSE 7860/tcp

# Exec form: python is started directly, without a wrapping shell.
CMD ["python", "-m", "canlex.server"]