# CanLex / Dockerfile
# Beemer
# CanLex MCP server
# 21626e7
# CanLex MCP server -- remote (streamable-HTTP) image.
#
# Builds anywhere: Hugging Face Spaces, Google Cloud Run, Fly.io, plain Docker.
# Retrieval is fully local and key-free; the optional CanLII case citator reads
# its key from the CANLII_API_KEY environment variable (supplied as a host
# secret -- the key is never copied into the image).
# Base image: the slim variant of the official Python 3.12 image.
FROM python:3.12-slim

# onnxruntime (which backs the reranker) links against the OpenMP runtime,
# shipped as libgomp1. The apt package lists are removed in the same layer
# so they never end up in the image.
RUN apt-get update \
    && apt-get install --yes --no-install-recommends libgomp1 \
    && rm -rf /var/lib/apt/lists/*
# Hugging Face Spaces requires the container to run as a non-root user with
# UID 1000. The user's home directory is /app, which is also the working
# directory for every instruction that follows.
RUN useradd -m -d /app -u 1000 app
WORKDIR /app
# Install the Python dependencies ahead of the application code: this layer
# is then reused from cache whenever only the code changes.
COPY requirements.txt ./
RUN pip install --no-cache-dir --requirement requirements.txt
# The application package and the processed corpus (section-chunk JSON
# files), both copied with ownership set to the unprivileged user.
COPY --chown=app:app canlex/ /app/canlex/
COPY --chown=app:app data/processed/*.json /app/data/processed/
# Every remaining build step, and the container itself, runs as the
# unprivileged user.
USER app

# Runtime environment:
#   HOME, HF_HOME     -- home directory and Hugging Face cache root, both under /app
#   CANLEX_HTTP, PORT -- read by the application; presumably select the
#                        streamable-HTTP transport on port 7860 (confirm in canlex.server)
#   PYTHONUNBUFFERED  -- keeps Python's stdout/stderr unbuffered
ENV HOME=/app \
    HF_HOME=/app/.hf_cache \
    CANLEX_HTTP=1 \
    PORT=7860 \
    PYTHONUNBUFFERED=1
# Build-time warm-up: compute the semantic embeddings, then instantiate the
# reranker once so its cross-encoder model is fetched now. The resulting model
# cache becomes part of the image, and the first request needs no network.
RUN python -m canlex.embed \
    && python -c "from canlex.rerank import Reranker; Reranker()"
# The model files are already in the baked cache, so from this point on they
# are read from there and never fetched from the Hugging Face Hub.
ENV HF_HUB_OFFLINE=1

# Documented container port; same value as the PORT variable set earlier in this file.
EXPOSE 7860

# Launch the server module (exec form, so Python runs as the container's main process).
CMD ["python", "-m", "canlex.server"]