# Container image for the FastAPI app `backend.main:app`, served by uvicorn.
#
# Build args:
#   PREBUILD_ML_ASSETS          set to "1" to run backend.core.build_ml_assets
#                               during the build (default: 0, i.e. skipped)
#   MORPHEUS_BUILD_ASSETS_MODE  exported to that module's environment when the
#                               pre-build runs (default: skip)
# Runtime env:
#   PORT                        port uvicorn binds to (default: 7860)

# 1. Use an official lightweight Python image
FROM python:3.11-slim

# 2. Install system dependencies required for 'unstructured' PDF OCR.
#    The apt package lists are removed in the same layer so they do not end up
#    in the final image.
RUN apt-get update && apt-get install -y \
    tesseract-ocr \
    poppler-utils \
    libmagic-dev \
    libgl1 \
    libglib2.0-0 \
    libgomp1 \
    libgthread-2.0-0 \
    && rm -rf /var/lib/apt/lists/*

# 3. Create a non-root user (Required by Hugging Face for security)
RUN useradd -m -u 1000 user
USER user
# Make executables under the user's ~/.local/bin (e.g. scripts pip installs for
# a non-root user) resolvable on PATH.
ENV PATH="/home/user/.local/bin:$PATH"
# huggingface_hub settings: disable the Xet transfer backend and set the
# metadata (ETag) and download timeouts.
ENV HF_HUB_DISABLE_XET=1 \
    HF_HUB_ETAG_TIMEOUT=10 \
    HF_HUB_DOWNLOAD_TIMEOUT=60

# 4. Set the working directory
WORKDIR /app

# 5. Copy requirements and install them.
#    Done before copying the project so this layer is only rebuilt when
#    requirements.txt changes.
COPY --chown=user:user requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# 6. Copy the rest of your project files
COPY --chown=user:user . .

# 7. Optional ML asset pre-build. Disabled by default so HF builds do not hang
#    on unauthenticated model downloads; runtime code falls back safely if needed.
ARG PREBUILD_ML_ASSETS=0
ARG MORPHEUS_BUILD_ASSETS_MODE=skip
RUN if [ "$PREBUILD_ML_ASSETS" = "1" ]; then \
        MORPHEUS_BUILD_ASSETS_MODE="$MORPHEUS_BUILD_ASSETS_MODE" python -m backend.core.build_ml_assets ; \
    else \
        echo "Skipping ML asset pre-build"; \
    fi

# 8. Start FastAPI (7860 is the HF standard, but Railway uses $PORT)
ENV PORT=7860
# A shell is required so ${PORT} is expanded at container start; `exec` then
# replaces that shell with uvicorn so the server itself receives stop signals
# (e.g. SIGTERM from `docker stop`) instead of an intermediate `sh` process.
CMD ["sh", "-c", "exec uvicorn backend.main:app --host 0.0.0.0 --port \"${PORT}\""]