# raredx / Dockerfile
# Aswin92's picture
# Upload folder using huggingface_hub
# 89c6379 verified
# =============================================================================
# RareDx — Hugging Face Spaces Dockerfile
# Single container: FastAPI (8080, internal) + Streamlit (8501, public)
# =============================================================================
FROM python:3.11-slim
# --------------------------------------------------------------------------
# OS-level packages (listed alphabetically so diffs stay small)
#   curl                     — HTTP client
#   g++, gcc                 — C/C++ compilers
#   libxml2-dev, libxslt-dev — libxml2 / libxslt development headers
#   supervisor               — process manager launched by this image's CMD
# --no-install-recommends skips optional packages, and the apt package lists
# are removed in the same layer so they never end up in the image.
# --------------------------------------------------------------------------
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        curl \
        g++ \
        gcc \
        libxml2-dev \
        libxslt-dev \
        supervisor \
    && rm -rf /var/lib/apt/lists/*
WORKDIR /app
# --------------------------------------------------------------------------
# Python packages
# Only the requirements file is copied at this point, so the pip layer below
# is reused from cache whenever a change touches nothing but application code.
# --no-cache-dir keeps pip's download cache out of the image.
# --------------------------------------------------------------------------
COPY backend/requirements.txt requirements.txt
RUN pip install --no-cache-dir --requirement requirements.txt
# --------------------------------------------------------------------------
# Bake the BioLORD-2023 model into the image
# Instantiating the model once at build time fills the Hugging Face cache
# rooted at HF_HOME, sparing every Space restart a ~500MB download.
# --------------------------------------------------------------------------
ENV HF_HOME=/app/.cache/huggingface
RUN python -c "import sentence_transformers as st; print('Downloading BioLORD-2023...'); st.SentenceTransformer('FremyCompany/BioLORD-2023'); print('Model cached.')"
# --------------------------------------------------------------------------
# Pre-built knowledge data (bundled — no runtime download needed)
#   data/graph_store.json — 33MB  Orphanet+HPO knowledge graph (NetworkX JSON)
#   data/chromadb/        — 149MB BioLORD disease embeddings (ChromaDB)
#   data/hpo_index/       — 26MB  BioLORD HPO term embeddings (numpy + JSON)
#
# These are copied BEFORE the application source on purpose: a COPY whose
# inputs changed invalidates every layer after it, so placing the large data
# bundles first lets code-only changes reuse them from the build cache instead
# of rebuilding ~208MB of layers each time.
# NOTE(review): assumes the data bundles change less often than backend/ —
# confirm against how the data is regenerated.
# --------------------------------------------------------------------------
COPY data/graph_store.json ./data/graph_store.json
COPY data/chromadb/ ./data/chromadb/
COPY data/hpo_index/ ./data/hpo_index/
# --------------------------------------------------------------------------
# Application source
# --------------------------------------------------------------------------
COPY backend/ ./backend/
# --------------------------------------------------------------------------
# supervisord config (the file passed to supervisord via -c in CMD)
# --------------------------------------------------------------------------
COPY supervisord.conf /etc/supervisor/conf.d/raredx.conf
# --------------------------------------------------------------------------
# Runtime environment
# --------------------------------------------------------------------------
# Interpreter behaviour: unbuffered stdout/stderr and no .pyc files written.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1
# Pipeline settings. Per the original author's note these make the pipeline
# use embedded ChromaDB and the local graph store, so no Neo4j or external
# ChromaDB server is needed.
# NOTE(review): how the pipeline interprets CHROMA_HOST/CHROMA_PORT is not
# visible in this file — confirm in backend/.
ENV CHROMA_HOST=localhost \
    CHROMA_PORT=9999 \
    CHROMA_COLLECTION=rare_diseases \
    EMBED_MODEL=FremyCompany/BioLORD-2023 \
    ORPHANET_DATA_DIR=/app/data/orphanet
# Streamlit's listening port, declared for HF Spaces (EXPOSE is documentation
# only; it does not publish the port by itself).
EXPOSE 8501
# --------------------------------------------------------------------------
# Start both services via supervisord
# FastAPI: 127.0.0.1:8080 (internal — Streamlit calls it)
# Streamlit: 0.0.0.0:8501 (public — HF Spaces exposes this)
#
# -n (--nodaemon) keeps supervisord in the foreground. It is the container's
# main process, so if it daemonized (its default) the container would exit
# right after start-up. The flag is harmless if the config file already sets
# nodaemon=true.
# --------------------------------------------------------------------------
CMD ["/usr/bin/supervisord", "-n", "-c", "/etc/supervisor/conf.d/raredx.conf"]