# =============================================================================
# RareDx — Hugging Face Spaces Dockerfile
# Single container: FastAPI (8080, internal) + Streamlit (8501, public)
# =============================================================================
FROM python:3.11-slim

# --------------------------------------------------------------------------
# System dependencies
#   gcc / g++ / libxml2-dev / libxslt-dev — presumably needed to compile
#     Python packages from requirements.txt (TODO confirm which ones)
#   supervisor — process manager started by CMD at the bottom of this file
# The apt package index is removed in the same layer to keep the image small.
# --------------------------------------------------------------------------
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
        g++ \
        gcc \
        libxml2-dev \
        libxslt-dev \
        supervisor \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# --------------------------------------------------------------------------
# Python dependencies
# Install before copying source so this layer is cached on code-only changes
# --------------------------------------------------------------------------
COPY backend/requirements.txt ./requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# --------------------------------------------------------------------------
# Pre-download BioLORD-2023 model into the image
# This avoids a ~500MB download on every Space restart
# HF_HOME is set first so the download lands in /app/.cache/huggingface
# and the same location is used at runtime.
# --------------------------------------------------------------------------
ENV HF_HOME=/app/.cache/huggingface
RUN python -c "\
from sentence_transformers import SentenceTransformer; \
print('Downloading BioLORD-2023...'); \
SentenceTransformer('FremyCompany/BioLORD-2023'); \
print('Model cached.')"

# --------------------------------------------------------------------------
# Application source
# --------------------------------------------------------------------------
COPY backend/ ./backend/

# --------------------------------------------------------------------------
# Pre-built knowledge data (bundled — no runtime download needed)
#   data/graph_store.json — 33MB  Orphanet+HPO knowledge graph (NetworkX JSON)
#   data/chromadb/        — 149MB BioLORD disease embeddings (ChromaDB)
#   data/hpo_index/       — 26MB  BioLORD HPO term embeddings (numpy + JSON)
# --------------------------------------------------------------------------
COPY data/graph_store.json ./data/graph_store.json
COPY data/chromadb/ ./data/chromadb/
COPY data/hpo_index/ ./data/hpo_index/

# --------------------------------------------------------------------------
# supervisord config
# --------------------------------------------------------------------------
COPY supervisord.conf /etc/supervisor/conf.d/raredx.conf

# --------------------------------------------------------------------------
# Runtime environment
# Tell pipeline to use embedded ChromaDB and local graph store
# (no Neo4j or external ChromaDB server needed)
# NOTE(review): ORPHANET_DATA_DIR points at /app/data/orphanet, but no COPY
# in this Dockerfile creates that directory — confirm the application does
# not require it to exist.
# --------------------------------------------------------------------------
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    CHROMA_HOST=localhost \
    CHROMA_PORT=9999 \
    CHROMA_COLLECTION=rare_diseases \
    EMBED_MODEL=FremyCompany/BioLORD-2023 \
    ORPHANET_DATA_DIR=/app/data/orphanet

# Port Streamlit listens on (declared for HF Spaces)
EXPOSE 8501

# --------------------------------------------------------------------------
# Start both services via supervisord
#   FastAPI:   127.0.0.1:8080  (internal — Streamlit calls it)
#   Streamlit: 0.0.0.0:8501    (public — HF Spaces exposes this)
# -n keeps supervisord in the foreground so it remains the container's main
# process even if the config file does not set nodaemon=true.
# NOTE(review): no USER instruction is set, so the image defaults to root.
# Switching to a non-root user would require supervisord.conf to use
# log/pid paths that user can write — verify before changing.
# --------------------------------------------------------------------------
CMD ["/usr/bin/supervisord", "-n", "-c", "/etc/supervisor/conf.d/raredx.conf"]