# syntax=docker/dockerfile:1
FROM python:3.10-slim

WORKDIR /app

# System dependencies: build-essential for pip packages that compile C
# extensions, git + git-lfs to pull the dataset during the build.
# --no-install-recommends keeps the layer minimal (hadolint DL3015);
# the apt list cleanup happens in the same layer so it actually shrinks the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        git \
        git-lfs \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first so the (slow) dependency layer stays cached
# until requirements.txt itself changes (Docker layer caching).
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Cache-bust: forces Docker to re-copy source code on every build so
# HuggingFace always gets the latest code from git.
# NOTE: an ARG only invalidates the build cache at its first *use*, not at
# its declaration — a bare "ARG CACHEBUST" busts nothing. The RUN below
# consumes it, which is what actually invalidates the following layers.
ARG CACHEBUST=20260414_5
RUN echo "cachebust=${CACHEBUST}"

# Copy source code
COPY src/ ./src/
COPY config/ ./config/
COPY run_api.py .
# NOTE(review): baking an env file into the image is only acceptable because
# this is the *example* file — never COPY a real .env containing credentials.
COPY .env.example ./.env

# Copy data (uploaded via HuggingFace web UI) — kept for reference; the data
# is pulled from the HF Dataset below instead.
# COPY data/qdrant_db/ ./data/qdrant_db/
# COPY data/embeddings/bm25_index.pkl ./data/embeddings/bm25_index.pkl
# COPY data/embeddings/embeddings.npy ./data/embeddings/embeddings.npy
# COPY data/embeddings/chunk_ids.npy ./data/embeddings/chunk_ids.npy
# COPY data/embeddings/embedding_index.json ./data/embeddings/embedding_index.json
# COPY data/chunks/ ./data/chunks/

# Download the 4.4 GB database from the limits-free HF Dataset using git.
# This happens during the Docker build so the API starts instantly later.
# --depth 1 skips repo history, and removing .git afterwards (same layer)
# drops the duplicate copy of every LFS object that git-lfs keeps under
# .git/lfs — without this the layer stores the dataset roughly twice.
RUN git lfs install \
    && git clone --depth 1 https://huggingface.co/datasets/Subhadip007/researchpilot-data /app/data \
    && rm -rf /app/data/.git

# Create remaining data dirs inside the cloned repo
RUN mkdir -p data/raw data/processed logs

# NOTE(review): the container runs as root. Adding a non-root USER would be
# safer, but chown-ing the 4.4 GB dataset layer would double its size —
# confirm whether the app needs write access to data/ before switching.

# HuggingFace Spaces uses port 7860
ENV PORT=7860
EXPOSE 7860

# Start the API. Exec (JSON-array) form keeps uvicorn as PID 1 so it
# receives SIGTERM directly on container stop.
CMD ["uvicorn", "src.api.main:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]