# syntax=docker/dockerfile:1
# Image for a FastAPI RAG service backed by a local GGUF Mistral model
# served via llama-cpp-python, with a pre-built Chroma vector store.
FROM python:3.12-slim

# Install system dependencies needed to compile llama-cpp-python from source.
# --no-install-recommends keeps the layer minimal; the apt list cache is
# removed in the same layer so it never persists in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        python3-dev \
    && rm -rf /var/lib/apt/lists/*

# Set working directory (created automatically if missing)
WORKDIR /app

# Install required Python libraries.
# NOTE(review): fastapi / uvicorn / pydantic are unpinned while every other
# dependency is pinned — consider pinning them too for reproducible builds.
RUN pip install --no-cache-dir \
        fastapi \
        uvicorn \
        pydantic \
        huggingface_hub==0.35.3 \
        pandas==2.2.2 \
        tiktoken==0.12.0 \
        pymupdf==1.26.5 \
        langchain==0.3.27 \
        langchain-community==0.3.31 \
        chromadb==1.1.1 \
        sentence-transformers==5.1.1 \
        llama-cpp-python==0.2.28 \
        'numpy<2.1.0'

# Copy the application logic and FastAPI server
COPY app_logic.py main.py ./

# Copy the persistent vector database directory
COPY chroma_db/ ./chroma_db/

# Copy the quantized model into the HF-hub cache layout expected by
# main.py's MODEL_PATH. COPY creates the intermediate directories itself,
# so no separate `RUN mkdir -p` layer is needed.
# NOTE(review): the container must run as root because this path lives under
# /root/.cache — making MODEL_PATH configurable would allow a non-root USER.
COPY mistral-7b-instruct-v0.1.Q4_K_M.gguf /root/.cache/huggingface/hub/models--TheBloke--Mistral-7B-Instruct-v0.1-GGUF/snapshots/731a9fc8f06f5f5e2db8a0cf9d256197eb6e05d1/

# Expose port 8000 for FastAPI (documentation only; does not publish the port)
EXPOSE 8000

# Start the FastAPI application (exec form: uvicorn runs as PID 1 and
# receives SIGTERM directly from `docker stop`)
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]