# Image that installs llama-cpp-python with the CUDA backend enabled and
# runs app.py, which is expected to listen on port 7860.
FROM nvidia/cuda:12.4.0-devel-ubuntu22.04

# Build-time-only settings. Declared as ARG (not ENV) so they are visible to
# every RUN step in this stage but do not persist into the running
# container's environment. They can be overridden with --build-arg.
#   DEBIAN_FRONTEND  - keeps apt-get from prompting during the build.
#   CMAKE_ARGS       - passed to the llama-cpp-python CMake build; turns on CUDA.
#   FORCE_CMAKE      - asks the llama-cpp-python build to go through CMake.
ARG DEBIAN_FRONTEND=noninteractive
ARG CMAKE_ARGS="-DGGML_CUDA=on"
ARG FORCE_CMAKE=1

# OS-level toolchain needed to fetch and compile Python packages from source.
# The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y \
        build-essential \
        cmake \
        git \
        python3 \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Dedicated unprivileged account with a fixed UID/GID so the service does not
# run as root. /app is created up front and handed to that account so the
# application can write next to its own code if it needs to.
RUN groupadd --system --gid 10001 app \
    && useradd --system --uid 10001 --gid app --create-home app \
    && install -d -o app -g app /app

WORKDIR /app

# Python dependencies are installed system-wide as root, before the source is
# copied, so these layers stay cached when only app.py changes.
RUN pip3 install --no-cache-dir --upgrade pip

# NOTE(review): llama-cpp-python is unpinned here; consider pinning an exact
# version for reproducible builds.
RUN pip3 install --no-cache-dir llama-cpp-python

COPY requirements.txt .
RUN pip3 install --no-cache-dir -r requirements.txt

# Application code goes last because it changes most often.
COPY --chown=app:app app.py .

# Drop privileges for runtime; everything that needed root is done above.
USER app

# Documentation only: the port the application is expected to serve on.
EXPOSE 7860

# Exec form so python3 is PID 1 and receives signals from `docker stop`.
CMD ["python3", "app.py"]