# syntax=docker/dockerfile:1
FROM python:3.11-slim

WORKDIR /app

# Build toolchain needed to compile llama-cpp-python from source with OpenBLAS.
# --no-install-recommends keeps the layer minimal; apt lists removed in the
# same layer so they never persist in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        curl \
        git \
        libopenblas-dev \
        pkg-config \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Copy only the manifest first so the dependency layer stays cached
# until requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Use latest llama-cpp-python — 0.3.8 does NOT support Gemma 4 MoE (A4B) architecture
RUN CMAKE_BUILD_PARALLEL_LEVEL=4 \
    CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS" \
    pip install --no-cache-dir "llama-cpp-python"

COPY app.py .

# Create a non-root service user; model directory must be writable by it
# (models are downloaded/mounted at runtime). Stable numeric UID/GID so
# runtimes enforcing runAsNonRoot can verify it.
RUN groupadd --system --gid 10001 app \
    && useradd --system --uid 10001 --gid app --home /app app \
    && mkdir -p /app/models \
    && chown -R app:app /app
USER app

# Documentation only — the port must still be published at `docker run`.
EXPOSE 7860

# Runtime configuration; overridable at `docker run -e …`.
#   SPACE_URL  — external URL of the deployment (empty by default)
#   N_CTX      — llama.cpp context window size
#   N_THREADS  — inference thread count
ENV SPACE_URL="" \
    N_CTX="4096" \
    N_THREADS="2"

# curl is already present from the build layer (single-stage image).
# Generous start period: model load can take a while.
HEALTHCHECK --interval=30s --timeout=5s --start-period=120s --retries=3 \
    CMD curl -fsS http://localhost:7860/ || exit 1

# Exec form: uvicorn is PID 1 and receives SIGTERM from `docker stop`.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]