# syntax=docker/dockerfile:1
#
# Image for a Python app (app.py, launched through start.sh) that ships the
# llama-server and llama-cli executables compiled from microsoft/BitNet.
#
# Two stages:
#   builder  - toolchain, BitNet checkout and compilation. Discarded afterwards,
#              so compilers and the source/build tree never reach the final image.
#   runtime  - runtime packages, Python dependencies, the two executables and
#              the app files, running as the unprivileged account "user".
#
# BuildKit is required (RUN --mount and COPY --chmod are used below).

# Single definition of the base image so both stages share the same userland
# and the compiled executables run against the libraries they were built with.
ARG BASE_IMAGE=python:3.11-slim

# ---------------------------------------------------------------------------
# Stage 1: compile the BitNet executables
# ---------------------------------------------------------------------------
FROM ${BASE_IMAGE} AS builder

# Same package set (including apt's default recommends) as the previous
# single-stage build, so the compile environment is unchanged. Image size is
# not a concern here because this stage is thrown away.
RUN apt-get update && apt-get install -y \
        build-essential \
        clang \
        cmake \
        curl \
        git \
        libgomp1 \
    && rm -rf /var/lib/apt/lists/*

# Shallow clone for speed. No branch or commit is pinned, so every uncached
# build picks up whatever the repository's default branch currently points to.
RUN git clone --depth 1 --recursive https://github.com/microsoft/BitNet.git /opt/BitNet

WORKDIR /opt/BitNet

# Build only the two executables we ship. cmake is invoked directly instead of
# going through setup_env.py so that no model is downloaded at build time.
RUN cmake -B build -DCMAKE_BUILD_TYPE=Release \
        -DGGML_BITNET_ARM_TL1=OFF \
        -DGGML_BITNET_X86_TL2=OFF \
    && cmake --build build --config Release -j"$(nproc)" --target llama-server llama-cli

# ---------------------------------------------------------------------------
# Stage 2: runtime image
# ---------------------------------------------------------------------------
FROM ${BASE_IMAGE} AS runtime

# Runtime packages only; the compiler toolchain stays in the builder stage.
# ca-certificates is listed explicitly because recommends are disabled.
# NOTE(review): git and curl are kept because start.sh and requirements.txt
# are not visible from this file and may rely on them - drop them if unused.
# If requirements.txt needs a compiler for source builds, add it here.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        git \
        libgomp1 \
    && rm -rf /var/lib/apt/lists/*

# Install BitNet's Python dependencies into the runtime interpreter, as the
# single-stage build did. The checkout is bind-mounted from the builder at its
# original path for this one step only, so it is readable by pip (including
# any files the requirements file refers to) without being stored in a layer.
RUN --mount=type=bind,from=builder,source=/opt/BitNet,target=/opt/BitNet \
    pip install --no-cache-dir -r /opt/BitNet/requirements.txt

# Unprivileged account plus the app and model directories, created already
# owned by that account so no recursive chown layer is needed later.
RUN useradd -ms /bin/bash user \
    && install -d -o user -g user /home/user/app /home/user/app/models

WORKDIR /home/user/app

# Only the two executables are carried over from the build tree.
# NOTE(review): if they are dynamically linked against libraries that live
# under /opt/BitNet/build, those libraries must be copied as well - verify
# with `ldd ./llama-server` inside the built image.
COPY --from=builder --chown=user:user \
    /opt/BitNet/build/bin/llama-server \
    /opt/BitNet/build/bin/llama-cli \
    ./

# App dependencies are installed before the app sources are copied, so edits
# to app.py or start.sh do not invalidate the dependency layer.
COPY --chown=user:user requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

# App files; start.sh is made executable at copy time.
COPY --chown=user:user app.py ./
COPY --chown=user:user --chmod=0755 start.sh ./

USER user

EXPOSE 7860

ENV GRADIO_SERVER_NAME="0.0.0.0" \
    GRADIO_SERVER_PORT="7860"

CMD ["./start.sh"]