# Use a lean Python base.
# Later steps in this file call apt-get and pip, so the base must provide both.
FROM python:3.10-slim

# Install the system toolchain used to build the C++ engine (compiler, CMake, git).
# --no-install-recommends keeps optional "recommended" packages out of the image.
# ca-certificates is listed explicitly so HTTPS downloads keep working without
# relying on it being pulled in as a recommended package.
# wget is not used by any build step visible in this file; it is kept in case the
# application invokes it at runtime.
# The apt package lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        git \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Clone and build the 1-bit engine (PrismML fork of llama.cpp).
# GGML_NATIVE is OFF so the compiler does not tune the binaries for the CPU of the
# machine that happens to build the image; a native build can abort with an
# "illegal instruction" when the container later runs on a host with a different CPU.
# Explicit /llama.cpp paths replace `cd` inside RUN and keep the build output at
# /llama.cpp/build/bin regardless of the current working directory.
# NOTE(review): the clone tracks the head of the default branch; pin a tag or commit
# if reproducible builds are required.
RUN git clone --depth 1 https://github.com/PrismML-Eng/llama.cpp.git /llama.cpp && \
    cmake -S /llama.cpp -B /llama.cpp/build -DGGML_NATIVE=OFF -DCMAKE_BUILD_TYPE=Release && \
    cmake --build /llama.cpp/build -j "$(nproc)"

# Setup app directory.
WORKDIR /app

# Install Python dependencies before copying the source, so that editing
# application files does not invalidate this layer and force a reinstall.
# NOTE(review): package versions are unpinned; consider pinning them for
# reproducible builds.
RUN pip install --no-cache-dir gradio huggingface_hub

# Copy the application source last, since it is the part that changes most often.
COPY . /app

# Place the compiled CLI next to the application code, marked executable.
RUN install -m 0755 /llama.cpp/build/bin/llama-cli /app/llama-cli

# Default command: run app.py with the image's Python interpreter.
# Exec (JSON array) form is used, so no shell wraps the process; the relative
# path app.py resolves against the working directory set by WORKDIR.
# NOTE(review): no USER instruction is visible in this file, so the process
# presumably runs as the base image's default user — confirm whether a
# non-root user is wanted.
CMD ["python", "app.py"]