# syntax=docker/dockerfile:1
# hf_dead / Dockerfile — headless llama.cpp server image for a Hugging Face Space
# (last update 7cfb924 by OrbitMC, "Update Dockerfile")
# Stage 1: Build llama.cpp
FROM ubuntu:22.04 AS builder

# Build prerequisites only. --no-install-recommends keeps the stage lean;
# ca-certificates is required for the HTTPS clone once recommends are off;
# the apt list cleanup keeps the layer lint-clean (DL3009).
# NOTE: python3-pip was dropped — nothing in this stage uses pip.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        git \
        libcurl4-openssl-dev \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Clone raw llama.cpp. LLAMA_CPP_REF lets CI pin a tag/commit for a
# reproducible build; the default (branch head) preserves prior behavior.
# --depth 1 avoids downloading the full history.
ARG LLAMA_CPP_REF=master
RUN git clone --depth 1 --branch "${LLAMA_CPP_REF}" \
        https://github.com/ggml-org/llama.cpp.git .

# Build with UI DISABLED for a pure headless API.
# cURL support stays ON for remote model loading if needed.
RUN cmake -B build \
        -DLLAMA_BUILD_WEBUI=OFF \
        -DLLAMA_CURL=ON \
        -DLLAMA_BUILD_EXAMPLES=OFF
RUN cmake --build build --config Release -j "$(nproc)" --target llama-server
# Stage 2: Runtime
FROM ubuntu:22.04

# Runtime deps only: libcurl4 is the shared lib llama-server was linked
# against (libcurl4-openssl-dev in the builder); curl is for the
# HEALTHCHECK; python3/python3-pip exist solely to fetch the model during
# the build. --no-install-recommends + list cleanup keep the layer small.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        libcurl4 \
        python3 \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*

# --no-cache-dir: don't bake pip's download cache into the layer (DL3042).
# NOTE(review): huggingface_hub is intentionally unpinned to match the
# original; consider pinning (e.g. huggingface_hub==0.x.y) for reproducibility.
RUN pip install --no-cache-dir huggingface_hub

# HF Spaces convention: run the container as a non-root user with UID 1000.
RUN useradd --create-home --uid 1000 user

WORKDIR /app
COPY --from=builder /app/build/bin/llama-server /app/llama-server

# Download official Qwen GGUF (Non-Unsloth) at build time so the container
# starts without needing Hub access; hand the app dir to the runtime user.
RUN python3 -c 'from huggingface_hub import hf_hub_download; \
hf_hub_download(repo_id="Qwen/Qwen2.5-7B-Instruct-GGUF", \
filename="qwen2.5-7b-instruct-q4_k_m.gguf", local_dir="/app")' \
    && chown -R user:user /app

USER user

# HF Spaces route traffic to 7860 (EXPOSE is documentation only).
EXPOSE 7860

# llama-server serves a /health endpoint; long start-period covers the
# time needed to mmap/load the 7B model before the first probe counts.
HEALTHCHECK --interval=30s --timeout=5s --start-period=120s --retries=3 \
    CMD curl -fsS http://localhost:7860/health || exit 1

# Run headless server. Exec form: llama-server is PID 1 and receives
# SIGTERM directly on `docker stop`.
CMD ["./llama-server", \
     "-m", "/app/qwen2.5-7b-instruct-q4_k_m.gguf", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "-c", "32768", \
     "--embedding"]