# syntax=docker/dockerfile:1

##### Build stage: compile llama-server from source #####
FROM ubuntu:22.04 AS builder

# Build toolchain only (no pip here — nothing in this stage uses Python).
# ca-certificates is listed explicitly: --no-install-recommends stops it
# from arriving transitively, and the https git clone below needs it.
# Lists are removed in the same layer so they never persist in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        git \
        libcurl4-openssl-dev \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Pin the source ref for reproducible builds; override with
#   docker build --build-arg LLAMA_CPP_REF=<tag-or-branch>
# --depth 1 keeps the clone (and this layer) small.
ARG LLAMA_CPP_REF=master
RUN git clone --depth 1 --branch "${LLAMA_CPP_REF}" \
        https://github.com/ggml-org/llama.cpp.git .

# -DCMAKE_BUILD_TYPE=Release must be set at configure time: the Makefile
# generator used on Linux is single-config, so `--config Release` on the
# build command alone is ignored there.
RUN cmake -B build \
        -DCMAKE_BUILD_TYPE=Release \
        -DLLAMA_BUILD_WEBUI=OFF \
        -DLLAMA_CURL=ON \
        -DLLAMA_BUILD_EXAMPLES=OFF
RUN cmake --build build --config Release -j "$(nproc)" --target llama-server
|
|
##### Runtime stage: minimal image with the server binary + baked-in model #####
FROM ubuntu:22.04

# libcurl4:        runtime dependency of llama-server (built with LLAMA_CURL=ON).
# ca-certificates: TLS verification for the Hugging Face model download.
# curl:            used by the HEALTHCHECK below.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        libcurl4 \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*
# Pin huggingface_hub for reproducible builds; --no-cache-dir keeps the
# pip cache out of the image layer.
RUN pip install --no-cache-dir "huggingface_hub==0.25.*"

WORKDIR /app
COPY --from=builder /app/build/bin/llama-server /app/llama-server

# Bake the model into the image so the container starts without any
# network access at runtime.
RUN python3 -c 'from huggingface_hub import hf_hub_download; \
    hf_hub_download(repo_id="Qwen/Qwen2.5-7B-Instruct-GGUF", \
    filename="qwen2.5-7b-instruct-q4_k_m.gguf", local_dir="/app")'

# Drop root. No chown of /app: a recursive chown would duplicate the
# multi-gigabyte model layer, and the server only needs read access to
# root-owned files, which world-readable defaults already grant.
RUN useradd --system --uid 10001 --home /app --shell /usr/sbin/nologin app
USER app

# Documentation only (does not publish the port); 7860 is the
# conventional Hugging Face Spaces port.
EXPOSE 7860

# Long start-period: the server must mmap/load a ~4.7 GB model before the
# /health endpoint reports ready.
HEALTHCHECK --interval=30s --timeout=5s --start-period=120s --retries=3 \
    CMD curl -fsS http://localhost:7860/health || exit 1

# Exec form: llama-server runs as PID 1 and receives SIGTERM from `docker stop`.
CMD ["./llama-server", \
    "-m", "/app/qwen2.5-7b-instruct-q4_k_m.gguf", \
    "--host", "0.0.0.0", \
    "--port", "7860", \
    "-c", "32768", \
    "--embedding"]