# syntax=docker/dockerfile:1
# hf_dead / Dockerfile — headless llama.cpp server image for a Hugging Face Space
# (last update 7cfb924 by OrbitMC, "Update Dockerfile")
# Stage 1: Build llama.cpp
FROM ubuntu:22.04 AS builder

# Build prerequisites only. --no-install-recommends keeps the stage lean;
# ca-certificates is required for the HTTPS clone once recommends are off;
# the apt list cleanup keeps the layer lint-clean (DL3009).
# NOTE: python3-pip was dropped — nothing in this stage uses pip.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        git \
        libcurl4-openssl-dev \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Clone raw llama.cpp. LLAMA_CPP_REF lets CI pin a tag/commit for a
# reproducible build; the default (branch head) preserves prior behavior.
# --depth 1 avoids downloading the full history.
ARG LLAMA_CPP_REF=master
RUN git clone --depth 1 --branch "${LLAMA_CPP_REF}" \
        https://github.com/ggml-org/llama.cpp.git .

# Build with UI DISABLED for a pure headless API.
# cURL support stays ON for remote model loading if needed.
RUN cmake -B build \
        -DLLAMA_BUILD_WEBUI=OFF \
        -DLLAMA_CURL=ON \
        -DLLAMA_BUILD_EXAMPLES=OFF
RUN cmake --build build --config Release -j "$(nproc)" --target llama-server
# Stage 2: Runtime
FROM ubuntu:22.04

# Runtime deps only: libcurl4 is the shared lib llama-server was linked
# against (libcurl4-openssl-dev in the builder); curl is for the
# HEALTHCHECK; python3/python3-pip exist solely to fetch the model during
# the build. --no-install-recommends + list cleanup keep the layer small.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        libcurl4 \
        python3 \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*

# --no-cache-dir: don't bake pip's download cache into the layer (DL3042).
# NOTE(review): huggingface_hub is intentionally unpinned to match the
# original; consider pinning (e.g. huggingface_hub==0.x.y) for reproducibility.
RUN pip install --no-cache-dir huggingface_hub

# HF Spaces convention: run the container as a non-root user with UID 1000.
RUN useradd --create-home --uid 1000 user

WORKDIR /app
COPY --from=builder /app/build/bin/llama-server /app/llama-server

# Download official Qwen GGUF (Non-Unsloth) at build time so the container
# starts without needing Hub access; hand the app dir to the runtime user.
RUN python3 -c 'from huggingface_hub import hf_hub_download; \
hf_hub_download(repo_id="Qwen/Qwen2.5-7B-Instruct-GGUF", \
filename="qwen2.5-7b-instruct-q4_k_m.gguf", local_dir="/app")' \
    && chown -R user:user /app

USER user

# HF Spaces route traffic to 7860 (EXPOSE is documentation only).
EXPOSE 7860

# llama-server serves a /health endpoint; long start-period covers the
# time needed to mmap/load the 7B model before the first probe counts.
HEALTHCHECK --interval=30s --timeout=5s --start-period=120s --retries=3 \
    CMD curl -fsS http://localhost:7860/health || exit 1

# Run headless server. Exec form: llama-server is PID 1 and receives
# SIGTERM directly on `docker stop`.
CMD ["./llama-server", \
     "-m", "/app/qwen2.5-7b-instruct-q4_k_m.gguf", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "-c", "32768", \
     "--embedding"]