# syntax=docker/dockerfile:1

# llama.cpp server image: its ENTRYPOINT is the llama-server binary, so the
# CMD below supplies only that binary's arguments.
# (The :full variant was considered but only the server is needed at runtime.)
FROM ghcr.io/ggml-org/llama.cpp:server

WORKDIR /app

# Python + pipx are needed only to fetch the model via the `hf` CLI.
# Use apt-get (stable CLI, not `apt`), skip recommends, and clean the package
# lists in the same layer so they never persist in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        pipx \
        python3 \
        python3-pip \
        python3-venv \
    && rm -rf /var/lib/apt/lists/*

# pipx places installed entry points under /root/.local/bin.
ENV PATH=/root/.local/bin:$PATH

# huggingface_hub provides the `hf download` CLI used below.
# (Installed via pipx rather than `pip --break-system-packages` to keep the
# system Python untouched.)
RUN pipx install huggingface_hub

# Download and rename the model in ONE layer: a separate `RUN mv` would
# duplicate the multi-gigabyte GGUF across two image layers. Also drop the
# metadata cache `hf` leaves inside --local-dir.
RUN hf download prism-ml/Bonsai-8B-gguf Bonsai-8B.gguf --local-dir /app \
    && mv /app/Bonsai-8B.gguf /app/bonsai-8b \
    && rm -rf /app/.cache

# Documentation only (does not publish the port); matches --port below.
EXPOSE 7860

# Arguments for llama-server (the image's ENTRYPOINT), exec form.
CMD ["--model", "/app/bonsai-8b", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "--threads", "2", \
     "--cache-type-k", "q8_0", \
     "--cache-type-v", "iq4_nl", \
     "-c", "65536", \
     "--log-disable", \
     "-n", "38912"]