knoxel's picture
fix: download model at container startup, not build time
4dee3f0 verified
raw
history blame
1.22 kB
#!/bin/bash
# Container entrypoint: download the BitNet GGUF model at runtime (the
# download was moved out of the image build to avoid build timeouts),
# start the bitnet.cpp llama-server, wait until it answers its health
# endpoint, then hand the process over to the Gradio frontend.
set -euo pipefail

readonly MODEL_DIR="/home/user/app/models"
readonly MODEL_PATH="$MODEL_DIR/ggml-model-i2_s.gguf"
readonly SERVER_BIN="/home/user/app/llama-server"

# Download model if not present (runtime download to avoid build timeout).
if [ ! -f "$MODEL_PATH" ]; then
  echo "Downloading BitNet b1.58 2B4T GGUF model (1.1 GB)..."
  # $MODEL_DIR is expanded by the shell into the Python snippet; it is a
  # fixed constant above, so no quoting/injection hazard here.
  python -c "
from huggingface_hub import hf_hub_download
path = hf_hub_download(
    repo_id='microsoft/bitnet-b1.58-2B-4T-gguf',
    filename='ggml-model-i2_s.gguf',
    local_dir='$MODEL_DIR'
)
print(f'Downloaded to: {path}')
"
  echo "Model downloaded!"
fi

# Start llama-server in the background; it must stay up for app.py to talk to.
echo "Starting bitnet.cpp llama-server..."
"$SERVER_BIN" \
  -m "$MODEL_PATH" \
  --host 127.0.0.1 \
  --port 8080 \
  -t 2 \
  -c 4096 \
  --log-disable &
SERVER_PID=$!

# Poll the health endpoint for up to 120 s. -f makes curl fail on HTTP
# error statuses, so a server that is up but unhealthy does not count.
echo "Waiting for server to start..."
for i in {1..120}; do
  # Fail fast if the server process already exited (e.g. bad model file)
  # instead of polling a dead port for the full two minutes.
  if ! kill -0 "$SERVER_PID" 2>/dev/null; then
    echo "ERROR: llama-server exited during startup" >&2
    exit 1
  fi
  if curl -sf http://127.0.0.1:8080/health > /dev/null 2>&1; then
    echo "Server ready! (took ${i}s)"
    break
  fi
  if [ "$i" -eq 120 ]; then
    echo "ERROR: Server failed to start after 120s" >&2
    exit 1
  fi
  sleep 1
done

# Replace this shell with the Gradio app; llama-server keeps running as a
# sibling process for the life of the container.
echo "Starting Gradio app..."
exec python app.py