Spaces:
Runtime error
Runtime error
File size: 1,220 Bytes
3092fb9 4dee3f0 3092fb9 4dee3f0 3092fb9 4dee3f0 3092fb9 4dee3f0 3092fb9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 | #!/bin/bash
# Strict mode: abort on a failing command (-e), on expansion of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Filesystem locations used by the rest of this script: the directory and full
# path of the GGUF model file, and the llama-server executable. They are marked
# readonly so later code cannot overwrite them by accident.
readonly MODEL_DIR="/home/user/app/models"
readonly MODEL_PATH="${MODEL_DIR}/ggml-model-i2_s.gguf"
readonly SERVER_BIN="/home/user/app/llama-server"
# Download the model at startup if it is not already on disk
# (runtime download to avoid build timeout).
if [[ ! -f "$MODEL_PATH" ]]; then
  echo "Downloading BitNet b1.58 2B4T GGUF model (1.1 GB)..."
  # The target directory is handed to Python as an argument (sys.argv[1])
  # instead of being spliced into the source text, so a path containing quotes
  # or backslashes cannot break, or inject code into, the Python snippet.
  python -c '
import sys
from huggingface_hub import hf_hub_download

path = hf_hub_download(
    repo_id="microsoft/bitnet-b1.58-2B-4T-gguf",
    filename="ggml-model-i2_s.gguf",
    local_dir=sys.argv[1],
)
print(f"Downloaded to: {path}")
' "$MODEL_DIR"
  # Confirm the file actually landed where the server will be told to load it
  # from, rather than discovering a mismatch only when the server fails.
  if [[ ! -f "$MODEL_PATH" ]]; then
    echo "ERROR: model not found at $MODEL_PATH after download" >&2
    exit 1
  fi
  echo "Model downloaded!"
fi
# Start llama-server in background
echo "Starting bitnet.cpp llama-server..."
# A failing background job does not abort the script even under `set -e`, so
# check the binary up front and report a clear error instead of a later timeout.
if [[ ! -x "$SERVER_BIN" ]]; then
  echo "ERROR: $SERVER_BIN is missing or not executable" >&2
  exit 1
fi
# Arguments are kept in an array so each one reaches the server as a single
# word regardless of spaces in the paths.
# NOTE(review): -t / -c are presumably the thread count and context size;
# confirm against `llama-server --help`.
server_args=(
  -m "$MODEL_PATH"
  --host 127.0.0.1
  --port 8080
  -t 2
  -c 4096
  --log-disable
)
"$SERVER_BIN" "${server_args[@]}" &
# PID of the background server, used by later steps of this script.
SERVER_PID=$!
# Wait for server to be ready: probe the health endpoint up to 120 times,
# sleeping one second between attempts.
echo "Waiting for server to start..."
for ((attempt = 1; attempt <= 120; attempt++)); do
  # -f makes curl return non-zero on HTTP error statuses, so an error response
  # from /health (llama.cpp's server documents a 503 while the model is still
  # loading) is not mistaken for "ready".
  if curl -sf http://127.0.0.1:8080/health > /dev/null 2>&1; then
    echo "Server ready! (took ${attempt}s)"
    break
  fi
  # Fail fast when the background server has already exited instead of polling
  # a dead process for the remaining attempts.
  if ! kill -0 "$SERVER_PID" 2> /dev/null; then
    server_status=0
    wait "$SERVER_PID" || server_status=$?
    echo "ERROR: Server exited with status ${server_status} before becoming ready" >&2
    exit 1
  fi
  if ((attempt == 120)); then
    echo "ERROR: Server failed to start after 120s" >&2
    # Best effort: do not leave a half-started server behind on timeout.
    kill "$SERVER_PID" 2> /dev/null || true
    exit 1
  fi
  sleep 1
done
# Hand control over to the Gradio app. `exec` replaces this shell process with
# python, so nothing after this line would run.
printf '%s\n' "Starting Gradio app..."
exec python app.py
|