knoxel's picture
fix: download model at container startup, not build time
4dee3f0 verified
raw
history blame
1.22 kB
#!/bin/bash
# Container entrypoint: download the BitNet GGUF model at runtime (the
# download was moved out of the image build to avoid build timeouts),
# start the bitnet.cpp llama-server, wait until it answers its health
# endpoint, then hand the process over to the Gradio frontend.
set -euo pipefail

readonly MODEL_DIR="/home/user/app/models"
readonly MODEL_PATH="$MODEL_DIR/ggml-model-i2_s.gguf"
readonly SERVER_BIN="/home/user/app/llama-server"

# Download model if not present (runtime download to avoid build timeout).
if [ ! -f "$MODEL_PATH" ]; then
  echo "Downloading BitNet b1.58 2B4T GGUF model (1.1 GB)..."
  # $MODEL_DIR is expanded by the shell into the Python snippet; it is a
  # fixed constant above, so no quoting/injection hazard here.
  python -c "
from huggingface_hub import hf_hub_download
path = hf_hub_download(
    repo_id='microsoft/bitnet-b1.58-2B-4T-gguf',
    filename='ggml-model-i2_s.gguf',
    local_dir='$MODEL_DIR'
)
print(f'Downloaded to: {path}')
"
  echo "Model downloaded!"
fi

# Start llama-server in the background; it must stay up for app.py to talk to.
echo "Starting bitnet.cpp llama-server..."
"$SERVER_BIN" \
  -m "$MODEL_PATH" \
  --host 127.0.0.1 \
  --port 8080 \
  -t 2 \
  -c 4096 \
  --log-disable &
SERVER_PID=$!

# Poll the health endpoint for up to 120 s. -f makes curl fail on HTTP
# error statuses, so a server that is up but unhealthy does not count.
echo "Waiting for server to start..."
for i in {1..120}; do
  # Fail fast if the server process already exited (e.g. bad model file)
  # instead of polling a dead port for the full two minutes.
  if ! kill -0 "$SERVER_PID" 2>/dev/null; then
    echo "ERROR: llama-server exited during startup" >&2
    exit 1
  fi
  if curl -sf http://127.0.0.1:8080/health > /dev/null 2>&1; then
    echo "Server ready! (took ${i}s)"
    break
  fi
  if [ "$i" -eq 120 ]; then
    echo "ERROR: Server failed to start after 120s" >&2
    exit 1
  fi
  sleep 1
done

# Replace this shell with the Gradio app; llama-server keeps running as a
# sibling process for the life of the container.
echo "Starting Gradio app..."
exec python app.py