Spaces:
Runtime error
Runtime error
# Container entrypoint.
#
# 1. Downloads the BitNet GGUF model on first start (done at runtime rather
#    than at image build time to avoid the build timeout).
# 2. Launches llama-server in the background, bound to localhost.
# 3. Polls the server's /health endpoint until it reports success, the server
#    process dies, or the startup timeout elapses.
# 4. Replaces this shell with the Gradio app (python app.py).
#
# Only POSIX sh constructs are used, so the script runs under sh or bash.
set -eu

MODEL_DIR="/home/user/app/models"
MODEL_PATH="$MODEL_DIR/ggml-model-i2_s.gguf"
SERVER_BIN="/home/user/app/llama-server"
SERVER_HOST="127.0.0.1"
SERVER_PORT=8080
HEALTH_URL="http://$SERVER_HOST:$SERVER_PORT/health"
STARTUP_TIMEOUT=120 # max number of one-second health polls before giving up

# Download model if not present (runtime download to avoid build timeout)
if [ ! -f "$MODEL_PATH" ]; then
  echo "Downloading BitNet b1.58 2B4T GGUF model (1.1 GB)..."
  # The target directory is handed over as an argument (sys.argv[1]) instead
  # of being spliced into the Python source, so a path containing quotes
  # cannot break or alter the snippet.
  python -c '
import sys
from huggingface_hub import hf_hub_download

path = hf_hub_download(
    repo_id="microsoft/bitnet-b1.58-2B-4T-gguf",
    filename="ggml-model-i2_s.gguf",
    local_dir=sys.argv[1],
)
print(f"Downloaded to: {path}")
' "$MODEL_DIR"
  echo "Model downloaded!"
fi

# Start llama-server in background
echo "Starting bitnet.cpp llama-server..."
"$SERVER_BIN" \
  -m "$MODEL_PATH" \
  --host "$SERVER_HOST" \
  --port "$SERVER_PORT" \
  -t 2 \
  -c 4096 \
  --log-disable &
SERVER_PID=$!

# Wait for server to be ready
echo "Waiting for server to start..."
i=1
while :; do
  # --fail (-f) makes curl exit non-zero on HTTP error statuses. Without it,
  # any HTTP response at all -- including the error status llama-server is
  # documented to return from /health while the model is still loading --
  # would be treated as "ready".
  if curl -sf "$HEALTH_URL" > /dev/null 2>&1; then
    echo "Server ready! (took ${i}s)"
    break
  fi

  # Fail fast if the server process is gone instead of polling a dead port
  # for the whole timeout.
  if ! kill -0 "$SERVER_PID" 2> /dev/null; then
    echo "ERROR: llama-server exited before becoming ready" >&2
    exit 1
  fi

  if [ "$i" -ge "$STARTUP_TIMEOUT" ]; then
    echo "ERROR: Server failed to start after ${STARTUP_TIMEOUT}s" >&2
    # Do not leave a half-started server behind; ignore the error if it has
    # already exited on its own.
    kill "$SERVER_PID" 2> /dev/null || true
    exit 1
  fi

  sleep 1
  i=$((i + 1))
done

# Start Gradio app
echo "Starting Gradio app..."
# exec replaces the shell so the Python process receives signals directly.
exec python app.py