#!/bin/bash
# Container/startup entry point:
#   1. On first run, clone and build microsoft/BitNet (bitnet.cpp) with the
#      I2_S kernel (~5 min); later runs skip straight past this.
#   2. Ensure the BitNet 2B-4T GGUF model file is present (download if not).
#   3. Launch llama-server in the background and poll its /health endpoint
#      until it answers (up to ~120 s).
#   4. exec the Gradio front-end (app.py), which talks to the local server.
#
# Requires: git, pip, python (with huggingface_hub), curl, a C++ toolchain.
set -euo pipefail

readonly WORK_DIR="/home/user/app"
readonly BITNET_DIR="$WORK_DIR/BitNet"
readonly MODEL_PATH="$BITNET_DIR/models/bitnet-b1.58-2B-4T-gguf/ggml-model-i2_s.gguf"
readonly SERVER_BIN="$BITNET_DIR/build/bin/llama-server"

# ─── Step 1: Clone and build bitnet.cpp if not already done ──────────────────
if [ ! -f "$SERVER_BIN" ]; then
  echo "=== First run: building bitnet.cpp ==="
  echo "This takes ~5 minutes. Subsequent restarts will be fast."
  echo ""
  if [ ! -d "$BITNET_DIR" ]; then
    echo "[1/3] Cloning bitnet.cpp..."
    git clone --depth 1 --recursive https://github.com/microsoft/BitNet.git "$BITNET_DIR"
    pip install --no-cache-dir -r "$BITNET_DIR/requirements.txt"
  fi
  echo "[2/3] Building with I2_S kernel..."
  cd "$BITNET_DIR"
  # setup_env.py builds the llama.cpp fork and pulls the quantized model
  # from the given HF repo.
  python setup_env.py --hf-repo microsoft/bitnet-b1.58-2B-4T-gguf -q i2_s
  echo "Build complete!"
  cd "$WORK_DIR"
else
  echo "bitnet.cpp already built, skipping..."
fi

# ─── Step 2: Verify model exists ─────────────────────────────────────────────
# Safety net in case the build ran but the model download was interrupted.
if [ ! -f "$MODEL_PATH" ]; then
  echo "[3/3] Downloading model..."
  python -c "
from huggingface_hub import hf_hub_download
hf_hub_download(
    repo_id='microsoft/bitnet-b1.58-2B-4T-gguf',
    filename='ggml-model-i2_s.gguf',
    local_dir='$BITNET_DIR/models/bitnet-b1.58-2B-4T-gguf'
)
print('Model downloaded!')
"
fi

# ─── Step 3: Start llama-server ──────────────────────────────────────────────
echo ""
echo "=== Starting bitnet.cpp llama-server ==="
"$SERVER_BIN" \
  -m "$MODEL_PATH" \
  --host 127.0.0.1 \
  --port 8080 \
  -t 2 \
  -c 4096 \
  --log-disable &
SERVER_PID=$!

# Poll /health for up to ~120 s. Bail out early if the background server
# process has already died instead of silently waiting the full window.
echo "Waiting for server..."
for ((i = 1; i <= 120; i++)); do
  if curl -s http://127.0.0.1:8080/health > /dev/null 2>&1; then
    echo "Server ready! (${i}s)"
    break
  fi
  if ! kill -0 "$SERVER_PID" 2>/dev/null; then
    echo "ERROR: Server process exited before becoming healthy" >&2
    exit 1
  fi
  if (( i == 120 )); then
    echo "ERROR: Server failed to start" >&2
    # Diagnostic: rerun in the foreground on a spare port with a tiny
    # context so the first lines of its (likely failing) output are shown.
    "$SERVER_BIN" -m "$MODEL_PATH" --host 127.0.0.1 --port 8081 -t 2 -c 512 2>&1 | head -20
    exit 1
  fi
  sleep 1
done

# ─── Step 4: Start Gradio ────────────────────────────────────────────────────
# exec replaces this shell with the app; the background server is left
# running as an orphan for the app to talk to (intentional).
echo "Starting Gradio app..."
exec python app.py