File size: 2,552 Bytes
3092fb9
 
 
f76f974
 
 
 
4dee3f0
f76f974
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4dee3f0
f76f974
4dee3f0
 
f76f974
4dee3f0
 
f76f974
4dee3f0
f76f974
4dee3f0
 
3092fb9
f76f974
 
 
3092fb9
 
 
 
 
 
 
 
 
 
f76f974
 
4dee3f0
3092fb9
f76f974
3092fb9
 
4dee3f0
f76f974
 
 
4dee3f0
 
3092fb9
 
 
f76f974
3092fb9
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#!/bin/bash
# Bootstrap script: build bitnet.cpp on first run, ensure the GGUF model is
# present, start llama-server in the background, then launch the Gradio app.
#
# -e: abort on unhandled command failure; -u: error on unset variables;
# -o pipefail: a pipeline fails if any stage fails (not just the last).
set -euo pipefail

# Fixed deployment paths — marked readonly so later steps cannot clobber them.
readonly WORK_DIR="/home/user/app"
readonly BITNET_DIR="$WORK_DIR/BitNet"
readonly MODEL_PATH="$BITNET_DIR/models/bitnet-b1.58-2B-4T-gguf/ggml-model-i2_s.gguf"
readonly SERVER_BIN="$BITNET_DIR/build/bin/llama-server"

# ─── Step 1: Clone and build bitnet.cpp if not already done ──────────────────
# The built server binary is the marker for "build complete"; its presence
# makes restarts fast by skipping the whole step.
if [ ! -f "$SERVER_BIN" ]; then
    echo "=== First run: building bitnet.cpp ==="
    echo "This takes ~5 minutes. Subsequent restarts will be fast."
    echo ""

    # Clone (and install its Python deps) only if the checkout is missing;
    # a failed *build* with an existing checkout will retry below without
    # re-cloning.
    if [ ! -d "$BITNET_DIR" ]; then
        echo "[1/3] Cloning bitnet.cpp..."
        git clone --depth 1 --recursive https://github.com/microsoft/BitNet.git "$BITNET_DIR"
        pip install --no-cache-dir -r "$BITNET_DIR/requirements.txt"
    fi

    echo "[2/3] Building with I2_S kernel..."
    # Run the build in a subshell so this script's working directory is never
    # changed (replaces the original cd-in / cd-back dance). A failure inside
    # the subshell still aborts the script under set -e.
    (
        cd "$BITNET_DIR"
        python setup_env.py --hf-repo microsoft/bitnet-b1.58-2B-4T-gguf -q i2_s
    )
    echo "Build complete!"
else
    echo "bitnet.cpp already built, skipping..."
fi

# ─── Step 2: Verify model exists ─────────────────────────────────────────────
# setup_env.py normally fetches the model during the build; this is a fallback
# for the case where the build succeeded but the download did not.
if [ ! -f "$MODEL_PATH" ]; then
    echo "[3/3] Downloading model..."
    # Pass the target directory through the environment instead of
    # interpolating the shell variable into the Python source (the original
    # approach breaks — or allows injection — if the path contains quotes).
    # The quoted 'PY' delimiter keeps the heredoc literal on the shell side.
    MODEL_DIR="$BITNET_DIR/models/bitnet-b1.58-2B-4T-gguf" python - <<'PY'
import os
from huggingface_hub import hf_hub_download
hf_hub_download(
    repo_id='microsoft/bitnet-b1.58-2B-4T-gguf',
    filename='ggml-model-i2_s.gguf',
    local_dir=os.environ['MODEL_DIR'],
)
print('Model downloaded!')
PY
fi

# ─── Step 3: Start llama-server ──────────────────────────────────────────────
echo ""
echo "=== Starting bitnet.cpp llama-server ==="
# Quote the binary path: an unquoted $SERVER_BIN would word-split/glob.
"$SERVER_BIN" \
    -m "$MODEL_PATH" \
    --host 127.0.0.1 \
    --port 8080 \
    -t 2 \
    -c 4096 \
    --log-disable &

SERVER_PID=$!

# Poll the health endpoint once per second for up to two minutes. Also bail
# out early if the background server process dies, instead of polling a dead
# server for the full timeout (the original loop did exactly that).
echo "Waiting for server..."
for ((i = 1; i <= 120; i++)); do
    if curl -s http://127.0.0.1:8080/health > /dev/null 2>&1; then
        echo "Server ready! (${i}s)"
        break
    fi
    if ! kill -0 "$SERVER_PID" 2>/dev/null; then
        echo "ERROR: llama-server exited during startup" >&2
        # Re-run briefly in the foreground on a spare port to surface the
        # error output (the backgrounded run's logs were disabled).
        "$SERVER_BIN" -m "$MODEL_PATH" --host 127.0.0.1 --port 8081 -t 2 -c 512 2>&1 | head -20
        exit 1
    fi
    if [ "$i" -eq 120 ]; then
        echo "ERROR: Server failed to start" >&2
        "$SERVER_BIN" -m "$MODEL_PATH" --host 127.0.0.1 --port 8081 -t 2 -c 512 2>&1 | head -20
        exit 1
    fi
    sleep 1
done

# ─── Step 4: Start Gradio ────────────────────────────────────────────────────
# exec replaces this shell with the Gradio process so it receives signals
# directly (container/PID-1 friendly); llama-server keeps running as its
# background child.
echo "Starting Gradio app..."
exec python app.py