knoxel committed
Commit f76f974 · verified · 1 Parent(s): d391afc

fix: full runtime setup — clone, build, download, serve

Files changed (1): start.sh +40 -17
start.sh CHANGED
@@ -1,28 +1,49 @@
 #!/bin/bash
 set -e
 
-MODEL_DIR="/home/user/app/models"
-MODEL_PATH="$MODEL_DIR/ggml-model-i2_s.gguf"
-SERVER_BIN="/home/user/app/llama-server"
+WORK_DIR="/home/user/app"
+BITNET_DIR="$WORK_DIR/BitNet"
+MODEL_PATH="$WORK_DIR/BitNet/models/bitnet-b1.58-2B-4T-gguf/ggml-model-i2_s.gguf"
+SERVER_BIN="$WORK_DIR/BitNet/build/bin/llama-server"
 
-# Download model if not present (runtime download to avoid build timeout)
+# ─── Step 1: Clone and build bitnet.cpp if not already done ──────────────────
+if [ ! -f "$SERVER_BIN" ]; then
+    echo "=== First run: building bitnet.cpp ==="
+    echo "This takes ~5 minutes. Subsequent restarts will be fast."
+    echo ""
+
+    if [ ! -d "$BITNET_DIR" ]; then
+        echo "[1/3] Cloning bitnet.cpp..."
+        git clone --depth 1 --recursive https://github.com/microsoft/BitNet.git "$BITNET_DIR"
+        pip install --no-cache-dir -r "$BITNET_DIR/requirements.txt"
+    fi
+
+    echo "[2/3] Building with I2_S kernel..."
+    cd "$BITNET_DIR"
+    python setup_env.py --hf-repo microsoft/bitnet-b1.58-2B-4T-gguf -q i2_s
+    echo "Build complete!"
+    cd "$WORK_DIR"
+else
+    echo "bitnet.cpp already built, skipping..."
+fi
+
+# ─── Step 2: Verify model exists ─────────────────────────────────────────────
 if [ ! -f "$MODEL_PATH" ]; then
-    echo "Downloading BitNet b1.58 2B4T GGUF model (1.1 GB)..."
+    echo "[3/3] Downloading model..."
     python -c "
 from huggingface_hub import hf_hub_download
-import os
-path = hf_hub_download(
+hf_hub_download(
     repo_id='microsoft/bitnet-b1.58-2B-4T-gguf',
     filename='ggml-model-i2_s.gguf',
-    local_dir='$MODEL_DIR'
+    local_dir='$BITNET_DIR/models/bitnet-b1.58-2B-4T-gguf'
 )
-print(f'Downloaded to: {path}')
+print('Model downloaded!')
 "
-    echo "Model downloaded!"
 fi
 
-# Start llama-server in background
-echo "Starting bitnet.cpp llama-server..."
+# ─── Step 3: Start llama-server ──────────────────────────────────────────────
+echo ""
+echo "=== Starting bitnet.cpp llama-server ==="
 $SERVER_BIN \
     -m "$MODEL_PATH" \
     --host 127.0.0.1 \
@@ -33,20 +54,22 @@ $SERVER_BIN \
 
 SERVER_PID=$!
 
-# Wait for server to be ready
-echo "Waiting for server to start..."
+# Wait for server
+echo "Waiting for server..."
 for i in $(seq 1 120); do
     if curl -s http://127.0.0.1:8080/health > /dev/null 2>&1; then
-        echo "Server ready! (took ${i}s)"
+        echo "Server ready! (${i}s)"
         break
     fi
     if [ $i -eq 120 ]; then
-        echo "ERROR: Server failed to start after 120s"
+        echo "ERROR: Server failed to start"
+        # Try to show what went wrong
+        $SERVER_BIN -m "$MODEL_PATH" --host 127.0.0.1 --port 8081 -t 2 -c 512 2>&1 | head -20
         exit 1
     fi
     sleep 1
 done
 
-# Start Gradio app
+# ─── Step 4: Start Gradio ────────────────────────────────────────────────────
 echo "Starting Gradio app..."
 exec python app.py
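
Once the script prints "Server ready!", the local server can be smoke-tested by hand from another shell in the Space. This is a minimal sketch, not part of the commit: the /health probe and port 8080 come from the script above, while the /completion route and its JSON fields follow upstream llama.cpp server conventions and are an assumption about the bitnet.cpp fork.

# Same health endpoint the startup loop polls
curl -s http://127.0.0.1:8080/health

# Assumed llama.cpp-style completion endpoint; adjust if the BitNet fork differs
curl -s http://127.0.0.1:8080/completion \
    -H 'Content-Type: application/json' \
    -d '{"prompt": "Hello, BitNet!", "n_predict": 32}'

If both calls respond, the Gradio app started by the last line should be able to reach the backend on the same host and port.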