Spaces:
Runtime error
Runtime error
File size: 4,716 Bytes
0633a27 0a882d1 0633a27 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 | #!/bin/bash
# Abort on the first unhandled command failure.
set -e

echo "Starting Bonsai-demo entrypoint..."

# ── Validate required secrets ────────────────────────────────────────────────
# Strip all whitespace BEFORE the emptiness check, so that a value consisting
# only of spaces/newlines is rejected here instead of slipping through and
# producing a malformed download URL later on.
MODEL_REPO="${MODEL_REPO:-}"
MODEL_FILE="${MODEL_FILE:-}"
MODEL_REPO="${MODEL_REPO//[[:space:]]/}"
MODEL_FILE="${MODEL_FILE//[[:space:]]/}"

if [ -z "$MODEL_REPO" ] || [ -z "$MODEL_FILE" ]; then
  # Diagnostics belong on stderr so they are not mistaken for normal output.
  {
    echo ""
    echo "ERROR: MODEL_REPO and MODEL_FILE secrets must be set."
    echo " Go to Space Settings → Repository Secrets and add:"
    echo " MODEL_REPO = prism-ml/Bonsai-8B-gguf"
    echo " MODEL_FILE = Bonsai-8B.gguf"
    echo ""
  } >&2
  exit 1
fi

# Directory that receives every downloaded model file.
MODEL_DIR="/app/models"
mkdir -p "$MODEL_DIR"
#######################################
# Download one model file from huggingface.co into $MODEL_DIR.
# Globals:   MODEL_DIR (read)
# Arguments: $1 - repository id, e.g. "org/name"
#            $2 - file name inside the repository
#            (all whitespace is stripped from both before use)
# Outputs:   progress messages on stdout (curl's stderr is merged in)
# Returns:   0 on success; terminates the whole script with status 1 once
#            every attempt has failed
#######################################
download_model() {
  local repo file path url attempt
  local retries=5

  repo=$(echo "$1" | tr -d '[:space:]')
  file=$(echo "$2" | tr -d '[:space:]')
  path="$MODEL_DIR/$file"
  url="https://huggingface.co/$repo/resolve/main/$file"

  for ((attempt = 1; attempt <= retries; attempt++)); do
    echo "Downloading (attempt $attempt/$retries): $url"
    # Each outer attempt starts from a clean slate.
    rm -f "$path"
    if curl -fL --retry 3 --retry-delay 5 -C - -o "$path" "$url" 2>&1; then
      echo "Downloaded: $(ls -lh "$path")"
      return 0
    fi
    # Only announce and wait when another attempt will actually follow;
    # after the last failure go straight to the error below.
    if ((attempt < retries)); then
      echo "Download failed, retrying in 10s..."
      sleep 10
    fi
  done

  echo "ERROR: Failed to download $url after $retries attempts"
  exit 1
}
# Primary model (required): normalise the two secrets, then fetch the file.
MODEL_REPO="$(echo "$MODEL_REPO" | tr -d '[:space:]')"
MODEL_FILE="$(echo "$MODEL_FILE" | tr -d '[:space:]')"
download_model "$MODEL_REPO" "$MODEL_FILE"

# Additional models (optional): MODEL_REPO_B/MODEL_FILE_B and
# MODEL_REPO_C/MODEL_FILE_C. A slot is used only when both of its
# variables are non-empty after whitespace stripping.
# MODELS_MAX counts the primary model plus every optional one downloaded.
# NOTE(review): nothing in the visible script reads MODELS_MAX - confirm
# whether anything still consumes it.
MODELS_MAX=1
for slot in B C; do
  repo_key="MODEL_REPO_${slot}"
  file_key="MODEL_FILE_${slot}"
  # ${!name:-} reads the variable whose name is stored in "name".
  extra_repo="$(echo "${!repo_key:-}" | tr -d '[:space:]')"
  extra_file="$(echo "${!file_key:-}" | tr -d '[:space:]')"
  if [ -z "$extra_repo" ] || [ -z "$extra_file" ]; then
    continue
  fi
  download_model "$extra_repo" "$extra_file"
  MODELS_MAX=$((MODELS_MAX + 1))
done
# ── Dashboard auth ───────────────────────────────────────────────────────────
# Always write /tmp/.htpasswd with a single "admin" entry: a placeholder
# entry when DASHBOARD_KEY is unset or empty, otherwise an apr1 hash of the key.
if [ -z "$DASHBOARD_KEY" ]; then
  echo "WARNING: DASHBOARD_KEY not set, /dash-2e215f981f3f is unprotected"
  printf '%s\n' 'admin:$apr1$open$open' > /tmp/.htpasswd
else
  HASH="$(openssl passwd -apr1 "$DASHBOARD_KEY")"
  printf 'admin:%s\n' "$HASH" > /tmp/.htpasswd
  echo "Dashboard auth: enabled (user=admin)"
fi
# ── nginx temp dirs ──────────────────────────────────────────────────────────
# Create one scratch directory under /tmp per nginx temp-path kind.
for nginx_tmp_kind in client-body proxy fastcgi uwsgi scgi; do
  mkdir -p "/tmp/nginx-${nginx_tmp_kind}"
done
# ── Detect GPUs and start one llama-server per GPU ───────────────────────────
#######################################
# Print how many lines `nvidia-smi -L` emits (one per listed device),
# falling back to 1 when the tool is missing, fails, or lists nothing.
# Outputs:   the count, as a single line on stdout
# Returns:   0
#######################################
detect_gpu_count() {
  local count
  # The pipeline's exit status is that of `wc`, which succeeds even when
  # nvidia-smi is absent, so a trailing `|| echo 1` can never fire; the
  # fallback must be decided from the counted value instead.
  count=$(nvidia-smi -L 2>/dev/null | wc -l)
  count=${count//[[:space:]]/}
  if ! [[ "$count" =~ ^[0-9]+$ ]] || [ "$count" -lt 1 ]; then
    count=1
  fi
  printf '%s\n' "$count"
}

GPU_COUNT=$(detect_gpu_count)
echo "GPUs detected: $GPU_COUNT"
# Launch one background llama-server per GPU index (0 .. GPU_COUNT-1), each
# pinned to its GPU via CUDA_VISIBLE_DEVICES and listening on 127.0.0.1 at
# port 7861 + index. BACKENDS accumulates one nginx "server" line per
# instance, using a literal backslash-n that printf expands further down.
BACKENDS=""
for ((i = 0; i < GPU_COUNT; i++)); do
  PORT=$((7861 + i))
  echo "Starting llama-server on GPU $i β port $PORT"

  # Full argv for this instance, kept in an array so each word stays intact.
  llama_args=(
    -m "$MODEL_DIR/$MODEL_FILE"
    --host 127.0.0.1
    --port "$PORT"
    -ngl 99
    -fa on
    -np 4
    -c 262144
    --metrics
    --temp 0.5 --top-p 0.85 --top-k 20 --min-p 0
    --reasoning-budget 0 --reasoning-format none
    --chat-template-kwargs '{"enable_thinking": false}'
    --log-disable
  )
  CUDA_VISIBLE_DEVICES=$i /app/bin/llama-server "${llama_args[@]}" &

  BACKENDS+=" server 127.0.0.1:${PORT};\n"
done

# Emit the upstream block for nginx; %b expands the "\n" sequences stored
# in BACKENDS.
printf 'upstream llama_backends {\n least_conn;\n%b}\n' "$BACKENDS" \
  > /tmp/nginx-upstream.conf
# ── Write stub files so /gpu and /analytics never 404 before first tick ──────
# Each file gets a single placeholder line.
printf '%s\n' '{"ts":null,"gpus":[]}' > /tmp/gpu-stats.json
printf '%s\n' '# waiting for first metrics scrape...' > /tmp/llama-metrics.txt

# The three summary objects in the analytics stub are identical all-zero records.
empty_summary='{"requests":0,"unique_users":0}'
printf '{"updated_at":null,"summary_24h":%s,"summary_7d":%s,"summary_total":%s,"requests_by_hour":[],"requests_by_day":[],"top_users":[]}\n' \
  "$empty_summary" "$empty_summary" "$empty_summary" > /tmp/analytics.json
# ── Start metrics pusher with watchdog ───────────────────────────────────────
#######################################
# Run /app/metrics_pusher.py forever: whenever it exits, with any status,
# log the fact, wait 5 seconds and start it again.
# Outputs:   "[watchdog]" log lines on stdout
# Returns:   never returns on its own
#######################################
start_metrics_pusher() {
  while :; do
    echo "[watchdog] Starting metrics_pusher.py..."
    # The exit status is deliberately discarded so that a crash of the
    # pusher cannot abort the loop under `set -e`.
    python3 /app/metrics_pusher.py || :
    echo "[watchdog] metrics_pusher.py exited β restarting in 5s..."
    sleep 5
  done
}
# Run the metrics-pusher watchdog loop as a background job.
start_metrics_pusher &

# Build the model list from a glob instead of parsing `ls` output, and join
# the names with ", " (the earlier `tr '\n' ', '` mapped the newline to a
# bare comma only, so the space was silently lost).
models_list=""
for model_path in "$MODEL_DIR"/*.gguf; do
  # An unmatched glob stays literal; skip it rather than listing "*.gguf".
  [ -e "$model_path" ] || continue
  models_list+="${models_list:+, }${model_path##*/}"
done

echo ""
echo "=== Bonsai-demo ==="
echo " Models: $models_list"
echo " GPUs: $GPU_COUNT"
echo " Port: 7860 (nginx → llama-server)"
echo ""

# Replace this shell with nginx, using the configuration at /app/nginx.conf.
exec nginx -c /app/nginx.conf
|