# personalgpt / entrypoint.sh
# PrismML Deploy
# Fix: metrics push (summary not commit_message), crash resilience + watchdog
# Commit: 0a882d1
#!/bin/bash
set -e

echo "Starting Bonsai-demo entrypoint..."

# ── Validate required secrets ─────────────────────────────────────────────────
# Refuse to start without the two mandatory Space secrets; print setup help.
if [ -z "$MODEL_REPO" ] || [ -z "$MODEL_FILE" ]; then
  cat <<'USAGE'

ERROR: MODEL_REPO and MODEL_FILE secrets must be set.
 Go to Space Settings β†’ Repository Secrets and add:
 MODEL_REPO = prism-ml/Bonsai-8B-gguf
 MODEL_FILE = Bonsai-8B.gguf

USAGE
  exit 1
fi

# All GGUF files land here; created up front so downloads have a target.
MODEL_DIR="/app/models"
mkdir -p "$MODEL_DIR"
#######################################
# Download one model file from the Hugging Face Hub into $MODEL_DIR.
# Globals:   MODEL_DIR (read)
# Arguments: $1 - repo id (e.g. prism-ml/Bonsai-8B-gguf)
#            $2 - file name inside the repo
# Outputs:   progress/diagnostics to stdout
# Exits:     terminates the whole script after $retries failed attempts
#######################################
download_model() {
  local repo file path attempt retries=5   # fix: 'attempt' was leaking as a global
  # Secrets pasted into the Space UI often carry stray whitespace — strip it.
  repo=$(echo "$1" | tr -d '[:space:]')
  file=$(echo "$2" | tr -d '[:space:]')
  path="$MODEL_DIR/$file"
  local url="https://huggingface.co/$repo/resolve/main/$file"
  for attempt in $(seq 1 "$retries"); do
    echo "Downloading (attempt $attempt/$retries): $url"
    # Fix: the old code ran `rm -f "$path"` before every attempt, which made
    # the `-C -` resume flag a no-op and restarted multi-GB transfers from
    # byte 0. Keep the partial file so curl can resume where it left off.
    if curl -fL --retry 3 --retry-delay 5 -C - -o "$path" "$url" 2>&1; then
      echo "Downloaded: $(ls -lh "$path")"
      return 0
    fi
    echo "Download failed, retrying in 10s..."
    sleep 10
  done
  rm -f "$path"   # don't leave a possibly-corrupt partial behind on hard failure
  echo "ERROR: Failed to download $url after $retries attempts"
  exit 1
}
# Primary model (required) — strip any whitespace pasted into the secrets.
MODEL_REPO=$(printf '%s' "$MODEL_REPO" | tr -d '[:space:]')
MODEL_FILE=$(printf '%s' "$MODEL_FILE" | tr -d '[:space:]')
download_model "$MODEL_REPO" "$MODEL_FILE"

# Additional models (optional): MODEL_REPO_B/MODEL_FILE_B and the _C pair.
# Both halves of a pair must be non-empty for the download to happen.
MODELS_MAX=1
for suffix in B C; do
  extra_repo_var="MODEL_REPO_${suffix}"
  extra_file_var="MODEL_FILE_${suffix}"
  extra_repo=$(tr -d '[:space:]' <<<"${!extra_repo_var:-}")
  extra_file=$(tr -d '[:space:]' <<<"${!extra_file_var:-}")
  if [[ -n "$extra_repo" && -n "$extra_file" ]]; then
    download_model "$extra_repo" "$extra_file"
    MODELS_MAX=$((MODELS_MAX + 1))
  fi
done
# ── Dashboard auth ────────────────────────────────────────────────────────────
# Write an htpasswd file for the nginx-protected dashboard path.
if [ -n "$DASHBOARD_KEY" ]; then
  # Fix: feed the secret via stdin instead of argv so it is never visible in
  # `ps` output or /proc/*/cmdline while openssl runs.
  HASH=$(printf '%s\n' "$DASHBOARD_KEY" | openssl passwd -apr1 -stdin)
  echo "admin:$HASH" > /tmp/.htpasswd
  echo "Dashboard auth: enabled (user=admin)"
else
  echo "WARNING: DASHBOARD_KEY not set, /dash-2e215f981f3f is unprotected"
  # NOTE(review): '$apr1$open$open' is not a well-formed apr1 hash of any
  # password, so if nginx actually enforces this file nothing would match —
  # confirm nginx.conf disables auth on this path when the key is unset.
  printf 'admin:$apr1$open$open\n' > /tmp/.htpasswd
fi

# ── nginx temp dirs ───────────────────────────────────────────────────────────
# nginx runs unprivileged here, so its scratch dirs must live under /tmp.
mkdir -p /tmp/nginx-{client-body,proxy,fastcgi,uwsgi,scgi}
# ── Detect GPUs and start one llama-server per GPU ───────────────────────────
# Count "GPU n: ..." lines from nvidia-smi -L.
# Fix: the old `nvidia-smi -L | wc -l || echo 1` fallback never fired, because
# `wc -l` succeeds (printing 0) even when nvidia-smi is missing — GPU_COUNT
# became 0 and `seq 0 -1` started zero backends. `grep -c` exits non-zero on
# zero matches, so the || fallback actually engages.
GPU_COUNT=$(nvidia-smi -L 2>/dev/null | grep -c '^GPU') || GPU_COUNT=1
echo "GPUs detected: $GPU_COUNT"

BACKENDS=""
for i in $(seq 0 $((GPU_COUNT - 1))); do
  PORT=$((7861 + i))
  echo "Starting llama-server on GPU $i β†’ port $PORT"
  # One backend per GPU, pinned via CUDA_VISIBLE_DEVICES; nginx load-balances
  # across them with least_conn (upstream block written below).
  CUDA_VISIBLE_DEVICES=$i /app/bin/llama-server \
    -m "$MODEL_DIR/$MODEL_FILE" \
    --host 127.0.0.1 \
    --port "$PORT" \
    -ngl 99 \
    -fa on \
    -np 4 \
    -c 262144 \
    --metrics \
    --temp 0.5 --top-p 0.85 --top-k 20 --min-p 0 \
    --reasoning-budget 0 --reasoning-format none \
    --chat-template-kwargs '{"enable_thinking": false}' \
    --log-disable &
  BACKENDS="${BACKENDS} server 127.0.0.1:$PORT;\n"
done

# %b expands the \n escapes stored in BACKENDS; keeping the variable out of
# the printf FORMAT string avoids accidental %-directive interpretation.
printf 'upstream llama_backends {\n least_conn;\n%b}\n' "$BACKENDS" > /tmp/nginx-upstream.conf
# ── Write stub files so /gpu and /analytics never 404 before first tick ──────
# Placeholder payloads; the metrics pusher overwrites each on its first cycle.
printf '%s\n' '{"ts":null,"gpus":[]}' > /tmp/gpu-stats.json
printf '%s\n' '# waiting for first metrics scrape...' > /tmp/llama-metrics.txt
printf '%s\n' '{"updated_at":null,"summary_24h":{"requests":0,"unique_users":0},"summary_7d":{"requests":0,"unique_users":0},"summary_total":{"requests":0,"unique_users":0},"requests_by_hour":[],"requests_by_day":[],"top_users":[]}' > /tmp/analytics.json
# ── Start metrics pusher with watchdog ────────────────────────────────────────
# Supervision loop: relaunch metrics_pusher.py whenever it exits, forever.
# `|| true` keeps a crash of the pusher from tripping the script's `set -e`.
start_metrics_pusher() {
  local restart_delay=5
  while :; do
    echo "[watchdog] Starting metrics_pusher.py..."
    python3 /app/metrics_pusher.py || true
    echo "[watchdog] metrics_pusher.py exited β€” restarting in 5s..."
    sleep "$restart_delay"
  done
}
# Run the metrics watchdog in the background for the container's lifetime.
start_metrics_pusher &
echo ""
echo "=== Bonsai-demo ==="
# List every downloaded .gguf basename, comma-joined (trailing comma stripped).
echo " Models: $(ls "$MODEL_DIR"/*.gguf | xargs -n1 basename | tr '\n' ', ' | sed 's/,$//')"
echo " GPUs: $GPU_COUNT"
echo " Port: 7860 (nginx β†’ llama-server)"
echo ""
# exec replaces this shell with nginx, so nginx inherits the script's PID and
# receives termination signals directly (no bash process left in between).
exec nginx -c /app/nginx.conf