#!/bin/bash
# Bonsai-demo Space entrypoint.
#
# Downloads the GGUF model(s) from Hugging Face, starts one llama-server
# per detected GPU on ports 7861+, generates the nginx upstream config,
# launches a metrics-pusher watchdog, then execs nginx on port 7860.
#
# Required env (Space secrets): MODEL_REPO, MODEL_FILE
# Optional env: MODEL_REPO_B/MODEL_FILE_B, MODEL_REPO_C/MODEL_FILE_C,
#               DASHBOARD_KEY
set -eu

echo "Starting Bonsai-demo entrypoint..."

# ── Validate required secrets ─────────────────────────────────────────────────
# ${VAR:-} so that under `set -u` an unset secret still reaches the friendly
# error message instead of dying with "unbound variable".
if [ -z "${MODEL_REPO:-}" ] || [ -z "${MODEL_FILE:-}" ]; then
  echo ""
  echo "ERROR: MODEL_REPO and MODEL_FILE secrets must be set."
  echo "  Go to Space Settings → Repository Secrets and add:"
  echo "    MODEL_REPO = prism-ml/Bonsai-8B-gguf"
  echo "    MODEL_FILE = Bonsai-8B.gguf"
  echo ""
  exit 1
fi

MODEL_DIR="/app/models"
mkdir -p "$MODEL_DIR"

#######################################
# Download one GGUF file from huggingface.co into $MODEL_DIR.
# Globals:   MODEL_DIR (read)
# Arguments: $1 - repo id (e.g. org/name), $2 - file name
# Outputs:   progress/diagnostics; errors to stderr
# Returns:   0 on success; exits the whole script after 5 failed attempts
#######################################
download_model() {
  local repo file path url attempt retries=5
  # Strip stray whitespace that often sneaks into copy-pasted secrets.
  repo=$(printf '%s' "$1" | tr -d '[:space:]')
  file=$(printf '%s' "$2" | tr -d '[:space:]')
  path="$MODEL_DIR/$file"
  url="https://huggingface.co/$repo/resolve/main/$file"
  for attempt in $(seq 1 "$retries"); do
    echo "Downloading (attempt $attempt/$retries): $url"
    # Remove any partial file so `-C -` can't resume from a corrupt fragment.
    rm -f "$path"
    if curl -fL --retry 3 --retry-delay 5 -C - -o "$path" "$url"; then
      echo "Downloaded: $(ls -lh "$path")"
      return 0
    fi
    echo "Download failed, retrying in 10s..."
    sleep 10
  done
  echo "ERROR: Failed to download $url after $retries attempts" >&2
  exit 1
}

# Primary model (required)
MODEL_REPO=$(printf '%s' "$MODEL_REPO" | tr -d '[:space:]')
MODEL_FILE=$(printf '%s' "$MODEL_FILE" | tr -d '[:space:]')
download_model "$MODEL_REPO" "$MODEL_FILE"

# Additional models (optional): MODEL_{REPO,FILE}_B and _C pairs.
# NOTE(review): MODELS_MAX is incremented but not read anywhere visible in
# this script — presumably consumed elsewhere; verify before removing.
MODELS_MAX=1
for suffix in B C; do
  repo_var="MODEL_REPO_${suffix}"
  file_var="MODEL_FILE_${suffix}"
  # ${!var:-} = indirect expansion with empty default (pair may be unset).
  repo=$(printf '%s' "${!repo_var:-}" | tr -d '[:space:]')
  file=$(printf '%s' "${!file_var:-}" | tr -d '[:space:]')
  if [ -n "$repo" ] && [ -n "$file" ]; then
    download_model "$repo" "$file"
    MODELS_MAX=$((MODELS_MAX + 1))
  fi
done

# ── Dashboard auth ────────────────────────────────────────────────────────────
if [ -n "${DASHBOARD_KEY:-}" ]; then
  HASH=$(openssl passwd -apr1 "$DASHBOARD_KEY")
  echo "admin:$HASH" > /tmp/.htpasswd
  echo "Dashboard auth: enabled (user=admin)"
else
  echo "WARNING: DASHBOARD_KEY not set, /dash-2e215f981f3f is unprotected" >&2
  # Placeholder htpasswd entry so nginx still finds a file to read.
  printf 'admin:$apr1$open$open\n' > /tmp/.htpasswd
fi

# ── nginx temp dirs ───────────────────────────────────────────────────────────
mkdir -p /tmp/nginx-{client-body,proxy,fastcgi,uwsgi,scgi}

# ── Detect GPUs and start one llama-server per GPU ────────────────────────────
# `wc -l` prints 0 even when nvidia-smi fails, so an `|| echo 1` on the
# pipeline never fires and GPU_COUNT could end up 0 (starting zero backends).
# Guard explicitly: anything that isn't a count >= 1 falls back to 1.
GPU_COUNT=$(nvidia-smi -L 2>/dev/null | wc -l) || GPU_COUNT=0
if ! [ "$GPU_COUNT" -ge 1 ] 2>/dev/null; then
  GPU_COUNT=1
fi
echo "GPUs detected: $GPU_COUNT"

BACKENDS=""
for i in $(seq 0 $((GPU_COUNT - 1))); do
  PORT=$((7861 + i))
  echo "Starting llama-server on GPU $i → port $PORT"
  CUDA_VISIBLE_DEVICES=$i /app/bin/llama-server \
    -m "$MODEL_DIR/$MODEL_FILE" \
    --host 127.0.0.1 \
    --port "$PORT" \
    -ngl 99 \
    -fa on \
    -np 4 \
    -c 262144 \
    --metrics \
    --temp 0.5 --top-p 0.85 --top-k 20 --min-p 0 \
    --reasoning-budget 0 --reasoning-format none \
    --chat-template-kwargs '{"enable_thinking": false}' \
    --log-disable &
  # Real newlines (not literal "\n") so the conf is written verbatim below.
  BACKENDS="$BACKENDS server 127.0.0.1:$PORT;"$'\n'
done
# Use a fixed format string with %s — never interpolate data into the
# printf format itself.
printf 'upstream llama_backends {\n least_conn;\n%s}\n' "$BACKENDS" > /tmp/nginx-upstream.conf

# ── Write stub files so /gpu and /analytics never 404 before first tick ───────
echo '{"ts":null,"gpus":[]}' > /tmp/gpu-stats.json
echo '# waiting for first metrics scrape...' > /tmp/llama-metrics.txt
echo '{"updated_at":null,"summary_24h":{"requests":0,"unique_users":0},"summary_7d":{"requests":0,"unique_users":0},"summary_total":{"requests":0,"unique_users":0},"requests_by_hour":[],"requests_by_day":[],"top_users":[]}' > /tmp/analytics.json

#######################################
# Run metrics_pusher.py forever, restarting it 5s after any exit.
# Globals:   none
# Arguments: none
# Outputs:   watchdog log lines to stdout
#######################################
start_metrics_pusher() {
  while true; do
    echo "[watchdog] Starting metrics_pusher.py..."
    python3 /app/metrics_pusher.py || true
    echo "[watchdog] metrics_pusher.py exited — restarting in 5s..."
    sleep 5
  done
}
start_metrics_pusher &

# Build the comma-joined model list from the glob directly (never parse ls).
MODEL_LIST=""
for f in "$MODEL_DIR"/*.gguf; do
  [ -e "$f" ] || continue
  MODEL_LIST="${MODEL_LIST:+$MODEL_LIST,}${f##*/}"
done

echo ""
echo "=== Bonsai-demo ==="
echo "  Models: $MODEL_LIST"
echo "  GPUs:   $GPU_COUNT"
echo "  Port:   7860 (nginx → llama-server)"
echo ""

# Replace the shell with nginx so it receives signals as PID 1.
exec nginx -c /app/nginx.conf