Spaces:
Runtime error
Runtime error
# Abort the entrypoint as soon as any unguarded command fails.
set -e

echo "Starting Bonsai-demo entrypoint..."

# -- Validate required secrets --
# MODEL_REPO and MODEL_FILE must both be non-empty. If either is missing,
# print setup instructions and stop with status 1.
if [[ -z "$MODEL_REPO" || -z "$MODEL_FILE" ]]; then
  printf '%s\n' \
    "" \
    "ERROR: MODEL_REPO and MODEL_FILE secrets must be set." \
    " Go to Space Settings β Repository Secrets and add:" \
    " MODEL_REPO = prism-ml/Bonsai-8B-gguf" \
    " MODEL_FILE = Bonsai-8B.gguf" \
    ""
  exit 1
fi

# Directory that receives every downloaded model file.
MODEL_DIR="/app/models"
mkdir -p "$MODEL_DIR"
#######################################
# Download one model file from huggingface.co into MODEL_DIR.
#
# The transfer is written to "<target>.part" and renamed to the final name
# only after curl succeeds, so a half-written file never sits at the final
# path. The partial file is kept between attempts so that `curl -C -` can
# resume instead of restarting from byte zero.
#
# Globals:
#   MODEL_DIR (read) - directory the file is stored in
# Arguments:
#   $1 - repository id, e.g. "org/name" (all whitespace is stripped)
#   $2 - file name inside the repository (all whitespace is stripped)
# Outputs:
#   Progress messages on stdout (curl's stderr is merged into stdout);
#   error messages on stderr.
# Returns:
#   0 on success. On an empty argument, or after all attempts fail, the
#   whole script is terminated with `exit 1`.
#######################################
download_model() {
  local repo file path part url attempt
  local -r retries=5

  # Secrets pasted into a web form often carry stray spaces or newlines.
  repo=${1//[[:space:]]/}
  file=${2//[[:space:]]/}
  if [[ -z "$repo" || -z "$file" ]]; then
    echo "ERROR: download_model needs a non-empty repo and file" >&2
    exit 1
  fi

  path="$MODEL_DIR/$file"
  part="$path.part"
  url="https://huggingface.co/$repo/resolve/main/$file"

  # The file name may contain a sub-directory; make sure it exists.
  mkdir -p -- "${path%/*}"

  # Always start from a clean slate once per call; leftovers from an earlier
  # run must not be mistaken for the beginning of this download.
  rm -f -- "$path" "$part"

  for ((attempt = 1; attempt <= retries; attempt++)); do
    echo "Downloading (attempt $attempt/$retries): $url"
    if curl -fL --retry 3 --retry-delay 5 -C - -o "$part" "$url" 2>&1; then
      mv -f -- "$part" "$path"
      echo "Downloaded: $(ls -lh "$path")"
      return 0
    fi
    # No point in waiting after the final attempt.
    if ((attempt < retries)); then
      echo "Download failed, retrying in 10s..."
      sleep 10
    fi
  done

  echo "ERROR: Failed to download $url after $retries attempts" >&2
  exit 1
}
# Print $1 with every whitespace character removed.
_strip_ws() {
  echo "$1" | tr -d '[:space:]'
}

# Primary model (required): normalise both secrets, then fetch the file.
MODEL_REPO=$(_strip_ws "$MODEL_REPO")
MODEL_FILE=$(_strip_ws "$MODEL_FILE")
download_model "$MODEL_REPO" "$MODEL_FILE"

# Additional models (optional): MODEL_REPO_B/MODEL_FILE_B and
# MODEL_REPO_C/MODEL_FILE_C. A slot is used only when both of its variables
# are non-empty. MODELS_MAX ends up as the number of models downloaded.
MODELS_MAX=1
for slot in B C; do
  repo_key="MODEL_REPO_${slot}"
  file_key="MODEL_FILE_${slot}"
  extra_repo=$(_strip_ws "${!repo_key:-}")
  extra_file=$(_strip_ws "${!file_key:-}")
  if [[ -z "$extra_repo" || -z "$extra_file" ]]; then
    continue
  fi
  download_model "$extra_repo" "$extra_file"
  MODELS_MAX=$((MODELS_MAX + 1))
done
# -- Dashboard auth --
#######################################
# Write the htpasswd file used for the dashboard.
#
# With DASHBOARD_KEY set, the file holds user "admin" and an apr1 hash of the
# key. Without it, a fixed placeholder entry is written and a warning is
# printed.
#
# Globals:
#   DASHBOARD_KEY (read)
# Arguments:
#   $1 - output file (optional, default /tmp/.htpasswd)
# Outputs:
#   Status line on stdout, warning on stderr.
# Returns:
#   0 on success, the status of the failing command otherwise.
#######################################
write_dashboard_htpasswd() {
  local htpasswd_file=${1:-/tmp/.htpasswd}
  local hash

  if [[ -n "${DASHBOARD_KEY:-}" ]]; then
    # Feed the key on stdin: a command-line argument would be visible to
    # anything that can list processes. `openssl passwd -stdin` hashes one
    # password per input line, so only the first line of the key is sent.
    hash=$(printf '%s\n' "${DASHBOARD_KEY%%$'\n'*}" \
      | openssl passwd -apr1 -stdin) || return
    printf 'admin:%s\n' "$hash" > "$htpasswd_file"
    echo "Dashboard auth: enabled (user=admin)"
  else
    echo "WARNING: DASHBOARD_KEY not set, /dash-2e215f981f3f is unprotected" >&2
    printf '%s\n' 'admin:$apr1$open$open' > "$htpasswd_file"
  fi
}

write_dashboard_htpasswd
# -- nginx temp dirs --
# Create one scratch directory under /tmp for each nginx temp-path type.
for module in client-body proxy fastcgi uwsgi scgi; do
  mkdir -p "/tmp/nginx-${module}"
done
# -- Detect GPUs and start one llama-server per GPU --
#######################################
# Print the number of GPUs listed by `nvidia-smi -L`, never less than 1.
#
# Falls back to 1 when nvidia-smi is missing, fails, or lists no "GPU N:"
# lines, so that at least one backend is always started and the generated
# nginx upstream block is never empty.
#
# Outputs:
#   The count on stdout.
#######################################
detect_gpu_count() {
  local count
  # grep -c exits non-zero when nothing matches; that is the fallback case,
  # not an error, so the status is deliberately ignored.
  count=$(nvidia-smi -L 2>/dev/null | grep -c '^GPU ') || true
  if ! [[ "$count" =~ ^[0-9]+$ ]] || ((count < 1)); then
    count=1
  fi
  printf '%s\n' "$count"
}

GPU_COUNT=$(detect_gpu_count)
echo "GPUs detected: $GPU_COUNT"

# One backend per GPU, on consecutive ports starting at 7861. BACKENDS
# collects the matching `server` lines for the nginx upstream block.
BACKENDS=""
for ((i = 0; i < GPU_COUNT; i++)); do
  PORT=$((7861 + i))
  echo "Starting llama-server on GPU $i -> port $PORT"
  CUDA_VISIBLE_DEVICES=$i /app/bin/llama-server \
    -m "$MODEL_DIR/$MODEL_FILE" \
    --host 127.0.0.1 \
    --port "$PORT" \
    -ngl 99 \
    -fa on \
    -np 4 \
    -c 262144 \
    --metrics \
    --temp 0.5 --top-p 0.85 --top-k 20 --min-p 0 \
    --reasoning-budget 0 --reasoning-format none \
    --chat-template-kwargs '{"enable_thinking": false}' \
    --log-disable &
  BACKENDS+="    server 127.0.0.1:$PORT;"$'\n'
done

# Data goes through %s rather than into the format string.
printf 'upstream llama_backends {\n    least_conn;\n%s}\n' "$BACKENDS" \
  > /tmp/nginx-upstream.conf
# -- Write stub files so /gpu and /analytics never 404 before first tick --
# Placeholder GPU stats, metrics text and analytics JSON under /tmp.
printf '%s\n' '{"ts":null,"gpus":[]}' > /tmp/gpu-stats.json
printf '%s\n' '# waiting for first metrics scrape...' > /tmp/llama-metrics.txt
cat > /tmp/analytics.json <<'JSON'
{"updated_at":null,"summary_24h":{"requests":0,"unique_users":0},"summary_7d":{"requests":0,"unique_users":0},"summary_total":{"requests":0,"unique_users":0},"requests_by_hour":[],"requests_by_day":[],"top_users":[]}
JSON
# -- Start metrics pusher with watchdog --
# Run /app/metrics_pusher.py in an endless supervise loop: whenever the
# script exits, for any reason, wait five seconds and start it again.
# This function never returns.
start_metrics_pusher() {
  while :; do
    printf '%s\n' "[watchdog] Starting metrics_pusher.py..."
    if ! python3 /app/metrics_pusher.py; then
      : # a non-zero exit is tolerated; fall through to the restart
    fi
    printf '%s\n' "[watchdog] metrics_pusher.py exited β restarting in 5s..."
    sleep 5
  done
}

# Run the watchdog in the background for the lifetime of the container.
start_metrics_pusher &
# Startup banner: base names of every *.gguf file in MODEL_DIR joined with
# commas, the GPU count, and the public port.
model_list=$(ls "$MODEL_DIR"/*.gguf | xargs -n1 basename | tr '\n' ', ' | sed 's/,$//')
printf '%s\n' \
  "" \
  "=== Bonsai-demo ===" \
  " Models: $model_list" \
  " GPUs: $GPU_COUNT" \
  " Port: 7860 (nginx β llama-server)" \
  ""

# Replace this shell with nginx, using the config at /app/nginx.conf.
exec nginx -c /app/nginx.conf