File size: 4,716 Bytes
0633a27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0a882d1
 
 
 
 
 
 
 
 
 
0633a27
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#!/bin/bash
set -e

echo "Starting Bonsai-demo entrypoint..."

# ── Validate required secrets ─────────────────────────────────────────────────
# Both MODEL_REPO and MODEL_FILE must arrive as Space secrets; fail fast with
# setup instructions when either is missing.
if [ -z "${MODEL_REPO}" ] || [ -z "${MODEL_FILE}" ]; then
    printf '%s\n' \
        "" \
        "ERROR: MODEL_REPO and MODEL_FILE secrets must be set." \
        "  Go to Space Settings β†’ Repository Secrets and add:" \
        "    MODEL_REPO = prism-ml/Bonsai-8B-gguf" \
        "    MODEL_FILE = Bonsai-8B.gguf" \
        ""
    exit 1
fi

# Every model file is downloaded into this directory.
MODEL_DIR="/app/models"
mkdir -p "$MODEL_DIR"

#######################################
# Download one GGUF file from the Hugging Face Hub into $MODEL_DIR.
# Globals:   MODEL_DIR (read)
# Arguments: $1 - repo id (e.g. prism-ml/Bonsai-8B-gguf); whitespace stripped
#            $2 - file name inside the repo; whitespace stripped
# Outputs:   progress/diagnostics to stdout
# Returns:   0 on success; exits the script (status 1) when all attempts fail
#######################################
download_model() {
    local repo file path retries=5
    repo=$(echo "$1" | tr -d '[:space:]')
    file=$(echo "$2" | tr -d '[:space:]')
    path="$MODEL_DIR/$file"
    local url="https://huggingface.co/$repo/resolve/main/$file"
    # Remove any stale file once, BEFORE the retry loop. Deleting it on every
    # attempt (as before) made `curl -C -` pointless: each retry restarted
    # from byte 0. Keeping the partial file lets later attempts resume.
    rm -f "$path"
    for attempt in $(seq 1 "$retries"); do
        echo "Downloading (attempt $attempt/$retries): $url"
        # -f: fail on HTTP errors (no error page written to $path)
        # -C -: resume from whatever partial content a prior attempt left
        # [ -s ]: reject a zero-byte result even if curl reported success
        if curl -fL --retry 3 --retry-delay 5 -C - -o "$path" "$url" 2>&1 \
            && [ -s "$path" ]; then
            echo "Downloaded: $(ls -lh "$path")"
            return 0
        fi
        echo "Download failed, retrying in 10s..."
        sleep 10
    done
    echo "ERROR: Failed to download $url after $retries attempts"
    exit 1
}

# Primary model (required). Strip stray whitespace from the secrets; the
# cleaned values are reused later when the llama-server backends start.
MODEL_REPO=$(tr -d '[:space:]' <<<"$MODEL_REPO")
MODEL_FILE=$(tr -d '[:space:]' <<<"$MODEL_FILE")
download_model "$MODEL_REPO" "$MODEL_FILE"

# Additional models (optional): MODEL_REPO_B/MODEL_FILE_B and
# MODEL_REPO_C/MODEL_FILE_C may also be supplied as secrets. MODELS_MAX ends
# up holding the total number of models fetched.
MODELS_MAX=1
for extra in B C; do
    repo_name="MODEL_REPO_${extra}"
    file_name="MODEL_FILE_${extra}"
    # ${!var} is indirect expansion; :-"" keeps unset secrets harmless.
    extra_repo=$(tr -d '[:space:]' <<<"${!repo_name:-}")
    extra_file=$(tr -d '[:space:]' <<<"${!file_name:-}")
    if [ -n "$extra_repo" ] && [ -n "$extra_file" ]; then
        download_model "$extra_repo" "$extra_file"
        MODELS_MAX=$((MODELS_MAX + 1))
    fi
done

# ── Dashboard auth ────────────────────────────────────────────────────────────
# Writes /tmp/.htpasswd for nginx basic auth on the dashboard endpoint.
if [ -n "$DASHBOARD_KEY" ]; then
    # Feed the key to openssl via stdin (-stdin) so the secret never appears
    # in the process argv, where any user could read it via `ps`.
    HASH=$(printf '%s\n' "$DASHBOARD_KEY" | openssl passwd -apr1 -stdin)
    echo "admin:$HASH" > /tmp/.htpasswd
    echo "Dashboard auth: enabled (user=admin)"
else
    echo "WARNING: DASHBOARD_KEY not set, /dash-2e215f981f3f is unprotected"
    # Placeholder entry; NOTE(review): this is not a valid apr1 hash, so it
    # presumably never matches — confirm nginx treats it as "deny all".
    printf 'admin:$apr1$open$open\n' > /tmp/.htpasswd
fi

# ── nginx temp dirs ───────────────────────────────────────────────────────────
mkdir -p /tmp/nginx-{client-body,proxy,fastcgi,uwsgi,scgi}

# ── Detect GPUs and start one llama-server per GPU ───────────────────────────
# `nvidia-smi -L` prints one line per GPU. When the tool is missing or fails,
# `wc -l` (the last pipeline stage) still exits 0 and prints 0, so the old
# `|| echo 1` fallback could never fire and GPU_COUNT became 0 — which made
# the launch loop below (`seq 0 -1`) start zero backends. Clamp explicitly.
GPU_COUNT=$(nvidia-smi -L 2>/dev/null | wc -l) || GPU_COUNT=0
if [ "$GPU_COUNT" -lt 1 ]; then
    GPU_COUNT=1
fi
echo "GPUs detected: $GPU_COUNT"

# Launch one backend per GPU on ports 7861, 7862, ... and collect the matching
# nginx `server` lines for the upstream block.
BACKENDS=""
for i in $(seq 0 $((GPU_COUNT - 1))); do
    PORT=$((7861 + i))
    echo "Starting llama-server on GPU $i β†’ port $PORT"
    # NOTE(review): flags below are passed through to llama-server as-is —
    # confirm they match the CLI of the installed llama.cpp build.
    CUDA_VISIBLE_DEVICES=$i /app/bin/llama-server \
        -m "$MODEL_DIR/$MODEL_FILE" \
        --host 127.0.0.1 \
        --port "$PORT" \
        -ngl 99 \
        -fa on \
        -np 4 \
        -c 262144 \
        --metrics \
        --temp 0.5 --top-p 0.85 --top-k 20 --min-p 0 \
        --reasoning-budget 0 --reasoning-format none \
        --chat-template-kwargs '{"enable_thinking": false}' \
        --log-disable &
    # Append with a real newline (instead of a literal "\n" to be expanded by
    # printf later) so the variable can be emitted as plain data below.
    BACKENDS="${BACKENDS}    server 127.0.0.1:${PORT};"$'\n'
done

# Pass BACKENDS as a printf *argument* (%s), never inside the format string:
# a stray % in interpolated data would otherwise be treated as a conversion.
printf 'upstream llama_backends {\n    least_conn;\n%s}\n' "$BACKENDS" > /tmp/nginx-upstream.conf

# ── Write stub files so /gpu and /analytics never 404 before first tick ──────
# Placeholder payloads; the metrics pusher overwrites these on its first cycle.
printf '%s\n' '{"ts":null,"gpus":[]}' > /tmp/gpu-stats.json
printf '%s\n' '# waiting for first metrics scrape...' > /tmp/llama-metrics.txt
printf '%s\n' '{"updated_at":null,"summary_24h":{"requests":0,"unique_users":0},"summary_7d":{"requests":0,"unique_users":0},"summary_total":{"requests":0,"unique_users":0},"requests_by_hour":[],"requests_by_day":[],"top_users":[]}' > /tmp/analytics.json

# ── Start metrics pusher with watchdog ────────────────────────────────────────
# Supervision loop: metrics_pusher.py is expected to run indefinitely; if it
# exits for any reason, restart it after a short pause. The `|| true` keeps a
# non-zero exit from killing this script under `set -e`.
start_metrics_pusher() {
    while true; do
        echo "[watchdog] Starting metrics_pusher.py..."
        python3 /app/metrics_pusher.py || true
        echo "[watchdog] metrics_pusher.py exited β€” restarting in 5s..."
        sleep 5
    done
}
# Run the watchdog in the background so this script can go on to exec nginx.
start_metrics_pusher &

echo ""
echo "=== Bonsai-demo ==="
# Build the comma-separated model list with a glob instead of parsing `ls`
# (SC2012: `ls` output is not safe to parse). Produces the same
# "a.gguf,b.gguf" formatting as the old ls|xargs|tr|sed pipeline; an empty
# directory yields an empty list instead of a literal glob pattern.
MODEL_LIST=""
for gguf in "$MODEL_DIR"/*.gguf; do
    [ -e "$gguf" ] || continue
    MODEL_LIST="${MODEL_LIST:+$MODEL_LIST,}${gguf##*/}"
done
echo "  Models: $MODEL_LIST"
echo "  GPUs:   $GPU_COUNT"
echo "  Port:   7860 (nginx β†’ llama-server)"
echo ""

# Replace this shell with nginx so it becomes PID of record and receives
# container signals directly.
exec nginx -c /app/nginx.conf