File size: 3,383 Bytes
0633a27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# Main context: one worker process, kept in the foreground (no daemonizing),
# with the PID file written under /tmp.
worker_processes 1;
pid              /tmp/nginx.pid;
daemon           off;

# Upper bound on simultaneous connections a worker process may hold open.
events { worker_connections 1024; }

http {
    # Extension-to-MIME map, with a binary fallback for anything it does not cover.
    default_type application/octet-stream;
    include      /etc/nginx/mime.types;

    # Every temporary-file directory is redirected under /tmp.
    scgi_temp_path        /tmp/nginx-scgi;
    uwsgi_temp_path       /tmp/nginx-uwsgi;
    fastcgi_temp_path     /tmp/nginx-fastcgi;
    proxy_temp_path       /tmp/nginx-proxy;
    client_body_temp_path /tmp/nginx-client-body;

    # Generated at startup by entrypoint.sh
    include /tmp/nginx-upstream.conf;

    # No usernames, no chat content — only method, path, status, size, timing
    # Errors at severity "warn" and above go to a file under /tmp.
    error_log /tmp/nginx-error.log warn;

    # Pipe-delimited access records: timestamp, method, URI (without the
    # query string), status, body bytes sent and request duration.
    log_format bonsai '$time_iso8601|$request_method|$uri|$status|$body_bytes_sent|$request_time';
    access_log /tmp/nginx-access.log bonsai;

    server {
        # Accept connections on port 7860; no `ssl` parameter is given, so
        # this listener speaks plain HTTP.
        listen 7860;

        # ── Private: dashboard UI ────────────────────────────────
        location = /dash-2e215f981f3f {
            # Exact-match URI mapped onto a single HTML file on disk.
            alias        /app/dashboard.html;
            # The URI has no file extension, so the MIME type is set explicitly.
            default_type text/html;
            add_header   Cache-Control "no-store";

            # HTTP Basic authentication against the htpasswd file in /tmp.
            auth_basic_user_file /tmp/.htpasswd;
            auth_basic           "Bonsai Internal";
        }

        # ── Private: raw Prometheus metrics (served from file written by metrics_pusher) ──
        location = /metrics-2e215f981f3f {
            # HTTP Basic authentication against the htpasswd file in /tmp.
            auth_basic           "Bonsai Internal";
            auth_basic_user_file /tmp/.htpasswd;

            # Exact-match URI mapped onto a single text file on disk.
            alias                /tmp/llama-metrics.txt;

            # The MIME type is chosen from the request URI, which has no
            # extension, so default_type decides it. Set it here instead of
            # using `add_header Content-Type`: add_header appends a second
            # Content-Type header next to the one nginx already emits.
            # `charset` then yields "text/plain; charset=utf-8".
            default_type         text/plain;
            charset              utf-8;

            add_header           Cache-Control "no-store";
        }

        # ── Private: live GPU stats ───────────────────────────────
        location = /gpu-2e215f981f3f {
            # HTTP Basic authentication against the htpasswd file in /tmp.
            auth_basic           "Bonsai Internal";
            auth_basic_user_file /tmp/.htpasswd;

            # Exact-match URI mapped onto a single JSON file on disk.
            alias                /tmp/gpu-stats.json;

            # The MIME type is chosen from the request URI, which has no
            # extension, so default_type decides it. Set it here instead of
            # using `add_header Content-Type`: add_header appends a second
            # Content-Type header next to the one nginx already emits.
            default_type         application/json;

            add_header           Cache-Control "no-store";
        }

        # ── Private: pre-computed analytics JSON ─────────────────
        location = /analytics-2e215f981f3f {
            # HTTP Basic authentication against the htpasswd file in /tmp.
            auth_basic           "Bonsai Internal";
            auth_basic_user_file /tmp/.htpasswd;

            # Exact-match URI mapped onto a single JSON file on disk.
            alias                /tmp/analytics.json;

            # The MIME type is chosen from the request URI, which has no
            # extension, so default_type decides it. Set it here instead of
            # using `add_header Content-Type`: add_header appends a second
            # Content-Type header next to the one nginx already emits.
            default_type         application/json;

            add_header           Cache-Control "no-store";
        }

        # ── Public: load-balanced across all GPU backends ────────
        location / {
            # Request bodies larger than 10 MB are rejected.
            client_max_body_size 10m;

            # Forward to the `llama_backends` upstream group over HTTP/1.1.
            # The Connection header is cleared rather than passed through.
            proxy_http_version 1.1;
            proxy_set_header   Connection "";
            proxy_pass         http://llama_backends;

            # Pass the original host and client address to the backend.
            proxy_set_header Host      $host;
            proxy_set_header X-Real-IP $remote_addr;

            # Strip identity headers before they reach llama-server
            proxy_set_header Authorization "";
            proxy_set_header X-HF-User     "";

            # SSE / streaming completions: relay the response unbuffered and
            # uncached, and tolerate up to 600 s between reads or writes.
            proxy_cache               off;
            proxy_buffering           off;
            chunked_transfer_encoding on;
            proxy_send_timeout        600s;
            proxy_read_timeout        600s;
        }
    }
}