personalgpt

Runtime error

PrismML Deploy

Bonsai-demo: multi-GPU llama.cpp server with live dashboard and metrics

0633a27 about 1 month ago

3.38 kB

	# Process-level settings: one worker, PID file under /tmp, and nginx kept
	# in the foreground instead of detaching as a daemon.
	worker_processes 1;
	pid /tmp/nginx.pid;
	daemon off;

	# Connection handling: at most 1024 simultaneous connections per worker.
	events {
		worker_connections 1024;
	}

	http {
	# Fallback type for responses whose file extension has no MIME mapping.
	default_type application/octet-stream;
	# Extension-to-MIME-type table.
	include /etc/nginx/mime.types;

	# Every scratch directory nginx may create is redirected under /tmp.
	# NOTE(review): presumably the default locations are not writable by the
	# user nginx runs as — confirm against the container setup.
	proxy_temp_path /tmp/nginx-proxy;
	client_body_temp_path /tmp/nginx-client-body;
	fastcgi_temp_path /tmp/nginx-fastcgi;
	scgi_temp_path /tmp/nginx-scgi;
	uwsgi_temp_path /tmp/nginx-uwsgi;

	# Generated at startup by entrypoint.sh.
	# NOTE(review): presumably this file defines the `llama_backends` upstream
	# that proxy_pass refers to — it is not defined anywhere else in this file.
	# The path is not a wildcard, so nginx fails to start if the file is missing.
	include /tmp/nginx-upstream.conf;

	# No usernames, no chat content — only method, path, status, size, timing.
	# $uri is the normalized request path without the query string, so query
	# arguments are never written to the access log.
	# NOTE(review): the field separators are written as `\|`. nginx does not
	# treat `\|` as an escape sequence, so the backslash likely appears verbatim
	# in every log line. Confirm that whatever parses /tmp/nginx-access.log
	# expects `\|` rather than a bare `|` before changing it.
	log_format bonsai '$time_iso8601\|$request_method\|$uri\|$status\|$body_bytes_sent\|$request_time';
	access_log /tmp/nginx-access.log bonsai;
	# Only messages at "warn" severity or above are written to the error log.
	error_log /tmp/nginx-error.log warn;

	server {
	# Single listener on port 7860; every location in this server block is served from it.
	listen 7860;

	# Private dashboard page: exact-match URL, protected by HTTP basic auth,
	# served straight from a single file and marked as non-cacheable.
	location = /dash-2e215f981f3f {
		alias /app/dashboard.html;
		default_type text/html;
		add_header Cache-Control "no-store";

		auth_basic_user_file /tmp/.htpasswd;
		auth_basic "Bonsai Internal";
	}

	# ── Private: raw Prometheus metrics (served from file written by metrics_pusher) ──
	# Exact-match URL, protected by HTTP basic auth, never cached.
	location = /metrics-2e215f981f3f {
		auth_basic "Bonsai Internal";
		auth_basic_user_file /tmp/.htpasswd;
		alias /tmp/llama-metrics.txt;

		# Set the content type through nginx's own machinery. `add_header
		# Content-Type ...` does not replace the Content-Type nginx derives from
		# the file extension; it appends a second header, so the response ended
		# up with two Content-Type headers. Clearing the extension map makes
		# default_type authoritative, and `charset` appends "; charset=utf-8".
		types { }
		default_type text/plain;
		charset utf-8;

		add_header Cache-Control "no-store";
	}

	# ── Private: live GPU stats ───────────────────────────────
	# Live GPU stats JSON: exact-match URL, protected by HTTP basic auth, never cached.
	location = /gpu-2e215f981f3f {
		auth_basic "Bonsai Internal";
		auth_basic_user_file /tmp/.htpasswd;
		alias /tmp/gpu-stats.json;

		# Force the content type via default_type instead of `add_header
		# Content-Type ...`: add_header appends a second Content-Type header next
		# to the one nginx derives from the file extension rather than replacing
		# it. Clearing the extension map makes default_type authoritative.
		types { }
		default_type application/json;

		add_header Cache-Control "no-store";
	}

	# ── Private: pre-computed analytics JSON ─────────────────
	# Exact-match URL, protected by HTTP basic auth, never cached.
	location = /analytics-2e215f981f3f {
		auth_basic "Bonsai Internal";
		auth_basic_user_file /tmp/.htpasswd;
		alias /tmp/analytics.json;

		# Force the content type via default_type instead of `add_header
		# Content-Type ...`: add_header appends a second Content-Type header next
		# to the one nginx derives from the file extension rather than replacing
		# it. Clearing the extension map makes default_type authoritative.
		types { }
		default_type application/json;

		add_header Cache-Control "no-store";
	}

	# Public catch-all: every request not matched by an exact location above is
	# forwarded to the `llama_backends` upstream group.
	location / {
		# Request body limit for uploads to the backend.
		client_max_body_size 10m;

		# Upstream connection: HTTP/1.1 with the Connection header cleared.
		proxy_http_version 1.1;
		proxy_set_header Connection "";
		proxy_pass http://llama_backends;

		# Headers forwarded to the backend.
		proxy_set_header Host $host;
		proxy_set_header X-Real-IP $remote_addr;

		# Identity headers are blanked; nginx does not pass a header whose
		# value is an empty string, so llama-server never sees them.
		proxy_set_header Authorization "";
		proxy_set_header X-HF-User "";

		# Streaming (SSE) completions: no response buffering or caching, and
		# generous timeouts for long-running generations.
		proxy_cache off;
		proxy_buffering off;
		chunked_transfer_encoding on;
		proxy_send_timeout 600s;
		proxy_read_timeout 600s;
	}
	}
	}