Spaces:

Pratyush-01
/

physix-live

Sleeping

App Files Files Community

physix-live / frontend /src /components /PhysixInferStatus.tsx

Pratyush-01

cleanup: trim verbose comments, drop dead code, fix stale tests, proper Dockerfile + .gitignore

7f40db3 verified 13 days ago

raw

history blame contribute delete

12.4 kB

	/** Status banner for the PhysiX-Infer GPU Space.
	*
	* Why this exists:
	* The PhysiX-Infer Space sleeps after 5 min of idle to avoid burning
	* GPU time. A cold-start takes 90-120 s while vLLM downloads / loads
	* weights for both 3B models. Without warning, a user picks the
	* PhysiX endpoint, hits Run, and stares at a spinner for 2 minutes
	* convinced something is broken.
	*
	* This panel surfaces the underlying state so the wait is expected,
	* not surprising — and offers a one-click "Prewarm" button so the
	* user can kick the boot off before they pick a system / hit Run.
	*
	* Mechanics:
	* - On mount, GET https://pratyush-01-physix-infer.hf.space/health.
	* - The probe maps what it observes onto four states:
	*   * 200 with body { upstreams: { qwen: "ok", physix: "ok" } }
	*     → both vLLMs are loaded and serving. Fast next call ("awake").
	*   * 200 with an upstream that is not "ok", or any non-2xx reply
	*     (e.g. a 503 while booting)
	*     → container running but vLLM still warming. Some calls fast,
	*     some still slow. Treat as "warming".
	*   * No response within the 6 s probe timeout
	*     → Space is asleep. Whatever woke it (this very request) will
	*     now ride the cold-boot pipeline.
	*   * Any other fetch failure (offline, DNS, CORS rejection)
	*     → reported as "error".
	* - Re-poll every 15 s while the component is mounted so the badge
	* stays accurate as the user thinks. Polling is cheap and the
	* requests count as activity, which keeps the Space awake while
	* they read — exactly the UX we want during a demo.
	*
	* Note on CORS: the physix-infer FastAPI uses default CORS. The
	* /health endpoint returns plain JSON, and the probe fetches it in
	* "cors" mode, so the browser only exposes the response (status code
	* and body alike) when the server sends Access-Control-Allow-Origin
	* for this origin; otherwise fetch rejects. If a 200 response has a
	* body we can't parse as JSON, we fall back to "container is up"
	* (best-effort). A probe that times out is reported as "asleep"; any
	* other network or CORS failure is reported as "error". */

	import { useCallback, useEffect, useRef, useState } from "react";

	import { cn } from "@/lib/cn";
	import { PHYSIX_INFER_BASE_URL } from "@/lib/llmPresets";

	// The health endpoint sits at the proxy root, not under the API prefix,
	// so a trailing "/v1" (with or without a final slash) is removed from
	// the base URL before "/health" is appended.
	const HEALTH_URL = `${PHYSIX_INFER_BASE_URL.replace(/\/v1\/?$/, "")}/health`;

	// Poll cadence. 15 s is infrequent enough not to spam HF's edge, yet
	// frequent enough that "GPU is now warm" shows up well before the user
	// has finished typing a prompt.
	const POLL_INTERVAL_MS = 15 * 1_000;

	// Upper bound for one probe. HF keeps a request open while the Space
	// boots (which can take ~120 s); we don't want to sit through the boot,
	// we want to notice the asleep state early so the banner can render
	// "cold" and offer the Prewarm button. No response within 6 s is
	// treated as "asleep".
	const PROBE_TIMEOUT_MS = 6 * 1_000;

	/**
	 * What the banner currently believes about the PhysiX-Infer Space,
	 * discriminated on `kind`:
	 * - "unknown": initial state, before any probe has reported back.
	 * - "awake":   /health answered 200. `bothUpstreams` is true only when
	 *              the response body confirmed every upstream is "ok".
	 * - "warming": the container answered but the upstreams are not all
	 *              ready yet (also set optimistically when Prewarm is clicked).
	 * - "asleep":  the probe timed out without a response.
	 * - "error":   the probe failed for another reason; `message` carries
	 *              the error text.
	 */
	type Status =
	  | { kind: "unknown" }
	  | { kind: "awake"; bothUpstreams: boolean }
	  | { kind: "warming" }
	  | { kind: "asleep" }
	  | { kind: "error"; message: string };

	/** Outcome of a single /health probe, as produced by `runProbe`. */
	interface ProbeResult {
	/** The banner state derived from the probe's response (or failure). */
	status: Status;
	/** True if the probe itself was successful enough to count as a
	* wake-up signal — i.e. HF Spaces' edge proxy received it and
	* routed it to the container. False when the request timed out or
	* failed before any HTTP response arrived. */
	hitContainer: boolean;
	}

	// Module-level dedup state shared by every mounted banner.
	//
	// `inFlight` is the promise of the probe currently on the wire (or
	// null when idle), so concurrent callers can share one network request.
	let inFlight: Promise<ProbeResult> | null = null;
	// `lastResult` is the most recently cached probe outcome together with
	// the `Date.now()` timestamp at which it was stored; `probe()` decides
	// what gets cached and replays it for SHARED_RESULT_WINDOW_MS.
	let lastResult: { result: ProbeResult; at: number } | null = null;
	// How long (ms) a cached result may be replayed instead of re-probing.
	const SHARED_RESULT_WINDOW_MS = 5_000;

	/**
	 * Probes the Space's /health endpoint, sharing work between callers.
	 *
	 * - If a probe is already on the wire, its promise is returned so all
	 *   callers ride the same network request.
	 * - If a result that reached the container was cached less than
	 *   SHARED_RESULT_WINDOW_MS ago, it is replayed without a new request
	 *   (covers a second banner mounting just after the first one finished
	 *   probing).
	 * - Otherwise a new request is issued with a hard PROBE_TIMEOUT_MS
	 *   ceiling enforced through an AbortController.
	 *
	 * Only results with `hitContainer === true` are cached. Replaying a
	 * timeout or network failure would let a caller that asks again right
	 * after the Space came up (e.g. the refresh that follows a Prewarm)
	 * receive a stale "asleep"/"error" verdict.
	 *
	 * @returns The probe outcome.
	 */
	async function probe(): Promise<ProbeResult> {
	  // Coalesce with a request that is still running.
	  if (inFlight) return inFlight;

	  // Replay a fresh enough cached result.
	  if (lastResult && Date.now() - lastResult.at < SHARED_RESULT_WINDOW_MS) {
	    return lastResult.result;
	  }

	  // The async IIFE runs synchronously up to its first await, so the
	  // request is started and `inFlight` is populated before any other
	  // caller can observe an idle state.
	  const request = (async (): Promise<ProbeResult> => {
	    const controller = new AbortController();
	    const timeoutId = window.setTimeout(
	      () => controller.abort(),
	      PROBE_TIMEOUT_MS,
	    );
	    try {
	      return await runProbe(controller.signal);
	    } finally {
	      // Always disarm the timer so it cannot fire after completion.
	      window.clearTimeout(timeoutId);
	    }
	  })();
	  inFlight = request;

	  try {
	    const result = await request;
	    // Cache successes only; a failure also invalidates any older entry
	    // because the newest observation says the Space is unreachable.
	    lastResult = result.hitContainer ? { result, at: Date.now() } : null;
	    return result;
	  } finally {
	    // Clear the slot only if it still refers to this request.
	    if (inFlight === request) inFlight = null;
	  }
	}

	/** Extracts `name` / `message` from an arbitrary thrown value without unsafe casts. */
	function describeThrown(thrown: unknown): { name: string; message: string } {
	  if (thrown instanceof Error) {
	    return { name: thrown.name, message: thrown.message };
	  }
	  if (typeof thrown === "object" && thrown !== null) {
	    const { name, message } = thrown as { name?: unknown; message?: unknown };
	    return {
	      name: typeof name === "string" ? name : "",
	      message: typeof message === "string" ? message : String(thrown),
	    };
	  }
	  return { name: "", message: String(thrown) };
	}

	/**
	 * Performs one GET against HEALTH_URL and classifies the outcome.
	 * Never rejects: every failure is folded into the returned result.
	 *
	 * @param signal Abort signal used by the caller to enforce a timeout.
	 * @returns
	 * - fetch aborted (name "AbortError")      → "asleep", hitContainer false
	 * - any other fetch rejection              → "error" with the message,
	 *                                            hitContainer false
	 * - non-2xx response                       → "warming"
	 * - 2xx, body unreadable / not an object   → "awake", bothUpstreams false
	 * - 2xx, at least one upstream, all "ok"   → "awake", bothUpstreams true
	 * - 2xx, otherwise                         → "warming"
	 */
	async function runProbe(signal: AbortSignal): Promise<ProbeResult> {
	  let response: Response;
	  try {
	    response = await fetch(HEALTH_URL, {
	      method: "GET",
	      mode: "cors",
	      signal,
	    });
	  } catch (exc: unknown) {
	    // The request never produced a response. An abort means our own
	    // timeout fired, and the most likely cause is "asleep + slow
	    // cold-boot", so render "asleep". Anything else (offline, DNS,
	    // CORS rejection) is surfaced as "error" with its message.
	    const { name, message } = describeThrown(exc);
	    if (name === "AbortError") {
	      return { status: { kind: "asleep" }, hitContainer: false };
	    }
	    return { status: { kind: "error", message }, hitContainer: false };
	  }

	  if (!response.ok) {
	    // A non-2xx reply (typically 503 from /health) means something
	    // answered but at least one vLLM is still booting. The request
	    // was routed, so it counts as a wake-up signal.
	    return { status: { kind: "warming" }, hitContainer: true };
	  }

	  // 2xx: try to read the per-upstream detail from the JSON body.
	  let body: unknown;
	  try {
	    body = await response.json();
	  } catch {
	    body = null;
	  }
	  if (typeof body !== "object" || body === null) {
	    // Unreadable or unexpected body. Best effort: a 2xx means the
	    // container answered, so it's awake; we just can't see the
	    // per-upstream detail.
	    return {
	      status: { kind: "awake", bothUpstreams: false },
	      hitContainer: true,
	    };
	  }

	  const rawUpstreams = (body as { upstreams?: unknown }).upstreams;
	  const states: unknown[] =
	    typeof rawUpstreams === "object" && rawUpstreams !== null
	      ? Object.values(rawUpstreams)
	      : [];
	  // An empty or missing upstream map confirms nothing, so it must not
	  // count as "all ok".
	  const allOk = states.length > 0 && states.every((state) => state === "ok");
	  if (allOk) {
	    return {
	      status: { kind: "awake", bothUpstreams: true },
	      hitContainer: true,
	    };
	  }
	  return { status: { kind: "warming" }, hitContainer: true };
	}

	/**
	 * Status banner for the PhysiX-Infer GPU Space.
	 *
	 * Probes /health on mount and every POLL_INTERVAL_MS afterwards, and
	 * renders the result through `StatusBanner`, including a "Prewarm"
	 * button that issues an un-timed /health request to wake the Space.
	 *
	 * @returns The rendered banner element.
	 */
	export function PhysixInferStatus(): JSX.Element {
	  const [status, setStatus] = useState<Status>({ kind: "unknown" });
	  const [prewarming, setPrewarming] = useState(false);
	  // True while the most recently accepted probe result was "awake".
	  // Used to forgive exactly one asleep/error probe after a confirmed
	  // awake state, so a transient network blip doesn't flip the badge.
	  const wasAwakeRef = useRef(false);

	  const refresh = useCallback(async () => {
	    const { status: next } = await probe();
	    const unreachable = next.kind === "asleep" || next.kind === "error";

	    if (unreachable && wasAwakeRef.current) {
	      // Sticky-awake: keep showing the previous state for this one
	      // probe, but spend the grace so that a second consecutive
	      // failure (the next poll) does flip the banner. Without this
	      // reset the banner would stay "awake" forever once it had been
	      // awake a single time.
	      wasAwakeRef.current = false;
	      return;
	    }

	    // The ref is updated here rather than inside a setState updater:
	    // updater functions must be pure because React may invoke them
	    // more than once.
	    wasAwakeRef.current = next.kind === "awake";
	    setStatus(next);
	  }, []);

	  useEffect(() => {
	    // Probe immediately, then keep polling until unmount.
	    void refresh();
	    const id = window.setInterval(() => void refresh(), POLL_INTERVAL_MS);
	    return () => window.clearInterval(id);
	  }, [refresh]);

	  /**
	   * Wakes the Space by holding a /health request open until HF answers.
	   * Shows "warming" optimistically while the request is pending and
	   * re-probes once it settles. Re-entrant clicks are ignored.
	   */
	  async function handlePrewarm(): Promise<void> {
	    if (prewarming) return;
	    setPrewarming(true);
	    setStatus({ kind: "warming" });
	    try {
	      // Deliberately no timeout: let the browser hold the connection
	      // until HF Spaces wakes up and answers. The response itself is
	      // not inspected; the follow-up probe below reports the state.
	      await fetch(HEALTH_URL, { method: "GET", mode: "cors" });
	    } catch {
	      // Ignore — the polling loop will surface the real state.
	    } finally {
	      setPrewarming(false);
	      void refresh();
	    }
	  }

	  return (
	    <StatusBanner
	      status={status}
	      onPrewarm={() => void handlePrewarm()}
	      prewarming={prewarming}
	    />
	  );
	}

	// ---------------------------------------------------------------------
	// Render
	// ---------------------------------------------------------------------

	/** Props accepted by the presentational `StatusBanner` component. */
	interface StatusBannerProps {
	/** State to render; drives the colours, label, description and
	* whether the Prewarm button is shown. */
	status: Status;
	/** Click handler for the Prewarm button. */
	onPrewarm: () => void;
	/** While true the Prewarm button is disabled and reads "Prewarming…". */
	prewarming: boolean;
	}

	/**
	 * Presentational banner: a coloured dot, a title line, a description
	 * line and — for states where it applies — the Prewarm button.
	 * Holds no state of its own; everything comes in through props.
	 */
	function StatusBanner(props: StatusBannerProps): JSX.Element {
	  const { status, onPrewarm, prewarming } = props;
	  const { bg, border, dot, title } = toneFor(status);

	  const containerClass = cn(
	    "rounded-lg border px-3 py-2 text-[11px] leading-relaxed",
	    bg,
	    border,
	  );
	  const dotClass = cn("mt-1 inline-block h-2 w-2 shrink-0 rounded-full", dot);
	  const buttonText = prewarming ? "Prewarming…" : "Prewarm GPU";

	  // Only some states offer the button; render nothing otherwise.
	  const prewarmButton = showsPrewarm(status) ? (
	    <button
	      type="button"
	      onClick={onPrewarm}
	      disabled={prewarming}
	      className={cn(
	        "shrink-0 rounded-md border border-border bg-surface px-2 py-1 text-[10px] font-medium uppercase tracking-wider transition",
	        "hover:bg-surfaceMuted disabled:cursor-not-allowed disabled:opacity-60",
	      )}
	    >
	      {buttonText}
	    </button>
	  ) : null;

	  return (
	    <div className={containerClass}>
	      <div className="flex items-start gap-2">
	        <span aria-hidden className={dotClass} />
	        <div className="flex-1 min-w-0">
	          <p className={cn("font-medium", title)}>{labelFor(status)}</p>
	          <p className="mt-0.5 text-textMuted">{descriptionFor(status)}</p>
	        </div>
	        {prewarmButton}
	      </div>
	    </div>
	  );
	}

	/** Set of CSS class strings that colour the banner for one status. */
	interface Tone {
	/** Background class(es) for the banner container. */
	bg: string;
	/** Border colour class(es) for the banner container. */
	border: string;
	/** Class(es) for the small status dot. */
	dot: string;
	/** Text colour class(es) for the title line. */
	title: string;
	}

	/**
	 * Picks the colour classes for a status: emerald for "awake", amber
	 * for "warming" (pulsing dot) and "asleep", rose for "error", and a
	 * neutral pulsing tone for "unknown" or anything unrecognised.
	 * A fresh object is returned on every call.
	 */
	function toneFor(status: Status): Tone {
	  const tone = (bg: string, border: string, dot: string, title: string): Tone => ({
	    bg,
	    border,
	    dot,
	    title,
	  });

	  const { kind } = status;
	  if (kind === "awake") {
	    return tone(
	      "bg-emerald-950/40",
	      "border-emerald-800/60",
	      "bg-emerald-400",
	      "text-emerald-200",
	    );
	  }
	  if (kind === "warming") {
	    return tone(
	      "bg-amber-950/40",
	      "border-amber-800/60",
	      "bg-amber-400 animate-pulse",
	      "text-amber-200",
	    );
	  }
	  if (kind === "asleep") {
	    return tone(
	      "bg-amber-950/40",
	      "border-amber-800/60",
	      "bg-amber-500",
	      "text-amber-200",
	    );
	  }
	  if (kind === "error") {
	    return tone(
	      "bg-rose-950/40",
	      "border-rose-800/60",
	      "bg-rose-500",
	      "text-rose-200",
	    );
	  }
	  // "unknown" and any unexpected kind share the neutral tone.
	  return tone(
	    "bg-surfaceMuted",
	    "border-border",
	    "bg-textMuted animate-pulse",
	    "text-textPrimary",
	  );
	}

	/**
	 * Title line for the banner. "awake" distinguishes whether both
	 * upstreams were confirmed; "unknown" and any unrecognised kind fall
	 * through to the "checking" label.
	 */
	function labelFor(status: Status): string {
	  if (status.kind === "awake") {
	    if (status.bothUpstreams) {
	      return "GPU is warm — both models loaded";
	    }
	    return "GPU is warm";
	  }
	  if (status.kind === "warming") {
	    return "GPU is warming up";
	  }
	  if (status.kind === "asleep") {
	    return "GPU is asleep";
	  }
	  if (status.kind === "error") {
	    return "Couldn't reach the GPU Space";
	  }
	  return "Checking GPU status…";
	}

	// Explanatory text per status kind. "unknown" is intentionally absent:
	// it shares the fallback used for any kind not listed here.
	const STATUS_DESCRIPTIONS: ReadonlyMap<string, string> = new Map([
	  [
	    "awake",
	    "Next request will respond in ~1-3 s. Sleeps again after 5 min idle.",
	  ],
	  [
	    "warming",
	    "vLLM is loading the 3B weights. First request will resolve in ~30-90 s; subsequent calls are fast.",
	  ],
	  [
	    "asleep",
	    "First request will trigger a cold boot (~90-120 s while vLLM loads two 3B models on the L4). Click Prewarm now if you'd rather not wait inside the episode.",
	  ],
	  [
	    "error",
	    "The Space might be temporarily unreachable. Episodes targeting PhysiX-Infer will fail until it recovers — try Hugging Face Router as a fallback.",
	  ],
	]);

	/**
	 * Secondary line for the banner: what the user should expect from the
	 * next request in the given state. Falls back to the "probing" text
	 * for "unknown" and for any unrecognised kind.
	 */
	function descriptionFor(status: Status): string {
	  const text = STATUS_DESCRIPTIONS.get(status.kind);
	  if (text !== undefined) {
	    return text;
	  }
	  return "Probing https://pratyush-01-physix-infer.hf.space/health …";
	}

	/**
	 * Whether the Prewarm button should be offered for this status.
	 * True only for "asleep" and "error" — the states in which the probe
	 * got no response from the Space.
	 */
	function showsPrewarm(status: Status): boolean {
	  return status.kind === "asleep" || status.kind === "error";
	}