/**
 * Status banner for the PhysiX-Infer GPU Space.
 *
 * Why this exists:
 *   The PhysiX-Infer Space sleeps after 5 min of idle to avoid burning
 *   GPU time. A cold-start takes 90-120 s while vLLM downloads / loads
 *   weights for both 3B models. Without warning, a user picks the
 *   PhysiX endpoint, hits Run, and stares at a spinner for 2 minutes
 *   convinced something is broken.
 *
 *   This panel surfaces the underlying state so the wait is *expected*,
 *   not surprising — and offers a one-click "Prewarm" button so the
 *   user can kick the boot off before they pick a system / hit Run.
 *
 * Mechanics:
 *   - On mount, GET https://pratyush-01-physix-infer.hf.space/health.
 *   - The probe distinguishes three observable states:
 *       * 200 with body { upstreams: { qwen: "ok", physix: "ok" } }
 *         → both vLLMs are loaded and serving. Fast next call.
 *       * 200 with one upstream not "ok", or any non-2xx status (e.g. 503)
 *         → container running but vLLM still warming. Some calls fast,
 *           some still slow. Treat as "warming".
 *       * no response within the 6 s probe timeout
 *         → Space is asleep. Whatever woke it (this very request) will
 *           now ride the cold-boot pipeline.
 *   - Re-poll every 15 s while the component is mounted so the badge
 *     stays accurate as the user thinks. Polling is cheap and the
 *     requests count as activity, which keeps the Space awake while
 *     they read — exactly the UX we want during a demo.
 *
 * Note on CORS: the physix-infer FastAPI uses default CORS. The probe is
 * a cross-origin fetch in "cors" mode, so neither the status code nor
 * the body is readable unless the response carries
 * Access-Control-Allow-Origin; without that header fetch() rejects and
 * the probe reports "error". When a 200 does arrive but its body cannot
 * be parsed as JSON, we fall back to "container is up" (best-effort).
 * A probe that times out is reported as "asleep".
 */
import { useCallback, useEffect, useRef, useState } from "react";
import { cn } from "@/lib/cn";
import { PHYSIX_INFER_BASE_URL } from "@/lib/llmPresets";

// /health is mounted at the proxy root, so strip the trailing /v1.
const HEALTH_URL = PHYSIX_INFER_BASE_URL.replace(/\/v1\/?$/, "") + "/health";

// 15 s strikes a balance: long enough that we don't spam HF's edge with
// requests, short enough that "GPU is now warm" surfaces well before
// the user has finished typing their prompt.
const POLL_INTERVAL_MS = 15_000;

// Hard ceiling on a single probe. HF holds requests open while a Space
// boots, and that boot can take ~120 s. We don't want to *wait* for
// the boot — we want to detect the asleep state early so we can
// render "cold" and offer the Prewarm button. Anything past 6 s
// without a response is "asleep" for our purposes.
const PROBE_TIMEOUT_MS = 6_000;

/** Everything the banner can display, tagged by `kind`. */
type Status =
  | { kind: "unknown" }
  | { kind: "awake"; bothUpstreams: boolean }
  | { kind: "warming" }
  | { kind: "asleep" }
  | { kind: "error"; message: string };

/** Outcome of a single `/health` probe. */
interface ProbeResult {
  status: Status;
  /** True if the probe itself was successful enough to count as a
   *  wake-up signal — i.e. HF Spaces' edge proxy received it and
   *  routed it to the container. */
  hitContainer: boolean;
}

// Module-level dedup. Multiple mounts share a single in-flight `/health`
// probe, and the most recent result (whatever its kind) is replayed for
// a short window so back-to-back callers don't each hit the network.
let inFlight: Promise<ProbeResult> | null = null;
let lastResult: { result: ProbeResult; at: number } | null = null;
const SHARED_RESULT_WINDOW_MS = 5_000;

/**
 * Probe `/health` once, sharing work between concurrent and
 * near-simultaneous callers.
 *
 * @returns The classified probe result. Never rejects: `runProbe` maps
 *   every failure onto a `Status`.
 */
async function probe(): Promise<ProbeResult> {
  // Coalesce: a second probe() call that lands while the first is
  // still in flight piggy-backs on the same network request.
  if (inFlight) return inFlight;

  // Replay the last result if it is fresh enough — e.g. a component
  // that mounts just after another one finished probing.
  if (lastResult && Date.now() - lastResult.at < SHARED_RESULT_WINDOW_MS) {
    return lastResult.result;
  }

  inFlight = (async (): Promise<ProbeResult> => {
    const controller = new AbortController();
    const timeoutId = window.setTimeout(
      () => controller.abort(),
      PROBE_TIMEOUT_MS,
    );
    try {
      return await runProbe(controller.signal);
    } finally {
      // Always release the timer, whether the probe resolved or aborted.
      window.clearTimeout(timeoutId);
    }
  })();

  try {
    const result = await inFlight;
    lastResult = { result, at: Date.now() };
    return result;
  } finally {
    inFlight = null;
  }
}

/**
 * Issue the actual `/health` request and classify the outcome.
 *
 * @param signal Aborts the request (and the body read) when the caller's
 *   timeout fires.
 * @returns "warming" for any non-2xx response or a 200 whose upstreams
 *   are not all "ok"; "awake" for a 200 with all upstreams "ok" or an
 *   unreadable body; "asleep" when the request was aborted; "error" for
 *   any other failure.
 */
async function runProbe(signal: AbortSignal): Promise<ProbeResult> {
  try {
    const response = await fetch(HEALTH_URL, {
      method: "GET",
      mode: "cors",
      signal,
    });

    if (!response.ok) {
      // 503 from /health = at least one vLLM still booting. We hit the
      // container, so we *did* wake the Space (HF Spaces' edge sends a
      // 503 with body during cold-boot, then the body changes to ok
      // once vLLMs come up). Other non-2xx statuses are classified the
      // same way.
      return { status: { kind: "warming" }, hitContainer: true };
    }

    // 200 — try to read the body. If it can't be read or parsed,
    // default to "awake but unsure about per-upstream status".
    try {
      const body = (await response.json()) as {
        upstreams?: Record<string, unknown>;
      };
      const upstreams = body.upstreams ?? {};
      const allOk = Object.values(upstreams).every((v) => v === "ok");
      // An empty upstream map proves nothing, so it is not "awake".
      if (allOk && Object.keys(upstreams).length > 0) {
        return {
          status: { kind: "awake", bothUpstreams: true },
          hitContainer: true,
        };
      }
      return { status: { kind: "warming" }, hitContainer: true };
    } catch {
      // Unreadable or non-JSON body. Best effort: 200 means the
      // container answered, so it's awake; we just can't see the
      // per-upstream detail.
      return {
        status: { kind: "awake", bothUpstreams: false },
        hitContainer: true,
      };
    }
  } catch (exc: unknown) {
    // Aborted → our timeout fired; the most likely cause is
    // "asleep + slow cold-boot", so render "asleep" (with a Prewarm
    // button). Anything else (DNS / offline / CORS rejection) is
    // surfaced as "error" with the underlying message.
    if (signal.aborted || (exc instanceof Error && exc.name === "AbortError")) {
      return { status: { kind: "asleep" }, hitContainer: false };
    }
    return {
      status: {
        kind: "error",
        message: exc instanceof Error ? exc.message : String(exc),
      },
      hitContainer: false,
    };
  }
}

/**
 * Banner showing whether the PhysiX-Infer Space is warm, warming, asleep
 * or unreachable. Polls `/health` every `POLL_INTERVAL_MS` while mounted
 * and offers a Prewarm action.
 */
export function PhysixInferStatus(): JSX.Element {
  const [status, setStatus] = useState<Status>({ kind: "unknown" });
  const [prewarming, setPrewarming] = useState(false);

  // True while the last confirmed state was "awake" and no failed probe
  // has been absorbed since. Lets us ride out exactly ONE transient
  // network blip without downgrading the banner.
  const wasAwakeRef = useRef(false);

  // Mirrors `prewarming` for code that must not depend on render-time
  // state (the memoised `refresh` and the double-click guard).
  const prewarmingRef = useRef(false);

  const refresh = useCallback(async (): Promise<void> => {
    const { status: next } = await probe();

    // While a prewarm request is pending the Space is booting and HF
    // holds connections open, so a timed-out probe is expected. Don't
    // let it flip the banner from "warming" back to "asleep".
    if (next.kind === "asleep" && prewarmingRef.current) return;

    const unreachable = next.kind === "asleep" || next.kind === "error";
    if (unreachable && wasAwakeRef.current) {
      // Sticky-awake: absorb the first failure after a confirmed
      // "awake", but clear the flag so that a second consecutive
      // failure on the next poll does flip the banner.
      wasAwakeRef.current = false;
      return;
    }

    if (next.kind === "awake") {
      wasAwakeRef.current = true;
    }
    setStatus(next);
  }, []);

  useEffect(() => {
    void refresh();
    const id = window.setInterval(() => void refresh(), POLL_INTERVAL_MS);
    return () => window.clearInterval(id);
  }, [refresh]);

  /**
   * Wake the Space by sending one untimed `/health` request, then
   * re-probe to pick up the resulting state.
   */
  async function handlePrewarm(): Promise<void> {
    if (prewarmingRef.current) return;
    prewarmingRef.current = true;
    setPrewarming(true);
    setStatus({ kind: "warming" });
    try {
      // No timeout here — let the browser hold the connection until
      // HF Spaces wakes up and answers. The response itself is not
      // inspected; the probe below classifies the state.
      await fetch(HEALTH_URL, { method: "GET", mode: "cors" });
    } catch {
      // Ignore — the polling loop will surface the real state.
    } finally {
      prewarmingRef.current = false;
      setPrewarming(false);
      // Drop any cached probe result so the follow-up probe reflects
      // the state *after* the wake-up request.
      lastResult = null;
      void refresh();
    }
  }

  return (
    <StatusBanner
      status={status}
      onPrewarm={() => void handlePrewarm()}
      prewarming={prewarming}
    />
  );
}

// ---------------------------------------------------------------------
// Render
// ---------------------------------------------------------------------

interface StatusBannerProps {
  status: Status;
  onPrewarm: () => void;
  prewarming: boolean;
}

/**
 * Presentational banner: coloured dot, title, description and, when the
 * Space is asleep or unreachable, a Prewarm button.
 */
function StatusBanner({
  status,
  onPrewarm,
  prewarming,
}: StatusBannerProps): JSX.Element {
  const tone = toneFor(status);
  // NOTE(review): the element tree below was rebuilt around the
  // expressions that survived in the reviewed source (label,
  // description, conditional Prewarm button, tone classes). The layout
  // utility classes and button copy are a reconstruction — confirm
  // against the design before shipping.
  return (
    <div
      role="status"
      className={cn(
        "flex items-start gap-3 rounded-md border px-3 py-2",
        tone.bg,
        tone.border,
      )}
    >
      <span
        aria-hidden="true"
        className={cn("mt-1.5 h-2 w-2 shrink-0 rounded-full", tone.dot)}
      />
      <div className="min-w-0 flex-1">
        <div className={cn("text-sm font-medium", tone.title)}>
          {labelFor(status)}
        </div>
        <div className="text-xs text-textMuted">{descriptionFor(status)}</div>
      </div>
      {showsPrewarm(status) ? (
        <button
          type="button"
          onClick={onPrewarm}
          disabled={prewarming}
          className={cn(
            "shrink-0 rounded border px-2 py-1 text-xs disabled:opacity-50",
            tone.border,
            tone.title,
          )}
        >
          {prewarming ? "Prewarming…" : "Prewarm"}
        </button>
      ) : null}
    </div>
  );
}

/** Class names that colour the banner for one status. */
interface Tone {
  bg: string;
  border: string;
  dot: string;
  title: string;
}

/** Map a status to its colour classes. */
function toneFor(status: Status): Tone {
  switch (status.kind) {
    case "awake":
      return {
        bg: "bg-emerald-950/40",
        border: "border-emerald-800/60",
        dot: "bg-emerald-400",
        title: "text-emerald-200",
      };
    case "warming":
      return {
        bg: "bg-amber-950/40",
        border: "border-amber-800/60",
        dot: "bg-amber-400 animate-pulse",
        title: "text-amber-200",
      };
    case "asleep":
      return {
        bg: "bg-amber-950/40",
        border: "border-amber-800/60",
        dot: "bg-amber-500",
        title: "text-amber-200",
      };
    case "error":
      return {
        bg: "bg-rose-950/40",
        border: "border-rose-800/60",
        dot: "bg-rose-500",
        title: "text-rose-200",
      };
    case "unknown":
    default:
      return {
        bg: "bg-surfaceMuted",
        border: "border-border",
        dot: "bg-textMuted animate-pulse",
        title: "text-textPrimary",
      };
  }
}

/** One-line headline for a status. */
function labelFor(status: Status): string {
  switch (status.kind) {
    case "awake":
      return status.bothUpstreams
        ? "GPU is warm — both models loaded"
        : "GPU is warm";
    case "warming":
      return "GPU is warming up";
    case "asleep":
      return "GPU is asleep";
    case "error":
      return "Couldn't reach the GPU Space";
    case "unknown":
    default:
      return "Checking GPU status…";
  }
}

/** Longer explanation shown under the headline. */
function descriptionFor(status: Status): string {
  switch (status.kind) {
    case "awake":
      return "Next request will respond in ~1-3 s. Sleeps again after 5 min idle.";
    case "warming":
      return "vLLM is loading the 3B weights. First request will resolve in ~30-90 s; subsequent calls are fast.";
    case "asleep":
      return "First request will trigger a cold boot (~90-120 s while vLLM loads two 3B models on the L4). Click Prewarm now if you'd rather not wait inside the episode.";
    case "error":
      return "The Space might be temporarily unreachable. Episodes targeting PhysiX-Infer will fail until it recovers — try Hugging Face Router as a fallback.";
    case "unknown":
    default:
      return "Probing https://pratyush-01-physix-infer.hf.space/health …";
  }
}

/** Whether the Prewarm button is offered: only when asleep or unreachable. */
function showsPrewarm(status: Status): boolean {
  return status.kind === "asleep" || status.kind === "error";
}