physix-live / frontend /src /components /PhysixInferStatus.tsx
Pratyush-01's picture
cleanup: trim verbose comments, drop dead code, fix stale tests, proper Dockerfile + .gitignore
7f40db3 verified
/** Status banner for the PhysiX-Infer GPU Space.
*
* Why this exists:
* The PhysiX-Infer Space sleeps after 5 min of idle to avoid burning
* GPU time. A cold-start takes 90-120 s while vLLM downloads / loads
* weights for both 3B models. Without warning, a user picks the
* PhysiX endpoint, hits Run, and stares at a spinner for 2 minutes
* convinced something is broken.
*
* This panel surfaces the underlying state so the wait is *expected*,
* not surprising — and offers a one-click "Prewarm" button so the
* user can kick the boot off before they pick a system / hit Run.
*
* Mechanics:
* - On mount, GET https://pratyush-01-physix-infer.hf.space/health.
* - HF Spaces' edge proxy yields one of four observable states:
* * 200 with body { upstreams: { qwen: "ok", physix: "ok" } }
* → both vLLMs are loaded and serving. Fast next call.
* * 200 with one upstream not "ok"
* → container running but vLLM still warming. Some calls fast,
* some still slow. Treat as "warming".
* * 503 (or another 5xx)
* → something answered, but vLLM is still booting. Also treated
* as "warming".
* * no response within the 6 s probe timeout
* → Space is asleep. Whatever woke it (this very request) will
* now ride the cold-boot pipeline.
* - Re-poll every 15 s while the component is mounted so the badge
* stays accurate as the user thinks. Polling is cheap and the
* requests count as activity, which keeps the Space awake while
* they read — exactly the UX we want during a demo.
*
* Note on CORS: the physix-infer FastAPI uses default CORS and the
* /health endpoint returns plain JSON. The probe is sent with
* `mode: "cors"`, so the browser only exposes the response — status
* code AND body — when the Space answers with a matching
* Access-Control-Allow-Origin header; without it, fetch rejects and
* the probe reports "error". If the response is readable but the body
* is not valid JSON, we fall back to "container is up" (best-effort).
* A probe that hits the timeout is reported as "asleep". */
import { useCallback, useEffect, useRef, useState } from "react";
import { cn } from "@/lib/cn";
import { PHYSIX_INFER_BASE_URL } from "@/lib/llmPresets";
// The health endpoint lives at the proxy root, not under /v1: drop a
// trailing "/v1" (or "/v1/") from the base URL before appending it.
const HEALTH_URL = `${PHYSIX_INFER_BASE_URL.replace(/\/v1\/?$/, "")}/health`;
// Cadence of the background /health poll, in milliseconds. Sparse
// enough not to hammer HF's edge, frequent enough that "GPU is now
// warm" shows up well before the user has finished typing a prompt.
const POLL_INTERVAL_MS = 15 * 1_000;
// Upper bound on a single probe, in milliseconds. HF keeps a request
// open while the Space boots (~120 s); the probe is not meant to sit
// through that. Its job is to notice the asleep state quickly so the
// banner can render "cold" and offer Prewarm — so no answer within
// 6 s is classified as "asleep".
const PROBE_TIMEOUT_MS = 6 * 1_000;
/** What the banner currently believes about the GPU Space, tagged by `kind`. */
type Status =
  // Initial state: no probe has completed yet.
  | { kind: "unknown" }
  // The probe was aborted by its timeout before any response arrived.
  | { kind: "asleep" }
  // Something answered, but the models are not (all) confirmed ready.
  | { kind: "warming" }
  // A 200 came back. `bothUpstreams` is true only when the body
  // confirmed every upstream as "ok"; false when the body was unreadable.
  | { kind: "awake"; bothUpstreams: boolean }
  // The probe failed for a reason other than the timeout.
  | { kind: "error"; message: string };
/** Outcome of a single /health probe. */
interface ProbeResult {
  /**
   * Whether the probe got far enough to count as a wake-up signal,
   * i.e. HF Spaces' edge proxy received it and routed it to the
   * container.
   */
  hitContainer: boolean;
  /** The classification the banner should display for this probe. */
  status: Status;
}
// Probe de-duplication state, shared by every mounted banner because it
// lives at module scope.
//
// How long a settled probe result may be replayed to later callers.
const SHARED_RESULT_WINDOW_MS = 5 * 1_000;
// The probe currently on the wire, if any; concurrent callers await it
// instead of issuing their own request.
let inFlight: Promise<ProbeResult> | null = null;
// The most recently settled result (whatever its status) and the
// timestamp at which it was recorded.
let lastResult: { result: ProbeResult; at: number } | null = null;
/**
 * Shared entry point for the /health probe.
 *
 * At most one request is on the wire at a time: callers that arrive
 * while a probe is pending receive that probe's result, and callers
 * that arrive within SHARED_RESULT_WINDOW_MS of the last settled probe
 * get its result replayed. Otherwise a new probe is started, bounded by
 * PROBE_TIMEOUT_MS.
 */
async function probe(): Promise<ProbeResult> {
  // A probe is already pending — ride along on it.
  if (inFlight !== null) {
    return inFlight;
  }

  // The previous probe settled recently enough to reuse. Every result
  // is recorded, so a recent failure is replayed just like a success.
  const cached = lastResult;
  if (cached !== null && Date.now() - cached.at < SHARED_RESULT_WINDOW_MS) {
    return cached.result;
  }

  // Run the real probe under an abort timer; the timer is always
  // cleared, whether the probe resolves or throws.
  const timedProbe = async (): Promise<ProbeResult> => {
    const abortController = new AbortController();
    const timer = window.setTimeout(() => {
      abortController.abort();
    }, PROBE_TIMEOUT_MS);
    try {
      return await runProbe(abortController.signal);
    } finally {
      window.clearTimeout(timer);
    }
  };

  // Published synchronously (before the first await below) so that any
  // caller arriving from now on coalesces onto this request.
  const pending = timedProbe();
  inFlight = pending;

  try {
    const fresh = await pending;
    lastResult = { result: fresh, at: Date.now() };
    return fresh;
  } finally {
    inFlight = null;
  }
}
/**
 * Issue one GET against the health endpoint and classify the outcome.
 *
 * Never rejects; every failure mode is folded into the returned status:
 *  - 5xx response                → "warming"
 *  - any other non-2xx response  → "error" (e.g. 404 / 401: waiting will
 *                                  not fix it, so don't pretend it's booting)
 *  - 200, every upstream "ok"    → "awake" with bothUpstreams = true
 *  - 200, anything else in body  → "warming"
 *  - 200, body unreadable        → "awake" with bothUpstreams = false
 *  - aborted via `signal`        → "asleep"
 *  - any other fetch failure     → "error"
 *
 * @param signal Abort signal used by the caller to enforce its timeout.
 * @returns The classified status plus whether a response was received.
 */
async function runProbe(signal: AbortSignal): Promise<ProbeResult> {
  try {
    const response = await fetch(HEALTH_URL, {
      method: "GET",
      mode: "cors",
      signal,
    });

    if (!response.ok) {
      if (response.status >= 500) {
        // 503 from /health = at least one vLLM still booting. Something
        // answered, so the request did reach (and wake) the Space.
        return { status: { kind: "warming" }, hitContainer: true };
      }
      // A 4xx is not a boot symptom. Surface it as an error instead of
      // showing "warming" indefinitely.
      return {
        status: {
          kind: "error",
          message: `Health check returned HTTP ${response.status}`,
        },
        hitContainer: true,
      };
    }

    // 200 — inspect the per-upstream detail if the body is usable.
    try {
      const body = (await response.json()) as {
        upstreams?: Record<string, string>;
      };
      const upstreams = body.upstreams ?? {};
      const states = Object.values(upstreams);
      // An empty / missing `upstreams` map is deliberately NOT treated
      // as ready: readiness has to be stated explicitly by the server.
      if (states.length > 0 && states.every((state) => state === "ok")) {
        return {
          status: { kind: "awake", bothUpstreams: true },
          hitContainer: true,
        };
      }
      return { status: { kind: "warming" }, hitContainer: true };
    } catch {
      // The response itself was readable (fetch resolved), but the body
      // could not be consumed as the expected JSON — e.g. it is not
      // JSON, it is `null`, or the read was cut short. Best effort: a
      // 200 means the container answered, so report it as awake without
      // per-upstream detail.
      return {
        status: { kind: "awake", bothUpstreams: false },
        hitContainer: true,
      };
    }
  } catch (exc: unknown) {
    // fetch rejected: either our timeout aborted it, or the request
    // failed outright (DNS / offline / CORS refusal).
    const { name, message } = describeThrown(exc);
    if (name === "AbortError") {
      // No answer within the caller's timeout: most likely the Space is
      // asleep and cold-booting rather than genuinely down.
      return { status: { kind: "asleep" }, hitContainer: false };
    }
    return { status: { kind: "error", message }, hitContainer: false };
  }
}

/** Extract `name` / `message` from an arbitrary thrown value without assuming it is an Error. */
function describeThrown(exc: unknown): { name: string; message: string } {
  const fields: { name?: unknown; message?: unknown } =
    typeof exc === "object" && exc !== null
      ? (exc as { name?: unknown; message?: unknown })
      : {};
  return {
    name: typeof fields.name === "string" ? fields.name : "",
    message:
      typeof fields.message === "string" ? fields.message : String(exc),
  };
}
/**
 * Live status banner for the PhysiX-Infer GPU Space.
 *
 * Probes the health endpoint on mount and then every POLL_INTERVAL_MS,
 * and renders the latest classification through <StatusBanner>. Also
 * owns the "Prewarm" action, which sends an un-timed request to the
 * health endpoint and waits for it to settle.
 */
export function PhysixInferStatus(): JSX.Element {
  const [status, setStatus] = useState<Status>({ kind: "unknown" });
  const [prewarming, setPrewarming] = useState(false);

  // True between mount and unmount. Probes and the prewarm request can
  // settle after the component is gone; they must not touch state then.
  const mountedRef = useRef(false);
  // True while the most recently applied probe result was "awake".
  // Grants exactly ONE failed probe the benefit of the doubt.
  const wasAwakeRef = useRef(false);
  // Mirrors `prewarming` for use inside async callbacks, where the
  // state value captured by the closure may be stale.
  const prewarmingRef = useRef(false);

  const refresh = useCallback(async (): Promise<void> => {
    const { status: next } = await probe();
    if (!mountedRef.current) return;

    const unreachable = next.kind === "asleep" || next.kind === "error";
    if (unreachable) {
      // A prewarm request is still pending. The Space is expected to be
      // unresponsive while it boots, so keep showing "warming" instead
      // of flipping back to "asleep" on every timed-out poll.
      if (prewarmingRef.current) return;

      // Sticky-awake: ignore a single failure right after a confirmed
      // "awake" (transient network blip). The allowance is consumed
      // here, so a second consecutive failure does flip the banner.
      if (wasAwakeRef.current) {
        wasAwakeRef.current = false;
        return;
      }
    }

    wasAwakeRef.current = next.kind === "awake";
    setStatus(next);
  }, []);

  useEffect(() => {
    mountedRef.current = true;
    void refresh();
    const id = window.setInterval(() => void refresh(), POLL_INTERVAL_MS);
    return () => {
      mountedRef.current = false;
      window.clearInterval(id);
    };
  }, [refresh]);

  async function handlePrewarm(): Promise<void> {
    // Guard on the ref, not the state: two clicks can land before the
    // re-render that disables the button.
    if (prewarmingRef.current) return;
    prewarmingRef.current = true;
    setPrewarming(true);
    setStatus({ kind: "warming" });
    try {
      // Deliberately no timeout: HF holds the request open until the
      // container is up, so let the browser keep the connection until
      // the Space answers. The response itself is not inspected — the
      // refresh below (and the regular poll) report the actual state.
      await fetch(HEALTH_URL, { method: "GET", mode: "cors" });
    } catch {
      // Ignore — the polling loop will surface the real state.
    } finally {
      prewarmingRef.current = false;
      if (mountedRef.current) {
        setPrewarming(false);
        void refresh();
      }
    }
  }

  return (
    <StatusBanner
      status={status}
      onPrewarm={() => void handlePrewarm()}
      prewarming={prewarming}
    />
  );
}
// ---------------------------------------------------------------------
// Render
// ---------------------------------------------------------------------
/** Props accepted by the presentational <StatusBanner>. */
interface StatusBannerProps {
  /** Classification to render (tone, label and description derive from it). */
  status: Status;
  /** True while a prewarm request is pending; disables the button. */
  prewarming: boolean;
  /** Invoked when the user clicks the Prewarm button. */
  onPrewarm: () => void;
}
/**
 * Presentational banner: a coloured dot, a title and a description for
 * the given status, plus a Prewarm button when `showsPrewarm(status)`
 * says one is useful.
 *
 * The text container is a polite live region (`role="status"`) because
 * its content changes asynchronously as probes complete; without it,
 * assistive technology is not told that the GPU state changed. The
 * button sits outside the live region so it is not re-announced.
 */
function StatusBanner({
  status,
  onPrewarm,
  prewarming,
}: StatusBannerProps): JSX.Element {
  const tone = toneFor(status);
  return (
    <div
      className={cn(
        "rounded-lg border px-3 py-2 text-[11px] leading-relaxed",
        tone.bg,
        tone.border,
      )}
    >
      <div className="flex items-start gap-2">
        {/* Purely decorative; the title carries the same information. */}
        <span
          aria-hidden
          className={cn("mt-1 inline-block h-2 w-2 shrink-0 rounded-full", tone.dot)}
        />
        <div className="flex-1 min-w-0" role="status" aria-live="polite">
          <p className={cn("font-medium", tone.title)}>{labelFor(status)}</p>
          <p className="mt-0.5 text-textMuted">{descriptionFor(status)}</p>
        </div>
        {showsPrewarm(status) ? (
          <button
            type="button"
            onClick={onPrewarm}
            disabled={prewarming}
            className={cn(
              "shrink-0 rounded-md border border-border bg-surface px-2 py-1 text-[10px] font-medium uppercase tracking-wider transition",
              "hover:bg-surfaceMuted disabled:cursor-not-allowed disabled:opacity-60",
            )}
          >
            {prewarming ? "Prewarming…" : "Prewarm GPU"}
          </button>
        ) : null}
      </div>
    </div>
  );
}
/** CSS class names that colour the banner for one status. */
interface Tone {
  /** Class(es) for the status dot. */
  dot: string;
  /** Class for the title text. */
  title: string;
  /** Class for the banner background. */
  bg: string;
  /** Class for the banner border. */
  border: string;
}
/**
 * Pick the colour classes for a status: emerald for awake, amber for
 * warming (pulsing dot) and asleep, rose for error, and the neutral
 * surface palette for "unknown" or any unrecognised kind.
 * A fresh object is returned on every call.
 */
function toneFor(status: Status): Tone {
  const { kind } = status;

  if (kind === "awake") {
    return {
      bg: "bg-emerald-950/40",
      border: "border-emerald-800/60",
      dot: "bg-emerald-400",
      title: "text-emerald-200",
    };
  }

  if (kind === "warming" || kind === "asleep") {
    // Same amber frame for both; only the dot differs — it pulses while
    // the Space is actively warming.
    return {
      bg: "bg-amber-950/40",
      border: "border-amber-800/60",
      dot: kind === "warming" ? "bg-amber-400 animate-pulse" : "bg-amber-500",
      title: "text-amber-200",
    };
  }

  if (kind === "error") {
    return {
      bg: "bg-rose-950/40",
      border: "border-rose-800/60",
      dot: "bg-rose-500",
      title: "text-rose-200",
    };
  }

  // "unknown", plus a defensive fallback for anything unexpected.
  return {
    bg: "bg-surfaceMuted",
    border: "border-border",
    dot: "bg-textMuted animate-pulse",
    title: "text-textPrimary",
  };
}
/**
 * Short headline for the banner. "unknown" and any unrecognised kind
 * fall through to the "checking" label.
 */
function labelFor(status: Status): string {
  if (status.kind === "awake") {
    // Only claim both models are loaded when the probe confirmed it.
    if (status.bothUpstreams) {
      return "GPU is warm — both models loaded";
    }
    return "GPU is warm";
  }
  if (status.kind === "warming") {
    return "GPU is warming up";
  }
  if (status.kind === "asleep") {
    return "GPU is asleep";
  }
  if (status.kind === "error") {
    return "Couldn't reach the GPU Space";
  }
  return "Checking GPU status…";
}
/**
 * One-line explanation shown under the headline. "unknown" and any
 * unrecognised kind get the "probing" text.
 */
function descriptionFor(status: Status): string {
  const byKind = new Map<string, string>([
    [
      "awake",
      "Next request will respond in ~1-3 s. Sleeps again after 5 min idle.",
    ],
    [
      "warming",
      "vLLM is loading the 3B weights. First request will resolve in ~30-90 s; subsequent calls are fast.",
    ],
    [
      "asleep",
      "First request will trigger a cold boot (~90-120 s while vLLM loads two 3B models on the L4). Click Prewarm now if you'd rather not wait inside the episode.",
    ],
    [
      "error",
      "The Space might be temporarily unreachable. Episodes targeting PhysiX-Infer will fail until it recovers — try Hugging Face Router as a fallback.",
    ],
  ]);
  const text = byKind.get(status.kind);
  if (text !== undefined) {
    return text;
  }
  return "Probing https://pratyush-01-physix-infer.hf.space/health …";
}
/** Whether the Prewarm button should be offered: only when the Space looks asleep or unreachable. */
function showsPrewarm(status: Status): boolean {
  switch (status.kind) {
    case "asleep":
    case "error":
      return true;
    default:
      return false;
  }
}