/** Status banner for the PhysiX-Infer GPU Space.
 *
 * Why this exists:
 * The PhysiX-Infer Space sleeps after 5 min of idle to avoid burning
 * GPU time. A cold-start takes 90-120 s while vLLM downloads / loads
 * weights for both 3B models. Without warning, a user picks the
 * PhysiX endpoint, hits Run, and stares at a spinner for 2 minutes
 * convinced something is broken.
 *
 * This panel surfaces the underlying state so the wait is *expected*,
 * not surprising — and offers a one-click "Prewarm" button so the
 * user can kick the boot off before they pick a system / hit Run.
 *
 * Mechanics:
 * - On mount, GET https://pratyush-01-physix-infer.hf.space/health.
 * - HF Spaces' edge proxy returns one of three observable states:
 *   * 200 with body { upstreams: { qwen: "ok", physix: "ok" } }
 *     → both vLLMs are loaded and serving. Fast next call.
 *   * 200 with one upstream not "ok"
 *     → container running but vLLM still warming. Some calls fast,
 *       some still slow. Treat as "warming".
 *   * 503 / connection-stuck-for->5s
 *     → Space is asleep. Whatever woke it (this very request) will
 *       now ride the cold-boot pipeline.
 * - Re-poll every 15 s while the component is mounted so the badge
 *   stays accurate as the user thinks. Polling is cheap and the
 *   requests count as activity, which keeps the Space awake while
 *   they read — exactly the UX we want during a demo.
 *
 * Note on CORS: the physix-infer FastAPI uses default CORS. The
 * /health endpoint returns plain JSON; modern browsers allow simple
 * GETs across origins to read the status code, but reading the BODY
 * needs Access-Control-Allow-Origin. If we can't read the body, we
 * fall back to "container is up" (best-effort) on any successful
 * response and "asleep" on network failure. */
| import { useCallback, useEffect, useRef, useState } from "react"; | |
| import { cn } from "@/lib/cn"; | |
| import { PHYSIX_INFER_BASE_URL } from "@/lib/llmPresets"; | |
// /health is mounted at the proxy root, so strip the trailing /v1.
const HEALTH_URL = PHYSIX_INFER_BASE_URL.replace(/\/v1\/?$/, "") + "/health";
// 15 s strikes a balance: long enough that we don't spam HF's edge with
// requests, short enough that "GPU is now warm" surfaces well before
// the user has finished typing their prompt.
const POLL_INTERVAL_MS = 15_000;
// Hard ceiling on a single probe. HF holds requests open while a Space
// boots, and that boot can take ~120 s. We don't want to *wait* for
// the boot — we want to detect the asleep state early so we can
// render "cold" and offer the Prewarm button. Anything past 6 s
// without a response is "asleep" for our purposes.
const PROBE_TIMEOUT_MS = 6_000;
// Discriminated union of everything the banner can know about the Space.
type Status =
  | { kind: "unknown" } // first probe hasn't resolved yet
  | { kind: "awake"; bothUpstreams: boolean } // 200 from /health
  | { kind: "warming" } // container answered, but not all upstreams "ok"
  | { kind: "asleep" } // probe hit PROBE_TIMEOUT_MS and was aborted
  | { kind: "error"; message: string }; // non-timeout network failure
interface ProbeResult {
  status: Status;
  /** True if the probe itself was successful enough to count as a
   * wake-up signal — i.e. HF Spaces' edge proxy received it and
   * routed it to the container. */
  hitContainer: boolean;
}
// Module-level dedup. Multiple mounts share a single in-flight `/health`
// probe and cache the last successful result for a short window.
let inFlight: Promise<ProbeResult> | null = null;
let lastResult: { result: ProbeResult; at: number } | null = null;
// How long a settled probe result may be replayed to other callers.
const SHARED_RESULT_WINDOW_MS = 5_000;
| async function probe(): Promise<ProbeResult> { | |
| // Coalesce: a second probe() call that lands while the first is | |
| // still in flight piggy-backs on the same network request. | |
| if (inFlight) return inFlight; | |
| // Replay the last result if it's fresh enough — covers the | |
| // "two component mounts in the same render commit" case where | |
| // both useEffects fire microseconds apart but neither has yet | |
| // populated `inFlight`. | |
| if (lastResult && Date.now() - lastResult.at < SHARED_RESULT_WINDOW_MS) { | |
| return lastResult.result; | |
| } | |
| inFlight = (async (): Promise<ProbeResult> => { | |
| const controller = new AbortController(); | |
| const timeoutId = window.setTimeout( | |
| () => controller.abort(), | |
| PROBE_TIMEOUT_MS, | |
| ); | |
| try { | |
| return await runProbe(controller.signal); | |
| } finally { | |
| window.clearTimeout(timeoutId); | |
| } | |
| })(); | |
| try { | |
| const result = await inFlight; | |
| lastResult = { result, at: Date.now() }; | |
| return result; | |
| } finally { | |
| inFlight = null; | |
| } | |
| } | |
| async function runProbe(signal: AbortSignal): Promise<ProbeResult> { | |
| try { | |
| const response = await fetch(HEALTH_URL, { | |
| method: "GET", | |
| mode: "cors", | |
| signal, | |
| }); | |
| if (!response.ok) { | |
| // 503 from /health = at least one vLLM still booting. We hit the | |
| // container, so we *did* wake the Space (HF Spaces' edge sends a | |
| // 503 with body during cold-boot, then the body changes to ok | |
| // once vLLMs come up). | |
| return { status: { kind: "warming" }, hitContainer: true }; | |
| } | |
| // 200 — try to read the body. If CORS strips it, default to "awake | |
| // but unsure about per-upstream status". | |
| try { | |
| const body = (await response.json()) as { | |
| upstreams?: Record<string, string>; | |
| }; | |
| const upstreams = body.upstreams ?? {}; | |
| const allOk = Object.values(upstreams).every((v) => v === "ok"); | |
| if (allOk && Object.keys(upstreams).length > 0) { | |
| return { | |
| status: { kind: "awake", bothUpstreams: true }, | |
| hitContainer: true, | |
| }; | |
| } | |
| return { status: { kind: "warming" }, hitContainer: true }; | |
| } catch { | |
| // CORS or non-JSON body. Best effort: 200 means the container | |
| // answered, so it's awake; we just can't see the per-upstream | |
| // detail. | |
| return { | |
| status: { kind: "awake", bothUpstreams: false }, | |
| hitContainer: true, | |
| }; | |
| } | |
| } catch (exc) { | |
| // AbortError → timed out. Network error → DNS / offline / cors | |
| // preflight refused. In either case the Space is effectively | |
| // unreachable from the browser; the most likely cause is | |
| // "asleep + slow cold-boot" rather than a real outage, so we | |
| // render "asleep" (with a Prewarm button). | |
| if ((exc as Error).name === "AbortError") { | |
| return { status: { kind: "asleep" }, hitContainer: false }; | |
| } | |
| return { | |
| status: { kind: "error", message: (exc as Error).message }, | |
| hitContainer: false, | |
| }; | |
| } | |
| } | |
/** Polls the Space's /health and renders the status banner.
 *
 * Probes once on mount, then every POLL_INTERVAL_MS; the polling
 * traffic doubles as keep-alive activity for the Space.
 */
export function PhysixInferStatus(): JSX.Element {
  const [status, setStatus] = useState<Status>({ kind: "unknown" });
  const [prewarming, setPrewarming] = useState(false);
  // Grace flag: set once we've confirmed "awake" so a single transient
  // asleep/error probe doesn't flap the badge. Cleared when the grace
  // is spent, so a second consecutive failure DOES downgrade the UI.
  const wasAwakeRef = useRef(false);
  const refresh = useCallback(async () => {
    const result = await probe();
    setStatus((prev) => {
      if (
        wasAwakeRef.current &&
        (result.status.kind === "asleep" ||
          result.status.kind === "error")
      ) {
        // Forgive exactly one failure after a confirmed awake.
        // BUG FIX: the flag was previously never cleared, so once
        // awake the banner could never show asleep/error again.
        // Clearing it here lets the next poll flip the badge if the
        // Space really did go back to sleep.
        wasAwakeRef.current = false;
        return prev;
      }
      if (result.status.kind === "awake") {
        wasAwakeRef.current = true;
      }
      return result.status;
    });
  }, []);
  useEffect(() => {
    void refresh();
    const id = window.setInterval(() => void refresh(), POLL_INTERVAL_MS);
    return () => window.clearInterval(id);
  }, [refresh]);
  /** Wake the Space by hitting /health with no timeout. */
  async function handlePrewarm(): Promise<void> {
    if (prewarming) return;
    setPrewarming(true);
    setStatus({ kind: "warming" });
    // We DO await this request (an earlier comment claimed otherwise):
    // HF holds the connection open until the container is up, and
    // awaiting lets us clear the "Prewarming…" button state exactly
    // when the boot completes. The follow-up refresh() then reports
    // the per-upstream detail.
    try {
      await fetch(HEALTH_URL, { method: "GET", mode: "cors" });
    } catch {
      // Ignore — the polling loop will surface the real state.
    } finally {
      setPrewarming(false);
      void refresh();
    }
  }
  return <StatusBanner status={status} onPrewarm={handlePrewarm} prewarming={prewarming} />;
}
// ---------------------------------------------------------------------
// Render
// ---------------------------------------------------------------------
| interface StatusBannerProps { | |
| status: Status; | |
| onPrewarm: () => void; | |
| prewarming: boolean; | |
| } | |
| function StatusBanner({ | |
| status, | |
| onPrewarm, | |
| prewarming, | |
| }: StatusBannerProps): JSX.Element { | |
| const tone = toneFor(status); | |
| return ( | |
| <div | |
| className={cn( | |
| "rounded-lg border px-3 py-2 text-[11px] leading-relaxed", | |
| tone.bg, | |
| tone.border, | |
| )} | |
| > | |
| <div className="flex items-start gap-2"> | |
| <span | |
| aria-hidden | |
| className={cn("mt-1 inline-block h-2 w-2 shrink-0 rounded-full", tone.dot)} | |
| /> | |
| <div className="flex-1 min-w-0"> | |
| <p className={cn("font-medium", tone.title)}>{labelFor(status)}</p> | |
| <p className="mt-0.5 text-textMuted">{descriptionFor(status)}</p> | |
| </div> | |
| {showsPrewarm(status) ? ( | |
| <button | |
| type="button" | |
| onClick={onPrewarm} | |
| disabled={prewarming} | |
| className={cn( | |
| "shrink-0 rounded-md border border-border bg-surface px-2 py-1 text-[10px] font-medium uppercase tracking-wider transition", | |
| "hover:bg-surfaceMuted disabled:cursor-not-allowed disabled:opacity-60", | |
| )} | |
| > | |
| {prewarming ? "Prewarming…" : "Prewarm GPU"} | |
| </button> | |
| ) : null} | |
| </div> | |
| </div> | |
| ); | |
| } | |
| interface Tone { | |
| bg: string; | |
| border: string; | |
| dot: string; | |
| title: string; | |
| } | |
| function toneFor(status: Status): Tone { | |
| switch (status.kind) { | |
| case "awake": | |
| return { | |
| bg: "bg-emerald-950/40", | |
| border: "border-emerald-800/60", | |
| dot: "bg-emerald-400", | |
| title: "text-emerald-200", | |
| }; | |
| case "warming": | |
| return { | |
| bg: "bg-amber-950/40", | |
| border: "border-amber-800/60", | |
| dot: "bg-amber-400 animate-pulse", | |
| title: "text-amber-200", | |
| }; | |
| case "asleep": | |
| return { | |
| bg: "bg-amber-950/40", | |
| border: "border-amber-800/60", | |
| dot: "bg-amber-500", | |
| title: "text-amber-200", | |
| }; | |
| case "error": | |
| return { | |
| bg: "bg-rose-950/40", | |
| border: "border-rose-800/60", | |
| dot: "bg-rose-500", | |
| title: "text-rose-200", | |
| }; | |
| case "unknown": | |
| default: | |
| return { | |
| bg: "bg-surfaceMuted", | |
| border: "border-border", | |
| dot: "bg-textMuted animate-pulse", | |
| title: "text-textPrimary", | |
| }; | |
| } | |
| } | |
| function labelFor(status: Status): string { | |
| switch (status.kind) { | |
| case "awake": | |
| return status.bothUpstreams | |
| ? "GPU is warm — both models loaded" | |
| : "GPU is warm"; | |
| case "warming": | |
| return "GPU is warming up"; | |
| case "asleep": | |
| return "GPU is asleep"; | |
| case "error": | |
| return "Couldn't reach the GPU Space"; | |
| case "unknown": | |
| default: | |
| return "Checking GPU status…"; | |
| } | |
| } | |
| function descriptionFor(status: Status): string { | |
| switch (status.kind) { | |
| case "awake": | |
| return "Next request will respond in ~1-3 s. Sleeps again after 5 min idle."; | |
| case "warming": | |
| return "vLLM is loading the 3B weights. First request will resolve in ~30-90 s; subsequent calls are fast."; | |
| case "asleep": | |
| return "First request will trigger a cold boot (~90-120 s while vLLM loads two 3B models on the L4). Click Prewarm now if you'd rather not wait inside the episode."; | |
| case "error": | |
| return "The Space might be temporarily unreachable. Episodes targeting PhysiX-Infer will fail until it recovers — try Hugging Face Router as a fallback."; | |
| case "unknown": | |
| default: | |
| return "Probing https://pratyush-01-physix-infer.hf.space/health …"; | |
| } | |
| } | |
| function showsPrewarm(status: Status): boolean { | |
| return status.kind === "asleep" || status.kind === "error"; | |
| } | |