File size: 12,744 Bytes
0e24aff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
/** Status banner for the PhysiX-Infer GPU Space.
 *
 *  Why this exists:
 *    The PhysiX-Infer Space sleeps after 5 min of idle to avoid burning
 *    GPU time. A cold-start takes 90-120 s while vLLM downloads / loads
 *    weights for both 3B models. Without warning, a user picks the
 *    PhysiX endpoint, hits Run, and stares at a spinner for 2 minutes
 *    convinced something is broken.
 *
 *    This panel surfaces the underlying state so the wait is *expected*,
 *    not surprising — and offers a one-click "Prewarm" button so the
 *    user can kick the boot off before they pick a system / hit Run.
 *
 *  Mechanics:
 *    - On mount, GET https://pratyush-01-physix-infer.hf.space/health.
 *    - HF Spaces' edge proxy returns one of three observable states:
 *        * 200 with body { upstreams: { qwen: "ok", physix: "ok" } }
 *          → both vLLMs are loaded and serving. Fast next call.
 *        * 200 with one upstream not "ok"
 *          → container running but vLLM still warming. Some calls fast,
 *            some still slow. Treat as "warming".
 *        * 503 / connection-stuck-for->5s
 *          → Space is asleep. Whatever woke it (this very request) will
 *            now ride the cold-boot pipeline.
 *    - Re-poll every 15 s while the component is mounted so the badge
 *      stays accurate as the user thinks. Polling is cheap and the
 *      requests count as activity, which keeps the Space awake while
 *      they read — exactly the UX we want during a demo.
 *
 *  Note on CORS: the probe is issued with `mode: "cors"`, so the
 *  browser only exposes the response (status code and body alike)
 *  when the Space answers with an Access-Control-Allow-Origin header
 *  that covers our origin. Without that header the fetch rejects with
 *  a network error and the banner shows the "error" state. If the
 *  response is readable but its body is not valid JSON, we fall back
 *  to "container is up" (best-effort) on any 200, and we report
 *  "asleep" when the probe times out. */

import { useCallback, useEffect, useRef, useState } from "react";

import { cn } from "@/lib/cn";
import { PHYSIX_INFER_BASE_URL } from "@/lib/llmPresets";

// /health is mounted at the proxy root, so strip the trailing /v1.
// The pattern removes a final "/v1" or "/v1/" from the base URL (and
// leaves the URL untouched if neither is present) before "/health" is
// appended.
const HEALTH_URL = PHYSIX_INFER_BASE_URL.replace(/\/v1\/?$/, "") + "/health";

// Delay between background probes while the component is mounted.
// 15 s strikes a balance: long enough that we don't spam HF's edge with
// requests, short enough that "GPU is now warm" surfaces well before
// the user has finished typing their prompt.
const POLL_INTERVAL_MS = 15_000;

// Hard ceiling on a single probe; the probe's fetch is aborted once
// this much time has elapsed. HF holds requests open while a Space
// boots, and that boot can take ~120 s. We don't want to *wait* for
// the boot — we want to detect the asleep state early so we can
// render "cold" and offer the Prewarm button. Anything past 6 s
// without a response is "asleep" for our purposes.
const PROBE_TIMEOUT_MS = 6_000;

/** What the banner currently believes about the GPU Space. A
 *  discriminated union on `kind`; each variant drives its own colours,
 *  label and description. */
type Status =
  // Initial state: no probe has completed yet.
  | { kind: "unknown" }
  // /health answered 200. `bothUpstreams` is true when the body listed
  // at least one upstream and every listed upstream was "ok"; false
  // when the body could not be read, so per-upstream detail is unknown.
  | { kind: "awake"; bothUpstreams: boolean }
  // The container answered but is not fully ready (non-2xx response, or
  // a 200 whose upstreams are not all "ok"). Also set optimistically
  // right after the user clicks Prewarm.
  | { kind: "warming" }
  // The probe was aborted by its timeout before any response arrived.
  | { kind: "asleep" }
  // The request failed for a reason other than the timeout; `message`
  // carries the thrown error's message.
  | { kind: "error"; message: string };

/** Outcome of a single /health probe. */
interface ProbeResult {
  /** Banner state derived from this probe's response (or failure). */
  status: Status;
  /** True if the probe itself was successful enough to count as a
   *  wake-up signal — i.e. HF Spaces' edge proxy received it and
   *  routed it to the container. Set to true whenever an HTTP response
   *  came back, and to false when the request timed out or failed. */
  hitContainer: boolean;
}

// Module-level dedup. The Compare pane mounts TWO copies of this
// component (one per side), and without coalescing they'd each fire
// their own `/health` GET every 15 s — pointless duplicate load on
// the GPU Space's edge. We share a single in-flight promise across
// concurrent callers and cache the most recent result (whatever its
// outcome — failures are cached too) for a short window, so a caller
// that arrives just after a probe finished reuses that answer instead
// of issuing its own request.

// Promise for the probe currently on the wire, or null when idle.
let inFlight: Promise<ProbeResult> | null = null;
// Most recently completed probe result and the Date.now() timestamp at
// which it was stored; null until the first probe completes.
let lastResult: { result: ProbeResult; at: number } | null = null;
// How long (ms) a stored result may be replayed to later callers.
const SHARED_RESULT_WINDOW_MS = 5_000;

/** Probe the Space's health once, sharing work between callers.
 *
 *  Resolution order:
 *    1. If a probe is already on the wire, return that same promise.
 *    2. If the most recent result is younger than
 *       SHARED_RESULT_WINDOW_MS, replay it without touching the network.
 *    3. Otherwise start a fresh probe that is aborted after
 *       PROBE_TIMEOUT_MS, publish it as the in-flight probe, and record
 *       its result (with a timestamp) once it settles.
 *
 *  @returns The classified result of the shared or freshly issued probe. */
async function probe(): Promise<ProbeResult> {
  // `inFlight` is assigned synchronously below, so a second caller in
  // the same tick always lands here and piggy-backs on the first
  // caller's request.
  if (inFlight !== null) {
    return inFlight;
  }

  // A caller that arrives shortly after a probe finished reuses its
  // answer rather than issuing a duplicate request.
  const cached = lastResult;
  if (cached !== null) {
    const ageMs = Date.now() - cached.at;
    if (ageMs < SHARED_RESULT_WINDOW_MS) {
      return cached.result;
    }
  }

  // Runs one probe under a deadline; the timer is always cleared so it
  // cannot fire after the probe has settled.
  const runWithDeadline = async (): Promise<ProbeResult> => {
    const abortController = new AbortController();
    const deadlineTimer = window.setTimeout(() => {
      abortController.abort();
    }, PROBE_TIMEOUT_MS);

    try {
      return await runProbe(abortController.signal);
    } finally {
      window.clearTimeout(deadlineTimer);
    }
  };

  const pending = runWithDeadline();
  inFlight = pending;

  try {
    const outcome = await pending;
    lastResult = { result: outcome, at: Date.now() };
    return outcome;
  } finally {
    // Whether the probe resolved or rejected, the next caller must be
    // free to start a new one.
    inFlight = null;
  }
}

/** True when `exc` looks like the rejection `fetch` produces after its
 *  AbortSignal fired (an error-like object named "AbortError"). */
function isAbortError(exc: unknown): boolean {
  return (
    typeof exc === "object" &&
    exc !== null &&
    (exc as { name?: unknown }).name === "AbortError"
  );
}

/** Best-effort human-readable text for an arbitrary thrown value, so
 *  the "error" status always carries a real string. */
function describeThrown(exc: unknown): string {
  return exc instanceof Error ? exc.message : String(exc);
}

/** Pull the `upstreams` map out of a parsed /health body.
 *
 *  @returns `null` when the body is not an object at all (nothing
 *  usable to inspect); an empty map when the body is an object but has
 *  no object-valued `upstreams` field; otherwise the map itself. */
function readUpstreams(body: unknown): Record<string, unknown> | null {
  if (typeof body !== "object" || body === null) return null;
  const upstreams = (body as { upstreams?: unknown }).upstreams;
  if (typeof upstreams !== "object" || upstreams === null) return {};
  return upstreams as Record<string, unknown>;
}

/** Issue one GET against HEALTH_URL and classify the outcome.
 *
 *  Classification:
 *    - non-2xx response                       → "warming"
 *    - 200, at least one upstream and all "ok" → "awake" (bothUpstreams: true)
 *    - 200, otherwise-readable JSON object     → "warming"
 *    - 200, body unreadable / not an object    → "awake" (bothUpstreams: false)
 *    - request aborted via `signal`            → "asleep"
 *    - any other failure                       → "error"
 *
 *  Never rejects: every failure is folded into the returned status.
 *
 *  @param signal Abort signal; when it fires before a response arrives
 *                the probe is classified as "asleep".
 *  @returns The classified status plus whether a response was received. */
async function runProbe(signal: AbortSignal): Promise<ProbeResult> {
  try {
    const response = await fetch(HEALTH_URL, {
      method: "GET",
      mode: "cors",
      signal,
    });

    if (!response.ok) {
      // Any non-2xx answer is treated as "still booting": something
      // responded, so the request reached the Space, but it is not
      // ready yet. The expected case is a 503 from /health while at
      // least one vLLM is still loading.
      return { status: { kind: "warming" }, hitContainer: true };
    }

    // 200 — try to read the body. If it can't be parsed, default to
    // "awake but unsure about per-upstream status".
    let body: unknown;
    try {
      body = await response.json();
    } catch {
      return {
        status: { kind: "awake", bothUpstreams: false },
        hitContainer: true,
      };
    }

    const upstreams = readUpstreams(body);
    if (upstreams === null) {
      // Parsed, but not an object (e.g. `null`): same best-effort
      // answer as an unreadable body.
      return {
        status: { kind: "awake", bothUpstreams: false },
        hitContainer: true,
      };
    }

    // An empty map must not count as "all ok" — `every` is vacuously
    // true on an empty list.
    const states = Object.values(upstreams);
    const allOk = states.length > 0 && states.every((v) => v === "ok");
    if (allOk) {
      return {
        status: { kind: "awake", bothUpstreams: true },
        hitContainer: true,
      };
    }
    return { status: { kind: "warming" }, hitContainer: true };
  } catch (exc) {
    // Abort → the probe timed out, which we read as "asleep + slow
    // cold-boot" and render with a Prewarm button. Anything else
    // (DNS failure, offline, CORS refusal) is reported as "error".
    if (isAbortError(exc)) {
      return { status: { kind: "asleep" }, hitContainer: false };
    }
    return {
      status: { kind: "error", message: describeThrown(exc) },
      hitContainer: false,
    };
  }
}

/** Live status banner for the PhysiX-Infer GPU Space.
 *
 *  Probes the Space on mount and then every POLL_INTERVAL_MS while
 *  mounted, renders the resulting state through StatusBanner, and wires
 *  up the "Prewarm GPU" button.
 *
 *  @returns The rendered banner element. */
export function PhysixInferStatus(): JSX.Element {
  const [status, setStatus] = useState<Status>({ kind: "unknown" });
  const [prewarming, setPrewarming] = useState(false);
  // One-shot grace after a confirmed "awake": the next single
  // asleep/error probe is treated as a transient blip and ignored. The
  // grace is consumed by that probe, so a second consecutive failure
  // does flip the banner.
  const awakeGraceRef = useRef(false);
  // Mirrors `prewarming` for code that must read the current value
  // outside React's render cycle (the poll callback and the click guard).
  const prewarmingRef = useRef(false);
  // False before mount and after unmount, so late async completions
  // don't touch state of a component that is gone.
  const mountedRef = useRef(false);

  const refresh = useCallback(async () => {
    const { status: next } = await probe();
    if (!mountedRef.current) return;

    if (next.kind === "asleep" || next.kind === "error") {
      // While a prewarm request is outstanding the Space is expected to
      // be unreachable; keep showing "warming" rather than letting the
      // poll flip the banner back to "asleep" mid-boot.
      if (prewarmingRef.current) return;
      // Sticky-awake: swallow exactly one failed probe after a
      // confirmed "awake". If the Space really went to sleep, the next
      // poll fails again and is applied.
      if (awakeGraceRef.current) {
        awakeGraceRef.current = false;
        return;
      }
    }

    // Ref bookkeeping happens here, not inside a setState updater:
    // updaters must be pure because React may invoke them twice.
    awakeGraceRef.current = next.kind === "awake";
    setStatus(next);
  }, []);

  useEffect(() => {
    mountedRef.current = true;
    void refresh();
    const id = window.setInterval(() => void refresh(), POLL_INTERVAL_MS);
    return () => {
      mountedRef.current = false;
      window.clearInterval(id);
    };
  }, [refresh]);

  /** Wake the Space by sending it a request with no timeout, showing
   *  "warming" in the meantime, then re-probe to pick up the real state. */
  async function handlePrewarm(): Promise<void> {
    // Guard on the ref, not the state: a second click can arrive before
    // the re-render that would disable the button.
    if (prewarmingRef.current) return;
    prewarmingRef.current = true;
    awakeGraceRef.current = false;
    setPrewarming(true);
    setStatus({ kind: "warming" });
    try {
      // No timeout here — let the browser hold the connection until
      // HF Spaces wakes up and answers. The response itself is not
      // inspected; the probe below reports the resulting state.
      await fetch(HEALTH_URL, { method: "GET", mode: "cors" });
    } catch {
      // Ignore — the refresh below and the polling loop will surface
      // the real state.
    } finally {
      prewarmingRef.current = false;
      if (mountedRef.current) {
        setPrewarming(false);
        void refresh();
      }
    }
  }

  return <StatusBanner status={status} onPrewarm={handlePrewarm} prewarming={prewarming} />;
}

// ---------------------------------------------------------------------
// Render
// ---------------------------------------------------------------------

/** Props for the presentational StatusBanner. */
interface StatusBannerProps {
  /** State to render; selects the colours, label and description. */
  status: Status;
  /** Click handler for the Prewarm button. */
  onPrewarm: () => void;
  /** While true, the Prewarm button is disabled and reads "Prewarming…". */
  prewarming: boolean;
}

/** Presentational banner: a coloured dot, a title and description for
 *  the given status, plus a Prewarm button for the states that offer
 *  one (see showsPrewarm). Holds no state of its own. */
function StatusBanner(props: StatusBannerProps): JSX.Element {
  const { status, onPrewarm, prewarming } = props;
  const palette = toneFor(status);

  const containerClass = cn(
    "rounded-lg border px-3 py-2 text-[11px] leading-relaxed",
    palette.bg,
    palette.border,
  );
  const dotClass = cn(
    "mt-1 inline-block h-2 w-2 shrink-0 rounded-full",
    palette.dot,
  );
  const titleClass = cn("font-medium", palette.title);

  // Only some states offer the button; everything else renders nothing
  // in its slot.
  const prewarmButton = showsPrewarm(status) ? (
    <button
      type="button"
      onClick={onPrewarm}
      disabled={prewarming}
      className={cn(
        "shrink-0 rounded-md border border-border bg-surface px-2 py-1 text-[10px] font-medium uppercase tracking-wider transition",
        "hover:bg-surfaceMuted disabled:cursor-not-allowed disabled:opacity-60",
      )}
    >
      {prewarming ? "Prewarming…" : "Prewarm GPU"}
    </button>
  ) : null;

  return (
    <div className={containerClass}>
      <div className="flex items-start gap-2">
        <span aria-hidden className={dotClass} />
        <div className="flex-1 min-w-0">
          <p className={titleClass}>{labelFor(status)}</p>
          <p className="mt-0.5 text-textMuted">{descriptionFor(status)}</p>
        </div>
        {prewarmButton}
      </div>
    </div>
  );
}

/** CSS class names that colour the banner for one status. Each field
 *  is passed to `cn(...)` by StatusBanner. */
interface Tone {
  /** Background class for the banner container. */
  bg: string;
  /** Border-colour class for the banner container. */
  border: string;
  /** Class(es) for the small status dot (colour, optionally an animation). */
  dot: string;
  /** Text-colour class for the title line. */
  title: string;
}

// Fallback palette, used for "unknown" and for any unrecognised kind.
const NEUTRAL_TONE: Tone = {
  bg: "bg-surfaceMuted",
  border: "border-border",
  dot: "bg-textMuted animate-pulse",
  title: "text-textPrimary",
};

// Palette per recognised status kind. Class names are spelled out in
// full rather than assembled from fragments.
const TONE_BY_KIND = new Map<string, Tone>([
  [
    "awake",
    {
      bg: "bg-emerald-950/40",
      border: "border-emerald-800/60",
      dot: "bg-emerald-400",
      title: "text-emerald-200",
    },
  ],
  [
    "warming",
    {
      bg: "bg-amber-950/40",
      border: "border-amber-800/60",
      dot: "bg-amber-400 animate-pulse",
      title: "text-amber-200",
    },
  ],
  [
    "asleep",
    {
      bg: "bg-amber-950/40",
      border: "border-amber-800/60",
      dot: "bg-amber-500",
      title: "text-amber-200",
    },
  ],
  [
    "error",
    {
      bg: "bg-rose-950/40",
      border: "border-rose-800/60",
      dot: "bg-rose-500",
      title: "text-rose-200",
    },
  ],
]);

/** Pick the colour classes for a status.
 *
 *  @param status Current banner status.
 *  @returns A fresh Tone object for `status.kind`; the neutral palette
 *           for "unknown" or any unrecognised kind. */
function toneFor(status: Status): Tone {
  const palette = TONE_BY_KIND.get(status.kind) ?? NEUTRAL_TONE;
  // Hand out a copy so callers never share the table's objects.
  return { ...palette };
}

/** Title line for the banner.
 *
 *  @param status Current banner status.
 *  @returns A short headline for `status.kind`; the "checking" text
 *           for "unknown" or any unrecognised kind. */
function labelFor(status: Status): string {
  if (status.kind === "awake") {
    // Only claim both models are loaded when the probe confirmed it.
    if (status.bothUpstreams) {
      return "GPU is warm — both models loaded";
    }
    return "GPU is warm";
  }
  if (status.kind === "warming") {
    return "GPU is warming up";
  }
  if (status.kind === "asleep") {
    return "GPU is asleep";
  }
  if (status.kind === "error") {
    return "Couldn't reach the GPU Space";
  }
  return "Checking GPU status…";
}

/** Secondary line for the banner, telling the user what to expect.
 *
 *  @param status Current banner status.
 *  @returns Explanatory text for `status.kind`; the "probing" text for
 *           "unknown" or any unrecognised kind. */
function descriptionFor(status: Status): string {
  const probingText =
    "Probing https://pratyush-01-physix-infer.hf.space/health …";

  const textByKind = new Map<string, string>([
    [
      "awake",
      "Next request will respond in ~1-3 s. Sleeps again after 5 min idle.",
    ],
    [
      "warming",
      "vLLM is loading the 3B weights. First request will resolve in ~30-90 s; subsequent calls are fast.",
    ],
    [
      "asleep",
      "First request will trigger a cold boot (~90-120 s while vLLM loads two 3B models on the L4). Click Prewarm now if you'd rather not wait inside the episode.",
    ],
    [
      "error",
      "The Space might be temporarily unreachable. Episodes targeting PhysiX-Infer will fail until it recovers — try Hugging Face Router as a fallback.",
    ],
  ]);

  return textByKind.get(status.kind) ?? probingText;
}

/** Whether the banner should offer the Prewarm button.
 *
 *  @param status Current banner status.
 *  @returns True only for the "asleep" and "error" states. */
function showsPrewarm(status: Status): boolean {
  switch (status.kind) {
    case "asleep":
    case "error":
      return true;
    default:
      return false;
  }
}