/** Status banner for the PhysiX-Infer GPU Space.
 *
 *  Why this exists:
 *    The PhysiX-Infer Space sleeps after 5 min of idle to avoid burning
 *    GPU time. A cold-start takes 90-120 s while vLLM downloads / loads
 *    weights for both 3B models. Without warning, a user picks the
 *    PhysiX endpoint, hits Run, and stares at a spinner for 2 minutes
 *    convinced something is broken.
 *
 *    This panel surfaces the underlying state so the wait is *expected*,
 *    not surprising — and offers a one-click "Prewarm" button so the
 *    user can kick the boot off before they pick a system / hit Run.
 *
 *  Mechanics:
 *    - On mount, GET https://pratyush-01-physix-infer.hf.space/health.
 *    - HF Spaces' edge proxy returns one of these observable states:
 *        * 200 with body { upstreams: { qwen: "ok", physix: "ok" } }
 *          → both vLLMs are loaded and serving. Fast next call.
 *        * 200 with one upstream not "ok"
 *          → container running but vLLM still warming. Some calls fast,
 *            some still slow. Treat as "warming".
 *        * 503 while the container boots
 *          → the request reached the Space and woke it, but the vLLMs
 *            are not serving yet. Treat as "warming".
 *        * no response within the probe timeout (connection held open)
 *          → Space is asleep. Whatever woke it (this very request) will
 *            now ride the cold-boot pipeline.
 *    - Re-poll every 15 s while the component is mounted so the badge
 *      stays accurate as the user thinks. Polling is cheap and the
 *      requests count as activity, which keeps the Space awake while
 *      they read — exactly the UX we want during a demo.
 *
 *  Note on CORS: the physix-infer FastAPI uses default CORS, and the
 *  probe is sent with `mode: "cors"`. In that mode the browser only
 *  exposes the response — status code and body alike — when the
 *  server sends Access-Control-Allow-Origin; otherwise fetch() rejects
 *  and we report "error". If a successful response arrives but its
 *  body can't be parsed as JSON, we fall back to "container is up"
 *  (best-effort); a probe that times out is reported as "asleep". */

import { useCallback, useEffect, useRef, useState } from "react";

import { cn } from "@/lib/cn";
import { PHYSIX_INFER_BASE_URL } from "@/lib/llmPresets";

// /health is mounted at the proxy root, so strip the trailing /v1.
// The regex removes a final "/v1" or "/v1/" if present and leaves any
// other base URL untouched before "/health" is appended.
// NOTE(review): presumably PHYSIX_INFER_BASE_URL is an OpenAI-style
// base URL ending in /v1 — confirm against llmPresets.
const HEALTH_URL = PHYSIX_INFER_BASE_URL.replace(/\/v1\/?$/, "") + "/health";

// Delay (ms) between background probes while the banner is mounted.
// 15 s strikes a balance: long enough that we don't spam HF's edge with
// requests, short enough that "GPU is now warm" surfaces well before
// the user has finished typing their prompt.
const POLL_INTERVAL_MS = 15_000;

// Hard ceiling (ms) on a single probe; the probe's AbortController
// fires after this long. HF holds requests open while a Space
// boots, and that boot can take ~120 s. We don't want to *wait* for
// the boot — we want to detect the asleep state early so we can
// render "cold" and offer the Prewarm button. Anything past 6 s
// without a response is "asleep" for our purposes.
const PROBE_TIMEOUT_MS = 6_000;

/** What the banner currently believes about the Space. Discriminated
 *  on `kind`:
 *    - "unknown": initial state, before the first probe has resolved.
 *    - "awake":   the Space answered with a successful response.
 *                 `bothUpstreams` is true only when the response body
 *                 listed at least one upstream and every one was "ok";
 *                 false when the body could not be read for detail.
 *    - "warming": the Space responded but is not fully ready yet (also
 *                 set optimistically when the user clicks Prewarm).
 *    - "asleep":  the probe was aborted by its timeout.
 *    - "error":   the probe failed in a way that doesn't look like a
 *                 cold boot; `message` carries the underlying error
 *                 text. */
type Status =
  | { kind: "unknown" }
  | { kind: "awake"; bothUpstreams: boolean }
  | { kind: "warming" }
  | { kind: "asleep" }
  | { kind: "error"; message: string };

/** Outcome of one `/health` probe, as produced by `runProbe` and
 *  shared between callers by `probe`. */
interface ProbeResult {
  /** The banner state this probe maps to. */
  status: Status;
  /** True if the probe itself was successful enough to count as a
   *  wake-up signal — i.e. HF Spaces' edge proxy received it and
   *  routed it to the container.
   *  NOTE(review): nothing in the visible part of this file reads
   *  this flag — confirm whether another module consumes it. */
  hitContainer: boolean;
}

// Module-level dedup. Multiple mounts share a single in-flight `/health`
// probe and replay the most recent result for a short window.
//
// `inFlight` is the promise of the probe currently on the wire, or null
// when no probe is running.
let inFlight: Promise<ProbeResult> | null = null;
// `lastResult` is the most recent probe outcome — whatever its status,
// failures included — plus the Date.now() timestamp at which it was
// recorded. Null until the first probe completes.
let lastResult: { result: ProbeResult; at: number } | null = null;
// How long (ms) a recorded result may be replayed instead of issuing a
// new request.
const SHARED_RESULT_WINDOW_MS = 5_000;

/** Issues one `/health` request bounded by PROBE_TIMEOUT_MS: the
 *  AbortController fires if the deadline passes first, and the timer
 *  is always cleared once the request settles. */
function probeWithDeadline(): Promise<ProbeResult> {
  const aborter = new AbortController();
  const deadline = window.setTimeout(() => {
    aborter.abort();
  }, PROBE_TIMEOUT_MS);

  return runProbe(aborter.signal).finally(() => {
    window.clearTimeout(deadline);
  });
}

/**
 * Shared entry point for health probes.
 *
 * Resolution order:
 *   1. If a probe is already on the wire, piggy-back on its promise.
 *   2. Otherwise, if the last recorded result is younger than
 *      SHARED_RESULT_WINDOW_MS, replay it without touching the network
 *      (covers callers that arrive just after a probe has settled).
 *   3. Otherwise start a new time-boxed probe, publish it as the
 *      in-flight request, and record its outcome with a timestamp.
 *
 * @returns the probe outcome (fresh, shared, or replayed).
 */
async function probe(): Promise<ProbeResult> {
  if (inFlight !== null) {
    return inFlight;
  }

  const cached = lastResult;
  if (cached !== null) {
    const age = Date.now() - cached.at;
    if (age < SHARED_RESULT_WINDOW_MS) {
      return cached.result;
    }
  }

  const pending = probeWithDeadline();
  inFlight = pending;

  try {
    const outcome = await pending;
    lastResult = { result: outcome, at: Date.now() };
    return outcome;
  } finally {
    // Always release the slot so the next caller can start a new probe.
    inFlight = null;
  }
}

/**
 * Performs one GET against the health endpoint and maps the outcome to
 * a banner status. Never rejects: every failure mode becomes a
 * `ProbeResult`.
 *
 * Mapping:
 *   - fetch rejected after `signal` aborted (timeout) → "asleep"
 *   - fetch rejected for any other reason (offline, DNS, CORS) → "error"
 *   - 4xx response → "error" (waiting will not fix a client error, so
 *     reporting "warming" would leave the banner stuck)
 *   - any other non-2xx response (e.g. 503 during boot) → "warming"
 *   - 2xx, body lists ≥1 upstream and all are "ok" → "awake" (detail)
 *   - 2xx, body is JSON but not all upstreams are "ok" → "warming"
 *   - 2xx, body is not parseable JSON → "awake" (no detail)
 *
 * @param signal aborts the request when the caller's deadline passes.
 * @returns the status to display plus whether the container was reached.
 */
async function runProbe(signal: AbortSignal): Promise<ProbeResult> {
  let response: Response;
  try {
    response = await fetch(HEALTH_URL, {
      method: "GET",
      mode: "cors",
      signal,
    });
  } catch (exc) {
    // The caught value is narrowed instead of cast. `signal.aborted`
    // is checked as well, so a timeout is recognised even if the
    // rejection value is not an `Error` instance.
    const timedOut =
      signal.aborted || (exc instanceof Error && exc.name === "AbortError");
    if (timedOut) {
      // No answer within the deadline: most likely the Space is asleep
      // and HF is holding the request open while it cold-boots.
      return { status: { kind: "asleep" }, hitContainer: false };
    }
    // Network error → DNS / offline / CORS refused. The Space is
    // unreachable from this browser for a reason other than slowness.
    const message = exc instanceof Error ? exc.message : String(exc);
    return { status: { kind: "error", message }, hitContainer: false };
  }

  if (!response.ok) {
    if (response.status >= 400 && response.status < 500) {
      // Client errors (Space removed, private, wrong path) do not
      // resolve by waiting. We cannot tell whether the edge or the
      // container produced the response, so don't claim a wake-up.
      return {
        status: { kind: "error", message: `HTTP ${response.status}` },
        hitContainer: false,
      };
    }
    // 503 (or another server-side status) from /health = at least one
    // vLLM still booting. The request got through, so it did wake the
    // Space.
    return { status: { kind: "warming" }, hitContainer: true };
  }

  let body: unknown;
  try {
    body = await response.json();
  } catch {
    // Non-JSON (or unreadable) body. Best effort: a 2xx means the
    // container answered, so it's awake; we just can't see the
    // per-upstream detail.
    return {
      status: { kind: "awake", bothUpstreams: false },
      hitContainer: true,
    };
  }

  // Validate the shape before trusting it: `upstreams` must be a plain
  // object whose values are all the string "ok", with at least one entry.
  const upstreams =
    typeof body === "object" && body !== null
      ? (body as { upstreams?: unknown }).upstreams
      : undefined;
  const states =
    typeof upstreams === "object" &&
    upstreams !== null &&
    !Array.isArray(upstreams)
      ? Object.values(upstreams)
      : [];
  const everyUpstreamOk =
    states.length > 0 && states.every((state) => state === "ok");

  if (everyUpstreamOk) {
    return {
      status: { kind: "awake", bothUpstreams: true },
      hitContainer: true,
    };
  }
  return { status: { kind: "warming" }, hitContainer: true };
}

/**
 * Live status banner for the PhysiX-Infer Space.
 *
 * Probes the health endpoint on mount and every POLL_INTERVAL_MS while
 * mounted, and renders the result through `StatusBanner`. When the
 * Space looks asleep or unreachable the banner offers a Prewarm button
 * that sends an un-timed request to wake it.
 *
 * @returns the rendered banner element.
 */
export function PhysixInferStatus(): JSX.Element {
  const [status, setStatus] = useState<Status>({ kind: "unknown" });
  const [prewarming, setPrewarming] = useState(false);
  // True while the most recently *accepted* probe result was "awake".
  // It buys exactly one grace probe: a single asleep / error result
  // right after "awake" is treated as a transient blip and ignored.
  const wasAwakeRef = useRef(false);
  // Mirrors `prewarming` for code that runs outside render (`refresh`
  // has no deps, so it cannot read the state value). It also acts as a
  // re-entrancy guard that is not subject to stale closures.
  const prewarmingRef = useRef(false);

  const refresh = useCallback(async () => {
    const { status: next } = await probe();

    // While a prewarm request is pending, HF holds connections open
    // until the container boots, so timed-out probes are expected.
    // Keep showing "warming" rather than bouncing back to "asleep"
    // next to a disabled "Prewarming…" button.
    if (prewarmingRef.current && next.kind === "asleep") {
      setStatus((prev) =>
        prev.kind === "warming" ? prev : { kind: "warming" },
      );
      return;
    }

    const unreachable = next.kind === "asleep" || next.kind === "error";
    if (unreachable && wasAwakeRef.current) {
      // Sticky-awake: spend the one-probe grace period and keep the
      // current banner. The flag is cleared here so that a *second*
      // consecutive failure falls through below and flips the banner;
      // without the reset a Space that really went to sleep would be
      // reported as awake forever.
      wasAwakeRef.current = false;
      return;
    }

    // The ref is updated outside any setState updater so updaters stay
    // pure.
    wasAwakeRef.current = next.kind === "awake";
    setStatus(next);
  }, []);

  useEffect(() => {
    void refresh();
    const id = window.setInterval(() => void refresh(), POLL_INTERVAL_MS);
    return () => window.clearInterval(id);
  }, [refresh]);

  /** Wakes the Space: shows "warming" immediately, sends one request
   *  with no timeout, then re-probes once that request settles. */
  async function handlePrewarm(): Promise<void> {
    if (prewarmingRef.current) return;
    prewarmingRef.current = true;
    setPrewarming(true);
    setStatus({ kind: "warming" });
    try {
      // No timeout here — let the browser hold the connection until
      // HF Spaces wakes up and answers. The response itself is not
      // inspected; the re-probe below and the regular poll report the
      // real state.
      await fetch(HEALTH_URL, { method: "GET", mode: "cors" });
    } catch {
      // Ignore — the polling loop will surface the real state.
    } finally {
      prewarmingRef.current = false;
      setPrewarming(false);
      void refresh();
    }
  }

  return <StatusBanner status={status} onPrewarm={handlePrewarm} prewarming={prewarming} />;
}

// ---------------------------------------------------------------------
// Render
// ---------------------------------------------------------------------

/** Props for the presentational `StatusBanner`. */
interface StatusBannerProps {
  /** State to render; selects the colours, label and description. */
  status: Status;
  /** Click handler for the Prewarm button. */
  onPrewarm: () => void;
  /** While true the Prewarm button is disabled and reads "Prewarming…". */
  prewarming: boolean;
}

/**
 * Presentational banner: a coloured dot, a title, a one-line
 * description and — only for states where `showsPrewarm` is true —
 * a Prewarm button. Holds no state of its own.
 */
function StatusBanner({
  status,
  onPrewarm,
  prewarming,
}: StatusBannerProps): JSX.Element {
  const { bg, border, dot, title } = toneFor(status);
  const buttonLabel = prewarming ? "Prewarming…" : "Prewarm GPU";

  // Built up front so the layout below stays flat; null renders nothing.
  const prewarmButton = showsPrewarm(status) ? (
    <button
      type="button"
      onClick={onPrewarm}
      disabled={prewarming}
      className={cn(
        "shrink-0 rounded-md border border-border bg-surface px-2 py-1 text-[10px] font-medium uppercase tracking-wider transition",
        "hover:bg-surfaceMuted disabled:cursor-not-allowed disabled:opacity-60",
      )}
    >
      {buttonLabel}
    </button>
  ) : null;

  return (
    <div
      className={cn(
        "rounded-lg border px-3 py-2 text-[11px] leading-relaxed",
        bg,
        border,
      )}
    >
      <div className="flex items-start gap-2">
        {/* Decorative status dot; hidden from assistive technology. */}
        <span
          aria-hidden
          className={cn("mt-1 inline-block h-2 w-2 shrink-0 rounded-full", dot)}
        />
        <div className="flex-1 min-w-0">
          <p className={cn("font-medium", title)}>{labelFor(status)}</p>
          <p className="mt-0.5 text-textMuted">{descriptionFor(status)}</p>
        </div>
        {prewarmButton}
      </div>
    </div>
  );
}

/** CSS class names (Tailwind-style utilities) that colour one banner
 *  state. Each field is passed to `cn(...)` by `StatusBanner`. */
interface Tone {
  /** Background class for the banner container. */
  bg: string;
  /** Border-colour class for the banner container. */
  border: string;
  /** Class(es) for the status dot, including any pulse animation. */
  dot: string;
  /** Text-colour class for the title line. */
  title: string;
}

/**
 * Picks the colour classes for a status.
 *
 * Green for "awake", amber for "warming" and "asleep" (the dot pulses
 * only while warming), rose for "error", and the neutral surface
 * palette with a pulsing dot for "unknown" or any unrecognised kind.
 *
 * @param status the state to colour.
 * @returns a newly created `Tone` object.
 */
function toneFor(status: Status): Tone {
  const { kind } = status;

  if (kind === "awake") {
    return {
      bg: "bg-emerald-950/40",
      border: "border-emerald-800/60",
      dot: "bg-emerald-400",
      title: "text-emerald-200",
    };
  }

  if (kind === "warming" || kind === "asleep") {
    // Both share the amber palette; only the dot differs. Class names
    // stay as complete literals so build-time class scanning sees them.
    const dot = kind === "warming" ? "bg-amber-400 animate-pulse" : "bg-amber-500";
    return {
      bg: "bg-amber-950/40",
      border: "border-amber-800/60",
      dot,
      title: "text-amber-200",
    };
  }

  if (kind === "error") {
    return {
      bg: "bg-rose-950/40",
      border: "border-rose-800/60",
      dot: "bg-rose-500",
      title: "text-rose-200",
    };
  }

  // "unknown" and anything unexpected.
  return {
    bg: "bg-surfaceMuted",
    border: "border-border",
    dot: "bg-textMuted animate-pulse",
    title: "text-textPrimary",
  };
}

/**
 * Title line for the banner.
 *
 * @param status the state to describe.
 * @returns a short headline; "awake" mentions both models only when
 *   `bothUpstreams` is set, and "unknown" or any unrecognised kind
 *   yields the "checking" placeholder.
 */
function labelFor(status: Status): string {
  if (status.kind === "awake") {
    if (status.bothUpstreams) {
      return "GPU is warm — both models loaded";
    }
    return "GPU is warm";
  }
  if (status.kind === "warming") {
    return "GPU is warming up";
  }
  if (status.kind === "asleep") {
    return "GPU is asleep";
  }
  if (status.kind === "error") {
    return "Couldn't reach the GPU Space";
  }
  // "unknown" and anything unexpected.
  return "Checking GPU status…";
}

// Explanatory copy for every state that has its own wording, keyed by
// `Status["kind"]`. Kinds missing from this table ("unknown" included)
// get the probing placeholder from `descriptionFor`.
const DESCRIPTION_BY_KIND: ReadonlyMap<string, string> = new Map([
  [
    "awake",
    "Next request will respond in ~1-3 s. Sleeps again after 5 min idle.",
  ],
  [
    "warming",
    "vLLM is loading the 3B weights. First request will resolve in ~30-90 s; subsequent calls are fast.",
  ],
  [
    "asleep",
    "First request will trigger a cold boot (~90-120 s while vLLM loads two 3B models on the L4). Click Prewarm now if you'd rather not wait inside the episode.",
  ],
  [
    "error",
    "The Space might be temporarily unreachable. Episodes targeting PhysiX-Infer will fail until it recovers — try Hugging Face Router as a fallback.",
  ],
]);

/**
 * Secondary line for the banner: what the user should expect from the
 * next request in the given state.
 *
 * @param status the state to describe.
 * @returns the description for that kind, or the "Probing …"
 *   placeholder for "unknown" and any unrecognised kind.
 */
function descriptionFor(status: Status): string {
  const text = DESCRIPTION_BY_KIND.get(status.kind);
  return text ?? "Probing https://pratyush-01-physix-infer.hf.space/health …";
}

/**
 * Whether the Prewarm button should be offered for this state.
 *
 * @param status the state being rendered.
 * @returns true only for "asleep" and "error".
 */
function showsPrewarm(status: Status): boolean {
  switch (status.kind) {
    case "asleep":
    case "error":
      return true;
    default:
      return false;
  }
}