/** Drives two parallel `useLlmEpisodeRunner` instances against the same
 *  episode seed. The whole point of the demo is to put two models on
 *  identical input and compare their behaviour, scored by the same
 *  verifier with no LLM-as-judge.
 *
 *  Implementation note: each side gets its own session because the env
 *  builds a turn-by-turn history that the next prompt depends on. We
 *  *don't* fork a single session — that would corrupt history. Instead
 *  we start two sessions with the same `system_id` + `seed`, which the
 *  server already supports via its existing reset path. */

import { useCallback, useMemo, useRef, useState } from "react";

import {
  type LlmEpisodeRunnerControls,
  type LlmEpisodeRunnerState,
  useLlmEpisodeRunner,
} from "@/hooks/useLlmEpisodeRunner";
import type { LlmConnection } from "@/lib/llmPresets";

/** One side of the comparison: a tag saying which side it is, plus the
 *  runner state and controls that `useLlmCompareRunner` exposes for it. */
export interface CompareSlot {
  /** Which side this slot represents. */
  id: "a" | "b";
  /** State of this side's `useLlmEpisodeRunner` instance. */
  state: LlmEpisodeRunnerState;
  /** Controls of this side's `useLlmEpisodeRunner` instance. */
  controls: LlmEpisodeRunnerControls;
}

/** Actions that operate on both comparison slots at once. */
export interface CompareRunnerControls {
  /** Start both sides on the same seed + system. Each side uses its
   *  own connection; `systemId`, `maxTurns` and `temperature` are
   *  shared by both sides. The seed is generated by the hook itself
   *  and is not part of the options. The returned promise resolves
   *  once both sides' `start` calls have resolved and rejects if
   *  either of them rejects. */
  startBoth: (options: {
    systemId?: string | undefined;
    maxTurns?: number | undefined;
    connectionA: LlmConnection;
    connectionB: LlmConnection;
    temperature?: number | undefined;
  }) => Promise<void>;
  /** End both sessions and reset the shared state (`lastSeed` and
   *  `systemId` go back to `null`). Rejects if either side's `end`
   *  call rejects. */
  endBoth: () => Promise<void>;
}

/** Data returned by `useLlmCompareRunner`: the two slots plus the
 *  metadata they share. */
export interface CompareRunnerState {
  /** First side of the comparison (slot id `"a"`). */
  a: CompareSlot;
  /** Second side of the comparison (slot id `"b"`). */
  b: CompareSlot;
  /** Seed the last `startBoth` call locked in. Surfaces in the UI so
   *  users know both sides really saw the same episode. `null` until
   *  `startBoth` has been called, and again after a completed
   *  `endBoth`. */
  lastSeed: number | null;
  /** Resolved system_id (same for both slots): the value passed to
   *  the last `startBoth` when one was given, otherwise whatever
   *  slot A, then slot B, reports. */
  systemId: string | null;
}

/**
 * Runs two `useLlmEpisodeRunner` instances side by side on the same
 * episode so two models can be compared on identical input.
 *
 * Each call to `startBoth` draws one random seed and hands it, together
 * with the shared `systemId` / `maxTurns` / `temperature`, to both
 * runners; only the connection differs per side.
 *
 * @returns Both slots (`a`, `b`), the shared `lastSeed` / `systemId`
 *   metadata, and the `startBoth` / `endBoth` actions. The two actions
 *   keep a stable identity across renders.
 */
export function useLlmCompareRunner(): CompareRunnerState & CompareRunnerControls {
  const a = useLlmEpisodeRunner();
  const b = useLlmEpisodeRunner();
  const [lastSeed, setLastSeed] = useState<number | null>(null);
  const [systemId, setSystemId] = useState<string | null>(null);

  // Keep the latest runners on a ref so `startBoth` / `endBoth` don't
  // have to depend on them — `useLlmEpisodeRunner` presumably recreates
  // its controls on every render, and pulling them through the dep
  // array would churn the callbacks.
  const controlsRef = useRef({ a, b });
  controlsRef.current = { a, b };

  // Typed via the public interface so the options shape is declared in
  // exactly one place and cannot drift from `CompareRunnerControls`.
  const startBoth = useCallback<CompareRunnerControls["startBoth"]>(
    async (options) => {
      // Generate a single seed so both sides see identical observations.
      // The result lies in [0, 2^31 - 2]: 31 bits keeps us inside the
      // JS-safe int range and Numpy-acceptable.
      const seed = Math.floor(Math.random() * 2_147_483_647);
      setLastSeed(seed);
      setSystemId(options.systemId ?? null);

      const common = {
        systemId: options.systemId,
        seed,
        maxTurns: options.maxTurns,
        temperature: options.temperature,
      };

      // Kick off both in parallel — the server makes independent
      // sessions so they can't deadlock on each other.
      await Promise.all([
        controlsRef.current.a.start({ ...common, connection: options.connectionA }),
        controlsRef.current.b.start({ ...common, connection: options.connectionB }),
      ]);
    },
    [],
  );

  const endBoth = useCallback<CompareRunnerControls["endBoth"]>(async () => {
    try {
      await Promise.all([
        controlsRef.current.a.end(),
        controlsRef.current.b.end(),
      ]);
    } finally {
      // Reset the shared metadata even when one side fails to end, so
      // the UI never keeps showing the seed of a torn-down comparison.
      // The rejection itself still propagates to the caller.
      setLastSeed(null);
      setSystemId(null);
    }
  }, []);

  // NOTE(review): each slot spreads the *whole* runner object into both
  // `state` and `controls`, so both carry every field the runner hook
  // returns. Left as-is because the state/controls field split is not
  // visible from this file — confirm whether consumers rely on it.
  const slotA = useMemo<CompareSlot>(
    () => ({
      id: "a",
      state: { ...a },
      controls: { ...a },
    }),
    [a],
  );
  const slotB = useMemo<CompareSlot>(
    () => ({
      id: "b",
      state: { ...b },
      controls: { ...b },
    }),
    [b],
  );

  return {
    a: slotA,
    b: slotB,
    lastSeed,
    // Prefer the id requested in `startBoth`; when none was given, fall
    // back to whatever the runners report (slot A first, then slot B).
    systemId: systemId ?? a.systemId ?? b.systemId,
    startBoth,
    endBoth,
  };
}