/** Single-LLM run pane: pick one of three preset models, hit run, * watch the model step through the episode turn by turn. * * This is the "all the steps" view — every turn's hypothesis, reward * breakdown, latency, and raw completion are surfaced in a scrollable * transcript so judges can audit exactly what the model proposed. * * The model picker is intentionally a hard 3-option choice (trained * PhysiX-3B, Qwen 3B base, Qwen 7B baseline) — typing model ids was * confusing for first-time users and most picks ended up being one of * these three anyway. */ import { useEffect, useMemo, useState } from "react"; import { EquationDisplay } from "@/components/EquationDisplay"; import { PhysixInferStatus } from "@/components/PhysixInferStatus"; import { Skeleton } from "@/components/Skeleton"; import { TrajectoryCanvas } from "@/components/TrajectoryCanvas"; import { type LlmTurn, type RunnerStatus, useLlmEpisodeRunner, } from "@/hooks/useLlmEpisodeRunner"; import { cn } from "@/lib/cn"; import { DEFAULT_SINGLE_LLM_CONNECTION, MODEL_PRESETS, type LlmConnection, type ModelPreset, findEndpoint, loadApiKey, presetForConnection, saveApiKey, } from "@/lib/llmPresets"; import { pickPrimaryVariable } from "@/lib/trajectory"; import type { RewardBreakdown } from "@/types/physix"; const ZERO_REWARD: RewardBreakdown = { match: 0, progress: 0, simplicity: 0, format: 0, total: 0, shape: 0, freq: 0, amplitude: 0, }; const CONVERGENCE_THRESHOLD = 0.93; export function RunWithLlmPane(): JSX.Element { const runner = useLlmEpisodeRunner(); const [connection, setConnection] = useState( () => DEFAULT_SINGLE_LLM_CONNECTION, ); const [systemId, setSystemId] = useState(""); const [maxTurns, setMaxTurns] = useState(8); const [temperature, setTemperature] = useState(0.4); // Default to damped_spring; fall back to first in list. 
// The effect below only acts while `systemId` is still empty and the
// runner has reported a non-empty systems list, so a system id that is
// already set is left untouched.
//
// Derived values computed after the effect:
//   busy      - status is "starting" or "running".
//   canStart  - not busy, non-blank model and base URL, and a non-blank
//               API key whenever the resolved endpoint has `needsKey`.
//   observed  - trajectory of the initial observation when available,
//               otherwise the current observation's, otherwise [].
//   converged - the latest turn's `match` (0 when there is no turn or no
//               value) is at least CONVERGENCE_THRESHOLD.
// NOTE(review): `observation?.state_variables ?? []` yields a fresh array
// whenever state_variables is nullish, so the useMemo dependency changes
// on every such render — confirm whether that matters here.
useEffect(() => { if (!systemId && runner.systems && runner.systems.length > 0) { const preferred = runner.systems.find((s) => s.system_id === "damped_spring"); setSystemId(preferred?.system_id ?? runner.systems[0]?.system_id ?? ""); } }, [runner.systems, systemId]); const status = runner.status; const busy = status === "starting" || status === "running"; const hasSession = runner.sessionId !== null; const endpoint = findEndpoint(connection.endpointId); const hasRequiredKey = !endpoint.needsKey || !!connection.apiKey.trim(); const canStart = !busy && !!connection.model.trim() && !!connection.baseUrl.trim() && hasRequiredKey; function handleStart(): void { void runner.start({ systemId: systemId, maxTurns, connection, temperature, }); } // ---- Render data -------------------------------------------------------- const turns = runner.turns; const latestTurn: LlmTurn | undefined = turns[turns.length - 1]; const observation = latestTurn?.observation ?? runner.initialObservation; const lastReward = latestTurn?.observation.reward_breakdown ?? ZERO_REWARD; const observed = runner.initialObservation?.trajectory ?? observation?.trajectory ?? []; const stateVariables = observation?.state_variables ?? []; const primaryVariable = useMemo( () => stateVariables.length > 0 ? pickPrimaryVariable(stateVariables) : "y", [stateVariables], ); const finalMatch = lastReward.match ?? 0; const converged = finalMatch >= CONVERGENCE_THRESHOLD; return (
// Pane render: intro copy, then the episode controls wired to the
// runner's resume / stepOnce / end callbacks, an error row (cleared via
// runner.resetError) only while runner.errorMessage is truthy, and the
// observation details only when an observation exists; otherwise the
// fallback branch of the ternary is rendered.

Run with one LLM

Watch the model propose, verify, and refine — turn by turn.

Pick any OpenAI-compatible endpoint, point it at one PhysiX system, and step through the episode. Every turn's equation, reward breakdown, latency, and raw completion are dumped below so you can audit exactly what the model said and what the verifier did with it. No LLM-as-judge — rewards come from forward-simulating each proposal and comparing to observation.

void runner.resume()} onStepOnce={() => void runner.stepOnce()} onEnd={() => void runner.end()} /> {runner.errorMessage ? ( runner.resetError()} /> ) : null} {observation ? ( <>

Observed vs predicted —{" "} {primaryVariable}(t)

{observed.length} sample {observed.length === 1 ? "" : "s"} ·{" "} {stateVariables.join(", ") || "—"}

Hint: {" "} {observation.hint || "(none)"}

) : ( )}
); } // --------------------------------------------------------------------------- interface ControlBarProps { systems: import("@/lib/interactiveClient").SystemDescriptor[] | null; systemId: string; onSelectSystem: (id: string) => void; temperature: number; onChangeTemperature: (n: number) => void; maxTurns: number; onChangeMaxTurns: (n: number) => void; canStart: boolean; status: RunnerStatus; hasSession: boolean; onStart: () => void; onPause: () => void; onResume: () => void; onStepOnce: () => void; onEnd: () => void; } function ControlBar({ systems, systemId, onSelectSystem, temperature, onChangeTemperature, maxTurns, onChangeMaxTurns, canStart, status, hasSession, onStart, onPause, onResume, onStepOnce, onEnd, }: ControlBarProps): JSX.Element { const busy = status === "starting" || status === "running"; return (
// ControlBar render. The temperature change handler clamps the entered
// value to [0, 2] and the max-turns handler enforces a minimum of 1
// before calling the change callbacks; both controls are disabled while
// `busy`. Which action buttons appear branches on `hasSession` and
// `status` (no session / running / anything else, with one control
// hidden once status is "ended").
// NOTE(review): Number(...) returns NaN for non-numeric text and the
// Math.max/Math.min clamps pass NaN through — confirm the inputs only
// ever produce numeric strings.

Episode controls

Run kicks off an autoplay loop. Pause to stop the loop without ending the session, then Step to execute a single turn or Resume to continue.

onChangeTemperature( Math.max(0, Math.min(2, Number(e.target.value))), ) } disabled={busy} /> onChangeMaxTurns(Math.max(1, Number(e.target.value))) } disabled={busy} />
{!hasSession ? ( ) : status === "running" ? ( <> ) : ( <> {status !== "ended" && ( )} )}
); } // --------------------------------------------------------------------------- function RunStatusStrip({ status, converged, turnCount, maxTurns, sessionId, systemId, finalMatch, }: { status: RunnerStatus; converged: boolean; turnCount: number; maxTurns: number; sessionId: string | null; systemId: string | null; finalMatch: number; }): JSX.Element { return (
// Status strip: the turn counter (with " / maxTurns" appended only when
// maxTurns > 0), the match score to three decimals, the system id with
// underscores shown as spaces, and the first 12 characters of the
// session id. The system and session entries are skipped when falsy.
Turn {turnCount} {maxTurns > 0 ? ` / ${maxTurns}` : ""} match R² {finalMatch.toFixed(3)} {systemId && ( system {prettySystemId(systemId)} )} {sessionId && ( session_id {sessionId.slice(0, 12)}… )}
); } function StatusPill({ status, converged, }: { status: RunnerStatus; converged: boolean; }): JSX.Element { if (converged) { return ( converged ); } const map: Record = { idle: { label: "idle", cls: "border-textMuted/40 text-textMuted" }, starting: { label: "starting", cls: "border-accentBlue/40 text-accentBlue" }, running: { label: "running", cls: "border-accentBlue/40 text-accentBlue" }, paused: { label: "paused", cls: "border-accentAmber/40 text-accentAmber" }, ended: { label: "done", cls: "border-textMuted/40 text-textMuted" }, error: { label: "error", cls: "border-primary/50 text-primary" }, }; const { label, cls } = map[status]; return ( {label} ); } // --------------------------------------------------------------------------- function Transcript({ turns, status, }: { turns: LlmTurn[]; status: RunnerStatus; }): JSX.Element { if (turns.length === 0) { return (
// Empty state: no turns recorded yet. The message depends on whether a
// run is currently starting/running or nothing has been started.

{status === "starting" || status === "running" ? "Waiting for the first turn…" : "No turns yet. Click Run to start."}

); } return (
// Non-empty transcript: a heading with the pluralised turn count,
// followed by one entry per element of `turns`.

Transcript — every turn

{turns.length} turn{turns.length === 1 ? "" : "s"}
    {turns.map((turn) => ( ))}
); } function TurnCard({ turn }: { turn: LlmTurn }): JSX.Element { const reward = turn.observation.reward_breakdown; const matchPct = (reward.match ?? 0) * 100; return (
// Turn card: turn number, match as a percentage (one decimal), total
// reward (three decimals), latency (one decimal, suffixed with "s") and
// the model name. The "done" marker, the verifier's mismatch summary and
// the raw completion are each rendered only when present; a fallback
// message replaces the equation when `turn.action.equation` is falsy.
// NOTE(review): the literal 93 below looks like the percentage form of
// CONVERGENCE_THRESHOLD (0.93) — confirm and keep the two in sync.
  • turn {turn.turn} R²{" "} = 93 && "text-accentGreen", )} > {matchPct.toFixed(1)}% reward{" "} {(reward.total ?? 0).toFixed(3)} {turn.latencyS.toFixed(1)}s {turn.model}
    {turn.observation.done && ( done )}
    {turn.action.equation ? ( ) : ( (model produced no parseable equation this turn) )} {turn.observation.mismatch_summary && (

    verifier{" "} {turn.observation.mismatch_summary}

    )} {turn.rawCompletion && (
    Raw completion
                {turn.rawCompletion}
              
    )}
  • ); } // --------------------------------------------------------------------------- function DenseRewardRow({ reward }: { reward: RewardBreakdown }): JSX.Element { // Top row: 4 reward components that go into the weighted total. These // gate progression and feed the trainer. const rewardComponents: { name: string; value: number }[] = [ { name: "match", value: reward.match ?? 0 }, { name: "progress", value: reward.progress ?? 0 }, { name: "simplicity", value: reward.simplicity ?? 0 }, { name: "format", value: reward.format ?? 0 }, ]; // Bottom row: diagnostic-only sub-scores. NOT in the reward total — // they capture "visual closeness" (shape / freq / amplitude) that R² // collapses to zero on (e.g. oscillator with a 10% frequency error). const diagComponents: { name: string; value: number }[] = [ { name: "shape", value: reward.shape ?? 0 }, { name: "freq", value: reward.freq ?? 0 }, { name: "amplitude", value: reward.amplitude ?? 0 }, ]; return (
// One cell per entry of `rewardComponents`, then the "diag" label and
// one cell per entry of `diagComponents`. Missing values were already
// defaulted to 0 when the two arrays were built.
    {rewardComponents.map(({ name, value }) => ( ))}
    diag
    {diagComponents.map(({ name, value }) => ( ))}
    ); } function RewardCell({ name, value, muted = false, }: { name: string; value: number; muted?: boolean; }): JSX.Element { return (
// Shows the name and the value to two decimals. The bar width is the
// value clamped to [0, 1] and expressed as a percentage. The colour
// class has three tiers; `muted` only changes the colour of the top
// tier. NOTE(review): the left side of the first comparison is missing
// in this view — presumably `value >= 0.7`; confirm against the file.
    {name} {value.toFixed(2)}
    = 0.7 ? muted ? "bg-accentBlue/60" : "bg-accentGreen/70" : value >= 0.3 ? "bg-accentAmber/70" : "bg-textMuted/40", )} style={{ width: `${Math.max(0, Math.min(1, value)) * 100}%` }} />
    ); } // --------------------------------------------------------------------- // Model preset picker — three buttons + one (optional) API-key field. // --------------------------------------------------------------------- // // The picker replaces the old "Endpoint dropdown + freeform model id + // hint paragraph" UI. Users always pick one of three known-good models; // the API-key field only appears when the picked endpoint needs one // (just the HF Router 7B preset today). API keys are // persisted in localStorage by base URL via `loadApiKey` / `saveApiKey`, // so a token typed for the 7B preset survives a page reload and isn't // shown when the trained PhysiX preset is selected (it doesn't need // one). interface ModelPresetPickerProps { connection: LlmConnection; onChange: (next: LlmConnection) => void; disabled?: boolean; } function ModelPresetPicker({ connection, onChange, disabled, }: ModelPresetPickerProps): JSX.Element { const selected = presetForConnection(connection) ?? MODEL_PRESETS[0]!; const endpoint = findEndpoint(selected.connection.endpointId); const needsKey = endpoint.needsKey; const [revealKey, setRevealKey] = useState(false); // Hydrate the API key from per-URL storage whenever the preset (and // therefore base URL) changes. useEffect(() => { if (!connection.baseUrl) return; const stored = loadApiKey(connection.baseUrl); if (stored && stored !== connection.apiKey) { onChange({ ...connection, apiKey: stored }); } // eslint-disable-next-line react-hooks/exhaustive-deps }, [connection.baseUrl]); function selectPreset(preset: ModelPreset): void { onChange({ ...preset.connection, apiKey: loadApiKey(preset.connection.baseUrl), }); } function setApiKey(key: string): void { saveApiKey(connection.baseUrl, key); onChange({ ...connection, apiKey: key }); } return (
// Picker render: one card per MODEL_PRESETS entry, each selecting its
// preset via selectPreset (which also reloads that base URL's stored
// key). The API-key section renders only when the selected preset's
// endpoint has `needsKey`; the live banner renders only when the
// selected preset's endpointId is "physix".

    Pick a model

    Three known-good options — the trained PhysiX-3B, its Qwen 3B base, and a Qwen 7B baseline. No URLs to type, no model ids to paste.

    {MODEL_PRESETS.map((preset) => ( selectPreset(preset)} /> ))}
    {needsKey ? ( ) : null} {/* Live banner only when the picked preset hits the GPU Space. */} {selected.connection.endpointId === "physix" ? ( ) : null}
    ); } interface PresetCardProps { preset: ModelPreset; selected: boolean; disabled: boolean; onSelect: () => void; } function PresetCard({ preset, selected, disabled, onSelect, }: PresetCardProps): JSX.Element { return ( ); } function ErrorRow({ message, onDismiss, }: { message: string; onDismiss: () => void; }): JSX.Element { return (
// Error row: displays the `message` text.
    {message}
    ); } function RunPlaceholder({ status }: { status: RunnerStatus }): JSX.Element { if (status === "starting") { return (
// Placeholder variant used only while status is "starting".
    ); } return (
// Default placeholder for every other status: prompts the user to
// configure a connection and press Run.

    Configure a connection above and press Run to start an episode.

    ); } // --------------------------------------------------------------------------- function Field({ label, children, }: { label: string; children: React.ReactNode; }): JSX.Element { return ( ); } function prettySystemId(id: string): string { return id.replace(/_/g, " "); }