// physix-live/frontend/src/components/RunWithLlmPane.tsx
// Author: Pratyush-01
// Commit c2a38db (verified): ui: clear stale session/transcript when user
// switches model preset
/** Single-LLM run pane: pick one of three preset models, hit run,
* watch the model step through the episode turn by turn.
*
* This is the "all the steps" view — every turn's hypothesis, reward
* breakdown, latency, and raw completion are surfaced in a scrollable
* transcript so judges can audit exactly what the model proposed.
*
* The model picker is intentionally a hard 3-option choice (trained
* PhysiX-3B, Qwen 3B base, Qwen 7B baseline) — typing model ids was
* confusing for first-time users and most picks ended up being one of
* these three anyway. */
import { useEffect, useMemo, useState } from "react";
import { EquationDisplay } from "@/components/EquationDisplay";
import { PhysixInferStatus } from "@/components/PhysixInferStatus";
import { Skeleton } from "@/components/Skeleton";
import { TrajectoryCanvas } from "@/components/TrajectoryCanvas";
import {
type LlmTurn,
type RunnerStatus,
useLlmEpisodeRunner,
} from "@/hooks/useLlmEpisodeRunner";
import { cn } from "@/lib/cn";
import {
DEFAULT_SINGLE_LLM_CONNECTION,
MODEL_PRESETS,
type LlmConnection,
type ModelPreset,
findEndpoint,
loadApiKey,
presetForConnection,
saveApiKey,
} from "@/lib/llmPresets";
import { pickPrimaryVariable } from "@/lib/trajectory";
import type { RewardBreakdown } from "@/types/physix";
/** All-zero reward breakdown, used as the displayed reward when no turn
 * has been recorded yet (see `lastReward` in `RunWithLlmPane`). */
const ZERO_REWARD: RewardBreakdown = {
  match: 0,
  progress: 0,
  simplicity: 0,
  format: 0,
  total: 0,
  shape: 0,
  freq: 0,
  amplitude: 0,
};
/** `match` score at or above which the latest turn is labelled
 * "converged" in the status strip. */
const CONVERGENCE_THRESHOLD = 0.93;
/**
 * Top-level pane for running one episode against a single LLM.
 *
 * Owns the form state (connection, system, turn budget, temperature),
 * delegates the episode lifecycle to `useLlmEpisodeRunner`, and renders
 * the preset picker, the control bar, the status strip, the trajectory
 * panel for the latest turn, and the per-turn transcript.
 */
export function RunWithLlmPane(): JSX.Element {
  const runner = useLlmEpisodeRunner();
  const [connection, setConnection] = useState<LlmConnection>(
    () => DEFAULT_SINGLE_LLM_CONNECTION,
  );
  const [systemId, setSystemId] = useState<string>("");
  const [maxTurns, setMaxTurns] = useState<number>(8);
  const [temperature, setTemperature] = useState<number>(0.4);

  // Default to damped_spring; fall back to first in list.
  useEffect(() => {
    if (!systemId && runner.systems && runner.systems.length > 0) {
      const preferred = runner.systems.find(
        (s) => s.system_id === "damped_spring",
      );
      setSystemId(preferred?.system_id ?? runner.systems[0]?.system_id ?? "");
    }
  }, [runner.systems, systemId]);

  const status = runner.status;
  const busy = status === "starting" || status === "running";
  const hasSession = runner.sessionId !== null;
  const endpoint = findEndpoint(connection.endpointId);
  const hasRequiredKey = !endpoint.needsKey || !!connection.apiKey.trim();
  // A run needs a concrete system. `systemId` stays "" until the system
  // list has loaded, so Run must remain disabled during that window
  // instead of starting an episode with an empty system id.
  const hasSystem = systemId !== "";
  const canStart =
    !busy &&
    hasSystem &&
    !!connection.model.trim() &&
    !!connection.baseUrl.trim() &&
    hasRequiredKey;

  /** Starts a new episode with the current form values. */
  function handleStart(): void {
    // Defensive: the Run button is already disabled when `canStart` is
    // false, but never fire a start request with an incomplete form.
    if (!canStart) return;
    void runner.start({
      systemId: systemId,
      maxTurns,
      connection,
      temperature,
    });
  }

  // Switching the model preset after a run finished should also clear
  // the stale session/trajectory panel — otherwise the picker says one
  // thing and the displayed transcript still belongs to the old model
  // until the user clicks End or Run. Live runs can't switch (picker is
  // disabled while busy), so this only triggers in idle/ended/error.
  function handleConnectionChange(next: LlmConnection): void {
    // An API-key-only edit is not a preset change and must not end the
    // session, so the key is deliberately left out of this comparison.
    const presetChanged =
      next.endpointId !== connection.endpointId ||
      next.model !== connection.model ||
      next.baseUrl !== connection.baseUrl;
    if (
      presetChanged &&
      (hasSession || status === "ended" || status === "error")
    ) {
      void runner.end();
    }
    setConnection(next);
  }

  // ---- Render data --------------------------------------------------------
  const turns = runner.turns;
  const latestTurn: LlmTurn | undefined = turns[turns.length - 1];
  // Before the first turn completes, fall back to the initial observation.
  const observation = latestTurn?.observation ?? runner.initialObservation;
  const lastReward = latestTurn?.observation.reward_breakdown ?? ZERO_REWARD;
  const observed =
    runner.initialObservation?.trajectory ?? observation?.trajectory ?? [];
  const stateVariables = observation?.state_variables ?? [];
  const primaryVariable = useMemo(
    () =>
      stateVariables.length > 0 ? pickPrimaryVariable(stateVariables) : "y",
    [stateVariables],
  );
  const finalMatch = lastReward.match ?? 0;
  const converged = finalMatch >= CONVERGENCE_THRESHOLD;

  return (
    <section className="flex flex-col gap-6">
      <header className="flex flex-col gap-2">
        <p className="heading-eyebrow text-primary">Run with one LLM</p>
        <h2 className="text-2xl font-semibold leading-tight">
          Watch the model propose, verify, and refine — turn by turn.
        </h2>
        <p className="max-w-3xl text-sm leading-relaxed text-textMuted">
          Pick any OpenAI-compatible endpoint, point it at one PhysiX
          system, and step through the episode. Every turn's equation,
          reward breakdown, latency, and raw completion are dumped
          below so you can audit exactly what the model said and what
          the verifier did with it. No LLM-as-judge — rewards come from
          forward-simulating each proposal and comparing to observation.
        </p>
      </header>
      <ModelPresetPicker
        connection={connection}
        onChange={handleConnectionChange}
        disabled={busy}
      />
      <ControlBar
        systems={runner.systems}
        systemId={systemId}
        onSelectSystem={setSystemId}
        temperature={temperature}
        onChangeTemperature={setTemperature}
        maxTurns={maxTurns}
        onChangeMaxTurns={setMaxTurns}
        canStart={canStart}
        status={status}
        hasSession={hasSession}
        onStart={handleStart}
        onPause={runner.pause}
        onResume={() => void runner.resume()}
        onStepOnce={() => void runner.stepOnce()}
        onEnd={() => void runner.end()}
      />
      {runner.errorMessage ? (
        <ErrorRow
          message={runner.errorMessage}
          onDismiss={() => runner.resetError()}
        />
      ) : null}
      <RunStatusStrip
        status={status}
        converged={converged}
        turnCount={turns.length}
        maxTurns={runner.maxTurns}
        sessionId={runner.sessionId}
        systemId={runner.systemId}
        finalMatch={finalMatch}
      />
      {observation ? (
        <>
          <div className="panel flex flex-col gap-3">
            <div className="flex items-center justify-between">
              <h3 className="font-mono text-sm">
                Observed vs predicted —{" "}
                <span className="text-textMuted">{primaryVariable}(t)</span>
              </h3>
              <span className="text-xs text-textMuted">
                {observed.length} sample
                {observed.length === 1 ? "" : "s"} ·{" "}
                {stateVariables.join(", ") || "—"}
              </span>
            </div>
            <TrajectoryCanvas
              observed={observed}
              predicted={latestTurn?.predictedTrajectory ?? []}
              variable={primaryVariable}
              variableLabel={primaryVariable}
              predictedProgress={1}
            />
            <DenseRewardRow reward={lastReward} />
            <p className="text-[11px] text-textMuted">
              <span className="font-semibold uppercase tracking-wide text-textPrimary">
                Hint:
              </span>{" "}
              {observation.hint || "(none)"}
            </p>
          </div>
          <Transcript turns={turns} status={status} />
        </>
      ) : (
        <RunPlaceholder status={status} />
      )}
    </section>
  );
}
// ---------------------------------------------------------------------------
/** Props for `ControlBar`: the episode form values plus the lifecycle
 * callbacks behind the Run / Pause / Step / Resume / End buttons. */
interface ControlBarProps {
  /** Selectable systems; `null` renders a single "Loading…" option and
   * disables the select. */
  systems: import("@/lib/interactiveClient").SystemDescriptor[] | null;
  /** Currently selected system id (the select's value). */
  systemId: string;
  /** Called with the newly selected system id. */
  onSelectSystem: (id: string) => void;
  /** Current value of the "Temp" input. */
  temperature: number;
  /** Called with the new temperature after the input changes. */
  onChangeTemperature: (n: number) => void;
  /** Current value of the "Turn budget" input. */
  maxTurns: number;
  /** Called with the new turn budget after the input changes. */
  onChangeMaxTurns: (n: number) => void;
  /** When false, the Run button is disabled. */
  canStart: boolean;
  /** Runner status; decides which buttons are shown and which inputs
   * are disabled. */
  status: RunnerStatus;
  /** When false, only the Run button is shown. */
  hasSession: boolean;
  /** Click handler for Run. */
  onStart: () => void;
  /** Click handler for Pause. */
  onPause: () => void;
  /** Click handler for Resume. */
  onResume: () => void;
  /** Click handler for Step. */
  onStepOnce: () => void;
  /** Click handler for End. */
  onEnd: () => void;
}
/** Inclusive bounds enforced on the "Temp" input. */
const TEMPERATURE_MIN = 0;
const TEMPERATURE_MAX = 2;
/** Inclusive bounds enforced on the "Turn budget" input. */
const TURN_BUDGET_MIN = 1;
const TURN_BUDGET_MAX = 32;

/** Parses a numeric input string and clamps it to `[min, max]`.
 * Unparseable / non-finite input falls back to `min`. */
function clampToRange(raw: string, min: number, max: number): number {
  const parsed = Number(raw);
  if (!Number.isFinite(parsed)) return min;
  return Math.min(max, Math.max(min, parsed));
}

/**
 * Episode control form: system select, temperature, turn budget, and
 * the lifecycle buttons.
 *
 * Button layout depends on the runner state:
 * - no session: Run (disabled unless `canStart`);
 * - status "running": Pause + End;
 * - otherwise: Step (disabled once ended), Resume (hidden once ended), End.
 *
 * All three inputs are disabled while the runner is starting or running.
 */
function ControlBar({
  systems,
  systemId,
  onSelectSystem,
  temperature,
  onChangeTemperature,
  maxTurns,
  onChangeMaxTurns,
  canStart,
  status,
  hasSession,
  onStart,
  onPause,
  onResume,
  onStepOnce,
  onEnd,
}: ControlBarProps): JSX.Element {
  const busy = status === "starting" || status === "running";
  return (
    <header className="panel flex flex-col gap-3">
      <div>
        <p className="heading-eyebrow text-primary">Episode controls</p>
        <p className="mt-1 text-xs text-textMuted">
          Run kicks off an autoplay loop. Pause to stop the loop without
          ending the session, then Step to execute a single turn or
          Resume to continue.
        </p>
      </div>
      <div className="flex flex-wrap items-end gap-3">
        <Field label="System">
          <select
            className="rounded-lg border border-border bg-surfaceMuted px-3 py-2 text-sm text-textPrimary outline-none transition focus:border-textMuted disabled:opacity-50"
            value={systemId}
            onChange={(e) => onSelectSystem(e.target.value)}
            disabled={systems === null || busy}
          >
            {systems === null ? (
              <option value="">Loading…</option>
            ) : (
              systems.map((d) => (
                <option key={d.system_id} value={d.system_id}>
                  {prettySystemId(d.system_id)}
                </option>
              ))
            )}
          </select>
        </Field>
        <Field label="Temp">
          <input
            type="number"
            min={TEMPERATURE_MIN}
            max={TEMPERATURE_MAX}
            step={0.1}
            className="w-20 rounded-lg border border-border bg-surfaceMuted px-3 py-2 text-sm text-textPrimary outline-none focus:border-textMuted"
            value={temperature}
            onChange={(e) =>
              onChangeTemperature(
                clampToRange(e.target.value, TEMPERATURE_MIN, TEMPERATURE_MAX),
              )
            }
            disabled={busy}
          />
        </Field>
        <Field label="Turn budget">
          <input
            type="number"
            min={TURN_BUDGET_MIN}
            max={TURN_BUDGET_MAX}
            className="w-24 rounded-lg border border-border bg-surfaceMuted px-3 py-2 text-sm text-textPrimary outline-none focus:border-textMuted"
            value={maxTurns}
            onChange={(e) =>
              // The `max` attribute does not stop typed values, so the
              // upper bound is enforced here; a turn count must also be
              // a whole number.
              onChangeMaxTurns(
                Math.round(
                  clampToRange(
                    e.target.value,
                    TURN_BUDGET_MIN,
                    TURN_BUDGET_MAX,
                  ),
                ),
              )
            }
            disabled={busy}
          />
        </Field>
        <div className="ml-auto flex flex-wrap items-center gap-2">
          {!hasSession ? (
            <button
              type="button"
              className="btn-primary"
              onClick={onStart}
              disabled={!canStart}
            >
              ▶ Run
            </button>
          ) : status === "running" ? (
            <>
              <button
                type="button"
                className="btn-secondary"
                onClick={onPause}
              >
                ⏸ Pause
              </button>
              <button
                type="button"
                className="btn-secondary"
                onClick={onEnd}
              >
                End
              </button>
            </>
          ) : (
            <>
              <button
                type="button"
                className="btn-secondary"
                onClick={onStepOnce}
                disabled={status === "ended"}
              >
                ⏭ Step
              </button>
              {status !== "ended" && (
                <button
                  type="button"
                  className="btn-primary"
                  onClick={onResume}
                >
                  ▶ Resume
                </button>
              )}
              <button
                type="button"
                className="btn-secondary"
                onClick={onEnd}
              >
                End
              </button>
            </>
          )}
        </div>
      </div>
    </header>
  );
}
// ---------------------------------------------------------------------------
/**
 * One-line summary of the current run: status pill, turn counter,
 * latest match score, and (when present) the system and session ids.
 *
 * The turn budget is only shown when `maxTurns` is positive. The session
 * id is shortened to its first 12 characters; the trailing ellipsis is
 * added only when characters were actually dropped.
 */
function RunStatusStrip({
  status,
  converged,
  turnCount,
  maxTurns,
  sessionId,
  systemId,
  finalMatch,
}: {
  status: RunnerStatus;
  converged: boolean;
  turnCount: number;
  maxTurns: number;
  sessionId: string | null;
  systemId: string | null;
  finalMatch: number;
}): JSX.Element {
  const sessionPreviewLength = 12;
  const sessionPreview =
    sessionId !== null && sessionId.length > sessionPreviewLength
      ? `${sessionId.slice(0, sessionPreviewLength)}…`
      : sessionId;
  return (
    <div className="panel-muted flex flex-wrap items-center gap-x-6 gap-y-2 text-sm">
      <StatusPill status={status} converged={converged} />
      <span className="flex items-center gap-2">
        <span className="heading-eyebrow text-textMuted">Turn</span>
        <span className="font-mono text-textPrimary">
          {turnCount}
          {maxTurns > 0 ? ` / ${maxTurns}` : ""}
        </span>
      </span>
      <span className="flex items-center gap-2">
        <span className="heading-eyebrow text-textMuted">match R²</span>
        <span className="font-mono text-textPrimary">
          {finalMatch.toFixed(3)}
        </span>
      </span>
      {systemId && (
        <span className="flex items-center gap-2">
          <span className="heading-eyebrow text-textMuted">system</span>
          <span className="font-mono text-textPrimary">
            {prettySystemId(systemId)}
          </span>
        </span>
      )}
      {sessionId && (
        <span className="flex items-center gap-2">
          <span className="heading-eyebrow text-textMuted">session_id</span>
          <span className="font-mono text-textPrimary">{sessionPreview}</span>
        </span>
      )}
    </div>
  );
}
/** Text and colour classes for each runner status pill. */
const STATUS_PILL_STYLES: Record<RunnerStatus, { label: string; cls: string }> =
  {
    idle: { label: "idle", cls: "border-textMuted/40 text-textMuted" },
    starting: {
      label: "starting",
      cls: "border-accentBlue/40 text-accentBlue",
    },
    running: { label: "running", cls: "border-accentBlue/40 text-accentBlue" },
    paused: { label: "paused", cls: "border-accentAmber/40 text-accentAmber" },
    ended: { label: "done", cls: "border-textMuted/40 text-textMuted" },
    error: { label: "error", cls: "border-primary/50 text-primary" },
  };

/**
 * Small rounded badge for the run state. A converged run always shows
 * the green "converged" badge, whatever the runner status is; otherwise
 * the badge text and colour are looked up from the status.
 */
function StatusPill({
  status,
  converged,
}: {
  status: RunnerStatus;
  converged: boolean;
}): JSX.Element {
  if (!converged) {
    const pill = STATUS_PILL_STYLES[status];
    return (
      <span
        className={cn(
          "rounded-full border bg-surface px-2 py-0.5 text-[11px] uppercase tracking-wider",
          pill.cls,
        )}
      >
        {pill.label}
      </span>
    );
  }
  return (
    <span className="rounded-full border border-accentGreen/40 bg-accentGreen/10 px-2 py-0.5 text-[11px] uppercase tracking-wider text-accentGreen">
      converged
    </span>
  );
}
// ---------------------------------------------------------------------------
/**
 * Ordered list of every turn played so far, one `TurnCard` per turn.
 * With no turns yet it renders a one-line notice instead, whose wording
 * depends on whether the runner is currently starting/running.
 */
function Transcript({
  turns,
  status,
}: {
  turns: LlmTurn[];
  status: RunnerStatus;
}): JSX.Element {
  const turnCount = turns.length;

  if (turnCount === 0) {
    const inFlight = status === "starting" || status === "running";
    const notice = inFlight
      ? "Waiting for the first turn…"
      : "No turns yet. Click Run to start.";
    return (
      <div className="panel">
        <p className="text-xs text-textMuted">{notice}</p>
      </div>
    );
  }

  const cards = turns.map((entry) => (
    <TurnCard key={entry.turn} turn={entry} />
  ));
  const plural = turnCount === 1 ? "" : "s";

  return (
    <div className="panel flex flex-col gap-3">
      <div className="flex items-center justify-between">
        <h3 className="text-sm font-semibold text-textPrimary">
          Transcript — every turn
        </h3>
        <span className="text-xs text-textMuted">
          {turnCount} turn{plural}
        </span>
      </div>
      <ol className="flex flex-col gap-3">{cards}</ol>
    </div>
  );
}
/**
 * Transcript entry for a single turn: header (turn number, match score
 * as a percentage, total reward, latency, model), the proposed equation
 * or a "no parseable equation" notice, the reward breakdown, and — when
 * present — the verifier's mismatch summary and the raw completion.
 */
function TurnCard({ turn }: { turn: LlmTurn }): JSX.Element {
  const reward = turn.observation.reward_breakdown;
  const match = reward.match ?? 0;
  const matchPct = match * 100;
  // Highlight with the same threshold the status strip uses for
  // "converged", compared on the raw score rather than a hand-copied
  // percentage, so the two indicators cannot drift apart.
  const meetsThreshold = match >= CONVERGENCE_THRESHOLD;
  return (
    <li className="rounded-lg border border-border bg-surfaceMuted p-3">
      <header className="mb-2 flex flex-wrap items-center justify-between gap-2 text-[11px] text-textMuted">
        <div className="flex items-center gap-3">
          <span className="rounded-full border border-border bg-surface px-2 py-0.5 font-mono">
            turn {turn.turn}
          </span>
          <span className="font-mono">
            R²{" "}
            <span
              className={cn(
                "text-textPrimary",
                meetsThreshold && "text-accentGreen",
              )}
            >
              {matchPct.toFixed(1)}%
            </span>
          </span>
          <span className="font-mono">
            reward{" "}
            <span className="text-textPrimary">
              {(reward.total ?? 0).toFixed(3)}
            </span>
          </span>
          <span className="font-mono">
            {turn.latencyS.toFixed(1)}s
          </span>
          <span className="font-mono">{turn.model}</span>
        </div>
        {turn.observation.done && (
          <span className="rounded-full border border-accentGreen/40 bg-accentGreen/10 px-2 py-0.5 uppercase tracking-wider text-accentGreen">
            done
          </span>
        )}
      </header>
      {turn.action.equation ? (
        <EquationDisplay
          equation={turn.action.equation}
          rationale={turn.action.rationale}
        />
      ) : (
        <span className="text-accentAmber text-xs">
          (model produced no parseable equation this turn)
        </span>
      )}
      <DenseRewardRow reward={reward} />
      {turn.observation.mismatch_summary && (
        <p className="mt-2 text-[11px] text-textMuted">
          <span className="heading-eyebrow text-textMuted">verifier</span>{" "}
          {turn.observation.mismatch_summary}
        </p>
      )}
      {turn.rawCompletion && (
        <details className="mt-2 rounded border border-border bg-surface px-2 py-1 text-[11px] text-textMuted">
          <summary className="cursor-pointer text-textPrimary">
            Raw completion
          </summary>
          <pre className="mt-1 max-h-48 overflow-auto whitespace-pre-wrap font-mono">
            {turn.rawCompletion}
          </pre>
        </details>
      )}
    </li>
  );
}
// ---------------------------------------------------------------------------
/**
 * Two-row reward summary. The first row shows the four reward
 * components (match / progress / simplicity / format); the second,
 * labelled "diag", shows the diagnostic-only sub-scores
 * (shape / freq / amplitude) in a muted style. Missing values render as 0.
 */
function DenseRewardRow({ reward }: { reward: RewardBreakdown }): JSX.Element {
  // Top row: 4 reward components that go into the weighted total. These
  // gate progression and feed the trainer.
  const scoredKeys = ["match", "progress", "simplicity", "format"] as const;
  // Bottom row: diagnostic-only sub-scores. NOT in the reward total —
  // they capture "visual closeness" (shape / freq / amplitude) that R²
  // collapses to zero on (e.g. oscillator with a 10% frequency error).
  const diagnosticKeys = ["shape", "freq", "amplitude"] as const;

  const scoredCells = scoredKeys.map((key) => (
    <RewardCell key={key} name={key} value={reward[key] ?? 0} />
  ));
  const diagnosticCells = diagnosticKeys.map((key) => (
    <RewardCell key={key} name={key} value={reward[key] ?? 0} muted />
  ));

  return (
    <div className="mt-2 flex flex-col gap-2 rounded-md border border-border bg-surface px-3 py-2 font-mono text-[11px]">
      <div className="grid grid-cols-4 gap-2">{scoredCells}</div>
      <div className="flex items-center gap-2 border-t border-border/60 pt-2">
        <span
          className="text-[10px] uppercase tracking-wider text-textMuted"
          title="Diagnostic-only — not part of the reward total or training signal. Shows visual closeness (shape / freq / amplitude) for cases where R² collapses to zero."
        >
          diag
        </span>
        <div className="grid flex-1 grid-cols-3 gap-2">{diagnosticCells}</div>
      </div>
    </div>
  );
}
/**
 * A single labelled score with a thin progress bar underneath.
 *
 * The number is printed with two decimals. The bar width is the value
 * clamped to 0–100%. Its colour is green (blue when `muted`) from 0.7
 * up, amber from 0.3 up, and grey below that.
 */
function RewardCell({
  name,
  value,
  muted = false,
}: {
  name: string;
  value: number;
  muted?: boolean;
}): JSX.Element {
  let barTone: string;
  if (value >= 0.7) {
    barTone = muted ? "bg-accentBlue/60" : "bg-accentGreen/70";
  } else if (value >= 0.3) {
    barTone = "bg-accentAmber/70";
  } else {
    barTone = "bg-textMuted/40";
  }

  const clampedFraction = Math.max(0, Math.min(1, value));
  const barWidth = `${clampedFraction * 100}%`;
  const valueTone = muted ? "text-textMuted" : "text-textPrimary";

  return (
    <div className="flex flex-col gap-1">
      <div className="flex items-baseline justify-between">
        <span className="text-textMuted">{name}</span>
        <span className={valueTone}>{value.toFixed(2)}</span>
      </div>
      <div
        className="h-1 w-full overflow-hidden rounded-full bg-border"
        aria-hidden
      >
        <div
          className={cn("h-full rounded-full", barTone)}
          style={{ width: barWidth }}
        />
      </div>
    </div>
  );
}
// ---------------------------------------------------------------------
// Model preset picker — three buttons + one (optional) API-key field.
// ---------------------------------------------------------------------
//
// The picker replaces the old "Endpoint dropdown + freeform model id +
// hint paragraph" UI. Users always pick one of three known-good models;
// the API-key field only appears when the picked endpoint needs one
// (just the HF Router 7B preset today). API keys are persisted in
// localStorage, keyed by base URL, via `loadApiKey` / `saveApiKey`,
// so a token typed for the 7B preset survives a page reload and isn't
// shown when the trained PhysiX preset is selected (it doesn't need
// one).
/** Props for `ModelPresetPicker`. */
interface ModelPresetPickerProps {
  /** Current connection; determines which preset card is marked
   * selected and supplies the API-key field's value. */
  connection: LlmConnection;
  /** Called with the full next connection when a preset is picked or
   * the API key changes. */
  onChange: (next: LlmConnection) => void;
  /** Disables the preset cards and the API-key input. */
  disabled?: boolean;
}
/**
 * Radio-style picker over `MODEL_PRESETS`, plus an API-key field that is
 * only rendered when the selected preset's endpoint needs a key.
 *
 * The key is read with `loadApiKey` whenever the base URL changes or a
 * preset is picked, and written with `saveApiKey` on every edit. When
 * the selected preset targets the "physix" endpoint, the
 * `PhysixInferStatus` banner is shown as well.
 */
function ModelPresetPicker({
  connection,
  onChange,
  disabled,
}: ModelPresetPickerProps): JSX.Element {
  // Fall back to the first preset when the connection matches none.
  const activePreset = presetForConnection(connection) ?? MODEL_PRESETS[0]!;
  const { needsKey } = findEndpoint(activePreset.connection.endpointId);
  const [revealKey, setRevealKey] = useState(false);

  // Hydrate the API key from per-URL storage whenever the preset (and
  // therefore base URL) changes.
  useEffect(() => {
    if (!connection.baseUrl) return;
    const savedKey = loadApiKey(connection.baseUrl);
    if (savedKey && savedKey !== connection.apiKey) {
      onChange({ ...connection, apiKey: savedKey });
    }
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [connection.baseUrl]);

  /** Switches to `preset`, carrying over any key stored for its URL. */
  const selectPreset = (preset: ModelPreset): void => {
    const storedKey = loadApiKey(preset.connection.baseUrl);
    onChange({ ...preset.connection, apiKey: storedKey });
  };

  /** Persists the edited key, then pushes it into the connection. */
  const setApiKey = (key: string): void => {
    saveApiKey(connection.baseUrl, key);
    onChange({ ...connection, apiKey: key });
  };

  const isDisabled = disabled ?? false;
  const presetCards = MODEL_PRESETS.map((preset) => (
    <PresetCard
      key={preset.id}
      preset={preset}
      selected={activePreset.id === preset.id}
      disabled={isDisabled}
      onSelect={() => selectPreset(preset)}
    />
  ));

  return (
    <section className="panel flex flex-col gap-4">
      <header>
        <p className="heading-eyebrow text-primary">Pick a model</p>
        <p className="mt-1 text-xs text-textMuted">
          Three known-good options — the trained PhysiX-3B, its Qwen 3B
          base, and a Qwen 7B baseline. No URLs to type, no model ids to
          paste.
        </p>
      </header>
      <div
        role="radiogroup"
        aria-label="Model"
        className="grid grid-cols-1 gap-3 md:grid-cols-3"
      >
        {presetCards}
      </div>
      {needsKey ? (
        <label className="flex flex-col gap-1 text-xs text-textMuted">
          <span className="heading-eyebrow flex items-baseline justify-between gap-2">
            <span>HF token (required)</span>
            <button
              type="button"
              onClick={() => setRevealKey((shown) => !shown)}
              className="text-[10px] uppercase tracking-wider text-textMuted underline hover:text-textPrimary"
            >
              {revealKey ? "hide" : "show"}
            </button>
          </span>
          <input
            type={revealKey ? "text" : "password"}
            value={connection.apiKey}
            onChange={(event) => setApiKey(event.target.value)}
            disabled={disabled}
            placeholder="hf_..."
            className="w-full rounded-lg border border-border bg-surfaceMuted px-3 py-2 font-mono text-xs text-textPrimary outline-none transition focus:border-textMuted disabled:opacity-50"
          />
          <span className="text-[11px] leading-relaxed text-textMuted">
            Get one at{" "}
            <code className="font-mono text-textPrimary">
              huggingface.co/settings/tokens
            </code>{" "}
            with the &quot;Make calls to Inference Providers&quot;
            permission. Saved per endpoint in your browser.
          </span>
        </label>
      ) : null}
      {/* Live banner only when the picked preset hits the GPU Space. */}
      {activePreset.connection.endpointId === "physix" ? (
        <PhysixInferStatus />
      ) : null}
    </section>
  );
}
/** Props for `PresetCard`. */
interface PresetCardProps {
  /** Preset whose label, badge, description and model id are shown. */
  preset: ModelPreset;
  /** Whether this card is the checked radio option. */
  selected: boolean;
  /** Disables the card's button. */
  disabled: boolean;
  /** Called when the card is clicked. */
  onSelect: () => void;
}
/**
 * One selectable model card, rendered as a `role="radio"` button showing
 * the preset's label, badge, description and model id. The selected
 * card gets the primary-coloured border and badge.
 */
function PresetCard({
  preset,
  selected,
  disabled,
  onSelect,
}: PresetCardProps): JSX.Element {
  const cardTone = selected
    ? "border-primary bg-primary/5 shadow-sm"
    : "border-border hover:border-textMuted";
  const badgeTone = selected
    ? "border-primary/60 text-primary"
    : "border-border text-textMuted";

  const cardClass = cn(
    "flex flex-col gap-2 rounded-xl border bg-surfaceMuted p-3 text-left transition",
    "disabled:cursor-not-allowed disabled:opacity-50",
    cardTone,
  );
  const badgeClass = cn(
    "rounded-full border bg-surface px-2 py-0.5 text-[10px] uppercase tracking-wider",
    badgeTone,
  );

  return (
    <button
      type="button"
      role="radio"
      aria-checked={selected}
      onClick={onSelect}
      disabled={disabled}
      className={cardClass}
    >
      <div className="flex items-center justify-between gap-2">
        <span className="text-sm font-semibold text-textPrimary">
          {preset.label}
        </span>
        <span className={badgeClass}>{preset.badge}</span>
      </div>
      <p className="text-[11px] leading-relaxed text-textMuted">
        {preset.description}
      </p>
      <code className="font-mono text-[10px] text-textMuted">
        {preset.connection.model}
      </code>
    </button>
  );
}
/** Props for the dismissible error banner. */
type ErrorRowProps = {
  message: string;
  onDismiss: () => void;
};

/**
 * Error banner: shows `message` verbatim in a wrapping monospace block
 * next to a "dismiss" button that calls `onDismiss`.
 */
function ErrorRow(props: ErrorRowProps): JSX.Element {
  const { message, onDismiss } = props;
  const dismissButton = (
    <button
      type="button"
      onClick={onDismiss}
      className="text-[10px] uppercase tracking-wide text-primary hover:underline"
    >
      dismiss
    </button>
  );
  return (
    <div className="rounded-lg border border-primary/40 bg-primary/10 p-3 text-xs text-primary">
      <div className="flex items-start justify-between gap-3">
        <pre className="whitespace-pre-wrap font-mono">{message}</pre>
        {dismissButton}
      </div>
    </div>
  );
}
/**
 * Stand-in for the trajectory panel while there is no observation:
 * skeleton blocks while the runner is starting, otherwise a short
 * prompt telling the user to configure a connection and press Run.
 */
function RunPlaceholder({ status }: { status: RunnerStatus }): JSX.Element {
  const isStarting = status === "starting";
  return isStarting ? (
    <div className="panel flex flex-col gap-2" aria-busy>
      <Skeleton className="h-[260px] w-full" />
      <Skeleton className="h-3 w-48" />
    </div>
  ) : (
    <div className="panel">
      <p className="text-sm text-textMuted">
        Configure a connection above and press Run to start an episode.
      </p>
    </div>
  );
}
// ---------------------------------------------------------------------------
/**
 * Form-field wrapper: a `<label>` containing an eyebrow-styled caption
 * followed by whatever control is passed as `children`.
 */
function Field(props: {
  label: string;
  children: React.ReactNode;
}): JSX.Element {
  const caption = (
    <span className="heading-eyebrow text-textMuted">{props.label}</span>
  );
  return (
    <label className="flex flex-col gap-1 text-xs text-textMuted">
      {caption}
      {props.children}
    </label>
  );
}
/** Display form of a system id: every underscore becomes a space. */
function prettySystemId(id: string): string {
  const words = id.split("_");
  return words.join(" ");
}