Spaces:
Sleeping
Sleeping
frontend: sync clean source — drop ComparePane + Compare LLMs tab + Which-endpoint copy; ship 3-preset picker
Browse files
- frontend/src/App.tsx +2 -10
- frontend/src/components/ComparePane.tsx +0 -624
- frontend/src/components/RunWithLlmPane.tsx +189 -65
- frontend/src/hooks/useLlmCompareRunner.ts +0 -128
- frontend/src/lib/llmPresets.ts +89 -35
frontend/src/App.tsx
CHANGED
|
@@ -1,12 +1,11 @@
|
|
| 1 |
import { useState } from "react";
|
| 2 |
|
| 3 |
-
import { ComparePane } from "@/components/ComparePane";
|
| 4 |
import { HeroIntro } from "@/components/HeroIntro";
|
| 5 |
import { OpenEnvExplorerPane } from "@/components/OpenEnvExplorerPane";
|
| 6 |
import { RunWithLlmPane } from "@/components/RunWithLlmPane";
|
| 7 |
import { cn } from "@/lib/cn";
|
| 8 |
|
| 9 |
-
type TabId = "run" | "compare" | "openenv";
|
| 10 |
|
| 11 |
interface TabDef {
|
| 12 |
id: TabId;
|
|
@@ -15,7 +14,6 @@ interface TabDef {
|
|
| 15 |
|
| 16 |
const TABS: TabDef[] = [
|
| 17 |
{ id: "run", label: "Run with LLM" },
|
| 18 |
-
{ id: "compare", label: "Compare LLMs" },
|
| 19 |
{ id: "openenv", label: "OpenEnv API" },
|
| 20 |
];
|
| 21 |
|
|
@@ -75,13 +73,7 @@ export function App(): JSX.Element {
|
|
| 75 |
id={`panel-${activeTab}`}
|
| 76 |
aria-labelledby={`tab-${activeTab}`}
|
| 77 |
>
|
| 78 |
-
{activeTab === "run" ? (
|
| 79 |
-
<RunWithLlmPane />
|
| 80 |
-
) : activeTab === "compare" ? (
|
| 81 |
-
<ComparePane />
|
| 82 |
-
) : (
|
| 83 |
-
<OpenEnvExplorerPane />
|
| 84 |
-
)}
|
| 85 |
</div>
|
| 86 |
</main>
|
| 87 |
);
|
|
|
|
| 1 |
import { useState } from "react";
|
| 2 |
|
|
|
|
| 3 |
import { HeroIntro } from "@/components/HeroIntro";
|
| 4 |
import { OpenEnvExplorerPane } from "@/components/OpenEnvExplorerPane";
|
| 5 |
import { RunWithLlmPane } from "@/components/RunWithLlmPane";
|
| 6 |
import { cn } from "@/lib/cn";
|
| 7 |
|
| 8 |
+
type TabId = "run" | "openenv";
|
| 9 |
|
| 10 |
interface TabDef {
|
| 11 |
id: TabId;
|
|
|
|
| 14 |
|
| 15 |
const TABS: TabDef[] = [
|
| 16 |
{ id: "run", label: "Run with LLM" },
|
|
|
|
| 17 |
{ id: "openenv", label: "OpenEnv API" },
|
| 18 |
];
|
| 19 |
|
|
|
|
| 73 |
id={`panel-${activeTab}`}
|
| 74 |
aria-labelledby={`tab-${activeTab}`}
|
| 75 |
>
|
| 76 |
+
{activeTab === "run" ? <RunWithLlmPane /> : <OpenEnvExplorerPane />}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
</div>
|
| 78 |
</main>
|
| 79 |
);
|
frontend/src/components/ComparePane.tsx
DELETED
|
@@ -1,624 +0,0 @@
|
|
| 1 |
-
/** A/B comparison pane: trained PhysiX vs. baseline, scored by the
|
| 2 |
-
* same verifier on the same episode seed.
|
| 3 |
-
*
|
| 4 |
-
* Two `LlmConnectionPanel`s feed two parallel `useLlmEpisodeRunner`
|
| 5 |
-
* sessions, started via `useLlmCompareRunner`. The panes render the
|
| 6 |
-
* same trajectory canvas (each side overlays its own predicted
|
| 7 |
-
* trajectory) plus a per-side reward strip and turn transcript. */
|
| 8 |
-
|
| 9 |
-
import { useEffect, useMemo, useState } from "react";
|
| 10 |
-
|
| 11 |
-
import { EquationDisplay } from "@/components/EquationDisplay";
|
| 12 |
-
import { LlmConnectionPanel } from "@/components/LlmConnectionPanel";
|
| 13 |
-
import { Skeleton } from "@/components/Skeleton";
|
| 14 |
-
import { TrajectoryCanvas } from "@/components/TrajectoryCanvas";
|
| 15 |
-
import { useLlmCompareRunner, type CompareSlot } from "@/hooks/useLlmCompareRunner";
|
| 16 |
-
import { type LlmTurn, type RunnerStatus } from "@/hooks/useLlmEpisodeRunner";
|
| 17 |
-
import { cn } from "@/lib/cn";
|
| 18 |
-
import { formatPercent } from "@/lib/format";
|
| 19 |
-
import {
|
| 20 |
-
DEFAULT_CONNECTION_A,
|
| 21 |
-
DEFAULT_CONNECTION_B,
|
| 22 |
-
type LlmConnection,
|
| 23 |
-
} from "@/lib/llmPresets";
|
| 24 |
-
import { pickPrimaryVariable } from "@/lib/trajectory";
|
| 25 |
-
import type { RewardBreakdown } from "@/types/physix";
|
| 26 |
-
|
| 27 |
-
const ZERO_REWARD: RewardBreakdown = {
|
| 28 |
-
match: 0,
|
| 29 |
-
progress: 0,
|
| 30 |
-
simplicity: 0,
|
| 31 |
-
format: 0,
|
| 32 |
-
total: 0,
|
| 33 |
-
shape: 0,
|
| 34 |
-
freq: 0,
|
| 35 |
-
amplitude: 0,
|
| 36 |
-
};
|
| 37 |
-
const CONVERGENCE_THRESHOLD = 0.93;
|
| 38 |
-
|
| 39 |
-
export function ComparePane(): JSX.Element {
|
| 40 |
-
const runner = useLlmCompareRunner();
|
| 41 |
-
|
| 42 |
-
// Connection state lives here so navigating the system / button bar
|
| 43 |
-
// doesn't clear the API key or model id.
|
| 44 |
-
const [connectionA, setConnectionA] = useState<LlmConnection>(
|
| 45 |
-
() => DEFAULT_CONNECTION_A,
|
| 46 |
-
);
|
| 47 |
-
const [connectionB, setConnectionB] = useState<LlmConnection>(
|
| 48 |
-
() => DEFAULT_CONNECTION_B,
|
| 49 |
-
);
|
| 50 |
-
const [systemId, setSystemId] = useState<string>("");
|
| 51 |
-
const [maxTurns, setMaxTurns] = useState<number>(8);
|
| 52 |
-
const [temperature, setTemperature] = useState<number>(0.4);
|
| 53 |
-
|
| 54 |
-
// Default to damped_spring; fall back to first in list.
|
| 55 |
-
useEffect(() => {
|
| 56 |
-
if (!systemId && runner.a.state.systems && runner.a.state.systems.length > 0) {
|
| 57 |
-
const preferred = runner.a.state.systems.find((s) => s.system_id === "damped_spring");
|
| 58 |
-
setSystemId(preferred?.system_id ?? runner.a.state.systems[0]?.system_id ?? "");
|
| 59 |
-
}
|
| 60 |
-
}, [runner.a.state.systems, systemId]);
|
| 61 |
-
|
| 62 |
-
const eitherRunning =
|
| 63 |
-
isActive(runner.a.state.status) || isActive(runner.b.state.status);
|
| 64 |
-
const eitherStarting =
|
| 65 |
-
runner.a.state.status === "starting" || runner.b.state.status === "starting";
|
| 66 |
-
const canStart =
|
| 67 |
-
!eitherRunning &&
|
| 68 |
-
!eitherStarting &&
|
| 69 |
-
!!connectionA.model.trim() &&
|
| 70 |
-
!!connectionB.model.trim() &&
|
| 71 |
-
!!connectionA.baseUrl.trim() &&
|
| 72 |
-
!!connectionB.baseUrl.trim();
|
| 73 |
-
|
| 74 |
-
function handleStart(): void {
|
| 75 |
-
void runner.startBoth({
|
| 76 |
-
systemId: systemId,
|
| 77 |
-
maxTurns,
|
| 78 |
-
connectionA,
|
| 79 |
-
connectionB,
|
| 80 |
-
temperature,
|
| 81 |
-
});
|
| 82 |
-
}
|
| 83 |
-
|
| 84 |
-
return (
|
| 85 |
-
<section className="flex flex-col gap-6">
|
| 86 |
-
<div className="grid grid-cols-1 gap-4 lg:grid-cols-2">
|
| 87 |
-
<LlmConnectionPanel
|
| 88 |
-
title="A"
|
| 89 |
-
subtitle="Left side. Suggested: the trained model."
|
| 90 |
-
accent="primary"
|
| 91 |
-
value={connectionA}
|
| 92 |
-
onChange={setConnectionA}
|
| 93 |
-
disabled={eitherRunning || eitherStarting}
|
| 94 |
-
installedOllamaModels={runner.a.state.models ?? []}
|
| 95 |
-
installedOllamaLoading={runner.a.state.models === null}
|
| 96 |
-
installedOllamaError={runner.a.state.modelsError}
|
| 97 |
-
onRefreshOllama={() => void runner.a.controls.refreshModels()}
|
| 98 |
-
/>
|
| 99 |
-
<LlmConnectionPanel
|
| 100 |
-
title="B"
|
| 101 |
-
subtitle="Right side. Suggested: a baseline you'd expect A to beat."
|
| 102 |
-
accent="blue"
|
| 103 |
-
value={connectionB}
|
| 104 |
-
onChange={setConnectionB}
|
| 105 |
-
disabled={eitherRunning || eitherStarting}
|
| 106 |
-
installedOllamaModels={runner.b.state.models ?? []}
|
| 107 |
-
installedOllamaLoading={runner.b.state.models === null}
|
| 108 |
-
installedOllamaError={runner.b.state.modelsError}
|
| 109 |
-
onRefreshOllama={() => void runner.b.controls.refreshModels()}
|
| 110 |
-
/>
|
| 111 |
-
</div>
|
| 112 |
-
|
| 113 |
-
<CompareControlBar
|
| 114 |
-
systems={runner.a.state.systems}
|
| 115 |
-
systemId={systemId}
|
| 116 |
-
onSelectSystem={setSystemId}
|
| 117 |
-
temperature={temperature}
|
| 118 |
-
onChangeTemperature={setTemperature}
|
| 119 |
-
maxTurns={maxTurns}
|
| 120 |
-
onChangeMaxTurns={setMaxTurns}
|
| 121 |
-
canStart={canStart}
|
| 122 |
-
eitherRunning={eitherRunning}
|
| 123 |
-
eitherStarting={eitherStarting}
|
| 124 |
-
onStart={handleStart}
|
| 125 |
-
onEnd={() => void runner.endBoth()}
|
| 126 |
-
seed={runner.lastSeed}
|
| 127 |
-
/>
|
| 128 |
-
|
| 129 |
-
<div className="grid grid-cols-1 gap-4 lg:grid-cols-2">
|
| 130 |
-
<SlotColumn
|
| 131 |
-
slot={runner.a}
|
| 132 |
-
connection={connectionA}
|
| 133 |
-
accent="primary"
|
| 134 |
-
title="A"
|
| 135 |
-
/>
|
| 136 |
-
<SlotColumn
|
| 137 |
-
slot={runner.b}
|
| 138 |
-
connection={connectionB}
|
| 139 |
-
accent="blue"
|
| 140 |
-
title="B"
|
| 141 |
-
/>
|
| 142 |
-
</div>
|
| 143 |
-
|
| 144 |
-
<ScoreboardBanner a={runner.a} b={runner.b} />
|
| 145 |
-
</section>
|
| 146 |
-
);
|
| 147 |
-
}
|
| 148 |
-
|
| 149 |
-
// ---------------------------------------------------------------------------
|
| 150 |
-
|
| 151 |
-
function CompareControlBar({
|
| 152 |
-
systems,
|
| 153 |
-
systemId,
|
| 154 |
-
onSelectSystem,
|
| 155 |
-
temperature,
|
| 156 |
-
onChangeTemperature,
|
| 157 |
-
maxTurns,
|
| 158 |
-
onChangeMaxTurns,
|
| 159 |
-
canStart,
|
| 160 |
-
eitherRunning,
|
| 161 |
-
eitherStarting,
|
| 162 |
-
onStart,
|
| 163 |
-
onEnd,
|
| 164 |
-
seed,
|
| 165 |
-
}: {
|
| 166 |
-
systems: import("@/lib/interactiveClient").SystemDescriptor[] | null;
|
| 167 |
-
systemId: string;
|
| 168 |
-
onSelectSystem: (id: string) => void;
|
| 169 |
-
temperature: number;
|
| 170 |
-
onChangeTemperature: (n: number) => void;
|
| 171 |
-
maxTurns: number;
|
| 172 |
-
onChangeMaxTurns: (n: number) => void;
|
| 173 |
-
canStart: boolean;
|
| 174 |
-
eitherRunning: boolean;
|
| 175 |
-
eitherStarting: boolean;
|
| 176 |
-
onStart: () => void;
|
| 177 |
-
onEnd: () => void;
|
| 178 |
-
seed: number | null;
|
| 179 |
-
}): JSX.Element {
|
| 180 |
-
return (
|
| 181 |
-
<header className="panel flex flex-col gap-3">
|
| 182 |
-
<div>
|
| 183 |
-
<p className="heading-eyebrow text-primary">Run side-by-side</p>
|
| 184 |
-
<h2 className="mt-1 text-xl font-semibold leading-tight">
|
| 185 |
-
Same episode, two models, one verifier
|
| 186 |
-
</h2>
|
| 187 |
-
<p className="mt-1 max-w-3xl text-xs text-textMuted">
|
| 188 |
-
Both sides see the same trajectory, hint, and seed. Reward
|
| 189 |
-
comes only from <code className="font-mono">scipy.odeint</code>{" "}
|
| 190 |
-
and per-step R² — there is no LLM-as-judge. Differences in the
|
| 191 |
-
score are differences in the physics the model proposes.
|
| 192 |
-
</p>
|
| 193 |
-
</div>
|
| 194 |
-
|
| 195 |
-
<div className="flex flex-wrap items-end gap-3">
|
| 196 |
-
<Field label="System">
|
| 197 |
-
<select
|
| 198 |
-
className="rounded-lg border border-border bg-surfaceMuted px-3 py-2 text-sm text-textPrimary outline-none transition focus:border-textMuted disabled:opacity-50"
|
| 199 |
-
value={systemId}
|
| 200 |
-
onChange={(event) => onSelectSystem(event.target.value)}
|
| 201 |
-
disabled={systems === null || eitherRunning || eitherStarting}
|
| 202 |
-
>
|
| 203 |
-
{systems === null ? (
|
| 204 |
-
<option value="">Loading…</option>
|
| 205 |
-
) : (
|
| 206 |
-
systems.map((descriptor) => (
|
| 207 |
-
<option key={descriptor.system_id} value={descriptor.system_id}>
|
| 208 |
-
{prettySystemId(descriptor.system_id)}
|
| 209 |
-
</option>
|
| 210 |
-
))
|
| 211 |
-
)}
|
| 212 |
-
</select>
|
| 213 |
-
</Field>
|
| 214 |
-
|
| 215 |
-
<Field label="Temp">
|
| 216 |
-
<input
|
| 217 |
-
type="number"
|
| 218 |
-
min={0}
|
| 219 |
-
max={2}
|
| 220 |
-
step={0.1}
|
| 221 |
-
className="w-20 rounded-lg border border-border bg-surfaceMuted px-3 py-2 text-sm text-textPrimary outline-none focus:border-textMuted"
|
| 222 |
-
value={temperature}
|
| 223 |
-
onChange={(event) =>
|
| 224 |
-
onChangeTemperature(
|
| 225 |
-
Math.max(0, Math.min(2, Number(event.target.value))),
|
| 226 |
-
)
|
| 227 |
-
}
|
| 228 |
-
disabled={eitherRunning || eitherStarting}
|
| 229 |
-
/>
|
| 230 |
-
</Field>
|
| 231 |
-
|
| 232 |
-
<Field label="Turn budget">
|
| 233 |
-
<input
|
| 234 |
-
type="number"
|
| 235 |
-
min={1}
|
| 236 |
-
max={32}
|
| 237 |
-
className="w-24 rounded-lg border border-border bg-surfaceMuted px-3 py-2 text-sm text-textPrimary outline-none focus:border-textMuted"
|
| 238 |
-
value={maxTurns}
|
| 239 |
-
onChange={(event) =>
|
| 240 |
-
onChangeMaxTurns(Math.max(1, Number(event.target.value)))
|
| 241 |
-
}
|
| 242 |
-
disabled={eitherRunning || eitherStarting}
|
| 243 |
-
/>
|
| 244 |
-
</Field>
|
| 245 |
-
|
| 246 |
-
<div className="ml-auto flex flex-wrap items-center gap-2">
|
| 247 |
-
{seed !== null ? (
|
| 248 |
-
<span className="rounded-full border border-border bg-surface px-2 py-1 font-mono text-[10px] text-textMuted">
|
| 249 |
-
seed {seed}
|
| 250 |
-
</span>
|
| 251 |
-
) : null}
|
| 252 |
-
{!eitherRunning && !eitherStarting ? (
|
| 253 |
-
<button
|
| 254 |
-
type="button"
|
| 255 |
-
className="btn-primary"
|
| 256 |
-
onClick={onStart}
|
| 257 |
-
disabled={!canStart}
|
| 258 |
-
>
|
| 259 |
-
▶ Run side-by-side
|
| 260 |
-
</button>
|
| 261 |
-
) : (
|
| 262 |
-
<button type="button" className="btn-secondary" onClick={onEnd}>
|
| 263 |
-
End both
|
| 264 |
-
</button>
|
| 265 |
-
)}
|
| 266 |
-
</div>
|
| 267 |
-
</div>
|
| 268 |
-
</header>
|
| 269 |
-
);
|
| 270 |
-
}
|
| 271 |
-
|
| 272 |
-
// ---------------------------------------------------------------------------
|
| 273 |
-
|
| 274 |
-
function SlotColumn({
|
| 275 |
-
slot,
|
| 276 |
-
connection,
|
| 277 |
-
accent,
|
| 278 |
-
title,
|
| 279 |
-
}: {
|
| 280 |
-
slot: CompareSlot;
|
| 281 |
-
connection: LlmConnection;
|
| 282 |
-
accent: "primary" | "blue";
|
| 283 |
-
title: string;
|
| 284 |
-
}): JSX.Element {
|
| 285 |
-
const turns = slot.state.turns;
|
| 286 |
-
const latestTurn: LlmTurn | undefined = turns[turns.length - 1];
|
| 287 |
-
const observation = latestTurn?.observation ?? slot.state.initialObservation;
|
| 288 |
-
const lastReward = latestTurn?.observation.reward_breakdown ?? ZERO_REWARD;
|
| 289 |
-
const observed = slot.state.initialObservation?.trajectory ?? observation?.trajectory ?? [];
|
| 290 |
-
const stateVariables = observation?.state_variables ?? [];
|
| 291 |
-
const primaryVariable = useMemo(
|
| 292 |
-
() =>
|
| 293 |
-
stateVariables.length > 0 ? pickPrimaryVariable(stateVariables) : "y",
|
| 294 |
-
[stateVariables],
|
| 295 |
-
);
|
| 296 |
-
const finalMatch = lastReward.match ?? 0;
|
| 297 |
-
const converged = finalMatch >= CONVERGENCE_THRESHOLD;
|
| 298 |
-
const accentBorder = accent === "primary" ? "border-primary/40" : "border-accentBlue/40";
|
| 299 |
-
const accentText = accent === "primary" ? "text-primary" : "text-accentBlue";
|
| 300 |
-
|
| 301 |
-
return (
|
| 302 |
-
<section className={cn("panel flex flex-col gap-3 border-l-4", accentBorder)}>
|
| 303 |
-
<header className="flex items-baseline justify-between gap-2">
|
| 304 |
-
<div className="flex items-baseline gap-2">
|
| 305 |
-
<span className={cn("heading-eyebrow", accentText)}>Side {title}</span>
|
| 306 |
-
<span className="font-mono text-xs text-textPrimary">
|
| 307 |
-
{connection.model || "(no model selected)"}
|
| 308 |
-
</span>
|
| 309 |
-
</div>
|
| 310 |
-
<SlotStatusBadge status={slot.state.status} converged={converged} />
|
| 311 |
-
</header>
|
| 312 |
-
|
| 313 |
-
{slot.state.errorMessage ? (
|
| 314 |
-
<ErrorRow
|
| 315 |
-
message={slot.state.errorMessage}
|
| 316 |
-
onDismiss={() => slot.controls.resetError()}
|
| 317 |
-
/>
|
| 318 |
-
) : null}
|
| 319 |
-
|
| 320 |
-
{observation ? (
|
| 321 |
-
<>
|
| 322 |
-
<div className="rounded-lg border border-border bg-surfaceMuted p-3">
|
| 323 |
-
<TrajectoryCanvas
|
| 324 |
-
observed={observed}
|
| 325 |
-
predicted={latestTurn?.predictedTrajectory ?? []}
|
| 326 |
-
variable={primaryVariable}
|
| 327 |
-
variableLabel={primaryVariable}
|
| 328 |
-
predictedProgress={1}
|
| 329 |
-
/>
|
| 330 |
-
</div>
|
| 331 |
-
|
| 332 |
-
<DenseRewardRow reward={lastReward} />
|
| 333 |
-
|
| 334 |
-
<p className="text-[11px] text-textMuted">
|
| 335 |
-
<span className="font-semibold uppercase tracking-wide text-textPrimary">
|
| 336 |
-
Hint:
|
| 337 |
-
</span>{" "}
|
| 338 |
-
{observation.hint || "(none)"}
|
| 339 |
-
</p>
|
| 340 |
-
|
| 341 |
-
<SlotTurns turns={turns} />
|
| 342 |
-
</>
|
| 343 |
-
) : (
|
| 344 |
-
<SlotPlaceholder status={slot.state.status} />
|
| 345 |
-
)}
|
| 346 |
-
</section>
|
| 347 |
-
);
|
| 348 |
-
}
|
| 349 |
-
|
| 350 |
-
function SlotStatusBadge({
|
| 351 |
-
status,
|
| 352 |
-
converged,
|
| 353 |
-
}: {
|
| 354 |
-
status: RunnerStatus;
|
| 355 |
-
converged: boolean;
|
| 356 |
-
}): JSX.Element {
|
| 357 |
-
if (converged) {
|
| 358 |
-
return (
|
| 359 |
-
<span className="rounded-full border border-accentGreen/40 bg-accentGreen/10 px-2 py-0.5 text-[10px] uppercase tracking-wider text-accentGreen">
|
| 360 |
-
converged
|
| 361 |
-
</span>
|
| 362 |
-
);
|
| 363 |
-
}
|
| 364 |
-
const tone =
|
| 365 |
-
status === "running"
|
| 366 |
-
? "border-accentBlue/40 text-accentBlue"
|
| 367 |
-
: status === "ended"
|
| 368 |
-
? "border-textMuted/40 text-textMuted"
|
| 369 |
-
: status === "error"
|
| 370 |
-
? "border-primary/40 text-primary"
|
| 371 |
-
: "border-textMuted/40 text-textMuted";
|
| 372 |
-
return (
|
| 373 |
-
<span className={cn("rounded-full border bg-surface px-2 py-0.5 text-[10px] uppercase tracking-wider", tone)}>
|
| 374 |
-
{labelForStatus(status)}
|
| 375 |
-
</span>
|
| 376 |
-
);
|
| 377 |
-
}
|
| 378 |
-
|
| 379 |
-
function labelForStatus(status: RunnerStatus): string {
|
| 380 |
-
switch (status) {
|
| 381 |
-
case "starting":
|
| 382 |
-
return "starting";
|
| 383 |
-
case "running":
|
| 384 |
-
return "running";
|
| 385 |
-
case "paused":
|
| 386 |
-
return "paused";
|
| 387 |
-
case "ended":
|
| 388 |
-
return "done";
|
| 389 |
-
case "error":
|
| 390 |
-
return "error";
|
| 391 |
-
case "idle":
|
| 392 |
-
default:
|
| 393 |
-
return "idle";
|
| 394 |
-
}
|
| 395 |
-
}
|
| 396 |
-
|
| 397 |
-
function SlotPlaceholder({ status }: { status: RunnerStatus }): JSX.Element {
|
| 398 |
-
if (status === "starting") {
|
| 399 |
-
return (
|
| 400 |
-
<div className="flex flex-col gap-2" aria-busy>
|
| 401 |
-
<Skeleton className="h-[240px] w-full" />
|
| 402 |
-
<Skeleton className="h-3 w-48" />
|
| 403 |
-
</div>
|
| 404 |
-
);
|
| 405 |
-
}
|
| 406 |
-
return (
|
| 407 |
-
<p className="text-xs text-textMuted">
|
| 408 |
-
Configure both connections and press Run to start.
|
| 409 |
-
</p>
|
| 410 |
-
);
|
| 411 |
-
}
|
| 412 |
-
|
| 413 |
-
function SlotTurns({ turns }: { turns: LlmTurn[] }): JSX.Element {
|
| 414 |
-
if (turns.length === 0) {
|
| 415 |
-
return <p className="text-[11px] text-textMuted">No turns yet.</p>;
|
| 416 |
-
}
|
| 417 |
-
// Render a short transcript: just the latest two turns to keep the
|
| 418 |
-
// column readable in the side-by-side layout. Power users can read
|
| 419 |
-
// the raw turn-by-turn dump from the original single-model view.
|
| 420 |
-
const visible = turns.slice(-2);
|
| 421 |
-
return (
|
| 422 |
-
<details className="rounded-md border border-border bg-surface px-3 py-2 text-[11px] text-textMuted">
|
| 423 |
-
<summary className="cursor-pointer text-textPrimary">
|
| 424 |
-
Latest turns ({turns.length})
|
| 425 |
-
</summary>
|
| 426 |
-
<ol className="mt-2 flex flex-col gap-2">
|
| 427 |
-
{visible.map((turn) => (
|
| 428 |
-
<li
|
| 429 |
-
key={turn.turn}
|
| 430 |
-
className="rounded border border-border bg-surfaceMuted p-2"
|
| 431 |
-
>
|
| 432 |
-
<div className="mb-1 flex items-center justify-between text-[10px] text-textMuted">
|
| 433 |
-
<span>turn {turn.turn}</span>
|
| 434 |
-
<span className="font-mono">
|
| 435 |
-
R² {(turn.observation.reward_breakdown.match * 100).toFixed(0)}%
|
| 436 |
-
</span>
|
| 437 |
-
</div>
|
| 438 |
-
{turn.action.equation ? (
|
| 439 |
-
<EquationDisplay
|
| 440 |
-
equation={turn.action.equation}
|
| 441 |
-
rationale={turn.action.rationale}
|
| 442 |
-
/>
|
| 443 |
-
) : (
|
| 444 |
-
<span className="text-accentAmber">unparseable</span>
|
| 445 |
-
)}
|
| 446 |
-
</li>
|
| 447 |
-
))}
|
| 448 |
-
</ol>
|
| 449 |
-
</details>
|
| 450 |
-
);
|
| 451 |
-
}
|
| 452 |
-
|
| 453 |
-
function DenseRewardRow({ reward }: { reward: RewardBreakdown }): JSX.Element {
|
| 454 |
-
// Reward components (top) feed the trainer's weighted total.
|
| 455 |
-
// Diagnostic sub-scores (bottom) are visual-closeness signals only —
|
| 456 |
-
// see RewardBreakdown class docstring on the backend.
|
| 457 |
-
const rewardComponents: { name: string; value: number }[] = [
|
| 458 |
-
{ name: "match", value: reward.match ?? 0 },
|
| 459 |
-
{ name: "progress", value: reward.progress ?? 0 },
|
| 460 |
-
{ name: "simplicity", value: reward.simplicity ?? 0 },
|
| 461 |
-
{ name: "format", value: reward.format ?? 0 },
|
| 462 |
-
];
|
| 463 |
-
const diagComponents: { name: string; value: number }[] = [
|
| 464 |
-
{ name: "shape", value: reward.shape ?? 0 },
|
| 465 |
-
{ name: "freq", value: reward.freq ?? 0 },
|
| 466 |
-
{ name: "amplitude", value: reward.amplitude ?? 0 },
|
| 467 |
-
];
|
| 468 |
-
return (
|
| 469 |
-
<div className="flex flex-col gap-2 rounded-md border border-border bg-surface px-3 py-2 font-mono text-[11px]">
|
| 470 |
-
<div className="grid grid-cols-4 gap-2">
|
| 471 |
-
{rewardComponents.map(({ name, value }) => (
|
| 472 |
-
<RewardCell key={name} name={name} value={value} />
|
| 473 |
-
))}
|
| 474 |
-
</div>
|
| 475 |
-
<div className="flex items-center gap-2 border-t border-border/60 pt-2">
|
| 476 |
-
<span
|
| 477 |
-
className="text-[10px] uppercase tracking-wider text-textMuted"
|
| 478 |
-
title="Diagnostic-only — not part of the reward total. Captures visual closeness (shape / freq / amplitude) where R² collapses to 0."
|
| 479 |
-
>
|
| 480 |
-
diag
|
| 481 |
-
</span>
|
| 482 |
-
<div className="grid flex-1 grid-cols-3 gap-2">
|
| 483 |
-
{diagComponents.map(({ name, value }) => (
|
| 484 |
-
<RewardCell key={name} name={name} value={value} muted />
|
| 485 |
-
))}
|
| 486 |
-
</div>
|
| 487 |
-
</div>
|
| 488 |
-
</div>
|
| 489 |
-
);
|
| 490 |
-
}
|
| 491 |
-
|
| 492 |
-
function RewardCell({
|
| 493 |
-
name,
|
| 494 |
-
value,
|
| 495 |
-
muted = false,
|
| 496 |
-
}: {
|
| 497 |
-
name: string;
|
| 498 |
-
value: number;
|
| 499 |
-
muted?: boolean;
|
| 500 |
-
}): JSX.Element {
|
| 501 |
-
return (
|
| 502 |
-
<div className="flex flex-col gap-1">
|
| 503 |
-
<div className="flex items-baseline justify-between">
|
| 504 |
-
<span className="text-textMuted">{name}</span>
|
| 505 |
-
<span className={muted ? "text-textMuted" : "text-textPrimary"}>
|
| 506 |
-
{value.toFixed(2)}
|
| 507 |
-
</span>
|
| 508 |
-
</div>
|
| 509 |
-
<div
|
| 510 |
-
className="h-1 w-full overflow-hidden rounded-full bg-border"
|
| 511 |
-
aria-hidden
|
| 512 |
-
>
|
| 513 |
-
<div
|
| 514 |
-
className={cn(
|
| 515 |
-
"h-full rounded-full",
|
| 516 |
-
value >= 0.7
|
| 517 |
-
? muted
|
| 518 |
-
? "bg-accentBlue/60"
|
| 519 |
-
: "bg-accentGreen/70"
|
| 520 |
-
: value >= 0.3
|
| 521 |
-
? "bg-accentAmber/70"
|
| 522 |
-
: "bg-textMuted/40",
|
| 523 |
-
)}
|
| 524 |
-
style={{ width: `${Math.max(0, Math.min(1, value)) * 100}%` }}
|
| 525 |
-
/>
|
| 526 |
-
</div>
|
| 527 |
-
</div>
|
| 528 |
-
);
|
| 529 |
-
}
|
| 530 |
-
|
| 531 |
-
function ScoreboardBanner({
|
| 532 |
-
a,
|
| 533 |
-
b,
|
| 534 |
-
}: {
|
| 535 |
-
a: CompareSlot;
|
| 536 |
-
b: CompareSlot;
|
| 537 |
-
}): JSX.Element | null {
|
| 538 |
-
const aDone = a.state.status === "ended";
|
| 539 |
-
const bDone = b.state.status === "ended";
|
| 540 |
-
if (!aDone || !bDone) return null;
|
| 541 |
-
|
| 542 |
-
const aMatch = lastMatch(a);
|
| 543 |
-
const bMatch = lastMatch(b);
|
| 544 |
-
const winner = aMatch === bMatch ? null : aMatch > bMatch ? "A" : "B";
|
| 545 |
-
|
| 546 |
-
return (
|
| 547 |
-
<div className="panel border border-accentGreen/30 bg-accentGreen/5 text-sm">
|
| 548 |
-
<p className="heading-eyebrow text-accentGreen">Scoreboard</p>
|
| 549 |
-
<div className="mt-2 flex flex-wrap items-baseline gap-6 text-textPrimary">
|
| 550 |
-
<span>
|
| 551 |
-
A: <span className="font-mono">{formatPercent(aMatch)}</span> R²
|
| 552 |
-
</span>
|
| 553 |
-
<span>
|
| 554 |
-
B: <span className="font-mono">{formatPercent(bMatch)}</span> R²
|
| 555 |
-
</span>
|
| 556 |
-
{winner ? (
|
| 557 |
-
<span className="text-accentGreen">
|
| 558 |
-
Winner:{" "}
|
| 559 |
-
<strong className="font-semibold text-textPrimary">{winner}</strong>
|
| 560 |
-
</span>
|
| 561 |
-
) : (
|
| 562 |
-
<span className="text-textMuted">Tied.</span>
|
| 563 |
-
)}
|
| 564 |
-
</div>
|
| 565 |
-
</div>
|
| 566 |
-
);
|
| 567 |
-
}
|
| 568 |
-
|
| 569 |
-
function ErrorRow({
|
| 570 |
-
message,
|
| 571 |
-
onDismiss,
|
| 572 |
-
}: {
|
| 573 |
-
message: string;
|
| 574 |
-
onDismiss: () => void;
|
| 575 |
-
}): JSX.Element {
|
| 576 |
-
return (
|
| 577 |
-
<div
|
| 578 |
-
role="alert"
|
| 579 |
-
className="flex items-start justify-between gap-2 rounded-md border border-accentAmber/40 bg-accentAmber/5 px-3 py-2 text-xs text-accentAmber"
|
| 580 |
-
>
|
| 581 |
-
<span className="whitespace-pre-line">{message}</span>
|
| 582 |
-
<button
|
| 583 |
-
type="button"
|
| 584 |
-
onClick={onDismiss}
|
| 585 |
-
className="text-[10px] text-textMuted underline hover:text-textPrimary"
|
| 586 |
-
>
|
| 587 |
-
dismiss
|
| 588 |
-
</button>
|
| 589 |
-
</div>
|
| 590 |
-
);
|
| 591 |
-
}
|
| 592 |
-
|
| 593 |
-
function Field({
|
| 594 |
-
label,
|
| 595 |
-
children,
|
| 596 |
-
}: {
|
| 597 |
-
label: string;
|
| 598 |
-
children: React.ReactNode;
|
| 599 |
-
}): JSX.Element {
|
| 600 |
-
return (
|
| 601 |
-
<label className="flex flex-col gap-1 text-xs text-textMuted">
|
| 602 |
-
<span className="heading-eyebrow">{label}</span>
|
| 603 |
-
{children}
|
| 604 |
-
</label>
|
| 605 |
-
);
|
| 606 |
-
}
|
| 607 |
-
|
| 608 |
-
function isActive(status: RunnerStatus): boolean {
|
| 609 |
-
return status === "running" || status === "paused";
|
| 610 |
-
}
|
| 611 |
-
|
| 612 |
-
function lastMatch(slot: CompareSlot): number {
|
| 613 |
-
const turns = slot.state.turns;
|
| 614 |
-
const last = turns[turns.length - 1];
|
| 615 |
-
return last?.observation.reward_breakdown.match ?? 0;
|
| 616 |
-
}
|
| 617 |
-
|
| 618 |
-
function prettySystemId(systemId: string): string {
|
| 619 |
-
if (!systemId) return "(none)";
|
| 620 |
-
return systemId
|
| 621 |
-
.split("_")
|
| 622 |
-
.map((part) => part.charAt(0).toUpperCase() + part.slice(1))
|
| 623 |
-
.join(" ");
|
| 624 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
frontend/src/components/RunWithLlmPane.tsx
CHANGED
|
@@ -1,17 +1,19 @@
|
|
| 1 |
-
/** Single-LLM run pane: pick
|
| 2 |
-
* step through the episode turn by turn.
|
| 3 |
*
|
| 4 |
* This is the "all the steps" view — every turn's hypothesis, reward
|
| 5 |
* breakdown, latency, and raw completion are surfaced in a scrollable
|
| 6 |
* transcript so judges can audit exactly what the model proposed.
|
| 7 |
*
|
| 8 |
-
*
|
| 9 |
-
*
|
|
|
|
|
|
|
| 10 |
|
| 11 |
import { useEffect, useMemo, useState } from "react";
|
| 12 |
|
| 13 |
import { EquationDisplay } from "@/components/EquationDisplay";
|
| 14 |
-
import { LlmConnectionPanel } from "@/components/LlmConnectionPanel";
|
| 15 |
import { Skeleton } from "@/components/Skeleton";
|
| 16 |
import { TrajectoryCanvas } from "@/components/TrajectoryCanvas";
|
| 17 |
import {
|
|
@@ -22,7 +24,13 @@ import {
|
|
| 22 |
import { cn } from "@/lib/cn";
|
| 23 |
import {
|
| 24 |
DEFAULT_SINGLE_LLM_CONNECTION,
|
|
|
|
| 25 |
type LlmConnection,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
} from "@/lib/llmPresets";
|
| 27 |
import { pickPrimaryVariable } from "@/lib/trajectory";
|
| 28 |
import type { RewardBreakdown } from "@/types/physix";
|
|
@@ -60,7 +68,13 @@ export function RunWithLlmPane(): JSX.Element {
|
|
| 60 |
const status = runner.status;
|
| 61 |
const busy = status === "starting" || status === "running";
|
| 62 |
const hasSession = runner.sessionId !== null;
|
| 63 |
-
const
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
|
| 65 |
function handleStart(): void {
|
| 66 |
void runner.start({
|
|
@@ -105,19 +119,10 @@ export function RunWithLlmPane(): JSX.Element {
|
|
| 105 |
</p>
|
| 106 |
</header>
|
| 107 |
|
| 108 |
-
<
|
| 109 |
-
|
| 110 |
-
<LlmConnectionPanel
|
| 111 |
-
title="LLM"
|
| 112 |
-
subtitle="One model drives the episode."
|
| 113 |
-
accent="primary"
|
| 114 |
-
value={connection}
|
| 115 |
onChange={setConnection}
|
| 116 |
disabled={busy}
|
| 117 |
-
installedOllamaModels={runner.models ?? []}
|
| 118 |
-
installedOllamaLoading={runner.models === null}
|
| 119 |
-
installedOllamaError={runner.modelsError}
|
| 120 |
-
onRefreshOllama={() => void runner.refreshModels()}
|
| 121 |
/>
|
| 122 |
|
| 123 |
<ControlBar
|
|
@@ -639,56 +644,175 @@ function RewardCell({
|
|
| 639 |
);
|
| 640 |
}
|
| 641 |
|
| 642 |
-
/
|
| 643 |
-
|
| 644 |
-
|
| 645 |
-
|
| 646 |
-
|
| 647 |
-
|
| 648 |
-
|
| 649 |
-
|
| 650 |
-
|
| 651 |
-
|
| 652 |
-
|
| 653 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 654 |
return (
|
| 655 |
-
<
|
| 656 |
-
<
|
| 657 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 658 |
</p>
|
| 659 |
-
<
|
| 660 |
-
|
| 661 |
-
|
| 662 |
-
|
| 663 |
-
default
|
| 664 |
-
</span>{" "}
|
| 665 |
-
— easiest path. Paste a token from{" "}
|
| 666 |
-
<code className="font-mono text-textPrimary">
|
| 667 |
-
huggingface.co/settings/tokens
|
| 668 |
-
</code>{" "}
|
| 669 |
-
(with the "Make calls to Inference Providers" permission),
|
| 670 |
-
pick a suggested model, hit Run. Responds in ~2 s, no warm-up.
|
| 671 |
-
</li>
|
| 672 |
-
<li>
|
| 673 |
-
<span className="text-textPrimary">PhysiX-Infer GPU ✦</span> — only
|
| 674 |
-
way to compare the GRPO-trained{" "}
|
| 675 |
-
<code className="font-mono text-textPrimary">physix-3b-rl</code>{" "}
|
| 676 |
-
against its{" "}
|
| 677 |
-
<code className="font-mono text-textPrimary">Qwen 2.5 3B</code> base
|
| 678 |
-
on identical hardware. No token. Sleeps after 5 min idle so first
|
| 679 |
-
request after sleep takes ~90-120 s while two 3B models load on the
|
| 680 |
-
L4 — the status banner below shows live state, with a Prewarm
|
| 681 |
-
button to wake it before you hit Run.
|
| 682 |
-
</li>
|
| 683 |
-
<li>
|
| 684 |
-
<span className="text-textPrimary">Ollama / OpenAI / Custom</span>{" "}
|
| 685 |
-
— bring your own endpoint. Useful for local dev (Ollama on
|
| 686 |
-
<code className="font-mono text-textPrimary">localhost:11434</code>),
|
| 687 |
-
frontier-model baselines (OpenAI), or pointing at a private vLLM /
|
| 688 |
-
inference endpoint URL.
|
| 689 |
-
</li>
|
| 690 |
-
</ul>
|
| 691 |
-
</div>
|
| 692 |
);
|
| 693 |
}
|
| 694 |
|
|
|
|
| 1 |
+
/** Single-LLM run pane: pick one of three preset models, hit run,
|
| 2 |
+
* watch the model step through the episode turn by turn.
|
| 3 |
*
|
| 4 |
* This is the "all the steps" view — every turn's hypothesis, reward
|
| 5 |
* breakdown, latency, and raw completion are surfaced in a scrollable
|
| 6 |
* transcript so judges can audit exactly what the model proposed.
|
| 7 |
*
|
| 8 |
+
* The model picker is intentionally a hard 3-option choice (trained
|
| 9 |
+
* PhysiX-3B, Qwen 3B base, Qwen 7B baseline) — typing model ids was
|
| 10 |
+
* confusing for first-time users and most picks ended up being one of
|
| 11 |
+
* these three anyway. */
|
| 12 |
|
| 13 |
import { useEffect, useMemo, useState } from "react";
|
| 14 |
|
| 15 |
import { EquationDisplay } from "@/components/EquationDisplay";
|
| 16 |
+
import { PhysixInferStatus } from "@/components/PhysixInferStatus";
|
| 17 |
import { Skeleton } from "@/components/Skeleton";
|
| 18 |
import { TrajectoryCanvas } from "@/components/TrajectoryCanvas";
|
| 19 |
import {
|
|
|
|
| 24 |
import { cn } from "@/lib/cn";
|
| 25 |
import {
|
| 26 |
DEFAULT_SINGLE_LLM_CONNECTION,
|
| 27 |
+
MODEL_PRESETS,
|
| 28 |
type LlmConnection,
|
| 29 |
+
type ModelPreset,
|
| 30 |
+
findEndpoint,
|
| 31 |
+
loadApiKey,
|
| 32 |
+
presetForConnection,
|
| 33 |
+
saveApiKey,
|
| 34 |
} from "@/lib/llmPresets";
|
| 35 |
import { pickPrimaryVariable } from "@/lib/trajectory";
|
| 36 |
import type { RewardBreakdown } from "@/types/physix";
|
|
|
|
| 68 |
const status = runner.status;
|
| 69 |
const busy = status === "starting" || status === "running";
|
| 70 |
const hasSession = runner.sessionId !== null;
|
| 71 |
+
const endpoint = findEndpoint(connection.endpointId);
|
| 72 |
+
const hasRequiredKey = !endpoint.needsKey || !!connection.apiKey.trim();
|
| 73 |
+
const canStart =
|
| 74 |
+
!busy &&
|
| 75 |
+
!!connection.model.trim() &&
|
| 76 |
+
!!connection.baseUrl.trim() &&
|
| 77 |
+
hasRequiredKey;
|
| 78 |
|
| 79 |
function handleStart(): void {
|
| 80 |
void runner.start({
|
|
|
|
| 119 |
</p>
|
| 120 |
</header>
|
| 121 |
|
| 122 |
+
<ModelPresetPicker
|
| 123 |
+
connection={connection}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
onChange={setConnection}
|
| 125 |
disabled={busy}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
/>
|
| 127 |
|
| 128 |
<ControlBar
|
|
|
|
| 644 |
);
|
| 645 |
}
|
| 646 |
|
| 647 |
+
// ---------------------------------------------------------------------
|
| 648 |
+
// Model preset picker — three buttons + one (optional) API-key field.
|
| 649 |
+
// ---------------------------------------------------------------------
|
| 650 |
+
//
|
| 651 |
+
// The picker replaces the old "Endpoint dropdown + freeform model id +
|
| 652 |
+
// hint paragraph" UI. Users always pick one of three known-good models;
|
| 653 |
+
// the API-key field only appears when the picked endpoint needs one
|
| 654 |
+
// (just the HF Router 7B preset today). Only API keys are
|
| 655 |
+
// persisted in localStorage by base URL via `loadApiKey` / `saveApiKey`,
|
| 656 |
+
// so a token typed for the 7B preset survives a page reload and isn't
|
| 657 |
+
// shown when the trained PhysiX preset is selected (it doesn't need
|
| 658 |
+
// one).
|
| 659 |
+
|
| 660 |
+
/** Props accepted by `ModelPresetPicker`. */
interface ModelPresetPickerProps {
  /** Connection currently held by the parent; determines which preset
   * card is shown as selected. */
  connection: LlmConnection;
  /** Receives the replacement connection whenever a preset is picked or
   * the API key is edited / hydrated from storage. */
  onChange: (next: LlmConnection) => void;
  /** When true, the preset cards and the key input are disabled. */
  disabled?: boolean;
}
|
| 665 |
+
|
| 666 |
+
/**
 * Three-card model picker plus an optional API-key field.
 *
 * The highlighted card is derived from `connection` via
 * `presetForConnection`, falling back to the first preset when nothing
 * matches. The key field is rendered only when the selected preset's
 * endpoint reports `needsKey`; the PhysiX-Infer status banner only when
 * the selected preset targets the "physix" endpoint.
 *
 * @param connection - Current connection owned by the parent.
 * @param onChange - Receives the next connection on preset or key changes.
 * @param disabled - Disables the cards and the key input.
 */
function ModelPresetPicker({
  connection,
  onChange,
  disabled,
}: ModelPresetPickerProps): JSX.Element {
  const selected = presetForConnection(connection) ?? MODEL_PRESETS[0]!;
  const endpoint = findEndpoint(selected.connection.endpointId);
  const needsKey = endpoint.needsKey;

  // Explicit id so the visible caption labels the <input>. Wrapping the
  // whole field in a <label> made the show/hide <button> the labeled
  // control (it is the first labelable descendant), so clicking the
  // caption toggled visibility instead of focusing the token field.
  // NOTE(review): a fixed id assumes one picker is mounted at a time.
  const apiKeyInputId = "model-preset-api-key";

  const [revealKey, setRevealKey] = useState(false);

  // Hydrate the API key from per-URL storage whenever the preset (and
  // therefore base URL) changes. Only `baseUrl` is a dependency on
  // purpose: re-running on every `connection` / `onChange` identity
  // change would re-apply the stored key while the user is typing.
  useEffect(() => {
    if (!connection.baseUrl) return;
    const stored = loadApiKey(connection.baseUrl);
    if (stored && stored !== connection.apiKey) {
      onChange({ ...connection, apiKey: stored });
    }
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [connection.baseUrl]);

  /** Switch to `preset`, restoring any key saved for its base URL. */
  function selectPreset(preset: ModelPreset): void {
    onChange({
      ...preset.connection,
      apiKey: loadApiKey(preset.connection.baseUrl),
    });
  }

  /** Persist the typed key for the current base URL and lift it to the parent. */
  function setApiKey(key: string): void {
    saveApiKey(connection.baseUrl, key);
    onChange({ ...connection, apiKey: key });
  }

  return (
    <section className="panel flex flex-col gap-4">
      <header>
        <p className="heading-eyebrow text-primary">Pick a model</p>
        <p className="mt-1 text-xs text-textMuted">
          Three known-good options — the trained PhysiX-3B, its Qwen 3B
          base, and a Qwen 7B baseline. No URLs to type, no model ids to
          paste.
        </p>
      </header>

      <div
        role="radiogroup"
        aria-label="Model"
        className="grid grid-cols-1 gap-3 md:grid-cols-3"
      >
        {MODEL_PRESETS.map((preset) => (
          <PresetCard
            key={preset.id}
            preset={preset}
            selected={selected.id === preset.id}
            disabled={disabled ?? false}
            onSelect={() => selectPreset(preset)}
          />
        ))}
      </div>

      {needsKey ? (
        <div className="flex flex-col gap-1 text-xs text-textMuted">
          <span className="heading-eyebrow flex items-baseline justify-between gap-2">
            <label htmlFor={apiKeyInputId}>HF token (required)</label>
            <button
              type="button"
              onClick={() => setRevealKey((v) => !v)}
              className="text-[10px] uppercase tracking-wider text-textMuted underline hover:text-textPrimary"
            >
              {revealKey ? "hide" : "show"}
            </button>
          </span>
          <input
            id={apiKeyInputId}
            type={revealKey ? "text" : "password"}
            value={connection.apiKey}
            onChange={(e) => setApiKey(e.target.value)}
            disabled={disabled}
            placeholder="hf_..."
            className="w-full rounded-lg border border-border bg-surfaceMuted px-3 py-2 font-mono text-xs text-textPrimary outline-none transition focus:border-textMuted disabled:opacity-50"
          />
          <span className="text-[11px] leading-relaxed text-textMuted">
            Get one at{" "}
            <code className="font-mono text-textPrimary">
              huggingface.co/settings/tokens
            </code>{" "}
            with the "Make calls to Inference Providers"
            permission. Saved per endpoint in your browser.
          </span>
        </div>
      ) : null}

      {/* Live banner only when the picked preset hits the GPU Space. */}
      {selected.connection.endpointId === "physix" ? (
        <PhysixInferStatus />
      ) : null}
    </section>
  );
}
|
| 765 |
+
|
| 766 |
+
/** Props accepted by `PresetCard`. */
interface PresetCardProps {
  /** Preset whose label, badge, description and model id are rendered. */
  preset: ModelPreset;
  /** Whether this card is the currently chosen option. */
  selected: boolean;
  /** Whether the card's button is disabled. */
  disabled: boolean;
  /** Invoked when the card is clicked. */
  onSelect: () => void;
}
|
| 772 |
+
|
| 773 |
+
/**
 * One selectable preset tile, exposed as an ARIA radio button.
 *
 * Renders the preset's label and badge pill on the first row, then its
 * description, then the raw model id. Border and badge colours switch
 * on `selected`.
 */
function PresetCard(props: PresetCardProps): JSX.Element {
  const { preset, selected, disabled, onSelect } = props;

  const cardClasses = cn(
    "flex flex-col gap-2 rounded-xl border bg-surfaceMuted p-3 text-left transition",
    "disabled:cursor-not-allowed disabled:opacity-50",
    selected
      ? "border-primary bg-primary/5 shadow-sm"
      : "border-border hover:border-textMuted",
  );

  const badgeClasses = cn(
    "rounded-full border bg-surface px-2 py-0.5 text-[10px] uppercase tracking-wider",
    selected
      ? "border-primary/60 text-primary"
      : "border-border text-textMuted",
  );

  return (
    <button
      type="button"
      role="radio"
      aria-checked={selected}
      onClick={onSelect}
      disabled={disabled}
      className={cardClasses}
    >
      <div className="flex items-center justify-between gap-2">
        <span className="text-sm font-semibold text-textPrimary">
          {preset.label}
        </span>
        <span className={badgeClasses}>{preset.badge}</span>
      </div>
      <p className="text-[11px] leading-relaxed text-textMuted">
        {preset.description}
      </p>
      <code className="font-mono text-[10px] text-textMuted">
        {preset.connection.model}
      </code>
    </button>
  );
}
|
| 818 |
|
frontend/src/hooks/useLlmCompareRunner.ts
DELETED
|
@@ -1,128 +0,0 @@
|
|
| 1 |
-
/** Drives two parallel `useLlmEpisodeRunner` instances against the same
|
| 2 |
-
* episode seed. The whole point of the demo is to put two models on
|
| 3 |
-
* identical input and compare their behaviour, scored by the same
|
| 4 |
-
* verifier with no LLM-as-judge.
|
| 5 |
-
*
|
| 6 |
-
* Implementation note: each side gets its own session because the env
|
| 7 |
-
* builds a turn-by-turn history that the next prompt depends on. We
|
| 8 |
-
* *don't* fork a single session — that would corrupt history. Instead
|
| 9 |
-
* we start two sessions with the same `system_id` + `seed`, which the
|
| 10 |
-
* server already supports via its existing reset path. */
|
| 11 |
-
|
| 12 |
-
import { useCallback, useMemo, useRef, useState } from "react";
|
| 13 |
-
|
| 14 |
-
import {
|
| 15 |
-
type LlmEpisodeRunnerControls,
|
| 16 |
-
type LlmEpisodeRunnerState,
|
| 17 |
-
useLlmEpisodeRunner,
|
| 18 |
-
} from "@/hooks/useLlmEpisodeRunner";
|
| 19 |
-
import type { LlmConnection } from "@/lib/llmPresets";
|
| 20 |
-
|
| 21 |
-
/** One side ("a" or "b") of a comparison run. */
export interface CompareSlot {
  /** Which side this slot represents. */
  id: "a" | "b";
  /** Episode-runner state for this side. */
  state: LlmEpisodeRunnerState;
  /** Episode-runner controls for this side. */
  controls: LlmEpisodeRunnerControls;
}
|
| 26 |
-
|
| 27 |
-
/** Actions exposed by `useLlmCompareRunner`. */
export interface CompareRunnerControls {
  /**
   * Launch both sides against the same system and seed. Each side
   * talks to the model described by its own connection.
   */
  startBoth: (options: {
    systemId?: string | undefined;
    maxTurns?: number | undefined;
    connectionA: LlmConnection;
    connectionB: LlmConnection;
    temperature?: number | undefined;
  }) => Promise<void>;
  /** Terminate both sessions and clear the shared seed / system id. */
  endBoth: () => Promise<void>;
}
|
| 40 |
-
|
| 41 |
-
/** Read-only view returned by `useLlmCompareRunner`. */
export interface CompareRunnerState {
  /** Side A of the comparison. */
  a: CompareSlot;
  /** Side B of the comparison. */
  b: CompareSlot;
  /**
   * Seed chosen by the most recent `startBoth` call, or null before the
   * first start / after `endBoth`. Shown in the UI as evidence that
   * both sides ran the same episode.
   */
  lastSeed: number | null;
  /** Resolved system id, shared by both slots. */
  systemId: string | null;
}
|
| 50 |
-
|
| 51 |
-
/**
 * Runs two `useLlmEpisodeRunner` instances side by side on one shared
 * seed and system.
 *
 * @returns Both slots, the last shared seed, the resolved system id,
 *   and the `startBoth` / `endBoth` actions.
 */
export function useLlmCompareRunner(): CompareRunnerState & CompareRunnerControls {
  const sideA = useLlmEpisodeRunner();
  const sideB = useLlmEpisodeRunner();
  const [lastSeed, setLastSeed] = useState<number | null>(null);
  const [systemId, setSystemId] = useState<string | null>(null);

  // The runner objects are refreshed on every render. Stashing the
  // latest pair in a ref lets the callbacks below keep an empty
  // dependency list without calling stale controls.
  const latestRunners = useRef({ a: sideA, b: sideB });
  latestRunners.current = { a: sideA, b: sideB };

  const startBoth = useCallback(
    async ({
      systemId: requestedSystemId,
      maxTurns,
      connectionA,
      connectionB,
      temperature,
    }: {
      systemId?: string | undefined;
      maxTurns?: number | undefined;
      connectionA: LlmConnection;
      connectionB: LlmConnection;
      temperature?: number | undefined;
    }) => {
      // One seed for both sides so they receive identical observations;
      // capped at 31 bits to stay in a range both JS and NumPy accept.
      const seed = Math.floor(Math.random() * 2_147_483_647);
      setLastSeed(seed);
      setSystemId(requestedSystemId ?? null);

      const shared = {
        systemId: requestedSystemId,
        seed,
        maxTurns,
        temperature,
      };

      // Start both concurrently; each side gets its own server session.
      const runners = latestRunners.current;
      await Promise.all([
        runners.a.start({ ...shared, connection: connectionA }),
        runners.b.start({ ...shared, connection: connectionB }),
      ]);
    },
    [],
  );

  const endBoth = useCallback(async () => {
    const runners = latestRunners.current;
    await Promise.all([runners.a.end(), runners.b.end()]);
    setLastSeed(null);
    setSystemId(null);
  }, []);

  const slotA = useMemo<CompareSlot>(
    () => ({ id: "a", state: { ...sideA }, controls: { ...sideA } }),
    [sideA],
  );
  const slotB = useMemo<CompareSlot>(
    () => ({ id: "b", state: { ...sideB }, controls: { ...sideB } }),
    [sideB],
  );

  // Prefer the explicitly requested system id, then whichever side has
  // resolved one.
  const resolvedSystemId = systemId ?? sideA.systemId ?? sideB.systemId;

  return {
    a: slotA,
    b: slotB,
    lastSeed,
    systemId: resolvedSystemId,
    startBoth,
    endBoth,
  };
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
frontend/src/lib/llmPresets.ts
CHANGED
|
@@ -186,48 +186,102 @@ export interface LlmConnection {
|
|
| 186 |
apiKey: string;
|
| 187 |
}
|
| 188 |
|
| 189 |
-
/** Default for the single-LLM "Run with LLM" pane
|
| 190 |
-
*
|
| 191 |
-
*
|
| 192 |
-
* paste a token, pick a suggested model (all live-probed and known to
|
| 193 |
-
* serve), get a response in ~2 s. No GPU cold-start, no localhost
|
| 194 |
-
* dependency.
|
| 195 |
-
*
|
| 196 |
-
* We prefill the model so the Run button is enabled the moment the
|
| 197 |
-
* user pastes a token — keeping the model empty and forcing them to
|
| 198 |
-
* pick from the dropdown is friction we don't need. The api key
|
| 199 |
-
* field is hydrated from localStorage by the panel on first render. */
|
| 200 |
export const DEFAULT_SINGLE_LLM_CONNECTION: LlmConnection = {
|
| 201 |
-
endpointId: "hf",
|
| 202 |
-
baseUrl: HF_ROUTER_BASE_URL,
|
| 203 |
-
// Matches the first entry of the "hf" endpoint's modelSuggestions —
|
| 204 |
-
// smallest router-served Qwen model, fastest response.
|
| 205 |
-
model: "Qwen/Qwen2.5-7B-Instruct",
|
| 206 |
-
apiKey: "",
|
| 207 |
-
};
|
| 208 |
-
|
| 209 |
-
/** Default A side of the Compare pane: trained PhysiX-3B on the sister
|
| 210 |
-
* GPU Space. The Compare pane's whole purpose is the trained-vs-base
|
| 211 |
-
* side-by-side, so it's worth the cold-start penalty here even though
|
| 212 |
-
* the single-LLM pane avoids it. No token needed. */
|
| 213 |
-
export const DEFAULT_CONNECTION_A: LlmConnection = {
|
| 214 |
endpointId: "physix",
|
| 215 |
baseUrl: PHYSIX_INFER_BASE_URL,
|
| 216 |
model: PHYSIX_MODEL_ID,
|
| 217 |
apiKey: "",
|
| 218 |
};
|
| 219 |
|
| 220 |
-
/
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
|
| 232 |
/** Build a fresh connection when the user changes endpoints. Keeps the
|
| 233 |
* api key for the new base URL out of localStorage in this helper —
|
|
|
|
| 186 |
apiKey: string;
|
| 187 |
}
|
| 188 |
|
| 189 |
+
/**
 * Initial connection for the single-LLM "Run with LLM" pane: the
 * trained PhysiX-3B on the PhysiX-Infer endpoint, with no API key.
 *
 * The first `MODEL_PRESETS` entry carries the same endpoint / base URL /
 * model. The two literals are written out separately, so update both
 * together when changing the default.
 */
export const DEFAULT_SINGLE_LLM_CONNECTION: LlmConnection = {
  endpointId: "physix",
  baseUrl: PHYSIX_INFER_BASE_URL,
  model: PHYSIX_MODEL_ID,
  apiKey: "",
};
|
| 198 |
|
| 199 |
+
// ---------------------------------------------------------------------
|
| 200 |
+
// Model presets — the 3 fixed options the Run pane exposes.
|
| 201 |
+
// ---------------------------------------------------------------------
|
| 202 |
+
|
| 203 |
+
/**
 * One click-to-use option in the Run pane: a model paired with the
 * endpoint that serves it.
 *
 * Presets exist so users never have to pick an endpoint and a model id
 * separately and discover the two don't match. Each one carries a
 * ready-made connection (endpoint id, base URL, model id, empty key).
 */
export interface ModelPreset {
  /** Identifier used for lookup and as the React key. */
  id: string;
  /** Display name shown on the card. */
  label: string;
  /** One-line "what is this" copy shown under the label. */
  description: string;
  /** Short tag rendered as a pill (e.g. "trained", "3B base", "7B"). */
  badge: string;
  /** Pre-built connection that can be handed straight to the runner. */
  connection: LlmConnection;
}
|
| 217 |
+
|
| 218 |
+
/** Builds the key-less connection shared by the presets served from the
 * PhysiX-Infer endpoint; only the model id differs between them. */
const presetOnPhysixInfer = (model: string): LlmConnection => ({
  endpointId: "physix",
  baseUrl: PHYSIX_INFER_BASE_URL,
  model,
  apiKey: "",
});

/**
 * The fixed set of options offered by the Run-with-LLM picker. Order is
 * significant: index 0 is what a fresh page-load selects.
 *
 * The first two entries are served by the PhysiX-Infer GPU Space (no
 * token, same L4 hardware), so the trained PhysiX-3B can be compared
 * with its Qwen 3B base like-for-like. The 7B baseline goes through HF
 * Router because no provider serves Qwen 3B today; it gives a
 * "bigger model" reference point in <2 s once a token is pasted.
 */
export const MODEL_PRESETS: readonly ModelPreset[] = [
  {
    id: "physix-3b-rl",
    label: "PhysiX-3B (trained)",
    description:
      "Our GRPO-trained Qwen-3B on a sister L4 GPU Space. No token needed; first request after sleep is ~90-120 s while vLLM warms.",
    badge: "trained ✦",
    connection: presetOnPhysixInfer(PHYSIX_MODEL_ID),
  },
  {
    id: "qwen-3b-base",
    label: "Qwen 2.5 3B (base)",
    description:
      "Untrained base of PhysiX-3B on the same L4 Space. Apples-to-apples — identical hardware and generation params, only the weights differ.",
    badge: "3B base",
    connection: presetOnPhysixInfer(QWEN_BASE_MODEL_ID),
  },
  {
    id: "qwen-7b-hf",
    label: "Qwen 2.5 7B (HF Router)",
    description:
      "Bigger 7B baseline routed through Hugging Face. Needs an HF token with 'Make calls to Inference Providers' permission; responds in ~2 s.",
    badge: "7B",
    connection: {
      endpointId: "hf",
      baseUrl: HF_ROUTER_BASE_URL,
      model: "Qwen/Qwen2.5-7B-Instruct",
      apiKey: "",
    },
  },
];
|
| 268 |
+
|
| 269 |
+
/**
 * Look up a preset by id.
 *
 * @param id - Preset identifier to search for.
 * @returns The first preset with that id, or the first entry of
 *   `MODEL_PRESETS` when no preset matches.
 */
export function findPreset(id: string): ModelPreset {
  for (const candidate of MODEL_PRESETS) {
    if (candidate.id === id) {
      return candidate;
    }
  }
  return MODEL_PRESETS[0]!;
}
|
| 272 |
+
|
| 273 |
+
/**
 * Map a connection back to the preset it came from (e.g. to derive the
 * selected card). Only the endpoint id and model id are compared; base
 * URL and API key are ignored.
 *
 * @param c - Connection to identify.
 * @returns The first preset whose endpoint and model both match, or
 *   null when none does.
 */
export function presetForConnection(c: LlmConnection): ModelPreset | null {
  for (const candidate of MODEL_PRESETS) {
    const { endpointId, model } = candidate.connection;
    if (endpointId === c.endpointId && model === c.model) {
      return candidate;
    }
  }
  return null;
}
|
| 285 |
|
| 286 |
/** Build a fresh connection when the user changes endpoints. Keeps the
|
| 287 |
* api key for the new base URL out of localStorage in this helper —
|