Pratyush-01 committed on
Commit
8225d8a
·
verified ·
1 Parent(s): 2f81a49

frontend: sync clean source — drop ComparePane + Compare LLMs tab + Which-endpoint copy; ship 3-preset picker

Browse files
frontend/src/App.tsx CHANGED
@@ -1,12 +1,11 @@
1
  import { useState } from "react";
2
 
3
- import { ComparePane } from "@/components/ComparePane";
4
  import { HeroIntro } from "@/components/HeroIntro";
5
  import { OpenEnvExplorerPane } from "@/components/OpenEnvExplorerPane";
6
  import { RunWithLlmPane } from "@/components/RunWithLlmPane";
7
  import { cn } from "@/lib/cn";
8
 
9
- type TabId = "run" | "compare" | "openenv";
10
 
11
  interface TabDef {
12
  id: TabId;
@@ -15,7 +14,6 @@ interface TabDef {
15
 
16
  const TABS: TabDef[] = [
17
  { id: "run", label: "Run with LLM" },
18
- { id: "compare", label: "Compare LLMs" },
19
  { id: "openenv", label: "OpenEnv API" },
20
  ];
21
 
@@ -75,13 +73,7 @@ export function App(): JSX.Element {
75
  id={`panel-${activeTab}`}
76
  aria-labelledby={`tab-${activeTab}`}
77
  >
78
- {activeTab === "run" ? (
79
- <RunWithLlmPane />
80
- ) : activeTab === "compare" ? (
81
- <ComparePane />
82
- ) : (
83
- <OpenEnvExplorerPane />
84
- )}
85
  </div>
86
  </main>
87
  );
 
1
  import { useState } from "react";
2
 
 
3
  import { HeroIntro } from "@/components/HeroIntro";
4
  import { OpenEnvExplorerPane } from "@/components/OpenEnvExplorerPane";
5
  import { RunWithLlmPane } from "@/components/RunWithLlmPane";
6
  import { cn } from "@/lib/cn";
7
 
8
+ type TabId = "run" | "openenv";
9
 
10
  interface TabDef {
11
  id: TabId;
 
14
 
15
  const TABS: TabDef[] = [
16
  { id: "run", label: "Run with LLM" },
 
17
  { id: "openenv", label: "OpenEnv API" },
18
  ];
19
 
 
73
  id={`panel-${activeTab}`}
74
  aria-labelledby={`tab-${activeTab}`}
75
  >
76
+ {activeTab === "run" ? <RunWithLlmPane /> : <OpenEnvExplorerPane />}
 
 
 
 
 
 
77
  </div>
78
  </main>
79
  );
frontend/src/components/ComparePane.tsx DELETED
@@ -1,624 +0,0 @@
1
- /** A/B comparison pane: trained PhysiX vs. baseline, scored by the
2
- * same verifier on the same episode seed.
3
- *
4
- * Two `LlmConnectionPanel`s feed two parallel `useLlmEpisodeRunner`
5
- * sessions, started via `useLlmCompareRunner`. The panes render the
6
- * same trajectory canvas (each side overlays its own predicted
7
- * trajectory) plus a per-side reward strip and turn transcript. */
8
-
9
- import { useEffect, useMemo, useState } from "react";
10
-
11
- import { EquationDisplay } from "@/components/EquationDisplay";
12
- import { LlmConnectionPanel } from "@/components/LlmConnectionPanel";
13
- import { Skeleton } from "@/components/Skeleton";
14
- import { TrajectoryCanvas } from "@/components/TrajectoryCanvas";
15
- import { useLlmCompareRunner, type CompareSlot } from "@/hooks/useLlmCompareRunner";
16
- import { type LlmTurn, type RunnerStatus } from "@/hooks/useLlmEpisodeRunner";
17
- import { cn } from "@/lib/cn";
18
- import { formatPercent } from "@/lib/format";
19
- import {
20
- DEFAULT_CONNECTION_A,
21
- DEFAULT_CONNECTION_B,
22
- type LlmConnection,
23
- } from "@/lib/llmPresets";
24
- import { pickPrimaryVariable } from "@/lib/trajectory";
25
- import type { RewardBreakdown } from "@/types/physix";
26
-
27
- const ZERO_REWARD: RewardBreakdown = {
28
- match: 0,
29
- progress: 0,
30
- simplicity: 0,
31
- format: 0,
32
- total: 0,
33
- shape: 0,
34
- freq: 0,
35
- amplitude: 0,
36
- };
37
- const CONVERGENCE_THRESHOLD = 0.93;
38
-
39
- export function ComparePane(): JSX.Element {
40
- const runner = useLlmCompareRunner();
41
-
42
- // Connection state lives here so navigating the system / button bar
43
- // doesn't clear the API key or model id.
44
- const [connectionA, setConnectionA] = useState<LlmConnection>(
45
- () => DEFAULT_CONNECTION_A,
46
- );
47
- const [connectionB, setConnectionB] = useState<LlmConnection>(
48
- () => DEFAULT_CONNECTION_B,
49
- );
50
- const [systemId, setSystemId] = useState<string>("");
51
- const [maxTurns, setMaxTurns] = useState<number>(8);
52
- const [temperature, setTemperature] = useState<number>(0.4);
53
-
54
- // Default to damped_spring; fall back to first in list.
55
- useEffect(() => {
56
- if (!systemId && runner.a.state.systems && runner.a.state.systems.length > 0) {
57
- const preferred = runner.a.state.systems.find((s) => s.system_id === "damped_spring");
58
- setSystemId(preferred?.system_id ?? runner.a.state.systems[0]?.system_id ?? "");
59
- }
60
- }, [runner.a.state.systems, systemId]);
61
-
62
- const eitherRunning =
63
- isActive(runner.a.state.status) || isActive(runner.b.state.status);
64
- const eitherStarting =
65
- runner.a.state.status === "starting" || runner.b.state.status === "starting";
66
- const canStart =
67
- !eitherRunning &&
68
- !eitherStarting &&
69
- !!connectionA.model.trim() &&
70
- !!connectionB.model.trim() &&
71
- !!connectionA.baseUrl.trim() &&
72
- !!connectionB.baseUrl.trim();
73
-
74
- function handleStart(): void {
75
- void runner.startBoth({
76
- systemId: systemId,
77
- maxTurns,
78
- connectionA,
79
- connectionB,
80
- temperature,
81
- });
82
- }
83
-
84
- return (
85
- <section className="flex flex-col gap-6">
86
- <div className="grid grid-cols-1 gap-4 lg:grid-cols-2">
87
- <LlmConnectionPanel
88
- title="A"
89
- subtitle="Left side. Suggested: the trained model."
90
- accent="primary"
91
- value={connectionA}
92
- onChange={setConnectionA}
93
- disabled={eitherRunning || eitherStarting}
94
- installedOllamaModels={runner.a.state.models ?? []}
95
- installedOllamaLoading={runner.a.state.models === null}
96
- installedOllamaError={runner.a.state.modelsError}
97
- onRefreshOllama={() => void runner.a.controls.refreshModels()}
98
- />
99
- <LlmConnectionPanel
100
- title="B"
101
- subtitle="Right side. Suggested: a baseline you'd expect A to beat."
102
- accent="blue"
103
- value={connectionB}
104
- onChange={setConnectionB}
105
- disabled={eitherRunning || eitherStarting}
106
- installedOllamaModels={runner.b.state.models ?? []}
107
- installedOllamaLoading={runner.b.state.models === null}
108
- installedOllamaError={runner.b.state.modelsError}
109
- onRefreshOllama={() => void runner.b.controls.refreshModels()}
110
- />
111
- </div>
112
-
113
- <CompareControlBar
114
- systems={runner.a.state.systems}
115
- systemId={systemId}
116
- onSelectSystem={setSystemId}
117
- temperature={temperature}
118
- onChangeTemperature={setTemperature}
119
- maxTurns={maxTurns}
120
- onChangeMaxTurns={setMaxTurns}
121
- canStart={canStart}
122
- eitherRunning={eitherRunning}
123
- eitherStarting={eitherStarting}
124
- onStart={handleStart}
125
- onEnd={() => void runner.endBoth()}
126
- seed={runner.lastSeed}
127
- />
128
-
129
- <div className="grid grid-cols-1 gap-4 lg:grid-cols-2">
130
- <SlotColumn
131
- slot={runner.a}
132
- connection={connectionA}
133
- accent="primary"
134
- title="A"
135
- />
136
- <SlotColumn
137
- slot={runner.b}
138
- connection={connectionB}
139
- accent="blue"
140
- title="B"
141
- />
142
- </div>
143
-
144
- <ScoreboardBanner a={runner.a} b={runner.b} />
145
- </section>
146
- );
147
- }
148
-
149
- // ---------------------------------------------------------------------------
150
-
151
- function CompareControlBar({
152
- systems,
153
- systemId,
154
- onSelectSystem,
155
- temperature,
156
- onChangeTemperature,
157
- maxTurns,
158
- onChangeMaxTurns,
159
- canStart,
160
- eitherRunning,
161
- eitherStarting,
162
- onStart,
163
- onEnd,
164
- seed,
165
- }: {
166
- systems: import("@/lib/interactiveClient").SystemDescriptor[] | null;
167
- systemId: string;
168
- onSelectSystem: (id: string) => void;
169
- temperature: number;
170
- onChangeTemperature: (n: number) => void;
171
- maxTurns: number;
172
- onChangeMaxTurns: (n: number) => void;
173
- canStart: boolean;
174
- eitherRunning: boolean;
175
- eitherStarting: boolean;
176
- onStart: () => void;
177
- onEnd: () => void;
178
- seed: number | null;
179
- }): JSX.Element {
180
- return (
181
- <header className="panel flex flex-col gap-3">
182
- <div>
183
- <p className="heading-eyebrow text-primary">Run side-by-side</p>
184
- <h2 className="mt-1 text-xl font-semibold leading-tight">
185
- Same episode, two models, one verifier
186
- </h2>
187
- <p className="mt-1 max-w-3xl text-xs text-textMuted">
188
- Both sides see the same trajectory, hint, and seed. Reward
189
- comes only from <code className="font-mono">scipy.odeint</code>{" "}
190
- and per-step R² — there is no LLM-as-judge. Differences in the
191
- score are differences in the physics the model proposes.
192
- </p>
193
- </div>
194
-
195
- <div className="flex flex-wrap items-end gap-3">
196
- <Field label="System">
197
- <select
198
- className="rounded-lg border border-border bg-surfaceMuted px-3 py-2 text-sm text-textPrimary outline-none transition focus:border-textMuted disabled:opacity-50"
199
- value={systemId}
200
- onChange={(event) => onSelectSystem(event.target.value)}
201
- disabled={systems === null || eitherRunning || eitherStarting}
202
- >
203
- {systems === null ? (
204
- <option value="">Loading…</option>
205
- ) : (
206
- systems.map((descriptor) => (
207
- <option key={descriptor.system_id} value={descriptor.system_id}>
208
- {prettySystemId(descriptor.system_id)}
209
- </option>
210
- ))
211
- )}
212
- </select>
213
- </Field>
214
-
215
- <Field label="Temp">
216
- <input
217
- type="number"
218
- min={0}
219
- max={2}
220
- step={0.1}
221
- className="w-20 rounded-lg border border-border bg-surfaceMuted px-3 py-2 text-sm text-textPrimary outline-none focus:border-textMuted"
222
- value={temperature}
223
- onChange={(event) =>
224
- onChangeTemperature(
225
- Math.max(0, Math.min(2, Number(event.target.value))),
226
- )
227
- }
228
- disabled={eitherRunning || eitherStarting}
229
- />
230
- </Field>
231
-
232
- <Field label="Turn budget">
233
- <input
234
- type="number"
235
- min={1}
236
- max={32}
237
- className="w-24 rounded-lg border border-border bg-surfaceMuted px-3 py-2 text-sm text-textPrimary outline-none focus:border-textMuted"
238
- value={maxTurns}
239
- onChange={(event) =>
240
- onChangeMaxTurns(Math.max(1, Number(event.target.value)))
241
- }
242
- disabled={eitherRunning || eitherStarting}
243
- />
244
- </Field>
245
-
246
- <div className="ml-auto flex flex-wrap items-center gap-2">
247
- {seed !== null ? (
248
- <span className="rounded-full border border-border bg-surface px-2 py-1 font-mono text-[10px] text-textMuted">
249
- seed {seed}
250
- </span>
251
- ) : null}
252
- {!eitherRunning && !eitherStarting ? (
253
- <button
254
- type="button"
255
- className="btn-primary"
256
- onClick={onStart}
257
- disabled={!canStart}
258
- >
259
- ▶ Run side-by-side
260
- </button>
261
- ) : (
262
- <button type="button" className="btn-secondary" onClick={onEnd}>
263
- End both
264
- </button>
265
- )}
266
- </div>
267
- </div>
268
- </header>
269
- );
270
- }
271
-
272
- // ---------------------------------------------------------------------------
273
-
274
- function SlotColumn({
275
- slot,
276
- connection,
277
- accent,
278
- title,
279
- }: {
280
- slot: CompareSlot;
281
- connection: LlmConnection;
282
- accent: "primary" | "blue";
283
- title: string;
284
- }): JSX.Element {
285
- const turns = slot.state.turns;
286
- const latestTurn: LlmTurn | undefined = turns[turns.length - 1];
287
- const observation = latestTurn?.observation ?? slot.state.initialObservation;
288
- const lastReward = latestTurn?.observation.reward_breakdown ?? ZERO_REWARD;
289
- const observed = slot.state.initialObservation?.trajectory ?? observation?.trajectory ?? [];
290
- const stateVariables = observation?.state_variables ?? [];
291
- const primaryVariable = useMemo(
292
- () =>
293
- stateVariables.length > 0 ? pickPrimaryVariable(stateVariables) : "y",
294
- [stateVariables],
295
- );
296
- const finalMatch = lastReward.match ?? 0;
297
- const converged = finalMatch >= CONVERGENCE_THRESHOLD;
298
- const accentBorder = accent === "primary" ? "border-primary/40" : "border-accentBlue/40";
299
- const accentText = accent === "primary" ? "text-primary" : "text-accentBlue";
300
-
301
- return (
302
- <section className={cn("panel flex flex-col gap-3 border-l-4", accentBorder)}>
303
- <header className="flex items-baseline justify-between gap-2">
304
- <div className="flex items-baseline gap-2">
305
- <span className={cn("heading-eyebrow", accentText)}>Side {title}</span>
306
- <span className="font-mono text-xs text-textPrimary">
307
- {connection.model || "(no model selected)"}
308
- </span>
309
- </div>
310
- <SlotStatusBadge status={slot.state.status} converged={converged} />
311
- </header>
312
-
313
- {slot.state.errorMessage ? (
314
- <ErrorRow
315
- message={slot.state.errorMessage}
316
- onDismiss={() => slot.controls.resetError()}
317
- />
318
- ) : null}
319
-
320
- {observation ? (
321
- <>
322
- <div className="rounded-lg border border-border bg-surfaceMuted p-3">
323
- <TrajectoryCanvas
324
- observed={observed}
325
- predicted={latestTurn?.predictedTrajectory ?? []}
326
- variable={primaryVariable}
327
- variableLabel={primaryVariable}
328
- predictedProgress={1}
329
- />
330
- </div>
331
-
332
- <DenseRewardRow reward={lastReward} />
333
-
334
- <p className="text-[11px] text-textMuted">
335
- <span className="font-semibold uppercase tracking-wide text-textPrimary">
336
- Hint:
337
- </span>{" "}
338
- {observation.hint || "(none)"}
339
- </p>
340
-
341
- <SlotTurns turns={turns} />
342
- </>
343
- ) : (
344
- <SlotPlaceholder status={slot.state.status} />
345
- )}
346
- </section>
347
- );
348
- }
349
-
350
- function SlotStatusBadge({
351
- status,
352
- converged,
353
- }: {
354
- status: RunnerStatus;
355
- converged: boolean;
356
- }): JSX.Element {
357
- if (converged) {
358
- return (
359
- <span className="rounded-full border border-accentGreen/40 bg-accentGreen/10 px-2 py-0.5 text-[10px] uppercase tracking-wider text-accentGreen">
360
- converged
361
- </span>
362
- );
363
- }
364
- const tone =
365
- status === "running"
366
- ? "border-accentBlue/40 text-accentBlue"
367
- : status === "ended"
368
- ? "border-textMuted/40 text-textMuted"
369
- : status === "error"
370
- ? "border-primary/40 text-primary"
371
- : "border-textMuted/40 text-textMuted";
372
- return (
373
- <span className={cn("rounded-full border bg-surface px-2 py-0.5 text-[10px] uppercase tracking-wider", tone)}>
374
- {labelForStatus(status)}
375
- </span>
376
- );
377
- }
378
-
379
- function labelForStatus(status: RunnerStatus): string {
380
- switch (status) {
381
- case "starting":
382
- return "starting";
383
- case "running":
384
- return "running";
385
- case "paused":
386
- return "paused";
387
- case "ended":
388
- return "done";
389
- case "error":
390
- return "error";
391
- case "idle":
392
- default:
393
- return "idle";
394
- }
395
- }
396
-
397
- function SlotPlaceholder({ status }: { status: RunnerStatus }): JSX.Element {
398
- if (status === "starting") {
399
- return (
400
- <div className="flex flex-col gap-2" aria-busy>
401
- <Skeleton className="h-[240px] w-full" />
402
- <Skeleton className="h-3 w-48" />
403
- </div>
404
- );
405
- }
406
- return (
407
- <p className="text-xs text-textMuted">
408
- Configure both connections and press Run to start.
409
- </p>
410
- );
411
- }
412
-
413
- function SlotTurns({ turns }: { turns: LlmTurn[] }): JSX.Element {
414
- if (turns.length === 0) {
415
- return <p className="text-[11px] text-textMuted">No turns yet.</p>;
416
- }
417
- // Render a short transcript: just the latest two turns to keep the
418
- // column readable in the side-by-side layout. Power users can read
419
- // the raw turn-by-turn dump from the original single-model view.
420
- const visible = turns.slice(-2);
421
- return (
422
- <details className="rounded-md border border-border bg-surface px-3 py-2 text-[11px] text-textMuted">
423
- <summary className="cursor-pointer text-textPrimary">
424
- Latest turns ({turns.length})
425
- </summary>
426
- <ol className="mt-2 flex flex-col gap-2">
427
- {visible.map((turn) => (
428
- <li
429
- key={turn.turn}
430
- className="rounded border border-border bg-surfaceMuted p-2"
431
- >
432
- <div className="mb-1 flex items-center justify-between text-[10px] text-textMuted">
433
- <span>turn {turn.turn}</span>
434
- <span className="font-mono">
435
- R² {(turn.observation.reward_breakdown.match * 100).toFixed(0)}%
436
- </span>
437
- </div>
438
- {turn.action.equation ? (
439
- <EquationDisplay
440
- equation={turn.action.equation}
441
- rationale={turn.action.rationale}
442
- />
443
- ) : (
444
- <span className="text-accentAmber">unparseable</span>
445
- )}
446
- </li>
447
- ))}
448
- </ol>
449
- </details>
450
- );
451
- }
452
-
453
- function DenseRewardRow({ reward }: { reward: RewardBreakdown }): JSX.Element {
454
- // Reward components (top) feed the trainer's weighted total.
455
- // Diagnostic sub-scores (bottom) are visual-closeness signals only —
456
- // see RewardBreakdown class docstring on the backend.
457
- const rewardComponents: { name: string; value: number }[] = [
458
- { name: "match", value: reward.match ?? 0 },
459
- { name: "progress", value: reward.progress ?? 0 },
460
- { name: "simplicity", value: reward.simplicity ?? 0 },
461
- { name: "format", value: reward.format ?? 0 },
462
- ];
463
- const diagComponents: { name: string; value: number }[] = [
464
- { name: "shape", value: reward.shape ?? 0 },
465
- { name: "freq", value: reward.freq ?? 0 },
466
- { name: "amplitude", value: reward.amplitude ?? 0 },
467
- ];
468
- return (
469
- <div className="flex flex-col gap-2 rounded-md border border-border bg-surface px-3 py-2 font-mono text-[11px]">
470
- <div className="grid grid-cols-4 gap-2">
471
- {rewardComponents.map(({ name, value }) => (
472
- <RewardCell key={name} name={name} value={value} />
473
- ))}
474
- </div>
475
- <div className="flex items-center gap-2 border-t border-border/60 pt-2">
476
- <span
477
- className="text-[10px] uppercase tracking-wider text-textMuted"
478
- title="Diagnostic-only — not part of the reward total. Captures visual closeness (shape / freq / amplitude) where R² collapses to 0."
479
- >
480
- diag
481
- </span>
482
- <div className="grid flex-1 grid-cols-3 gap-2">
483
- {diagComponents.map(({ name, value }) => (
484
- <RewardCell key={name} name={name} value={value} muted />
485
- ))}
486
- </div>
487
- </div>
488
- </div>
489
- );
490
- }
491
-
492
- function RewardCell({
493
- name,
494
- value,
495
- muted = false,
496
- }: {
497
- name: string;
498
- value: number;
499
- muted?: boolean;
500
- }): JSX.Element {
501
- return (
502
- <div className="flex flex-col gap-1">
503
- <div className="flex items-baseline justify-between">
504
- <span className="text-textMuted">{name}</span>
505
- <span className={muted ? "text-textMuted" : "text-textPrimary"}>
506
- {value.toFixed(2)}
507
- </span>
508
- </div>
509
- <div
510
- className="h-1 w-full overflow-hidden rounded-full bg-border"
511
- aria-hidden
512
- >
513
- <div
514
- className={cn(
515
- "h-full rounded-full",
516
- value >= 0.7
517
- ? muted
518
- ? "bg-accentBlue/60"
519
- : "bg-accentGreen/70"
520
- : value >= 0.3
521
- ? "bg-accentAmber/70"
522
- : "bg-textMuted/40",
523
- )}
524
- style={{ width: `${Math.max(0, Math.min(1, value)) * 100}%` }}
525
- />
526
- </div>
527
- </div>
528
- );
529
- }
530
-
531
- function ScoreboardBanner({
532
- a,
533
- b,
534
- }: {
535
- a: CompareSlot;
536
- b: CompareSlot;
537
- }): JSX.Element | null {
538
- const aDone = a.state.status === "ended";
539
- const bDone = b.state.status === "ended";
540
- if (!aDone || !bDone) return null;
541
-
542
- const aMatch = lastMatch(a);
543
- const bMatch = lastMatch(b);
544
- const winner = aMatch === bMatch ? null : aMatch > bMatch ? "A" : "B";
545
-
546
- return (
547
- <div className="panel border border-accentGreen/30 bg-accentGreen/5 text-sm">
548
- <p className="heading-eyebrow text-accentGreen">Scoreboard</p>
549
- <div className="mt-2 flex flex-wrap items-baseline gap-6 text-textPrimary">
550
- <span>
551
- A: <span className="font-mono">{formatPercent(aMatch)}</span> R²
552
- </span>
553
- <span>
554
- B: <span className="font-mono">{formatPercent(bMatch)}</span> R²
555
- </span>
556
- {winner ? (
557
- <span className="text-accentGreen">
558
- Winner:{" "}
559
- <strong className="font-semibold text-textPrimary">{winner}</strong>
560
- </span>
561
- ) : (
562
- <span className="text-textMuted">Tied.</span>
563
- )}
564
- </div>
565
- </div>
566
- );
567
- }
568
-
569
- function ErrorRow({
570
- message,
571
- onDismiss,
572
- }: {
573
- message: string;
574
- onDismiss: () => void;
575
- }): JSX.Element {
576
- return (
577
- <div
578
- role="alert"
579
- className="flex items-start justify-between gap-2 rounded-md border border-accentAmber/40 bg-accentAmber/5 px-3 py-2 text-xs text-accentAmber"
580
- >
581
- <span className="whitespace-pre-line">{message}</span>
582
- <button
583
- type="button"
584
- onClick={onDismiss}
585
- className="text-[10px] text-textMuted underline hover:text-textPrimary"
586
- >
587
- dismiss
588
- </button>
589
- </div>
590
- );
591
- }
592
-
593
- function Field({
594
- label,
595
- children,
596
- }: {
597
- label: string;
598
- children: React.ReactNode;
599
- }): JSX.Element {
600
- return (
601
- <label className="flex flex-col gap-1 text-xs text-textMuted">
602
- <span className="heading-eyebrow">{label}</span>
603
- {children}
604
- </label>
605
- );
606
- }
607
-
608
- function isActive(status: RunnerStatus): boolean {
609
- return status === "running" || status === "paused";
610
- }
611
-
612
- function lastMatch(slot: CompareSlot): number {
613
- const turns = slot.state.turns;
614
- const last = turns[turns.length - 1];
615
- return last?.observation.reward_breakdown.match ?? 0;
616
- }
617
-
618
- function prettySystemId(systemId: string): string {
619
- if (!systemId) return "(none)";
620
- return systemId
621
- .split("_")
622
- .map((part) => part.charAt(0).toUpperCase() + part.slice(1))
623
- .join(" ");
624
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
frontend/src/components/RunWithLlmPane.tsx CHANGED
@@ -1,17 +1,19 @@
1
- /** Single-LLM run pane: pick a connection, hit run, watch the model
2
- * step through the episode turn by turn.
3
  *
4
  * This is the "all the steps" view — every turn's hypothesis, reward
5
  * breakdown, latency, and raw completion are surfaced in a scrollable
6
  * transcript so judges can audit exactly what the model proposed.
7
  *
8
- * Compare with `ComparePane`, which runs two of these side by side
9
- * with a condensed transcript per side. */
 
 
10
 
11
  import { useEffect, useMemo, useState } from "react";
12
 
13
  import { EquationDisplay } from "@/components/EquationDisplay";
14
- import { LlmConnectionPanel } from "@/components/LlmConnectionPanel";
15
  import { Skeleton } from "@/components/Skeleton";
16
  import { TrajectoryCanvas } from "@/components/TrajectoryCanvas";
17
  import {
@@ -22,7 +24,13 @@ import {
22
  import { cn } from "@/lib/cn";
23
  import {
24
  DEFAULT_SINGLE_LLM_CONNECTION,
 
25
  type LlmConnection,
 
 
 
 
 
26
  } from "@/lib/llmPresets";
27
  import { pickPrimaryVariable } from "@/lib/trajectory";
28
  import type { RewardBreakdown } from "@/types/physix";
@@ -60,7 +68,13 @@ export function RunWithLlmPane(): JSX.Element {
60
  const status = runner.status;
61
  const busy = status === "starting" || status === "running";
62
  const hasSession = runner.sessionId !== null;
63
- const canStart = !busy && !!connection.model.trim() && !!connection.baseUrl.trim();
 
 
 
 
 
 
64
 
65
  function handleStart(): void {
66
  void runner.start({
@@ -105,19 +119,10 @@ export function RunWithLlmPane(): JSX.Element {
105
  </p>
106
  </header>
107
 
108
- <EndpointGuide />
109
-
110
- <LlmConnectionPanel
111
- title="LLM"
112
- subtitle="One model drives the episode."
113
- accent="primary"
114
- value={connection}
115
  onChange={setConnection}
116
  disabled={busy}
117
- installedOllamaModels={runner.models ?? []}
118
- installedOllamaLoading={runner.models === null}
119
- installedOllamaError={runner.modelsError}
120
- onRefreshOllama={() => void runner.refreshModels()}
121
  />
122
 
123
  <ControlBar
@@ -639,56 +644,175 @@ function RewardCell({
639
  );
640
  }
641
 
642
- /** "Which endpoint should I pick?" callout shown above the connection
643
- * panel. Three rows of one-liner guidance; no images, no links to
644
- * external docs — keeps the page fast and the answer visible without
645
- * scrolling.
646
- *
647
- * Why this exists:
648
- * The endpoint dropdown has 5 options and the optimal pick depends
649
- * on what the user has on hand. Without this callout most visitors
650
- * default to whatever's first and either (a) hit a token error
651
- * (HF Router with no token) or (b) sit through a 90 s GPU cold-boot
652
- * (PhysiX-Infer) without knowing it's coming. */
653
- function EndpointGuide(): JSX.Element {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
654
  return (
655
- <div className="rounded-lg border border-border bg-surfaceMuted px-4 py-3 text-xs leading-relaxed text-textMuted">
656
- <p className="heading-eyebrow text-textPrimary">
657
- Which endpoint should you pick?
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
658
  </p>
659
- <ul className="mt-2 flex flex-col gap-1.5">
660
- <li>
661
- <span className="text-textPrimary">Hugging Face Router</span>{" "}
662
- <span className="rounded bg-surface px-1.5 py-0.5 text-[10px] uppercase tracking-wider text-textMuted">
663
- default
664
- </span>{" "}
665
- — easiest path. Paste a token from{" "}
666
- <code className="font-mono text-textPrimary">
667
- huggingface.co/settings/tokens
668
- </code>{" "}
669
- (with the &quot;Make calls to Inference Providers&quot; permission),
670
- pick a suggested model, hit Run. Responds in ~2 s, no warm-up.
671
- </li>
672
- <li>
673
- <span className="text-textPrimary">PhysiX-Infer GPU ✦</span> — only
674
- way to compare the GRPO-trained{" "}
675
- <code className="font-mono text-textPrimary">physix-3b-rl</code>{" "}
676
- against its{" "}
677
- <code className="font-mono text-textPrimary">Qwen 2.5 3B</code> base
678
- on identical hardware. No token. Sleeps after 5 min idle so first
679
- request after sleep takes ~90-120 s while two 3B models load on the
680
- L4 — the status banner below shows live state, with a Prewarm
681
- button to wake it before you hit Run.
682
- </li>
683
- <li>
684
- <span className="text-textPrimary">Ollama / OpenAI / Custom</span>{" "}
685
- — bring your own endpoint. Useful for local dev (Ollama on
686
- <code className="font-mono text-textPrimary">localhost:11434</code>),
687
- frontier-model baselines (OpenAI), or pointing at a private vLLM /
688
- inference endpoint URL.
689
- </li>
690
- </ul>
691
- </div>
692
  );
693
  }
694
 
 
1
+ /** Single-LLM run pane: pick one of three preset models, hit run,
2
+ * watch the model step through the episode turn by turn.
3
  *
4
  * This is the "all the steps" view — every turn's hypothesis, reward
5
  * breakdown, latency, and raw completion are surfaced in a scrollable
6
  * transcript so judges can audit exactly what the model proposed.
7
  *
8
+ * The model picker is intentionally a hard 3-option choice (trained
9
+ * PhysiX-3B, Qwen 3B base, Qwen 7B baseline) — typing model ids was
10
+ * confusing for first-time users and most picks ended up being one of
11
+ * these three anyway. */
12
 
13
  import { useEffect, useMemo, useState } from "react";
14
 
15
  import { EquationDisplay } from "@/components/EquationDisplay";
16
+ import { PhysixInferStatus } from "@/components/PhysixInferStatus";
17
  import { Skeleton } from "@/components/Skeleton";
18
  import { TrajectoryCanvas } from "@/components/TrajectoryCanvas";
19
  import {
 
24
  import { cn } from "@/lib/cn";
25
  import {
26
  DEFAULT_SINGLE_LLM_CONNECTION,
27
+ MODEL_PRESETS,
28
  type LlmConnection,
29
+ type ModelPreset,
30
+ findEndpoint,
31
+ loadApiKey,
32
+ presetForConnection,
33
+ saveApiKey,
34
  } from "@/lib/llmPresets";
35
  import { pickPrimaryVariable } from "@/lib/trajectory";
36
  import type { RewardBreakdown } from "@/types/physix";
 
68
  const status = runner.status;
69
  const busy = status === "starting" || status === "running";
70
  const hasSession = runner.sessionId !== null;
71
+ const endpoint = findEndpoint(connection.endpointId);
72
+ const hasRequiredKey = !endpoint.needsKey || !!connection.apiKey.trim();
73
+ const canStart =
74
+ !busy &&
75
+ !!connection.model.trim() &&
76
+ !!connection.baseUrl.trim() &&
77
+ hasRequiredKey;
78
 
79
  function handleStart(): void {
80
  void runner.start({
 
119
  </p>
120
  </header>
121
 
122
+ <ModelPresetPicker
123
+ connection={connection}
 
 
 
 
 
124
  onChange={setConnection}
125
  disabled={busy}
 
 
 
 
126
  />
127
 
128
  <ControlBar
 
644
  );
645
  }
646
 
647
+ // ---------------------------------------------------------------------
648
+ // Model preset picker three buttons + one (optional) API-key field.
649
+ // ---------------------------------------------------------------------
650
+ //
651
+ // The picker replaces the old "Endpoint dropdown + freeform model id +
652
+ // hint paragraph" UI. Users always pick one of three known-good models;
653
+ // the API-key field only appears when the picked endpoint needs one
654
+ // (just the HF Router 7B preset today). Per-preset connections are
655
+ // persisted in localStorage by base URL via `loadApiKey` / `saveApiKey`,
656
+ // so a token typed for the 7B preset survives a page reload and isn't
657
+ // shown when the trained PhysiX preset is selected (it doesn't need
658
+ // one).
659
+
660
interface ModelPresetPickerProps {
  /** Currently selected connection (endpoint + model + api key). */
  connection: LlmConnection;
  /** Invoked with a fully built connection whenever the user picks a
   * preset or edits the API-key field. */
  onChange: (next: LlmConnection) => void;
  /** Disables the preset cards and the key input (e.g. mid-run). */
  disabled?: boolean;
}

/**
 * Three-button model picker plus an API-key input that only renders
 * when the selected endpoint needs one (`endpoint.needsKey`).
 *
 * Selection state is derived, not stored: the active preset is whichever
 * entry of `MODEL_PRESETS` matches the current `connection` (falling back
 * to the first preset when nothing matches, e.g. stale persisted state).
 * API keys are persisted per base URL via `loadApiKey` / `saveApiKey`.
 */
function ModelPresetPicker({
  connection,
  onChange,
  disabled,
}: ModelPresetPickerProps): JSX.Element {
  // Derive the selected preset from the connection; default to preset 0
  // when the connection doesn't correspond to any preset.
  const selected = presetForConnection(connection) ?? MODEL_PRESETS[0]!;
  const endpoint = findEndpoint(selected.connection.endpointId);
  const needsKey = endpoint.needsKey;

  // Whether the key input shows plaintext (true) or password dots.
  const [revealKey, setRevealKey] = useState(false);

  // Hydrate the API key from per-URL storage whenever the preset (and
  // therefore base URL) changes. The `stored !== connection.apiKey`
  // guard prevents an onChange loop once the key is already hydrated.
  useEffect(() => {
    if (!connection.baseUrl) return;
    const stored = loadApiKey(connection.baseUrl);
    if (stored && stored !== connection.apiKey) {
      onChange({ ...connection, apiKey: stored });
    }
    // Deliberately keyed on baseUrl only — re-running on every
    // connection/onChange identity change would churn the effect.
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [connection.baseUrl]);

  // Switch to a preset, restoring any key previously saved for its URL.
  function selectPreset(preset: ModelPreset): void {
    onChange({
      ...preset.connection,
      apiKey: loadApiKey(preset.connection.baseUrl),
    });
  }

  // Persist the typed key for the current base URL, then propagate it.
  function setApiKey(key: string): void {
    saveApiKey(connection.baseUrl, key);
    onChange({ ...connection, apiKey: key });
  }

  return (
    <section className="panel flex flex-col gap-4">
      <header>
        <p className="heading-eyebrow text-primary">Pick a model</p>
        <p className="mt-1 text-xs text-textMuted">
          Three known-good options — the trained PhysiX-3B, its Qwen 3B
          base, and a Qwen 7B baseline. No URLs to type, no model ids to
          paste.
        </p>
      </header>

      <div
        role="radiogroup"
        aria-label="Model"
        className="grid grid-cols-1 gap-3 md:grid-cols-3"
      >
        {MODEL_PRESETS.map((preset) => (
          <PresetCard
            key={preset.id}
            preset={preset}
            selected={selected.id === preset.id}
            disabled={disabled ?? false}
            onSelect={() => selectPreset(preset)}
          />
        ))}
      </div>

      {needsKey ? (
        <label className="flex flex-col gap-1 text-xs text-textMuted">
          <span className="heading-eyebrow flex items-baseline justify-between gap-2">
            <span>HF token (required)</span>
            <button
              type="button"
              onClick={() => setRevealKey((v) => !v)}
              className="text-[10px] uppercase tracking-wider text-textMuted underline hover:text-textPrimary"
            >
              {revealKey ? "hide" : "show"}
            </button>
          </span>
          <input
            type={revealKey ? "text" : "password"}
            value={connection.apiKey}
            onChange={(e) => setApiKey(e.target.value)}
            disabled={disabled}
            placeholder="hf_..."
            className="w-full rounded-lg border border-border bg-surfaceMuted px-3 py-2 font-mono text-xs text-textPrimary outline-none transition focus:border-textMuted disabled:opacity-50"
          />
          <span className="text-[11px] leading-relaxed text-textMuted">
            Get one at{" "}
            <code className="font-mono text-textPrimary">
              huggingface.co/settings/tokens
            </code>{" "}
            with the &quot;Make calls to Inference Providers&quot;
            permission. Saved per endpoint in your browser.
          </span>
        </label>
      ) : null}

      {/* Live banner only when the picked preset hits the GPU Space. */}
      {selected.connection.endpointId === "physix" ? (
        <PhysixInferStatus />
      ) : null}
    </section>
  );
}
765
+
766
interface PresetCardProps {
  /** The preset this card renders. */
  preset: ModelPreset;
  /** Whether this card is the active radio option. */
  selected: boolean;
  /** Disables the button (e.g. while a run is in flight). */
  disabled: boolean;
  /** Called when the user clicks the card. */
  onSelect: () => void;
}

/**
 * One selectable card in the model-preset radio group: label, badge
 * pill, description, and the model id in monospace. Selection state is
 * purely visual here — the parent owns which card is checked.
 */
function PresetCard({
  preset,
  selected,
  disabled,
  onSelect,
}: PresetCardProps): JSX.Element {
  // Hoist the class computations so the JSX below stays declarative.
  const cardClassName = cn(
    "flex flex-col gap-2 rounded-xl border bg-surfaceMuted p-3 text-left transition",
    "disabled:cursor-not-allowed disabled:opacity-50",
    selected
      ? "border-primary bg-primary/5 shadow-sm"
      : "border-border hover:border-textMuted",
  );
  const badgeClassName = cn(
    "rounded-full border bg-surface px-2 py-0.5 text-[10px] uppercase tracking-wider",
    selected ? "border-primary/60 text-primary" : "border-border text-textMuted",
  );

  return (
    <button
      type="button"
      role="radio"
      aria-checked={selected}
      onClick={onSelect}
      disabled={disabled}
      className={cardClassName}
    >
      <div className="flex items-center justify-between gap-2">
        <span className="text-sm font-semibold text-textPrimary">
          {preset.label}
        </span>
        <span className={badgeClassName}>{preset.badge}</span>
      </div>
      <p className="text-[11px] leading-relaxed text-textMuted">
        {preset.description}
      </p>
      <code className="font-mono text-[10px] text-textMuted">
        {preset.connection.model}
      </code>
    </button>
  );
}
818
 
frontend/src/hooks/useLlmCompareRunner.ts DELETED
@@ -1,128 +0,0 @@
1
- /** Drives two parallel `useLlmEpisodeRunner` instances against the same
2
- * episode seed. The whole point of the demo is to put two models on
3
- * identical input and compare their behaviour, scored by the same
4
- * verifier with no LLM-as-judge.
5
- *
6
- * Implementation note: each side gets its own session because the env
7
- * builds a turn-by-turn history that the next prompt depends on. We
8
- * *don't* fork a single session — that would corrupt history. Instead
9
- * we start two sessions with the same `system_id` + `seed`, which the
10
- * server already supports via its existing reset path. */
11
-
12
- import { useCallback, useMemo, useRef, useState } from "react";
13
-
14
- import {
15
- type LlmEpisodeRunnerControls,
16
- type LlmEpisodeRunnerState,
17
- useLlmEpisodeRunner,
18
- } from "@/hooks/useLlmEpisodeRunner";
19
- import type { LlmConnection } from "@/lib/llmPresets";
20
-
21
- export interface CompareSlot {
22
- id: "a" | "b";
23
- state: LlmEpisodeRunnerState;
24
- controls: LlmEpisodeRunnerControls;
25
- }
26
-
27
- export interface CompareRunnerControls {
28
- /** Start both sides on the same seed + system. Each side uses its
29
- * own connection. */
30
- startBoth: (options: {
31
- systemId?: string | undefined;
32
- maxTurns?: number | undefined;
33
- connectionA: LlmConnection;
34
- connectionB: LlmConnection;
35
- temperature?: number | undefined;
36
- }) => Promise<void>;
37
- /** End both sessions and reset state. */
38
- endBoth: () => Promise<void>;
39
- }
40
-
41
- export interface CompareRunnerState {
42
- a: CompareSlot;
43
- b: CompareSlot;
44
- /** Seed the last `startBoth` call locked in. Surfaces in the UI so
45
- * users know both sides really saw the same episode. */
46
- lastSeed: number | null;
47
- /** Resolved system_id (same for both slots). */
48
- systemId: string | null;
49
- }
50
-
51
- export function useLlmCompareRunner(): CompareRunnerState & CompareRunnerControls {
52
- const a = useLlmEpisodeRunner();
53
- const b = useLlmEpisodeRunner();
54
- const [lastSeed, setLastSeed] = useState<number | null>(null);
55
- const [systemId, setSystemId] = useState<string | null>(null);
56
-
57
- // Keep the latest controls on a ref so `startBoth` doesn't have to
58
- // depend on them — useEpisodeRunner reinstates them on every render
59
- // and pulling them through the dep array would churn the callback.
60
- const controlsRef = useRef({ a: a, b: b });
61
- controlsRef.current = { a, b };
62
-
63
- const startBoth = useCallback(
64
- async (options: {
65
- systemId?: string | undefined;
66
- maxTurns?: number | undefined;
67
- connectionA: LlmConnection;
68
- connectionB: LlmConnection;
69
- temperature?: number | undefined;
70
- }) => {
71
- // Generate a single seed so both sides see identical observations.
72
- // 31 bits keeps us inside JS-safe int range and Numpy-acceptable.
73
- const seed = Math.floor(Math.random() * 2_147_483_647);
74
- setLastSeed(seed);
75
- setSystemId(options.systemId ?? null);
76
-
77
- const common = {
78
- systemId: options.systemId,
79
- seed,
80
- maxTurns: options.maxTurns,
81
- temperature: options.temperature,
82
- };
83
-
84
- // Kick off both in parallel — the server makes independent
85
- // sessions so they can't deadlock on each other.
86
- await Promise.all([
87
- controlsRef.current.a.start({ ...common, connection: options.connectionA }),
88
- controlsRef.current.b.start({ ...common, connection: options.connectionB }),
89
- ]);
90
- },
91
- [],
92
- );
93
-
94
- const endBoth = useCallback(async () => {
95
- await Promise.all([
96
- controlsRef.current.a.end(),
97
- controlsRef.current.b.end(),
98
- ]);
99
- setLastSeed(null);
100
- setSystemId(null);
101
- }, []);
102
-
103
- const slotA = useMemo<CompareSlot>(
104
- () => ({
105
- id: "a",
106
- state: { ...a },
107
- controls: { ...a },
108
- }),
109
- [a],
110
- );
111
- const slotB = useMemo<CompareSlot>(
112
- () => ({
113
- id: "b",
114
- state: { ...b },
115
- controls: { ...b },
116
- }),
117
- [b],
118
- );
119
-
120
- return {
121
- a: slotA,
122
- b: slotB,
123
- lastSeed,
124
- systemId: systemId ?? a.systemId ?? b.systemId,
125
- startBoth,
126
- endBoth,
127
- };
128
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
frontend/src/lib/llmPresets.ts CHANGED
@@ -186,48 +186,102 @@ export interface LlmConnection {
186
  apiKey: string;
187
  }
188
 
189
- /** Default for the single-LLM "Run with LLM" pane.
190
- *
191
- * HF Router is the lowest-friction option for a first-time visitor:
192
- * paste a token, pick a suggested model (all live-probed and known to
193
- * serve), get a response in ~2 s. No GPU cold-start, no localhost
194
- * dependency.
195
- *
196
- * We prefill the model so the Run button is enabled the moment the
197
- * user pastes a token — keeping the model empty and forcing them to
198
- * pick from the dropdown is friction we don't need. The api key
199
- * field is hydrated from localStorage by the panel on first render. */
200
  export const DEFAULT_SINGLE_LLM_CONNECTION: LlmConnection = {
201
- endpointId: "hf",
202
- baseUrl: HF_ROUTER_BASE_URL,
203
- // Matches the first entry of the "hf" endpoint's modelSuggestions —
204
- // smallest router-served Qwen model, fastest response.
205
- model: "Qwen/Qwen2.5-7B-Instruct",
206
- apiKey: "",
207
- };
208
-
209
- /** Default A side of the Compare pane: trained PhysiX-3B on the sister
210
- * GPU Space. The Compare pane's whole purpose is the trained-vs-base
211
- * side-by-side, so it's worth the cold-start penalty here even though
212
- * the single-LLM pane avoids it. No token needed. */
213
- export const DEFAULT_CONNECTION_A: LlmConnection = {
214
  endpointId: "physix",
215
  baseUrl: PHYSIX_INFER_BASE_URL,
216
  model: PHYSIX_MODEL_ID,
217
  apiKey: "",
218
  };
219
 
220
- /** Default B side of the Compare pane: same sister Space, same L4 GPU,
221
- * just the Qwen 2.5 3B baseline. Apples-to-apples identical
222
- * architecture, identical hardware, identical generation params; only
223
- * the weights differ. Both models share the same Space, so warming
224
- * side A also warms B. */
225
- export const DEFAULT_CONNECTION_B: LlmConnection = {
226
- endpointId: "physix",
227
- baseUrl: PHYSIX_INFER_BASE_URL,
228
- model: QWEN_BASE_MODEL_ID,
229
- apiKey: "",
230
- };
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231
 
232
  /** Build a fresh connection when the user changes endpoints. Keeps the
233
  * api key for the new base URL out of localStorage in this helper —
 
186
  apiKey: string;
187
  }
188
 
189
/** Default for the single-LLM "Run with LLM" pane: the trained
 * PhysiX-3B. The picker is now a 3-button preset — the first preset's
 * connection IS this default, so they stay in sync. */
export const DEFAULT_SINGLE_LLM_CONNECTION: LlmConnection = {
  endpointId: "physix",
  baseUrl: PHYSIX_INFER_BASE_URL,
  model: PHYSIX_MODEL_ID,
  // No token required for this endpoint; the picker hydrates any
  // stored key from localStorage on mount anyway.
  apiKey: "",
};
198
 
199
// ---------------------------------------------------------------------
// Model presets — the 3 fixed options the Run pane exposes.
// ---------------------------------------------------------------------

/** A single preset = "click here to talk to model X via endpoint Y".
 * The whole point is to spare users from picking an endpoint, then a
 * model id, then realising the two don't match. Each preset bundles
 * exactly the (endpoint, model, baseUrl, needsKey) tuple that works. */
export interface ModelPreset {
  /** Stable identifier; used for selection state and `findPreset`. */
  id: string;
  /** Human-readable name shown on the preset card. */
  label: string;
  /** One-line "what is this" copy shown under the label. */
  description: string;
  /** Short tag rendered as a pill (e.g. "trained", "3B base", "7B"). */
  badge: string;
  /** Pre-built connection — drop straight into the runner. */
  connection: LlmConnection;
}

/** The three options the Run-with-LLM picker exposes. Order matters:
 * the first entry is the default selection on a fresh page-load, and
 * its connection must stay equal to DEFAULT_SINGLE_LLM_CONNECTION.
 *
 * Two of the three live on the PhysiX-Infer GPU Space (no token, same
 * L4 hardware) so users can compare the trained PhysiX-3B against its
 * Qwen 3B base apples-to-apples with one click. The 7B baseline runs
 * through HF Router because no provider serves Qwen 3B today and HF
 * Router gives a "bigger model" reference point in <2 s once a token
 * is pasted. */
export const MODEL_PRESETS: readonly ModelPreset[] = [
  {
    id: "physix-3b-rl",
    label: "PhysiX-3B (trained)",
    description:
      "Our GRPO-trained Qwen-3B on a sister L4 GPU Space. No token needed; first request after sleep is ~90-120 s while vLLM warms.",
    badge: "trained ✦",
    connection: {
      endpointId: "physix",
      baseUrl: PHYSIX_INFER_BASE_URL,
      model: PHYSIX_MODEL_ID,
      apiKey: "",
    },
  },
  {
    id: "qwen-3b-base",
    label: "Qwen 2.5 3B (base)",
    description:
      "Untrained base of PhysiX-3B on the same L4 Space. Apples-to-apples — identical hardware and generation params, only the weights differ.",
    badge: "3B base",
    connection: {
      endpointId: "physix",
      baseUrl: PHYSIX_INFER_BASE_URL,
      model: QWEN_BASE_MODEL_ID,
      apiKey: "",
    },
  },
  {
    id: "qwen-7b-hf",
    label: "Qwen 2.5 7B (HF Router)",
    description:
      "Bigger 7B baseline routed through Hugging Face. Needs an HF token with 'Make calls to Inference Providers' permission; responds in ~2 s.",
    badge: "7B",
    connection: {
      endpointId: "hf",
      baseUrl: HF_ROUTER_BASE_URL,
      model: "Qwen/Qwen2.5-7B-Instruct",
      apiKey: "",
    },
  },
];
268
+
269
+ export function findPreset(id: string): ModelPreset {
270
+ return MODEL_PRESETS.find((p) => p.id === id) ?? MODEL_PRESETS[0]!;
271
+ }
272
+
273
+ /** Match a connection back to a preset (e.g. for selection state when
274
+ * hydrating from storage). Returns the first preset whose endpoint+
275
+ * model match; null if none match. */
276
+ export function presetForConnection(c: LlmConnection): ModelPreset | null {
277
+ return (
278
+ MODEL_PRESETS.find(
279
+ (p) =>
280
+ p.connection.endpointId === c.endpointId &&
281
+ p.connection.model === c.model,
282
+ ) ?? null
283
+ );
284
+ }
285
 
286
  /** Build a fresh connection when the user changes endpoints. Keeps the
287
  * api key for the new base URL out of localStorage in this helper —