File size: 12,483 Bytes
0e24aff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
/** Single source of truth for the connection panel.
 *
 *  The panel decomposes "Connect an LLM" into two questions:
 *
 *    1. Which **endpoint** are we hitting?  (4 fixed options)
 *    2. Which **model id** does that endpoint understand?
 *
 *  Endpoints rarely change — there are basically four ways anyone runs
 *  an OpenAI-compatible chat endpoint today, and the panel exposes
 *  exactly those. Models, on the other hand, are a long tail: we keep
 *  a curated suggestion list per endpoint so the user can either pick
 *  a known-good model with one click or paste anything they like.
 *
 *  URLs match `physix/server/providers.py`; keep the two in sync. */

/** OpenAI-compatible base URL of the Hugging Face Inference Router. */
export const HF_ROUTER_BASE_URL = "https://router.huggingface.co/v1";
/** OpenAI's first-party chat completions API. */
export const OPENAI_BASE_URL = "https://api.openai.com/v1";
/** Default OpenAI-compatible endpoint exposed by a local `ollama serve`. */
export const OLLAMA_OPENAI_BASE_URL = "http://localhost:11434/v1";
/** Hub id of the trained PhysiX-3B checkpoint. */
export const PHYSIX_MODEL_ID = "Pratyush-01/physix-3b-rl";
/** Hub id of the untrained Qwen 2.5 3B base that PhysiX-3B started from. */
export const QWEN_BASE_MODEL_ID = "Qwen/Qwen2.5-3B-Instruct";

/** Sister GPU Space that hosts both the trained PhysiX-3B and the Qwen
 *  2.5 3B baseline behind a single OpenAI-compatible URL. Open access
 *  (no token); routing on the `model` field happens inside the proxy.
 *  Sleeps after 5 min idle, so the first call after sleep is ~90-120 s
 *  while vLLM warms up — subsequent calls are fast. */
export const PHYSIX_INFER_BASE_URL =
  "https://pratyush-01-physix-infer.hf.space/v1";

/** Stable ids for the fixed endpoint choices (plus `custom`). These ids
 *  are persisted (see the stale-localStorage fallback in `findEndpoint`),
 *  so renaming a member is a breaking schema change. */
export type EndpointId = "ollama" | "hf" | "openai" | "custom" | "physix";

/** UX hint that drives how the model field renders. Ollama exposes a
 *  catalogue of installed tags via /interactive/models, so we render a
 *  hard select. Everywhere else the model id space is open, so we use
 *  a free-form input with a suggestions datalist. */
export type ModelInputMode = "ollama-installed" | "freeform-with-suggestions";

/** One entry in the curated Model datalist for an endpoint. */
export interface ModelSuggestion {
  /** Model id passed verbatim to the chat endpoint. */
  id: string;
  /** Short label rendered next to the id (`(trained)`, `(baseline)`, …). */
  tag?: string;
}

/** Static description of one of the panel's endpoint choices. */
export interface Endpoint {
  /** Stable id; persisted, so treat renames as schema changes. */
  id: EndpointId;
  /** Human-readable name shown in the endpoint dropdown. */
  label: string;
  /** Pre-filled when the endpoint is picked. Empty for `custom`. */
  baseUrl: string;
  /** Whether the endpoint typically requires a Bearer token. Drives the
   *  API key field's placeholder copy and one-line help text. */
  needsKey: boolean;
  /** How to render the Model field for this endpoint. */
  modelInputMode: ModelInputMode;
  /** Ordered list of curated suggestions for the Model datalist. The
   *  first entry is the default the form pre-fills when the endpoint
   *  is picked. Empty for `custom`. */
  modelSuggestions: ModelSuggestion[];
  /** One-line help shown under the panel. */
  hint: string;
}

// Order matters: the FIRST entry is what the dropdown prefills on a
// fresh page-load (and what `findEndpoint` falls back to for a stale
// localStorage id). HF Router is first because it's the lowest-friction
// "bring your own token" path — it answers in <2 s once a token is
// pasted, no GPU cold-start. The PhysiX-Infer entry is second so it's
// still one click away for the "compare trained vs base" workflow.
export const ENDPOINTS: readonly Endpoint[] = [
  {
    id: "hf",
    label: "Hugging Face Router",
    baseUrl: HF_ROUTER_BASE_URL,
    needsKey: true,
    modelInputMode: "freeform-with-suggestions",
    // Suggestions limited to models we've live-probed against the HF
    // Router and confirmed serve through at least one provider. The
    // first entry is the default the form prefills — keep it
    // small-and-fast so the first turn doesn't feel like it stalled.
    //
    // Notable absentee: Qwen/Qwen2.5-3B-Instruct (the base of
    // physix-3b-rl). It's the natural baseline to compare against the
    // trained model, but as of Apr 2026 NO router provider serves it,
    // so prefilling it would 400 every fresh user. We ship that model
    // via the "PhysiX-Infer GPU" endpoint instead — that's where the
    // apples-to-apples comparison happens.
    //
    // Custom fine-tunes (incl. Pratyush-01/physix-3b-rl) are also NOT
    // in this list — the router only dispatches to provider-hosted
    // models. Use the "PhysiX-Infer GPU" endpoint (free, hosts both
    // checkpoints) or a Custom inference endpoint URL.
    modelSuggestions: [
      { id: "Qwen/Qwen2.5-7B-Instruct", tag: "fast baseline" },
      { id: "Qwen/Qwen2.5-72B-Instruct", tag: "large baseline" },
      { id: "Qwen/Qwen2.5-Coder-32B-Instruct", tag: "coder" },
      { id: "meta-llama/Llama-3.3-70B-Instruct", tag: "llama" },
      { id: "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", tag: "reasoning" },
    ],
    hint:
      "Routed through https://router.huggingface.co/v1. Needs an HF token " +
      "with 'Make calls to Inference Providers' permission. Note: not every " +
      "HF model is router-served — pick from the suggestions or check the " +
      "model card's 'Inference Providers' panel before pasting an id. " +
      "To run your own fine-tune here, deploy it via 'Deploy → Inference " +
      "Endpoints' first; otherwise use the PhysiX-Infer GPU endpoint.",
  },
  {
    id: "physix",
    label: "PhysiX-Infer GPU ✦",
    // Sister L4 Space hosting both checkpoints behind one URL; the
    // proxy there picks the right vLLM based on the `model` field.
    baseUrl: PHYSIX_INFER_BASE_URL,
    needsKey: false,
    modelInputMode: "freeform-with-suggestions",
    // First entry pre-fills, so the default comparison is "trained vs
    // base" with identical hardware / generation params — only the
    // weights differ.
    modelSuggestions: [
      { id: PHYSIX_MODEL_ID, tag: "trained ✦" },
      { id: QWEN_BASE_MODEL_ID, tag: "base (apples-to-apples)" },
    ],
    hint:
      "Both 3B models on a sister L4 Space — no token, no key. The Space " +
      "sleeps after 5 min idle, so the first call after sleep is ~90-120 s " +
      "while vLLM loads weights; subsequent calls are fast.",
  },
  {
    id: "ollama",
    label: "Ollama (localhost:11434)",
    baseUrl: OLLAMA_OPENAI_BASE_URL,
    needsKey: false,
    // Hard select populated from the live /interactive/models lookup.
    modelInputMode: "ollama-installed",
    modelSuggestions: [
      // Fallbacks if the live `/interactive/models` lookup fails — at
      // least the dropdown won't be empty.
      { id: "hf.co/Pratyush-01/physix-3b-rl", tag: "trained ✦" },
      { id: "qwen2.5:3b", tag: "base (already downloaded)" },
      { id: "qwen2.5:3b-instruct" },
      { id: "qwen2.5:7b-instruct" },
    ],
    hint: "Local dev. Requires `ollama serve` running on this machine.",
  },
  {
    id: "openai",
    label: "OpenAI",
    baseUrl: OPENAI_BASE_URL,
    needsKey: true,
    modelInputMode: "freeform-with-suggestions",
    modelSuggestions: [
      { id: "gpt-4o-mini", tag: "fast" },
      { id: "gpt-4o", tag: "frontier" },
      { id: "gpt-4.1-mini" },
    ],
    hint: "OpenAI's chat completions API. Needs an OpenAI API key.",
  },
  {
    // Escape hatch: user supplies both URL and model id by hand.
    id: "custom",
    label: "Custom",
    baseUrl: "",
    needsKey: false,
    modelInputMode: "freeform-with-suggestions",
    modelSuggestions: [],
    hint:
      "Point at any OpenAI-compatible /v1/chat/completions endpoint " +
      "(vLLM, OpenRouter, Together, llama.cpp, …).",
  },
];

export function findEndpoint(id: EndpointId): Endpoint {
  // Total over EndpointId at compile time, but keep a runtime fallback
  // in case storage hands us a stale id from a previous schema.
  return ENDPOINTS.find((e) => e.id === id) ?? ENDPOINTS[0]!;
}

// ---------------------------------------------------------------------
// Connection state and persistence
// ---------------------------------------------------------------------

/** Everything the runner needs to issue a chat request. */
export interface LlmConnection {
  endpointId: EndpointId;
  /** For `custom`, the user-typed URL. For the others, equals the
   *  endpoint's canonical base URL — we still keep it on the
   *  connection so the network request never has to look it up. */
  baseUrl: string;
  /** Model id sent verbatim in the request's `model` field. */
  model: string;
  /** Bearer token; empty string when the endpoint needs none. */
  apiKey: string;
}

/** Default for the single-LLM "Run with LLM" pane: the trained
 *  PhysiX-3B. The picker is now a 3-button preset — the first preset's
 *  connection IS this default, so they stay in sync. */
export const DEFAULT_SINGLE_LLM_CONNECTION: LlmConnection = {
  endpointId: "physix",
  baseUrl: PHYSIX_INFER_BASE_URL,
  model: PHYSIX_MODEL_ID,
  apiKey: "", // the PhysiX-Infer Space is open access — no token needed
};

// ---------------------------------------------------------------------
// Model presets — the 3 fixed options the Run pane exposes.
// ---------------------------------------------------------------------

/** A single preset = "click here to talk to model X via endpoint Y".
 *  The whole point is to spare users from picking an endpoint, then a
 *  model id, then realising the two don't match. Each preset bundles
 *  exactly the (endpoint, model, baseUrl, needsKey) tuple that works. */
export interface ModelPreset {
  /** Stable preset id; the lookup key used by `findPreset`. */
  id: string;
  /** Button caption shown in the Run-with-LLM picker. */
  label: string;
  /** One-line "what is this" copy shown under the label. */
  description: string;
  /** Short tag rendered as a pill (e.g. "trained", "3B base", "7B"). */
  badge: string;
  /** Pre-built connection — drop straight into the runner. */
  connection: LlmConnection;
}

/** The three options the Run-with-LLM picker exposes. Order matters:
 *  the first entry is the default selection on a fresh page-load.
 *
 *  Two of the three live on the PhysiX-Infer GPU Space (no token, same
 *  L4 hardware) so users can compare the trained PhysiX-3B against its
 *  Qwen 3B base apples-to-apples with one click. The 7B baseline runs
 *  through HF Router because no provider serves Qwen 3B today and HF
 *  Router gives a "bigger model" reference point in <2 s once a token
 *  is pasted. */
export const MODEL_PRESETS: readonly ModelPreset[] = [
  {
    id: "physix-3b-rl",
    label: "PhysiX-3B (trained)",
    description:
      "Our GRPO-trained Qwen-3B on a sister L4 GPU Space. No token needed; first request after sleep is ~90-120 s while vLLM warms.",
    badge: "trained ✦",
    connection: {
      endpointId: "physix",
      baseUrl: PHYSIX_INFER_BASE_URL,
      model: PHYSIX_MODEL_ID,
      apiKey: "",
    },
  },
  {
    id: "qwen-3b-base",
    label: "Qwen 2.5 3B (base)",
    description:
      "Untrained base of PhysiX-3B on the same L4 Space. Apples-to-apples — identical hardware and generation params, only the weights differ.",
    badge: "3B base",
    connection: {
      endpointId: "physix",
      baseUrl: PHYSIX_INFER_BASE_URL,
      model: QWEN_BASE_MODEL_ID,
      apiKey: "",
    },
  },
  {
    id: "qwen-7b-hf",
    label: "Qwen 2.5 7B (HF Router)",
    description:
      "Bigger 7B baseline routed through Hugging Face. Needs an HF token with 'Make calls to Inference Providers' permission; responds in ~2 s.",
    badge: "7B",
    connection: {
      endpointId: "hf",
      baseUrl: HF_ROUTER_BASE_URL,
      model: "Qwen/Qwen2.5-7B-Instruct",
      apiKey: "", // hydrated on render from saved keys — see loadApiKey
    },
  },
];

/** Resolve a preset id; unknown ids fall back to the first (default)
 *  preset rather than failing, mirroring `findEndpoint`. */
export function findPreset(id: string): ModelPreset {
  const match = MODEL_PRESETS.find((preset) => preset.id === id);
  return match ?? MODEL_PRESETS[0]!;
}

/** Match a connection back to a preset (e.g. for selection state when
 *  hydrating from storage). Returns the first preset whose endpoint+
 *  model match; null if none match. */
export function presetForConnection(c: LlmConnection): ModelPreset | null {
  for (const preset of MODEL_PRESETS) {
    const sameEndpoint = preset.connection.endpointId === c.endpointId;
    const sameModel = preset.connection.model === c.model;
    if (sameEndpoint && sameModel) return preset;
  }
  return null;
}

/** Build a fresh connection when the user changes endpoints. Keeps the
 *  api key for the new base URL out of localStorage in this helper —
 *  the panel hydrates it on render so we don't have to dual-write. */
export function connectionForEndpoint(endpoint: Endpoint): LlmConnection {
  // First curated suggestion (if any) becomes the pre-filled model id.
  const [defaultSuggestion] = endpoint.modelSuggestions;
  return {
    endpointId: endpoint.id,
    baseUrl: endpoint.baseUrl,
    model: defaultSuggestion?.id ?? "",
    apiKey: "",
  };
}

const KEY_STORAGE_NAMESPACE = "physix.apiKey:";

export function loadApiKey(baseUrl: string): string {
  if (!baseUrl) return "";
  try {
    return localStorage.getItem(KEY_STORAGE_NAMESPACE + baseUrl) ?? "";
  } catch {
    return "";
  }
}

export function saveApiKey(baseUrl: string, key: string): void {
  if (!baseUrl) return;
  try {
    if (key) {
      localStorage.setItem(KEY_STORAGE_NAMESPACE + baseUrl, key);
    } else {
      localStorage.removeItem(KEY_STORAGE_NAMESPACE + baseUrl);
    }
  } catch {
    /* private mode / quota — silently no-op */
  }
}