// physix/frontend/src/lib/llmPresets.ts
/** Single source of truth for the connection panel.
*
* The panel decomposes "Connect an LLM" into two questions:
*
* 1. Which **endpoint** are we hitting? (four fixed options plus a
*    free-form Custom entry)
* 2. Which **model id** does that endpoint understand?
*
* Endpoints rarely change — HF Router, the PhysiX-Infer Space, local
* Ollama, and OpenAI cover the common ways an OpenAI-compatible chat
* endpoint is run today, and Custom catches the rest. Models, on the
* other hand, are a long tail: we keep
* a curated suggestion list per endpoint so the user can either pick
* a known-good model with one click or paste anything they like.
*
* URLs match `physix/server/providers.py`; keep the two in sync. */
export const HF_ROUTER_BASE_URL = "https://router.huggingface.co/v1";
export const OPENAI_BASE_URL = "https://api.openai.com/v1";
export const OLLAMA_OPENAI_BASE_URL = "http://localhost:11434/v1";
export const PHYSIX_MODEL_ID = "Pratyush-01/physix-3b-rl";
export const QWEN_BASE_MODEL_ID = "Qwen/Qwen2.5-3B-Instruct";
/** Sister GPU Space that hosts both the trained PhysiX-3B and the Qwen
* 2.5 3B baseline behind a single OpenAI-compatible URL. Open access
* (no token); routing on the `model` field happens inside the proxy.
* Sleeps after 5 min idle, so the first call after sleep is ~90-120 s
* while vLLM warms up — subsequent calls are fast. */
export const PHYSIX_INFER_BASE_URL =
"https://pratyush-01-physix-infer.hf.space/v1";
export type EndpointId = "ollama" | "hf" | "openai" | "custom" | "physix";
/** UX hint that drives how the model field renders. For Ollama we can
* enumerate the locally installed tags (surfaced through our
* /interactive/models route), so we render a hard select. Everywhere
* else the model id space is open, so we use a free-form input with a
* suggestions datalist. */
export type ModelInputMode = "ollama-installed" | "freeform-with-suggestions";
export interface ModelSuggestion {
/** Model id passed verbatim to the chat endpoint. */
id: string;
/** Short label rendered next to the id (`(trained)`, `(baseline)`, …). */
tag?: string;
}
export interface Endpoint {
id: EndpointId;
label: string;
/** Pre-filled when the endpoint is picked. Empty for `custom`. */
baseUrl: string;
/** Whether the endpoint typically requires a Bearer token. Drives the
* API key field's placeholder copy and one-line help text. */
needsKey: boolean;
/** How to render the Model field for this endpoint. */
modelInputMode: ModelInputMode;
/** Ordered list of curated suggestions for the Model datalist. The
* first entry is the default the form pre-fills when the endpoint
* is picked. Empty for `custom`. */
modelSuggestions: ModelSuggestion[];
/** One-line help shown under the panel. */
hint: string;
}
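// Illustrative consumer (a sketch, not code the panel ships): how the
// Model field's options might be resolved from the mode above. The
// `fetchInstalledTags` parameter is a hypothetical wrapper around the
// live /interactive/models lookup.
export async function modelOptionsFor(
  endpoint: Endpoint,
  fetchInstalledTags: () => Promise<string[]>,
): Promise<ModelSuggestion[]> {
  if (endpoint.modelInputMode === "ollama-installed") {
    try {
      const tags = await fetchInstalledTags();
      if (tags.length > 0) return tags.map((id) => ({ id }));
    } catch {
      /* lookup failed — fall back to the curated suggestions below */
    }
  }
  return endpoint.modelSuggestions.slice();
}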
// Order matters: the FIRST entry is what the dropdown prefills on a
// fresh page-load (and what `findEndpoint` falls back to for a stale
// localStorage id). HF Router is first because it's the lowest-friction
// "bring your own token" path — it answers in <2 s once a token is
// pasted, no GPU cold-start. The PhysiX-Infer entry is second so it's
// still one click away for the "compare trained vs base" workflow.
export const ENDPOINTS: readonly Endpoint[] = [
{
id: "hf",
label: "Hugging Face Router",
baseUrl: HF_ROUTER_BASE_URL,
needsKey: true,
modelInputMode: "freeform-with-suggestions",
// Suggestions limited to models we've live-probed against the HF
// Router and confirmed serve through at least one provider. The
// first entry is the default the form prefills — keep it
// small-and-fast so the first turn doesn't feel like it stalled.
//
// Notable absentee: Qwen/Qwen2.5-3B-Instruct (the base of
// physix-3b-rl). It's the natural baseline to compare against the
// trained model, but as of Apr 2026 NO router provider serves it,
// so prefilling it would 400 every fresh user. We ship that model
// via the "PhysiX-Infer GPU" endpoint instead — that's where the
// apples-to-apples comparison happens.
//
// Custom fine-tunes (incl. Pratyush-01/physix-3b-rl) are also NOT
// in this list — the router only dispatches to provider-hosted
// models. Use the "PhysiX-Infer GPU" endpoint (free, hosts both
// checkpoints) or a Custom inference endpoint URL.
modelSuggestions: [
{ id: "Qwen/Qwen2.5-7B-Instruct", tag: "fast baseline" },
{ id: "Qwen/Qwen2.5-72B-Instruct", tag: "large baseline" },
{ id: "Qwen/Qwen2.5-Coder-32B-Instruct", tag: "coder" },
{ id: "meta-llama/Llama-3.3-70B-Instruct", tag: "llama" },
{ id: "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", tag: "reasoning" },
],
hint:
"Routed through https://router.huggingface.co/v1. Needs an HF token " +
"with 'Make calls to Inference Providers' permission. Note: not every " +
"HF model is router-served — pick from the suggestions or check the " +
"model card's 'Inference Providers' panel before pasting an id. " +
"To run your own fine-tune here, deploy it via 'Deploy → Inference " +
"Endpoints' first; otherwise use the PhysiX-Infer GPU endpoint.",
},
{
id: "physix",
label: "PhysiX-Infer GPU ✦",
// Sister L4 Space hosting both checkpoints behind one URL; the
// proxy there picks the right vLLM based on the `model` field.
baseUrl: PHYSIX_INFER_BASE_URL,
needsKey: false,
modelInputMode: "freeform-with-suggestions",
// First entry pre-fills, so the default comparison is "trained vs
// base" with identical hardware / generation params — only the
// weights differ.
modelSuggestions: [
{ id: PHYSIX_MODEL_ID, tag: "trained ✦" },
{ id: QWEN_BASE_MODEL_ID, tag: "base (apples-to-apples)" },
],
hint:
"Both 3B models on a sister L4 Space — no token, no key. The Space " +
"sleeps after 5 min idle, so the first call after sleep is ~90-120 s " +
"while vLLM loads weights; subsequent calls are fast.",
},
{
id: "ollama",
label: "Ollama (localhost:11434)",
baseUrl: OLLAMA_OPENAI_BASE_URL,
needsKey: false,
modelInputMode: "ollama-installed",
modelSuggestions: [
// Fallbacks if the live `/interactive/models` lookup fails — at
// least the dropdown won't be empty.
{ id: "hf.co/Pratyush-01/physix-3b-rl", tag: "trained ✦" },
{ id: "qwen2.5:3b", tag: "base (already downloaded)" },
{ id: "qwen2.5:3b-instruct" },
{ id: "qwen2.5:7b-instruct" },
],
hint: "Local dev. Requires `ollama serve` running on this machine.",
},
{
id: "openai",
label: "OpenAI",
baseUrl: OPENAI_BASE_URL,
needsKey: true,
modelInputMode: "freeform-with-suggestions",
modelSuggestions: [
{ id: "gpt-4o-mini", tag: "fast" },
{ id: "gpt-4o", tag: "frontier" },
{ id: "gpt-4.1-mini" },
],
hint: "OpenAI's chat completions API. Needs an OpenAI API key.",
},
{
id: "custom",
label: "Custom",
baseUrl: "",
needsKey: false,
modelInputMode: "freeform-with-suggestions",
modelSuggestions: [],
hint:
"Point at any OpenAI-compatible /v1/chat/completions endpoint " +
"(vLLM, OpenRouter, Together, llama.cpp, …).",
},
];
export function findEndpoint(id: EndpointId): Endpoint {
// Total over EndpointId at compile time, but keep a runtime fallback
// in case storage hands us a stale id from a previous schema.
return ENDPOINTS.find((e) => e.id === id) ?? ENDPOINTS[0]!;
}
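// Sketch of the intended hydration path. The storage key below is
// hypothetical — the real panel owns its own persistence; the point is
// that a stale id from an older schema falls through `find` above and
// lands on ENDPOINTS[0] (HF Router), same as a fresh page-load.
export function endpointFromStorage(): Endpoint {
  let stored: string | null = null;
  try {
    stored = localStorage.getItem("physix.endpointId"); // hypothetical key
  } catch {
    /* private mode / no localStorage — use the default */
  }
  return findEndpoint((stored ?? ENDPOINTS[0]!.id) as EndpointId);
}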
// ---------------------------------------------------------------------
// Connection state and persistence
// ---------------------------------------------------------------------
export interface LlmConnection {
endpointId: EndpointId;
/** For `custom`, the user-typed URL. For the others, equals the
* endpoint's canonical base URL — we still keep it on the
* connection so the network request never has to look it up. */
baseUrl: string;
model: string;
apiKey: string;
}
/** Default for the single-LLM "Run with LLM" pane: the trained
* PhysiX-3B. The picker is a three-button preset row — the first
* preset's connection IS this default, so the two stay in sync. */
export const DEFAULT_SINGLE_LLM_CONNECTION: LlmConnection = {
endpointId: "physix",
baseUrl: PHYSIX_INFER_BASE_URL,
model: PHYSIX_MODEL_ID,
apiKey: "",
};
// ---------------------------------------------------------------------
// Model presets — the 3 fixed options the Run pane exposes.
// ---------------------------------------------------------------------
/** A single preset = "click here to talk to model X via endpoint Y".
* The whole point is to spare users from picking an endpoint, then a
* model id, then realising the two don't match. Each preset bundles
* exactly the (endpoint, model, baseUrl, needsKey) tuple that works. */
export interface ModelPreset {
id: string;
label: string;
/** One-line "what is this" copy shown under the label. */
description: string;
/** Short tag rendered as a pill (e.g. "trained", "3B base", "7B"). */
badge: string;
/** Pre-built connection — drop straight into the runner. */
connection: LlmConnection;
}
/** The three options the Run-with-LLM picker exposes. Order matters:
* the first entry is the default selection on a fresh page-load.
*
* Two of the three live on the PhysiX-Infer GPU Space (no token, same
* L4 hardware) so users can compare the trained PhysiX-3B against its
* Qwen 3B base apples-to-apples with one click. The 7B baseline runs
* through HF Router because no provider serves Qwen 3B today and HF
* Router gives a "bigger model" reference point in <2 s once a token
* is pasted. */
export const MODEL_PRESETS: readonly ModelPreset[] = [
{
id: "physix-3b-rl",
label: "PhysiX-3B (trained)",
description:
"Our GRPO-trained Qwen-3B on a sister L4 GPU Space. No token needed; first request after sleep is ~90-120 s while vLLM warms.",
badge: "trained ✦",
connection: {
endpointId: "physix",
baseUrl: PHYSIX_INFER_BASE_URL,
model: PHYSIX_MODEL_ID,
apiKey: "",
},
},
{
id: "qwen-3b-base",
label: "Qwen 2.5 3B (base)",
description:
"Untrained base of PhysiX-3B on the same L4 Space. Apples-to-apples — identical hardware and generation params, only the weights differ.",
badge: "3B base",
connection: {
endpointId: "physix",
baseUrl: PHYSIX_INFER_BASE_URL,
model: QWEN_BASE_MODEL_ID,
apiKey: "",
},
},
{
id: "qwen-7b-hf",
label: "Qwen 2.5 7B (HF Router)",
description:
"Bigger 7B baseline routed through Hugging Face. Needs an HF token with 'Make calls to Inference Providers' permission; responds in ~2 s.",
badge: "7B",
connection: {
endpointId: "hf",
baseUrl: HF_ROUTER_BASE_URL,
model: "Qwen/Qwen2.5-7B-Instruct",
apiKey: "",
},
},
];
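// Dev-time sanity check (a sketch — not wired into any build step): the
// doc comment on DEFAULT_SINGLE_LLM_CONNECTION promises it equals the
// first preset's connection; this makes a silent preset reorder loud.
export function assertDefaultMatchesFirstPreset(): void {
  const a = MODEL_PRESETS[0]!.connection;
  const b = DEFAULT_SINGLE_LLM_CONNECTION;
  if (a.endpointId !== b.endpointId || a.baseUrl !== b.baseUrl || a.model !== b.model) {
    throw new Error(
      "MODEL_PRESETS[0].connection drifted from DEFAULT_SINGLE_LLM_CONNECTION",
    );
  }
}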
export function findPreset(id: string): ModelPreset {
return MODEL_PRESETS.find((p) => p.id === id) ?? MODEL_PRESETS[0]!;
}
/** Match a connection back to a preset (e.g. for selection state when
* hydrating from storage). Returns the first preset whose endpoint+
* model match; null if none match. */
export function presetForConnection(c: LlmConnection): ModelPreset | null {
return (
MODEL_PRESETS.find(
(p) =>
p.connection.endpointId === c.endpointId &&
p.connection.model === c.model,
) ?? null
);
}
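// Example of how selection state might hydrate from a stored connection
// (a sketch — the real picker component owns this logic): a fresh load
// selects the first preset, a recognised connection lights up its
// preset, and anything hand-edited selects none.
export function selectedPresetId(stored: LlmConnection | null): string | null {
  if (!stored) return MODEL_PRESETS[0]!.id;
  return presetForConnection(stored)?.id ?? null;
}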
/** Build a fresh connection when the user changes endpoints. Leaves
* `apiKey` empty rather than reading localStorage here — the panel
* hydrates the stored key on render, so we never have to dual-write. */
export function connectionForEndpoint(endpoint: Endpoint): LlmConnection {
return {
endpointId: endpoint.id,
baseUrl: endpoint.baseUrl,
model: endpoint.modelSuggestions[0]?.id ?? "",
apiKey: "",
};
}
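// Sketch of how the panel is expected to combine this with the key
// store below (an assumption about the panel, not code it ships):
export function connectionWithStoredKey(endpoint: Endpoint): LlmConnection {
  const fresh = connectionForEndpoint(endpoint);
  // loadApiKey is declared below (function declarations hoist) and
  // no-ops when localStorage is unavailable.
  return endpoint.needsKey
    ? { ...fresh, apiKey: loadApiKey(fresh.baseUrl) }
    : fresh;
}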
const KEY_STORAGE_NAMESPACE = "physix.apiKey:";
export function loadApiKey(baseUrl: string): string {
if (!baseUrl) return "";
try {
return localStorage.getItem(KEY_STORAGE_NAMESPACE + baseUrl) ?? "";
} catch {
return "";
}
}
export function saveApiKey(baseUrl: string, key: string): void {
if (!baseUrl) return;
try {
if (key) {
localStorage.setItem(KEY_STORAGE_NAMESPACE + baseUrl, key);
} else {
localStorage.removeItem(KEY_STORAGE_NAMESPACE + baseUrl);
}
} catch {
/* private mode / quota — silently no-op */
}
}
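// How a connection is ultimately consumed — a minimal sketch assuming a
// plain fetch against the OpenAI-compatible chat route (the real runner
// lives elsewhere; request body trimmed to the essentials):
export async function chatOnce(
  c: LlmConnection,
  prompt: string,
): Promise<Response> {
  const headers: Record<string, string> = {
    "Content-Type": "application/json",
  };
  if (c.apiKey) headers.Authorization = `Bearer ${c.apiKey}`;
  return fetch(`${c.baseUrl}/chat/completions`, {
    method: "POST",
    headers,
    body: JSON.stringify({
      model: c.model,
      messages: [{ role: "user", content: prompt }],
    }),
  });
}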