/** Single source of truth for the connection panel.
*
* The panel decomposes "Connect an LLM" into two questions:
*
* 1. Which **endpoint** are we hitting? (4 fixed options)
* 2. Which **model id** does that endpoint understand?
*
* Endpoints rarely change — there are basically four ways anyone runs
* an OpenAI-compatible chat endpoint today, and the panel exposes
* exactly those. Models, on the other hand, are a long tail: we keep
* a curated suggestion list per endpoint so the user can either pick
* a known-good model with one click or paste anything they like.
*
* URLs match `physix/server/providers.py`; keep the two in sync. */
/** Base URL of the Hugging Face Inference Providers router (OpenAI-compatible). */
export const HF_ROUTER_BASE_URL = "https://router.huggingface.co/v1";
/** OpenAI's own chat-completions API. */
export const OPENAI_BASE_URL = "https://api.openai.com/v1";
/** Default local Ollama server, via its OpenAI-compatible shim. */
export const OLLAMA_OPENAI_BASE_URL = "http://localhost:11434/v1";
/** Hub id of the GRPO-trained checkpoint (see MODEL_PRESETS below). */
export const PHYSIX_MODEL_ID = "Pratyush-01/physix-3b-rl";
/** Hub id of the untrained Qwen base the trained model is compared against. */
export const QWEN_BASE_MODEL_ID = "Qwen/Qwen2.5-3B-Instruct";
/** Sister GPU Space that hosts both the trained PhysiX-3B and the Qwen
 * 2.5 3B baseline behind a single OpenAI-compatible URL. Open access
 * (no token); routing on the `model` field happens inside the proxy.
 * Sleeps after 5 min idle, so the first call after sleep is ~90-120 s
 * while vLLM warms up — subsequent calls are fast. */
export const PHYSIX_INFER_BASE_URL =
  "https://pratyush-01-physix-infer.hf.space/v1";
/** Stable discriminant for the four fixed endpoints plus the `custom`
 * escape hatch. Ids round-trip through persisted state (see the
 * stale-id fallback in `findEndpoint`), so treat values as frozen. */
export type EndpointId = "ollama" | "hf" | "openai" | "custom" | "physix";
/** UX hint that drives how the model field renders. Ollama exposes a
 * catalogue of installed tags via /interactive/models, so we render a
 * hard select. Everywhere else the model id space is open, so we use
 * a free-form input with a suggestions datalist. */
export type ModelInputMode = "ollama-installed" | "freeform-with-suggestions";
/** One entry in an endpoint's curated model datalist. */
export interface ModelSuggestion {
  /** Model id passed verbatim to the chat endpoint. */
  id: string;
  /** Short label rendered next to the id (`(trained)`, `(baseline)`, …). */
  tag?: string;
}
/** Static description of one connectable endpoint. All instances live
 * in the `ENDPOINTS` catalogue below. */
export interface Endpoint {
  /** Stable id; stored on connections and used for lookups. */
  id: EndpointId;
  /** Human-readable name shown in the endpoint dropdown. */
  label: string;
  /** Pre-filled when the endpoint is picked. Empty for `custom`. */
  baseUrl: string;
  /** Whether the endpoint typically requires a Bearer token. Drives the
   * API key field's placeholder copy and one-line help text. */
  needsKey: boolean;
  /** How to render the Model field for this endpoint. */
  modelInputMode: ModelInputMode;
  /** Ordered list of curated suggestions for the Model datalist. The
   * first entry is the default the form pre-fills when the endpoint
   * is picked. Empty for `custom`. */
  modelSuggestions: ModelSuggestion[];
  /** One-line help shown under the panel. */
  hint: string;
}
// Order matters: the FIRST entry is what the dropdown prefills on a
// fresh page-load (and what `findEndpoint` falls back to for a stale
// localStorage id). HF Router is first because it's the lowest-friction
// "bring your own token" path — it answers in <2 s once a token is
// pasted, no GPU cold-start. The PhysiX-Infer entry is second so it's
// still one click away for the "compare trained vs base" workflow.
export const ENDPOINTS: readonly Endpoint[] = [
  {
    id: "hf",
    label: "Hugging Face Router",
    baseUrl: HF_ROUTER_BASE_URL,
    needsKey: true,
    modelInputMode: "freeform-with-suggestions",
    // Suggestions limited to models we've live-probed against the HF
    // Router and confirmed serve through at least one provider. The
    // first entry is the default the form prefills — keep it
    // small-and-fast so the first turn doesn't feel like it stalled.
    //
    // Notable absentee: Qwen/Qwen2.5-3B-Instruct (the base of
    // physix-3b-rl). It's the natural baseline to compare against the
    // trained model, but as of Apr 2026 NO router provider serves it,
    // so prefilling it would 400 every fresh user. We ship that model
    // via the "PhysiX-Infer GPU" endpoint instead — that's where the
    // apples-to-apples comparison happens.
    //
    // Custom fine-tunes (incl. Pratyush-01/physix-3b-rl) are also NOT
    // in this list — the router only dispatches to provider-hosted
    // models. Use the "PhysiX-Infer GPU" endpoint (free, hosts both
    // checkpoints) or a Custom inference endpoint URL.
    modelSuggestions: [
      { id: "Qwen/Qwen2.5-7B-Instruct", tag: "fast baseline" },
      { id: "Qwen/Qwen2.5-72B-Instruct", tag: "large baseline" },
      { id: "Qwen/Qwen2.5-Coder-32B-Instruct", tag: "coder" },
      { id: "meta-llama/Llama-3.3-70B-Instruct", tag: "llama" },
      { id: "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", tag: "reasoning" },
    ],
    hint:
      "Routed through https://router.huggingface.co/v1. Needs an HF token " +
      "with 'Make calls to Inference Providers' permission. Note: not every " +
      "HF model is router-served — pick from the suggestions or check the " +
      "model card's 'Inference Providers' panel before pasting an id. " +
      "To run your own fine-tune here, deploy it via 'Deploy → Inference " +
      "Endpoints' first; otherwise use the PhysiX-Infer GPU endpoint.",
  },
  {
    id: "physix",
    label: "PhysiX-Infer GPU ✦",
    // Sister L4 Space hosting both checkpoints behind one URL; the
    // proxy there picks the right vLLM based on the `model` field.
    baseUrl: PHYSIX_INFER_BASE_URL,
    needsKey: false,
    modelInputMode: "freeform-with-suggestions",
    // First entry pre-fills, so the default comparison is "trained vs
    // base" with identical hardware / generation params — only the
    // weights differ.
    modelSuggestions: [
      { id: PHYSIX_MODEL_ID, tag: "trained ✦" },
      { id: QWEN_BASE_MODEL_ID, tag: "base (apples-to-apples)" },
    ],
    hint:
      "Both 3B models on a sister L4 Space — no token, no key. The Space " +
      "sleeps after 5 min idle, so the first call after sleep is ~90-120 s " +
      "while vLLM loads weights; subsequent calls are fast.",
  },
  {
    id: "ollama",
    label: "Ollama (localhost:11434)",
    baseUrl: OLLAMA_OPENAI_BASE_URL,
    needsKey: false,
    // The only endpoint whose installed-model catalogue we can query,
    // hence the hard select instead of a free-form input.
    modelInputMode: "ollama-installed",
    modelSuggestions: [
      // Fallbacks if the live `/interactive/models` lookup fails — at
      // least the dropdown won't be empty.
      { id: "hf.co/Pratyush-01/physix-3b-rl", tag: "trained ✦" },
      { id: "qwen2.5:3b", tag: "base (already downloaded)" },
      { id: "qwen2.5:3b-instruct" },
      { id: "qwen2.5:7b-instruct" },
    ],
    hint: "Local dev. Requires `ollama serve` running on this machine.",
  },
  {
    id: "openai",
    label: "OpenAI",
    baseUrl: OPENAI_BASE_URL,
    needsKey: true,
    modelInputMode: "freeform-with-suggestions",
    modelSuggestions: [
      { id: "gpt-4o-mini", tag: "fast" },
      { id: "gpt-4o", tag: "frontier" },
      { id: "gpt-4.1-mini" },
    ],
    hint: "OpenAI's chat completions API. Needs an OpenAI API key.",
  },
  {
    // Escape hatch: empty baseUrl, no suggestions — the user supplies
    // everything.
    id: "custom",
    label: "Custom",
    baseUrl: "",
    needsKey: false,
    modelInputMode: "freeform-with-suggestions",
    modelSuggestions: [],
    hint:
      "Point at any OpenAI-compatible /v1/chat/completions endpoint " +
      "(vLLM, OpenRouter, Together, llama.cpp, …).",
  },
];
export function findEndpoint(id: EndpointId): Endpoint {
// Total over EndpointId at compile time, but keep a runtime fallback
// in case storage hands us a stale id from a previous schema.
return ENDPOINTS.find((e) => e.id === id) ?? ENDPOINTS[0]!;
}
// ---------------------------------------------------------------------
// Connection state and persistence
// ---------------------------------------------------------------------
/** The concrete tuple a chat request is made with. Plain data, so it
 * survives a round-trip through storage (see `presetForConnection`). */
export interface LlmConnection {
  endpointId: EndpointId;
  /** For `custom`, the user-typed URL. For the others, equals the
   * endpoint's canonical base URL — we still keep it on the
   * connection so the network request never has to look it up. */
  baseUrl: string;
  /** Model id passed verbatim to the chat endpoint. */
  model: string;
  /** Bearer token; empty string when the endpoint needs none. */
  apiKey: string;
}
/** Default for the single-LLM "Run with LLM" pane: the trained
 * PhysiX-3B. The picker is now a 3-button preset — the first preset's
 * connection IS this default, so they stay in sync. */
export const DEFAULT_SINGLE_LLM_CONNECTION: LlmConnection = {
  endpointId: "physix",
  baseUrl: PHYSIX_INFER_BASE_URL,
  model: PHYSIX_MODEL_ID,
  apiKey: "", // PhysiX-Infer Space is open access — no token needed.
};
// ---------------------------------------------------------------------
// Model presets — the 3 fixed options the Run pane exposes.
// ---------------------------------------------------------------------
/** A single preset = "click here to talk to model X via endpoint Y".
 * The whole point is to spare users from picking an endpoint, then a
 * model id, then realising the two don't match. Each preset bundles
 * exactly the (endpoint, model, baseUrl, needsKey) tuple that works. */
export interface ModelPreset {
  /** Stable id used for lookups (`findPreset`) and selection state. */
  id: string;
  /** Caption shown on the preset button. */
  label: string;
  /** One-line "what is this" copy shown under the label. */
  description: string;
  /** Short tag rendered as a pill (e.g. "trained", "3B base", "7B"). */
  badge: string;
  /** Pre-built connection — drop straight into the runner. */
  connection: LlmConnection;
}
/** The three options the Run-with-LLM picker exposes. Order matters:
* the first entry is the default selection on a fresh page-load.
*
* Two of the three live on the PhysiX-Infer GPU Space (no token, same
* L4 hardware) so users can compare the trained PhysiX-3B against its
* Qwen 3B base apples-to-apples with one click. The 7B baseline runs
* through HF Router because no provider serves Qwen 3B today and HF
* Router gives a "bigger model" reference point in <2 s once a token
* is pasted. */
export const MODEL_PRESETS: readonly ModelPreset[] = [
  // First = default selection: the trained checkpoint. Its connection
  // must stay identical to DEFAULT_SINGLE_LLM_CONNECTION above.
  {
    id: "physix-3b-rl",
    label: "PhysiX-3B (trained)",
    description:
      "Our GRPO-trained Qwen-3B on a sister L4 GPU Space. No token needed; first request after sleep is ~90-120 s while vLLM warms.",
    badge: "trained ✦",
    connection: {
      endpointId: "physix",
      baseUrl: PHYSIX_INFER_BASE_URL,
      model: PHYSIX_MODEL_ID,
      apiKey: "",
    },
  },
  // Same Space, base weights — the apples-to-apples comparison point.
  {
    id: "qwen-3b-base",
    label: "Qwen 2.5 3B (base)",
    description:
      "Untrained base of PhysiX-3B on the same L4 Space. Apples-to-apples — identical hardware and generation params, only the weights differ.",
    badge: "3B base",
    connection: {
      endpointId: "physix",
      baseUrl: PHYSIX_INFER_BASE_URL,
      model: QWEN_BASE_MODEL_ID,
      apiKey: "",
    },
  },
  // Bigger 7B reference via HF Router — the one preset needing a token.
  {
    id: "qwen-7b-hf",
    label: "Qwen 2.5 7B (HF Router)",
    description:
      "Bigger 7B baseline routed through Hugging Face. Needs an HF token with 'Make calls to Inference Providers' permission; responds in ~2 s.",
    badge: "7B",
    connection: {
      endpointId: "hf",
      baseUrl: HF_ROUTER_BASE_URL,
      model: "Qwen/Qwen2.5-7B-Instruct",
      apiKey: "",
    },
  },
];
/** Resolve a preset id; ids we no longer recognise (e.g. hydrated from
 * an older build's storage) fall back to the default (first) preset. */
export function findPreset(id: string): ModelPreset {
  for (const preset of MODEL_PRESETS) {
    if (preset.id === id) {
      return preset;
    }
  }
  return MODEL_PRESETS[0]!;
}
/** Match a connection back to a preset (e.g. for selection state when
 * hydrating from storage). Returns the first preset whose endpoint+
 * model match; null if none match. */
export function presetForConnection(c: LlmConnection): ModelPreset | null {
  for (const preset of MODEL_PRESETS) {
    const sameEndpoint = preset.connection.endpointId === c.endpointId;
    const sameModel = preset.connection.model === c.model;
    if (sameEndpoint && sameModel) {
      return preset;
    }
  }
  return null;
}
/** Build a fresh connection when the user changes endpoints.
 *
 * The model prefills from the endpoint's first suggestion (empty for
 * `custom`, which has none). The api key always starts empty here —
 * the panel hydrates it on render, so this helper never has to read
 * localStorage itself. */
export function connectionForEndpoint(endpoint: Endpoint): LlmConnection {
  const [defaultSuggestion] = endpoint.modelSuggestions;
  return {
    endpointId: endpoint.id,
    baseUrl: endpoint.baseUrl,
    model: defaultSuggestion ? defaultSuggestion.id : "",
    apiKey: "",
  };
}
/** localStorage key prefix; full key = prefix + base URL, so each
 * endpoint URL remembers its own token independently. */
const KEY_STORAGE_NAMESPACE = "physix.apiKey:";

/** Read the API key persisted for `baseUrl`.
 *
 * Returns "" for an empty URL, when nothing is stored, or when
 * storage is unavailable (private mode, quota, non-browser runtime —
 * every failure mode degrades to "no saved key"). */
export function loadApiKey(baseUrl: string): string {
  if (!baseUrl) return "";
  try {
    const stored = localStorage.getItem(KEY_STORAGE_NAMESPACE + baseUrl);
    return stored === null ? "" : stored;
  } catch {
    return "";
  }
}

/** Persist the API key for `baseUrl`; an empty `key` clears the entry
 * instead, so wiping the field forgets the token. Storage failures
 * are swallowed on purpose — losing the remember-my-token nicety is
 * better than breaking the panel. */
export function saveApiKey(baseUrl: string, key: string): void {
  if (!baseUrl) return;
  try {
    const storageKey = KEY_STORAGE_NAMESPACE + baseUrl;
    if (key) {
      localStorage.setItem(storageKey, key);
    } else {
      localStorage.removeItem(storageKey);
    }
  } catch {
    /* private mode / quota — silently no-op */
  }
}