/** Single source of truth for the connection panel.
 *
 * The panel decomposes "Connect an LLM" into two questions:
 *
 * 1. Which **endpoint** are we hitting? (4 fixed options)
 * 2. Which **model id** does that endpoint understand?
 *
 * Endpoints rarely change — there are basically four ways anyone runs
 * an OpenAI-compatible chat endpoint today, and the panel exposes
 * exactly those. Models, on the other hand, are a long tail: we keep
 * a curated suggestion list per endpoint so the user can either pick
 * a known-good model with one click or paste anything they like.
 *
 * URLs match `physix/server/providers.py`; keep the two in sync. */

export const HF_ROUTER_BASE_URL = "https://router.huggingface.co/v1";
export const OPENAI_BASE_URL = "https://api.openai.com/v1";
export const OLLAMA_OPENAI_BASE_URL = "http://localhost:11434/v1";

export const PHYSIX_MODEL_ID = "Pratyush-01/physix-3b-rl";
export const QWEN_BASE_MODEL_ID = "Qwen/Qwen2.5-3B-Instruct";

/** Sister GPU Space that hosts both the trained PhysiX-3B and the Qwen
 * 2.5 3B baseline behind a single OpenAI-compatible URL. Open access
 * (no token); routing on the `model` field happens inside the proxy.
 * Sleeps after 5 min idle, so the first call after sleep is ~90-120 s
 * while vLLM warms up — subsequent calls are fast. */
export const PHYSIX_INFER_BASE_URL =
  "https://pratyush-01-physix-infer.hf.space/v1";
export type EndpointId = "ollama" | "hf" | "openai" | "custom" | "physix";

/** UX hint that drives how the model field renders. Ollama exposes a
 * catalogue of installed tags via /interactive/models, so we render a
 * hard select. Everywhere else the model id space is open, so we use
 * a free-form input with a suggestions datalist. */
export type ModelInputMode = "ollama-installed" | "freeform-with-suggestions";

export interface ModelSuggestion {
  /** Model id passed verbatim to the chat endpoint. */
  id: string;
  /** Short label rendered next to the id (`(trained)`, `(baseline)`, …). */
  tag?: string;
}

export interface Endpoint {
  id: EndpointId;
  label: string;
  /** Pre-filled when the endpoint is picked. Empty for `custom`. */
  baseUrl: string;
  /** Whether the endpoint typically requires a Bearer token. Drives the
   * API key field's placeholder copy and one-line help text. */
  needsKey: boolean;
  /** How to render the Model field for this endpoint. */
  modelInputMode: ModelInputMode;
  /** Ordered list of curated suggestions for the Model datalist. The
   * first entry is the default the form pre-fills when the endpoint
   * is picked. Empty for `custom`. */
  modelSuggestions: ModelSuggestion[];
  /** One-line help shown under the panel. */
  hint: string;
}

// Order matters: the FIRST entry is what the dropdown prefills on a
// fresh page-load (and what `findEndpoint` falls back to for a stale
// localStorage id). HF Router is first because it's the lowest-friction
// "bring your own token" path — it answers in <2 s once a token is
// pasted, no GPU cold-start. The PhysiX-Infer entry is second so it's
// still one click away for the "compare trained vs base" workflow.
export const ENDPOINTS: readonly Endpoint[] = [
  {
    id: "hf",
    label: "Hugging Face Router",
    baseUrl: HF_ROUTER_BASE_URL,
    needsKey: true,
    modelInputMode: "freeform-with-suggestions",
    // Suggestions limited to models we've live-probed against the HF
    // Router and confirmed they serve through at least one provider. The
    // first entry is the default the form prefills — keep it
    // small-and-fast so the first turn doesn't feel like it stalled.
    //
    // Notable absentee: Qwen/Qwen2.5-3B-Instruct (the base of
    // physix-3b-rl). It's the natural baseline to compare against the
    // trained model, but as of Apr 2026 NO router provider serves it,
    // so prefilling it would 400 every fresh user. We ship that model
    // via the "PhysiX-Infer GPU" endpoint instead — that's where the
    // apples-to-apples comparison happens.
    //
    // Custom fine-tunes (incl. Pratyush-01/physix-3b-rl) are also NOT
    // in this list — the router only dispatches to provider-hosted
    // models. Use the "PhysiX-Infer GPU" endpoint (free, hosts both
    // checkpoints) or a Custom inference endpoint URL.
    modelSuggestions: [
      { id: "Qwen/Qwen2.5-7B-Instruct", tag: "fast baseline" },
      { id: "Qwen/Qwen2.5-72B-Instruct", tag: "large baseline" },
      { id: "Qwen/Qwen2.5-Coder-32B-Instruct", tag: "coder" },
      { id: "meta-llama/Llama-3.3-70B-Instruct", tag: "llama" },
      { id: "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", tag: "reasoning" },
    ],
    hint:
      "Routed through https://router.huggingface.co/v1. Needs an HF token " +
      "with 'Make calls to Inference Providers' permission. Note: not every " +
      "HF model is router-served — pick from the suggestions or check the " +
      "model card's 'Inference Providers' panel before pasting an id. " +
      "To run your own fine-tune here, deploy it via 'Deploy → Inference " +
      "Endpoints' first; otherwise use the PhysiX-Infer GPU endpoint.",
  },
  {
    id: "physix",
    label: "PhysiX-Infer GPU ✦",
    // Sister L4 Space hosting both checkpoints behind one URL; the
    // proxy there picks the right vLLM based on the `model` field.
    baseUrl: PHYSIX_INFER_BASE_URL,
    needsKey: false,
    modelInputMode: "freeform-with-suggestions",
    // First entry pre-fills, so the default comparison is "trained vs
    // base" with identical hardware / generation params — only the
    // weights differ.
    modelSuggestions: [
      { id: PHYSIX_MODEL_ID, tag: "trained ✦" },
      { id: QWEN_BASE_MODEL_ID, tag: "base (apples-to-apples)" },
    ],
    hint:
      "Both 3B models on a sister L4 Space — no token, no key. The Space " +
      "sleeps after 5 min idle, so the first call after sleep is ~90-120 s " +
      "while vLLM loads weights; subsequent calls are fast.",
  },
  {
    id: "ollama",
    label: "Ollama (localhost:11434)",
    baseUrl: OLLAMA_OPENAI_BASE_URL,
    needsKey: false,
    modelInputMode: "ollama-installed",
    modelSuggestions: [
      // Fallbacks if the live `/interactive/models` lookup fails — at
      // least the dropdown won't be empty (see `installedOllamaModels`
      // below).
      { id: "hf.co/Pratyush-01/physix-3b-rl", tag: "trained ✦" },
      { id: "qwen2.5:3b", tag: "base (already downloaded)" },
      { id: "qwen2.5:3b-instruct" },
      { id: "qwen2.5:7b-instruct" },
    ],
    hint: "Local dev. Requires `ollama serve` running on this machine.",
  },
  {
    id: "openai",
    label: "OpenAI",
    baseUrl: OPENAI_BASE_URL,
    needsKey: true,
    modelInputMode: "freeform-with-suggestions",
    modelSuggestions: [
      { id: "gpt-4o-mini", tag: "fast" },
      { id: "gpt-4o", tag: "frontier" },
      { id: "gpt-4.1-mini" },
    ],
    hint: "OpenAI's chat completions API. Needs an OpenAI API key.",
  },
  {
    id: "custom",
    label: "Custom",
    baseUrl: "",
    needsKey: false,
    modelInputMode: "freeform-with-suggestions",
    modelSuggestions: [],
    hint:
      "Point at any OpenAI-compatible /v1/chat/completions endpoint " +
      "(vLLM, OpenRouter, Together, llama.cpp, …).",
  },
];
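
// Sketch of the live lookup backing the `ollama-installed` mode. The
// `/interactive/models` route is the app's own proxy referenced in the
// comments above; the `{ models: string[] }` response shape here is an
// assumption made for illustration. Any failure (e.g. `ollama serve`
// not running) falls back to the curated suggestions so the dropdown
// is never empty.
export async function installedOllamaModels(
  endpoint: Endpoint,
): Promise<ModelSuggestion[]> {
  try {
    const res = await fetch("/interactive/models");
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
    const body = (await res.json()) as { models: string[] };
    return body.models.map((id) => ({ id }));
  } catch {
    return endpoint.modelSuggestions;
  }
}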
export function findEndpoint(id: EndpointId): Endpoint {
  // Total over EndpointId at compile time, but keep a runtime fallback
  // in case storage hands us a stale id from a previous schema.
  return ENDPOINTS.find((e) => e.id === id) ?? ENDPOINTS[0]!;
}

// ---------------------------------------------------------------------
// Connection state and persistence
// ---------------------------------------------------------------------

export interface LlmConnection {
  endpointId: EndpointId;
  /** For `custom`, the user-typed URL. For the others, equals the
   * endpoint's canonical base URL — we still keep it on the
   * connection so the network request never has to look it up. */
  baseUrl: string;
  model: string;
  apiKey: string;
}
| /** Default for the single-LLM "Run with LLM" pane: the trained | |
| * PhysiX-3B. The picker is now a 3-button preset — the first preset's | |
| * connection IS this default, so they stay in sync. */ | |
| export const DEFAULT_SINGLE_LLM_CONNECTION: LlmConnection = { | |
| endpointId: "physix", | |
| baseUrl: PHYSIX_INFER_BASE_URL, | |
| model: PHYSIX_MODEL_ID, | |
| apiKey: "", | |
| }; | |
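
// How a connection turns into a request, as a minimal sketch: every
// endpoint above speaks the OpenAI chat-completions wire format, so a
// runner only varies `baseUrl`, `model`, and the optional Bearer
// header. Non-streaming for brevity; the app's real runner lives
// elsewhere and may differ.
export async function chatOnce(
  conn: LlmConnection,
  userMessage: string,
): Promise<string> {
  const res = await fetch(`${conn.baseUrl}/chat/completions`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      // Ollama and the PhysiX-Infer Space ignore auth; HF/OpenAI need it.
      ...(conn.apiKey ? { Authorization: `Bearer ${conn.apiKey}` } : {}),
    },
    body: JSON.stringify({
      model: conn.model,
      messages: [{ role: "user", content: userMessage }],
    }),
  });
  if (!res.ok) throw new Error(`LLM request failed: HTTP ${res.status}`);
  const data = await res.json();
  return data.choices[0].message.content as string;
}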
// ---------------------------------------------------------------------
// Model presets — the 3 fixed options the Run pane exposes.
// ---------------------------------------------------------------------

/** A single preset = "click here to talk to model X via endpoint Y".
 * The whole point is to spare users from picking an endpoint, then a
 * model id, then realising the two don't match. Each preset bundles
 * exactly the (endpoint, model, baseUrl, needsKey) tuple that works. */
export interface ModelPreset {
  id: string;
  label: string;
  /** One-line "what is this" copy shown under the label. */
  description: string;
  /** Short tag rendered as a pill (e.g. "trained", "3B base", "7B"). */
  badge: string;
  /** Pre-built connection — drop straight into the runner. */
  connection: LlmConnection;
}

/** The three options the Run-with-LLM picker exposes. Order matters:
 * the first entry is the default selection on a fresh page-load.
 *
 * Two of the three live on the PhysiX-Infer GPU Space (no token, same
 * L4 hardware) so users can compare the trained PhysiX-3B against its
 * Qwen 3B base apples-to-apples with one click. The 7B baseline runs
 * through HF Router because no provider serves Qwen 3B today and HF
 * Router gives a "bigger model" reference point in <2 s once a token
 * is pasted. */
export const MODEL_PRESETS: readonly ModelPreset[] = [
  {
    id: "physix-3b-rl",
    label: "PhysiX-3B (trained)",
    description:
      "Our GRPO-trained Qwen-3B on a sister L4 GPU Space. No token needed; first request after sleep is ~90-120 s while vLLM warms.",
    badge: "trained ✦",
    connection: {
      endpointId: "physix",
      baseUrl: PHYSIX_INFER_BASE_URL,
      model: PHYSIX_MODEL_ID,
      apiKey: "",
    },
  },
  {
    id: "qwen-3b-base",
    label: "Qwen 2.5 3B (base)",
    description:
      "Untrained base of PhysiX-3B on the same L4 Space. Apples-to-apples — identical hardware and generation params, only the weights differ.",
    badge: "3B base",
    connection: {
      endpointId: "physix",
      baseUrl: PHYSIX_INFER_BASE_URL,
      model: QWEN_BASE_MODEL_ID,
      apiKey: "",
    },
  },
  {
    id: "qwen-7b-hf",
    label: "Qwen 2.5 7B (HF Router)",
    description:
      "Bigger 7B baseline routed through Hugging Face. Needs an HF token with 'Make calls to Inference Providers' permission; responds in ~2 s.",
    badge: "7B",
    connection: {
      endpointId: "hf",
      baseUrl: HF_ROUTER_BASE_URL,
      model: "Qwen/Qwen2.5-7B-Instruct",
      apiKey: "",
    },
  },
];

export function findPreset(id: string): ModelPreset {
  return MODEL_PRESETS.find((p) => p.id === id) ?? MODEL_PRESETS[0]!;
}

/** Match a connection back to a preset (e.g. for selection state when
 * hydrating from storage). Returns the first preset whose endpoint +
 * model match; null if none match. */
export function presetForConnection(c: LlmConnection): ModelPreset | null {
  return (
    MODEL_PRESETS.find(
      (p) =>
        p.connection.endpointId === c.endpointId &&
        p.connection.model === c.model,
    ) ?? null
  );
}
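
// Usage sketch for selection state (names are illustrative, not real
// app state): highlight the matching preset button after hydrating,
// or none if the stored connection was hand-edited.
//
//   const selectedPresetId =
//     presetForConnection(storedConnection)?.id ?? null;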
/** Build a fresh connection when the user changes endpoints. Always
 * returns an empty `apiKey` — this helper never touches localStorage;
 * the panel hydrates the stored key for the new base URL on render,
 * so we don't have to dual-write. */
export function connectionForEndpoint(endpoint: Endpoint): LlmConnection {
  return {
    endpointId: endpoint.id,
    baseUrl: endpoint.baseUrl,
    model: endpoint.modelSuggestions[0]?.id ?? "",
    apiKey: "",
  };
}
const KEY_STORAGE_NAMESPACE = "physix.apiKey:";

export function loadApiKey(baseUrl: string): string {
  if (!baseUrl) return "";
  try {
    return localStorage.getItem(KEY_STORAGE_NAMESPACE + baseUrl) ?? "";
  } catch {
    return "";
  }
}

export function saveApiKey(baseUrl: string, key: string): void {
  if (!baseUrl) return;
  try {
    if (key) {
      localStorage.setItem(KEY_STORAGE_NAMESPACE + baseUrl, key);
    } else {
      localStorage.removeItem(KEY_STORAGE_NAMESPACE + baseUrl);
    }
  } catch {
    /* private mode / quota — silently no-op */
  }
}
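
// End-to-end hydration sketch combining the helpers above, assuming a
// hypothetical `storedEndpointId` read from the panel's own storage:
//
//   const endpoint = findEndpoint(storedEndpointId);
//   const conn = connectionForEndpoint(endpoint);
//   conn.apiKey = loadApiKey(conn.baseUrl); // restore any saved token
//   // ...later, when the user edits the key field:
//   saveApiKey(conn.baseUrl, conn.apiKey);  // persist per base URL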