/** Single source of truth for the connection panel.
 *
 * The panel decomposes "Connect an LLM" into two questions:
 *
 * 1. Which **endpoint** are we hitting? (five fixed options)
 * 2. Which **model id** does that endpoint understand?
 *
 * Endpoints rarely change — there are basically four ways anyone runs
 * an OpenAI-compatible chat endpoint today, and the panel exposes
 * exactly those plus our own PhysiX-Infer GPU Space. Models, on the
 * other hand, are a long tail: we keep a curated suggestion list per
 * endpoint so the user can either pick a known-good model with one
 * click or paste anything they like.
 *
 * URLs match `physix/server/providers.py`; keep the two in sync.
 */

export const HF_ROUTER_BASE_URL = "https://router.huggingface.co/v1";
export const OPENAI_BASE_URL = "https://api.openai.com/v1";
export const OLLAMA_OPENAI_BASE_URL = "http://localhost:11434/v1";

export const PHYSIX_MODEL_ID = "Pratyush-01/physix-3b-rl";
export const QWEN_BASE_MODEL_ID = "Qwen/Qwen2.5-3B-Instruct";

/** Sister GPU Space that hosts both the trained PhysiX-3B and the Qwen
 * 2.5 3B baseline behind a single OpenAI-compatible URL. Open access
 * (no token); routing on the `model` field happens inside the proxy.
 * Sleeps after 5 min idle, so the first call after sleep is ~90-120 s
 * while vLLM warms up — subsequent calls are fast.
 */
export const PHYSIX_INFER_BASE_URL =
  "https://pratyush-01-physix-infer.hf.space/v1";

export type EndpointId = "ollama" | "hf" | "openai" | "custom" | "physix";

/** UX hint that drives how the model field renders. Ollama exposes a
 * catalogue of installed tags via /interactive/models, so we render a
 * hard select. Everywhere else the model id space is open, so we use
 * a free-form input with a suggestions datalist.
 */
export type ModelInputMode = "ollama-installed" | "freeform-with-suggestions";

export interface ModelSuggestion {
  /** Model id passed verbatim to the chat endpoint. */
  id: string;
  /** Short label rendered next to the id (`(trained)`, `(baseline)`, …). */
  tag?: string;
}

export interface Endpoint {
  id: EndpointId;
  label: string;
  /** Pre-filled when the endpoint is picked. Empty for `custom`. */
  baseUrl: string;
  /** Whether the endpoint typically requires a Bearer token. Drives the
   * API key field's placeholder copy and one-line help text.
   */
  needsKey: boolean;
  /** How to render the Model field for this endpoint. */
  modelInputMode: ModelInputMode;
  /** Ordered list of curated suggestions for the Model datalist. The
   * first entry is the default the form pre-fills when the endpoint
   * is picked. Empty for `custom`.
   */
  modelSuggestions: ModelSuggestion[];
  /** One-line help shown under the panel. */
  hint: string;
}
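// Illustrative sketch of the branch `modelInputMode` is meant to drive.
// The markup below is an assumption for documentation, not the panel's
// actual JSX:
//
//   if (endpoint.modelInputMode === "ollama-installed") {
//     // hard <select>, options fetched live from /interactive/models
//   } else {
//     // <input list="model-ids"> + <datalist id="model-ids"> built
//     // from endpoint.modelSuggestions
//   }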
// Order matters: the FIRST entry is what the dropdown prefills on a
// fresh page-load (and what `findEndpoint` falls back to for a stale
// localStorage id). HF Router is first because it's the lowest-friction
// "bring your own token" path — it answers in <2 s once a token is
// pasted, no GPU cold-start. The PhysiX-Infer entry is second so it's
// still one click away for the "compare trained vs base" workflow.
export const ENDPOINTS: readonly Endpoint[] = [
  {
    id: "hf",
    label: "Hugging Face Router",
    baseUrl: HF_ROUTER_BASE_URL,
    needsKey: true,
    modelInputMode: "freeform-with-suggestions",
    // Suggestions limited to models we've live-probed against the HF
    // Router and confirmed serve through at least one provider. The
    // first entry is the default the form prefills — keep it
    // small-and-fast so the first turn doesn't feel like it stalled.
    //
    // Notable absentee: Qwen/Qwen2.5-3B-Instruct (the base of
    // physix-3b-rl). It's the natural baseline to compare against the
    // trained model, but as of Apr 2026 NO router provider serves it,
    // so prefilling it would 400 every fresh user. We ship that model
    // via the "PhysiX-Infer GPU" endpoint instead — that's where the
    // apples-to-apples comparison happens.
    //
    // Custom fine-tunes (incl. Pratyush-01/physix-3b-rl) are also NOT
    // in this list — the router only dispatches to provider-hosted
    // models. Use the "PhysiX-Infer GPU" endpoint (free, hosts both
    // checkpoints) or a Custom inference endpoint URL.
    modelSuggestions: [
      { id: "Qwen/Qwen2.5-7B-Instruct", tag: "fast baseline" },
      { id: "Qwen/Qwen2.5-72B-Instruct", tag: "large baseline" },
      { id: "Qwen/Qwen2.5-Coder-32B-Instruct", tag: "coder" },
      { id: "meta-llama/Llama-3.3-70B-Instruct", tag: "llama" },
      { id: "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", tag: "reasoning" },
    ],
    hint:
      "Routed through https://router.huggingface.co/v1. Needs an HF token " +
      "with 'Make calls to Inference Providers' permission. Note: not every " +
      "HF model is router-served — pick from the suggestions or check the " +
      "model card's 'Inference Providers' panel before pasting an id. " +
      "To run your own fine-tune here, deploy it via 'Deploy → Inference " +
      "Endpoints' first; otherwise use the PhysiX-Infer GPU endpoint.",
  },
  {
    id: "physix",
    label: "PhysiX-Infer GPU ✦",
    // Sister L4 Space hosting both checkpoints behind one URL; the
    // proxy there picks the right vLLM based on the `model` field.
    baseUrl: PHYSIX_INFER_BASE_URL,
    needsKey: false,
    modelInputMode: "freeform-with-suggestions",
    // First entry pre-fills, so the default comparison is "trained vs
    // base" with identical hardware / generation params — only the
    // weights differ.
    modelSuggestions: [
      { id: PHYSIX_MODEL_ID, tag: "trained ✦" },
      { id: QWEN_BASE_MODEL_ID, tag: "base (apples-to-apples)" },
    ],
    hint:
      "Both 3B models on a sister L4 Space — no token, no key. The Space " +
      "sleeps after 5 min idle, so the first call after sleep is ~90-120 s " +
      "while vLLM loads weights; subsequent calls are fast.",
  },
  {
    id: "ollama",
    label: "Ollama (localhost:11434)",
    baseUrl: OLLAMA_OPENAI_BASE_URL,
    needsKey: false,
    modelInputMode: "ollama-installed",
    modelSuggestions: [
      // Fallbacks if the live `/interactive/models` lookup fails — at
      // least the dropdown won't be empty.
      { id: "hf.co/Pratyush-01/physix-3b-rl", tag: "trained ✦" },
      { id: "qwen2.5:3b", tag: "base (already downloaded)" },
      { id: "qwen2.5:3b-instruct" },
      { id: "qwen2.5:7b-instruct" },
    ],
    hint: "Local dev. Requires `ollama serve` running on this machine.",
  },
  {
    id: "openai",
    label: "OpenAI",
    baseUrl: OPENAI_BASE_URL,
    needsKey: true,
    modelInputMode: "freeform-with-suggestions",
    modelSuggestions: [
      { id: "gpt-4o-mini", tag: "fast" },
      { id: "gpt-4o", tag: "frontier" },
      { id: "gpt-4.1-mini" },
    ],
    hint: "OpenAI's chat completions API. Needs an OpenAI API key.",
  },
  {
    id: "custom",
    label: "Custom",
    baseUrl: "",
    needsKey: false,
    modelInputMode: "freeform-with-suggestions",
    modelSuggestions: [],
    hint:
      "Point at any OpenAI-compatible /v1/chat/completions endpoint " +
      "(vLLM, OpenRouter, Together, llama.cpp, …).",
  },
];

export function findEndpoint(id: EndpointId): Endpoint {
  // Total over EndpointId at compile time, but keep a runtime fallback
  // in case storage hands us a stale id from a previous schema.
  return ENDPOINTS.find((e) => e.id === id) ?? ENDPOINTS[0]!;
}
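// Example (illustrative — the storage key below is an assumption for
// this comment, not the app's real one): hydrating the endpoint from
// persisted state.
//
//   const stored = localStorage.getItem("physix.endpointId");
//   const endpoint = findEndpoint((stored ?? "hf") as EndpointId);
//   // An id from an older schema falls back to ENDPOINTS[0] (HF Router).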
// ---------------------------------------------------------------------
// Connection state and persistence
// ---------------------------------------------------------------------

export interface LlmConnection {
  endpointId: EndpointId;
  /** For `custom`, the user-typed URL. For the others, equals the
   * endpoint's canonical base URL — we still keep it on the
   * connection so the network request never has to look it up.
   */
  baseUrl: string;
  model: string;
  apiKey: string;
}

/** Default for the single-LLM "Run with LLM" pane: the trained
 * PhysiX-3B. The picker is now a 3-button preset row — the first
 * preset's connection IS this default, so they stay in sync.
 */
export const DEFAULT_SINGLE_LLM_CONNECTION: LlmConnection = {
  endpointId: "physix",
  baseUrl: PHYSIX_INFER_BASE_URL,
  model: PHYSIX_MODEL_ID,
  apiKey: "",
};

// ---------------------------------------------------------------------
// Model presets — the 3 fixed options the Run pane exposes.
// ---------------------------------------------------------------------

/** A single preset = "click here to talk to model X via endpoint Y".
 * The whole point is to spare users from picking an endpoint, then a
 * model id, then realising the two don't match. Each preset bundles
 * exactly the (endpoint, model, baseUrl, needsKey) tuple that works.
 */
export interface ModelPreset {
  id: string;
  label: string;
  /** One-line "what is this" copy shown under the label. */
  description: string;
  /** Short tag rendered as a pill (e.g. "trained", "3B base", "7B"). */
  badge: string;
  /** Pre-built connection — drop straight into the runner. */
  connection: LlmConnection;
}

/** The three options the Run-with-LLM picker exposes. Order matters:
 * the first entry is the default selection on a fresh page-load.
 *
 * Two of the three live on the PhysiX-Infer GPU Space (no token, same
 * L4 hardware) so users can compare the trained PhysiX-3B against its
 * Qwen 3B base apples-to-apples with one click. The 7B baseline runs
 * through HF Router because no provider serves Qwen 3B today and HF
 * Router gives a "bigger model" reference point in <2 s once a token
 * is pasted.
 */
export const MODEL_PRESETS: readonly ModelPreset[] = [
  {
    id: "physix-3b-rl",
    label: "PhysiX-3B (trained)",
    description:
      "Our GRPO-trained Qwen-3B on a sister L4 GPU Space. No token " +
      "needed; first request after sleep is ~90-120 s while vLLM warms.",
    badge: "trained ✦",
    connection: {
      endpointId: "physix",
      baseUrl: PHYSIX_INFER_BASE_URL,
      model: PHYSIX_MODEL_ID,
      apiKey: "",
    },
  },
  {
    id: "qwen-3b-base",
    label: "Qwen 2.5 3B (base)",
    description:
      "Untrained base of PhysiX-3B on the same L4 Space. " +
      "Apples-to-apples — identical hardware and generation params, " +
      "only the weights differ.",
    badge: "3B base",
    connection: {
      endpointId: "physix",
      baseUrl: PHYSIX_INFER_BASE_URL,
      model: QWEN_BASE_MODEL_ID,
      apiKey: "",
    },
  },
  {
    id: "qwen-7b-hf",
    label: "Qwen 2.5 7B (HF Router)",
    description:
      "Bigger 7B baseline routed through Hugging Face. Needs an HF " +
      "token with 'Make calls to Inference Providers' permission; " +
      "responds in ~2 s.",
    badge: "7B",
    connection: {
      endpointId: "hf",
      baseUrl: HF_ROUTER_BASE_URL,
      model: "Qwen/Qwen2.5-7B-Instruct",
      apiKey: "",
    },
  },
];

export function findPreset(id: string): ModelPreset {
  return MODEL_PRESETS.find((p) => p.id === id) ?? MODEL_PRESETS[0]!;
}
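/** Illustrative sketch only — `exampleChat` is a name invented for this
 * comment (deliberately not exported), not the runner's actual request
 * code. It shows how a connection from this file maps onto the standard
 * OpenAI-compatible chat-completions call every endpoint above speaks.
 */
async function exampleChat(c: LlmConnection, prompt: string): Promise<string> {
  // Every baseUrl here ends in /v1, so the chat route is just appended.
  const res = await fetch(`${c.baseUrl}/chat/completions`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      // Bearer header only where the endpoint needs one (hf / openai).
      ...(c.apiKey ? { Authorization: `Bearer ${c.apiKey}` } : {}),
    },
    body: JSON.stringify({
      model: c.model, // the PhysiX-Infer proxy routes on this field
      messages: [{ role: "user", content: prompt }],
    }),
  });
  if (!res.ok) throw new Error(`Chat request failed: ${res.status}`);
  const data = await res.json();
  return data.choices[0].message.content as string;
}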
/** Match a connection back to a preset (e.g. for selection state when
 * hydrating from storage). Returns the first preset whose endpoint +
 * model match; null if none match.
 */
export function presetForConnection(c: LlmConnection): ModelPreset | null {
  return (
    MODEL_PRESETS.find(
      (p) =>
        p.connection.endpointId === c.endpointId &&
        p.connection.model === c.model,
    ) ?? null
  );
}

/** Build a fresh connection when the user changes endpoints. Deliberately
 * leaves `apiKey` empty instead of reading localStorage here — the panel
 * hydrates the key for the new base URL on render, so we never have to
 * dual-write it.
 */
export function connectionForEndpoint(endpoint: Endpoint): LlmConnection {
  return {
    endpointId: endpoint.id,
    baseUrl: endpoint.baseUrl,
    model: endpoint.modelSuggestions[0]?.id ?? "",
    apiKey: "",
  };
}

const KEY_STORAGE_NAMESPACE = "physix.apiKey:";

export function loadApiKey(baseUrl: string): string {
  if (!baseUrl) return "";
  try {
    return localStorage.getItem(KEY_STORAGE_NAMESPACE + baseUrl) ?? "";
  } catch {
    return "";
  }
}

export function saveApiKey(baseUrl: string, key: string): void {
  if (!baseUrl) return;
  try {
    if (key) {
      localStorage.setItem(KEY_STORAGE_NAMESPACE + baseUrl, key);
    } else {
      localStorage.removeItem(KEY_STORAGE_NAMESPACE + baseUrl);
    }
  } catch {
    /* private mode / quota — silently no-op */
  }
}
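/** Illustrative sketch only — `exampleSwitchEndpoint` is a name invented
 * for this comment (deliberately not exported), not the panel's actual
 * handler. It shows the round-trip the helpers above are designed for:
 * pick the endpoint, build a fresh connection, then hydrate the stored
 * key for that base URL rather than persisting it on the connection.
 */
function exampleSwitchEndpoint(id: EndpointId): LlmConnection {
  const endpoint = findEndpoint(id);
  const connection = connectionForEndpoint(endpoint);
  // Keys are namespaced per base URL, so switching back to an endpoint
  // the user already configured restores its key automatically.
  connection.apiKey = loadApiKey(connection.baseUrl);
  return connection;
}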