// physix/frontend/src/lib/llmPresets.ts
/** Single source of truth for the connection panel.
*
* The panel decomposes "Connect an LLM" into two questions:
*
* 1. Which **endpoint** are we hitting? (four fixed options plus a
*    free-form Custom entry)
* 2. Which **model id** does that endpoint understand?
*
* Endpoints rarely change — HF Router, the PhysiX-Infer Space, local
* Ollama, and OpenAI cover the common ways an OpenAI-compatible chat
* endpoint is run today, and Custom catches the rest. Models, on the
* other hand, are a long tail: we keep
* a curated suggestion list per endpoint so the user can either pick
* a known-good model with one click or paste anything they like.
*
* URLs match `physix/server/providers.py`; keep the two in sync. */
export const HF_ROUTER_BASE_URL = "https://router.huggingface.co/v1";
export const OPENAI_BASE_URL = "https://api.openai.com/v1";
export const OLLAMA_OPENAI_BASE_URL = "http://localhost:11434/v1";
export const PHYSIX_MODEL_ID = "Pratyush-01/physix-3b-rl";
export const QWEN_BASE_MODEL_ID = "Qwen/Qwen2.5-3B-Instruct";
/** Sister GPU Space that hosts both the trained PhysiX-3B and the Qwen
* 2.5 3B baseline behind a single OpenAI-compatible URL. Open access
* (no token); routing on the `model` field happens inside the proxy.
* Sleeps after 5 min idle, so the first call after sleep is ~90-120 s
* while vLLM warms up — subsequent calls are fast. */
export const PHYSIX_INFER_BASE_URL =
"https://pratyush-01-physix-infer.hf.space/v1";
export type EndpointId = "ollama" | "hf" | "openai" | "custom" | "physix";
/** UX hint that drives how the model field renders. For Ollama we can
* enumerate the locally installed tags (surfaced through our
* /interactive/models route), so we render a hard select. Everywhere
* else the model id space is open, so we use a free-form input with a
* suggestions datalist. */
export type ModelInputMode = "ollama-installed" | "freeform-with-suggestions";
export interface ModelSuggestion {
/** Model id passed verbatim to the chat endpoint. */
id: string;
/** Short label rendered next to the id (`(trained)`, `(baseline)`, …). */
tag?: string;
}
export interface Endpoint {
id: EndpointId;
label: string;
/** Pre-filled when the endpoint is picked. Empty for `custom`. */
baseUrl: string;
/** Whether the endpoint typically requires a Bearer token. Drives the
* API key field's placeholder copy and one-line help text. */
needsKey: boolean;
/** How to render the Model field for this endpoint. */
modelInputMode: ModelInputMode;
/** Ordered list of curated suggestions for the Model datalist. The
* first entry is the default the form pre-fills when the endpoint
* is picked. Empty for `custom`. */
modelSuggestions: ModelSuggestion[];
/** One-line help shown under the panel. */
hint: string;
}
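// Illustrative consumer (a sketch, not code the panel ships): how the
// Model field's options might be resolved from the mode above. The
// `fetchInstalledTags` parameter is a hypothetical wrapper around the
// live /interactive/models lookup.
export async function modelOptionsFor(
  endpoint: Endpoint,
  fetchInstalledTags: () => Promise<string[]>,
): Promise<ModelSuggestion[]> {
  if (endpoint.modelInputMode === "ollama-installed") {
    try {
      const tags = await fetchInstalledTags();
      if (tags.length > 0) return tags.map((id) => ({ id }));
    } catch {
      /* lookup failed — fall back to the curated suggestions below */
    }
  }
  return endpoint.modelSuggestions.slice();
}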
// Order matters: the FIRST entry is what the dropdown prefills on a
// fresh page-load (and what `findEndpoint` falls back to for a stale
// localStorage id). HF Router is first because it's the lowest-friction
// "bring your own token" path — it answers in <2 s once a token is
// pasted, no GPU cold-start. The PhysiX-Infer entry is second so it's
// still one click away for the "compare trained vs base" workflow.
export const ENDPOINTS: readonly Endpoint[] = [
{
id: "hf",
label: "Hugging Face Router",
baseUrl: HF_ROUTER_BASE_URL,
needsKey: true,
modelInputMode: "freeform-with-suggestions",
// Suggestions limited to models we've live-probed against the HF
// Router and confirmed serve through at least one provider. The
// first entry is the default the form prefills — keep it
// small-and-fast so the first turn doesn't feel like it stalled.
//
// Notable absentee: Qwen/Qwen2.5-3B-Instruct (the base of
// physix-3b-rl). It's the natural baseline to compare against the
// trained model, but as of Apr 2026 NO router provider serves it,
// so prefilling it would 400 every fresh user. We ship that model
// via the "PhysiX-Infer GPU" endpoint instead — that's where the
// apples-to-apples comparison happens.
//
// Custom fine-tunes (incl. Pratyush-01/physix-3b-rl) are also NOT
// in this list — the router only dispatches to provider-hosted
// models. Use the "PhysiX-Infer GPU" endpoint (free, hosts both
// checkpoints) or a Custom inference endpoint URL.
modelSuggestions: [
{ id: "Qwen/Qwen2.5-7B-Instruct", tag: "fast baseline" },
{ id: "Qwen/Qwen2.5-72B-Instruct", tag: "large baseline" },
{ id: "Qwen/Qwen2.5-Coder-32B-Instruct", tag: "coder" },
{ id: "meta-llama/Llama-3.3-70B-Instruct", tag: "llama" },
{ id: "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", tag: "reasoning" },
],
hint:
"Routed through https://router.huggingface.co/v1. Needs an HF token " +
"with 'Make calls to Inference Providers' permission. Note: not every " +
"HF model is router-served — pick from the suggestions or check the " +
"model card's 'Inference Providers' panel before pasting an id. " +
"To run your own fine-tune here, deploy it via 'Deploy → Inference " +
"Endpoints' first; otherwise use the PhysiX-Infer GPU endpoint.",
},
{
id: "physix",
label: "PhysiX-Infer GPU ✦",
// Sister L4 Space hosting both checkpoints behind one URL; the
// proxy there picks the right vLLM based on the `model` field.
baseUrl: PHYSIX_INFER_BASE_URL,
needsKey: false,
modelInputMode: "freeform-with-suggestions",
// First entry pre-fills, so the default comparison is "trained vs
// base" with identical hardware / generation params — only the
// weights differ.
modelSuggestions: [
{ id: PHYSIX_MODEL_ID, tag: "trained ✦" },
{ id: QWEN_BASE_MODEL_ID, tag: "base (apples-to-apples)" },
],
hint:
"Both 3B models on a sister L4 Space — no token, no key. The Space " +
"sleeps after 5 min idle, so the first call after sleep is ~90-120 s " +
"while vLLM loads weights; subsequent calls are fast.",
},
{
id: "ollama",
label: "Ollama (localhost:11434)",
baseUrl: OLLAMA_OPENAI_BASE_URL,
needsKey: false,
modelInputMode: "ollama-installed",
modelSuggestions: [
// Fallbacks if the live `/interactive/models` lookup fails — at
// least the dropdown won't be empty.
{ id: "hf.co/Pratyush-01/physix-3b-rl", tag: "trained ✦" },
{ id: "qwen2.5:3b", tag: "base (already downloaded)" },
{ id: "qwen2.5:3b-instruct" },
{ id: "qwen2.5:7b-instruct" },
],
hint: "Local dev. Requires `ollama serve` running on this machine.",
},
{
id: "openai",
label: "OpenAI",
baseUrl: OPENAI_BASE_URL,
needsKey: true,
modelInputMode: "freeform-with-suggestions",
modelSuggestions: [
{ id: "gpt-4o-mini", tag: "fast" },
{ id: "gpt-4o", tag: "frontier" },
{ id: "gpt-4.1-mini" },
],
hint: "OpenAI's chat completions API. Needs an OpenAI API key.",
},
{
id: "custom",
label: "Custom",
baseUrl: "",
needsKey: false,
modelInputMode: "freeform-with-suggestions",
modelSuggestions: [],
hint:
"Point at any OpenAI-compatible /v1/chat/completions endpoint " +
"(vLLM, OpenRouter, Together, llama.cpp, …).",
},
];
export function findEndpoint(id: EndpointId): Endpoint {
// Total over EndpointId at compile time, but keep a runtime fallback
// in case storage hands us a stale id from a previous schema.
return ENDPOINTS.find((e) => e.id === id) ?? ENDPOINTS[0]!;
}
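// Sketch of the intended hydration path. The storage key below is
// hypothetical — the real panel owns its own persistence; the point is
// that a stale id from an older schema falls through `find` above and
// lands on ENDPOINTS[0] (HF Router), same as a fresh page-load.
export function endpointFromStorage(): Endpoint {
  let stored: string | null = null;
  try {
    stored = localStorage.getItem("physix.endpointId"); // hypothetical key
  } catch {
    /* private mode / no localStorage — use the default */
  }
  return findEndpoint((stored ?? ENDPOINTS[0]!.id) as EndpointId);
}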
// ---------------------------------------------------------------------
// Connection state and persistence
// ---------------------------------------------------------------------
export interface LlmConnection {
endpointId: EndpointId;
/** For `custom`, the user-typed URL. For the others, equals the
* endpoint's canonical base URL — we still keep it on the
* connection so the network request never has to look it up. */
baseUrl: string;
model: string;
apiKey: string;
}
/** Default for the single-LLM "Run with LLM" pane: the trained
* PhysiX-3B. The picker is a three-button preset row — the first
* preset's connection IS this default, so the two stay in sync. */
export const DEFAULT_SINGLE_LLM_CONNECTION: LlmConnection = {
endpointId: "physix",
baseUrl: PHYSIX_INFER_BASE_URL,
model: PHYSIX_MODEL_ID,
apiKey: "",
};
// ---------------------------------------------------------------------
// Model presets — the 3 fixed options the Run pane exposes.
// ---------------------------------------------------------------------
/** A single preset = "click here to talk to model X via endpoint Y".
* The whole point is to spare users from picking an endpoint, then a
* model id, then realising the two don't match. Each preset bundles
* exactly the (endpoint, model, baseUrl, needsKey) tuple that works. */
export interface ModelPreset {
id: string;
label: string;
/** One-line "what is this" copy shown under the label. */
description: string;
/** Short tag rendered as a pill (e.g. "trained", "3B base", "7B"). */
badge: string;
/** Pre-built connection — drop straight into the runner. */
connection: LlmConnection;
}
/** The three options the Run-with-LLM picker exposes. Order matters:
* the first entry is the default selection on a fresh page-load.
*
* Two of the three live on the PhysiX-Infer GPU Space (no token, same
* L4 hardware) so users can compare the trained PhysiX-3B against its
* Qwen 3B base apples-to-apples with one click. The 7B baseline runs
* through HF Router because no provider serves Qwen 3B today and HF
* Router gives a "bigger model" reference point in <2 s once a token
* is pasted. */
export const MODEL_PRESETS: readonly ModelPreset[] = [
{
id: "physix-3b-rl",
label: "PhysiX-3B (trained)",
description:
"Our GRPO-trained Qwen-3B on a sister L4 GPU Space. No token needed; first request after sleep is ~90-120 s while vLLM warms.",
badge: "trained ✦",
connection: {
endpointId: "physix",
baseUrl: PHYSIX_INFER_BASE_URL,
model: PHYSIX_MODEL_ID,
apiKey: "",
},
},
{
id: "qwen-3b-base",
label: "Qwen 2.5 3B (base)",
description:
"Untrained base of PhysiX-3B on the same L4 Space. Apples-to-apples — identical hardware and generation params, only the weights differ.",
badge: "3B base",
connection: {
endpointId: "physix",
baseUrl: PHYSIX_INFER_BASE_URL,
model: QWEN_BASE_MODEL_ID,
apiKey: "",
},
},
{
id: "qwen-7b-hf",
label: "Qwen 2.5 7B (HF Router)",
description:
"Bigger 7B baseline routed through Hugging Face. Needs an HF token with 'Make calls to Inference Providers' permission; responds in ~2 s.",
badge: "7B",
connection: {
endpointId: "hf",
baseUrl: HF_ROUTER_BASE_URL,
model: "Qwen/Qwen2.5-7B-Instruct",
apiKey: "",
},
},
];
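// Dev-time sanity check (a sketch — not wired into any build step): the
// doc comment on DEFAULT_SINGLE_LLM_CONNECTION promises it equals the
// first preset's connection; this makes a silent preset reorder loud.
export function assertDefaultMatchesFirstPreset(): void {
  const a = MODEL_PRESETS[0]!.connection;
  const b = DEFAULT_SINGLE_LLM_CONNECTION;
  if (a.endpointId !== b.endpointId || a.baseUrl !== b.baseUrl || a.model !== b.model) {
    throw new Error(
      "MODEL_PRESETS[0].connection drifted from DEFAULT_SINGLE_LLM_CONNECTION",
    );
  }
}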
export function findPreset(id: string): ModelPreset {
return MODEL_PRESETS.find((p) => p.id === id) ?? MODEL_PRESETS[0]!;
}
/** Match a connection back to a preset (e.g. for selection state when
* hydrating from storage). Returns the first preset whose endpoint+
* model match; null if none match. */
export function presetForConnection(c: LlmConnection): ModelPreset | null {
return (
MODEL_PRESETS.find(
(p) =>
p.connection.endpointId === c.endpointId &&
p.connection.model === c.model,
) ?? null
);
}
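// Example of how selection state might hydrate from a stored connection
// (a sketch — the real picker component owns this logic): a fresh load
// selects the first preset, a recognised connection lights up its
// preset, and anything hand-edited selects none.
export function selectedPresetId(stored: LlmConnection | null): string | null {
  if (!stored) return MODEL_PRESETS[0]!.id;
  return presetForConnection(stored)?.id ?? null;
}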
/** Build a fresh connection when the user changes endpoints. Leaves
* `apiKey` empty rather than reading localStorage here — the panel
* hydrates the stored key on render, so we never have to dual-write. */
export function connectionForEndpoint(endpoint: Endpoint): LlmConnection {
return {
endpointId: endpoint.id,
baseUrl: endpoint.baseUrl,
model: endpoint.modelSuggestions[0]?.id ?? "",
apiKey: "",
};
}
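// Sketch of how the panel is expected to combine this with the key
// store below (an assumption about the panel, not code it ships):
export function connectionWithStoredKey(endpoint: Endpoint): LlmConnection {
  const fresh = connectionForEndpoint(endpoint);
  // loadApiKey is declared below (function declarations hoist) and
  // no-ops when localStorage is unavailable.
  return endpoint.needsKey
    ? { ...fresh, apiKey: loadApiKey(fresh.baseUrl) }
    : fresh;
}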
const KEY_STORAGE_NAMESPACE = "physix.apiKey:";
export function loadApiKey(baseUrl: string): string {
if (!baseUrl) return "";
try {
return localStorage.getItem(KEY_STORAGE_NAMESPACE + baseUrl) ?? "";
} catch {
return "";
}
}
export function saveApiKey(baseUrl: string, key: string): void {
if (!baseUrl) return;
try {
if (key) {
localStorage.setItem(KEY_STORAGE_NAMESPACE + baseUrl, key);
} else {
localStorage.removeItem(KEY_STORAGE_NAMESPACE + baseUrl);
}
} catch {
/* private mode / quota — silently no-op */
}
}
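// How a connection is ultimately consumed — a minimal sketch assuming a
// plain fetch against the OpenAI-compatible chat route (the real runner
// lives elsewhere; request body trimmed to the essentials):
export async function chatOnce(
  c: LlmConnection,
  prompt: string,
): Promise<Response> {
  const headers: Record<string, string> = {
    "Content-Type": "application/json",
  };
  if (c.apiKey) headers.Authorization = `Bearer ${c.apiKey}`;
  return fetch(`${c.baseUrl}/chat/completions`, {
    method: "POST",
    headers,
    body: JSON.stringify({
      model: c.model,
      messages: [{ role: "user", content: prompt }],
    }),
  });
}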