File size: 12,483 Bytes
0e24aff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
/** Single source of truth for the connection panel.
 *
 *  The panel decomposes "Connect an LLM" into two questions:
 *
 *    1. Which **endpoint** are we hitting?  (4 fixed options)
 *    2. Which **model id** does that endpoint understand?
 *
 *  Endpoints rarely change — there are basically four ways anyone runs
 *  an OpenAI-compatible chat endpoint today, and the panel exposes
 *  exactly those. Models, on the other hand, are a long tail: we keep
 *  a curated suggestion list per endpoint so the user can either pick
 *  a known-good model with one click or paste anything they like.
 *
 *  URLs match `physix/server/providers.py`; keep the two in sync. */

/** OpenAI-compatible base URL of the Hugging Face Inference Router. */
export const HF_ROUTER_BASE_URL = "https://router.huggingface.co/v1";
/** OpenAI's first-party chat completions API. */
export const OPENAI_BASE_URL = "https://api.openai.com/v1";
/** Default OpenAI-compatible endpoint exposed by a local `ollama serve`. */
export const OLLAMA_OPENAI_BASE_URL = "http://localhost:11434/v1";
/** Hub id of the trained PhysiX-3B checkpoint. */
export const PHYSIX_MODEL_ID = "Pratyush-01/physix-3b-rl";
/** Hub id of the untrained Qwen 2.5 3B base that PhysiX-3B started from. */
export const QWEN_BASE_MODEL_ID = "Qwen/Qwen2.5-3B-Instruct";

/** Sister GPU Space that hosts both the trained PhysiX-3B and the Qwen
 *  2.5 3B baseline behind a single OpenAI-compatible URL. Open access
 *  (no token); routing on the `model` field happens inside the proxy.
 *  Sleeps after 5 min idle, so the first call after sleep is ~90-120 s
 *  while vLLM warms up — subsequent calls are fast. */
export const PHYSIX_INFER_BASE_URL =
  "https://pratyush-01-physix-infer.hf.space/v1";

/** Stable ids for the fixed endpoint choices (plus `custom`). These ids
 *  are persisted (see the stale-localStorage fallback in `findEndpoint`),
 *  so renaming a member is a breaking schema change. */
export type EndpointId = "ollama" | "hf" | "openai" | "custom" | "physix";

/** UX hint that drives how the model field renders. Ollama exposes a
 *  catalogue of installed tags via /interactive/models, so we render a
 *  hard select. Everywhere else the model id space is open, so we use
 *  a free-form input with a suggestions datalist. */
export type ModelInputMode = "ollama-installed" | "freeform-with-suggestions";

/** One entry in the curated Model datalist for an endpoint. */
export interface ModelSuggestion {
  /** Model id passed verbatim to the chat endpoint. */
  id: string;
  /** Short label rendered next to the id (`(trained)`, `(baseline)`, …). */
  tag?: string;
}

/** Static description of one of the panel's endpoint choices. */
export interface Endpoint {
  /** Stable id; persisted, so treat renames as schema changes. */
  id: EndpointId;
  /** Human-readable name shown in the endpoint dropdown. */
  label: string;
  /** Pre-filled when the endpoint is picked. Empty for `custom`. */
  baseUrl: string;
  /** Whether the endpoint typically requires a Bearer token. Drives the
   *  API key field's placeholder copy and one-line help text. */
  needsKey: boolean;
  /** How to render the Model field for this endpoint. */
  modelInputMode: ModelInputMode;
  /** Ordered list of curated suggestions for the Model datalist. The
   *  first entry is the default the form pre-fills when the endpoint
   *  is picked. Empty for `custom`. */
  modelSuggestions: ModelSuggestion[];
  /** One-line help shown under the panel. */
  hint: string;
}

// Order matters: the FIRST entry is what the dropdown prefills on a
// fresh page-load (and what `findEndpoint` falls back to for a stale
// localStorage id). HF Router is first because it's the lowest-friction
// "bring your own token" path — it answers in <2 s once a token is
// pasted, no GPU cold-start. The PhysiX-Infer entry is second so it's
// still one click away for the "compare trained vs base" workflow.
export const ENDPOINTS: readonly Endpoint[] = [
  {
    id: "hf",
    label: "Hugging Face Router",
    baseUrl: HF_ROUTER_BASE_URL,
    needsKey: true,
    modelInputMode: "freeform-with-suggestions",
    // Suggestions limited to models we've live-probed against the HF
    // Router and confirmed serve through at least one provider. The
    // first entry is the default the form prefills — keep it
    // small-and-fast so the first turn doesn't feel like it stalled.
    //
    // Notable absentee: Qwen/Qwen2.5-3B-Instruct (the base of
    // physix-3b-rl). It's the natural baseline to compare against the
    // trained model, but as of Apr 2026 NO router provider serves it,
    // so prefilling it would 400 every fresh user. We ship that model
    // via the "PhysiX-Infer GPU" endpoint instead — that's where the
    // apples-to-apples comparison happens.
    //
    // Custom fine-tunes (incl. Pratyush-01/physix-3b-rl) are also NOT
    // in this list — the router only dispatches to provider-hosted
    // models. Use the "PhysiX-Infer GPU" endpoint (free, hosts both
    // checkpoints) or a Custom inference endpoint URL.
    modelSuggestions: [
      { id: "Qwen/Qwen2.5-7B-Instruct", tag: "fast baseline" },
      { id: "Qwen/Qwen2.5-72B-Instruct", tag: "large baseline" },
      { id: "Qwen/Qwen2.5-Coder-32B-Instruct", tag: "coder" },
      { id: "meta-llama/Llama-3.3-70B-Instruct", tag: "llama" },
      { id: "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", tag: "reasoning" },
    ],
    hint:
      "Routed through https://router.huggingface.co/v1. Needs an HF token " +
      "with 'Make calls to Inference Providers' permission. Note: not every " +
      "HF model is router-served — pick from the suggestions or check the " +
      "model card's 'Inference Providers' panel before pasting an id. " +
      "To run your own fine-tune here, deploy it via 'Deploy → Inference " +
      "Endpoints' first; otherwise use the PhysiX-Infer GPU endpoint.",
  },
  {
    id: "physix",
    label: "PhysiX-Infer GPU ✦",
    // Sister L4 Space hosting both checkpoints behind one URL; the
    // proxy there picks the right vLLM based on the `model` field.
    baseUrl: PHYSIX_INFER_BASE_URL,
    needsKey: false,
    modelInputMode: "freeform-with-suggestions",
    // First entry pre-fills, so the default comparison is "trained vs
    // base" with identical hardware / generation params — only the
    // weights differ.
    modelSuggestions: [
      { id: PHYSIX_MODEL_ID, tag: "trained ✦" },
      { id: QWEN_BASE_MODEL_ID, tag: "base (apples-to-apples)" },
    ],
    hint:
      "Both 3B models on a sister L4 Space — no token, no key. The Space " +
      "sleeps after 5 min idle, so the first call after sleep is ~90-120 s " +
      "while vLLM loads weights; subsequent calls are fast.",
  },
  {
    id: "ollama",
    label: "Ollama (localhost:11434)",
    baseUrl: OLLAMA_OPENAI_BASE_URL,
    needsKey: false,
    // Hard select populated from the live /interactive/models lookup.
    modelInputMode: "ollama-installed",
    modelSuggestions: [
      // Fallbacks if the live `/interactive/models` lookup fails — at
      // least the dropdown won't be empty.
      { id: "hf.co/Pratyush-01/physix-3b-rl", tag: "trained ✦" },
      { id: "qwen2.5:3b", tag: "base (already downloaded)" },
      { id: "qwen2.5:3b-instruct" },
      { id: "qwen2.5:7b-instruct" },
    ],
    hint: "Local dev. Requires `ollama serve` running on this machine.",
  },
  {
    id: "openai",
    label: "OpenAI",
    baseUrl: OPENAI_BASE_URL,
    needsKey: true,
    modelInputMode: "freeform-with-suggestions",
    modelSuggestions: [
      { id: "gpt-4o-mini", tag: "fast" },
      { id: "gpt-4o", tag: "frontier" },
      { id: "gpt-4.1-mini" },
    ],
    hint: "OpenAI's chat completions API. Needs an OpenAI API key.",
  },
  {
    // Escape hatch: user supplies both URL and model id by hand.
    id: "custom",
    label: "Custom",
    baseUrl: "",
    needsKey: false,
    modelInputMode: "freeform-with-suggestions",
    modelSuggestions: [],
    hint:
      "Point at any OpenAI-compatible /v1/chat/completions endpoint " +
      "(vLLM, OpenRouter, Together, llama.cpp, …).",
  },
];

export function findEndpoint(id: EndpointId): Endpoint {
  // Total over EndpointId at compile time, but keep a runtime fallback
  // in case storage hands us a stale id from a previous schema.
  return ENDPOINTS.find((e) => e.id === id) ?? ENDPOINTS[0]!;
}

// ---------------------------------------------------------------------
// Connection state and persistence
// ---------------------------------------------------------------------

/** Everything the runner needs to issue a chat request. */
export interface LlmConnection {
  endpointId: EndpointId;
  /** For `custom`, the user-typed URL. For the others, equals the
   *  endpoint's canonical base URL — we still keep it on the
   *  connection so the network request never has to look it up. */
  baseUrl: string;
  /** Model id sent verbatim in the request's `model` field. */
  model: string;
  /** Bearer token; empty string when the endpoint needs none. */
  apiKey: string;
}

/** Default for the single-LLM "Run with LLM" pane: the trained
 *  PhysiX-3B. The picker is now a 3-button preset — the first preset's
 *  connection IS this default, so they stay in sync. */
export const DEFAULT_SINGLE_LLM_CONNECTION: LlmConnection = {
  endpointId: "physix",
  baseUrl: PHYSIX_INFER_BASE_URL,
  model: PHYSIX_MODEL_ID,
  apiKey: "", // the PhysiX-Infer Space is open access — no token needed
};

// ---------------------------------------------------------------------
// Model presets — the 3 fixed options the Run pane exposes.
// ---------------------------------------------------------------------

/** A single preset = "click here to talk to model X via endpoint Y".
 *  The whole point is to spare users from picking an endpoint, then a
 *  model id, then realising the two don't match. Each preset bundles
 *  exactly the (endpoint, model, baseUrl, needsKey) tuple that works. */
export interface ModelPreset {
  /** Stable preset id; the lookup key used by `findPreset`. */
  id: string;
  /** Button caption shown in the Run-with-LLM picker. */
  label: string;
  /** One-line "what is this" copy shown under the label. */
  description: string;
  /** Short tag rendered as a pill (e.g. "trained", "3B base", "7B"). */
  badge: string;
  /** Pre-built connection — drop straight into the runner. */
  connection: LlmConnection;
}

/** The three options the Run-with-LLM picker exposes. Order matters:
 *  the first entry is the default selection on a fresh page-load.
 *
 *  Two of the three live on the PhysiX-Infer GPU Space (no token, same
 *  L4 hardware) so users can compare the trained PhysiX-3B against its
 *  Qwen 3B base apples-to-apples with one click. The 7B baseline runs
 *  through HF Router because no provider serves Qwen 3B today and HF
 *  Router gives a "bigger model" reference point in <2 s once a token
 *  is pasted. */
export const MODEL_PRESETS: readonly ModelPreset[] = [
  {
    id: "physix-3b-rl",
    label: "PhysiX-3B (trained)",
    description:
      "Our GRPO-trained Qwen-3B on a sister L4 GPU Space. No token needed; first request after sleep is ~90-120 s while vLLM warms.",
    badge: "trained ✦",
    connection: {
      endpointId: "physix",
      baseUrl: PHYSIX_INFER_BASE_URL,
      model: PHYSIX_MODEL_ID,
      apiKey: "",
    },
  },
  {
    id: "qwen-3b-base",
    label: "Qwen 2.5 3B (base)",
    description:
      "Untrained base of PhysiX-3B on the same L4 Space. Apples-to-apples — identical hardware and generation params, only the weights differ.",
    badge: "3B base",
    connection: {
      endpointId: "physix",
      baseUrl: PHYSIX_INFER_BASE_URL,
      model: QWEN_BASE_MODEL_ID,
      apiKey: "",
    },
  },
  {
    id: "qwen-7b-hf",
    label: "Qwen 2.5 7B (HF Router)",
    description:
      "Bigger 7B baseline routed through Hugging Face. Needs an HF token with 'Make calls to Inference Providers' permission; responds in ~2 s.",
    badge: "7B",
    connection: {
      endpointId: "hf",
      baseUrl: HF_ROUTER_BASE_URL,
      model: "Qwen/Qwen2.5-7B-Instruct",
      apiKey: "", // hydrated on render from saved keys — see loadApiKey
    },
  },
];

/** Resolve a preset id; unknown ids fall back to the first (default)
 *  preset rather than failing, mirroring `findEndpoint`. */
export function findPreset(id: string): ModelPreset {
  const match = MODEL_PRESETS.find((preset) => preset.id === id);
  return match ?? MODEL_PRESETS[0]!;
}

/** Match a connection back to a preset (e.g. for selection state when
 *  hydrating from storage). Returns the first preset whose endpoint+
 *  model match; null if none match. */
export function presetForConnection(c: LlmConnection): ModelPreset | null {
  for (const preset of MODEL_PRESETS) {
    const sameEndpoint = preset.connection.endpointId === c.endpointId;
    const sameModel = preset.connection.model === c.model;
    if (sameEndpoint && sameModel) return preset;
  }
  return null;
}

/** Build a fresh connection when the user changes endpoints. Keeps the
 *  api key for the new base URL out of localStorage in this helper —
 *  the panel hydrates it on render so we don't have to dual-write. */
export function connectionForEndpoint(endpoint: Endpoint): LlmConnection {
  // First curated suggestion (if any) becomes the pre-filled model id.
  const [defaultSuggestion] = endpoint.modelSuggestions;
  return {
    endpointId: endpoint.id,
    baseUrl: endpoint.baseUrl,
    model: defaultSuggestion?.id ?? "",
    apiKey: "",
  };
}

const KEY_STORAGE_NAMESPACE = "physix.apiKey:";

export function loadApiKey(baseUrl: string): string {
  if (!baseUrl) return "";
  try {
    return localStorage.getItem(KEY_STORAGE_NAMESPACE + baseUrl) ?? "";
  } catch {
    return "";
  }
}

export function saveApiKey(baseUrl: string, key: string): void {
  if (!baseUrl) return;
  try {
    if (key) {
      localStorage.setItem(KEY_STORAGE_NAMESPACE + baseUrl, key);
    } else {
      localStorage.removeItem(KEY_STORAGE_NAMESPACE + baseUrl);
    }
  } catch {
    /* private mode / quota — silently no-op */
  }
}