Upload folder using huggingface_hub
- Dockerfile +1 -1
- frontend/src/components/RunWithLlmPane.tsx +5 -2
- frontend/src/lib/llmPresets.ts +72 -43
Dockerfile
CHANGED
@@ -38,7 +38,7 @@ COPY frontend/ ./
 ENV VITE_PHYSIX_API_URL=""
 # Cache-bust marker. Bump when an SPA change isn't taking on the Space —
 # HF BuildKit occasionally reuses stage-1 output even when sources changed.
-# physix-spa-rebuild:
+# physix-spa-rebuild: 5
 RUN pnpm exec tsc -b \
   && pnpm exec vite build --base=/web/

frontend/src/components/RunWithLlmPane.tsx
CHANGED
@@ -20,7 +20,10 @@ import {
   useLlmEpisodeRunner,
 } from "@/hooks/useLlmEpisodeRunner";
 import { cn } from "@/lib/cn";
-import {
+import {
+  DEFAULT_SINGLE_LLM_CONNECTION,
+  type LlmConnection,
+} from "@/lib/llmPresets";
 import { pickPrimaryVariable } from "@/lib/trajectory";
 import type { RewardBreakdown } from "@/types/physix";

@@ -40,7 +43,7 @@ export function RunWithLlmPane(): JSX.Element {
   const runner = useLlmEpisodeRunner();

   const [connection, setConnection] = useState<LlmConnection>(
-    () =>
+    () => DEFAULT_SINGLE_LLM_CONNECTION,
   );
   const [systemId, setSystemId] = useState<string>("");
   const [maxTurns, setMaxTurns] = useState<number>(8);
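A usage sketch of the pattern this change adopts: a lazy useState initializer seeded with DEFAULT_SINGLE_LLM_CONNECTION, with the api key hydrated from localStorage after mount as the llmPresets comment describes. The hook name and storage key below are hypothetical placeholders, not code from this commit.

import { useEffect, useState } from "react";
import {
  DEFAULT_SINGLE_LLM_CONNECTION,
  type LlmConnection,
} from "@/lib/llmPresets";

// Hypothetical key — the real key the panel uses is not shown in this diff.
const API_KEY_STORAGE_KEY = "physix.llm.apiKey";

export function useDefaultLlmConnection() {
  // Lazy initializer: the prefilled HF Router connection is available on the
  // first render, so the Run button can enable as soon as a token is pasted.
  const [connection, setConnection] = useState<LlmConnection>(
    () => DEFAULT_SINGLE_LLM_CONNECTION,
  );

  // Hydrate the api key from localStorage on first render, mirroring the
  // "hydrated from localStorage by the panel" note in llmPresets.ts.
  useEffect(() => {
    const saved = window.localStorage.getItem(API_KEY_STORAGE_KEY);
    if (saved) {
      setConnection((prev) => ({ ...prev, apiKey: saved }));
    }
  }, []);

  return { connection, setConnection };
}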
frontend/src/lib/llmPresets.ts
CHANGED
@@ -60,7 +60,50 @@ export interface Endpoint {
   hint: string;
 }

+// Order matters: the FIRST entry is what the dropdown prefills on a
+// fresh page-load (and what `findEndpoint` falls back to for a stale
+// localStorage id). HF Router is first because it's the lowest-friction
+// "bring your own token" path — it answers in <2 s once a token is
+// pasted, no GPU cold-start. The PhysiX-Infer entry is second so it's
+// still one click away for the "compare trained vs base" workflow.
 export const ENDPOINTS: readonly Endpoint[] = [
+  {
+    id: "hf",
+    label: "Hugging Face Router",
+    baseUrl: HF_ROUTER_BASE_URL,
+    needsKey: true,
+    modelInputMode: "freeform-with-suggestions",
+    // Suggestions limited to models we've live-probed against the HF
+    // Router and confirmed serve through at least one provider. The
+    // first entry is the default the form prefills — keep it
+    // small-and-fast so the first turn doesn't feel like it stalled.
+    //
+    // Notable absentee: Qwen/Qwen2.5-3B-Instruct (the base of
+    // physix-3b-rl). It's the natural baseline to compare against the
+    // trained model, but as of Apr 2026 NO router provider serves it,
+    // so prefilling it would 400 every fresh user. We ship that model
+    // via the "PhysiX-Infer GPU" endpoint instead — that's where the
+    // apples-to-apples comparison happens.
+    //
+    // Custom fine-tunes (incl. Pratyush-01/physix-3b-rl) are also NOT
+    // in this list — the router only dispatches to provider-hosted
+    // models. Use the "PhysiX-Infer GPU" endpoint (free, hosts both
+    // checkpoints) or a Custom inference endpoint URL.
+    modelSuggestions: [
+      { id: "Qwen/Qwen2.5-7B-Instruct", tag: "fast baseline" },
+      { id: "Qwen/Qwen2.5-72B-Instruct", tag: "large baseline" },
+      { id: "Qwen/Qwen2.5-Coder-32B-Instruct", tag: "coder" },
+      { id: "meta-llama/Llama-3.3-70B-Instruct", tag: "llama" },
+      { id: "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", tag: "reasoning" },
+    ],
+    hint:
+      "Routed through https://router.huggingface.co/v1. Needs an HF token " +
+      "with 'Make calls to Inference Providers' permission. Note: not every " +
+      "HF model is router-served — pick from the suggestions or check the " +
+      "model card's 'Inference Providers' panel before pasting an id. " +
+      "To run your own fine-tune here, deploy it via 'Deploy → Inference " +
+      "Endpoints' first; otherwise use the PhysiX-Infer GPU endpoint.",
+  },
   {
     id: "physix",
     label: "PhysiX-Infer GPU ✦",
@@ -97,43 +140,6 @@ export const ENDPOINTS: readonly Endpoint[] = [
     ],
     hint: "Local dev. Requires `ollama serve` running on this machine.",
   },
-  {
-    id: "hf",
-    label: "Hugging Face Router",
-    baseUrl: HF_ROUTER_BASE_URL,
-    needsKey: true,
-    modelInputMode: "freeform-with-suggestions",
-    // Suggestions limited to models we've live-probed against the HF
-    // Router and confirmed serve through at least one provider. The
-    // first entry is the default the form prefills — keep it
-    // small-and-fast so the first turn doesn't feel like it stalled.
-    //
-    // Notable absentee: Qwen/Qwen2.5-3B-Instruct (the base of
-    // physix-3b-rl). It's the natural baseline to compare against the
-    // trained model, but as of Apr 2026 NO router provider serves it,
-    // so prefilling it would 400 every fresh user. We ship that model
-    // via the "PhysiX-Infer GPU" endpoint above instead — that's where
-    // the apples-to-apples comparison happens.
-    //
-    // Custom fine-tunes (incl. Pratyush-01/physix-3b-rl) are also NOT
-    // in this list — the router only dispatches to provider-hosted
-    // models. Use the "PhysiX-Infer GPU" endpoint above (free, hosts
-    // both checkpoints) or a Custom inference endpoint URL.
-    modelSuggestions: [
-      { id: "Qwen/Qwen2.5-7B-Instruct", tag: "fast baseline" },
-      { id: "Qwen/Qwen2.5-72B-Instruct", tag: "large baseline" },
-      { id: "Qwen/Qwen2.5-Coder-32B-Instruct", tag: "coder" },
-      { id: "meta-llama/Llama-3.3-70B-Instruct", tag: "llama" },
-      { id: "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", tag: "reasoning" },
-    ],
-    hint:
-      "Routed through https://router.huggingface.co/v1. Needs an HF token " +
-      "with 'Make calls to Inference Providers' permission. Note: not every " +
-      "HF model is router-served — pick from the suggestions or check the " +
-      "model card's 'Inference Providers' panel before pasting an id. " +
-      "To run your own fine-tune here, deploy it via 'Deploy → Inference " +
-      "Endpoints' first; otherwise use Ollama or a custom vLLM URL.",
-  },
   {
     id: "openai",
     label: "OpenAI",
@@ -180,8 +186,30 @@ export interface LlmConnection {
   apiKey: string;
 }

-/** Default
- *
+/** Default for the single-LLM "Run with LLM" pane.
+ *
+ * HF Router is the lowest-friction option for a first-time visitor:
+ * paste a token, pick a suggested model (all live-probed and known to
+ * serve), get a response in ~2 s. No GPU cold-start, no localhost
+ * dependency.
+ *
+ * We prefill the model so the Run button is enabled the moment the
+ * user pastes a token — keeping the model empty and forcing them to
+ * pick from the dropdown is friction we don't need. The api key
+ * field is hydrated from localStorage by the panel on first render. */
+export const DEFAULT_SINGLE_LLM_CONNECTION: LlmConnection = {
+  endpointId: "hf",
+  baseUrl: HF_ROUTER_BASE_URL,
+  // Matches the first entry of the "hf" endpoint's modelSuggestions —
+  // smallest router-served Qwen model, fastest response.
+  model: "Qwen/Qwen2.5-7B-Instruct",
+  apiKey: "",
+};
+
+/** Default A side of the Compare pane: trained PhysiX-3B on the sister
+ * GPU Space. The Compare pane's whole purpose is the trained-vs-base
+ * side-by-side, so it's worth the cold-start penalty here even though
+ * the single-LLM pane avoids it. No token needed. */
 export const DEFAULT_CONNECTION_A: LlmConnection = {
   endpointId: "physix",
   baseUrl: PHYSIX_INFER_BASE_URL,
@@ -189,10 +217,11 @@ export const DEFAULT_CONNECTION_A: LlmConnection = {
   apiKey: "",
 };

-/** Default B side: same sister Space, same L4 GPU,
- * 3B baseline. Apples-to-apples — identical
- * hardware, identical generation params; only
- * Both models share the same Space, so warming
+/** Default B side of the Compare pane: same sister Space, same L4 GPU,
+ * just the Qwen 2.5 3B baseline. Apples-to-apples — identical
+ * architecture, identical hardware, identical generation params; only
+ * the weights differ. Both models share the same Space, so warming
+ * side A also warms B. */
 export const DEFAULT_CONNECTION_B: LlmConnection = {
   endpointId: "physix",
   baseUrl: PHYSIX_INFER_BASE_URL,
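A minimal sketch of the fallback behaviour the new ordering comment relies on. The real findEndpoint already lives in llmPresets.ts and its exact signature isn't shown in this diff, so the helper below is an assumed shape rather than the shipped code; it only uses the ENDPOINTS array and Endpoint interface that this file exports.

import { ENDPOINTS, type Endpoint } from "@/lib/llmPresets";

// Resolve an endpoint id that may have been persisted by an older build.
// Unknown or stale ids fall back to ENDPOINTS[0]; with this commit that
// fallback becomes the "hf" (Hugging Face Router) entry.
export function resolveEndpoint(id: string | null): Endpoint {
  return ENDPOINTS.find((endpoint) => endpoint.id === id) ?? ENDPOINTS[0];
}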