/** Interactive playground for the PhysiX OpenEnv interface. * * Lets the user drive an episode by hand: start a session (= one * `reset`), then submit one or more actions (= `step`s) directly, * no LLM in the loop. Shows the JSON request body, JSON response, * and a copy-pasteable curl command for every call so judges can * see exactly the wire format. * * Why this talks to `/interactive/sessions/*` rather than the bare * `/reset` and `/step`: * * The official OpenEnv HTTP server (`openenv.core.env_server`) * constructs a fresh ``Environment`` instance on every call to * `/reset` and `/step`, so the bare endpoints are intentionally * stateless and cannot carry an episode across requests. PhysiX * layers a per-browser session router on top of that for stateful * flows; this pane uses it so visitors actually get to step * through an episode. The bare `/reset`, `/schema`, and `/metadata` * endpoints are still surfaced lower in the page as a read-only * reference for the underlying contract. */ import { useCallback, useEffect, useMemo, useState } from "react"; import { TrajectoryCanvas } from "@/components/TrajectoryCanvas"; import { cn } from "@/lib/cn"; import { type DirectStepResponse, InteractiveApiError, InteractiveClient, type InteractiveStartResponse, type OpenEnvMetadata, type OpenEnvResetResponse, type OpenEnvSchemaResponse, type SessionSummary, type SystemDescriptor, } from "@/lib/interactiveClient"; import { pickPrimaryVariable } from "@/lib/trajectory"; import type { PhysiXAction, RewardBreakdown, TrajectorySample, } from "@/types/physix"; const DEFAULT_EQUATION = "d2y/dt2 = -9.81"; const DEFAULT_PARAMS_JSON = "{}"; const DEFAULT_RATIONALE = "Free-fall under gravity."; // Same shape used by RunWithLlmPane / ComparePane so the OpenEnv tab // renders the identical reward layout when no step has run yet. 
const ZERO_REWARD: RewardBreakdown = { match: 0, progress: 0, simplicity: 0, format: 0, total: 0, shape: 0, freq: 0, amplitude: 0, }; function getApiBaseUrl(): string { const fromEnv = (import.meta as ImportMeta & { env?: Record }) .env?.VITE_PHYSIX_API_URL; return (fromEnv ?? "http://localhost:8000").replace(/\/+$/, ""); } interface CallRecord { status: "idle" | "running" | "ok" | "error"; result: T | null; error: string | null; /** Wall-clock latency in ms, recorded client-side. */ latencyMs: number | null; /** The exact request body sent (or null for GET). */ requestBody: unknown; /** Endpoint label shown in the curl block. */ url: string | null; method: "GET" | "POST" | "DELETE"; } const idleCall = ( method: CallRecord["method"] = "GET", ): CallRecord => ({ status: "idle", result: null, error: null, latencyMs: null, requestBody: null, url: null, method, }); export function OpenEnvExplorerPane(): JSX.Element { const apiBaseUrl = useMemo(() => getApiBaseUrl(), []); const client = useMemo( () => new InteractiveClient({ baseUrl: apiBaseUrl }), [apiBaseUrl], ); // --- Catalogue of physical systems (used to populate the reset selector) --- const [systems, setSystems] = useState(null); const [systemsError, setSystemsError] = useState(null); useEffect(() => { let cancelled = false; void (async () => { try { const list = await client.listSystems(); if (!cancelled) setSystems(list); } catch (err) { if (!cancelled) setSystemsError(formatErr(err)); } })(); return () => { cancelled = true; }; }, [client]); // --- Session state ----------------------------------------------------- // Session id of the currently active episode. null = no session yet. 
const [sessionId, setSessionId] = useState(null); const [summary, setSummary] = useState(null); // --- Form state ------------------------------------------------------- const [systemId, setSystemId] = useState("free_fall"); const [seed, setSeed] = useState("42"); const [maxTurns, setMaxTurns] = useState(8); const [equation, setEquation] = useState(DEFAULT_EQUATION); const [paramsJson, setParamsJson] = useState(DEFAULT_PARAMS_JSON); const [paramsError, setParamsError] = useState(null); const [rationale, setRationale] = useState(DEFAULT_RATIONALE); // --- Per-endpoint call records ---------------------------------------- const [metadataCall, setMetadataCall] = useState>( idleCall("GET"), ); const [schemaCall, setSchemaCall] = useState>( idleCall("GET"), ); const [statelessResetCall, setStatelessResetCall] = useState< CallRecord >(idleCall("POST")); const [resetCall, setResetCall] = useState>( idleCall("POST"), ); const [stepCall, setStepCall] = useState>( idleCall("POST"), ); // Auto-fetch metadata + schema once on mount. useEffect(() => { void runMetadata(); void runSchema(); // eslint-disable-next-line react-hooks/exhaustive-deps }, []); // Refresh the session summary whenever the session id changes (and // after every successful step below). const refreshSummary = useCallback(async (): Promise => { if (!sessionId) return; try { const s = await client.getSummary(sessionId); setSummary(s); } catch { // Session may have been GC'd; clear it so the user gets a fresh // start on the next reset. 
setSummary(null); } }, [client, sessionId]); useEffect(() => { void refreshSummary(); }, [refreshSummary]); const runMetadata = useCallback(async (): Promise => { setMetadataCall({ ...idleCall("GET"), status: "running", url: `${apiBaseUrl}/metadata`, }); const t0 = performance.now(); try { const result = await client.openEnvMetadata(); setMetadataCall({ status: "ok", result, error: null, latencyMs: performance.now() - t0, requestBody: null, method: "GET", url: `${apiBaseUrl}/metadata`, }); } catch (err) { setMetadataCall({ status: "error", result: null, error: formatErr(err), latencyMs: performance.now() - t0, requestBody: null, method: "GET", url: `${apiBaseUrl}/metadata`, }); } }, [apiBaseUrl, client]); const runSchema = useCallback(async (): Promise => { setSchemaCall({ ...idleCall("GET"), status: "running", url: `${apiBaseUrl}/schema`, }); const t0 = performance.now(); try { const result = await client.openEnvSchema(); setSchemaCall({ status: "ok", result, error: null, latencyMs: performance.now() - t0, requestBody: null, method: "GET", url: `${apiBaseUrl}/schema`, }); } catch (err) { setSchemaCall({ status: "error", result: null, error: formatErr(err), latencyMs: performance.now() - t0, requestBody: null, method: "GET", url: `${apiBaseUrl}/schema`, }); } }, [apiBaseUrl, client]); const runStatelessReset = useCallback(async (): Promise => { const seedNum = parseSeed(seed); const body: Record = {}; if (seedNum !== null) body.seed = seedNum; if (systemId) body.system_id = systemId; setStatelessResetCall({ status: "running", result: null, error: null, latencyMs: null, requestBody: body, method: "POST", url: `${apiBaseUrl}/reset`, }); const t0 = performance.now(); try { const result = await client.openEnvReset({ seed: seedNum, system_id: systemId || null, }); setStatelessResetCall({ status: "ok", result, error: null, latencyMs: performance.now() - t0, requestBody: body, method: "POST", url: `${apiBaseUrl}/reset`, }); } catch (err) { setStatelessResetCall({ status: 
"error", result: null, error: formatErr(err), latencyMs: performance.now() - t0, requestBody: body, method: "POST", url: `${apiBaseUrl}/reset`, }); } }, [apiBaseUrl, client, seed, systemId]); const runReset = useCallback(async (): Promise => { const seedNum = parseSeed(seed); const body: Record = { max_turns: maxTurns }; if (seedNum !== null) body.seed = seedNum; if (systemId) body.system_id = systemId; setResetCall({ status: "running", result: null, error: null, latencyMs: null, requestBody: body, method: "POST", url: `${apiBaseUrl}/interactive/sessions`, }); const t0 = performance.now(); try { // End the previous session if any, so we don't leak server memory. if (sessionId) { await client.endSession(sessionId).catch(() => { /* best-effort */ }); } const result = await client.startSession({ system_id: systemId || undefined, seed: seedNum ?? undefined, max_turns: maxTurns, }); setResetCall({ status: "ok", result, error: null, latencyMs: performance.now() - t0, requestBody: body, method: "POST", url: `${apiBaseUrl}/interactive/sessions`, }); setSessionId(result.session_id); // Reset the previous step result so the canvas reflects the // fresh reset and the user can see only the observed trajectory. setStepCall(idleCall("POST")); setSummary({ session_id: result.session_id, system_id: result.system.system_id, turn: 0, max_turns: result.max_turns, converged: false, done: false, }); } catch (err) { setResetCall({ status: "error", result: null, error: formatErr(err), latencyMs: performance.now() - t0, requestBody: body, method: "POST", url: `${apiBaseUrl}/interactive/sessions`, }); } }, [apiBaseUrl, client, maxTurns, seed, sessionId, systemId]); const runStep = useCallback(async (): Promise => { const parsedParams: Record = {}; if (paramsJson.trim()) { try { const parsed = JSON.parse(paramsJson) as unknown; if ( parsed === null || typeof parsed !== "object" || Array.isArray(parsed) ) { setParamsError("params must be a JSON object, e.g. 
{\"k\": 4.0}."); return; } for (const [k, v] of Object.entries(parsed as Record)) { if (typeof v !== "number" || !Number.isFinite(v)) { setParamsError(`params.${k} must be a finite number.`); return; } parsedParams[k] = v; } } catch (err) { setParamsError(`params is not valid JSON: ${(err as Error).message}`); return; } } setParamsError(null); const action: PhysiXAction = { equation: equation.trim(), params: parsedParams, rationale: rationale.trim(), }; const body = { action }; // Lazy-reset: starting a session before the first step is the same // contract as `gym.Env.reset()` before the first `step()`. let activeSessionId = sessionId; if (!activeSessionId) { try { const seedNum = parseSeed(seed); const start = await client.startSession({ system_id: systemId || undefined, seed: seedNum ?? undefined, max_turns: maxTurns, }); activeSessionId = start.session_id; setSessionId(activeSessionId); setResetCall({ status: "ok", result: start, error: null, latencyMs: 0, requestBody: { auto: true }, method: "POST", url: `${apiBaseUrl}/interactive/sessions`, }); } catch (err) { setStepCall({ status: "error", result: null, error: `Auto-reset failed: ${formatErr(err)}`, latencyMs: 0, requestBody: body, method: "POST", url: `${apiBaseUrl}/interactive/sessions/(none)/step`, }); return; } } const stepUrl = `${apiBaseUrl}/interactive/sessions/${activeSessionId}/step`; setStepCall({ status: "running", result: null, error: null, latencyMs: null, requestBody: body, method: "POST", url: stepUrl, }); const t0 = performance.now(); try { const result = await client.directStep(activeSessionId, action); setStepCall({ status: "ok", result, error: null, latencyMs: performance.now() - t0, requestBody: body, method: "POST", url: stepUrl, }); void refreshSummary(); } catch (err) { setStepCall({ status: "error", result: null, error: formatErr(err), latencyMs: performance.now() - t0, requestBody: body, method: "POST", url: stepUrl, }); } }, [ apiBaseUrl, client, equation, maxTurns, paramsJson, 
rationale, refreshSummary, seed, sessionId, systemId, ]); // Trajectory bits to draw: prefer the latest step's observation, fall // back to the reset's, fall back to nothing. const observed: TrajectorySample[] = useMemo(() => { const fromStep = stepCall.result?.observation.trajectory; const fromReset = resetCall.result?.observation.trajectory; return fromStep ?? fromReset ?? []; }, [stepCall.result, resetCall.result]); const stateVariables: string[] = useMemo(() => { const fromStep = stepCall.result?.observation.state_variables; const fromReset = resetCall.result?.observation.state_variables; return fromStep ?? fromReset ?? ["y", "vy"]; }, [stepCall.result, resetCall.result]); const primaryVariable = pickPrimaryVariable(stateVariables); const predicted: TrajectorySample[] = stepCall.result?.predicted_trajectory ?? []; // Pull the reward breakdown off the last successful step so the panel // below renders the same layout the LLM tabs use. Falls back to the // all-zero stub before any step has been taken. const lastReward: RewardBreakdown = stepCall.result?.observation.reward_breakdown ?? ZERO_REWARD; const hasReward = stepCall.status === "ok" && stepCall.result !== null; const hasReset = resetCall.status === "ok" && sessionId !== null; return (

OpenEnv interface

Drive the env directly: reset, step, observe.

PhysiX-Live implements the standard{" "} OpenEnv {" "} contract: a fresh environment per /reset, then one observation per /step{" "} until done. Because the bare OpenEnv HTTP routes are stateless (they construct a new env per request), the actual playable flow below uses the per-session interactive router that wraps the same env with a session id — equivalent to a long-lived gym.Env{" "} handle. The bare /reset,{" "} /schema, and{" "} /metadata endpoints are surfaced at the bottom for reference.

{/* Reset card */}
POST

/interactive/sessions

Begin a new episode. Equivalent to{" "} /reset on a stateful env. The response includes a session_id{" "} you'll pass to subsequent /step{" "} calls.

{/* Step card */}
POST

/interactive/sessions/{summary?.session_id?.slice(0, 8) ?? "{id}"}/step

Submit one action. PhysiX expects an ODE in its small SymPy grammar plus optional numerical parameter substitutions. {!hasReset && ( <> {" "} No active session yet — clicking step will auto-reset using the values from the card on the left. )}

{/* Trajectory preview */}

Last observation —{" "} {primaryVariable}(t)

{observed.length} sample{observed.length === 1 ? "" : "s"} ·{" "} {stateVariables.join(", ") || "—"} {hasReward && ( <> {" · "}total{" "} {lastReward.total.toFixed(3)} )}
{/* Dense reward row — same layout as the LLM tabs. Only shown once an actual /step has scored, otherwise the all-zero stub would mislead users into thinking match=0 is real. */} {hasReward && } {observed.length === 0 && (

No observation yet — call /reset{" "} above to load one.

)}
{/* Stateless reference endpoints. */}

Stateless reference endpoints

These three endpoints come from the OpenEnv core HTTP layer. They construct a new environment per request, so a follow-up{" "} /step on the bare{" "} /reset would 500. Useful for inspection — for an episode use the session-backed cards above.

void runStatelessReset()} disabled={statelessResetCall.status === "running"} > {statelessResetCall.status === "running" ? "Calling…" : "Call"} } />
); } // ---------------- supporting components ---------------- interface StatusStripProps { baseUrl: string; metadata: OpenEnvMetadata | null; summary: SessionSummary | null; } function StatusStrip({ baseUrl, metadata, summary, }: StatusStripProps): JSX.Element { return (
Server {baseUrl} {metadata && ( Env {metadata.name} {metadata.version ? ` · v${metadata.version}` : ""} )} session_id {summary?.session_id?.slice(0, 12) ?? "—"} {summary?.session_id ? "…" : ""} turn {summary ? `${summary.turn} / ${summary.max_turns}` : "—"} {summary?.converged && ( converged )} {summary?.done && !summary.converged && ( budget exhausted )}
); } interface CallStatusPillProps { record: CallRecord; } function CallStatusPill({ record }: CallStatusPillProps): JSX.Element | null { if (record.status === "idle") return null; const map: Record<"running" | "ok" | "error", { label: string; cls: string }> = { running: { label: "running", cls: "border-accentBlue/40 text-accentBlue", }, ok: { label: `200 · ${record.latencyMs?.toFixed(0)}ms`, cls: "border-accentGreen/40 text-accentGreen", }, error: { label: "error", cls: "border-primary/50 text-primary", }, }; const { label, cls } = map[record.status]; return ( {label} ); } interface CurlBlockProps { method: "GET" | "POST" | "DELETE"; url: string; body: unknown; } function CurlBlock({ method, url, body }: CurlBlockProps): JSX.Element { const cmd = useMemo(() => buildCurl(method, url, body), [method, url, body]); const [copied, setCopied] = useState(false); async function copy(): Promise { try { await navigator.clipboard.writeText(cmd); setCopied(true); setTimeout(() => setCopied(false), 1500); } catch { // clipboard blocked — silent } } return (
curl
        {cmd}
      
); } interface ResponseBlockProps { record: CallRecord; kind: "reset" | "step"; } function ResponseBlock({ record, kind }: ResponseBlockProps): JSX.Element | null { if (record.status === "idle") return null; if (record.status === "running") { return

Awaiting response…

; } if (record.status === "error") { return (
error
{record.error}
); } // ok const body = pruneObservation(record.result, kind); return (
200 OK {record.latencyMs?.toFixed(0)}ms · response (trajectory truncated)
        {JSON.stringify(body, null, 2)}
      
); } interface ReferenceCardProps { title: string; description: string; record: CallRecord; onRetry: () => void; customAction?: JSX.Element; } function ReferenceCard({ title, description, record, onRetry, customAction, }: ReferenceCardProps): JSX.Element { return (

{title}

{description}

{customAction} {record.status === "error" && (
{record.error}
)} {record.status === "ok" && record.result !== null && (
200 OK · view JSON
            {JSON.stringify(record.result, null, 2)}
          
)}
); } // ---------------- reward display ---------------- // // Kept in sync with the duplicate in RunWithLlmPane / ComparePane so all // three tabs render the same layout: the four trainable reward // components on top (match / progress / simplicity / format) and the // three diagnostic-only sub-scores (shape / freq / amplitude) on the // bottom labelled "diag". The diag row exists because R² collapses to // zero on small phase shifts, which makes match=0 misleading on its // own; shape/freq/amplitude give partial credit for "visual closeness" // without ever feeding into the reward total or the trainer. function DenseRewardRow({ reward }: { reward: RewardBreakdown }): JSX.Element { const rewardComponents: { name: string; value: number }[] = [ { name: "match", value: reward.match ?? 0 }, { name: "progress", value: reward.progress ?? 0 }, { name: "simplicity", value: reward.simplicity ?? 0 }, { name: "format", value: reward.format ?? 0 }, ]; const diagComponents: { name: string; value: number }[] = [ { name: "shape", value: reward.shape ?? 0 }, { name: "freq", value: reward.freq ?? 0 }, { name: "amplitude", value: reward.amplitude ?? 0 }, ]; return (
{rewardComponents.map(({ name, value }) => ( ))}
diag
{diagComponents.map(({ name, value }) => ( ))}
); } function RewardCell({ name, value, muted = false, }: { name: string; value: number; muted?: boolean; }): JSX.Element { return (
{name} {value.toFixed(2)}
= 0.7 ? muted ? "bg-accentBlue/60" : "bg-accentGreen/70" : value >= 0.3 ? "bg-accentAmber/70" : "bg-textMuted/40", )} style={{ width: `${Math.max(0, Math.min(1, value)) * 100}%` }} />
); }

// ---------------- helpers ----------------

/**
 * Turn a thrown value into a single-line message for display.
 *
 * - `InteractiveApiError` is rendered as `[status] detail`.
 * - Any other `Error` is rendered as its `message`.
 * - Everything else is coerced with `String()`.
 */
function formatErr(err: unknown): string {
  if (err instanceof InteractiveApiError) return `[${err.status}] ${err.detail}`;
  if (err instanceof Error) return err.message;
  return String(err);
}

/**
 * Parse the free-text seed field into a non-negative integer.
 *
 * Returns `null` when the field is blank or does not hold a usable seed
 * (non-numeric, fractional, negative, or outside the safe-integer range).
 * The text is converted with `Number()`, so any numeric literal form that
 * `Number()` accepts and that evaluates to a safe non-negative integer is
 * allowed.
 */
function parseSeed(raw: string): number | null {
  const trimmed = raw.trim();
  if (!trimmed) return null;
  const n = Number(trimmed);
  // `isSafeInteger` rather than `isInteger`: above 2^53 - 1 the parsed
  // value has already been rounded, so the seed put on the wire would
  // silently differ from the one the user typed.
  if (!Number.isSafeInteger(n) || n < 0) return null;
  return n;
}

/**
 * Build a copy-pasteable curl command for one call.
 *
 * GET and DELETE produce a one-line command without a body. Any other
 * method produces a multi-line command (backslash continuations) with a
 * JSON content-type header and the JSON-serialised body; a null or
 * undefined body is sent as `{}`.
 */
function buildCurl(
  method: "GET" | "POST" | "DELETE",
  url: string,
  body: unknown,
): string {
  if (method === "GET" || method === "DELETE") {
    return `curl -X ${method} ${shellQuote(url)}`;
  }
  const payload = body == null ? "{}" : JSON.stringify(body);
  return [
    `curl -X ${method}`,
    ` ${shellQuote(url)}`,
    ` -H ${shellQuote("Content-Type: application/json")}`,
    ` -d ${shellQuote(payload)}`,
  ].join(" \\\n");
}

/**
 * Wrap a string in single quotes for a POSIX shell, rewriting each
 * embedded single quote as `'\''` (close quote, escaped quote, reopen).
 */
function shellQuote(s: string): string {
  return `'${s.replace(/'/g, "'\\''")}'`;
}

/**
 * The trajectory array dominates the JSON payload (often 100+ samples).
 * Truncate it for the response viewer so the JSON pane stays readable —
 * the canvas above already plots the full thing.
 *
 * Only `observation.trajectory` arrays longer than six entries are
 * shortened: the first three and last three samples are kept, with a
 * placeholder string between them. Any other input is returned as-is.
 * The input object is never mutated; a shallow copy is returned when
 * truncation happens. `_kind` is currently unused.
 */
function pruneObservation(
  result: unknown,
  _kind: "reset" | "step",
): unknown {
  if (result === null || typeof result !== "object") return result;
  const r = result as Record<string, unknown>;
  const obs = r.observation as Record<string, unknown> | undefined;
  if (!obs) return result;
  const traj = obs.trajectory;
  if (Array.isArray(traj) && traj.length > 6) {
    return {
      ...r,
      observation: {
        ...obs,
        trajectory: [
          ...traj.slice(0, 3),
          `… ${traj.length - 6} more samples …`,
          ...traj.slice(-3),
        ],
      },
    };
  }
  return result;
}