Spaces:

Pratyush-01
/

physix-live

Sleeping

App Files Files Community

Pratyush-01 committed 13 days ago

Commit

a507dcb

verified ·

1 Parent(s): a548276

ui: status banner + endpoint guide for physix-infer cold-start

Browse files

Files changed (24) hide show

.openenvignore +43 -0
.pytest_cache/.gitignore +2 -0
.pytest_cache/CACHEDIR.TAG +4 -0
.pytest_cache/README.md +8 -0
.pytest_cache/v/cache/lastfailed +6 -0
.pytest_cache/v/cache/nodeids +141 -0
.ruff_cache/.gitignore +2 -0
.ruff_cache/0.15.12/13147506665210707489 +0 -0
.ruff_cache/0.15.12/18068402563573118557 +0 -0
.ruff_cache/0.15.12/3907545159351961271 +0 -0
.ruff_cache/0.15.12/482523753705084482 +0 -0
.ruff_cache/0.15.12/8552442504755746477 +0 -0
.ruff_cache/CACHEDIR.TAG +1 -0
.vscode/settings.json +6 -0
Dockerfile +1 -2
README.md +0 -1
docs/plots/loss.png +0 -0
docs/plots/reward.png +2 -2
docs/plots/reward_components.png +2 -2
frontend/src/components/LlmConnectionPanel.tsx +9 -0
frontend/src/components/PhysixInferStatus.tsx +368 -0
frontend/src/components/RunWithLlmPane.tsx +55 -0
frontend/tsconfig.tsbuildinfo +1 -1
train/.gitignore +5 -0

.openenvignore ADDED Viewed

	@@ -0,0 +1,43 @@

+# Files OpenEnv `push` should NOT upload to the HF Space.
+#
+# OpenEnv's defaults already exclude `.*` (so .venv, .pytest_cache,
+# .ruff_cache, .vscode are gone), `__pycache__`, and `*.pyc`. This
+# file adds the heavyweight build artefacts and source-tree subdirs
+# that don't belong in the Space build context.
+#
+# IMPORTANT: keep this list tight. Anything we exclude here is invisible
+# to the HF Spaces Docker build, so files referenced by Dockerfile.COPY
+# steps must NOT appear here.
+# Frontend build artefacts. The Space's two-stage Dockerfile rebuilds
+# the SPA from `frontend/` source, so we don't ship the host build.
+frontend/node_modules
+frontend/dist
+frontend/dist-ts-build
+# Training stack — runs on HF Jobs, not in the Space. The Space image
+# explicitly installs only the inference deps (no torch / unsloth / trl)
+# to keep cold-start small.
+train
+# Tests + dev-only docs aren't needed at runtime. Excluding them
+# shrinks the build context and removes a (tiny) source of noise in
+# the Hub UI.
+tests
+docs
+# scripts/ is mostly dev-only, but space_app.py is referenced by the
+# Dockerfile (mounts the SPA + adds the / -> /web/ redirect) and MUST
+# be present at build time. So we exclude the verifier helpers
+# explicitly rather than blanket-excluding the directory.
+scripts/verify_hf_router.py
+scripts/HF_ROUTER_VERIFICATION.md
+# Build / packaging artefacts.
+*.egg-info
+build
+dist
+# Editor / OS detritus.
+.DS_Store
+*.swp

.pytest_cache/.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ # Created by pytest automatically.
2	+ *

.pytest_cache/CACHEDIR.TAG ADDED Viewed

	@@ -0,0 +1,4 @@

+Signature: 8a477f597d28d172789f06886806bc55
+# This file is a cache directory tag created by pytest.
+# For information about cache directory tags, see:
+#	https://bford.info/cachedir/spec.html

.pytest_cache/README.md ADDED Viewed

	@@ -0,0 +1,8 @@

+# pytest cache directory #
+This directory contains data from the pytest's cache plugin,
+which provides the `--lf` and `--ff` options, as well as the `cache` fixture.
+**Do not** commit this to version control.
+See [the docs](https://docs.pytest.org/en/stable/how-to/cache.html) for more information.

.pytest_cache/v/cache/lastfailed ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "tests/test_parser.py::test_unknown_dy_in_paired_system_suggests_vy": true,
+  "tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[(lambda x: x)(y)-Lambda]": true,
+  "tests/test_dataset.py": true,
+  "tests/test_sft_dataset.py": true
+}

.pytest_cache/v/cache/nodeids ADDED Viewed

	@@ -0,0 +1,141 @@

+[
+  "tests/test_client_ws.py::test_websocket_round_trip",
+  "tests/test_dataset.py::test_dataset_observed_arrays_match_state_variables",
+  "tests/test_dataset.py::test_eval_dataset_marks_held_out_rows",
+  "tests/test_dataset.py::test_training_dataset_default_curriculum_is_demo_systems",
+  "tests/test_dataset.py::test_training_dataset_default_curriculum_is_supported_systems",
+  "tests/test_dataset.py::test_training_dataset_explicit_system_ids_override_default",
+  "tests/test_dataset.py::test_training_dataset_has_expected_schema",
+  "tests/test_dataset.py::test_training_dataset_prompts_are_chat_lists",
+  "tests/test_dataset.py::test_training_dataset_rejects_empty_system_ids",
+  "tests/test_dataset.py::test_training_dataset_rejects_unknown_system_ids",
+  "tests/test_dataset.py::test_training_dataset_uses_only_train_tiers",
+  "tests/test_environment.py::test_episode_terminates_on_convergence",
+  "tests/test_environment.py::test_history_accumulates_across_turns",
+  "tests/test_environment.py::test_max_turns_terminates_episode",
+  "tests/test_environment.py::test_progress_reward_rewards_improvement",
+  "tests/test_environment.py::test_reset_returns_well_formed_observation",
+  "tests/test_environment.py::test_state_property_exposes_episode_id",
+  "tests/test_environment.py::test_step_swallows_simulator_failures_as_format_zero_match_zero[free_fall-d2y/dt2 = -9.81 / (y - y)]",
+  "tests/test_environment.py::test_step_swallows_simulator_failures_as_format_zero_match_zero[free_fall-d2y/dt2 = exp(exp(exp(y)))]",
+  "tests/test_environment.py::test_step_swallows_simulator_failures_as_format_zero_match_zero[free_fall-d2y/dt2 = log(0 * y)]",
+  "tests/test_environment.py::test_step_swallows_simulator_failures_as_format_zero_match_zero[simple_pendulum-d2theta/dt2 = -sqrt(-theta**2 - 1)]",
+  "tests/test_environment.py::test_step_swallows_simulator_failures_as_format_zero_match_zero[simple_pendulum-d2theta/dt2 = -sqrt(dtheta**2 + theta**2) * sin(theta)]",
+  "tests/test_environment.py::test_step_with_ground_truth_rewards_high",
+  "tests/test_environment.py::test_step_with_unparseable_equation_short_circuits",
+  "tests/test_interactive_api.py::test_budget_exhaustion_returns_409_on_next_step",
+  "tests/test_interactive_api.py::test_cors_preflight_for_dev_origin",
+  "tests/test_interactive_api.py::test_llm_step_after_budget_exhaustion_returns_409",
+  "tests/test_interactive_api.py::test_llm_step_drives_a_turn_using_injected_policy",
+  "tests/test_interactive_api.py::test_llm_step_handles_unparseable_completion_as_format_zero",
+  "tests/test_interactive_api.py::test_llm_step_runs_full_episode_with_three_canned_turns",
+  "tests/test_interactive_api.py::test_models_endpoint_returns_empty_with_error_when_daemon_unavailable",
+  "tests/test_interactive_api.py::test_models_endpoint_returns_injected_list",
+  "tests/test_interactive_api.py::test_session_lifecycle_create_step_delete",
+  "tests/test_interactive_api.py::test_session_lifecycle_create_summary_delete",
+  "tests/test_interactive_api.py::test_step_with_ground_truth_marks_done",
+  "tests/test_interactive_api.py::test_systems_endpoint_excludes_held_out_tier",
+  "tests/test_interactive_api.py::test_systems_endpoint_returns_demo_curriculum_in_order",
+  "tests/test_interactive_api.py::test_systems_endpoint_returns_supported_systems_in_order",
+  "tests/test_interactive_api.py::test_unknown_session_id_returns_404",
+  "tests/test_interactive_api.py::test_unknown_system_id_returns_400",
+  "tests/test_interactive_api.py::test_unparseable_equation_returns_zero_format_not_500",
+  "tests/test_metrics_diagnostic.py::test_diag_scores_perfect_on_identical_trajectory",
+  "tests/test_metrics_diagnostic.py::test_diag_scores_stay_informative_when_r2_collapses",
+  "tests/test_metrics_diagnostic.py::test_freq_hint_in_mismatch_summary_for_freq_mismatch",
+  "tests/test_metrics_diagnostic.py::test_freq_score_falls_back_for_non_oscillatory_signal",
+  "tests/test_metrics_diagnostic.py::test_reward_total_unchanged_by_diagnostic_fields",
+  "tests/test_parser.py::test_alias_does_not_fire_when_velocity_state_is_named_dvar",
+  "tests/test_parser.py::test_alias_only_replaces_word_boundary_matches",
+  "tests/test_parser.py::test_bare_dx_alias_substitutes_for_vx",
+  "tests/test_parser.py::test_basic_equation_round_trips",
+  "tests/test_parser.py::test_caret_is_accepted_as_power_synonym",
+  "tests/test_parser.py::test_decimal_literals_are_not_misread_as_attribute_access",
+  "tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[(lambda x: x)(y)-Lambda]",
+  "tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[(lambda x: x)(y)-computed-name call]",
+  "tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[(y, vy)-Collection]",
+  "tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[lambda x: x-Lambda]",
+  "tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[vy[0]-Array indexing]",
+  "tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[y == vy-Comparisons]",
+  "tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[y and vy-Boolean]",
+  "tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[y if vy > 0 else -y-Conditional]",
+  "tests/test_parser.py::test_dotted_attribute_access_is_rejected_with_clear_error",
+  "tests/test_parser.py::test_dotted_attribute_access_variants_all_rejected[math.sin(theta)]",
+  "tests/test_parser.py::test_dotted_attribute_access_variants_all_rejected[np.exp(-theta**2)]",
+  "tests/test_parser.py::test_dotted_attribute_access_variants_all_rejected[numpy.cos(theta) + 1]",
+  "tests/test_parser.py::test_dotted_attribute_access_variants_all_rejected[scipy.special.expit(theta)]",
+  "tests/test_parser.py::test_dotted_attribute_access_variants_all_rejected[theta.diff()]",
+  "tests/test_parser.py::test_dx_dt_alias_substitutes_for_vx_when_velocity_state_exists",
+  "tests/test_parser.py::test_dy_dt_alias_substitutes_for_vy",
+  "tests/test_parser.py::test_grammar_hint_documents_velocity_convention",
+  "tests/test_parser.py::test_keyword_arguments_in_call_are_rejected_with_specific_hint",
+  "tests/test_parser.py::test_multiple_equations_split_on_semicolons_keep_alias_behaviour",
+  "tests/test_parser.py::test_only_parse_error_ever_escapes_the_parser",
+  "tests/test_parser.py::test_sympy_internal_errors_become_parse_errors_not_attribute_errors",
+  "tests/test_parser.py::test_unknown_dx_in_system_without_paired_velocity_includes_hint",
+  "tests/test_parser.py::test_unknown_dy_in_paired_system_suggests_vy",
+  "tests/test_parser.py::test_unknown_t_emits_autonomy_hint",
+  "tests/test_prompt.py::test_build_prompt_returns_chat_pair",
+  "tests/test_prompt.py::test_history_block_surfaces_dense_reward_components",
+  "tests/test_prompt.py::test_history_block_tolerates_missing_reward_components",
+  "tests/test_prompt.py::test_history_uses_equation_field_name_not_shorthand",
+  "tests/test_prompt.py::test_parse_completion_accepts_capitalised_keys",
+  "tests/test_prompt.py::test_parse_completion_accepts_eqn_synonym",
+  "tests/test_prompt.py::test_parse_completion_accepts_other_equation_synonyms",
+  "tests/test_prompt.py::test_parse_completion_accepts_parameters_synonym",
+  "tests/test_prompt.py::test_parse_completion_accepts_reasoning_synonym",
+  "tests/test_prompt.py::test_parse_completion_canonical_key_wins_over_synonym",
+  "tests/test_prompt.py::test_parse_completion_coerces_string_params",
+  "tests/test_prompt.py::test_parse_completion_extracts_clean_json",
+  "tests/test_prompt.py::test_parse_completion_handles_code_fences",
+  "tests/test_prompt.py::test_parse_completion_handles_latex_braces_inside_json_string",
+  "tests/test_prompt.py::test_parse_completion_normalizes_latex_frac_and_caret",
+  "tests/test_prompt.py::test_parse_completion_picks_first_object_when_text_has_multiple",
+  "tests/test_prompt.py::test_parse_completion_with_no_json_falls_back_to_raw_text",
+  "tests/test_prompt.py::test_parse_completion_with_no_json_yields_empty_equation",
+  "tests/test_prompt.py::test_render_includes_history_when_present",
+  "tests/test_prompt.py::test_render_includes_metadata_block",
+  "tests/test_prompt.py::test_render_includes_trajectory_samples",
+  "tests/test_prompt.py::test_render_omits_history_block_when_empty",
+  "tests/test_prompt.py::test_system_message_locks_in_canonical_field_name",
+  "tests/test_providers.py::test_format_provider_error_pinpoints_auth_failure",
+  "tests/test_providers.py::test_format_provider_error_pinpoints_missing_model",
+  "tests/test_providers.py::test_format_provider_error_pinpoints_unreachable_endpoint",
+  "tests/test_providers.py::test_resolve_api_key_browser_supplied_wins",
+  "tests/test_providers.py::test_resolve_api_key_falls_back_to_hf_token_for_hf_router",
+  "tests/test_providers.py::test_resolve_api_key_falls_back_to_openai_env",
+  "tests/test_providers.py::test_resolve_api_key_for_ollama_returns_placeholder_when_no_env",
+  "tests/test_providers.py::test_resolve_api_key_returns_none_for_unknown_url",
+  "tests/test_providers.py::test_resolve_api_key_uses_huggingface_api_key_if_hf_token_missing",
+  "tests/test_providers_hf.py::test_full_episode_flows_visitor_config_to_openai_client_unchanged",
+  "tests/test_providers_hf.py::test_full_episode_recovers_from_first_call_response_format_400",
+  "tests/test_providers_hf.py::test_hf_router_401_surfaces_inference_providers_permission_hint",
+  "tests/test_providers_hf.py::test_hf_router_404_surfaces_warm_provider_hint",
+  "tests/test_providers_hf.py::test_hf_router_base_url_is_canonical",
+  "tests/test_providers_hf.py::test_hf_router_client_is_constructed_with_visitor_token",
+  "tests/test_providers_hf.py::test_hf_router_client_uses_hf_token_env_when_visitor_omits_key",
+  "tests/test_providers_hf.py::test_hf_router_connection_failure_surfaces_network_hint",
+  "tests/test_providers_hf.py::test_hf_router_retries_without_response_format_on_bad_request",
+  "tests/test_providers_hf.py::test_hf_router_succeeds_on_first_try_when_provider_supports_json",
+  "tests/test_providers_hf.py::test_hf_router_timeout_surfaces_network_hint",
+  "tests/test_registry.py::test_demo_curriculum_is_non_empty",
+  "tests/test_registry.py::test_demo_curriculum_only_references_registered_systems",
+  "tests/test_registry.py::test_every_demo_system_instantiates_cleanly",
+  "tests/test_registry.py::test_every_supported_system_instantiates_cleanly",
+  "tests/test_registry.py::test_list_demo_systems_preserves_declared_order",
+  "tests/test_registry.py::test_list_supported_systems_preserves_declared_order",
+  "tests/test_registry.py::test_supported_systems_is_non_empty",
+  "tests/test_registry.py::test_supported_systems_only_references_registered_systems",
+  "tests/test_scorer.py::test_scorer_caches_by_key",
+  "tests/test_scorer.py::test_scorer_progress_shrinks_when_previous_total_is_high",
+  "tests/test_scorer.py::test_scorer_returns_high_match_for_ground_truth",
+  "tests/test_scorer.py::test_scorer_returns_zero_format_on_garbage",
+  "tests/test_sft_dataset.py::test_build_sft_dataset_completions_are_valid_json_action",
+  "tests/test_sft_dataset.py::test_build_sft_dataset_default_curriculum_size",
+  "tests/test_sft_dataset.py::test_build_sft_dataset_default_curriculum_size_matches_demo",
+  "tests/test_sft_dataset.py::test_build_sft_dataset_default_scales_with_instances_per_system",
+  "tests/test_sft_dataset.py::test_build_sft_dataset_default_size_matches_supported_systems",
+  "tests/test_sft_dataset.py::test_build_sft_dataset_explicit_system_ids_override_default",
+  "tests/test_sft_dataset.py::test_build_sft_dataset_rejects_empty_system_ids",
+  "tests/test_sft_dataset.py::test_build_sft_dataset_rejects_unknown_system_ids"
+]

.ruff_cache/.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ # Automatically created by ruff.
2	+ *

.ruff_cache/0.15.12/13147506665210707489 ADDED Viewed

Binary file (85 Bytes). View file

.ruff_cache/0.15.12/18068402563573118557 ADDED Viewed

Binary file (1.11 kB). View file

.ruff_cache/0.15.12/3907545159351961271 ADDED Viewed

Binary file (308 Bytes). View file

.ruff_cache/0.15.12/482523753705084482 ADDED Viewed

Binary file (355 Bytes). View file

.ruff_cache/0.15.12/8552442504755746477 ADDED Viewed

Binary file (1.01 kB). View file

.ruff_cache/CACHEDIR.TAG ADDED Viewed

	@@ -0,0 +1 @@


1	+ Signature: 8a477f597d28d172789f06886806bc55

.vscode/settings.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "python.defaultInterpreterPath": "/Users/pratyush/miniconda3/envs/openenv_run/bin/python",
+  "python.analysis.extraPaths": ["${workspaceFolder}"],
+  "python.testing.pytestEnabled": true,
+  "python.testing.pytestArgs": ["tests"]
+}

Dockerfile CHANGED Viewed

@@ -38,7 +38,7 @@ COPY frontend/ ./
 ENV VITE_PHYSIX_API_URL=""
 # Cache-bust marker. Bump when an SPA change isn't taking on the Space —
 # HF BuildKit occasionally reuses stage-1 output even when sources changed.
-# physix-spa-rebuild: 5
 RUN pnpm exec tsc -b \
     && pnpm exec vite build --base=/web/
@@ -102,5 +102,4 @@ EXPOSE 7860
 HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
     CMD curl -fsS "http://127.0.0.1:${PORT}/health" || exit 1
-ENV ENABLE_WEB_INTERFACE=true
 CMD ["python3", "-m", "uvicorn", "_space_app:app", "--host", "0.0.0.0", "--port", "7860"]

 ENV VITE_PHYSIX_API_URL=""
 # Cache-bust marker. Bump when an SPA change isn't taking on the Space —
 # HF BuildKit occasionally reuses stage-1 output even when sources changed.
+# physix-spa-rebuild: 6
 RUN pnpm exec tsc -b \
     && pnpm exec vite build --base=/web/
 HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
     CMD curl -fsS "http://127.0.0.1:${PORT}/health" || exit 1
 CMD ["python3", "-m", "uvicorn", "_space_app:app", "--host", "0.0.0.0", "--port", "7860"]

README.md CHANGED Viewed

@@ -14,7 +14,6 @@ tags:
   - physics
   - equation-discovery
   - ode
-base_path: /web
 ---
 # PhysiX — Equation Discovery via RLVR

   - physics
   - equation-discovery
   - ode
 ---
 # PhysiX — Equation Discovery via RLVR

docs/plots/loss.png CHANGED Viewed

docs/plots/reward.png CHANGED Viewed

Git LFS Details

SHA256: bfb513495bf11c8d1892f534085a7d3eab9c7add6b23e2969953014c28527d0b
Pointer size: 131 Bytes
Size of remote file: 196 kB

Git LFS Details

SHA256: e57c92b1c5f9a8ec1ea0d352c406457718f537ebc962b410fc82c3dba9eedfd7
Pointer size: 131 Bytes
Size of remote file: 161 kB

docs/plots/reward_components.png CHANGED Viewed

Git LFS Details

SHA256: 9a3fdb634dd2672f3f15a79fe52a6ec3063f248e3c22c111a23c0f2da6ad198e
Pointer size: 131 Bytes
Size of remote file: 257 kB

Git LFS Details

SHA256: c959d29cb0efeb3d9726a7846f557c5095c72c23ec428dfcbfad75aeadb63a9c
Pointer size: 131 Bytes
Size of remote file: 212 kB

frontend/src/components/LlmConnectionPanel.tsx CHANGED Viewed

@@ -14,6 +14,7 @@
 import { useEffect, useState } from "react";
 import { cn } from "@/lib/cn";
 import type { LlmModelInfo } from "@/lib/interactiveClient";
 import {
@@ -195,6 +196,14 @@ export function LlmConnectionPanel({
         {endpoint.hint}
       </p>
       {endpoint.id === "ollama" && installedOllamaError ? (
         <OllamaTroubleshooter
           message={installedOllamaError}

 import { useEffect, useState } from "react";
+import { PhysixInferStatus } from "@/components/PhysixInferStatus";
 import { cn } from "@/lib/cn";
 import type { LlmModelInfo } from "@/lib/interactiveClient";
 import {
         {endpoint.hint}
       </p>
+      {/*
+        Live status banner for the GPU Space. Only renders for the
+        physix endpoint — every other endpoint either has no sleep
+        cycle (HF Router, OpenAI, Custom) or is local-only (Ollama),
+        so there's nothing to surface.
+      */}
+      {endpoint.id === "physix" ? <PhysixInferStatus /> : null}
       {endpoint.id === "ollama" && installedOllamaError ? (
         <OllamaTroubleshooter
           message={installedOllamaError}

frontend/src/components/PhysixInferStatus.tsx ADDED Viewed

	@@ -0,0 +1,368 @@

+/** Status banner for the PhysiX-Infer GPU Space.
+ *
+ *  Why this exists:
+ *    The PhysiX-Infer Space sleeps after 5 min of idle to avoid burning
+ *    GPU time. A cold-start takes 90-120 s while vLLM downloads / loads
+ *    weights for both 3B models. Without warning, a user picks the
+ *    PhysiX endpoint, hits Run, and stares at a spinner for 2 minutes
+ *    convinced something is broken.
+ *
+ *    This panel surfaces the underlying state so the wait is *expected*,
+ *    not surprising — and offers a one-click "Prewarm" button so the
+ *    user can kick the boot off before they pick a system / hit Run.
+ *
+ *  Mechanics:
+ *    - On mount, GET https://pratyush-01-physix-infer.hf.space/health.
+ *    - HF Spaces' edge proxy yields one of four observable outcomes:
+ *        * 200 with body { upstreams: { qwen: "ok", physix: "ok" } }
+ *          → both vLLMs are loaded and serving. Fast next call.
+ *        * 200 with one upstream not "ok"
+ *          → container running but vLLM still warming. Some calls fast,
+ *            some still slow. Treat as "warming".
+ *        * non-2xx (typically 503)
+ *          → the container answered but is not ready yet. Also treated
+ *            as "warming".
+ *        * no response within the 6 s probe timeout
+ *          → Space is asleep. Whatever woke it (this very request) will
+ *            now ride the cold-boot pipeline.
+ *    - Re-poll every 15 s while the component is mounted so the badge
+ *      stays accurate as the user thinks. Polling is cheap and the
+ *      requests count as activity, which keeps the Space awake while
+ *      they read — exactly the UX we want during a demo.
+ *
+ *  Note on CORS: the physix-infer FastAPI uses default CORS. The
+ *  /health endpoint returns plain JSON, and the probe uses fetch() in
+ *  "cors" mode, so the response must carry Access-Control-Allow-Origin
+ *  for the browser to expose it at all — without that header fetch()
+ *  rejects and neither the status code nor the BODY is readable. If a
+ *  2xx response arrives but its body can't be parsed as JSON, we fall
+ *  back to "container is up" (best-effort). A probe that times out is
+ *  shown as "asleep"; any other network failure is shown as "error". */
+import { useCallback, useEffect, useRef, useState } from "react";
+import { cn } from "@/lib/cn";
+import { PHYSIX_INFER_BASE_URL } from "@/lib/llmPresets";
+// /health is mounted at the proxy root, so strip the trailing /v1
+// (with or without a final slash) before appending the path.
+const HEALTH_URL = PHYSIX_INFER_BASE_URL.replace(/\/v1\/?$/, "") + "/health";
+// Interval between background probes while the banner is mounted.
+// 15 s strikes a balance: long enough that we don't spam HF's edge with
+// requests, short enough that "GPU is now warm" surfaces well before
+// the user has finished typing their prompt.
+const POLL_INTERVAL_MS = 15_000;
+// Hard ceiling on a single probe. HF holds requests open while a Space
+// boots, and that boot can take ~120 s. We don't want to *wait* for
+// the boot — we want to detect the asleep state early so we can
+// render "cold" and offer the Prewarm button. Anything past 6 s
+// without a response is "asleep" for our purposes.
+const PROBE_TIMEOUT_MS = 6_000;
+/** What the banner currently believes about the GPU Space.
+ *
+ *  - "unknown": initial state, before the first probe has resolved.
+ *  - "awake":   a 2xx response was received. `bothUpstreams` is true
+ *               when the body listed at least one upstream and every
+ *               one was "ok"; false when the body could not be parsed.
+ *  - "warming": a non-2xx response, or a 2xx whose upstreams are not
+ *               all "ok" (also set optimistically when Prewarm is
+ *               clicked).
+ *  - "asleep":  the probe was aborted by the timeout.
+ *  - "error":   fetch failed for any other reason; `message` is the
+ *               thrown error's message. */
+type Status =
+  | { kind: "unknown" }
+  | { kind: "awake"; bothUpstreams: boolean }
+  | { kind: "warming" }
+  | { kind: "asleep" }
+  | { kind: "error"; message: string };
+/** Outcome of a single /health probe. */
+interface ProbeResult {
+  status: Status;
+  /** True if the probe itself was successful enough to count as a
+   *  wake-up signal — i.e. HF Spaces' edge proxy received it and
+   *  routed it to the container. Set to true for every HTTP response
+   *  and false on timeout / network failure.
+   *  NOTE(review): no code visible in this file reads this field. */
+  hitContainer: boolean;
+}
+// Module-level dedup. The Compare pane mounts TWO copies of this
+// component (one per side), and without coalescing they'd each fire
+// their own `/health` GET every 15 s — pointless duplicate load on
+// the GPU Space's edge. We share a single in-flight promise across
+// concurrent callers and cache the most recent result — whatever its
+// kind, including "asleep" and "error" — for a short window, so a
+// caller arriving just after a probe finished reuses that answer
+// instead of issuing its own request.
+let inFlight: Promise<ProbeResult> | null = null;
+// Most recent completed probe and the Date.now() timestamp at which
+// it resolved.
+let lastResult: { result: ProbeResult; at: number } | null = null;
+// How long (ms) a completed probe's result may be replayed.
+const SHARED_RESULT_WINDOW_MS = 5_000;
+/** Shared, de-duplicated entry point for probing /health.
+ *
+ *  Resolution order:
+ *    1. If a probe is already on the wire, hand back that same promise.
+ *    2. If the previous probe finished less than
+ *       SHARED_RESULT_WINDOW_MS ago, replay its result.
+ *    3. Otherwise start a new probe that is aborted after
+ *       PROBE_TIMEOUT_MS, remember its result, and return it. */
+async function probe(): Promise<ProbeResult> {
+  if (inFlight !== null) {
+    return inFlight;
+  }
+  const cached = lastResult;
+  if (cached !== null && Date.now() - cached.at < SHARED_RESULT_WINDOW_MS) {
+    return cached.result;
+  }
+  // Arm the abort timer before the request starts; the timer is always
+  // cleared once the request settles, whichever way it settles.
+  const aborter = new AbortController();
+  const timer = window.setTimeout(() => aborter.abort(), PROBE_TIMEOUT_MS);
+  const pending = runProbe(aborter.signal).finally(() => {
+    window.clearTimeout(timer);
+  });
+  inFlight = pending;
+  try {
+    const fresh = await pending;
+    lastResult = { result: fresh, at: Date.now() };
+    return fresh;
+  } finally {
+    // Release the slot so the next caller can start a new probe.
+    inFlight = null;
+  }
+}
+/** Issue one GET to HEALTH_URL and classify the outcome.
+ *
+ *  Never rejects: every failure path is mapped to a ProbeResult.
+ *
+ *  @param signal  Abort signal; when it fires the probe is reported
+ *                 as "asleep".
+ *  @returns       "warming" for a non-2xx response or a 2xx whose
+ *                 upstreams are not all "ok"; "awake" for a 2xx with
+ *                 all upstreams "ok" (or an unparseable body); "asleep"
+ *                 on abort; "error" on any other thrown failure. */
+async function runProbe(signal: AbortSignal): Promise<ProbeResult> {
+  try {
+    const response = await fetch(HEALTH_URL, {
+      method: "GET",
+      mode: "cors",
+      signal,
+    });
+    if (!response.ok) {
+      // Any non-2xx status lands here and is reported as "warming".
+      // The expected case is a 503 from /health while at least one
+      // vLLM is still booting: we hit the container, so we *did* wake
+      // the Space.
+      // NOTE(review): other statuses (e.g. 404, 500) are shown as
+      // "warming" too — confirm that is intended.
+      return { status: { kind: "warming" }, hitContainer: true };
+    }
+    // 2xx — try to parse the JSON body. If that fails, default to
+    // "awake but unsure about per-upstream status".
+    try {
+      const body = (await response.json()) as {
+        upstreams?: Record<string, string>;
+      };
+      const upstreams = body.upstreams ?? {};
+      const allOk = Object.values(upstreams).every((v) => v === "ok");
+      // `every` is vacuously true for an empty map, so also require at
+      // least one upstream before declaring everything loaded.
+      if (allOk && Object.keys(upstreams).length > 0) {
+        return {
+          status: { kind: "awake", bothUpstreams: true },
+          hitContainer: true,
+        };
+      }
+      return { status: { kind: "warming" }, hitContainer: true };
+    } catch {
+      // Body was not valid JSON (or reading it threw). Best effort: a
+      // 2xx means the container answered, so it's awake; we just can't
+      // see the per-upstream detail.
+      return {
+        status: { kind: "awake", bothUpstreams: false },
+        hitContainer: true,
+      };
+    }
+  } catch (exc) {
+    // AbortError → the caller's signal fired (probe timed out); the
+    // most likely cause is "asleep + slow cold-boot", so report
+    // "asleep". Anything else (DNS / offline / CORS refusal) is
+    // reported as "error" with the thrown message.
+    if ((exc as Error).name === "AbortError") {
+      return { status: { kind: "asleep" }, hitContainer: false };
+    }
+    return {
+      status: { kind: "error", message: (exc as Error).message },
+      hitContainer: false,
+    };
+  }
+}
+/** Live status banner for the PhysiX-Infer GPU Space.
+ *
+ *  Probes /health once on mount and then every POLL_INTERVAL_MS while
+ *  mounted, and renders the latest accepted status through
+ *  StatusBanner together with a "Prewarm" handler. Takes no props. */
+export function PhysixInferStatus(): JSX.Element {
+  const [status, setStatus] = useState<Status>({ kind: "unknown" });
+  const [prewarming, setPrewarming] = useState(false);
+  // True while the most recently accepted probe said "awake". It buys
+  // exactly ONE ignored "asleep"/"error" probe, so a transient network
+  // blip doesn't flip the badge — a second consecutive failure does.
+  const wasAwakeRef = useRef(false);
+  const refresh = useCallback(async () => {
+    const { status: next } = await probe();
+    const unreachable = next.kind === "asleep" || next.kind === "error";
+    if (unreachable && wasAwakeRef.current) {
+      // Sticky-awake: keep whatever is on screen for this one probe,
+      // but spend the free pass. If the Space really went to sleep,
+      // the next poll agrees and the banner flips then. (Previously
+      // the flag was never cleared, so the banner could never leave
+      // "awake" once it had been seen.)
+      wasAwakeRef.current = false;
+      return;
+    }
+    // The ref is updated here rather than inside a setStatus updater:
+    // React expects updater functions to be pure and may invoke them
+    // more than once.
+    wasAwakeRef.current = next.kind === "awake";
+    setStatus(next);
+  }, []);
+  useEffect(() => {
+    void refresh();
+    const id = window.setInterval(() => void refresh(), POLL_INTERVAL_MS);
+    return () => window.clearInterval(id);
+  }, [refresh]);
+  /** Kick off a cold boot by sending an un-timed GET to /health. */
+  async function handlePrewarm(): Promise<void> {
+    if (prewarming) return;
+    setPrewarming(true);
+    // Optimistically show "warming" right away.
+    setStatus({ kind: "warming" });
+    // The request is awaited only so we know when to clear
+    // `prewarming`; its response is not inspected. The refresh() in
+    // `finally`, plus the regular poll, reports when both upstreams
+    // have flipped to "ok".
+    try {
+      // No timeout here — let the browser hold the connection until
+      // HF Spaces wakes up and answers.
+      await fetch(HEALTH_URL, { method: "GET", mode: "cors" });
+    } catch {
+      // Ignore — the polling loop will surface the real state.
+    } finally {
+      setPrewarming(false);
+      void refresh();
+    }
+  }
+  return <StatusBanner status={status} onPrewarm={handlePrewarm} prewarming={prewarming} />;
+}
+// ---------------------------------------------------------------------
+// Render
+// ---------------------------------------------------------------------
+/** Props for StatusBanner. */
+interface StatusBannerProps {
+  /** Status to render (drives colours, headline and description). */
+  status: Status;
+  /** Invoked when the user clicks the Prewarm button. */
+  onPrewarm: () => void;
+  /** True while a prewarm request is outstanding; disables the button
+   *  and switches its label to "Prewarming…". */
+  prewarming: boolean;
+}
+/** Presentational banner: coloured dot, headline, description and an
+ *  optional Prewarm button. Holds no state of its own. */
+function StatusBanner({
+  status,
+  onPrewarm,
+  prewarming,
+}: StatusBannerProps): JSX.Element {
+  const tone = toneFor(status);
+  // Keep the button mounted while a prewarm is in progress. Clicking
+  // it moves the status to "warming", for which showsPrewarm() is
+  // false — without the `prewarming` check the button would vanish on
+  // click and its disabled "Prewarming…" state could never be seen.
+  const buttonVisible = showsPrewarm(status) || prewarming;
+  return (
+    <div
+      className={cn(
+        "rounded-lg border px-3 py-2 text-[11px] leading-relaxed",
+        tone.bg,
+        tone.border,
+      )}
+    >
+      <div className="flex items-start gap-2">
+        <span
+          aria-hidden
+          className={cn("mt-1 inline-block h-2 w-2 shrink-0 rounded-full", tone.dot)}
+        />
+        <div className="flex-1 min-w-0">
+          <p className={cn("font-medium", tone.title)}>{labelFor(status)}</p>
+          <p className="mt-0.5 text-textMuted">{descriptionFor(status)}</p>
+        </div>
+        {buttonVisible ? (
+          <button
+            type="button"
+            onClick={onPrewarm}
+            disabled={prewarming}
+            className={cn(
+              "shrink-0 rounded-md border border-border bg-surface px-2 py-1 text-[10px] font-medium uppercase tracking-wider transition",
+              "hover:bg-surfaceMuted disabled:cursor-not-allowed disabled:opacity-60",
+            )}
+          >
+            {prewarming ? "Prewarming…" : "Prewarm GPU"}
+          </button>
+        ) : null}
+      </div>
+    </div>
+  );
+}
+/** Tailwind class names that colour one banner variant. */
+interface Tone {
+  bg: string;
+  border: string;
+  dot: string;
+  title: string;
+}
+/** Pick the colour scheme for a status: emerald for awake, amber for
+ *  warming (pulsing dot) and asleep, rose for error, and the neutral
+ *  surface palette (pulsing dot) for unknown or anything else. */
+function toneFor(status: Status): Tone {
+  // Class names are written out in full so they stay plain string
+  // literals in the source.
+  const palette = new Map<Status["kind"], Tone>([
+    [
+      "awake",
+      {
+        bg: "bg-emerald-950/40",
+        border: "border-emerald-800/60",
+        dot: "bg-emerald-400",
+        title: "text-emerald-200",
+      },
+    ],
+    [
+      "warming",
+      {
+        bg: "bg-amber-950/40",
+        border: "border-amber-800/60",
+        dot: "bg-amber-400 animate-pulse",
+        title: "text-amber-200",
+      },
+    ],
+    [
+      "asleep",
+      {
+        bg: "bg-amber-950/40",
+        border: "border-amber-800/60",
+        dot: "bg-amber-500",
+        title: "text-amber-200",
+      },
+    ],
+    [
+      "error",
+      {
+        bg: "bg-rose-950/40",
+        border: "border-rose-800/60",
+        dot: "bg-rose-500",
+        title: "text-rose-200",
+      },
+    ],
+  ]);
+  const neutral: Tone = {
+    bg: "bg-surfaceMuted",
+    border: "border-border",
+    dot: "bg-textMuted animate-pulse",
+    title: "text-textPrimary",
+  };
+  return palette.get(status.kind) ?? neutral;
+}
+/** Headline text for the banner; falls back to the "checking"
+ *  message for the unknown state or any unrecognised kind. */
+function labelFor(status: Status): string {
+  if (status.kind === "awake") {
+    if (status.bothUpstreams) {
+      return "GPU is warm — both models loaded";
+    }
+    return "GPU is warm";
+  }
+  if (status.kind === "warming") {
+    return "GPU is warming up";
+  }
+  if (status.kind === "asleep") {
+    return "GPU is asleep";
+  }
+  if (status.kind === "error") {
+    return "Couldn't reach the GPU Space";
+  }
+  return "Checking GPU status…";
+}
+/** Secondary, explanatory line shown under the headline; falls back
+ *  to the "probing" message for the unknown state or any unrecognised
+ *  kind. */
+function descriptionFor(status: Status): string {
+  const blurbs = new Map<Status["kind"], string>([
+    [
+      "awake",
+      "Next request will respond in ~1-3 s. Sleeps again after 5 min idle.",
+    ],
+    [
+      "warming",
+      "vLLM is loading the 3B weights. First request will resolve in ~30-90 s; subsequent calls are fast.",
+    ],
+    [
+      "asleep",
+      "First request will trigger a cold boot (~90-120 s while vLLM loads two 3B models on the L4). Click Prewarm now if you'd rather not wait inside the episode.",
+    ],
+    [
+      "error",
+      "The Space might be temporarily unreachable. Episodes targeting PhysiX-Infer will fail until it recovers — try Hugging Face Router as a fallback.",
+    ],
+  ]);
+  return (
+    blurbs.get(status.kind) ??
+    "Probing https://pratyush-01-physix-infer.hf.space/health …"
+  );
+}
+/** Whether the given status should offer the Prewarm button: only
+ *  when the Space looks asleep or unreachable. */
+function showsPrewarm(status: Status): boolean {
+  switch (status.kind) {
+    case "asleep":
+    case "error":
+      return true;
+    default:
+      return false;
+  }
+}

frontend/src/components/RunWithLlmPane.tsx CHANGED Viewed

@@ -104,6 +104,8 @@ export function RunWithLlmPane(): JSX.Element {
         </p>
       </header>
       <LlmConnectionPanel
         title="LLM"
         subtitle="One model drives the episode."
@@ -639,6 +641,59 @@ function RewardCell({
   );
 }
 function ErrorRow({
   message,
   onDismiss,

         </p>
       </header>
+      <EndpointGuide />
       <LlmConnectionPanel
         title="LLM"
         subtitle="One model drives the episode."
   );
 }
+/** "Which endpoint should I pick?" callout shown above the connection
+ *  panel. Three rows of one-liner guidance; no images, no links to
+ *  external docs — keeps the page fast and the answer visible without
+ *  scrolling.
+ *
+ *  Why this exists:
+ *    The endpoint dropdown has 5 options and the optimal pick depends
+ *    on what the user has on hand. Without this callout most visitors
+ *    default to whatever's first and either (a) hit a token error
+ *    (HF Router with no token) or (b) sit through a 90 s GPU cold-boot
+ *    (PhysiX-Infer) without knowing it's coming. */
+function EndpointGuide(): JSX.Element {
+  // Static callout: a heading plus three <li> rows of guidance, no props or
+  // state. NOTE: JSX trims text lines and drops the newline between a text
+  // line and a following element, so every text -> element line break below
+  // carries an explicit {" "} to keep the words from running together.
+  return (
+    <div className="rounded-lg border border-border bg-surfaceMuted px-4 py-3 text-xs leading-relaxed text-textMuted">
+      <p className="heading-eyebrow text-textPrimary">
+        Which endpoint should you pick?
+      </p>
+      <ul className="mt-2 flex flex-col gap-1.5">
+        <li>
+          <span className="text-textPrimary">Hugging Face Router</span>{" "}
+          <span className="rounded bg-surface px-1.5 py-0.5 text-[10px] uppercase tracking-wider text-textMuted">
+            default
+          </span>{" "}
+          — easiest path. Paste a token from{" "}
+          <code className="font-mono text-textPrimary">
+            huggingface.co/settings/tokens
+          </code>{" "}
+          (with the &quot;Make calls to Inference Providers&quot; permission),
+          pick a suggested model, hit Run. Responds in ~2 s, no warm-up.
+        </li>
+        <li>
+          <span className="text-textPrimary">PhysiX-Infer GPU ✦</span> — only
+          way to compare the GRPO-trained{" "}
+          <code className="font-mono text-textPrimary">physix-3b-rl</code>{" "}
+          against its{" "}
+          <code className="font-mono text-textPrimary">Qwen 2.5 3B</code> base
+          on identical hardware. No token. Sleeps after 5 min idle so first
+          request after sleep takes ~90-120 s while two 3B models load on the
+          L4 — the status banner below shows live state, with a Prewarm
+          button to wake it before you hit Run.
+        </li>
+        <li>
+          <span className="text-textPrimary">Ollama / OpenAI / Custom</span>{" "}
+          — bring your own endpoint. Useful for local dev (Ollama on{" "}
+          <code className="font-mono text-textPrimary">localhost:11434</code>),
+          frontier-model baselines (OpenAI), or pointing at a private vLLM /
+          inference endpoint URL.
+        </li>
+      </ul>
+    </div>
+  );
+}
 function ErrorRow({
   message,
   onDismiss,

frontend/tsconfig.tsbuildinfo CHANGED Viewed

@@ -1 +1 @@

- {"root":["./src/app.tsx","./src/main.tsx","./src/components/comparepane.tsx","./src/components/equationdisplay.tsx","./src/components/herointro.tsx","./src/components/llmconnectionpanel.tsx","./src/components/openenvexplorerpane.tsx","./src/components/rewardgauge.tsx","./src/components/runwithllmpane.tsx","./src/components/skeleton.tsx","./src/components/trajectorycanvas.tsx","./src/hooks/usellmcomparerunner.ts","./src/hooks/usellmepisoderunner.ts","./src/lib/cn.ts","./src/lib/format.ts","./src/lib/interactiveclient.ts","./src/lib/llmpresets.ts","./src/lib/trajectory.ts","./src/types/physix.ts"],"version":"5.6.3"}

+ {"root":["./src/app.tsx","./src/main.tsx","./src/components/comparepane.tsx","./src/components/equationdisplay.tsx","./src/components/herointro.tsx","./src/components/llmconnectionpanel.tsx","./src/components/openenvexplorerpane.tsx","./src/components/physixinferstatus.tsx","./src/components/rewardgauge.tsx","./src/components/runwithllmpane.tsx","./src/components/skeleton.tsx","./src/components/trajectorycanvas.tsx","./src/hooks/usellmcomparerunner.ts","./src/hooks/usellmepisoderunner.ts","./src/lib/cn.ts","./src/lib/format.ts","./src/lib/interactiveclient.ts","./src/lib/llmpresets.ts","./src/lib/trajectory.ts","./src/types/physix.ts"],"version":"5.6.3"}

train/.gitignore ADDED Viewed

	@@ -0,0 +1,5 @@

+**/__pycache__/
+**/*.pyc
+**/*.egg-info/
+runs/
+.DS_Store