Pratyush-01 committed on
Commit
a507dcb
·
verified ·
1 Parent(s): a548276

ui: status banner + endpoint guide for physix-infer cold-start

Browse files
.openenvignore ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Files OpenEnv `push` should NOT upload to the HF Space.
2
+ #
3
+ # OpenEnv's defaults already exclude `.*` (so .venv, .pytest_cache,
4
+ # .ruff_cache, .vscode are gone), `__pycache__`, and `*.pyc`. This
5
+ # file adds the heavyweight build artefacts and source-tree subdirs
6
+ # that don't belong in the Space build context.
7
+ #
8
+ # IMPORTANT: keep this list tight. Anything we exclude here is invisible
9
+ # to the HF Spaces Docker build, so files referenced by Dockerfile.COPY
10
+ # steps must NOT appear here.
11
+
12
+ # Frontend build artefacts. The Space's two-stage Dockerfile rebuilds
13
+ # the SPA from `frontend/` source, so we don't ship the host build.
14
+ frontend/node_modules
15
+ frontend/dist
16
+ frontend/dist-ts-build
17
+
18
+ # Training stack — runs on HF Jobs, not in the Space. The Space image
19
+ # explicitly installs only the inference deps (no torch / unsloth / trl)
20
+ # to keep cold-start small.
21
+ train
22
+
23
+ # Tests + dev-only docs aren't needed at runtime. Excluding them
24
+ # shrinks the build context and removes a (tiny) source of noise in
25
+ # the Hub UI.
26
+ tests
27
+ docs
28
+
29
+ # scripts/ is mostly dev-only, but space_app.py is referenced by the
30
+ # Dockerfile (mounts the SPA + adds the / -> /web/ redirect) and MUST
31
+ # be present at build time. So we exclude the verifier helpers
32
+ # explicitly rather than blanket-excluding the directory.
33
+ scripts/verify_hf_router.py
34
+ scripts/HF_ROUTER_VERIFICATION.md
35
+
36
+ # Build / packaging artefacts.
37
+ *.egg-info
38
+ build
39
+ dist
40
+
41
+ # Editor / OS detritus.
42
+ .DS_Store
43
+ *.swp
.pytest_cache/.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # Created by pytest automatically.
2
+ *
.pytest_cache/CACHEDIR.TAG ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Signature: 8a477f597d28d172789f06886806bc55
2
+ # This file is a cache directory tag created by pytest.
3
+ # For information about cache directory tags, see:
4
+ # https://bford.info/cachedir/spec.html
.pytest_cache/README.md ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # pytest cache directory #
2
+
3
+ This directory contains data from the pytest's cache plugin,
4
+ which provides the `--lf` and `--ff` options, as well as the `cache` fixture.
5
+
6
+ **Do not** commit this to version control.
7
+
8
+ See [the docs](https://docs.pytest.org/en/stable/how-to/cache.html) for more information.
.pytest_cache/v/cache/lastfailed ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "tests/test_parser.py::test_unknown_dy_in_paired_system_suggests_vy": true,
3
+ "tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[(lambda x: x)(y)-Lambda]": true,
4
+ "tests/test_dataset.py": true,
5
+ "tests/test_sft_dataset.py": true
6
+ }
.pytest_cache/v/cache/nodeids ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ "tests/test_client_ws.py::test_websocket_round_trip",
3
+ "tests/test_dataset.py::test_dataset_observed_arrays_match_state_variables",
4
+ "tests/test_dataset.py::test_eval_dataset_marks_held_out_rows",
5
+ "tests/test_dataset.py::test_training_dataset_default_curriculum_is_demo_systems",
6
+ "tests/test_dataset.py::test_training_dataset_default_curriculum_is_supported_systems",
7
+ "tests/test_dataset.py::test_training_dataset_explicit_system_ids_override_default",
8
+ "tests/test_dataset.py::test_training_dataset_has_expected_schema",
9
+ "tests/test_dataset.py::test_training_dataset_prompts_are_chat_lists",
10
+ "tests/test_dataset.py::test_training_dataset_rejects_empty_system_ids",
11
+ "tests/test_dataset.py::test_training_dataset_rejects_unknown_system_ids",
12
+ "tests/test_dataset.py::test_training_dataset_uses_only_train_tiers",
13
+ "tests/test_environment.py::test_episode_terminates_on_convergence",
14
+ "tests/test_environment.py::test_history_accumulates_across_turns",
15
+ "tests/test_environment.py::test_max_turns_terminates_episode",
16
+ "tests/test_environment.py::test_progress_reward_rewards_improvement",
17
+ "tests/test_environment.py::test_reset_returns_well_formed_observation",
18
+ "tests/test_environment.py::test_state_property_exposes_episode_id",
19
+ "tests/test_environment.py::test_step_swallows_simulator_failures_as_format_zero_match_zero[free_fall-d2y/dt2 = -9.81 / (y - y)]",
20
+ "tests/test_environment.py::test_step_swallows_simulator_failures_as_format_zero_match_zero[free_fall-d2y/dt2 = exp(exp(exp(y)))]",
21
+ "tests/test_environment.py::test_step_swallows_simulator_failures_as_format_zero_match_zero[free_fall-d2y/dt2 = log(0 * y)]",
22
+ "tests/test_environment.py::test_step_swallows_simulator_failures_as_format_zero_match_zero[simple_pendulum-d2theta/dt2 = -sqrt(-theta**2 - 1)]",
23
+ "tests/test_environment.py::test_step_swallows_simulator_failures_as_format_zero_match_zero[simple_pendulum-d2theta/dt2 = -sqrt(dtheta**2 + theta**2) * sin(theta)]",
24
+ "tests/test_environment.py::test_step_with_ground_truth_rewards_high",
25
+ "tests/test_environment.py::test_step_with_unparseable_equation_short_circuits",
26
+ "tests/test_interactive_api.py::test_budget_exhaustion_returns_409_on_next_step",
27
+ "tests/test_interactive_api.py::test_cors_preflight_for_dev_origin",
28
+ "tests/test_interactive_api.py::test_llm_step_after_budget_exhaustion_returns_409",
29
+ "tests/test_interactive_api.py::test_llm_step_drives_a_turn_using_injected_policy",
30
+ "tests/test_interactive_api.py::test_llm_step_handles_unparseable_completion_as_format_zero",
31
+ "tests/test_interactive_api.py::test_llm_step_runs_full_episode_with_three_canned_turns",
32
+ "tests/test_interactive_api.py::test_models_endpoint_returns_empty_with_error_when_daemon_unavailable",
33
+ "tests/test_interactive_api.py::test_models_endpoint_returns_injected_list",
34
+ "tests/test_interactive_api.py::test_session_lifecycle_create_step_delete",
35
+ "tests/test_interactive_api.py::test_session_lifecycle_create_summary_delete",
36
+ "tests/test_interactive_api.py::test_step_with_ground_truth_marks_done",
37
+ "tests/test_interactive_api.py::test_systems_endpoint_excludes_held_out_tier",
38
+ "tests/test_interactive_api.py::test_systems_endpoint_returns_demo_curriculum_in_order",
39
+ "tests/test_interactive_api.py::test_systems_endpoint_returns_supported_systems_in_order",
40
+ "tests/test_interactive_api.py::test_unknown_session_id_returns_404",
41
+ "tests/test_interactive_api.py::test_unknown_system_id_returns_400",
42
+ "tests/test_interactive_api.py::test_unparseable_equation_returns_zero_format_not_500",
43
+ "tests/test_metrics_diagnostic.py::test_diag_scores_perfect_on_identical_trajectory",
44
+ "tests/test_metrics_diagnostic.py::test_diag_scores_stay_informative_when_r2_collapses",
45
+ "tests/test_metrics_diagnostic.py::test_freq_hint_in_mismatch_summary_for_freq_mismatch",
46
+ "tests/test_metrics_diagnostic.py::test_freq_score_falls_back_for_non_oscillatory_signal",
47
+ "tests/test_metrics_diagnostic.py::test_reward_total_unchanged_by_diagnostic_fields",
48
+ "tests/test_parser.py::test_alias_does_not_fire_when_velocity_state_is_named_dvar",
49
+ "tests/test_parser.py::test_alias_only_replaces_word_boundary_matches",
50
+ "tests/test_parser.py::test_bare_dx_alias_substitutes_for_vx",
51
+ "tests/test_parser.py::test_basic_equation_round_trips",
52
+ "tests/test_parser.py::test_caret_is_accepted_as_power_synonym",
53
+ "tests/test_parser.py::test_decimal_literals_are_not_misread_as_attribute_access",
54
+ "tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[(lambda x: x)(y)-Lambda]",
55
+ "tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[(lambda x: x)(y)-computed-name call]",
56
+ "tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[(y, vy)-Collection]",
57
+ "tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[lambda x: x-Lambda]",
58
+ "tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[vy[0]-Array indexing]",
59
+ "tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[y == vy-Comparisons]",
60
+ "tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[y and vy-Boolean]",
61
+ "tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[y if vy > 0 else -y-Conditional]",
62
+ "tests/test_parser.py::test_dotted_attribute_access_is_rejected_with_clear_error",
63
+ "tests/test_parser.py::test_dotted_attribute_access_variants_all_rejected[math.sin(theta)]",
64
+ "tests/test_parser.py::test_dotted_attribute_access_variants_all_rejected[np.exp(-theta**2)]",
65
+ "tests/test_parser.py::test_dotted_attribute_access_variants_all_rejected[numpy.cos(theta) + 1]",
66
+ "tests/test_parser.py::test_dotted_attribute_access_variants_all_rejected[scipy.special.expit(theta)]",
67
+ "tests/test_parser.py::test_dotted_attribute_access_variants_all_rejected[theta.diff()]",
68
+ "tests/test_parser.py::test_dx_dt_alias_substitutes_for_vx_when_velocity_state_exists",
69
+ "tests/test_parser.py::test_dy_dt_alias_substitutes_for_vy",
70
+ "tests/test_parser.py::test_grammar_hint_documents_velocity_convention",
71
+ "tests/test_parser.py::test_keyword_arguments_in_call_are_rejected_with_specific_hint",
72
+ "tests/test_parser.py::test_multiple_equations_split_on_semicolons_keep_alias_behaviour",
73
+ "tests/test_parser.py::test_only_parse_error_ever_escapes_the_parser",
74
+ "tests/test_parser.py::test_sympy_internal_errors_become_parse_errors_not_attribute_errors",
75
+ "tests/test_parser.py::test_unknown_dx_in_system_without_paired_velocity_includes_hint",
76
+ "tests/test_parser.py::test_unknown_dy_in_paired_system_suggests_vy",
77
+ "tests/test_parser.py::test_unknown_t_emits_autonomy_hint",
78
+ "tests/test_prompt.py::test_build_prompt_returns_chat_pair",
79
+ "tests/test_prompt.py::test_history_block_surfaces_dense_reward_components",
80
+ "tests/test_prompt.py::test_history_block_tolerates_missing_reward_components",
81
+ "tests/test_prompt.py::test_history_uses_equation_field_name_not_shorthand",
82
+ "tests/test_prompt.py::test_parse_completion_accepts_capitalised_keys",
83
+ "tests/test_prompt.py::test_parse_completion_accepts_eqn_synonym",
84
+ "tests/test_prompt.py::test_parse_completion_accepts_other_equation_synonyms",
85
+ "tests/test_prompt.py::test_parse_completion_accepts_parameters_synonym",
86
+ "tests/test_prompt.py::test_parse_completion_accepts_reasoning_synonym",
87
+ "tests/test_prompt.py::test_parse_completion_canonical_key_wins_over_synonym",
88
+ "tests/test_prompt.py::test_parse_completion_coerces_string_params",
89
+ "tests/test_prompt.py::test_parse_completion_extracts_clean_json",
90
+ "tests/test_prompt.py::test_parse_completion_handles_code_fences",
91
+ "tests/test_prompt.py::test_parse_completion_handles_latex_braces_inside_json_string",
92
+ "tests/test_prompt.py::test_parse_completion_normalizes_latex_frac_and_caret",
93
+ "tests/test_prompt.py::test_parse_completion_picks_first_object_when_text_has_multiple",
94
+ "tests/test_prompt.py::test_parse_completion_with_no_json_falls_back_to_raw_text",
95
+ "tests/test_prompt.py::test_parse_completion_with_no_json_yields_empty_equation",
96
+ "tests/test_prompt.py::test_render_includes_history_when_present",
97
+ "tests/test_prompt.py::test_render_includes_metadata_block",
98
+ "tests/test_prompt.py::test_render_includes_trajectory_samples",
99
+ "tests/test_prompt.py::test_render_omits_history_block_when_empty",
100
+ "tests/test_prompt.py::test_system_message_locks_in_canonical_field_name",
101
+ "tests/test_providers.py::test_format_provider_error_pinpoints_auth_failure",
102
+ "tests/test_providers.py::test_format_provider_error_pinpoints_missing_model",
103
+ "tests/test_providers.py::test_format_provider_error_pinpoints_unreachable_endpoint",
104
+ "tests/test_providers.py::test_resolve_api_key_browser_supplied_wins",
105
+ "tests/test_providers.py::test_resolve_api_key_falls_back_to_hf_token_for_hf_router",
106
+ "tests/test_providers.py::test_resolve_api_key_falls_back_to_openai_env",
107
+ "tests/test_providers.py::test_resolve_api_key_for_ollama_returns_placeholder_when_no_env",
108
+ "tests/test_providers.py::test_resolve_api_key_returns_none_for_unknown_url",
109
+ "tests/test_providers.py::test_resolve_api_key_uses_huggingface_api_key_if_hf_token_missing",
110
+ "tests/test_providers_hf.py::test_full_episode_flows_visitor_config_to_openai_client_unchanged",
111
+ "tests/test_providers_hf.py::test_full_episode_recovers_from_first_call_response_format_400",
112
+ "tests/test_providers_hf.py::test_hf_router_401_surfaces_inference_providers_permission_hint",
113
+ "tests/test_providers_hf.py::test_hf_router_404_surfaces_warm_provider_hint",
114
+ "tests/test_providers_hf.py::test_hf_router_base_url_is_canonical",
115
+ "tests/test_providers_hf.py::test_hf_router_client_is_constructed_with_visitor_token",
116
+ "tests/test_providers_hf.py::test_hf_router_client_uses_hf_token_env_when_visitor_omits_key",
117
+ "tests/test_providers_hf.py::test_hf_router_connection_failure_surfaces_network_hint",
118
+ "tests/test_providers_hf.py::test_hf_router_retries_without_response_format_on_bad_request",
119
+ "tests/test_providers_hf.py::test_hf_router_succeeds_on_first_try_when_provider_supports_json",
120
+ "tests/test_providers_hf.py::test_hf_router_timeout_surfaces_network_hint",
121
+ "tests/test_registry.py::test_demo_curriculum_is_non_empty",
122
+ "tests/test_registry.py::test_demo_curriculum_only_references_registered_systems",
123
+ "tests/test_registry.py::test_every_demo_system_instantiates_cleanly",
124
+ "tests/test_registry.py::test_every_supported_system_instantiates_cleanly",
125
+ "tests/test_registry.py::test_list_demo_systems_preserves_declared_order",
126
+ "tests/test_registry.py::test_list_supported_systems_preserves_declared_order",
127
+ "tests/test_registry.py::test_supported_systems_is_non_empty",
128
+ "tests/test_registry.py::test_supported_systems_only_references_registered_systems",
129
+ "tests/test_scorer.py::test_scorer_caches_by_key",
130
+ "tests/test_scorer.py::test_scorer_progress_shrinks_when_previous_total_is_high",
131
+ "tests/test_scorer.py::test_scorer_returns_high_match_for_ground_truth",
132
+ "tests/test_scorer.py::test_scorer_returns_zero_format_on_garbage",
133
+ "tests/test_sft_dataset.py::test_build_sft_dataset_completions_are_valid_json_action",
134
+ "tests/test_sft_dataset.py::test_build_sft_dataset_default_curriculum_size",
135
+ "tests/test_sft_dataset.py::test_build_sft_dataset_default_curriculum_size_matches_demo",
136
+ "tests/test_sft_dataset.py::test_build_sft_dataset_default_scales_with_instances_per_system",
137
+ "tests/test_sft_dataset.py::test_build_sft_dataset_default_size_matches_supported_systems",
138
+ "tests/test_sft_dataset.py::test_build_sft_dataset_explicit_system_ids_override_default",
139
+ "tests/test_sft_dataset.py::test_build_sft_dataset_rejects_empty_system_ids",
140
+ "tests/test_sft_dataset.py::test_build_sft_dataset_rejects_unknown_system_ids"
141
+ ]
.ruff_cache/.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # Automatically created by ruff.
2
+ *
.ruff_cache/0.15.12/13147506665210707489 ADDED
Binary file (85 Bytes). View file
 
.ruff_cache/0.15.12/18068402563573118557 ADDED
Binary file (1.11 kB). View file
 
.ruff_cache/0.15.12/3907545159351961271 ADDED
Binary file (308 Bytes). View file
 
.ruff_cache/0.15.12/482523753705084482 ADDED
Binary file (355 Bytes). View file
 
.ruff_cache/0.15.12/8552442504755746477 ADDED
Binary file (1.01 kB). View file
 
.ruff_cache/CACHEDIR.TAG ADDED
@@ -0,0 +1 @@
 
 
1
+ Signature: 8a477f597d28d172789f06886806bc55
.vscode/settings.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "python.defaultInterpreterPath": "/Users/pratyush/miniconda3/envs/openenv_run/bin/python",
3
+ "python.analysis.extraPaths": ["${workspaceFolder}"],
4
+ "python.testing.pytestEnabled": true,
5
+ "python.testing.pytestArgs": ["tests"]
6
+ }
Dockerfile CHANGED
@@ -38,7 +38,7 @@ COPY frontend/ ./
38
  ENV VITE_PHYSIX_API_URL=""
39
  # Cache-bust marker. Bump when an SPA change isn't taking on the Space —
40
  # HF BuildKit occasionally reuses stage-1 output even when sources changed.
41
- # physix-spa-rebuild: 5
42
  RUN pnpm exec tsc -b \
43
  && pnpm exec vite build --base=/web/
44
 
@@ -102,5 +102,4 @@ EXPOSE 7860
102
  HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
103
  CMD curl -fsS "http://127.0.0.1:${PORT}/health" || exit 1
104
 
105
- ENV ENABLE_WEB_INTERFACE=true
106
  CMD ["python3", "-m", "uvicorn", "_space_app:app", "--host", "0.0.0.0", "--port", "7860"]
 
38
  ENV VITE_PHYSIX_API_URL=""
39
  # Cache-bust marker. Bump when an SPA change isn't taking on the Space —
40
  # HF BuildKit occasionally reuses stage-1 output even when sources changed.
41
+ # physix-spa-rebuild: 6
42
  RUN pnpm exec tsc -b \
43
  && pnpm exec vite build --base=/web/
44
 
 
102
  HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
103
  CMD curl -fsS "http://127.0.0.1:${PORT}/health" || exit 1
104
 
 
105
  CMD ["python3", "-m", "uvicorn", "_space_app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -14,7 +14,6 @@ tags:
14
  - physics
15
  - equation-discovery
16
  - ode
17
- base_path: /web
18
  ---
19
 
20
  # PhysiX — Equation Discovery via RLVR
 
14
  - physics
15
  - equation-discovery
16
  - ode
 
17
  ---
18
 
19
  # PhysiX — Equation Discovery via RLVR
docs/plots/loss.png CHANGED
docs/plots/reward.png CHANGED

Git LFS Details

  • SHA256: bfb513495bf11c8d1892f534085a7d3eab9c7add6b23e2969953014c28527d0b
  • Pointer size: 131 Bytes
  • Size of remote file: 196 kB

Git LFS Details

  • SHA256: e57c92b1c5f9a8ec1ea0d352c406457718f537ebc962b410fc82c3dba9eedfd7
  • Pointer size: 131 Bytes
  • Size of remote file: 161 kB
docs/plots/reward_components.png CHANGED

Git LFS Details

  • SHA256: 9a3fdb634dd2672f3f15a79fe52a6ec3063f248e3c22c111a23c0f2da6ad198e
  • Pointer size: 131 Bytes
  • Size of remote file: 257 kB

Git LFS Details

  • SHA256: c959d29cb0efeb3d9726a7846f557c5095c72c23ec428dfcbfad75aeadb63a9c
  • Pointer size: 131 Bytes
  • Size of remote file: 212 kB
frontend/src/components/LlmConnectionPanel.tsx CHANGED
@@ -14,6 +14,7 @@
14
 
15
  import { useEffect, useState } from "react";
16
 
 
17
  import { cn } from "@/lib/cn";
18
  import type { LlmModelInfo } from "@/lib/interactiveClient";
19
  import {
@@ -195,6 +196,14 @@ export function LlmConnectionPanel({
195
  {endpoint.hint}
196
  </p>
197
 
 
 
 
 
 
 
 
 
198
  {endpoint.id === "ollama" && installedOllamaError ? (
199
  <OllamaTroubleshooter
200
  message={installedOllamaError}
 
14
 
15
  import { useEffect, useState } from "react";
16
 
17
+ import { PhysixInferStatus } from "@/components/PhysixInferStatus";
18
  import { cn } from "@/lib/cn";
19
  import type { LlmModelInfo } from "@/lib/interactiveClient";
20
  import {
 
196
  {endpoint.hint}
197
  </p>
198
 
199
+ {/*
200
+ Live status banner for the GPU Space. Only renders for the
201
+ physix endpoint — every other endpoint either has no sleep
202
+ cycle (HF Router, OpenAI, Custom) or is local-only (Ollama),
203
+ so there's nothing to surface.
204
+ */}
205
+ {endpoint.id === "physix" ? <PhysixInferStatus /> : null}
206
+
207
  {endpoint.id === "ollama" && installedOllamaError ? (
208
  <OllamaTroubleshooter
209
  message={installedOllamaError}
frontend/src/components/PhysixInferStatus.tsx ADDED
@@ -0,0 +1,368 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /** Status banner for the PhysiX-Infer GPU Space.
2
+ *
3
+ * Why this exists:
4
+ * The PhysiX-Infer Space sleeps after 5 min of idle to avoid burning
5
+ * GPU time. A cold-start takes 90-120 s while vLLM downloads / loads
6
+ * weights for both 3B models. Without warning, a user picks the
7
+ * PhysiX endpoint, hits Run, and stares at a spinner for 2 minutes
8
+ * convinced something is broken.
9
+ *
10
+ * This panel surfaces the underlying state so the wait is *expected*,
11
+ * not surprising — and offers a one-click "Prewarm" button so the
12
+ * user can kick the boot off before they pick a system / hit Run.
13
+ *
14
+ * Mechanics:
15
+ * - On mount, GET https://pratyush-01-physix-infer.hf.space/health.
16
+ * - HF Spaces' edge proxy returns one of three observable states:
17
+ * * 200 with body { upstreams: { qwen: "ok", physix: "ok" } }
18
+ * → both vLLMs are loaded and serving. Fast next call.
19
+ * * 200 with one upstream not "ok", or any non-2xx status (e.g. 503)
20
+ * → container running but vLLM still warming. Some calls fast,
21
+ * some still slow. Treat as "warming".
22
+ * * no response within the 6 s probe timeout
23
+ * → Space is asleep. Whatever woke it (this very request) will
24
+ * now ride the cold-boot pipeline.
25
+ * - Re-poll every 15 s while the component is mounted so the badge
26
+ * stays accurate as the user thinks. Polling is cheap and the
27
+ * requests count as activity, which keeps the Space awake while
28
+ * they read — exactly the UX we want during a demo.
29
+ *
30
+ * Note on CORS: the physix-infer FastAPI uses default CORS. The probe
31
+ * is a fetch in "cors" mode, so the browser exposes the status code and
32
+ * body only if the response carries Access-Control-Allow-Origin;
33
+ * otherwise fetch rejects and we render "error". If a 200 body is not
34
+ * parseable JSON we fall back to "container is up" (best-effort), and
35
+ * a probe that times out renders "asleep". */
36
+
37
+ import { useCallback, useEffect, useRef, useState } from "react";
38
+
39
+ import { cn } from "@/lib/cn";
40
+ import { PHYSIX_INFER_BASE_URL } from "@/lib/llmPresets";
41
+
42
// The health route sits at the proxy root, not under the "/v1" prefix of
// the inference base URL, so a trailing "/v1" (or "/v1/") is removed first.
const HEALTH_URL = `${PHYSIX_INFER_BASE_URL.replace(/\/v1\/?$/, "")}/health`;

// Delay between background probes while the banner is mounted (15 s).
const POLL_INTERVAL_MS = 15_000;

// Upper bound on a single polling probe (6 s). A probe that has not
// answered by then is aborted and reported as "asleep" instead of being
// held open for the whole cold boot.
const PROBE_TIMEOUT_MS = 6_000;

// Everything the banner can display. "unknown" is the initial state
// before the first probe has resolved.
type Status =
  | { kind: "unknown" }
  | { kind: "awake"; bothUpstreams: boolean }
  | { kind: "warming" }
  | { kind: "asleep" }
  | { kind: "error"; message: string };

interface ProbeResult {
  status: Status;
  /** True when the probe received an HTTP response at all, i.e. the
   *  request got far enough to count as a wake-up signal. */
  hitContainer: boolean;
}

// Module-level sharing between every mounted copy of the banner:
//   - `inFlight` lets concurrent callers await one network request;
//   - `lastResult` keeps the most recent probe outcome (whatever its
//     status) so a caller arriving within SHARED_RESULT_WINDOW_MS of it
//     reuses that answer instead of issuing another GET.
let inFlight: Promise<ProbeResult> | null = null;
let lastResult: { result: ProbeResult; at: number } | null = null;
const SHARED_RESULT_WINDOW_MS = 5_000;
82
+
83
/** Probe the health endpoint once, sharing work between callers.
 *
 * Resolution order:
 *   1. a probe already in flight  -> return that same promise;
 *   2. a result younger than SHARED_RESULT_WINDOW_MS -> replay it;
 *   3. otherwise start a new probe bounded by PROBE_TIMEOUT_MS.
 *
 * Every completed probe is remembered in `lastResult`, regardless of
 * the status it produced. */
async function probe(): Promise<ProbeResult> {
  if (inFlight !== null) {
    return inFlight;
  }

  const cached = lastResult;
  if (cached !== null && Date.now() - cached.at < SHARED_RESULT_WINDOW_MS) {
    return cached.result;
  }

  // Runs runProbe under an abort timer and always clears the timer.
  const runWithTimeout = async (): Promise<ProbeResult> => {
    const aborter = new AbortController();
    const timer = window.setTimeout(() => aborter.abort(), PROBE_TIMEOUT_MS);
    try {
      return await runProbe(aborter.signal);
    } finally {
      window.clearTimeout(timer);
    }
  };

  const pending = runWithTimeout();
  inFlight = pending;

  try {
    const fresh = await pending;
    lastResult = { result: fresh, at: Date.now() };
    return fresh;
  } finally {
    // Clear the slot whether the probe resolved or threw, so the next
    // caller is free to start a new request.
    inFlight = null;
  }
}
117
+
118
/** Issue one GET against HEALTH_URL and classify the outcome.
 *
 *   - fetch rejects with AbortError      -> "asleep"  (hitContainer: false)
 *   - fetch rejects with anything else   -> "error"   (hitContainer: false)
 *   - response with a non-OK status      -> "warming"
 *   - OK, non-empty `upstreams` all "ok" -> "awake", bothUpstreams: true
 *   - OK, any other readable body        -> "warming"
 *   - OK, body unreadable / not JSON     -> "awake", bothUpstreams: false
 *
 * @param signal Abort signal; the caller aborts it to enforce a timeout. */
async function runProbe(signal: AbortSignal): Promise<ProbeResult> {
  let response: Response;
  try {
    response = await fetch(HEALTH_URL, { method: "GET", mode: "cors", signal });
  } catch (exc) {
    const failure = exc as Error;
    // An abort means the caller's timer fired before any response came
    // back; this is reported as "asleep" rather than as an outage.
    if (failure.name === "AbortError") {
      return { status: { kind: "asleep" }, hitContainer: false };
    }
    return {
      status: { kind: "error", message: failure.message },
      hitContainer: false,
    };
  }

  const warming: ProbeResult = {
    status: { kind: "warming" },
    hitContainer: true,
  };

  // Any non-2xx answer is treated as "reached, but not ready yet".
  if (!response.ok) {
    return warming;
  }

  try {
    const payload = (await response.json()) as {
      upstreams?: Record<string, string>;
    };
    const upstreams = payload.upstreams ?? {};
    const states = Object.values(upstreams);
    const everyUpstreamOk =
      Object.keys(upstreams).length > 0 && states.every((v) => v === "ok");
    if (!everyUpstreamOk) {
      return warming;
    }
    return {
      status: { kind: "awake", bothUpstreams: true },
      hitContainer: true,
    };
  } catch {
    // The body could not be read or inspected. A 200 still means
    // something answered, so report "awake" without upstream detail.
    return {
      status: { kind: "awake", bothUpstreams: false },
      hitContainer: true,
    };
  }
}
173
+
174
/** Live status banner for the PhysiX-Infer endpoint.
 *
 * Probes the health endpoint on mount and then every POLL_INTERVAL_MS,
 * and renders the result through StatusBanner. Also owns the "Prewarm"
 * action, which issues an un-timed GET so the request can stay open
 * until the Space answers. */
export function PhysixInferStatus(): JSX.Element {
  const [status, setStatus] = useState<Status>({ kind: "unknown" });
  const [prewarming, setPrewarming] = useState(false);
  // True while the banner is entitled to ignore ONE unreachable probe:
  // set when a probe reports "awake", cleared when that grace is spent.
  const wasAwakeRef = useRef(false);

  const refresh = useCallback(async () => {
    const { status: next } = await probe();
    const unreachable = next.kind === "asleep" || next.kind === "error";

    // Sticky-awake: tolerate a single asleep/error probe after a
    // confirmed "awake" so a transient blip doesn't flip the banner.
    // The grace is consumed here, so a second consecutive miss falls
    // through and updates the banner. (Previously the flag was never
    // cleared, so the banner could never leave "awake".)
    //
    // The ref is handled outside the setStatus updater on purpose:
    // updater functions must be pure and may be invoked twice.
    if (unreachable && wasAwakeRef.current) {
      wasAwakeRef.current = false;
      return;
    }
    if (next.kind === "awake") {
      wasAwakeRef.current = true;
    }
    setStatus(next);
  }, []);

  useEffect(() => {
    void refresh();
    const id = window.setInterval(() => void refresh(), POLL_INTERVAL_MS);
    return () => window.clearInterval(id);
  }, [refresh]);

  /** Wake the Space: show "warming" immediately, hold a GET open with
   *  no timeout, then re-probe once it settles either way. */
  async function handlePrewarm(): Promise<void> {
    if (prewarming) return;
    setPrewarming(true);
    setStatus({ kind: "warming" });
    try {
      // Deliberately no AbortController here: the request is awaited
      // only to know when to re-enable the button. Its response is not
      // inspected; the follow-up refresh() reports the actual state.
      await fetch(HEALTH_URL, { method: "GET", mode: "cors" });
    } catch {
      // Ignore — the polling loop will surface the real state.
    } finally {
      setPrewarming(false);
      void refresh();
    }
  }

  return (
    <StatusBanner
      status={status}
      onPrewarm={handlePrewarm}
      prewarming={prewarming}
    />
  );
}
231
+
232
+ // ---------------------------------------------------------------------
233
+ // Render
234
+ // ---------------------------------------------------------------------
235
+
236
interface StatusBannerProps {
  // State to display.
  status: Status;
  // Invoked when the Prewarm button is clicked.
  onPrewarm: () => void;
  // While true the Prewarm button is disabled and relabelled.
  prewarming: boolean;
}

/** Presentational banner: coloured dot, title, description and, for the
 *  states selected by showsPrewarm, a Prewarm button. */
function StatusBanner(props: StatusBannerProps): JSX.Element {
  const { status, onPrewarm, prewarming } = props;
  const { bg, border, dot, title } = toneFor(status);

  const prewarmButton = showsPrewarm(status) ? (
    <button
      type="button"
      onClick={onPrewarm}
      disabled={prewarming}
      className={cn(
        "shrink-0 rounded-md border border-border bg-surface px-2 py-1 text-[10px] font-medium uppercase tracking-wider transition",
        "hover:bg-surfaceMuted disabled:cursor-not-allowed disabled:opacity-60",
      )}
    >
      {prewarming ? "Prewarming…" : "Prewarm GPU"}
    </button>
  ) : null;

  return (
    <div
      className={cn(
        "rounded-lg border px-3 py-2 text-[11px] leading-relaxed",
        bg,
        border,
      )}
    >
      <div className="flex items-start gap-2">
        <span
          aria-hidden
          className={cn("mt-1 inline-block h-2 w-2 shrink-0 rounded-full", dot)}
        />
        <div className="flex-1 min-w-0">
          <p className={cn("font-medium", title)}>{labelFor(status)}</p>
          <p className="mt-0.5 text-textMuted">{descriptionFor(status)}</p>
        </div>
        {prewarmButton}
      </div>
    </div>
  );
}
283
+
284
// Class-name fragments that colour one banner state.
interface Tone {
  bg: string;
  border: string;
  dot: string;
  title: string;
}

// Tone used for "unknown" and for any kind without its own entry.
const NEUTRAL_TONE: Tone = {
  bg: "bg-surfaceMuted",
  border: "border-border",
  dot: "bg-textMuted animate-pulse",
  title: "text-textPrimary",
};

const TONE_BY_KIND = new Map<Status["kind"], Tone>([
  [
    "awake",
    {
      bg: "bg-emerald-950/40",
      border: "border-emerald-800/60",
      dot: "bg-emerald-400",
      title: "text-emerald-200",
    },
  ],
  [
    "warming",
    {
      bg: "bg-amber-950/40",
      border: "border-amber-800/60",
      dot: "bg-amber-400 animate-pulse",
      title: "text-amber-200",
    },
  ],
  [
    "asleep",
    {
      bg: "bg-amber-950/40",
      border: "border-amber-800/60",
      dot: "bg-amber-500",
      title: "text-amber-200",
    },
  ],
  [
    "error",
    {
      bg: "bg-rose-950/40",
      border: "border-rose-800/60",
      dot: "bg-rose-500",
      title: "text-rose-200",
    },
  ],
]);

/** Return the colour classes for a status; a fresh object per call. */
function toneFor(status: Status): Tone {
  const tone = TONE_BY_KIND.get(status.kind) ?? NEUTRAL_TONE;
  return { ...tone };
}
331
+
332
/** One-line banner title for a status. */
function labelFor(status: Status): string {
  if (status.kind === "awake") {
    // The longer wording is used only when the probe confirmed every
    // upstream individually.
    if (status.bothUpstreams) {
      return "GPU is warm — both models loaded";
    }
    return "GPU is warm";
  }
  if (status.kind === "warming") {
    return "GPU is warming up";
  }
  if (status.kind === "asleep") {
    return "GPU is asleep";
  }
  if (status.kind === "error") {
    return "Couldn't reach the GPU Space";
  }
  // "unknown" and anything unexpected.
  return "Checking GPU status…";
}
349
+
350
/** Secondary explanatory line shown under the banner title.
 *
 * The "unknown" text names the URL being probed. It is built from
 * HEALTH_URL rather than a hard-coded copy, so the message stays
 * correct if PHYSIX_INFER_BASE_URL is ever pointed somewhere else. */
function descriptionFor(status: Status): string {
  switch (status.kind) {
    case "awake":
      return "Next request will respond in ~1-3 s. Sleeps again after 5 min idle.";
    case "warming":
      return "vLLM is loading the 3B weights. First request will resolve in ~30-90 s; subsequent calls are fast.";
    case "asleep":
      return "First request will trigger a cold boot (~90-120 s while vLLM loads two 3B models on the L4). Click Prewarm now if you'd rather not wait inside the episode.";
    case "error":
      return "The Space might be temporarily unreachable. Episodes targeting PhysiX-Infer will fail until it recovers — try Hugging Face Router as a fallback.";
    case "unknown":
    default:
      return `Probing ${HEALTH_URL} …`;
  }
}
365
+
366
/** Whether the Prewarm button is offered: only for "asleep" and "error". */
function showsPrewarm(status: Status): boolean {
  switch (status.kind) {
    case "asleep":
    case "error":
      return true;
    default:
      return false;
  }
}
frontend/src/components/RunWithLlmPane.tsx CHANGED
@@ -104,6 +104,8 @@ export function RunWithLlmPane(): JSX.Element {
104
  </p>
105
  </header>
106
 
 
 
107
  <LlmConnectionPanel
108
  title="LLM"
109
  subtitle="One model drives the episode."
@@ -639,6 +641,59 @@ function RewardCell({
639
  );
640
  }
641
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
642
  function ErrorRow({
643
  message,
644
  onDismiss,
 
104
  </p>
105
  </header>
106
 
107
+ <EndpointGuide />
108
+
109
  <LlmConnectionPanel
110
  title="LLM"
111
  subtitle="One model drives the episode."
 
641
  );
642
  }
643
 
644
/** "Which endpoint should I pick?" callout shown above the connection
 *  panel. Three rows of one-liner guidance; no images, no links to
 *  external docs — keeps the page fast and the answer visible without
 *  scrolling.
 *
 *  Why this exists:
 *    The endpoint dropdown has 5 options and the optimal pick depends
 *    on what the user has on hand. Without this callout most visitors
 *    default to whatever's first and either (a) hit a token error
 *    (HF Router with no token) or (b) sit through a 90 s GPU cold-boot
 *    (PhysiX-Infer) without knowing it's coming.
 *
 *  Whitespace note:
 *    JSX drops whitespace that contains a newline between a text line
 *    and an adjacent element, so every text-to-element boundary that
 *    crosses a line break needs an explicit {" "} to render a space.
 *
 *  Takes no props and holds no state; returns static markup only. */
function EndpointGuide(): JSX.Element {
  return (
    <div className="rounded-lg border border-border bg-surfaceMuted px-4 py-3 text-xs leading-relaxed text-textMuted">
      <p className="heading-eyebrow text-textPrimary">
        Which endpoint should you pick?
      </p>
      <ul className="mt-2 flex flex-col gap-1.5">
        <li>
          <span className="text-textPrimary">Hugging Face Router</span>{" "}
          <span className="rounded bg-surface px-1.5 py-0.5 text-[10px] uppercase tracking-wider text-textMuted">
            default
          </span>{" "}
          — easiest path. Paste a token from{" "}
          <code className="font-mono text-textPrimary">
            huggingface.co/settings/tokens
          </code>{" "}
          (with the &quot;Make calls to Inference Providers&quot; permission),
          pick a suggested model, hit Run. Responds in ~2 s, no warm-up.
        </li>
        <li>
          <span className="text-textPrimary">PhysiX-Infer GPU ✦</span> — only
          way to compare the GRPO-trained{" "}
          <code className="font-mono text-textPrimary">physix-3b-rl</code>{" "}
          against its{" "}
          <code className="font-mono text-textPrimary">Qwen 2.5 3B</code> base
          on identical hardware. No token. Sleeps after 5 min idle so first
          request after sleep takes ~90-120 s while two 3B models load on the
          L4 — the status banner below shows live state, with a Prewarm
          button to wake it before you hit Run.
        </li>
        <li>
          <span className="text-textPrimary">Ollama / OpenAI / Custom</span>{" "}
          {/* Explicit space: without it this renders "Ollama onlocalhost:11434". */}
          — bring your own endpoint. Useful for local dev (Ollama on{" "}
          <code className="font-mono text-textPrimary">localhost:11434</code>),
          frontier-model baselines (OpenAI), or pointing at a private vLLM /
          inference endpoint URL.
        </li>
      </ul>
    </div>
  );
}
696
+
697
  function ErrorRow({
698
  message,
699
  onDismiss,
frontend/tsconfig.tsbuildinfo CHANGED
@@ -1 +1 @@
1
- {"root":["./src/app.tsx","./src/main.tsx","./src/components/comparepane.tsx","./src/components/equationdisplay.tsx","./src/components/herointro.tsx","./src/components/llmconnectionpanel.tsx","./src/components/openenvexplorerpane.tsx","./src/components/rewardgauge.tsx","./src/components/runwithllmpane.tsx","./src/components/skeleton.tsx","./src/components/trajectorycanvas.tsx","./src/hooks/usellmcomparerunner.ts","./src/hooks/usellmepisoderunner.ts","./src/lib/cn.ts","./src/lib/format.ts","./src/lib/interactiveclient.ts","./src/lib/llmpresets.ts","./src/lib/trajectory.ts","./src/types/physix.ts"],"version":"5.6.3"}
 
1
+ {"root":["./src/app.tsx","./src/main.tsx","./src/components/comparepane.tsx","./src/components/equationdisplay.tsx","./src/components/herointro.tsx","./src/components/llmconnectionpanel.tsx","./src/components/openenvexplorerpane.tsx","./src/components/physixinferstatus.tsx","./src/components/rewardgauge.tsx","./src/components/runwithllmpane.tsx","./src/components/skeleton.tsx","./src/components/trajectorycanvas.tsx","./src/hooks/usellmcomparerunner.ts","./src/hooks/usellmepisoderunner.ts","./src/lib/cn.ts","./src/lib/format.ts","./src/lib/interactiveclient.ts","./src/lib/llmpresets.ts","./src/lib/trajectory.ts","./src/types/physix.ts"],"version":"5.6.3"}
train/.gitignore ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ **/__pycache__/
2
+ **/*.pyc
3
+ **/*.egg-info/
4
+ runs/
5
+ .DS_Store