Spaces:
Sleeping
Sleeping
ui: status banner + endpoint guide for physix-infer cold-start
Browse files- .openenvignore +43 -0
- .pytest_cache/.gitignore +2 -0
- .pytest_cache/CACHEDIR.TAG +4 -0
- .pytest_cache/README.md +8 -0
- .pytest_cache/v/cache/lastfailed +6 -0
- .pytest_cache/v/cache/nodeids +141 -0
- .ruff_cache/.gitignore +2 -0
- .ruff_cache/0.15.12/13147506665210707489 +0 -0
- .ruff_cache/0.15.12/18068402563573118557 +0 -0
- .ruff_cache/0.15.12/3907545159351961271 +0 -0
- .ruff_cache/0.15.12/482523753705084482 +0 -0
- .ruff_cache/0.15.12/8552442504755746477 +0 -0
- .ruff_cache/CACHEDIR.TAG +1 -0
- .vscode/settings.json +6 -0
- Dockerfile +1 -2
- README.md +0 -1
- docs/plots/loss.png +0 -0
- docs/plots/reward.png +2 -2
- docs/plots/reward_components.png +2 -2
- frontend/src/components/LlmConnectionPanel.tsx +9 -0
- frontend/src/components/PhysixInferStatus.tsx +368 -0
- frontend/src/components/RunWithLlmPane.tsx +55 -0
- frontend/tsconfig.tsbuildinfo +1 -1
- train/.gitignore +5 -0
.openenvignore
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Files OpenEnv `push` should NOT upload to the HF Space.
|
| 2 |
+
#
|
| 3 |
+
# OpenEnv's defaults already exclude `.*` (so .venv, .pytest_cache,
|
| 4 |
+
# .ruff_cache, .vscode are gone), `__pycache__`, and `*.pyc`. This
|
| 5 |
+
# file adds the heavyweight build artefacts and source-tree subdirs
|
| 6 |
+
# that don't belong in the Space build context.
|
| 7 |
+
#
|
| 8 |
+
# IMPORTANT: keep this list tight. Anything we exclude here is invisible
|
| 9 |
+
# to the HF Spaces Docker build, so files referenced by Dockerfile.COPY
|
| 10 |
+
# steps must NOT appear here.
|
| 11 |
+
|
| 12 |
+
# Frontend build artefacts. The Space's two-stage Dockerfile rebuilds
|
| 13 |
+
# the SPA from `frontend/` source, so we don't ship the host build.
|
| 14 |
+
frontend/node_modules
|
| 15 |
+
frontend/dist
|
| 16 |
+
frontend/dist-ts-build
|
| 17 |
+
|
| 18 |
+
# Training stack — runs on HF Jobs, not in the Space. The Space image
|
| 19 |
+
# explicitly installs only the inference deps (no torch / unsloth / trl)
|
| 20 |
+
# to keep cold-start small.
|
| 21 |
+
train
|
| 22 |
+
|
| 23 |
+
# Tests + dev-only docs aren't needed at runtime. Excluding them
|
| 24 |
+
# shrinks the build context and removes a (tiny) source of noise in
|
| 25 |
+
# the Hub UI.
|
| 26 |
+
tests
|
| 27 |
+
docs
|
| 28 |
+
|
| 29 |
+
# scripts/ is mostly dev-only, but space_app.py is referenced by the
|
| 30 |
+
# Dockerfile (mounts the SPA + adds the / -> /web/ redirect) and MUST
|
| 31 |
+
# be present at build time. So we exclude the verifier helpers
|
| 32 |
+
# explicitly rather than blanket-excluding the directory.
|
| 33 |
+
scripts/verify_hf_router.py
|
| 34 |
+
scripts/HF_ROUTER_VERIFICATION.md
|
| 35 |
+
|
| 36 |
+
# Build / packaging artefacts.
|
| 37 |
+
*.egg-info
|
| 38 |
+
build
|
| 39 |
+
dist
|
| 40 |
+
|
| 41 |
+
# Editor / OS detritus.
|
| 42 |
+
.DS_Store
|
| 43 |
+
*.swp
|
.pytest_cache/.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Created by pytest automatically.
|
| 2 |
+
*
|
.pytest_cache/CACHEDIR.TAG
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Signature: 8a477f597d28d172789f06886806bc55
|
| 2 |
+
# This file is a cache directory tag created by pytest.
|
| 3 |
+
# For information about cache directory tags, see:
|
| 4 |
+
# https://bford.info/cachedir/spec.html
|
.pytest_cache/README.md
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# pytest cache directory #
|
| 2 |
+
|
| 3 |
+
This directory contains data from the pytest's cache plugin,
|
| 4 |
+
which provides the `--lf` and `--ff` options, as well as the `cache` fixture.
|
| 5 |
+
|
| 6 |
+
**Do not** commit this to version control.
|
| 7 |
+
|
| 8 |
+
See [the docs](https://docs.pytest.org/en/stable/how-to/cache.html) for more information.
|
.pytest_cache/v/cache/lastfailed
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"tests/test_parser.py::test_unknown_dy_in_paired_system_suggests_vy": true,
|
| 3 |
+
"tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[(lambda x: x)(y)-Lambda]": true,
|
| 4 |
+
"tests/test_dataset.py": true,
|
| 5 |
+
"tests/test_sft_dataset.py": true
|
| 6 |
+
}
|
.pytest_cache/v/cache/nodeids
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
"tests/test_client_ws.py::test_websocket_round_trip",
|
| 3 |
+
"tests/test_dataset.py::test_dataset_observed_arrays_match_state_variables",
|
| 4 |
+
"tests/test_dataset.py::test_eval_dataset_marks_held_out_rows",
|
| 5 |
+
"tests/test_dataset.py::test_training_dataset_default_curriculum_is_demo_systems",
|
| 6 |
+
"tests/test_dataset.py::test_training_dataset_default_curriculum_is_supported_systems",
|
| 7 |
+
"tests/test_dataset.py::test_training_dataset_explicit_system_ids_override_default",
|
| 8 |
+
"tests/test_dataset.py::test_training_dataset_has_expected_schema",
|
| 9 |
+
"tests/test_dataset.py::test_training_dataset_prompts_are_chat_lists",
|
| 10 |
+
"tests/test_dataset.py::test_training_dataset_rejects_empty_system_ids",
|
| 11 |
+
"tests/test_dataset.py::test_training_dataset_rejects_unknown_system_ids",
|
| 12 |
+
"tests/test_dataset.py::test_training_dataset_uses_only_train_tiers",
|
| 13 |
+
"tests/test_environment.py::test_episode_terminates_on_convergence",
|
| 14 |
+
"tests/test_environment.py::test_history_accumulates_across_turns",
|
| 15 |
+
"tests/test_environment.py::test_max_turns_terminates_episode",
|
| 16 |
+
"tests/test_environment.py::test_progress_reward_rewards_improvement",
|
| 17 |
+
"tests/test_environment.py::test_reset_returns_well_formed_observation",
|
| 18 |
+
"tests/test_environment.py::test_state_property_exposes_episode_id",
|
| 19 |
+
"tests/test_environment.py::test_step_swallows_simulator_failures_as_format_zero_match_zero[free_fall-d2y/dt2 = -9.81 / (y - y)]",
|
| 20 |
+
"tests/test_environment.py::test_step_swallows_simulator_failures_as_format_zero_match_zero[free_fall-d2y/dt2 = exp(exp(exp(y)))]",
|
| 21 |
+
"tests/test_environment.py::test_step_swallows_simulator_failures_as_format_zero_match_zero[free_fall-d2y/dt2 = log(0 * y)]",
|
| 22 |
+
"tests/test_environment.py::test_step_swallows_simulator_failures_as_format_zero_match_zero[simple_pendulum-d2theta/dt2 = -sqrt(-theta**2 - 1)]",
|
| 23 |
+
"tests/test_environment.py::test_step_swallows_simulator_failures_as_format_zero_match_zero[simple_pendulum-d2theta/dt2 = -sqrt(dtheta**2 + theta**2) * sin(theta)]",
|
| 24 |
+
"tests/test_environment.py::test_step_with_ground_truth_rewards_high",
|
| 25 |
+
"tests/test_environment.py::test_step_with_unparseable_equation_short_circuits",
|
| 26 |
+
"tests/test_interactive_api.py::test_budget_exhaustion_returns_409_on_next_step",
|
| 27 |
+
"tests/test_interactive_api.py::test_cors_preflight_for_dev_origin",
|
| 28 |
+
"tests/test_interactive_api.py::test_llm_step_after_budget_exhaustion_returns_409",
|
| 29 |
+
"tests/test_interactive_api.py::test_llm_step_drives_a_turn_using_injected_policy",
|
| 30 |
+
"tests/test_interactive_api.py::test_llm_step_handles_unparseable_completion_as_format_zero",
|
| 31 |
+
"tests/test_interactive_api.py::test_llm_step_runs_full_episode_with_three_canned_turns",
|
| 32 |
+
"tests/test_interactive_api.py::test_models_endpoint_returns_empty_with_error_when_daemon_unavailable",
|
| 33 |
+
"tests/test_interactive_api.py::test_models_endpoint_returns_injected_list",
|
| 34 |
+
"tests/test_interactive_api.py::test_session_lifecycle_create_step_delete",
|
| 35 |
+
"tests/test_interactive_api.py::test_session_lifecycle_create_summary_delete",
|
| 36 |
+
"tests/test_interactive_api.py::test_step_with_ground_truth_marks_done",
|
| 37 |
+
"tests/test_interactive_api.py::test_systems_endpoint_excludes_held_out_tier",
|
| 38 |
+
"tests/test_interactive_api.py::test_systems_endpoint_returns_demo_curriculum_in_order",
|
| 39 |
+
"tests/test_interactive_api.py::test_systems_endpoint_returns_supported_systems_in_order",
|
| 40 |
+
"tests/test_interactive_api.py::test_unknown_session_id_returns_404",
|
| 41 |
+
"tests/test_interactive_api.py::test_unknown_system_id_returns_400",
|
| 42 |
+
"tests/test_interactive_api.py::test_unparseable_equation_returns_zero_format_not_500",
|
| 43 |
+
"tests/test_metrics_diagnostic.py::test_diag_scores_perfect_on_identical_trajectory",
|
| 44 |
+
"tests/test_metrics_diagnostic.py::test_diag_scores_stay_informative_when_r2_collapses",
|
| 45 |
+
"tests/test_metrics_diagnostic.py::test_freq_hint_in_mismatch_summary_for_freq_mismatch",
|
| 46 |
+
"tests/test_metrics_diagnostic.py::test_freq_score_falls_back_for_non_oscillatory_signal",
|
| 47 |
+
"tests/test_metrics_diagnostic.py::test_reward_total_unchanged_by_diagnostic_fields",
|
| 48 |
+
"tests/test_parser.py::test_alias_does_not_fire_when_velocity_state_is_named_dvar",
|
| 49 |
+
"tests/test_parser.py::test_alias_only_replaces_word_boundary_matches",
|
| 50 |
+
"tests/test_parser.py::test_bare_dx_alias_substitutes_for_vx",
|
| 51 |
+
"tests/test_parser.py::test_basic_equation_round_trips",
|
| 52 |
+
"tests/test_parser.py::test_caret_is_accepted_as_power_synonym",
|
| 53 |
+
"tests/test_parser.py::test_decimal_literals_are_not_misread_as_attribute_access",
|
| 54 |
+
"tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[(lambda x: x)(y)-Lambda]",
|
| 55 |
+
"tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[(lambda x: x)(y)-computed-name call]",
|
| 56 |
+
"tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[(y, vy)-Collection]",
|
| 57 |
+
"tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[lambda x: x-Lambda]",
|
| 58 |
+
"tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[vy[0]-Array indexing]",
|
| 59 |
+
"tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[y == vy-Comparisons]",
|
| 60 |
+
"tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[y and vy-Boolean]",
|
| 61 |
+
"tests/test_parser.py::test_disallowed_constructs_each_have_targeted_error[y if vy > 0 else -y-Conditional]",
|
| 62 |
+
"tests/test_parser.py::test_dotted_attribute_access_is_rejected_with_clear_error",
|
| 63 |
+
"tests/test_parser.py::test_dotted_attribute_access_variants_all_rejected[math.sin(theta)]",
|
| 64 |
+
"tests/test_parser.py::test_dotted_attribute_access_variants_all_rejected[np.exp(-theta**2)]",
|
| 65 |
+
"tests/test_parser.py::test_dotted_attribute_access_variants_all_rejected[numpy.cos(theta) + 1]",
|
| 66 |
+
"tests/test_parser.py::test_dotted_attribute_access_variants_all_rejected[scipy.special.expit(theta)]",
|
| 67 |
+
"tests/test_parser.py::test_dotted_attribute_access_variants_all_rejected[theta.diff()]",
|
| 68 |
+
"tests/test_parser.py::test_dx_dt_alias_substitutes_for_vx_when_velocity_state_exists",
|
| 69 |
+
"tests/test_parser.py::test_dy_dt_alias_substitutes_for_vy",
|
| 70 |
+
"tests/test_parser.py::test_grammar_hint_documents_velocity_convention",
|
| 71 |
+
"tests/test_parser.py::test_keyword_arguments_in_call_are_rejected_with_specific_hint",
|
| 72 |
+
"tests/test_parser.py::test_multiple_equations_split_on_semicolons_keep_alias_behaviour",
|
| 73 |
+
"tests/test_parser.py::test_only_parse_error_ever_escapes_the_parser",
|
| 74 |
+
"tests/test_parser.py::test_sympy_internal_errors_become_parse_errors_not_attribute_errors",
|
| 75 |
+
"tests/test_parser.py::test_unknown_dx_in_system_without_paired_velocity_includes_hint",
|
| 76 |
+
"tests/test_parser.py::test_unknown_dy_in_paired_system_suggests_vy",
|
| 77 |
+
"tests/test_parser.py::test_unknown_t_emits_autonomy_hint",
|
| 78 |
+
"tests/test_prompt.py::test_build_prompt_returns_chat_pair",
|
| 79 |
+
"tests/test_prompt.py::test_history_block_surfaces_dense_reward_components",
|
| 80 |
+
"tests/test_prompt.py::test_history_block_tolerates_missing_reward_components",
|
| 81 |
+
"tests/test_prompt.py::test_history_uses_equation_field_name_not_shorthand",
|
| 82 |
+
"tests/test_prompt.py::test_parse_completion_accepts_capitalised_keys",
|
| 83 |
+
"tests/test_prompt.py::test_parse_completion_accepts_eqn_synonym",
|
| 84 |
+
"tests/test_prompt.py::test_parse_completion_accepts_other_equation_synonyms",
|
| 85 |
+
"tests/test_prompt.py::test_parse_completion_accepts_parameters_synonym",
|
| 86 |
+
"tests/test_prompt.py::test_parse_completion_accepts_reasoning_synonym",
|
| 87 |
+
"tests/test_prompt.py::test_parse_completion_canonical_key_wins_over_synonym",
|
| 88 |
+
"tests/test_prompt.py::test_parse_completion_coerces_string_params",
|
| 89 |
+
"tests/test_prompt.py::test_parse_completion_extracts_clean_json",
|
| 90 |
+
"tests/test_prompt.py::test_parse_completion_handles_code_fences",
|
| 91 |
+
"tests/test_prompt.py::test_parse_completion_handles_latex_braces_inside_json_string",
|
| 92 |
+
"tests/test_prompt.py::test_parse_completion_normalizes_latex_frac_and_caret",
|
| 93 |
+
"tests/test_prompt.py::test_parse_completion_picks_first_object_when_text_has_multiple",
|
| 94 |
+
"tests/test_prompt.py::test_parse_completion_with_no_json_falls_back_to_raw_text",
|
| 95 |
+
"tests/test_prompt.py::test_parse_completion_with_no_json_yields_empty_equation",
|
| 96 |
+
"tests/test_prompt.py::test_render_includes_history_when_present",
|
| 97 |
+
"tests/test_prompt.py::test_render_includes_metadata_block",
|
| 98 |
+
"tests/test_prompt.py::test_render_includes_trajectory_samples",
|
| 99 |
+
"tests/test_prompt.py::test_render_omits_history_block_when_empty",
|
| 100 |
+
"tests/test_prompt.py::test_system_message_locks_in_canonical_field_name",
|
| 101 |
+
"tests/test_providers.py::test_format_provider_error_pinpoints_auth_failure",
|
| 102 |
+
"tests/test_providers.py::test_format_provider_error_pinpoints_missing_model",
|
| 103 |
+
"tests/test_providers.py::test_format_provider_error_pinpoints_unreachable_endpoint",
|
| 104 |
+
"tests/test_providers.py::test_resolve_api_key_browser_supplied_wins",
|
| 105 |
+
"tests/test_providers.py::test_resolve_api_key_falls_back_to_hf_token_for_hf_router",
|
| 106 |
+
"tests/test_providers.py::test_resolve_api_key_falls_back_to_openai_env",
|
| 107 |
+
"tests/test_providers.py::test_resolve_api_key_for_ollama_returns_placeholder_when_no_env",
|
| 108 |
+
"tests/test_providers.py::test_resolve_api_key_returns_none_for_unknown_url",
|
| 109 |
+
"tests/test_providers.py::test_resolve_api_key_uses_huggingface_api_key_if_hf_token_missing",
|
| 110 |
+
"tests/test_providers_hf.py::test_full_episode_flows_visitor_config_to_openai_client_unchanged",
|
| 111 |
+
"tests/test_providers_hf.py::test_full_episode_recovers_from_first_call_response_format_400",
|
| 112 |
+
"tests/test_providers_hf.py::test_hf_router_401_surfaces_inference_providers_permission_hint",
|
| 113 |
+
"tests/test_providers_hf.py::test_hf_router_404_surfaces_warm_provider_hint",
|
| 114 |
+
"tests/test_providers_hf.py::test_hf_router_base_url_is_canonical",
|
| 115 |
+
"tests/test_providers_hf.py::test_hf_router_client_is_constructed_with_visitor_token",
|
| 116 |
+
"tests/test_providers_hf.py::test_hf_router_client_uses_hf_token_env_when_visitor_omits_key",
|
| 117 |
+
"tests/test_providers_hf.py::test_hf_router_connection_failure_surfaces_network_hint",
|
| 118 |
+
"tests/test_providers_hf.py::test_hf_router_retries_without_response_format_on_bad_request",
|
| 119 |
+
"tests/test_providers_hf.py::test_hf_router_succeeds_on_first_try_when_provider_supports_json",
|
| 120 |
+
"tests/test_providers_hf.py::test_hf_router_timeout_surfaces_network_hint",
|
| 121 |
+
"tests/test_registry.py::test_demo_curriculum_is_non_empty",
|
| 122 |
+
"tests/test_registry.py::test_demo_curriculum_only_references_registered_systems",
|
| 123 |
+
"tests/test_registry.py::test_every_demo_system_instantiates_cleanly",
|
| 124 |
+
"tests/test_registry.py::test_every_supported_system_instantiates_cleanly",
|
| 125 |
+
"tests/test_registry.py::test_list_demo_systems_preserves_declared_order",
|
| 126 |
+
"tests/test_registry.py::test_list_supported_systems_preserves_declared_order",
|
| 127 |
+
"tests/test_registry.py::test_supported_systems_is_non_empty",
|
| 128 |
+
"tests/test_registry.py::test_supported_systems_only_references_registered_systems",
|
| 129 |
+
"tests/test_scorer.py::test_scorer_caches_by_key",
|
| 130 |
+
"tests/test_scorer.py::test_scorer_progress_shrinks_when_previous_total_is_high",
|
| 131 |
+
"tests/test_scorer.py::test_scorer_returns_high_match_for_ground_truth",
|
| 132 |
+
"tests/test_scorer.py::test_scorer_returns_zero_format_on_garbage",
|
| 133 |
+
"tests/test_sft_dataset.py::test_build_sft_dataset_completions_are_valid_json_action",
|
| 134 |
+
"tests/test_sft_dataset.py::test_build_sft_dataset_default_curriculum_size",
|
| 135 |
+
"tests/test_sft_dataset.py::test_build_sft_dataset_default_curriculum_size_matches_demo",
|
| 136 |
+
"tests/test_sft_dataset.py::test_build_sft_dataset_default_scales_with_instances_per_system",
|
| 137 |
+
"tests/test_sft_dataset.py::test_build_sft_dataset_default_size_matches_supported_systems",
|
| 138 |
+
"tests/test_sft_dataset.py::test_build_sft_dataset_explicit_system_ids_override_default",
|
| 139 |
+
"tests/test_sft_dataset.py::test_build_sft_dataset_rejects_empty_system_ids",
|
| 140 |
+
"tests/test_sft_dataset.py::test_build_sft_dataset_rejects_unknown_system_ids"
|
| 141 |
+
]
|
.ruff_cache/.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Automatically created by ruff.
|
| 2 |
+
*
|
.ruff_cache/0.15.12/13147506665210707489
ADDED
|
Binary file (85 Bytes). View file
|
|
|
.ruff_cache/0.15.12/18068402563573118557
ADDED
|
Binary file (1.11 kB). View file
|
|
|
.ruff_cache/0.15.12/3907545159351961271
ADDED
|
Binary file (308 Bytes). View file
|
|
|
.ruff_cache/0.15.12/482523753705084482
ADDED
|
Binary file (355 Bytes). View file
|
|
|
.ruff_cache/0.15.12/8552442504755746477
ADDED
|
Binary file (1.01 kB). View file
|
|
|
.ruff_cache/CACHEDIR.TAG
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Signature: 8a477f597d28d172789f06886806bc55
|
.vscode/settings.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"python.defaultInterpreterPath": "/Users/pratyush/miniconda3/envs/openenv_run/bin/python",
|
| 3 |
+
"python.analysis.extraPaths": ["${workspaceFolder}"],
|
| 4 |
+
"python.testing.pytestEnabled": true,
|
| 5 |
+
"python.testing.pytestArgs": ["tests"]
|
| 6 |
+
}
|
Dockerfile
CHANGED
|
@@ -38,7 +38,7 @@ COPY frontend/ ./
|
|
| 38 |
ENV VITE_PHYSIX_API_URL=""
|
| 39 |
# Cache-bust marker. Bump when an SPA change isn't taking on the Space —
|
| 40 |
# HF BuildKit occasionally reuses stage-1 output even when sources changed.
|
| 41 |
-
# physix-spa-rebuild:
|
| 42 |
RUN pnpm exec tsc -b \
|
| 43 |
&& pnpm exec vite build --base=/web/
|
| 44 |
|
|
@@ -102,5 +102,4 @@ EXPOSE 7860
|
|
| 102 |
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
|
| 103 |
CMD curl -fsS "http://127.0.0.1:${PORT}/health" || exit 1
|
| 104 |
|
| 105 |
-
ENV ENABLE_WEB_INTERFACE=true
|
| 106 |
CMD ["python3", "-m", "uvicorn", "_space_app:app", "--host", "0.0.0.0", "--port", "7860"]
|
|
|
|
| 38 |
ENV VITE_PHYSIX_API_URL=""
|
| 39 |
# Cache-bust marker. Bump when an SPA change isn't taking on the Space —
|
| 40 |
# HF BuildKit occasionally reuses stage-1 output even when sources changed.
|
| 41 |
+
# physix-spa-rebuild: 6
|
| 42 |
RUN pnpm exec tsc -b \
|
| 43 |
&& pnpm exec vite build --base=/web/
|
| 44 |
|
|
|
|
| 102 |
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
|
| 103 |
CMD curl -fsS "http://127.0.0.1:${PORT}/health" || exit 1
|
| 104 |
|
|
|
|
| 105 |
CMD ["python3", "-m", "uvicorn", "_space_app:app", "--host", "0.0.0.0", "--port", "7860"]
|
README.md
CHANGED
|
@@ -14,7 +14,6 @@ tags:
|
|
| 14 |
- physics
|
| 15 |
- equation-discovery
|
| 16 |
- ode
|
| 17 |
-
base_path: /web
|
| 18 |
---
|
| 19 |
|
| 20 |
# PhysiX — Equation Discovery via RLVR
|
|
|
|
| 14 |
- physics
|
| 15 |
- equation-discovery
|
| 16 |
- ode
|
|
|
|
| 17 |
---
|
| 18 |
|
| 19 |
# PhysiX — Equation Discovery via RLVR
|
docs/plots/loss.png
CHANGED
|
|
docs/plots/reward.png
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
docs/plots/reward_components.png
CHANGED
|
Git LFS Details
|
|
Git LFS Details
|
frontend/src/components/LlmConnectionPanel.tsx
CHANGED
|
@@ -14,6 +14,7 @@
|
|
| 14 |
|
| 15 |
import { useEffect, useState } from "react";
|
| 16 |
|
|
|
|
| 17 |
import { cn } from "@/lib/cn";
|
| 18 |
import type { LlmModelInfo } from "@/lib/interactiveClient";
|
| 19 |
import {
|
|
@@ -195,6 +196,14 @@ export function LlmConnectionPanel({
|
|
| 195 |
{endpoint.hint}
|
| 196 |
</p>
|
| 197 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
{endpoint.id === "ollama" && installedOllamaError ? (
|
| 199 |
<OllamaTroubleshooter
|
| 200 |
message={installedOllamaError}
|
|
|
|
| 14 |
|
| 15 |
import { useEffect, useState } from "react";
|
| 16 |
|
| 17 |
+
import { PhysixInferStatus } from "@/components/PhysixInferStatus";
|
| 18 |
import { cn } from "@/lib/cn";
|
| 19 |
import type { LlmModelInfo } from "@/lib/interactiveClient";
|
| 20 |
import {
|
|
|
|
| 196 |
{endpoint.hint}
|
| 197 |
</p>
|
| 198 |
|
| 199 |
+
{/*
|
| 200 |
+
Live status banner for the GPU Space. Only renders for the
|
| 201 |
+
physix endpoint — every other endpoint either has no sleep
|
| 202 |
+
cycle (HF Router, OpenAI, Custom) or is local-only (Ollama),
|
| 203 |
+
so there's nothing to surface.
|
| 204 |
+
*/}
|
| 205 |
+
{endpoint.id === "physix" ? <PhysixInferStatus /> : null}
|
| 206 |
+
|
| 207 |
{endpoint.id === "ollama" && installedOllamaError ? (
|
| 208 |
<OllamaTroubleshooter
|
| 209 |
message={installedOllamaError}
|
frontend/src/components/PhysixInferStatus.tsx
ADDED
|
@@ -0,0 +1,368 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/** Status banner for the PhysiX-Infer GPU Space.
|
| 2 |
+
*
|
| 3 |
+
* Why this exists:
|
| 4 |
+
* The PhysiX-Infer Space sleeps after 5 min of idle to avoid burning
|
| 5 |
+
* GPU time. A cold-start takes 90-120 s while vLLM downloads / loads
|
| 6 |
+
* weights for both 3B models. Without warning, a user picks the
|
| 7 |
+
* PhysiX endpoint, hits Run, and stares at a spinner for 2 minutes
|
| 8 |
+
* convinced something is broken.
|
| 9 |
+
*
|
| 10 |
+
* This panel surfaces the underlying state so the wait is *expected*,
|
| 11 |
+
* not surprising — and offers a one-click "Prewarm" button so the
|
| 12 |
+
* user can kick the boot off before they pick a system / hit Run.
|
| 13 |
+
*
|
| 14 |
+
* Mechanics:
|
| 15 |
+
* - On mount, GET https://pratyush-01-physix-infer.hf.space/health.
|
| 16 |
+
* - HF Spaces' edge proxy returns one of three observable states:
|
| 17 |
+
* * 200 with body { upstreams: { qwen: "ok", physix: "ok" } }
|
| 18 |
+
* → both vLLMs are loaded and serving. Fast next call.
|
| 19 |
+
* * 200 with one upstream not "ok"
|
| 20 |
+
* → container running but vLLM still warming. Some calls fast,
|
| 21 |
+
* some still slow. Treat as "warming".
|
| 22 |
+
* * 503 / connection-stuck-for->5s
|
| 23 |
+
* → Space is asleep. Whatever woke it (this very request) will
|
| 24 |
+
* now ride the cold-boot pipeline.
|
| 25 |
+
* - Re-poll every 15 s while the component is mounted so the badge
|
| 26 |
+
* stays accurate as the user thinks. Polling is cheap and the
|
| 27 |
+
* requests count as activity, which keeps the Space awake while
|
| 28 |
+
* they read — exactly the UX we want during a demo.
|
| 29 |
+
*
|
| 30 |
+
* Note on CORS: the physix-infer FastAPI uses default CORS. The
|
| 31 |
+
* /health endpoint returns plain JSON; modern browsers allow simple
|
| 32 |
+
* GETs across origins to read the status code, but reading the BODY
|
| 33 |
+
* needs Access-Control-Allow-Origin. If we can't read the body, we
|
| 34 |
+
* fall back to "container is up" (best-effort) on any successful
|
| 35 |
+
* response and "asleep" on network failure. */
|
| 36 |
+
|
| 37 |
+
import { useCallback, useEffect, useRef, useState } from "react";
|
| 38 |
+
|
| 39 |
+
import { cn } from "@/lib/cn";
|
| 40 |
+
import { PHYSIX_INFER_BASE_URL } from "@/lib/llmPresets";
|
| 41 |
+
|
| 42 |
+
// /health is mounted at the proxy root, so strip the trailing /v1.
|
| 43 |
+
const HEALTH_URL = PHYSIX_INFER_BASE_URL.replace(/\/v1\/?$/, "") + "/health";
|
| 44 |
+
|
| 45 |
+
// 15 s strikes a balance: long enough that we don't spam HF's edge with
|
| 46 |
+
// requests, short enough that "GPU is now warm" surfaces well before
|
| 47 |
+
// the user has finished typing their prompt.
|
| 48 |
+
const POLL_INTERVAL_MS = 15_000;
|
| 49 |
+
|
| 50 |
+
// Hard ceiling on a single probe. HF holds requests open while a Space
|
| 51 |
+
// boots, and that boot can take ~120 s. We don't want to *wait* for
|
| 52 |
+
// the boot — we want to detect the asleep state early so we can
|
| 53 |
+
// render "cold" and offer the Prewarm button. Anything past 6 s
|
| 54 |
+
// without a response is "asleep" for our purposes.
|
| 55 |
+
const PROBE_TIMEOUT_MS = 6_000;
|
| 56 |
+
|
| 57 |
+
type Status =
|
| 58 |
+
| { kind: "unknown" }
|
| 59 |
+
| { kind: "awake"; bothUpstreams: boolean }
|
| 60 |
+
| { kind: "warming" }
|
| 61 |
+
| { kind: "asleep" }
|
| 62 |
+
| { kind: "error"; message: string };
|
| 63 |
+
|
| 64 |
+
interface ProbeResult {
|
| 65 |
+
status: Status;
|
| 66 |
+
/** True if the probe itself was successful enough to count as a
|
| 67 |
+
* wake-up signal — i.e. HF Spaces' edge proxy received it and
|
| 68 |
+
* routed it to the container. */
|
| 69 |
+
hitContainer: boolean;
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
// Module-level dedup. The Compare pane mounts TWO copies of this
|
| 73 |
+
// component (one per side), and without coalescing they'd each fire
|
| 74 |
+
// their own `/health` GET every 15 s — pointless duplicate load on
|
| 75 |
+
// the GPU Space's edge. We share a single in-flight promise across
|
| 76 |
+
// concurrent callers and cache the last successful result for a
|
| 77 |
+
// short window so the second mount on the same tick reuses the
|
| 78 |
+
// first probe's answer instead of issuing its own.
|
| 79 |
+
let inFlight: Promise<ProbeResult> | null = null;
|
| 80 |
+
let lastResult: { result: ProbeResult; at: number } | null = null;
|
| 81 |
+
const SHARED_RESULT_WINDOW_MS = 5_000;
|
| 82 |
+
|
| 83 |
+
async function probe(): Promise<ProbeResult> {
|
| 84 |
+
// Coalesce: a second probe() call that lands while the first is
|
| 85 |
+
// still in flight piggy-backs on the same network request.
|
| 86 |
+
if (inFlight) return inFlight;
|
| 87 |
+
// Replay the last result if it's fresh enough — covers the
|
| 88 |
+
// "two component mounts in the same render commit" case where
|
| 89 |
+
// both useEffects fire microseconds apart but neither has yet
|
| 90 |
+
// populated `inFlight`.
|
| 91 |
+
if (lastResult && Date.now() - lastResult.at < SHARED_RESULT_WINDOW_MS) {
|
| 92 |
+
return lastResult.result;
|
| 93 |
+
}
|
| 94 |
+
|
| 95 |
+
inFlight = (async (): Promise<ProbeResult> => {
|
| 96 |
+
const controller = new AbortController();
|
| 97 |
+
const timeoutId = window.setTimeout(
|
| 98 |
+
() => controller.abort(),
|
| 99 |
+
PROBE_TIMEOUT_MS,
|
| 100 |
+
);
|
| 101 |
+
|
| 102 |
+
try {
|
| 103 |
+
return await runProbe(controller.signal);
|
| 104 |
+
} finally {
|
| 105 |
+
window.clearTimeout(timeoutId);
|
| 106 |
+
}
|
| 107 |
+
})();
|
| 108 |
+
|
| 109 |
+
try {
|
| 110 |
+
const result = await inFlight;
|
| 111 |
+
lastResult = { result, at: Date.now() };
|
| 112 |
+
return result;
|
| 113 |
+
} finally {
|
| 114 |
+
inFlight = null;
|
| 115 |
+
}
|
| 116 |
+
}
|
| 117 |
+
|
| 118 |
+
async function runProbe(signal: AbortSignal): Promise<ProbeResult> {
|
| 119 |
+
try {
|
| 120 |
+
const response = await fetch(HEALTH_URL, {
|
| 121 |
+
method: "GET",
|
| 122 |
+
mode: "cors",
|
| 123 |
+
signal,
|
| 124 |
+
});
|
| 125 |
+
|
| 126 |
+
if (!response.ok) {
|
| 127 |
+
// 503 from /health = at least one vLLM still booting. We hit the
|
| 128 |
+
// container, so we *did* wake the Space (HF Spaces' edge sends a
|
| 129 |
+
// 503 with body during cold-boot, then the body changes to ok
|
| 130 |
+
// once vLLMs come up).
|
| 131 |
+
return { status: { kind: "warming" }, hitContainer: true };
|
| 132 |
+
}
|
| 133 |
+
|
| 134 |
+
// 200 — try to read the body. If CORS strips it, default to "awake
|
| 135 |
+
// but unsure about per-upstream status".
|
| 136 |
+
try {
|
| 137 |
+
const body = (await response.json()) as {
|
| 138 |
+
upstreams?: Record<string, string>;
|
| 139 |
+
};
|
| 140 |
+
const upstreams = body.upstreams ?? {};
|
| 141 |
+
const allOk = Object.values(upstreams).every((v) => v === "ok");
|
| 142 |
+
if (allOk && Object.keys(upstreams).length > 0) {
|
| 143 |
+
return {
|
| 144 |
+
status: { kind: "awake", bothUpstreams: true },
|
| 145 |
+
hitContainer: true,
|
| 146 |
+
};
|
| 147 |
+
}
|
| 148 |
+
return { status: { kind: "warming" }, hitContainer: true };
|
| 149 |
+
} catch {
|
| 150 |
+
// CORS or non-JSON body. Best effort: 200 means the container
|
| 151 |
+
// answered, so it's awake; we just can't see the per-upstream
|
| 152 |
+
// detail.
|
| 153 |
+
return {
|
| 154 |
+
status: { kind: "awake", bothUpstreams: false },
|
| 155 |
+
hitContainer: true,
|
| 156 |
+
};
|
| 157 |
+
}
|
| 158 |
+
} catch (exc) {
|
| 159 |
+
// AbortError → timed out. Network error → DNS / offline / cors
|
| 160 |
+
// preflight refused. In either case the Space is effectively
|
| 161 |
+
// unreachable from the browser; the most likely cause is
|
| 162 |
+
// "asleep + slow cold-boot" rather than a real outage, so we
|
| 163 |
+
// render "asleep" (with a Prewarm button).
|
| 164 |
+
if ((exc as Error).name === "AbortError") {
|
| 165 |
+
return { status: { kind: "asleep" }, hitContainer: false };
|
| 166 |
+
}
|
| 167 |
+
return {
|
| 168 |
+
status: { kind: "error", message: (exc as Error).message },
|
| 169 |
+
hitContainer: false,
|
| 170 |
+
};
|
| 171 |
+
}
|
| 172 |
+
}
|
| 173 |
+
|
| 174 |
+
export function PhysixInferStatus(): JSX.Element {
  // Banner state; starts as "unknown" until the first probe lands.
  const [status, setStatus] = useState<Status>({ kind: "unknown" });
  const [prewarming, setPrewarming] = useState(false);
  // Sticky-awake latch: once we've confirmed "awake", a single failed
  // probe (transient network blip) shouldn't downgrade the banner —
  // the 15 s poll will re-confirm the real state soon enough.
  const wasAwakeRef = useRef(false);

  const refresh = useCallback(async () => {
    const probed = await probe();
    setStatus((current) => {
      const next = probed.status;
      const looksDown = next.kind === "asleep" || next.kind === "error";
      // Sticky-awake: keep showing awake on a one-off failure. If the
      // Space really did go to sleep, the next poll will agree and we
      // flip then.
      if (wasAwakeRef.current && looksDown) {
        return current;
      }
      if (next.kind === "awake") {
        wasAwakeRef.current = true;
      }
      return next;
    });
  }, []);

  useEffect(() => {
    void refresh();
    const timer = window.setInterval(() => void refresh(), POLL_INTERVAL_MS);
    return () => window.clearInterval(timer);
  }, [refresh]);

  async function handlePrewarm(): Promise<void> {
    if (prewarming) return;
    setPrewarming(true);
    setStatus({ kind: "warming" });
    try {
      // Deliberately no timeout here: HF Spaces holds the request open
      // until the container is up, so this resolves once the Space has
      // woken. The follow-up poll reports when both upstreams are "ok".
      await fetch(HEALTH_URL, { method: "GET", mode: "cors" });
    } catch {
      // Ignore — the polling loop will surface the real state.
    } finally {
      setPrewarming(false);
      void refresh();
    }
  }

  return <StatusBanner status={status} onPrewarm={handlePrewarm} prewarming={prewarming} />;
}
|
| 231 |
+
|
| 232 |
+
// ---------------------------------------------------------------------
|
| 233 |
+
// Render
|
| 234 |
+
// ---------------------------------------------------------------------
|
| 235 |
+
|
| 236 |
+
interface StatusBannerProps {
|
| 237 |
+
status: Status;
|
| 238 |
+
onPrewarm: () => void;
|
| 239 |
+
prewarming: boolean;
|
| 240 |
+
}
|
| 241 |
+
|
| 242 |
+
function StatusBanner({
|
| 243 |
+
status,
|
| 244 |
+
onPrewarm,
|
| 245 |
+
prewarming,
|
| 246 |
+
}: StatusBannerProps): JSX.Element {
|
| 247 |
+
const tone = toneFor(status);
|
| 248 |
+
|
| 249 |
+
return (
|
| 250 |
+
<div
|
| 251 |
+
className={cn(
|
| 252 |
+
"rounded-lg border px-3 py-2 text-[11px] leading-relaxed",
|
| 253 |
+
tone.bg,
|
| 254 |
+
tone.border,
|
| 255 |
+
)}
|
| 256 |
+
>
|
| 257 |
+
<div className="flex items-start gap-2">
|
| 258 |
+
<span
|
| 259 |
+
aria-hidden
|
| 260 |
+
className={cn("mt-1 inline-block h-2 w-2 shrink-0 rounded-full", tone.dot)}
|
| 261 |
+
/>
|
| 262 |
+
<div className="flex-1 min-w-0">
|
| 263 |
+
<p className={cn("font-medium", tone.title)}>{labelFor(status)}</p>
|
| 264 |
+
<p className="mt-0.5 text-textMuted">{descriptionFor(status)}</p>
|
| 265 |
+
</div>
|
| 266 |
+
{showsPrewarm(status) ? (
|
| 267 |
+
<button
|
| 268 |
+
type="button"
|
| 269 |
+
onClick={onPrewarm}
|
| 270 |
+
disabled={prewarming}
|
| 271 |
+
className={cn(
|
| 272 |
+
"shrink-0 rounded-md border border-border bg-surface px-2 py-1 text-[10px] font-medium uppercase tracking-wider transition",
|
| 273 |
+
"hover:bg-surfaceMuted disabled:cursor-not-allowed disabled:opacity-60",
|
| 274 |
+
)}
|
| 275 |
+
>
|
| 276 |
+
{prewarming ? "Prewarming…" : "Prewarm GPU"}
|
| 277 |
+
</button>
|
| 278 |
+
) : null}
|
| 279 |
+
</div>
|
| 280 |
+
</div>
|
| 281 |
+
);
|
| 282 |
+
}
|
| 283 |
+
|
| 284 |
+
interface Tone {
|
| 285 |
+
bg: string;
|
| 286 |
+
border: string;
|
| 287 |
+
dot: string;
|
| 288 |
+
title: string;
|
| 289 |
+
}
|
| 290 |
+
|
| 291 |
+
function toneFor(status: Status): Tone {
|
| 292 |
+
switch (status.kind) {
|
| 293 |
+
case "awake":
|
| 294 |
+
return {
|
| 295 |
+
bg: "bg-emerald-950/40",
|
| 296 |
+
border: "border-emerald-800/60",
|
| 297 |
+
dot: "bg-emerald-400",
|
| 298 |
+
title: "text-emerald-200",
|
| 299 |
+
};
|
| 300 |
+
case "warming":
|
| 301 |
+
return {
|
| 302 |
+
bg: "bg-amber-950/40",
|
| 303 |
+
border: "border-amber-800/60",
|
| 304 |
+
dot: "bg-amber-400 animate-pulse",
|
| 305 |
+
title: "text-amber-200",
|
| 306 |
+
};
|
| 307 |
+
case "asleep":
|
| 308 |
+
return {
|
| 309 |
+
bg: "bg-amber-950/40",
|
| 310 |
+
border: "border-amber-800/60",
|
| 311 |
+
dot: "bg-amber-500",
|
| 312 |
+
title: "text-amber-200",
|
| 313 |
+
};
|
| 314 |
+
case "error":
|
| 315 |
+
return {
|
| 316 |
+
bg: "bg-rose-950/40",
|
| 317 |
+
border: "border-rose-800/60",
|
| 318 |
+
dot: "bg-rose-500",
|
| 319 |
+
title: "text-rose-200",
|
| 320 |
+
};
|
| 321 |
+
case "unknown":
|
| 322 |
+
default:
|
| 323 |
+
return {
|
| 324 |
+
bg: "bg-surfaceMuted",
|
| 325 |
+
border: "border-border",
|
| 326 |
+
dot: "bg-textMuted animate-pulse",
|
| 327 |
+
title: "text-textPrimary",
|
| 328 |
+
};
|
| 329 |
+
}
|
| 330 |
+
}
|
| 331 |
+
|
| 332 |
+
function labelFor(status: Status): string {
|
| 333 |
+
switch (status.kind) {
|
| 334 |
+
case "awake":
|
| 335 |
+
return status.bothUpstreams
|
| 336 |
+
? "GPU is warm — both models loaded"
|
| 337 |
+
: "GPU is warm";
|
| 338 |
+
case "warming":
|
| 339 |
+
return "GPU is warming up";
|
| 340 |
+
case "asleep":
|
| 341 |
+
return "GPU is asleep";
|
| 342 |
+
case "error":
|
| 343 |
+
return "Couldn't reach the GPU Space";
|
| 344 |
+
case "unknown":
|
| 345 |
+
default:
|
| 346 |
+
return "Checking GPU status…";
|
| 347 |
+
}
|
| 348 |
+
}
|
| 349 |
+
|
| 350 |
+
function descriptionFor(status: Status): string {
|
| 351 |
+
switch (status.kind) {
|
| 352 |
+
case "awake":
|
| 353 |
+
return "Next request will respond in ~1-3 s. Sleeps again after 5 min idle.";
|
| 354 |
+
case "warming":
|
| 355 |
+
return "vLLM is loading the 3B weights. First request will resolve in ~30-90 s; subsequent calls are fast.";
|
| 356 |
+
case "asleep":
|
| 357 |
+
return "First request will trigger a cold boot (~90-120 s while vLLM loads two 3B models on the L4). Click Prewarm now if you'd rather not wait inside the episode.";
|
| 358 |
+
case "error":
|
| 359 |
+
return "The Space might be temporarily unreachable. Episodes targeting PhysiX-Infer will fail until it recovers — try Hugging Face Router as a fallback.";
|
| 360 |
+
case "unknown":
|
| 361 |
+
default:
|
| 362 |
+
return "Probing https://pratyush-01-physix-infer.hf.space/health …";
|
| 363 |
+
}
|
| 364 |
+
}
|
| 365 |
+
|
| 366 |
+
function showsPrewarm(status: Status): boolean {
|
| 367 |
+
return status.kind === "asleep" || status.kind === "error";
|
| 368 |
+
}
|
frontend/src/components/RunWithLlmPane.tsx
CHANGED
|
@@ -104,6 +104,8 @@ export function RunWithLlmPane(): JSX.Element {
|
|
| 104 |
</p>
|
| 105 |
</header>
|
| 106 |
|
|
|
|
|
|
|
| 107 |
<LlmConnectionPanel
|
| 108 |
title="LLM"
|
| 109 |
subtitle="One model drives the episode."
|
|
@@ -639,6 +641,59 @@ function RewardCell({
|
|
| 639 |
);
|
| 640 |
}
|
| 641 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 642 |
function ErrorRow({
|
| 643 |
message,
|
| 644 |
onDismiss,
|
|
|
|
| 104 |
</p>
|
| 105 |
</header>
|
| 106 |
|
| 107 |
+
<EndpointGuide />
|
| 108 |
+
|
| 109 |
<LlmConnectionPanel
|
| 110 |
title="LLM"
|
| 111 |
subtitle="One model drives the episode."
|
|
|
|
| 641 |
);
|
| 642 |
}
|
| 643 |
|
| 644 |
+
/** "Which endpoint should I pick?" callout shown above the connection
|
| 645 |
+
* panel. Three rows of one-liner guidance; no images, no links to
|
| 646 |
+
* external docs — keeps the page fast and the answer visible without
|
| 647 |
+
* scrolling.
|
| 648 |
+
*
|
| 649 |
+
* Why this exists:
|
| 650 |
+
* The endpoint dropdown has 5 options and the optimal pick depends
|
| 651 |
+
* on what the user has on hand. Without this callout most visitors
|
| 652 |
+
* default to whatever's first and either (a) hit a token error
|
| 653 |
+
* (HF Router with no token) or (b) sit through a 90 s GPU cold-boot
|
| 654 |
+
* (PhysiX-Infer) without knowing it's coming. */
|
| 655 |
+
function EndpointGuide(): JSX.Element {
|
| 656 |
+
return (
|
| 657 |
+
<div className="rounded-lg border border-border bg-surfaceMuted px-4 py-3 text-xs leading-relaxed text-textMuted">
|
| 658 |
+
<p className="heading-eyebrow text-textPrimary">
|
| 659 |
+
Which endpoint should you pick?
|
| 660 |
+
</p>
|
| 661 |
+
<ul className="mt-2 flex flex-col gap-1.5">
|
| 662 |
+
<li>
|
| 663 |
+
<span className="text-textPrimary">Hugging Face Router</span>{" "}
|
| 664 |
+
<span className="rounded bg-surface px-1.5 py-0.5 text-[10px] uppercase tracking-wider text-textMuted">
|
| 665 |
+
default
|
| 666 |
+
</span>{" "}
|
| 667 |
+
— easiest path. Paste a token from{" "}
|
| 668 |
+
<code className="font-mono text-textPrimary">
|
| 669 |
+
huggingface.co/settings/tokens
|
| 670 |
+
</code>{" "}
|
| 671 |
+
(with the "Make calls to Inference Providers" permission),
|
| 672 |
+
pick a suggested model, hit Run. Responds in ~2 s, no warm-up.
|
| 673 |
+
</li>
|
| 674 |
+
<li>
|
| 675 |
+
<span className="text-textPrimary">PhysiX-Infer GPU ✦</span> — only
|
| 676 |
+
way to compare the GRPO-trained{" "}
|
| 677 |
+
<code className="font-mono text-textPrimary">physix-3b-rl</code>{" "}
|
| 678 |
+
against its{" "}
|
| 679 |
+
<code className="font-mono text-textPrimary">Qwen 2.5 3B</code> base
|
| 680 |
+
on identical hardware. No token. Sleeps after 5 min idle so first
|
| 681 |
+
request after sleep takes ~90-120 s while two 3B models load on the
|
| 682 |
+
L4 — the status banner below shows live state, with a Prewarm
|
| 683 |
+
button to wake it before you hit Run.
|
| 684 |
+
</li>
|
| 685 |
+
<li>
|
| 686 |
+
<span className="text-textPrimary">Ollama / OpenAI / Custom</span>{" "}
|
| 687 |
+
— bring your own endpoint. Useful for local dev (Ollama on
|
| 688 |
+
<code className="font-mono text-textPrimary">localhost:11434</code>),
|
| 689 |
+
frontier-model baselines (OpenAI), or pointing at a private vLLM /
|
| 690 |
+
inference endpoint URL.
|
| 691 |
+
</li>
|
| 692 |
+
</ul>
|
| 693 |
+
</div>
|
| 694 |
+
);
|
| 695 |
+
}
|
| 696 |
+
|
| 697 |
function ErrorRow({
|
| 698 |
message,
|
| 699 |
onDismiss,
|
frontend/tsconfig.tsbuildinfo
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"root":["./src/app.tsx","./src/main.tsx","./src/components/comparepane.tsx","./src/components/equationdisplay.tsx","./src/components/herointro.tsx","./src/components/llmconnectionpanel.tsx","./src/components/openenvexplorerpane.tsx","./src/components/rewardgauge.tsx","./src/components/runwithllmpane.tsx","./src/components/skeleton.tsx","./src/components/trajectorycanvas.tsx","./src/hooks/usellmcomparerunner.ts","./src/hooks/usellmepisoderunner.ts","./src/lib/cn.ts","./src/lib/format.ts","./src/lib/interactiveclient.ts","./src/lib/llmpresets.ts","./src/lib/trajectory.ts","./src/types/physix.ts"],"version":"5.6.3"}
|
|
|
|
| 1 |
+
{"root":["./src/app.tsx","./src/main.tsx","./src/components/comparepane.tsx","./src/components/equationdisplay.tsx","./src/components/herointro.tsx","./src/components/llmconnectionpanel.tsx","./src/components/openenvexplorerpane.tsx","./src/components/physixinferstatus.tsx","./src/components/rewardgauge.tsx","./src/components/runwithllmpane.tsx","./src/components/skeleton.tsx","./src/components/trajectorycanvas.tsx","./src/hooks/usellmcomparerunner.ts","./src/hooks/usellmepisoderunner.ts","./src/lib/cn.ts","./src/lib/format.ts","./src/lib/interactiveclient.ts","./src/lib/llmpresets.ts","./src/lib/trajectory.ts","./src/types/physix.ts"],"version":"5.6.3"}
|
train/.gitignore
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
**/__pycache__/
|
| 2 |
+
**/*.pyc
|
| 3 |
+
**/*.egg-info/
|
| 4 |
+
runs/
|
| 5 |
+
.DS_Store
|