Spaces:
Sleeping
Sleeping
cleanup: trim verbose comments, drop dead code, fix stale tests, proper Dockerfile + .gitignore
Browse files
- .gitignore +31 -0
- Dockerfile +14 -51
- __init__.py +5 -15
- client.py +1 -7
- frontend/src/components/OpenEnvExplorerPane.tsx +6 -7
- frontend/src/components/PhysixInferStatus.tsx +2 -7
- models.py +1 -7
- scripts/space_app.py +7 -22
- scripts/verify_hf_router.py +1 -2
- tests/test_interactive_api.py +2 -6
- tests/test_providers_hf.py +0 -1
- train/README.md +7 -13
.gitignore
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*.egg-info/
|
| 5 |
+
.pytest_cache/
|
| 6 |
+
.ruff_cache/
|
| 7 |
+
.mypy_cache/
|
| 8 |
+
.venv/
|
| 9 |
+
build/
|
| 10 |
+
dist/
|
| 11 |
+
|
| 12 |
+
# Frontend
|
| 13 |
+
frontend/node_modules/
|
| 14 |
+
frontend/dist/
|
| 15 |
+
frontend/dist-ts-build/
|
| 16 |
+
frontend/tsconfig.tsbuildinfo
|
| 17 |
+
frontend/.vite/
|
| 18 |
+
|
| 19 |
+
# OS / editor
|
| 20 |
+
.DS_Store
|
| 21 |
+
*.swp
|
| 22 |
+
.vscode/
|
| 23 |
+
.idea/
|
| 24 |
+
|
| 25 |
+
# Local env / secrets
|
| 26 |
+
.env
|
| 27 |
+
.env.local
|
| 28 |
+
|
| 29 |
+
# Local artifacts from running training scripts
|
| 30 |
+
/tmp_*/
|
| 31 |
+
*.log
|
Dockerfile
CHANGED
|
@@ -1,59 +1,32 @@
|
|
| 1 |
-
# PhysiX-Live demo Space — CPU-only env + UI
|
| 2 |
#
|
| 3 |
-
#
|
|
|
|
|
|
|
|
|
|
| 4 |
#
|
| 5 |
-
#
|
| 6 |
-
#
|
| 7 |
-
#
|
| 8 |
-
# ├─ /web/ (built React SPA)
|
| 9 |
-
# └─ /interactive/.../llm-step (LLM-driven episode)
|
| 10 |
-
#
|
| 11 |
-
# What this Space does NOT host:
|
| 12 |
-
# * Inference. The demo is CPU-only — no torch, no vLLM, no GPU. When
|
| 13 |
-
# the UI calls `/interactive/.../llm-step` the server forwards to
|
| 14 |
-
# whatever OpenAI-compatible base URL the browser handed us
|
| 15 |
-
# (HF Router, OpenAI, Ollama, or our sister L4 Space at
|
| 16 |
-
# `Pratyush-01/physix-infer` for the trained 3B + Qwen baseline).
|
| 17 |
-
#
|
| 18 |
-
# Why a separate inference Space:
|
| 19 |
-
# Keeps this CPU image tiny (sub-second cold-start) so the demo URL
|
| 20 |
-
# never feels like it's stalled. The L4 Space pays GPU rates only
|
| 21 |
-
# while it's actually serving requests — its `sleep_time=300s` shuts
|
| 22 |
-
# it down between sessions. Two Spaces, two failure surfaces; if
|
| 23 |
-
# inference is broken the verifier-only demo (Custom URL → Ollama
|
| 24 |
-
# etc.) still works.
|
| 25 |
|
| 26 |
############################
|
| 27 |
# Stage 1: build the SPA
|
| 28 |
############################
|
| 29 |
-
# WORKDIR renamed (was /build) to break HF BuildKit's poisoned cache.
|
| 30 |
-
# The previous /build mount kept a stale pnpm symlink at
|
| 31 |
-
# /build/node_modules/@types/katex
|
| 32 |
-
# from an earlier failed deploy, and every subsequent `COPY frontend/ ./`
|
| 33 |
-
# blew up with `cannot copy to non-directory`. Switching paths gets us
|
| 34 |
-
# a fresh cache bucket; nothing in the project depends on /build itself.
|
| 35 |
FROM node:20-alpine AS frontend
|
| 36 |
WORKDIR /spa
|
| 37 |
RUN corepack enable
|
| 38 |
|
| 39 |
-
# Copy ALL of frontend/ first — including package.json/pnpm-lock.yaml —
|
| 40 |
-
# THEN install. Order matters: install runs ON TOP OF the source tree
|
| 41 |
-
# instead of the source tree being overlaid on top of a pre-installed
|
| 42 |
-
# node_modules, eliminating the directory-vs-symlink collision class
|
| 43 |
-
# of failure entirely.
|
| 44 |
COPY frontend/ ./
|
| 45 |
|
| 46 |
-
# Same-origin API fetches
|
| 47 |
ENV VITE_PHYSIX_API_URL=""
|
| 48 |
|
| 49 |
-
# Cache-bust marker. Bump when an SPA change isn't taking on the Space.
|
| 50 |
-
# physix-spa-rebuild: 10
|
| 51 |
RUN pnpm install --frozen-lockfile \
|
| 52 |
&& pnpm exec tsc -b \
|
| 53 |
&& pnpm exec vite build --base=/web/
|
| 54 |
|
| 55 |
############################
|
| 56 |
-
# Stage 2: runtime
|
| 57 |
############################
|
| 58 |
FROM python:3.11-slim AS runtime
|
| 59 |
|
|
@@ -67,16 +40,13 @@ ENV PYTHONUNBUFFERED=1 \
|
|
| 67 |
PHYSIX_HOST=0.0.0.0 \
|
| 68 |
PHYSIX_CORS_ORIGINS=*
|
| 69 |
|
| 70 |
-
# curl for healthchecks; the slim image has neither curl nor build tools
|
| 71 |
-
# by default. Everything else (numpy, scipy, sympy) is a wheel install.
|
| 72 |
RUN apt-get update \
|
| 73 |
&& apt-get install -y --no-install-recommends curl \
|
| 74 |
&& rm -rf /var/lib/apt/lists/*
|
| 75 |
|
| 76 |
WORKDIR /app
|
| 77 |
|
| 78 |
-
#
|
| 79 |
-
# this Space never trains and never runs a model locally.
|
| 80 |
RUN pip install \
|
| 81 |
"openenv-core[core]>=0.2.2" \
|
| 82 |
"numpy>=1.24" \
|
|
@@ -88,27 +58,20 @@ RUN pip install \
|
|
| 88 |
"openai>=1.40" \
|
| 89 |
"requests>=2.31"
|
| 90 |
|
| 91 |
-
COPY pyproject.toml ./
|
| 92 |
COPY physix ./physix
|
| 93 |
-
COPY README.md ./
|
| 94 |
RUN pip install --no-deps -e .
|
| 95 |
|
| 96 |
-
# Built SPA from stage 1.
|
| 97 |
COPY --from=frontend /spa/dist /app/static
|
| 98 |
-
|
| 99 |
-
# Space wrapper — mounts the React SPA at /web/, registers / -> /web/
|
| 100 |
-
# redirect (OpenEnv's create_fastapi_app doesn't add one for us).
|
| 101 |
COPY scripts/space_app.py /app/_space_app.py
|
| 102 |
|
| 103 |
-
#
|
| 104 |
-
#
|
| 105 |
-
# and be world-writable BEFORE the runtime user shows up.
|
| 106 |
RUN mkdir -p "$HOME" "$HF_HOME" "$XDG_CACHE_HOME" \
|
| 107 |
&& chmod -R 0777 /tmp /app
|
| 108 |
|
| 109 |
EXPOSE 7860
|
| 110 |
|
| 111 |
-
# /health is OpenEnv's stock endpoint and turns 200 once uvicorn binds.
|
| 112 |
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
|
| 113 |
CMD curl -fsS "http://127.0.0.1:${PORT}/health" || exit 1
|
| 114 |
|
|
|
|
| 1 |
+
# PhysiX-Live demo Space — CPU-only env + UI on :7860
|
| 2 |
#
|
| 3 |
+
# uvicorn _space_app:app
|
| 4 |
+
# ├─ /reset, /step OpenEnv stateless API
|
| 5 |
+
# ├─ /interactive/* browser session API + LLM-step
|
| 6 |
+
# └─ /web/ built React SPA
|
| 7 |
#
|
| 8 |
+
# No torch / vLLM / GPU here. LLM inference is forwarded to whatever
|
| 9 |
+
# OpenAI-compatible base URL the browser provides (HF Router, OpenAI,
|
| 10 |
+
# Ollama, or our sister L4 Space `Pratyush-01/physix-infer`).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
############################
|
| 13 |
# Stage 1: build the SPA
|
| 14 |
############################
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
FROM node:20-alpine AS frontend
|
| 16 |
WORKDIR /spa
|
| 17 |
RUN corepack enable
|
| 18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
COPY frontend/ ./
|
| 20 |
|
| 21 |
+
# Same-origin API fetches at runtime (Space serves both API and UI).
|
| 22 |
ENV VITE_PHYSIX_API_URL=""
|
| 23 |
|
|
|
|
|
|
|
| 24 |
RUN pnpm install --frozen-lockfile \
|
| 25 |
&& pnpm exec tsc -b \
|
| 26 |
&& pnpm exec vite build --base=/web/
|
| 27 |
|
| 28 |
############################
|
| 29 |
+
# Stage 2: runtime
|
| 30 |
############################
|
| 31 |
FROM python:3.11-slim AS runtime
|
| 32 |
|
|
|
|
| 40 |
PHYSIX_HOST=0.0.0.0 \
|
| 41 |
PHYSIX_CORS_ORIGINS=*
|
| 42 |
|
|
|
|
|
|
|
| 43 |
RUN apt-get update \
|
| 44 |
&& apt-get install -y --no-install-recommends curl \
|
| 45 |
&& rm -rf /var/lib/apt/lists/*
|
| 46 |
|
| 47 |
WORKDIR /app
|
| 48 |
|
| 49 |
+
# Server runtime deps only (env + API client) — no torch / unsloth / trl. Training runs on HF Jobs.
|
|
|
|
| 50 |
RUN pip install \
|
| 51 |
"openenv-core[core]>=0.2.2" \
|
| 52 |
"numpy>=1.24" \
|
|
|
|
| 58 |
"openai>=1.40" \
|
| 59 |
"requests>=2.31"
|
| 60 |
|
| 61 |
+
COPY pyproject.toml README.md ./
|
| 62 |
COPY physix ./physix
|
|
|
|
| 63 |
RUN pip install --no-deps -e .
|
| 64 |
|
|
|
|
| 65 |
COPY --from=frontend /spa/dist /app/static
|
|
|
|
|
|
|
|
|
|
| 66 |
COPY scripts/space_app.py /app/_space_app.py
|
| 67 |
|
| 68 |
+
# HF Spaces runs as a non-root UID with no /etc/passwd; pre-create
|
| 69 |
+
# writable cache dirs so $HOME-based caches work.
|
|
|
|
| 70 |
RUN mkdir -p "$HOME" "$HF_HOME" "$XDG_CACHE_HOME" \
|
| 71 |
&& chmod -R 0777 /tmp /app
|
| 72 |
|
| 73 |
EXPOSE 7860
|
| 74 |
|
|
|
|
| 75 |
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
|
| 76 |
CMD curl -fsS "http://127.0.0.1:${PORT}/health" || exit 1
|
| 77 |
|
__init__.py
CHANGED
|
@@ -1,19 +1,9 @@
|
|
| 1 |
-
"""OpenEnv root package shim.
|
| 2 |
|
| 3 |
-
|
| 4 |
-
``
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
imports stay at ``from physix.* import ...``. These root shims simply
|
| 8 |
-
re-export from ``physix`` so OpenEnv's auto-discovery (which Pascal-
|
| 9 |
-
cases ``name: physix`` to ``Physix*`` in client.py) finds the symbols
|
| 10 |
-
it expects without duplicating any implementation.
|
| 11 |
-
|
| 12 |
-
NOTE: Inside the deployed Space the package installed via
|
| 13 |
-
``pip install -e .`` is ``physix`` (see ``[tool.hatch.build.targets.wheel]``
|
| 14 |
-
in pyproject.toml). These root files are *only* loaded by the OpenEnv
|
| 15 |
-
CLI's local validator and by users who import the env-directory as a
|
| 16 |
-
package; they are never imported at runtime by the FastAPI server.
|
| 17 |
"""
|
| 18 |
|
| 19 |
from physix import ( # noqa: F401
|
|
|
|
| 1 |
+
"""OpenEnv root package shim — re-exports the public API from ``physix``.
|
| 2 |
|
| 3 |
+
OpenEnv's CLI validator expects ``__init__.py``, ``client.py``, and
|
| 4 |
+
``models.py`` at the env-directory root. The real implementation lives
|
| 5 |
+
under ``physix/``; these root files are thin re-exports so the wheel
|
| 6 |
+
build and runtime imports stay at ``from physix.* import ...``.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
"""
|
| 8 |
|
| 9 |
from physix import ( # noqa: F401
|
client.py
CHANGED
|
@@ -1,10 +1,4 @@
|
|
| 1 |
-
"""OpenEnv root client shim — re-exports ``physix.client``.
|
| 2 |
-
|
| 3 |
-
OpenEnv's CLI validator and auto-discovery expect ``client.py`` at the
|
| 4 |
-
env-directory root. The real implementation lives in
|
| 5 |
-
``physix/client.py``; this file just re-exports it so the OpenEnv
|
| 6 |
-
contract is satisfied without duplicating any code.
|
| 7 |
-
"""
|
| 8 |
|
| 9 |
from physix.client import PhysiXEnv, PhysixEnv # noqa: F401
|
| 10 |
|
|
|
|
| 1 |
+
"""OpenEnv root client shim — re-exports ``physix.client``."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
from physix.client import PhysiXEnv, PhysixEnv # noqa: F401
|
| 4 |
|
frontend/src/components/OpenEnvExplorerPane.tsx
CHANGED
|
@@ -46,7 +46,7 @@ const DEFAULT_EQUATION = "d2y/dt2 = -9.81";
|
|
| 46 |
const DEFAULT_PARAMS_JSON = "{}";
|
| 47 |
const DEFAULT_RATIONALE = "Free-fall under gravity.";
|
| 48 |
|
| 49 |
-
// Same shape used by RunWithLlmPane
|
| 50 |
// renders the identical reward layout when no step has run yet.
|
| 51 |
const ZERO_REWARD: RewardBreakdown = {
|
| 52 |
match: 0,
|
|
@@ -985,12 +985,11 @@ function ReferenceCard({
|
|
| 985 |
|
| 986 |
// ---------------- reward display ----------------
|
| 987 |
//
|
| 988 |
-
// Kept in sync with
|
| 989 |
-
//
|
| 990 |
-
//
|
| 991 |
-
//
|
| 992 |
-
//
|
| 993 |
-
// zero on small phase shifts, which makes match=0 misleading on its
|
| 994 |
// own; shape/freq/amplitude give partial credit for "visual closeness"
|
| 995 |
// without ever feeding into the reward total or the trainer.
|
| 996 |
|
|
|
|
| 46 |
const DEFAULT_PARAMS_JSON = "{}";
|
| 47 |
const DEFAULT_RATIONALE = "Free-fall under gravity.";
|
| 48 |
|
| 49 |
+
// Same zero-reward shape used by RunWithLlmPane so the OpenEnv tab
|
| 50 |
// renders the identical reward layout when no step has run yet.
|
| 51 |
const ZERO_REWARD: RewardBreakdown = {
|
| 52 |
match: 0,
|
|
|
|
| 985 |
|
| 986 |
// ---------------- reward display ----------------
|
| 987 |
//
|
| 988 |
+
// Kept visually in sync with RunWithLlmPane: four trainable reward
|
| 989 |
+
// components on top (match / progress / simplicity / format) and three
|
| 990 |
+
// diagnostic-only sub-scores (shape / freq / amplitude) on the bottom
|
| 991 |
+
// labelled "diag". Diag exists because R² collapses to zero on small
|
| 992 |
+
// phase shifts, which makes match=0 misleading on its
|
|
|
|
| 993 |
// own; shape/freq/amplitude give partial credit for "visual closeness"
|
| 994 |
// without ever feeding into the reward total or the trainer.
|
| 995 |
|
frontend/src/components/PhysixInferStatus.tsx
CHANGED
|
@@ -69,13 +69,8 @@ interface ProbeResult {
|
|
| 69 |
hitContainer: boolean;
|
| 70 |
}
|
| 71 |
|
| 72 |
-
// Module-level dedup.
|
| 73 |
-
//
|
| 74 |
-
// their own `/health` GET every 15 s — pointless duplicate load on
|
| 75 |
-
// the GPU Space's edge. We share a single in-flight promise across
|
| 76 |
-
// concurrent callers and cache the last successful result for a
|
| 77 |
-
// short window so the second mount on the same tick reuses the
|
| 78 |
-
// first probe's answer instead of issuing its own.
|
| 79 |
let inFlight: Promise<ProbeResult> | null = null;
|
| 80 |
let lastResult: { result: ProbeResult; at: number } | null = null;
|
| 81 |
const SHARED_RESULT_WINDOW_MS = 5_000;
|
|
|
|
| 69 |
hitContainer: boolean;
|
| 70 |
}
|
| 71 |
|
| 72 |
+
// Module-level dedup. Multiple mounts share a single in-flight `/health`
|
| 73 |
+
// probe and cache the last successful result for a short window.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
let inFlight: Promise<ProbeResult> | null = null;
|
| 75 |
let lastResult: { result: ProbeResult; at: number } | null = null;
|
| 76 |
const SHARED_RESULT_WINDOW_MS = 5_000;
|
models.py
CHANGED
|
@@ -1,10 +1,4 @@
|
|
| 1 |
-
"""OpenEnv root models shim — re-exports ``physix.models``.
|
| 2 |
-
|
| 3 |
-
OpenEnv's CLI validator expects ``models.py`` at the env-directory
|
| 4 |
-
root. The real Pydantic schemas live in ``physix/models.py``; this
|
| 5 |
-
file re-exports them so OpenEnv's auto-discovery finds them under
|
| 6 |
-
the env-name-derived path.
|
| 7 |
-
"""
|
| 8 |
|
| 9 |
from physix.models import ( # noqa: F401
|
| 10 |
CONVERGENCE_THRESHOLD,
|
|
|
|
| 1 |
+
"""OpenEnv root models shim — re-exports ``physix.models``."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
from physix.models import ( # noqa: F401
|
| 4 |
CONVERGENCE_THRESHOLD,
|
scripts/space_app.py
CHANGED
|
@@ -1,23 +1,11 @@
|
|
| 1 |
"""Space entrypoint: physix.server.app:app + static UI mount.
|
| 2 |
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
does NOT add a root redirect; that's only in
|
| 10 |
-
the higher-level wrapper which mounts Gradio
|
| 11 |
-
at ``/web`` and would clobber our React SPA).
|
| 12 |
-
2. ``GET /web`` -> 302 to ``/web/`` (same reason; users hit the
|
| 13 |
-
no-trailing-slash variant from outside links).
|
| 14 |
-
3. ``StaticFiles`` mount at ``/web/`` serving the built Vite SPA. The
|
| 15 |
-
vite build was run with ``--base=/web/`` so all asset URLs in the
|
| 16 |
-
emitted ``index.html`` already include the prefix.
|
| 17 |
-
|
| 18 |
-
Kept as a real .py file (not a heredoc inside the Dockerfile) so any
|
| 19 |
-
syntax error is caught by the build's static analysis rather than at
|
| 20 |
-
runtime — saved several deploy-fail loops in earlier iterations.
|
| 21 |
"""
|
| 22 |
|
| 23 |
from __future__ import annotations
|
|
@@ -44,10 +32,7 @@ async def _web_no_slash_redirect() -> RedirectResponse:
|
|
| 44 |
|
| 45 |
if _STATIC_DIR.is_dir():
|
| 46 |
# html=True makes StaticFiles serve index.html for directory hits and
|
| 47 |
-
# fall back to it for unknown sub-paths
|
| 48 |
-
# works). Mounted last so registered API routes (/web/metadata,
|
| 49 |
-
# /web/reset, /web/step from OpenEnv; /interactive/* from physix)
|
| 50 |
-
# always win over the static handler.
|
| 51 |
app.mount(
|
| 52 |
"/web",
|
| 53 |
StaticFiles(directory=str(_STATIC_DIR), html=True),
|
|
|
|
| 1 |
"""Space entrypoint: physix.server.app:app + static UI mount.
|
| 2 |
|
| 3 |
+
Adds two things on top of `physix.server.app:app`:
|
| 4 |
+
|
| 5 |
+
1. `GET /` and `GET /web` -> 302 to `/web/` (the OpenEnv app doesn't
|
| 6 |
+
ship a root redirect, so the bare Space URL would otherwise 404).
|
| 7 |
+
2. `StaticFiles` mount at `/web/` for the built Vite SPA (Vite is
|
| 8 |
+
built with `--base=/web/` so asset URLs already include the prefix).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
"""
|
| 10 |
|
| 11 |
from __future__ import annotations
|
|
|
|
| 32 |
|
| 33 |
if _STATIC_DIR.is_dir():
|
| 34 |
# html=True makes StaticFiles serve index.html for directory hits and
|
| 35 |
+
# 404.html (when present) for unknown sub-paths; it does not rewrite them to index.html.
|
|
|
|
|
|
|
|
|
|
| 36 |
app.mount(
|
| 37 |
"/web",
|
| 38 |
StaticFiles(directory=str(_STATIC_DIR), html=True),
|
scripts/verify_hf_router.py
CHANGED
|
@@ -30,7 +30,6 @@ from __future__ import annotations
|
|
| 30 |
|
| 31 |
import argparse
|
| 32 |
import asyncio
|
| 33 |
-
import json
|
| 34 |
import os
|
| 35 |
import sys
|
| 36 |
from dataclasses import dataclass
|
|
@@ -132,7 +131,7 @@ def check_token() -> str:
|
|
| 132 |
print(response.text[:500], file=sys.stderr)
|
| 133 |
sys.exit(1)
|
| 134 |
|
| 135 |
-
print(_green(
|
| 136 |
return token
|
| 137 |
|
| 138 |
|
|
|
|
| 30 |
|
| 31 |
import argparse
|
| 32 |
import asyncio
|
|
|
|
| 33 |
import os
|
| 34 |
import sys
|
| 35 |
from dataclasses import dataclass
|
|
|
|
| 131 |
print(response.text[:500], file=sys.stderr)
|
| 132 |
sys.exit(1)
|
| 133 |
|
| 134 |
+
print(_green("✓ HF_TOKEN is valid and has Inference Providers scope."))
|
| 135 |
return token
|
| 136 |
|
| 137 |
|
tests/test_interactive_api.py
CHANGED
|
@@ -89,15 +89,11 @@ def test_systems_endpoint_returns_supported_systems_in_order(
|
|
| 89 |
returned_ids = [row["system_id"] for row in catalogue]
|
| 90 |
assert returned_ids == list(SUPPORTED_SYSTEMS)
|
| 91 |
|
| 92 |
-
# The
|
| 93 |
-
# tier-3 (``projectile_drag``, ``charged_b_field``) so visitors can
|
| 94 |
-
# stress-test the verifier on systems the model never trained on —
|
| 95 |
-
# that's the generalisation showcase.
|
| 96 |
system_ids = set(returned_ids)
|
| 97 |
assert "free_fall" in system_ids
|
|
|
|
| 98 |
assert "damped_spring" in system_ids
|
| 99 |
-
assert "projectile_drag" in system_ids
|
| 100 |
-
assert "charged_b_field" in system_ids
|
| 101 |
|
| 102 |
|
| 103 |
# --- Local model catalogue ---
|
|
|
|
| 89 |
returned_ids = [row["system_id"] for row in catalogue]
|
| 90 |
assert returned_ids == list(SUPPORTED_SYSTEMS)
|
| 91 |
|
| 92 |
+
# The 3 systems we trained on must all be exposed.
|
|
|
|
|
|
|
|
|
|
| 93 |
system_ids = set(returned_ids)
|
| 94 |
assert "free_fall" in system_ids
|
| 95 |
+
assert "simple_pendulum" in system_ids
|
| 96 |
assert "damped_spring" in system_ids
|
|
|
|
|
|
|
| 97 |
|
| 98 |
|
| 99 |
# --- Local model catalogue ---
|
tests/test_providers_hf.py
CHANGED
|
@@ -27,7 +27,6 @@ from unittest.mock import MagicMock, patch
|
|
| 27 |
|
| 28 |
import openai
|
| 29 |
import pytest
|
| 30 |
-
from fastapi import FastAPI
|
| 31 |
from fastapi.middleware.cors import CORSMiddleware
|
| 32 |
from fastapi.testclient import TestClient
|
| 33 |
from openenv.core.env_server import create_fastapi_app
|
|
|
|
| 27 |
|
| 28 |
import openai
|
| 29 |
import pytest
|
|
|
|
| 30 |
from fastapi.middleware.cors import CORSMiddleware
|
| 31 |
from fastapi.testclient import TestClient
|
| 32 |
from openenv.core.env_server import create_fastapi_app
|
train/README.md
CHANGED
|
@@ -4,19 +4,13 @@ This folder contains the scripts that launch SFT → GRPO training for the
|
|
| 4 |
[PhysiX OpenEnv](../) on **Hugging Face Jobs**, plus a self-contained
|
| 5 |
**Colab notebook** judges can re-run.
|
| 6 |
|
| 7 |
-
> This used to be a separate `physix-train/` repo / training Space
|
| 8 |
-
> (Dockerfile + `train.sh`). We migrated to HF Jobs because it queues,
|
| 9 |
-
> doesn't pay for idle time, and reuses the upstream Unsloth image
|
| 10 |
-
> directly. The Docker artifacts have been removed and the launcher
|
| 11 |
-
> moved into the env repo, so there's now one repo, one Space.
|
| 12 |
-
|
| 13 |
## Files
|
| 14 |
|
| 15 |
| File | What it does |
|
| 16 |
|------|--------------|
|
| 17 |
-
| [`physix_train_colab.ipynb`](physix_train_colab.ipynb) | End-to-end SFT → GRPO in one notebook. Built on **OpenEnv + Unsloth + TRL**. T4/L4 for `1.5b` profile, A100 for `3b`. |
|
| 18 |
-
| [`submit.py`](submit.py) | Submit a job to HF Jobs via `HfApi.run_uv_job`
|
| 19 |
-
| [`job_train.py`](job_train.py) |
|
| 20 |
| [`job_train_single.py`](job_train_single.py) | Single-system variant (defaults to `damped_spring`) — focused reward signal, easier to read curves. |
|
| 21 |
| [`sync-plots.sh`](sync-plots.sh) | Pull committed loss/reward PNGs from the model repo into `../docs/plots/` so they ship with the env Space. |
|
| 22 |
|
|
@@ -35,15 +29,15 @@ export WANDB_API_KEY=wandb_v1_...
|
|
| 35 |
python submit.py
|
| 36 |
```
|
| 37 |
|
| 38 |
-
Defaults: l40sx1
|
| 39 |
-
`
|
| 40 |
|
| 41 |
## Run in Colab
|
| 42 |
|
| 43 |
Open [`physix_train_colab.ipynb`](physix_train_colab.ipynb) on a Colab
|
| 44 |
GPU runtime. The notebook installs the same dependency set as the cloud
|
| 45 |
-
job, fetches the source from
|
| 46 |
-
plots loss + reward
|
| 47 |
|
| 48 |
## Pipeline cost (l40sx1, 3B profile)
|
| 49 |
|
|
|
|
| 4 |
[PhysiX OpenEnv](../) on **Hugging Face Jobs**, plus a self-contained
|
| 5 |
**Colab notebook** judges can re-run.
|
| 6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
## Files
|
| 8 |
|
| 9 |
| File | What it does |
|
| 10 |
|------|--------------|
|
| 11 |
+
| [`physix_train_colab.ipynb`](physix_train_colab.ipynb) | End-to-end SFT → GRPO in one notebook. Built on **OpenEnv + Unsloth + TRL**. T4/L4 for `1.5b` profile, L4/A100 for `3b`. |
|
| 12 |
+
| [`submit.py`](submit.py) | Submit a job to HF Jobs via `HfApi.run_uv_job`. |
|
| 13 |
+
| [`job_train.py`](job_train.py) | Training driver across the 3 trained systems. Runs *inside* the HF Jobs container. PEP 723 inline deps. |
|
| 14 |
| [`job_train_single.py`](job_train_single.py) | Single-system variant (defaults to `damped_spring`) — focused reward signal, easier to read curves. |
|
| 15 |
| [`sync-plots.sh`](sync-plots.sh) | Pull committed loss/reward PNGs from the model repo into `../docs/plots/` so they ship with the env Space. |
|
| 16 |
|
|
|
|
| 29 |
python submit.py
|
| 30 |
```
|
| 31 |
|
| 32 |
+
Defaults: l40sx1, 3 h timeout. Source is fetched at job-start by
|
| 33 |
+
`_stage_physix_live()` directly from this Hugging Face Space repo.
|
| 34 |
|
| 35 |
## Run in Colab
|
| 36 |
|
| 37 |
Open [`physix_train_colab.ipynb`](physix_train_colab.ipynb) on a Colab
|
| 38 |
GPU runtime. The notebook installs the same dependency set as the cloud
|
| 39 |
+
job, fetches the source from this Hugging Face Space, runs SFT then
|
| 40 |
+
GRPO, and plots loss + reward curves at the end.
|
| 41 |
|
| 42 |
## Pipeline cost (l40sx1, 3B profile)
|
| 43 |
|