| """Real-network smoke test for the Hugging Face Router path. | |
| The judges will overwhelmingly run the demo through HF Router, so we | |
| verify it works against the real endpoint *before* shipping the Space. | |
| This script: | |
| 1. Confirms HF_TOKEN is set and has the right scope by listing | |
| accounts via /v1/models. (Cheap; doesn't bill credits.) | |
| 2. For every model in the demo's HF Router suggestion list, fires | |
| a single chat completion to confirm at least one serving | |
| provider is warm. Reports which ones serve and which 404. | |
| 3. Drives one full PhysiX episode end-to-end through the live | |
| server using whichever model served first, and prints the | |
| reward breakdown. | |
| Not part of pytest because it (a) needs the real HF_TOKEN, (b) burns | |
| real credits (~one cent per run), and (c) is tied to which providers | |
| are warm at any given moment, which is intrinsically flaky. | |
| Usage: | |
| export HF_TOKEN=hf_xxx | |
| # in one terminal: | |
| python -m physix.server.app --host 127.0.0.1 --port 8000 | |
| # in another: | |
| python scripts/verify_hf_router.py | |
| """ | |
from __future__ import annotations

import argparse
import asyncio
import os
import sys
import time
from dataclasses import dataclass

import openai
import requests

from physix.server.providers import HF_ROUTER_BASE_URL
# Models the connection panel suggests under the HF Router endpoint.
# Keep this list in sync with `frontend/src/lib/llmPresets.ts`.
HF_SUGGESTED_MODELS: list[str] = [
    "Pratyush-01/physix-3b-rl",
    "Pratyush-01/physix-3b-sft-merged",
    "Qwen/Qwen2.5-3B-Instruct",
    "Qwen/Qwen2.5-7B-Instruct",
]
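
# NOTE: HF Router also accepts provider-suffixed model ids (e.g. appending
# `:fastest`, as the summary guidance below suggests); the probes here use
# the bare ids exactly as the connection panel presents them.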


@dataclass
class ModelCheckResult:
    """Outcome of probing one model through HF Router."""

    model: str
    served: bool
    detail: str
    latency_s: float = 0.0


def _green(s: str) -> str:
    return f"\033[32m{s}\033[0m"


def _red(s: str) -> str:
    return f"\033[31m{s}\033[0m"


def _yellow(s: str) -> str:
    return f"\033[33m{s}\033[0m"


def _bold(s: str) -> str:
    return f"\033[1m{s}\033[0m"


def check_token() -> str:
    """Verify HF_TOKEN exists and has Inference Providers scope.

    Returns the token. Exits 1 on any auth-level failure with a clear
    remediation message — this is the most common reason the demo
    appears to "not work" for first-time visitors.
    """
    token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_API_KEY")
    if not token:
        print(_red("HF_TOKEN is not set."))
        print(
            "  → Create one at https://huggingface.co/settings/tokens",
            file=sys.stderr,
        )
        print(
            "    with the 'Make calls to Inference Providers' fine-grained",
            file=sys.stderr,
        )
        print(
            "    permission, then `export HF_TOKEN=hf_...` and re-run.",
            file=sys.stderr,
        )
        sys.exit(1)
    # /v1/models is the cheapest way to confirm the token has the
    # right scope; HF returns 200 with a paginated catalogue.
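    # The response body is assumed to follow the OpenAI-style list shape,
    # roughly {"object": "list", "data": [{"id": "<model>", ...}, ...]};
    # we only look at the status code and never parse it.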
    try:
        response = requests.get(
            f"{HF_ROUTER_BASE_URL}/models",
            headers={"Authorization": f"Bearer {token}"},
            timeout=15,
        )
    except requests.RequestException as exc:
        print(_red(f"Could not reach {HF_ROUTER_BASE_URL}: {exc}"))
        sys.exit(1)
    if response.status_code == 401:
        print(
            _red(
                "HF_TOKEN was rejected (401). The token likely doesn't have "
                "the 'Make calls to Inference Providers' permission."
            )
        )
        print(
            "  → Re-create the token at https://huggingface.co/settings/tokens",
            file=sys.stderr,
        )
        print(
            "    making sure that fine-grained scope is checked.",
            file=sys.stderr,
        )
        sys.exit(1)
    if not response.ok:
        print(_red(f"HF Router rejected /models lookup: HTTP {response.status_code}"))
        print(response.text[:500], file=sys.stderr)
        sys.exit(1)
    print(_green("✓ HF_TOKEN is valid and has Inference Providers scope."))
    return token


def check_model(token: str, model: str, *, timeout_s: float = 60.0) -> ModelCheckResult:
    """Fire one tiny chat completion against a model.

    Returns a structured result indicating whether at least one
    provider is currently serving that model. We deliberately cap the
    completion at a handful of tokens to keep credit usage minimal.
    """
    client = openai.OpenAI(
        base_url=HF_ROUTER_BASE_URL,
        api_key=token,
        timeout=timeout_s,
        default_headers={"User-Agent": "physix-live-demo/0.1 (verify_hf_router)"},
    )
    t0 = time.perf_counter()
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": "Reply with the single word OK."}],
            max_tokens=4,
            temperature=0.0,
        )
    except openai.NotFoundError:
        return ModelCheckResult(
            model=model,
            served=False,
            detail=(
                "404 — no Inference Provider is currently serving this model. "
                "Check the model card's 'Deploy → Inference Providers' panel."
            ),
        )
    except openai.AuthenticationError as exc:
        return ModelCheckResult(
            model=model,
            served=False,
            detail=f"401 — {exc}",
        )
    except openai.BadRequestError as exc:
        return ModelCheckResult(
            model=model,
            served=False,
            detail=f"400 — {exc}",
        )
    except (openai.APIConnectionError, openai.APITimeoutError) as exc:
        return ModelCheckResult(
            model=model,
            served=False,
            detail=f"connection/timeout — {exc}",
        )
    except Exception as exc:  # noqa: BLE001
        return ModelCheckResult(
            model=model,
            served=False,
            detail=f"{type(exc).__name__}: {exc}",
        )
    elapsed = time.perf_counter() - t0
    content = (response.choices[0].message.content if response.choices else "") or ""
    return ModelCheckResult(
        model=model,
        served=True,
        detail=f"got: {content.strip()[:40]!r}",
        latency_s=elapsed,
    )
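

# Illustrative one-off probe from a REPL (hypothetical output; the exact
# detail string depends on which provider answers):
#   >>> check_model(os.environ["HF_TOKEN"], "Qwen/Qwen2.5-3B-Instruct")
#   ModelCheckResult(model='Qwen/Qwen2.5-3B-Instruct', served=True, ...)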


async def drive_one_episode(token: str, model: str, base_url: str) -> None:
    """Drive a single PhysiX episode end-to-end through the live
    server, using the chosen HF Router model. Confirms not just that
    the LLM responds, but that the full env+verifier+UI loop works."""
    import httpx

    print()
    print(_bold(f"--- Driving one PhysiX episode through {model} ---"))
    timeout = httpx.Timeout(180.0, connect=10.0)
    async with httpx.AsyncClient(base_url=base_url, timeout=timeout) as http:
        # Sanity: the local server is up.
        try:
            await http.get("/interactive/systems")
        except httpx.HTTPError as exc:
            print(_red(f"Local PhysiX server unreachable at {base_url}: {exc}"))
            print(
                "  → Start it with `python -m physix.server.app --host 127.0.0.1 --port 8000`",
                file=sys.stderr,
            )
            return

        start = await http.post(
            "/interactive/sessions",
            json={"system_id": "free_fall_drag", "seed": 42, "max_turns": 4},
        )
        start.raise_for_status()
        session_id = start.json()["session_id"]
        print(f"  session_id: {session_id}")

        for turn in range(4):
            step = await http.post(
                f"/interactive/sessions/{session_id}/llm-step",
                json={
                    "base_url": HF_ROUTER_BASE_URL,
                    "model": model,
                    "api_key": token,
                    "temperature": 0.4,
                    "max_tokens": 1024,
                },
            )
            if step.status_code != 200:
                print(_red(f"  turn {turn + 1}: HTTP {step.status_code}"))
                try:
                    detail = step.json().get("detail", step.text)
                except Exception:  # noqa: BLE001
                    detail = step.text
                print(f"    {detail}")
                break
            body = step.json()
            reward = body["observation"]["reward_breakdown"]
            print(
                f"  turn {turn + 1}: "
                f"match={reward['match']:.2f} "
                f"format={reward['format']:.2f} "
                f"total={reward['total']:.2f} "
                f"({body['latency_s']:.1f}s)"
            )
            print(f"    equation: {body['action']['equation']!r}")
            if body["observation"]["done"]:
                print(_green("  done."))
                break

        await http.delete(f"/interactive/sessions/{session_id}")


def main() -> None:
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--server-url",
        default="http://127.0.0.1:8000",
        help="Local PhysiX server (default: http://127.0.0.1:8000)",
    )
    parser.add_argument(
        "--skip-episode",
        action="store_true",
        help="Skip the end-to-end episode drive; only do model probes.",
    )
    args = parser.parse_args()

    print(_bold("=== Step 1: HF_TOKEN ==="))
    token = check_token()
    print()
    print(_bold("=== Step 2: probing each suggested HF model ==="))
    print("  (one tiny completion per model; non-served models will 404 quickly)")
    print()

    results: list[ModelCheckResult] = []
    for model in HF_SUGGESTED_MODELS:
        print(f"  → {model:50s}", end=" ", flush=True)
        result = check_model(token, model)
        results.append(result)
        if result.served:
            print(_green(f"OK ({result.latency_s:.1f}s) {result.detail}"))
        else:
            print(_red("NOT SERVED"))
            print(f"      {result.detail}")

    served = [r for r in results if r.served]
    not_served = [r for r in results if not r.served]
    print()
    print(_bold("=== Summary ==="))
    print(
        f"  {_green(f'{len(served)} served')} / "
        f"{_yellow(f'{len(not_served)} not served')} of {len(results)}"
    )
    if not_served:
        print()
        print(_yellow("Not served:"))
        for r in not_served:
            print(f"  · {r.model}")
        print()
        print("If the trained PhysiX model is in the not-served list, you have")
        print("a few options before shipping:")
        print("  1. Open the model card → 'Deploy' → 'Inference Providers' →")
        print("     enable a provider that hosts it (Featherless / Together).")
        print("  2. Append `:fastest` to the model id in the demo's preset")
        print("     to let HF auto-pick a provider.")
        print("  3. Fall back to one of the served baselines — the comparison")
        print("     story still works.")

    if args.skip_episode or not served:
        return

    asyncio.run(drive_one_episode(token, served[0].model, args.server_url))


if __name__ == "__main__":
    main()