# physix/scripts/verify_hf_router.py
"""Real-network smoke test for the Hugging Face Router path.
The judges will overwhelmingly run the demo through HF Router, so we
verify it works against the real endpoint *before* shipping the Space.
This script:
1. Confirms HF_TOKEN is set and has the right scope by listing
   the model catalogue via /v1/models. (Cheap; doesn't bill credits.)
2. For every model in the demo's HF Router suggestion list, fires
a single chat completion to confirm at least one serving
provider is warm. Reports which ones serve and which 404.
3. Drives one full PhysiX episode end-to-end through the live
server using whichever model served first, and prints the
reward breakdown.
Not part of pytest because it (a) needs the real HF_TOKEN, (b) burns
real credits (~one cent per run), and (c) is tied to which providers
are warm at any given moment, which is intrinsically flaky.
Usage:
export HF_TOKEN=hf_xxx
# in one terminal:
python -m physix.server.app --host 127.0.0.1 --port 8000
# in another:
python scripts/verify_hf_router.py
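# or, to probe models only (skips the end-to-end episode):
python scripts/verify_hf_router.py --skip-episode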
"""
from __future__ import annotations
import argparse
import asyncio
import os
import sys
import time
from dataclasses import dataclass
import openai
import requests
from physix.server.providers import HF_ROUTER_BASE_URL
# Models the connection panel suggests under the HF Router endpoint.
# Keep this list in sync with `frontend/src/lib/llmPresets.ts`.
HF_SUGGESTED_MODELS: list[str] = [
"Pratyush-01/physix-3b-rl",
"Pratyush-01/physix-3b-sft-merged",
"Qwen/Qwen2.5-3B-Instruct",
"Qwen/Qwen2.5-7B-Instruct",
]
@dataclass
class ModelCheckResult:
model: str
served: bool
detail: str
latency_s: float = 0.0
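# Plain ANSI escape-code helpers for coloured terminal output.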
def _green(s: str) -> str:
return f"\033[32m{s}\033[0m"
def _red(s: str) -> str:
return f"\033[31m{s}\033[0m"
def _yellow(s: str) -> str:
return f"\033[33m{s}\033[0m"
def _bold(s: str) -> str:
return f"\033[1m{s}\033[0m"
def check_token() -> str:
"""Verify HF_TOKEN exists and has Inference Providers scope.
Returns the token. Exits 1 on any auth-level failure with a clear
remediation message — this is the most common reason the demo
appears to "not work" for first-time visitors.
"""
token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_API_KEY")
if not token:
print(_red("HF_TOKEN is not set."))
print(
" → Create one at https://huggingface.co/settings/tokens",
file=sys.stderr,
)
print(
" with the 'Make calls to Inference Providers' fine-grained",
file=sys.stderr,
)
print(
" permission, then `export HF_TOKEN=hf_...` and re-run.",
file=sys.stderr,
)
sys.exit(1)
# /v1/models is the cheapest way to confirm the token has the
# right scope; HF returns 200 with a paginated catalogue.
try:
response = requests.get(
f"{HF_ROUTER_BASE_URL}/models",
headers={"Authorization": f"Bearer {token}"},
timeout=15,
)
except requests.RequestException as exc:
print(_red(f"Could not reach {HF_ROUTER_BASE_URL}: {exc}"))
sys.exit(1)
if response.status_code == 401:
print(
_red(
"HF_TOKEN was rejected (401). The token likely doesn't have "
"the 'Make calls to Inference Providers' permission."
)
)
print(
" → Re-create the token at https://huggingface.co/settings/tokens",
file=sys.stderr,
)
print(
" making sure that fine-grained scope is checked.",
file=sys.stderr,
)
sys.exit(1)
if not response.ok:
print(_red(f"HF Router rejected /models lookup: HTTP {response.status_code}"))
print(response.text[:500], file=sys.stderr)
sys.exit(1)
print(_green(f"✓ HF_TOKEN is valid and has Inference Providers scope."))
return token
def check_model(token: str, model: str, *, timeout_s: float = 60.0) -> ModelCheckResult:
"""Fire one tiny chat completion against a model.
Returns a structured result indicating whether at least one
    provider is currently serving that model. We deliberately cap the
    completion at a few tokens to keep credit usage minimal.
"""
client = openai.OpenAI(
base_url=HF_ROUTER_BASE_URL,
api_key=token,
timeout=timeout_s,
default_headers={"User-Agent": "physix-live-demo/0.1 (verify_hf_router)"},
)
    t0 = time.perf_counter()
try:
response = client.chat.completions.create(
model=model,
messages=[{"role": "user", "content": "Reply with the single word OK."}],
max_tokens=4,
temperature=0.0,
)
except openai.NotFoundError:
return ModelCheckResult(
model=model,
served=False,
            detail=(
                "404 — no Inference Provider is currently serving this model. "
                "Check the model card's 'Deploy → Inference Providers' panel."
            ),
)
except openai.AuthenticationError as exc:
return ModelCheckResult(
model=model,
served=False,
detail=f"401 — {exc}",
)
except openai.BadRequestError as exc:
return ModelCheckResult(
model=model,
served=False,
detail=f"400 — {exc}",
)
except (openai.APIConnectionError, openai.APITimeoutError) as exc:
return ModelCheckResult(
model=model,
served=False,
detail=f"connection/timeout — {exc}",
)
except Exception as exc: # noqa: BLE001
return ModelCheckResult(
model=model,
served=False,
detail=f"{type(exc).__name__}: {exc}",
)
elapsed = time.perf_counter() - t0
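    # Some providers return an empty choices list or a null message content;
    # guard against both so the probe never raises here.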
content = (response.choices[0].message.content if response.choices else "") or ""
return ModelCheckResult(
model=model,
served=True,
detail=f"got: {content.strip()[:40]!r}",
latency_s=elapsed,
)
async def drive_one_episode(token: str, model: str, base_url: str) -> None:
    """Drive a single PhysiX episode end-to-end through the live
    server, using the chosen HF Router model. Confirms not just that
    the LLM responds, but that the full env + verifier loop behind
    the UI works."""
import httpx
print()
print(_bold(f"--- Driving one PhysiX episode through {model} ---"))
timeout = httpx.Timeout(180.0, connect=10.0)
async with httpx.AsyncClient(base_url=base_url, timeout=timeout) as http:
        # Sanity check: the local server is up and responding.
        try:
            health = await http.get("/interactive/systems")
            health.raise_for_status()
        except httpx.HTTPError as exc:
print(_red(f"Local PhysiX server unreachable at {base_url}: {exc}"))
print(
" → Start it with `python -m physix.server.app --host 127.0.0.1 --port 8000`",
file=sys.stderr,
)
return
        max_turns = 4
        start = await http.post(
            "/interactive/sessions",
            json={"system_id": "free_fall_drag", "seed": 42, "max_turns": max_turns},
        )
start.raise_for_status()
session_id = start.json()["session_id"]
print(f" session_id: {session_id}")
        for turn in range(max_turns):
step = await http.post(
f"/interactive/sessions/{session_id}/llm-step",
json={
"base_url": HF_ROUTER_BASE_URL,
"model": model,
"api_key": token,
"temperature": 0.4,
"max_tokens": 1024,
},
)
if step.status_code != 200:
print(_red(f" turn {turn + 1}: HTTP {step.status_code}"))
try:
detail = step.json().get("detail", step.text)
except Exception:
detail = step.text
print(f" {detail}")
break
body = step.json()
reward = body["observation"]["reward_breakdown"]
print(
f" turn {turn + 1}: "
f"match={reward['match']:.2f} "
f"format={reward['format']:.2f} "
f"total={reward['total']:.2f} "
f"({body['latency_s']:.1f}s)"
)
print(f" equation: {body['action']['equation']!r}")
if body["observation"]["done"]:
print(_green(" done."))
break
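        # Clean up the session even when the loop exits early.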
await http.delete(f"/interactive/sessions/{session_id}")
def main() -> None:
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
"--server-url",
default="http://127.0.0.1:8000",
help="Local PhysiX server (default: http://127.0.0.1:8000)",
)
parser.add_argument(
"--skip-episode",
action="store_true",
help="Skip the end-to-end episode drive; only do model probes.",
)
args = parser.parse_args()
print(_bold("=== Step 1: HF_TOKEN ==="))
token = check_token()
print()
print(_bold("=== Step 2: probing each suggested HF model ==="))
print(
" (one tiny completion per model; non-served models will 404 quickly)"
)
print()
results: list[ModelCheckResult] = []
for model in HF_SUGGESTED_MODELS:
print(f" → {model:50s}", end=" ", flush=True)
result = check_model(token, model)
results.append(result)
if result.served:
print(_green(f"OK ({result.latency_s:.1f}s) {result.detail}"))
else:
print(_red("NOT SERVED"))
print(f" {result.detail}")
served = [r for r in results if r.served]
not_served = [r for r in results if not r.served]
print()
print(_bold("=== Summary ==="))
print(f" {_green(f'{len(served)} served')} / {_yellow(f'{len(not_served)} not served')} of {len(results)}")
if not_served:
print()
print(_yellow("Not served:"))
for r in not_served:
print(f" · {r.model}")
print()
        print("If the trained PhysiX model is in the not-served list, you have")
        print("a few options before shipping:")
        print("  1. Open the model card → 'Deploy' → 'Inference Providers' →")
        print("     enable a provider that hosts it (Featherless / Together).")
        print("  2. Append `:fastest` to the model id in the demo's preset")
        print("     to let HF auto-pick a provider.")
        print("  3. Fall back to one of the served baselines — the comparison")
        print("     story still works.")
if args.skip_episode or not served:
return
asyncio.run(drive_one_episode(token, served[0].model, args.server_url))
if __name__ == "__main__":
main()