# /// script
# requires-python = ">=3.10"
# dependencies = [
#   "torch",
#   "transformers==4.56.2",
#   "trl==0.22.2",
#   "datasets",
#   "peft",
#   "accelerate",
#   "bitsandbytes",
#   "unsloth",
#   "openenv-core",
#   "fastapi",
#   "uvicorn",
#   "pydantic",
#   "huggingface_hub",
# ]
# ///
"""
Eval-only HF Jobs orchestrator: pull a trained model from HF Hub, run
training/inference_eval.py on it, and upload the resulting
eval_results_v2.json back to the model repo.

Useful when we want to re-eval a model after a code fix (e.g. parser changes,
max_new_tokens fix) without re-training.

Submit with:
    hf jobs uv run --flavor a10g-large --timeout 30m --secrets HF_TOKEN \\
        -e MODEL_REPO=InosLihka/rhythm-env-meta-trained-sft-v1 \\
        -e NUM_EPISODES=20 \\
        -d scripts/eval_on_hf.py

Environment variables read:
    REPO_URL      git URL of the repo containing training/inference_eval.py
    MODEL_REPO    HF Hub model repo to evaluate (and to upload results to)
    NUM_EPISODES  integer passed to inference_eval.py as --num_episodes
    HF_TOKEN      optional; when unset the upload step is skipped
"""

import os
import shutil
import subprocess
import sys
from pathlib import Path

REPO_URL = os.environ.get("REPO_URL", "https://huggingface.co/spaces/InosLihka/rhythm_env")
WORK_DIR = "/tmp/rhythm_env"
MODEL_REPO = os.environ.get("MODEL_REPO", "InosLihka/rhythm-env-meta-trained-sft-v1")
NUM_EPISODES = int(os.environ.get("NUM_EPISODES", "20"))

# Name of the results file, both on disk (relative to WORK_DIR once main()
# has chdir'ed there) and inside the model repo.
EVAL_OUTPUT_FILE = "eval_results_v2.json"

print("=== Eval-only config ===")
print(f" MODEL_REPO: {MODEL_REPO}")
print(f" NUM_EPISODES: {NUM_EPISODES}")
print()


def run(cmd: "list[str] | str") -> None:
    """Echo *cmd* to stdout, then execute it and fail loudly on error.

    Args:
        cmd: Either an argv sequence or a single program string, passed
            straight to ``subprocess.run`` (no shell involved).

    Raises:
        subprocess.CalledProcessError: If the command exits non-zero.
    """
    shown = cmd if isinstance(cmd, str) else " ".join(map(str, cmd))
    # flush=True so the echo is emitted before the child's own output.
    print(f"\n>>> {shown}", flush=True)
    subprocess.run(cmd, check=True)


def _fresh_clone() -> None:
    """Replace WORK_DIR with a fresh clone of REPO_URL and chdir into it."""
    if Path(WORK_DIR).exists():
        shutil.rmtree(WORK_DIR)
    run(["git", "clone", REPO_URL, WORK_DIR])
    os.chdir(WORK_DIR)
    # Kept from the original script: expose the checkout (and its training/
    # package dir) to imports in this process.
    sys.path.insert(0, WORK_DIR)
    sys.path.insert(0, os.path.join(WORK_DIR, "training"))


def _download_model() -> str:
    """Download MODEL_REPO from the Hub and return the local snapshot path."""
    # Imported lazily: huggingface_hub is a script dependency, not stdlib.
    from huggingface_hub import snapshot_download

    model_local = snapshot_download(
        repo_id=MODEL_REPO,
        repo_type="model",
        local_dir=f"/tmp/{MODEL_REPO.replace('/', '_')}",
    )
    print(f"Downloaded model to: {model_local}")
    return model_local


def _run_eval(model_local: str) -> None:
    """Run training/inference_eval.py against the downloaded model."""
    run(
        [
            # Use the interpreter running this script: it is the one that
            # has the declared dependencies installed, whereas a bare
            # "python" depends on whatever PATH happens to resolve to.
            sys.executable,
            "training/inference_eval.py",
            "--model_path",
            model_local,
            "--num_episodes",
            str(NUM_EPISODES),
            "--output_file",
            EVAL_OUTPUT_FILE,
        ]
    )


def _upload_results() -> bool:
    """Upload the eval JSON to MODEL_REPO; return False if HF_TOKEN is unset."""
    token = os.environ.get("HF_TOKEN")
    if not token:
        print("HF_TOKEN is not set; skipping upload of eval results.", flush=True)
        return False

    from huggingface_hub import HfApi, login

    login(token=token)
    api = HfApi()
    api.upload_file(
        path_or_fileobj=EVAL_OUTPUT_FILE,
        path_in_repo=EVAL_OUTPUT_FILE,
        repo_id=MODEL_REPO,
        repo_type="model",
        commit_message=f"Re-eval with max_new_tokens=256 fix; n={NUM_EPISODES} per condition",
    )
    return True


def main() -> None:
    """Clone the repo, download the model, run the eval, upload the results."""
    _fresh_clone()
    model_local = _download_model()
    _run_eval(model_local)
    uploaded = _upload_results()

    print()
    print("=" * 60)
    print("DONE")
    if uploaded:
        print(f" Eval JSON: https://huggingface.co/{MODEL_REPO}/blob/main/{EVAL_OUTPUT_FILE}")
    else:
        # Nothing was pushed, so point at the local file instead of a Hub
        # URL that would not contain this run's results.
        print(f" Eval JSON (local only, not uploaded): {Path(EVAL_OUTPUT_FILE).resolve()}")
    print("=" * 60)


if __name__ == "__main__":
    main()