# /// script
# requires-python = ">=3.10"
# dependencies = [
# "torch",
# "transformers==4.56.2",
# "trl==0.22.2",
# "datasets",
# "peft",
# "accelerate",
# "bitsandbytes",
# "unsloth",
# "openenv-core",
# "fastapi",
# "uvicorn",
# "pydantic",
# "huggingface_hub",
# ]
# ///
"""
Eval-only HF Jobs orchestrator: pull a trained model from HF Hub, run
inference_eval.py on it, upload the new eval_results.json back to the
model repo. Useful when we want to re-eval a model after a code fix
(e.g. parser changes, max_new_tokens fix) without re-training.
Submit with:
hf jobs uv run --flavor a10g-large --timeout 30m --secrets HF_TOKEN \\
-e MODEL_REPO=InosLihka/rhythm-env-meta-trained-sft-v1 \\
-e NUM_EPISODES=20 \\
-d scripts/eval_on_hf.py
"""
import os
import shutil
import subprocess
import sys
from pathlib import Path
# --- Run configuration (each overridable via environment variable) ---
# Space repo containing the env + training/eval code.
REPO_URL = os.environ.get("REPO_URL", "https://huggingface.co/spaces/InosLihka/rhythm_env")
# Scratch checkout location inside the job container.
WORK_DIR = "/tmp/rhythm_env"
# Model repo to pull the trained weights from (and push results back to).
MODEL_REPO = os.environ.get("MODEL_REPO", "InosLihka/rhythm-env-meta-trained-sft-v1")
# Episodes per eval condition.
NUM_EPISODES = int(os.environ.get("NUM_EPISODES", "20"))

# Echo the resolved config so the HF Jobs log records what this run used.
print("=== Eval-only config ===")  # fixed: was an f-string with no placeholders (F541)
print(f" MODEL_REPO: {MODEL_REPO}")
print(f" NUM_EPISODES: {NUM_EPISODES}")
print()
def run(cmd):
    """Echo *cmd* to the job log, then execute it.

    Accepts either an argv list or a plain command string. Raises
    subprocess.CalledProcessError if the command exits non-zero
    (check=True), which aborts the job with a visible failure.
    """
    if isinstance(cmd, list):
        shown = ' '.join(cmd)
    else:
        shown = cmd
    # flush=True so the echo appears before the subprocess's own output
    # in the streamed job log.
    print(f"\n>>> {shown}", flush=True)
    subprocess.run(cmd, check=True)
def main():
    """Clone the env repo, download the trained model, re-run eval, upload results.

    Steps:
      1. Fresh-clone the Space repo into WORK_DIR (wiping any stale copy).
      2. ``snapshot_download`` the trained model from MODEL_REPO.
      3. Run ``training/inference_eval.py`` against it for NUM_EPISODES episodes.
      4. If HF_TOKEN is set, push ``eval_results_v2.json`` back to the model repo.

    Raises subprocess.CalledProcessError if git or the eval script fails.
    """
    # Start from a clean checkout so a reused container can't leak stale state.
    if Path(WORK_DIR).exists():
        shutil.rmtree(WORK_DIR)
    run(["git", "clone", REPO_URL, WORK_DIR])
    os.chdir(WORK_DIR)
    # Make repo-local modules (training/...) importable by the eval script.
    sys.path.insert(0, WORK_DIR)
    sys.path.insert(0, os.path.join(WORK_DIR, "training"))

    # Download the trained model snapshot from the Hub.
    # Local import: huggingface_hub is only available inside the job env.
    from huggingface_hub import snapshot_download
    model_local = snapshot_download(
        repo_id=MODEL_REPO,
        repo_type="model",
        local_dir=f"/tmp/{MODEL_REPO.replace('/', '_')}",
    )
    print(f"Downloaded model to: {model_local}")

    # Run extended eval. Fix: use sys.executable instead of a bare "python" —
    # under `hf jobs uv run` a bare "python" may be absent from PATH or resolve
    # to an interpreter without the script's resolved dependencies.
    eval_args = [
        sys.executable, "training/inference_eval.py",
        "--model_path", model_local,
        "--num_episodes", str(NUM_EPISODES),
        "--output_file", "eval_results_v2.json",
    ]
    run(eval_args)

    # Upload results back to the model repo. Best-effort: without HF_TOKEN the
    # upload is silently skipped (eval output still appears in the job log).
    token = os.environ.get("HF_TOKEN")
    if token:
        from huggingface_hub import HfApi, login
        login(token=token)
        api = HfApi()
        # Path is relative to WORK_DIR because of the os.chdir above.
        api.upload_file(
            path_or_fileobj="eval_results_v2.json",
            path_in_repo="eval_results_v2.json",
            repo_id=MODEL_REPO,
            repo_type="model",
            commit_message=f"Re-eval with max_new_tokens=256 fix; n={NUM_EPISODES} per condition",
        )
    print()
    print("=" * 60)
    print("DONE")
    print(f" Eval JSON: https://huggingface.co/{MODEL_REPO}/blob/main/eval_results_v2.json")
    print("=" * 60)
# Script entry point: run the orchestrator only when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    main()