File size: 2,849 Bytes
b9c9b8f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# /// script
# requires-python = ">=3.10"
# dependencies = [
#   "torch",
#   "transformers==4.56.2",
#   "trl==0.22.2",
#   "datasets",
#   "peft",
#   "accelerate",
#   "bitsandbytes",
#   "unsloth",
#   "openenv-core",
#   "fastapi",
#   "uvicorn",
#   "pydantic",
#   "huggingface_hub",
# ]
# ///
"""
Eval-only HF Jobs orchestrator: pull a trained model from HF Hub, run
inference_eval.py on it, upload the new eval_results.json back to the
model repo. Useful when we want to re-eval a model after a code fix
(e.g. parser changes, max_new_tokens fix) without re-training.

Submit with:
    hf jobs uv run --flavor a10g-large --timeout 30m --secrets HF_TOKEN \\
        -e MODEL_REPO=InosLihka/rhythm-env-meta-trained-sft-v1 \\
        -e NUM_EPISODES=20 \\
        -d scripts/eval_on_hf.py
"""

import os
import shutil
import subprocess
import sys
from pathlib import Path

# Where the eval code lives: an HF Space repo that is git-cloned at runtime.
REPO_URL = os.environ.get("REPO_URL", "https://huggingface.co/spaces/InosLihka/rhythm_env")
WORK_DIR = "/tmp/rhythm_env"

# Which trained model to re-evaluate, and how many episodes to run.
MODEL_REPO = os.environ.get("MODEL_REPO", "InosLihka/rhythm-env-meta-trained-sft-v1")
NUM_EPISODES = int(os.environ.get("NUM_EPISODES", "20"))

# Plain string, not an f-string: the original had no placeholders (ruff F541).
print("=== Eval-only config ===")
print(f"  MODEL_REPO:  {MODEL_REPO}")
print(f"  NUM_EPISODES: {NUM_EPISODES}")
print()


def run(cmd):
    """Echo *cmd* to stdout, then execute it, raising CalledProcessError on failure."""
    if isinstance(cmd, list):
        shown = " ".join(cmd)
    else:
        shown = cmd
    print(f"\n>>> {shown}", flush=True)
    subprocess.run(cmd, check=True)


def main():
    """Clone the eval code, download the model, run the eval, upload the results.

    Runs inside an HF Jobs container: clones REPO_URL into WORK_DIR, pulls
    MODEL_REPO from the Hub, invokes training/inference_eval.py, then (if an
    HF_TOKEN is available) pushes eval_results_v2.json back to the model repo.
    """
    # Start from a fresh clone: the container's /tmp may carry stale state.
    if Path(WORK_DIR).exists():
        shutil.rmtree(WORK_DIR)
    run(["git", "clone", REPO_URL, WORK_DIR])
    os.chdir(WORK_DIR)
    # Make the cloned repo's modules importable by the eval subprocess' deps.
    sys.path.insert(0, WORK_DIR)
    sys.path.insert(0, os.path.join(WORK_DIR, "training"))

    # Download the trained model. Imported lazily: huggingface_hub is only
    # guaranteed present after the uv script dependencies are installed.
    from huggingface_hub import snapshot_download

    model_local = snapshot_download(
        repo_id=MODEL_REPO,
        repo_type="model",
        local_dir=f"/tmp/{MODEL_REPO.replace('/', '_')}",
    )
    print(f"Downloaded model to: {model_local}")

    # Run extended eval; writes eval_results_v2.json into the repo cwd.
    eval_args = [
        "python", "training/inference_eval.py",
        "--model_path", model_local,
        "--num_episodes", str(NUM_EPISODES),
        "--output_file", "eval_results_v2.json",
    ]
    run(eval_args)

    # Upload the results back to the model repo (needs a write token).
    token = os.environ.get("HF_TOKEN")
    if token:
        from huggingface_hub import HfApi, login
        login(token=token)
        api = HfApi()
        api.upload_file(
            path_or_fileobj="eval_results_v2.json",
            path_in_repo="eval_results_v2.json",
            repo_id=MODEL_REPO,
            repo_type="model",
            commit_message=f"Re-eval with max_new_tokens=256 fix; n={NUM_EPISODES} per condition",
        )
    else:
        # Previously the upload was skipped silently when HF_TOKEN was unset,
        # losing the eval results without notice. Surface it explicitly.
        print("WARNING: HF_TOKEN not set; eval_results_v2.json was NOT uploaded.")

    print()
    print("=" * 60)
    print("DONE")
    print(f"  Eval JSON: https://huggingface.co/{MODEL_REPO}/blob/main/eval_results_v2.json")
    print("=" * 60)


# Script entry point: run the eval pipeline only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()