Spaces:
Sleeping
Sleeping
# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "torch",
#     "transformers==4.56.2",
#     "trl==0.22.2",
#     "datasets",
#     "peft",
#     "accelerate",
#     "bitsandbytes",
#     "unsloth",
#     "openenv-core",
#     "fastapi",
#     "uvicorn",
#     "pydantic",
#     "huggingface_hub",
# ]
# ///
"""
Eval-only HF Jobs orchestrator: pull a trained model from HF Hub, run
inference_eval.py on it, then upload the new eval_results.json back to the
model repo. Useful when we want to re-eval a model after a code fix
(e.g. parser changes, max_new_tokens fix) without re-training.

Submit with:
    hf jobs uv run --flavor a10g-large --timeout 30m --secrets HF_TOKEN \\
        -e MODEL_REPO=InosLihka/rhythm-env-meta-trained-sft-v1 \\
        -e NUM_EPISODES=20 \\
        -d scripts/eval_on_hf.py
"""
import os
import shutil
import subprocess
import sys
from pathlib import Path

# Space repo that carries the eval code (training/inference_eval.py).
REPO_URL = os.environ.get("REPO_URL", "https://huggingface.co/spaces/InosLihka/rhythm_env")
# Scratch checkout location for the cloned Space repo.
WORK_DIR = "/tmp/rhythm_env"
# Hub model repo to pull weights from and push eval results back to.
MODEL_REPO = os.environ.get("MODEL_REPO", "InosLihka/rhythm-env-meta-trained-sft-v1")
# Episodes per eval condition; must parse as an int (raises early if not).
NUM_EPISODES = int(os.environ.get("NUM_EPISODES", "20"))

print("=== Eval-only config ===")  # plain string: no placeholders, so no f-string
print(f" MODEL_REPO: {MODEL_REPO}")
print(f" NUM_EPISODES: {NUM_EPISODES}")
print()
def run(cmd):
    """Echo *cmd* to stdout, then execute it; raises CalledProcessError on failure."""
    # Lists are joined for display; anything else (e.g. a shell string) prints as-is.
    display = " ".join(cmd) if isinstance(cmd, list) else cmd
    print(f"\n>>> {display}", flush=True)
    subprocess.run(cmd, check=True)
def main():
    """Clone the Space repo, download MODEL_REPO, run the eval, upload results.

    Side effects: removes/recreates WORK_DIR, chdirs into it, mutates sys.path,
    spawns git and python subprocesses, and (when HF_TOKEN is set) pushes
    eval_results_v2.json to the model repo on the Hub.
    """
    # Start from a clean checkout so stale code never leaks into a re-run.
    if Path(WORK_DIR).exists():
        shutil.rmtree(WORK_DIR)
    run(["git", "clone", REPO_URL, WORK_DIR])
    os.chdir(WORK_DIR)
    # Make the repo and its training/ package importable by inference_eval.py.
    sys.path.insert(0, WORK_DIR)
    sys.path.insert(0, os.path.join(WORK_DIR, "training"))

    # Download the trained model snapshot from the Hub.
    # (snapshot_download picks up HF_TOKEN from the environment for gated/private repos.)
    from huggingface_hub import snapshot_download
    model_local = snapshot_download(
        repo_id=MODEL_REPO,
        repo_type="model",
        local_dir=f"/tmp/{MODEL_REPO.replace('/', '_')}",
    )
    print(f"Downloaded model to: {model_local}")

    # Run the extended eval with the CURRENT interpreter (the uv-managed venv
    # that has the pinned deps above), not whatever bare "python" happens to be
    # first on PATH inside the job image.
    eval_args = [
        sys.executable, "training/inference_eval.py",
        "--model_path", model_local,
        "--num_episodes", str(NUM_EPISODES),
        "--output_file", "eval_results_v2.json",
    ]
    run(eval_args)

    # Upload results back to the model repo. Deliberately best-effort: when the
    # job was submitted without the HF_TOKEN secret, we still keep the local eval.
    token = os.environ.get("HF_TOKEN")
    if token:
        from huggingface_hub import HfApi, login
        login(token=token)
        api = HfApi()
        api.upload_file(
            path_or_fileobj="eval_results_v2.json",
            path_in_repo="eval_results_v2.json",
            repo_id=MODEL_REPO,
            repo_type="model",
            commit_message=f"Re-eval with max_new_tokens=256 fix; n={NUM_EPISODES} per condition",
        )

    print()
    print("=" * 60)
    print("DONE")
    print(f" Eval JSON: https://huggingface.co/{MODEL_REPO}/blob/main/eval_results_v2.json")
    print("=" * 60)


if __name__ == "__main__":
    main()