Spaces:
Sleeping
Sleeping
# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "torch",
#     "transformers==4.56.2",
#     "trl==0.22.2",
#     "datasets",
#     "peft",
#     "accelerate",
#     "bitsandbytes",
#     "unsloth",
#     "openenv-core",
#     "fastapi",
#     "uvicorn",
#     "pydantic",
#     "huggingface_hub",
# ]
# ///
"""
Eval-only HF Jobs orchestrator: pull a trained model from HF Hub, run
inference_eval.py on it, then upload the new eval_results.json back to the
model repo. Useful when we want to re-eval a model after a code fix
(e.g. parser changes, max_new_tokens fix) without re-training.

Submit with:
    hf jobs uv run --flavor a10g-large --timeout 30m --secrets HF_TOKEN \\
        -e MODEL_REPO=InosLihka/rhythm-env-meta-trained-sft-v1 \\
        -e NUM_EPISODES=20 \\
        -d scripts/eval_on_hf.py
"""
import os
import shutil
import subprocess
import sys
from pathlib import Path

# Space repo that carries the eval code (training/inference_eval.py).
REPO_URL = os.environ.get("REPO_URL", "https://huggingface.co/spaces/InosLihka/rhythm_env")
# Scratch checkout location for the cloned Space repo.
WORK_DIR = "/tmp/rhythm_env"
# Hub model repo to pull weights from and push eval results back to.
MODEL_REPO = os.environ.get("MODEL_REPO", "InosLihka/rhythm-env-meta-trained-sft-v1")
# Episodes per eval condition; must parse as an int (raises early if not).
NUM_EPISODES = int(os.environ.get("NUM_EPISODES", "20"))

print("=== Eval-only config ===")  # plain string: no placeholders, so no f-string
print(f" MODEL_REPO: {MODEL_REPO}")
print(f" NUM_EPISODES: {NUM_EPISODES}")
print()
def run(cmd):
    """Echo *cmd* to stdout, then execute it; raises CalledProcessError on failure."""
    # Lists are joined for display; anything else (e.g. a shell string) prints as-is.
    display = " ".join(cmd) if isinstance(cmd, list) else cmd
    print(f"\n>>> {display}", flush=True)
    subprocess.run(cmd, check=True)
def main():
    """Clone the Space repo, download MODEL_REPO, run the eval, upload results.

    Side effects: removes/recreates WORK_DIR, chdirs into it, mutates sys.path,
    spawns git and python subprocesses, and (when HF_TOKEN is set) pushes
    eval_results_v2.json to the model repo on the Hub.
    """
    # Start from a clean checkout so stale code never leaks into a re-run.
    if Path(WORK_DIR).exists():
        shutil.rmtree(WORK_DIR)
    run(["git", "clone", REPO_URL, WORK_DIR])
    os.chdir(WORK_DIR)
    # Make the repo and its training/ package importable by inference_eval.py.
    sys.path.insert(0, WORK_DIR)
    sys.path.insert(0, os.path.join(WORK_DIR, "training"))

    # Download the trained model snapshot from the Hub.
    # (snapshot_download picks up HF_TOKEN from the environment for gated/private repos.)
    from huggingface_hub import snapshot_download
    model_local = snapshot_download(
        repo_id=MODEL_REPO,
        repo_type="model",
        local_dir=f"/tmp/{MODEL_REPO.replace('/', '_')}",
    )
    print(f"Downloaded model to: {model_local}")

    # Run the extended eval with the CURRENT interpreter (the uv-managed venv
    # that has the pinned deps above), not whatever bare "python" happens to be
    # first on PATH inside the job image.
    eval_args = [
        sys.executable, "training/inference_eval.py",
        "--model_path", model_local,
        "--num_episodes", str(NUM_EPISODES),
        "--output_file", "eval_results_v2.json",
    ]
    run(eval_args)

    # Upload results back to the model repo. Deliberately best-effort: when the
    # job was submitted without the HF_TOKEN secret, we still keep the local eval.
    token = os.environ.get("HF_TOKEN")
    if token:
        from huggingface_hub import HfApi, login
        login(token=token)
        api = HfApi()
        api.upload_file(
            path_or_fileobj="eval_results_v2.json",
            path_in_repo="eval_results_v2.json",
            repo_id=MODEL_REPO,
            repo_type="model",
            commit_message=f"Re-eval with max_new_tokens=256 fix; n={NUM_EPISODES} per condition",
        )

    print()
    print("=" * 60)
    print("DONE")
    print(f" Eval JSON: https://huggingface.co/{MODEL_REPO}/blob/main/eval_results_v2.json")
    print("=" * 60)


if __name__ == "__main__":
    main()