"""Push CERNenv artefacts to the Hugging Face Hub.

Two subcommands:

* ``model`` — push trained LoRA adapters (output of ``training_unsloth.py``)
  to a model repo. Generates a model card describing the run.
* ``space`` — push a directory as a Hugging Face Space
  (e.g. ``space/training`` for the trainer Space, or the project root
  to publish the env Space). Front-matter is taken from the README.md
  inside the directory.

Usage:
    python -m scripts.push_to_hub model \\
        --adapter_dir runs/unsloth-grpo \\
        --repo_id YOUR_HF_USERNAME/cernenv-grpo-qwen2.5-3b \\
        --base_model unsloth/Qwen2.5-3B-Instruct

    python -m scripts.push_to_hub space \\
        --space_dir space/training \\
        --repo_id YOUR_HF_USERNAME/cernenv-trainer \\
        --hardware a100-large

    python -m scripts.push_to_hub space \\
        --space_dir . \\
        --repo_id YOUR_HF_USERNAME/cernenv \\
        --include "models.py" "server/**" "openenv.yaml" "pyproject.toml" "client.py" "README.md"
"""
| from __future__ import annotations | |
| import argparse | |
| import logging | |
| import os | |
| import sys | |
| from pathlib import Path | |
| from typing import Iterable, List, Optional | |
# Root-logger config for CLI use; the module logger follows stdlib convention.
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
logger = logging.getLogger(__name__)

# Glob patterns always excluded from Space uploads. push_space() merges any
# extra --exclude patterns from the command line on top of these.
DEFAULT_SPACE_EXCLUDES: List[str] = [
    # Virtualenvs, bytecode caches, editor/VCS metadata.
    ".venv/**",
    "__pycache__/**",
    "**/__pycache__/**",
    "*.pyc",
    ".cursor/**",
    ".git/**",
    ".DS_Store",
    "**/.DS_Store",
    # Local training artefacts (checkpoints, plots, experiment tracking).
    "runs/**",
    "training/runs/**",
    "training/plots/**",
    "wandb/**",
    # Bulky binaries and images that don't belong in a Space repo.
    "*.zip",
    "*.apk",
    "*.png",
    "*.jpg",
    "*.jpeg",
    # Hackathon handout files and logs.
    "[External]*.txt",
    "Hackathon FAQs*.txt",
    "*.log",
]
def _hf_login() -> None:
    """Authenticate against the Hugging Face Hub via the ``HF_TOKEN`` env var.

    Raises:
        SystemExit: if ``HF_TOKEN`` is not set.
    """
    from huggingface_hub import login

    token = os.environ.get("HF_TOKEN")
    if token:
        login(token=token)
        return
    raise SystemExit(
        "HF_TOKEN environment variable is required (write-scoped Hugging Face token)."
    )
| def _model_card(*, repo_id: str, base_model: str, run_dir: Path) -> str: | |
| return f"""--- | |
| license: bsd-3-clause | |
| library_name: peft | |
| base_model: {base_model} | |
| tags: | |
| - cernenv | |
| - openenv | |
| - reinforcement-learning | |
| - grpo | |
| - unsloth | |
| - lora | |
| - particle-physics | |
| --- | |
| # {repo_id} | |
| LoRA (Low-Rank Adaptation) adapters trained with **GRPO** (Group-Relative | |
| Policy Optimization) inside the **CERNenv** OpenEnv environment — an | |
| LHC (Large Hadron Collider) particle-discovery POMDP (Partially Observable | |
| Markov Decision Process). | |
| The agent (this model) plays the role of a high-energy physicist running an | |
| analysis: it configures the beam, allocates luminosity, picks decay | |
| channels and triggers, reconstructs events, fits resonances, estimates | |
| significance, and finally submits a structured discovery claim that is | |
| graded against a hidden ground-truth particle. | |
| * Base model: `{base_model}` | |
| * RL framework: TRL (Transformer Reinforcement Learning) GRPO | |
| * Acceleration: Unsloth + 4-bit + LoRA | |
| * Environment: [CERNenv](https://huggingface.co/spaces/{repo_id.split('/')[0]}/cernenv) | |
| ## Usage | |
| ```python | |
| from peft import PeftModel | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| base = "{base_model}" | |
| adapter = "{repo_id}" | |
| tokenizer = AutoTokenizer.from_pretrained(base) | |
| model = AutoModelForCausalLM.from_pretrained(base, device_map="auto") | |
| model = PeftModel.from_pretrained(model, adapter) | |
| ``` | |
| See the CERNenv repo for full evaluation, plots, and the `LLMAgent` wrapper. | |
| """ | |
def push_model(
    *,
    adapter_dir: str,
    repo_id: str,
    base_model: str,
    private: bool,
) -> None:
    """Upload a trained LoRA adapter directory to a Hub model repo.

    Writes a generated model card to ``<adapter_dir>/README.md`` (overwriting
    any existing one), then uploads the whole folder.

    Args:
        adapter_dir: local directory containing the adapter weights/config.
        repo_id: target ``owner/name`` model repo on the Hub.
        base_model: base-model identifier recorded in the model card.
        private: create the repo as private if it does not exist yet.

    Raises:
        SystemExit: if ``adapter_dir`` does not exist or ``HF_TOKEN`` is unset.
    """
    from huggingface_hub import HfApi, create_repo

    _hf_login()
    api = HfApi()
    run_dir = Path(adapter_dir)
    if not run_dir.exists():
        raise SystemExit(f"adapter_dir not found: {run_dir}")
    create_repo(repo_id=repo_id, repo_type="model", private=private, exist_ok=True)
    card_path = run_dir / "README.md"
    # Explicit UTF-8: Path.write_text otherwise uses the locale encoding,
    # which can fail on the card's non-ASCII characters (em dashes).
    card_path.write_text(
        _model_card(repo_id=repo_id, base_model=base_model, run_dir=run_dir),
        encoding="utf-8",
    )
    logger.info("uploading %s → %s", run_dir, repo_id)
    api.upload_folder(
        folder_path=str(run_dir),
        repo_id=repo_id,
        repo_type="model",
        commit_message="Upload CERNenv GRPO LoRA adapters",
    )
    logger.info("done: https://huggingface.co/%s", repo_id)
def push_space(
    *,
    space_dir: str,
    repo_id: str,
    hardware: Optional[str],
    private: bool,
    include: Optional[List[str]],
    exclude: Optional[List[str]],
) -> None:
    """Publish a local directory as a Docker-SDK Hugging Face Space.

    Args:
        space_dir: directory to upload (resolved to an absolute path).
        repo_id: target ``owner/name`` Space repo on the Hub.
        hardware: optional Space hardware flavour (e.g. ``a100-large``).
        private: create the Space as private if it does not exist yet.
        include: optional allow-list glob patterns; ``None`` uploads everything.
        exclude: extra ignore glob patterns, merged with the defaults.

    Raises:
        SystemExit: if ``space_dir`` does not exist or ``HF_TOKEN`` is unset.
    """
    from huggingface_hub import HfApi, create_repo

    _hf_login()
    api = HfApi()
    source = Path(space_dir).resolve()
    if not source.exists():
        raise SystemExit(f"space_dir not found: {source}")
    create_repo(
        repo_id=repo_id,
        repo_type="space",
        space_sdk="docker",
        space_hardware=hardware,
        private=private,
        exist_ok=True,
    )
    # Fresh list each call: never mutate the module-level default patterns.
    ignore = DEFAULT_SPACE_EXCLUDES + (exclude or [])
    logger.info("uploading %s → space:%s", source, repo_id)
    logger.info("ignore patterns: %s", ignore)
    api.upload_folder(
        folder_path=str(source),
        repo_id=repo_id,
        repo_type="space",
        commit_message="Update CERNenv Space",
        allow_patterns=include,
        ignore_patterns=ignore,
    )
    logger.info("done: https://huggingface.co/spaces/%s", repo_id)
def main() -> None:  # pragma: no cover
    """CLI entry point: dispatch to the ``model`` or ``space`` subcommand."""
    parser = argparse.ArgumentParser()
    subcommands = parser.add_subparsers(dest="cmd", required=True)

    model_parser = subcommands.add_parser(
        "model", help="push trained LoRA adapters to the Hub"
    )
    model_parser.add_argument("--adapter_dir", required=True)
    model_parser.add_argument("--repo_id", required=True)
    model_parser.add_argument("--base_model", required=True)
    model_parser.add_argument("--private", action="store_true")

    space_parser = subcommands.add_parser(
        "space", help="push a directory as an HF Space"
    )
    space_parser.add_argument("--space_dir", required=True)
    space_parser.add_argument("--repo_id", required=True)
    space_parser.add_argument(
        "--hardware", default=None, help="e.g. a100-large, t4-small, l4-medium"
    )
    space_parser.add_argument("--private", action="store_true")
    space_parser.add_argument(
        "--include", nargs="*", default=None, help="glob patterns to include"
    )
    space_parser.add_argument(
        "--exclude", nargs="*", default=None, help="glob patterns to exclude"
    )

    args = parser.parse_args()
    if args.cmd == "model":
        push_model(
            adapter_dir=args.adapter_dir,
            repo_id=args.repo_id,
            base_model=args.base_model,
            private=args.private,
        )
    elif args.cmd == "space":
        push_space(
            space_dir=args.space_dir,
            repo_id=args.repo_id,
            hardware=args.hardware,
            private=args.private,
            include=args.include,
            exclude=args.exclude,
        )


if __name__ == "__main__":  # pragma: no cover
    main()