| """Push CERNenv artefacts to the Hugging Face Hub.
|
|
|
| Two subcommands:
|
|
|
| * ``model`` — push trained LoRA adapters (output of ``training_unsloth.py``)
|
| to a model repo. Generates a model card describing the run.
|
|
|
| * ``space`` — push a directory as a Hugging Face Space
|
| (e.g. ``space/training`` for the trainer Space, or the project root
|
| to publish the env Space). Front-matter is taken from the README.md
|
| inside the directory.
|
|
|
| Usage:
|
| python -m scripts.push_to_hub model \\
|
| --adapter_dir runs/unsloth-grpo \\
|
| --repo_id YOUR_HF_USERNAME/cernenv-grpo-qwen2.5-3b \\
|
| --base_model unsloth/Qwen2.5-3B-Instruct
|
|
|
| python -m scripts.push_to_hub space \\
|
| --space_dir space/training \\
|
| --repo_id YOUR_HF_USERNAME/cernenv-trainer \\
|
| --hardware a100-large
|
|
|
| python -m scripts.push_to_hub space \\
|
| --space_dir . \\
|
| --repo_id YOUR_HF_USERNAME/cernenv \\
|
| --include "models.py" "server/**" "openenv.yaml" "pyproject.toml" "client.py" "README.md"
|
| """
|
|
|
| from __future__ import annotations
|
|
|
| import argparse
|
| import logging
|
| import os
|
| import sys
|
| from pathlib import Path
|
| from typing import Iterable, List, Optional
|
|
|
|
|
# Configure root logging once at import time so both subcommands emit the
# same timestamped format; `logger` is the module-wide logger.
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
logger = logging.getLogger(__name__)
|
|
|
|
|
# Glob patterns always ignored when publishing a directory as a Space:
# virtualenv/caches, editor & VCS metadata, training artefacts, media and
# archives, and local notes. Caller-supplied --exclude patterns are appended
# to this list in push_space(); this list itself is never mutated.
DEFAULT_SPACE_EXCLUDES: List[str] = [
    ".venv/**",
    "__pycache__/**",
    "**/__pycache__/**",
    "*.pyc",
    ".cursor/**",
    ".git/**",
    ".DS_Store",
    "**/.DS_Store",
    "runs/**",
    "training/runs/**",
    "training/plots/**",
    "wandb/**",
    "*.zip",
    "*.apk",
    "*.png",
    "*.jpg",
    "*.jpeg",
    # fnmatch treats "[...]" as a character class, so the literal "[" must be
    # escaped as "[[]" — the unescaped form "[External]*.txt" matched any
    # single character from {E,x,t,e,r,n,a,l} followed by "*.txt" (e.g.
    # "e1234.txt") and never matched files actually named "[External]…txt".
    "[[]External]*.txt",
    "Hackathon FAQs*.txt",
    "*.log",
]
|
|
|
|
|
| def _hf_login() -> None:
|
| from huggingface_hub import login
|
|
|
| token = os.environ.get("HF_TOKEN")
|
| if not token:
|
| raise SystemExit(
|
| "HF_TOKEN environment variable is required (write-scoped Hugging Face token)."
|
| )
|
| login(token=token)
|
|
|
|
|
def _model_card(*, repo_id: str, base_model: str, run_dir: Path) -> str:
    """Render the README.md model card (YAML front-matter + Markdown body).

    Args:
        repo_id: Target model repo as ``owner/name``; the ``owner`` segment
            is reused to build the CERNenv Space link.
        base_model: Base checkpoint the LoRA adapters were trained on.
        run_dir: Training output directory. NOTE(review): currently unused
            in the template — kept for signature stability; confirm whether
            run metadata should be rendered from it.

    Returns:
        The complete model-card content as a single string.
    """
    return f"""---
license: bsd-3-clause
library_name: peft
base_model: {base_model}
tags:
- cernenv
- openenv
- reinforcement-learning
- grpo
- unsloth
- lora
- particle-physics
---

# {repo_id}

LoRA (Low-Rank Adaptation) adapters trained with **GRPO** (Group-Relative
Policy Optimization) inside the **CERNenv** OpenEnv environment — an
LHC (Large Hadron Collider) particle-discovery POMDP (Partially Observable
Markov Decision Process).

The agent (this model) plays the role of a high-energy physicist running an
analysis: it configures the beam, allocates luminosity, picks decay
channels and triggers, reconstructs events, fits resonances, estimates
significance, and finally submits a structured discovery claim that is
graded against a hidden ground-truth particle.

* Base model: `{base_model}`
* RL framework: TRL (Transformer Reinforcement Learning) GRPO
* Acceleration: Unsloth + 4-bit + LoRA
* Environment: [CERNenv](https://huggingface.co/spaces/{repo_id.split('/')[0]}/cernenv)

## Usage

```python
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base = "{base_model}"
adapter = "{repo_id}"

tokenizer = AutoTokenizer.from_pretrained(base)
model = AutoModelForCausalLM.from_pretrained(base, device_map="auto")
model = PeftModel.from_pretrained(model, adapter)
```

See the CERNenv repo for full evaluation, plots, and the `LLMAgent` wrapper.
"""
|
|
|
|
|
def push_model(
    *,
    adapter_dir: str,
    repo_id: str,
    base_model: str,
    private: bool,
) -> None:
    """Upload trained LoRA adapters plus a generated model card to the Hub.

    Args:
        adapter_dir: Local directory holding the adapter files (the output
            of ``training_unsloth.py``). Its README.md is overwritten with a
            freshly generated model card before upload.
        repo_id: Target model repo as ``owner/name``; created if missing.
        base_model: Base checkpoint name recorded in the model card.
        private: Create the repo as private.

    Raises:
        SystemExit: if ``adapter_dir`` does not exist or ``HF_TOKEN`` is unset.
    """
    # Validate local input BEFORE authenticating / importing huggingface_hub
    # so obvious path mistakes fail fast and offline.
    run_dir = Path(adapter_dir)
    if not run_dir.exists():
        raise SystemExit(f"adapter_dir not found: {run_dir}")

    from huggingface_hub import HfApi, create_repo

    _hf_login()
    api = HfApi()

    create_repo(repo_id=repo_id, repo_type="model", private=private, exist_ok=True)

    # Regenerate the card on every push so it always matches the current run.
    card_path = run_dir / "README.md"
    card_path.write_text(
        _model_card(repo_id=repo_id, base_model=base_model, run_dir=run_dir),
        # Explicit UTF-8: the platform default encoding is not UTF-8 everywhere
        # (e.g. Windows), and the card contains non-ASCII characters.
        encoding="utf-8",
    )

    logger.info("uploading %s → %s", run_dir, repo_id)
    api.upload_folder(
        folder_path=str(run_dir),
        repo_id=repo_id,
        repo_type="model",
        commit_message="Upload CERNenv GRPO LoRA adapters",
    )
    logger.info("done: https://huggingface.co/%s", repo_id)
|
|
|
|
|
def push_space(
    *,
    space_dir: str,
    repo_id: str,
    hardware: Optional[str],
    private: bool,
    include: Optional[List[str]],
    exclude: Optional[List[str]],
) -> None:
    """Upload a local directory as a Docker-SDK Hugging Face Space.

    Args:
        space_dir: Directory to publish; Space front-matter is taken from
            the README.md inside it.
        repo_id: Target Space repo as ``owner/name``; created if missing.
        hardware: Optional Space hardware flavour (e.g. ``a100-large``),
            passed through to ``create_repo``.
        private: Create the Space as private.
        include: Optional allow-list glob patterns; ``None`` uploads
            everything not excluded.
        exclude: Extra ignore patterns, appended to DEFAULT_SPACE_EXCLUDES.

    Raises:
        SystemExit: if ``space_dir`` does not exist or ``HF_TOKEN`` is unset.
    """
    # Validate the local path BEFORE authenticating / importing
    # huggingface_hub so obvious mistakes fail fast and offline.
    src = Path(space_dir).resolve()
    if not src.exists():
        raise SystemExit(f"space_dir not found: {src}")

    from huggingface_hub import HfApi, create_repo

    _hf_login()
    api = HfApi()

    create_repo(
        repo_id=repo_id,
        repo_type="space",
        space_sdk="docker",
        space_hardware=hardware,
        private=private,
        exist_ok=True,
    )

    # Copy the defaults so repeated calls never mutate the module constant.
    effective_exclude = list(DEFAULT_SPACE_EXCLUDES)
    if exclude:
        effective_exclude.extend(exclude)

    logger.info("uploading %s → space:%s", src, repo_id)
    logger.info("ignore patterns: %s", effective_exclude)
    api.upload_folder(
        folder_path=str(src),
        repo_id=repo_id,
        repo_type="space",
        commit_message="Update CERNenv Space",
        allow_patterns=include,
        ignore_patterns=effective_exclude,
    )
    logger.info("done: https://huggingface.co/spaces/%s", repo_id)
|
|
|
|
|
def main() -> None:
    """CLI entry point: parse arguments and dispatch to ``model``/``space``."""
    parser = argparse.ArgumentParser()
    subcommands = parser.add_subparsers(dest="cmd", required=True)

    model_parser = subcommands.add_parser("model", help="push trained LoRA adapters to the Hub")
    # The three required string options share identical settings.
    for flag in ("--adapter_dir", "--repo_id", "--base_model"):
        model_parser.add_argument(flag, required=True)
    model_parser.add_argument("--private", action="store_true")

    space_parser = subcommands.add_parser("space", help="push a directory as an HF Space")
    space_parser.add_argument("--space_dir", required=True)
    space_parser.add_argument("--repo_id", required=True)
    space_parser.add_argument(
        "--hardware",
        default=None,
        help="e.g. a100-large, t4-small, l4-medium",
    )
    space_parser.add_argument("--private", action="store_true")
    space_parser.add_argument("--include", nargs="*", default=None, help="glob patterns to include")
    space_parser.add_argument("--exclude", nargs="*", default=None, help="glob patterns to exclude")

    ns = parser.parse_args()

    if ns.cmd == "model":
        push_model(
            adapter_dir=ns.adapter_dir,
            repo_id=ns.repo_id,
            base_model=ns.base_model,
            private=ns.private,
        )
    elif ns.cmd == "space":
        push_space(
            space_dir=ns.space_dir,
            repo_id=ns.repo_id,
            hardware=ns.hardware,
            private=ns.private,
            include=ns.include,
            exclude=ns.exclude,
        )
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: `python -m scripts.push_to_hub <model|space> ...`.
    main()
|
|
|