| |
| """Package the currently active PolyGuard model artifacts for implementation use.""" |
|
|
| from __future__ import annotations |
|
|
| import argparse |
| from datetime import datetime, timezone |
| import json |
| import os |
| from pathlib import Path |
| import shutil |
| import zipfile |
| from typing import Any |
|
|
| from huggingface_hub import HfApi |
|
|
|
|
# Project root: the directory one level above the folder that contains this
# script. The checkpoints/ and outputs/ trees are resolved relative to it.
ROOT: Path = Path(__file__).resolve().parents[1]

# Bundle directory name used when --bundle-name is not given.
DEFAULT_BUNDLE_NAME: str = "local-qwen-0-5b-active-smoke"

# Hugging Face repo id used when --artifact-repo-id is not given.
DEFAULT_ARTIFACT_REPO: str = "TheJackBright/polyguard-openenv-training-full-artifacts"
|
|
|
|
| def parse_args() -> argparse.Namespace: |
| parser = argparse.ArgumentParser(description="Package active PolyGuard model artifacts.") |
| parser.add_argument("--bundle-name", default=DEFAULT_BUNDLE_NAME) |
| parser.add_argument("--artifact-repo-id", default=DEFAULT_ARTIFACT_REPO) |
| parser.add_argument("--output-root", default=str(ROOT / "submission_bundle" / "model_artifacts")) |
| parser.add_argument("--upload", action="store_true") |
| parser.add_argument("--no-merged", action="store_true", help="Skip the merged model directory.") |
| parser.add_argument("--zip", action="store_true", default=True) |
| return parser.parse_args() |
|
|
|
|
def load_json(path: Path) -> dict[str, Any]:
    """Best-effort read of a JSON object from ``path``.

    Args:
        path: File expected to contain a UTF-8 encoded JSON object.

    Returns:
        The parsed object, or ``{}`` when the file is missing, is not valid
        UTF-8, is not valid JSON, or holds a top-level value that is not an
        object.
    """
    # Read first and handle the failure, instead of checking exists() and then
    # reading: the file can disappear between the two steps.
    try:
        text = path.read_text(encoding="utf-8")
    except (FileNotFoundError, NotADirectoryError, UnicodeDecodeError):
        # NotADirectoryError covers a path whose parent is a regular file,
        # which the previous exists() check also reported as "missing".
        return {}
    try:
        payload = json.loads(text)
    except json.JSONDecodeError:
        return {}
    return payload if isinstance(payload, dict) else {}
|
|
|
|
def copy_tree(source: Path, target: Path) -> dict[str, Any]:
    """Recursively copy ``source`` into ``target`` and report what is there afterwards.

    Args:
        source: Directory to copy. When it does not exist nothing is copied.
        target: Destination directory; it may already exist, in which case the
            copy is merged into it.

    Returns:
        A dict with ``source``, ``target``, ``exists``, ``file_count`` and
        ``bytes``. The counts cover every regular file found under ``target``
        after the copy, excluding ``.DS_Store``.
    """
    if not source.exists():
        return {"source": str(source), "target": str(target), "exists": False, "file_count": 0, "bytes": 0}

    # zip_bundle and upload_bundle both leave .DS_Store out, so skip it here as
    # well; otherwise the reported counts would not match what gets shipped.
    shutil.copytree(
        source,
        target,
        dirs_exist_ok=True,
        symlinks=False,  # symlinked entries are copied as their contents
        ignore=shutil.ignore_patterns(".DS_Store"),
    )
    copied_files = [
        path
        for path in target.rglob("*")
        if path.is_file() and path.name != ".DS_Store"
    ]
    return {
        "source": str(source),
        "target": str(target),
        "exists": True,
        "file_count": len(copied_files),
        "bytes": sum(path.stat().st_size for path in copied_files),
    }
|
|
|
|
def copy_file(source: Path, target: Path) -> dict[str, Any]:
    """Copy a single file to ``target`` and describe the outcome.

    Parent directories of ``target`` are created as needed. When ``source``
    does not exist nothing is written and the report carries
    ``exists=False`` with ``bytes=0``.

    Returns:
        A dict with ``source``, ``target``, ``exists`` and ``bytes`` (size of
        the copied file at ``target``).
    """
    report: dict[str, Any] = {
        "source": str(source),
        "target": str(target),
        "exists": False,
        "bytes": 0,
    }
    if source.exists():
        target.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(source, target)
        report["exists"] = True
        report["bytes"] = target.stat().st_size
    return report
|
|
|
|
def write_json(path: Path, payload: Any) -> None:
    """Write ``payload`` to ``path`` as indented, ASCII-escaped JSON.

    Missing parent directories are created first. The file is UTF-8 encoded
    and ends with a single trailing newline.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    # Serialize before opening the file so a non-serializable payload does not
    # leave a truncated file behind.
    document = json.dumps(payload, ensure_ascii=True, indent=2) + "\n"
    with path.open("w", encoding="utf-8") as handle:
        handle.write(document)
|
|
|
|
def write_readme(bundle_dir: Path, manifest: dict[str, Any]) -> None:
    """Write ``README.md`` into ``bundle_dir`` describing the bundle.

    The restore and download commands are built from the manifest's bundle
    name, so they stay correct for bundles created with a non-default name.
    For the default bundle name and repo the text is identical to the
    previously hard-coded README.

    Args:
        bundle_dir: Existing directory that receives ``README.md``.
        manifest: Bundle manifest. ``bundle_name`` is required; ``model_id``,
            ``base_model``, ``preferred_artifact`` and ``hf_repo_id`` are
            optional.

    Raises:
        KeyError: If ``manifest`` has no ``bundle_name``.
    """
    bundle_name = manifest["bundle_name"]
    # hf_repo_id is only present when the caller recorded an upload target;
    # otherwise document the default artifact repo.
    repo_id = manifest.get("hf_repo_id") or DEFAULT_ARTIFACT_REPO
    local_bundle = f"submission_bundle/model_artifacts/{bundle_name}"

    lines = [
        "# PolyGuard Active Model Artifact Bundle",
        "",
        f"Bundle: `{bundle_name}`",
        f"Model: `{manifest.get('model_id', '')}`",
        f"Base model: `{manifest.get('base_model', '')}`",
        f"Preferred artifact: `{manifest.get('preferred_artifact', '')}`",
        "",
        "This bundle is meant for implementation/testing while the full per-model remote sweep artifacts are still uploading.",
        "",
        "## Contents",
        "",
        "- `checkpoints/grpo_adapter/`",
        "- `checkpoints/sft_adapter/`",
        "- `checkpoints/merged/` when included",
        "- `manifests/active_model_manifest.json`",
        "- `reports/`",
        "",
        "## Restore Locally",
        "",
        "```bash",
        "cd /Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl",
        f"cp -R {local_bundle}/checkpoints/grpo_adapter checkpoints/grpo_adapter",
        f"cp -R {local_bundle}/checkpoints/sft_adapter checkpoints/sft_adapter",
        f"cp -R {local_bundle}/checkpoints/merged checkpoints/merged",
        "mkdir -p checkpoints/active",
        f"cp {local_bundle}/manifests/active_model_manifest.json checkpoints/active/active_model_manifest.json",
        "curl http://127.0.0.1:8200/policy/model_status",
        "```",
        "",
        "## Hugging Face Download",
        "",
        "After upload, download with:",
        "",
        "```bash",
        "export HF_TOKEN=\"$(cat ~/.cache/huggingface/token)\"",
        f"huggingface-cli download {repo_id} \\",
        " --repo-type model \\",
        f" --include 'usable_model_bundles/{bundle_name}/**' \\",
        " --local-dir ./hf_artifacts",
        "```",
        "",
        "Note: this is the current local active Qwen 0.5B implementation bundle. It is not the final full remote Qwen 0.5B/1.5B sweep checkpoint until those files appear in the HF artifact repo.",
        # The trailing empty entry makes the file end with a newline.
        "",
    ]
    bundle_dir.joinpath("README.md").write_text("\n".join(lines), encoding="utf-8")
|
|
|
|
def zip_bundle(bundle_dir: Path) -> Path:
    """Create ``<bundle_dir>.zip`` next to ``bundle_dir`` and return its path.

    Every regular file under ``bundle_dir`` except ``.DS_Store`` is stored
    with a path relative to the bundle's parent, so the archive unpacks into
    a single top-level folder named after the bundle. An existing archive at
    the same location is replaced.

    Args:
        bundle_dir: Directory to archive.

    Returns:
        Path of the written zip file.
    """
    # Append ".zip" to the full directory name. with_suffix() would instead
    # replace everything after the last dot, turning "qwen-0.5b-smoke" into
    # "qwen-0.zip".
    zip_path = bundle_dir.with_name(f"{bundle_dir.name}.zip")
    zip_path.unlink(missing_ok=True)
    with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as archive:
        # Sorted so the member order does not depend on directory listing order.
        for path in sorted(bundle_dir.rglob("*")):
            if path.is_file() and path.name != ".DS_Store":
                archive.write(path, arcname=str(path.relative_to(bundle_dir.parent)))
    return zip_path
|
|
|
|
def package_bundle(*, bundle_name: str, output_root: Path, include_merged: bool) -> dict[str, Any]:
    """Build a fresh bundle directory from the active local model artifacts.

    Any existing ``output_root / bundle_name`` directory is deleted and
    rebuilt. The adapters, manifests and reports found under ``ROOT`` are
    copied in; sources that do not exist are recorded with ``exists=False``
    rather than raising. ``bundle_manifest.json`` and ``README.md`` are then
    written into the bundle.

    Args:
        bundle_name: Name of the bundle directory; must be a single path
            component.
        output_root: Directory under which the bundle directory is created.
        include_merged: Also copy ``checkpoints/merged`` when true.

    Returns:
        The bundle manifest that was written to ``bundle_manifest.json``.

    Raises:
        ValueError: If ``bundle_name`` is empty, ``.``/``..``, or contains a
            path separator.
    """
    # The bundle directory is removed with rmtree below, so refuse names that
    # would resolve to output_root itself, to one of its parents, or to an
    # absolute path somewhere else.
    if not bundle_name or bundle_name in {".", ".."} or Path(bundle_name).name != bundle_name:
        raise ValueError(f"bundle_name must be a single directory name, got {bundle_name!r}")

    active_manifest_path = ROOT / "checkpoints" / "active" / "active_model_manifest.json"
    active_manifest = load_json(active_manifest_path)
    bundle_dir = output_root / bundle_name
    if bundle_dir.exists():
        shutil.rmtree(bundle_dir)
    bundle_dir.mkdir(parents=True, exist_ok=True)

    copies: dict[str, Any] = {
        "grpo_adapter": copy_tree(ROOT / "checkpoints" / "grpo_adapter", bundle_dir / "checkpoints" / "grpo_adapter"),
        "sft_adapter": copy_tree(ROOT / "checkpoints" / "sft_adapter", bundle_dir / "checkpoints" / "sft_adapter"),
        "active_manifest": copy_file(
            active_manifest_path,
            bundle_dir / "manifests" / "active_model_manifest.json",
        ),
        "active_report_manifest": copy_file(
            ROOT / "outputs" / "reports" / "active_model" / "active_model_manifest.json",
            bundle_dir / "manifests" / "active_model_report_manifest.json",
        ),
        "submission_evidence_manifest": copy_file(
            ROOT / "outputs" / "reports" / "submission_evidence" / "qwen_0_5b_1_5b" / "manifest.json",
            bundle_dir / "manifests" / "submission_evidence_manifest.json",
        ),
        "reports": copy_tree(ROOT / "outputs" / "reports" / "active_model", bundle_dir / "reports"),
    }
    if include_merged:
        copies["merged"] = copy_tree(ROOT / "checkpoints" / "merged", bundle_dir / "checkpoints" / "merged")

    # Fields missing from the active manifest fall back to the Qwen 0.5B values.
    manifest = {
        "status": "ok",
        "bundle_name": bundle_name,
        "created_at_utc": datetime.now(timezone.utc).isoformat(),
        "source": "local_active_model",
        "run_id": active_manifest.get("run_id", "qwen-qwen2-5-0-5b-instruct"),
        "label": active_manifest.get("label", "local-qwen-0.5b-active-smoke"),
        "model_id": active_manifest.get("model_id", "Qwen/Qwen2.5-0.5B-Instruct"),
        "base_model": active_manifest.get("base_model", "Qwen/Qwen2.5-0.5B-Instruct"),
        "preferred_artifact": active_manifest.get("preferred_artifact", "grpo_adapter"),
        "availability": active_manifest.get("availability", {}),
        "remote_full_sweep_note": (
            "The full Qwen 0.5B/1.5B remote sweep artifacts are still pending upload in the HF artifact repo. "
            "This bundle packages the currently active local trained/smoke artifacts for product integration."
        ),
        "copies": copies,
    }
    write_json(bundle_dir / "bundle_manifest.json", manifest)
    write_readme(bundle_dir, manifest)
    return manifest
|
|
|
|
def upload_bundle(bundle_dir: Path, repo_id: str, bundle_name: str) -> str:
    """Upload the bundle directory to a Hugging Face model repo.

    The client is built with the ``HF_TOKEN`` environment variable (``None``
    when unset). ``create_repo`` is called with ``private=True`` and
    ``exist_ok=True`` before the folder is uploaded; ``.DS_Store`` files are
    excluded from the upload.

    Args:
        bundle_dir: Local bundle directory to upload.
        repo_id: Target repo id on the Hub.
        bundle_name: Bundle name, used for the destination folder and the
            commit message.

    Returns:
        The destination folder inside the repo,
        ``usable_model_bundles/<bundle_name>``.
    """
    destination = f"usable_model_bundles/{bundle_name}"
    client = HfApi(token=os.getenv("HF_TOKEN"))
    client.create_repo(repo_id=repo_id, repo_type="model", private=True, exist_ok=True)
    client.upload_folder(
        repo_id=repo_id,
        repo_type="model",
        folder_path=str(bundle_dir),
        path_in_repo=destination,
        commit_message=f"Upload PolyGuard usable model bundle: {bundle_name}",
        ignore_patterns=[".DS_Store", "**/.DS_Store"],
    )
    return destination
|
|
|
|
def main() -> None:
    """Run the packaging workflow from the command line.

    Builds the bundle, optionally zips it and uploads it, then rewrites
    ``bundle_manifest.json`` with the zip/upload details and prints the final
    manifest as JSON.
    """
    args = parse_args()
    output_root = Path(args.output_root)
    bundle_dir = output_root / args.bundle_name

    manifest = package_bundle(
        bundle_name=args.bundle_name,
        output_root=output_root,
        include_merged=not args.no_merged,
    )

    if args.zip:
        manifest["zip_path"] = str(zip_bundle(bundle_dir))

    if args.upload:
        repo_path = upload_bundle(bundle_dir, args.artifact_repo_id, args.bundle_name)
        manifest.update(
            hf_repo_id=args.artifact_repo_id,
            hf_path_in_repo=repo_path,
            hf_url=f"https://huggingface.co/{args.artifact_repo_id}/tree/main/{repo_path}",
        )

    # Persist the manifest again so the zip/upload fields are stored on disk.
    write_json(bundle_dir / "bundle_manifest.json", manifest)
    print(json.dumps(manifest, ensure_ascii=True, indent=2))


# Run the workflow only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|