#!/usr/bin/env python3
"""Build and optionally deploy the final PolyGuard artifact Space.

The script is intentionally packaging-only: it does not train or modify model
weights. It mirrors the best tracked evidence into docs/results, packages the
available model artifacts into a separate Hugging Face Space, and records
missing artifacts honestly in a manifest.
"""
from __future__ import annotations

import argparse
import html
import json
import os
import shutil
from pathlib import Path
from typing import Any

import matplotlib

# Select a headless backend before pyplot is imported; this script runs in CI.
matplotlib.use("Agg")
import matplotlib.pyplot as plt  # noqa: E402
from huggingface_hub import HfApi  # noqa: E402

ROOT = Path(__file__).resolve().parents[1]
DEFAULT_SPACE_ID = "adithya9903/polyguard-openenv-final-artifacts"
DEFAULT_DOCS_DIR = ROOT / "docs" / "results" / "final_submission_evidence"
DEFAULT_SPACE_DIR = Path("/tmp/polyguard-final-artifact-space")
EVIDENCE_DIR = ROOT / "docs" / "results" / "submission_evidence_qwen_0_5b_1_5b_3b"
SWEEP_REPORT_DIR = ROOT / "outputs" / "reports" / "sweeps"
SWEEP_CHECKPOINT_DIR = ROOT / "checkpoints" / "sweeps"

# Sweep run ids mapped to their human label and upstream base-model id.
RUNS = {
    "qwen-qwen2-5-0-5b-instruct": {
        "label": "Qwen 0.5B",
        "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
    },
    "qwen-qwen2-5-1-5b-instruct": {
        "label": "Qwen 1.5B",
        "model_id": "Qwen/Qwen2.5-1.5B-Instruct",
    },
    "qwen-qwen2-5-3b-instruct": {
        "label": "Qwen 3B",
        "model_id": "Qwen/Qwen2.5-3B-Instruct",
    },
}

# Curated charts copied (and renumbered) onto the evidence front page.
FRONTPAGE_CHARTS = {
    "01_basic_llm_vs_full_pipeline_reward.png": (
        EVIDENCE_DIR / "charts" / "generated" / "basic_llm_vs_full_pipeline_reward.png"
    ),
    "02_reward_delta_by_seed.png": (
        EVIDENCE_DIR
        / "charts"
        / "generated"
        / "basic_llm_vs_full_pipeline_reward_delta_by_seed.png"
    ),
    "03_policy_ablation_reward.png": (
        EVIDENCE_DIR / "charts" / "generated" / "policy_ablation_avg_reward.png"
    ),
    "04_reward_components.png": (
        EVIDENCE_DIR / "charts" / "generated" / "reward_component_bars.png"
    ),
    "05_train_holdout_gap.png": (
        EVIDENCE_DIR / "charts" / "local_available_combined" / "train_holdout_gap.png"
    ),
    "06_inference_latency_validity.png": (
        EVIDENCE_DIR / "charts" / "local_available_combined" / "inference_latency_validity.png"
    ),
    "07_sft_vs_grpo_reward.png": (
        EVIDENCE_DIR / "charts" / "local_available_combined" / "sft_vs_grpo_reward.png"
    ),
}


def parse_args() -> argparse.Namespace:
    """Parse CLI flags for packaging and optional deployment."""
    parser = argparse.ArgumentParser(description="Deploy the final PolyGuard artifact Space.")
    parser.add_argument("--space-id", default=DEFAULT_SPACE_ID)
    parser.add_argument("--docs-dir", default=str(DEFAULT_DOCS_DIR))
    parser.add_argument("--space-dir", default=str(DEFAULT_SPACE_DIR))
    parser.add_argument("--public", action="store_true", help="Create/update the Space as public.")
    parser.add_argument("--deploy", action="store_true", help="Upload the Space bundle to Hugging Face.")
    parser.add_argument("--skip-docs", action="store_true")
    return parser.parse_args()


def load_json(path: Path, default: Any) -> Any:
    """Read JSON from *path*, returning *default* when missing or malformed."""
    if not path.exists():
        return default
    try:
        return json.loads(path.read_text(encoding="utf-8"))
    except json.JSONDecodeError:
        return default


def write_json(path: Path, payload: Any) -> None:
    """Write *payload* as ASCII-safe, indented JSON, creating parent dirs."""
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(json.dumps(payload, ensure_ascii=True, indent=2) + "\n", encoding="utf-8")


def write_text(path: Path, text: str) -> None:
    """Write *text* to *path*, creating parent directories as needed."""
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(text, encoding="utf-8")


def copy_file(src: Path, dst: Path) -> bool:
    """Copy *src* to *dst* if it exists; return whether a copy happened."""
    if not src.exists():
        return False
    dst.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy2(src, dst)
    return True


def copy_tree(src: Path, dst: Path) -> dict[str, Any]:
    """Mirror directory *src* into *dst* and return a size/count summary.

    Any existing *dst* is removed first so the mirror is exact.
    """
    if not src.exists():
        return {"exists": False, "file_count": 0, "bytes": 0}
    if dst.exists():
        shutil.rmtree(dst)
    shutil.copytree(src, dst, ignore=shutil.ignore_patterns(".DS_Store", "__pycache__", "*.pyc"))
    files = [path for path in dst.rglob("*") if path.is_file()]
    return {
        "exists": True,
        "file_count": len(files),
        "bytes": sum(path.stat().st_size for path in files),
    }


def dir_size(path: Path) -> int:
    """Return the total size in bytes of all files under *path* (0 if absent)."""
    if not path.exists():
        return 0
    return sum(item.stat().st_size for item in path.rglob("*") if item.is_file())


def summarize_artifact_dir(path: Path) -> dict[str, Any]:
    """Summarize existence, file count, and byte size of a directory."""
    return {
        "exists": path.exists(),
        "file_count": len([p for p in path.rglob("*") if p.is_file()]) if path.exists() else 0,
        "bytes": dir_size(path),
    }


def plot_model_reward(summary: dict[str, Any], path: Path) -> None:
    """Render grouped SFT-vs-GRPO reward bars per model into *path*.

    Models without a GRPO reward get a zero-height bar annotated "pending".
    """
    labels: list[str] = []
    sft: list[float] = []
    grpo: list[float | None] = []
    for model in summary.get("models", []):
        metrics = model.get("metrics", {})
        labels.append(str(model.get("label") or model.get("run_id")))
        sft.append(float(metrics.get("sft_avg_env_reward") or 0.0))
        value = metrics.get("grpo_avg_env_reward")
        grpo.append(float(value) if value is not None else None)
    if not labels:
        return
    path.parent.mkdir(parents=True, exist_ok=True)
    x = list(range(len(labels)))
    width = 0.35
    plt.figure(figsize=(9.5, 5))
    plt.bar([i - width / 2 for i in x], sft, width=width, label="SFT baseline")
    grpo_values = [value if value is not None else 0.0 for value in grpo]
    plt.bar([i + width / 2 for i in x], grpo_values, width=width, label="GRPO policy")
    for i, value in enumerate(grpo):
        if value is None:
            plt.text(i + width / 2, 0.025, "pending", ha="center", rotation=90, fontsize=8)
    plt.ylim(0, 1)
    plt.ylabel("Verifier reward")
    plt.title("SFT Baseline vs GRPO Policy Reward")
    plt.xticks(x, labels)
    plt.legend()
    plt.tight_layout()
    plt.savefig(path, dpi=180)
    plt.close()


def plot_sft_loss(summary: dict[str, Any], path: Path) -> None:
    """Render a bar chart of final SFT training loss per model into *path*."""
    labels: list[str] = []
    values: list[float] = []
    for model in summary.get("models", []):
        labels.append(str(model.get("label") or model.get("run_id")))
        values.append(float(model.get("metrics", {}).get("sft_train_loss") or 0.0))
    if not labels:
        return
    path.parent.mkdir(parents=True, exist_ok=True)
    plt.figure(figsize=(9.5, 5))
    plt.bar(labels, values, color=["#315f72", "#8a5a44", "#2f6f4e"][: len(labels)])
    plt.ylabel("Final SFT train loss")
    plt.title("SFT Training Loss By Qwen Size")
    plt.tight_layout()
    plt.savefig(path, dpi=180)
    plt.close()


def plot_grpo_curve(history_path: Path, output: Path) -> None:
    """Plot the raw and 50-step-rolling-mean GRPO reward curve into *output*.

    Rows missing a reward are skipped; step falls back to the row index.
    """
    rows = load_json(history_path, [])
    points = [
        (int(row.get("step") or idx + 1), float(row.get("reward")))
        for idx, row in enumerate(rows)
        if isinstance(row, dict) and row.get("reward") is not None
    ]
    if not points:
        return
    output.parent.mkdir(parents=True, exist_ok=True)
    steps, rewards = zip(*points)
    window = 50
    smooth = []
    for idx in range(len(rewards)):
        start = max(0, idx - window + 1)
        smooth.append(sum(rewards[start : idx + 1]) / (idx - start + 1))
    plt.figure(figsize=(10, 5))
    plt.plot(steps, rewards, alpha=0.18, label="step reward")
    plt.plot(steps, smooth, linewidth=2.0, label="rolling mean (50)")
    plt.ylim(0, 1)
    plt.xlabel("GRPO step")
    plt.ylabel("Verifier reward")
    plt.title("Qwen 3B GRPO Reward Curve")
    plt.legend()
    plt.tight_layout()
    plt.savefig(output, dpi=180)
    plt.close()


def artifact_availability() -> dict[str, Any]:
    """Summarize which local checkpoint/report artifacts exist for each run.

    The summary records adapter presence honestly: runs with missing adapter
    directories are tagged "reports_only_or_partial" rather than omitted.
    """
    availability: dict[str, Any] = {}
    for run_id, meta in RUNS.items():
        checkpoint_dir = SWEEP_CHECKPOINT_DIR / run_id
        report_dir = SWEEP_REPORT_DIR / run_id
        sft_adapter = checkpoint_dir / "sft_adapter"
        grpo_adapter = checkpoint_dir / "grpo_adapter"
        availability[run_id] = {
            "label": meta["label"],
            "model_id": meta["model_id"],
            "checkpoint_tree": summarize_artifact_dir(checkpoint_dir),
            "sft_adapter": summarize_artifact_dir(sft_adapter),
            "grpo_adapter": summarize_artifact_dir(grpo_adapter),
            "reports": summarize_artifact_dir(report_dir),
            "sft_report": (report_dir / "sft_trl_run.json").exists(),
            "grpo_report": (report_dir / "grpo_trl_run.json").exists(),
            "postsave_sft": (report_dir / "postsave_inference_sft.json").exists(),
            "postsave_grpo": (report_dir / "postsave_inference_grpo.json").exists(),
            "policy_ablation": (report_dir / "grpo_ablation_report.json").exists(),
        }
        missing: list[str] = []
        if not sft_adapter.exists():
            missing.append("sft_adapter")
        if not grpo_adapter.exists():
            missing.append("grpo_adapter")
        availability[run_id]["missing_trained_files"] = missing
        availability[run_id]["status"] = "complete" if not missing else "reports_only_or_partial"
    return availability


def build_docs(docs_dir: Path, manifest: dict[str, Any]) -> None:
    """Rebuild the curated docs/results evidence folder from tracked sources."""
    if docs_dir.exists():
        shutil.rmtree(docs_dir)
    (docs_dir / "charts" / "frontpage").mkdir(parents=True, exist_ok=True)
    (docs_dir / "charts" / "all").mkdir(parents=True, exist_ok=True)
    (docs_dir / "reports").mkdir(parents=True, exist_ok=True)

    summary = load_json(EVIDENCE_DIR / "submission_summary.json", {})
    plot_model_reward(summary, docs_dir / "charts" / "frontpage" / "00_sft_vs_grpo_reward_by_model.png")
    plot_sft_loss(summary, docs_dir / "charts" / "frontpage" / "08_sft_loss_by_model.png")
    plot_grpo_curve(
        SWEEP_REPORT_DIR / "qwen-qwen2-5-3b-instruct" / "grpo_history.json",
        docs_dir / "charts" / "frontpage" / "09_qwen_3b_grpo_reward_curve.png",
    )

    copied: list[str] = []
    for name, source in FRONTPAGE_CHARTS.items():
        if copy_file(source, docs_dir / "charts" / "frontpage" / name):
            copied.append(name)

    # Mirror every generated chart (not only the curated front-page subset).
    for source_dir in [
        EVIDENCE_DIR / "charts" / "generated",
        EVIDENCE_DIR / "charts" / "local_available_combined",
    ]:
        if source_dir.exists():
            for item in sorted(source_dir.glob("*.png")):
                copy_file(item, docs_dir / "charts" / "all" / item.name)

    report_sources = [
        EVIDENCE_DIR / "submission_summary.json",
        EVIDENCE_DIR / "reports" / "basic_llm_vs_polyguard_report.json",
        EVIDENCE_DIR / "reports" / "policy_ablation_report.json",
        EVIDENCE_DIR / "reports" / "basic_llm_failure_cases.md",
        EVIDENCE_DIR / "reports" / "action_traces.jsonl",
        SWEEP_REPORT_DIR / "qwen-qwen2-5-3b-instruct" / "grpo_trl_run.json",
        SWEEP_REPORT_DIR / "qwen-qwen2-5-3b-instruct" / "postsave_inference_grpo.json",
        SWEEP_REPORT_DIR / "qwen-qwen2-5-3b-instruct" / "grpo_ablation_report.json",
    ]
    for source in report_sources:
        copy_file(source, docs_dir / "reports" / source.name)

    write_json(docs_dir / "manifest.json", manifest)
    write_text(docs_dir / "README.md", final_docs_readme(manifest))


def final_docs_readme(manifest: dict[str, Any]) -> str:
    """Render the docs/results README markdown from the packaging manifest."""
    availability = manifest["artifact_availability"]
    rows = []
    for data in availability.values():
        rows.append(
            "| {label} | {sft} | {grpo} | {checkpoints} | {reports} | {status} |".format(
                label=data["label"],
                sft="yes" if data["sft_adapter"]["exists"] else "missing",
                grpo="yes" if data["grpo_adapter"]["exists"] else "missing",
                checkpoints="yes" if data["checkpoint_tree"]["exists"] else "missing",
                reports="yes" if data["reports"]["exists"] else "missing",
                status=data["status"],
            )
        )
    return """# PolyGuard Final Submission Evidence

This folder is the current curated evidence set for the final submission. It replaces the earlier Qwen 0.5B/1.5B-only view with a single location for the best charts, reports, action traces, and model-artifact availability.

## Hugging Face Artifact Space

- Space: [{space_id}](https://huggingface.co/spaces/{space_id})
- Download command:

```bash
HF_TOKEN= ./.venv/bin/hf download {space_id} --repo-type space --local-dir ./hf_final_artifacts
```

## Artifact Availability

| Model | SFT adapter | GRPO adapter | Checkpoints | Reports | Status |
| --- | --- | --- | --- | --- | --- |
{rows}

Qwen 0.5B and 1.5B currently have SFT histories/reports and post-save SFT evidence in this repository, but no downloadable SFT/GRPO adapter directories were present in the local checkout or authenticated artifact repos at packaging time. Qwen 3B has both SFT and GRPO adapters, checkpoint metadata/intermediate checkpoints, GRPO history, post-save GRPO inference, and policy ablation evidence.

## Frontpage Charts

- `charts/frontpage/00_sft_vs_grpo_reward_by_model.png`
- `charts/frontpage/01_basic_llm_vs_full_pipeline_reward.png`
- `charts/frontpage/02_reward_delta_by_seed.png`
- `charts/frontpage/03_policy_ablation_reward.png`
- `charts/frontpage/04_reward_components.png`
- `charts/frontpage/05_train_holdout_gap.png`
- `charts/frontpage/06_inference_latency_validity.png`
- `charts/frontpage/07_sft_vs_grpo_reward.png`
- `charts/frontpage/08_sft_loss_by_model.png`
- `charts/frontpage/09_qwen_3b_grpo_reward_curve.png`

## Improvement Evidence

- Basic LLM proxy vs full PolyGuard pipeline reward delta: `{delta}` average reward.
- Full pipeline legality rate: `{pipeline_legality}`.
- Basic LLM failure/exploit rate: `{basic_failure_rate}`.
- Full pipeline failure/exploit rate: `{pipeline_failure_rate}`.

Reward values in the tracked API/reports remain numeric and clamped to `[0.001, 0.999]` at three decimal precision.
""".format(
        space_id=manifest["space_id"],
        rows="\n".join(rows),
        delta=manifest.get("basic_vs_pipeline", {}).get("reward_delta"),
        pipeline_legality=manifest.get("basic_vs_pipeline", {}).get("pipeline_legality"),
        basic_failure_rate=manifest.get("basic_vs_pipeline", {}).get("basic_failure_rate"),
        pipeline_failure_rate=manifest.get("basic_vs_pipeline", {}).get("pipeline_failure_rate"),
    )


def build_space(space_dir: Path, manifest: dict[str, Any]) -> None:
    """Assemble the static Space bundle (README, LFS config, evidence, adapters)."""
    if space_dir.exists():
        shutil.rmtree(space_dir)
    space_dir.mkdir(parents=True)
    write_text(
        space_dir / "README.md",
        """---
title: PolyGuard Final Artifacts
sdk: static
pinned: false
---

# PolyGuard Final Artifacts

This Space stores the final PolyGuard evidence bundle and the available trained adapter artifacts. It is separate from the training Spaces and does not run training.

Open `index.html` or inspect the `artifacts/`, `reports/`, and `evidence/` folders in the Space file browser.
""",
    )
    # Weight files must go through Git LFS on the Hub.
    write_text(
        space_dir / ".gitattributes",
        """*.safetensors filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
""",
    )
    write_json(space_dir / "manifest.json", manifest)

    evidence_target = space_dir / "evidence" / "final_submission_evidence"
    copy_tree(Path(manifest["docs_dir"]), evidence_target)

    for run_id in RUNS:
        checkpoint_dir = SWEEP_CHECKPOINT_DIR / run_id
        report_dir = SWEEP_REPORT_DIR / run_id
        if checkpoint_dir.exists():
            copy_tree(checkpoint_dir, space_dir / "checkpoints" / run_id)
            # Adapters are additionally mirrored under artifacts/ for easy download.
            for stage in ["sft_adapter", "grpo_adapter"]:
                source = checkpoint_dir / stage
                if source.exists():
                    copy_tree(source, space_dir / "artifacts" / run_id / stage)
        if report_dir.exists():
            copy_tree(report_dir, space_dir / "reports" / run_id)

    write_text(space_dir / "index.html", index_html(manifest))


def index_html(manifest: dict[str, Any]) -> str:
    """Render the static Space landing page as a valid HTML document.

    NOTE(review): the previous template emitted table cells with no markup
    ("{label}{sft}..."), producing an unreadable page; rebuilt with a real
    <table> and explicit links. Cell values are fixed strings; labels/statuses
    are escaped because they originate from the manifest.
    """
    rows = []
    for data in manifest["artifact_availability"].values():
        rows.append(
            "<tr><td>{label}</td><td>{sft}</td><td>{grpo}</td>"
            "<td>{checkpoints}</td><td>{reports}</td><td>{status}</td></tr>".format(
                label=html.escape(data["label"]),
                sft="available" if data["sft_adapter"]["exists"] else "missing",
                grpo="available" if data["grpo_adapter"]["exists"] else "missing",
                checkpoints="available" if data["checkpoint_tree"]["exists"] else "missing",
                reports="available" if data["reports"]["exists"] else "missing",
                status=html.escape(data["status"]),
            )
        )
    return """<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>PolyGuard Final Artifacts</title>
</head>
<body>
<h1>PolyGuard Final Artifacts</h1>
<p>This Space stores the final evidence bundle and available trained adapters. It does not retrain models.</p>
<table border="1" cellpadding="4">
<thead>
<tr><th>Model</th><th>SFT adapter</th><th>GRPO adapter</th><th>Checkpoints</th><th>Reports</th><th>Status</th></tr>
</thead>
<tbody>
{rows}
</tbody>
</table>
<ul>
<li><strong>Evidence</strong>: <a href="evidence/final_submission_evidence/">evidence/final_submission_evidence/</a></li>
<li><strong>Adapters</strong>: <a href="artifacts/qwen-qwen2-5-3b-instruct/">artifacts/qwen-qwen2-5-3b-instruct/</a></li>
<li><strong>Checkpoints</strong>: <a href="checkpoints/qwen-qwen2-5-3b-instruct/">checkpoints/qwen-qwen2-5-3b-instruct/</a></li>
<li><strong>Reports</strong>: <a href="reports/">reports/</a></li>
<li><strong>Manifest</strong>: <a href="manifest.json">manifest.json</a></li>
</ul>
</body>
</html>
""".format(rows="\n".join(rows))


def deploy_space(space_id: str, space_dir: Path, public: bool) -> None:
    """Create/update the Space repo and upload the bundle.

    Bundles over 100 MB are uploaded in pieces (checkpoints last, directory
    by directory) to keep individual commits small enough for the Hub.
    Requires HF_TOKEN in the environment.
    """
    token = os.getenv("HF_TOKEN")
    if not token:
        raise SystemExit("HF_TOKEN is required for --deploy")
    api = HfApi(token=token)
    api.create_repo(
        repo_id=space_id,
        repo_type="space",
        space_sdk="static",
        private=not public,
        exist_ok=True,
    )
    ignore_patterns = [".DS_Store", "**/.DS_Store", "__pycache__/*", "*.pyc", ".cache/*", ".cache/**"]
    if dir_size(space_dir) > 100 * 1024 * 1024:
        # Upload everything except checkpoints in one commit first.
        api.upload_folder(
            repo_id=space_id,
            repo_type="space",
            folder_path=str(space_dir),
            commit_message="Upload PolyGuard final evidence and adapters",
            ignore_patterns=ignore_patterns + ["checkpoints/*", "checkpoints/**"],
        )
        checkpoint_root = space_dir / "checkpoints"
        if not checkpoint_root.exists():
            # Nothing further to upload; guard keeps glob semantics portable.
            return
        for run_dir in sorted(path for path in checkpoint_root.glob("*") if path.is_dir()):
            for file_path in sorted(path for path in run_dir.iterdir() if path.is_file()):
                api.upload_file(
                    repo_id=space_id,
                    repo_type="space",
                    path_or_fileobj=str(file_path),
                    path_in_repo=f"checkpoints/{run_dir.name}/{file_path.name}",
                    commit_message=f"Upload {run_dir.name} checkpoint metadata",
                )
            for subdir in sorted(path for path in run_dir.iterdir() if path.is_dir()):
                nested_dirs = sorted(path for path in subdir.iterdir() if path.is_dir())
                if nested_dirs:
                    # Two-level checkpoint tree: push loose files, then each
                    # nested directory as its own commit.
                    for file_path in sorted(path for path in subdir.iterdir() if path.is_file()):
                        api.upload_file(
                            repo_id=space_id,
                            repo_type="space",
                            path_or_fileobj=str(file_path),
                            path_in_repo=f"checkpoints/{run_dir.name}/{subdir.name}/{file_path.name}",
                            commit_message=f"Upload {run_dir.name} {subdir.name} metadata",
                        )
                    for nested in nested_dirs:
                        api.upload_folder(
                            repo_id=space_id,
                            repo_type="space",
                            folder_path=str(nested),
                            path_in_repo=f"checkpoints/{run_dir.name}/{subdir.name}/{nested.name}",
                            commit_message=f"Upload {run_dir.name} {subdir.name}/{nested.name}",
                            ignore_patterns=ignore_patterns,
                        )
                else:
                    api.upload_folder(
                        repo_id=space_id,
                        repo_type="space",
                        folder_path=str(subdir),
                        path_in_repo=f"checkpoints/{run_dir.name}/{subdir.name}",
                        commit_message=f"Upload {run_dir.name} {subdir.name}",
                        ignore_patterns=ignore_patterns,
                    )
    else:
        api.upload_folder(
            repo_id=space_id,
            repo_type="space",
            folder_path=str(space_dir),
            commit_message="Upload PolyGuard final evidence and trained adapters",
            ignore_patterns=ignore_patterns,
        )


def main() -> None:
    """Build the manifest, docs folder, and Space bundle; optionally deploy."""
    args = parse_args()
    docs_dir = Path(args.docs_dir)
    space_dir = Path(args.space_dir)

    summary = load_json(EVIDENCE_DIR / "submission_summary.json", {})
    basic = load_json(EVIDENCE_DIR / "reports" / "basic_llm_vs_polyguard_report.json", {})
    basic_summary = basic.get("summaries", {})

    manifest = {
        "status": "ok",
        "space_id": args.space_id,
        "space_url": f"https://huggingface.co/spaces/{args.space_id}",
        "docs_dir": str(docs_dir.relative_to(ROOT) if docs_dir.is_relative_to(ROOT) else docs_dir),
        "evidence_source": str(EVIDENCE_DIR.relative_to(ROOT)),
        "artifact_availability": artifact_availability(),
        "submission_models": summary.get("models", []),
        "basic_vs_pipeline": {
            "reward_delta": basic.get("pipeline_minus_basic_reward_delta"),
            "basic_reward": basic_summary.get("basic_llm", {}).get("avg_reward"),
            "pipeline_reward": basic_summary.get("full_polyguard_pipeline", {}).get("avg_reward"),
            "basic_failure_rate": basic_summary.get("basic_llm", {}).get("exploit_or_failure_rate"),
            "pipeline_failure_rate": basic_summary.get("full_polyguard_pipeline", {}).get("exploit_or_failure_rate"),
            "pipeline_legality": basic_summary.get("full_polyguard_pipeline", {}).get("legality_rate"),
        },
        "download_command": (
            f"HF_TOKEN= ./.venv/bin/hf download {args.space_id} "
            "--repo-type space --local-dir ./hf_final_artifacts"
        ),
        "notes": [
            "Packaging-only run; no retraining is performed.",
            "Qwen 3B has SFT and GRPO adapter directories plus checkpoint metadata/intermediate checkpoints in this artifact Space.",
            "Qwen 0.5B and 1.5B adapter directories were not present locally or in the checked artifact repos; reports remain included.",
        ],
    }

    if not args.skip_docs:
        build_docs(docs_dir, manifest)
        # build_docs may enrich/normalize the manifest it wrote; prefer that copy.
        manifest = load_json(docs_dir / "manifest.json", manifest)

    build_space(space_dir, manifest)
    if args.deploy:
        deploy_space(args.space_id, space_dir, public=args.public)
    print(json.dumps({"status": "ok", "space_url": manifest["space_url"], "space_dir": str(space_dir), "docs_dir": str(docs_dir)}, indent=2))


if __name__ == "__main__":
    main()