#!/usr/bin/env python3
"""Push the distill code/configs to the HF backup repo.

Usage:
    .venv/bin/python scripts/backup_to_hf.py "<commit message>"

The commit message argument is optional; when it is omitted or blank the
message "update" is used. The Hugging Face access token is read from the
HF_TOKEN environment variable.
"""
import os
import sys
from pathlib import Path

from huggingface_hub import HfApi, CommitOperationAdd, create_commit

REPO_ID = "Delta-Vector/distill-m-6a3lnzvb-code"
REPO_TYPE = "model"

# Commit message used when none (or only whitespace) is given on the CLI.
DEFAULT_COMMIT_MESSAGE = "update"

# Files/directories to mirror to the repo, relative to the project root.
# Directory entries are expanded recursively into the files they contain.
INCLUDE = [
    "distill.py",
    "configs/base.toml",
    "configs/zero_14_17.toml",
    "configs/replicate_zero4.toml",
    "configs/grow40_winning.toml",
    "configs/grow40_simple.toml",
    "configs/grow40_winning_v2.toml",
    "configs/sweep/A_resume_lr1e7_cos.toml",
    "configs/sweep/B_resume_lr5e8_cos.toml",
    "configs/sweep/C_resume_lr2e8_cos.toml",
    "configs/sweep/D_resume_lr1e7_const.toml",
    "configs/sweep/E_resume_lr5e8_b95.toml",
    "configs/sweep/F_cold_lr1e7_grow40.toml",
    "configs/sweep/G_cold_lr2e7_grow40.toml",
    "configs/sweep/H_cold_lr1e7_32L.toml",
    "configs/sweep/I_cold_paramgroups_grow40.toml",
    "configs/sweep/J_phase2_lr5e9_const.toml",
    "configs/sweep/K_phase2_lr2e8_const.toml",
    "configs/sweep/L_phase2_lr1e8_warmup500.toml",
    "configs/sweep/M_phase2_lr2e8_largebatch.toml",
    "configs/accelerate.yaml",
    "scripts/backup_to_hf.py",
    "scripts/run_sweep.sh",
    "scripts/run_sweep_rerun.sh",
    "scripts/run_hparam_sweep.sh",
    "scripts/run_phase2_sweep.sh",
    "pyproject.toml",
    "requirements.lock.txt",
]


def _commit_message(argv):
    """Return the commit message from *argv*, or the default if it is blank.

    An empty message is rejected by ``create_commit``, so a missing, empty,
    or whitespace-only first argument falls back to DEFAULT_COMMIT_MESSAGE.
    """
    if len(argv) > 1 and argv[1].strip():
        return argv[1]
    return DEFAULT_COMMIT_MESSAGE


def _files_to_upload(root, rel):
    """Return ``(path_in_repo, local_path)`` pairs for one INCLUDE entry.

    A file entry yields a single pair that keeps *rel* unchanged as the repo
    path. A directory entry yields one pair per regular file beneath it, in
    sorted order, because ``CommitOperationAdd`` only accepts file paths.
    """
    local = root / rel
    if local.is_dir():
        return [
            # Repo paths always use forward slashes, whatever the local OS.
            (path.relative_to(root).as_posix(), path)
            for path in sorted(local.rglob("*"))
            if path.is_file()
        ]
    return [(rel, local)]


def main() -> None:
    """Upload every existing INCLUDE entry to REPO_ID in a single commit.

    Reads the commit message from ``sys.argv[1]`` and the access token from
    the HF_TOKEN environment variable. Entries missing on disk are reported
    and skipped; if nothing is left to upload, no commit is created.

    Raises:
        SystemExit: with status 1 when HF_TOKEN is unset or empty.
    """
    msg = _commit_message(sys.argv)

    token = os.environ.get("HF_TOKEN")
    if not token:
        print("HF_TOKEN env var required", file=sys.stderr)
        sys.exit(1)

    # This script lives in <root>/scripts/, so the project root is two up.
    root = Path(__file__).resolve().parent.parent

    ops = []
    for rel in INCLUDE:
        if not (root / rel).exists():
            print(f" skip (missing): {rel}")
            continue
        for repo_path, local_path in _files_to_upload(root, rel):
            ops.append(
                CommitOperationAdd(
                    path_in_repo=repo_path,
                    path_or_fileobj=str(local_path),
                )
            )
            print(f" add: {repo_path}")

    if not ops:
        print("nothing to upload")
        return

    api = HfApi(token=token)
    api.create_commit(
        repo_id=REPO_ID,
        repo_type=REPO_TYPE,
        operations=ops,
        commit_message=msg,
    )
    print(f"pushed {len(ops)} files to {REPO_ID}: {msg}")


if __name__ == "__main__":
    main()