| |
| |
|
|
| import argparse |
| import os |
| import sys |
| from pathlib import Path |
| from huggingface_hub import HfApi |
|
|
# Directory that restored backups are downloaded into; can be overridden with
# the HF_RESTORE_DIR environment variable.
RESTORE_DIR = os.getenv("HF_RESTORE_DIR", "/tmp/crs_backup")
|
|
def list_backups(api: HfApi, repo_id: str, prefix: str):
    """Return the backup archive names stored in a dataset repo, sorted ascending.

    A file counts as a backup when its path in the repo starts with ``prefix``
    and ends with ``.tar.gz``.

    Args:
        api: Client used to list the files of the repo.
        repo_id: Identifier of the dataset repo to inspect.
        prefix: Leading part of the file name that marks a backup.

    Returns:
        A new list of matching file names in ascending sort order (empty when
        nothing matches).
    """
    repo_files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
    return sorted(
        name
        for name in repo_files
        if name.startswith(prefix) and name.endswith(".tar.gz")
    )
|
|
def ensure_dataset(api: HfApi, repo_id: str):
    """Make sure the dataset repo ``repo_id`` exists, creating it if needed.

    The repo is first looked up with ``dataset_info``. If that lookup raises
    for any reason, a private dataset repo is created instead;
    ``exist_ok=True`` is passed so the creation call is told the repo may
    already be there.

    Args:
        api: Client used for the lookup and, if needed, the creation.
        repo_id: Identifier of the dataset repo.
    """
    try:
        api.dataset_info(repo_id=repo_id)
    except Exception:
        # Any lookup failure is treated as "repo is missing": fall back to
        # creating it.
        api.create_repo(
            repo_id=repo_id,
            repo_type="dataset",
            private=True,
            exist_ok=True,
        )
|
|
def upload(args):
    """Upload one backup file to the dataset repo, then prune old backups.

    The file is stored in the repo under its base name. Afterwards, if
    ``args.max`` is a positive number, the backups matching ``args.prefix``
    are listed and the entries that sort first are deleted until at most
    ``args.max`` remain.

    Deleting old backups is best-effort: a failed deletion never aborts the
    command, but it is reported on stderr so stale archives do not pile up
    unnoticed.

    Args:
        args: Parsed command-line namespace providing ``token``, ``repo``,
            ``file``, ``prefix`` and ``max``.
    """
    api = HfApi(token=args.token)
    ensure_dataset(api, args.repo)

    api.upload_file(
        path_or_fileobj=args.file,
        path_in_repo=os.path.basename(args.file),
        repo_id=args.repo,
        repo_type="dataset",
    )

    # Rotation is optional: a missing, zero or negative limit keeps everything.
    if not args.max or args.max <= 0:
        return

    backups = list_backups(api, args.repo, args.prefix)
    excess = len(backups) - args.max
    if excess <= 0:
        return

    # list_backups returns names in ascending order, so the first `excess`
    # entries are the ones rotated out.
    for stale in backups[:excess]:
        try:
            api.delete_file(path_in_repo=stale, repo_id=args.repo, repo_type="dataset")
        except Exception as exc:
            # Keep going, but leave a trace instead of swallowing the error.
            # stderr is used so anything reading stdout is not disturbed.
            print(
                f"warning: could not delete old backup {stale!r}: {exc}",
                file=sys.stderr,
            )
|
|
def restore(args):
    """Download the last backup (in sorted name order) and print its local path.

    Backups matching ``args.prefix`` are listed from the dataset repo. When
    none exist the function returns without printing anything. Otherwise the
    last entry of the sorted list is downloaded into ``RESTORE_DIR`` (created
    if necessary) and the path returned by the download call is written to
    stdout.

    Args:
        args: Parsed command-line namespace providing ``token``, ``repo`` and
            ``prefix``.
    """
    client = HfApi(token=args.token)
    candidates = list_backups(client, args.repo, args.prefix)
    if not candidates:
        # Nothing to restore: leave quietly, with no output.
        return

    Path(RESTORE_DIR).mkdir(parents=True, exist_ok=True)

    # The list is sorted ascending, so the final entry is taken as the newest.
    downloaded = client.hf_hub_download(
        repo_id=args.repo,
        filename=candidates[-1],
        repo_type="dataset",
        local_dir=RESTORE_DIR,
        local_dir_use_symlinks=False,
    )
    print(downloaded)
|
|
def main():
    """Parse the command line and run the selected sub-command.

    Two sub-commands are available:

    * ``upload``  -- requires ``--token``, ``--repo``, ``--file`` and
      ``--prefix``; ``--max`` is an optional integer defaulting to 10.
    * ``restore`` -- requires ``--token``, ``--repo`` and ``--prefix``.

    A ``KeyboardInterrupt`` raised while the sub-command runs ends the
    process with exit status 130.
    """
    parser = argparse.ArgumentParser()
    commands = parser.add_subparsers(dest="cmd", required=True)

    upload_cmd = commands.add_parser("upload")
    for flag in ("--token", "--repo", "--file", "--prefix"):
        upload_cmd.add_argument(flag, required=True)
    upload_cmd.add_argument("--max", type=int, default=10)
    upload_cmd.set_defaults(func=upload)

    restore_cmd = commands.add_parser("restore")
    for flag in ("--token", "--repo", "--prefix"):
        restore_cmd.add_argument(flag, required=True)
    restore_cmd.set_defaults(func=restore)

    parsed = parser.parse_args()
    try:
        # Each sub-parser stored its handler under `func`.
        parsed.func(parsed)
    except KeyboardInterrupt:
        sys.exit(130)
|
|
# Run the command-line interface only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()
|
|