cjovs committed on
Commit
9d3f59d
·
verified ·
1 Parent(s): 8059bf0

Make HF backups atomic

Browse files
Files changed (1) hide show
  1. deploy/huggingface/backup_manager.py +21 -23
deploy/huggingface/backup_manager.py CHANGED
@@ -10,10 +10,12 @@ from pathlib import Path, PurePosixPath
10
  from typing import Iterable, Mapping
11
 
12
  try:
13
- from huggingface_hub import HfApi, hf_hub_download
14
  from huggingface_hub.errors import EntryNotFoundError
15
  from huggingface_hub.utils import HfHubHTTPError, get_token
16
  except ImportError: # pragma: no cover - only exercised in runtime images without the dependency
 
 
17
  HfApi = None # type: ignore[assignment]
18
  EntryNotFoundError = Exception # type: ignore[assignment]
19
  HfHubHTTPError = Exception # type: ignore[assignment]
@@ -93,7 +95,7 @@ def data_root_has_state(data_root: Path) -> bool:
93
 
94
 
95
  def _require_hub_support() -> None:
96
- if HfApi is None:
97
  raise RuntimeError("huggingface_hub is required for Hugging Face backup operations")
98
 
99
 
@@ -134,31 +136,27 @@ def backup_to_dataset(
134
 
135
  api = HfApi(token=resolved_token)
136
  commit_suffix = f" for {space_name}" if space_name else ""
137
- api.upload_file(
138
- path_or_fileobj=str(archive_path),
139
- path_in_repo=f"backups/{archive_path.name}",
 
 
 
 
 
 
 
 
 
 
 
 
140
  repo_id=repo_id,
 
141
  repo_type="dataset",
142
  token=resolved_token,
143
- commit_message=f"Add backup {archive_path.name}{commit_suffix}",
144
  )
145
- api.upload_file(
146
- path_or_fileobj=str(archive_path),
147
- path_in_repo="backups/latest.tar.gz",
148
- repo_id=repo_id,
149
- repo_type="dataset",
150
- token=resolved_token,
151
- commit_message=f"Update latest backup{commit_suffix}",
152
- )
153
-
154
- for old_path in select_backups_to_delete(_iter_repo_paths(api, repo_id, resolved_token), keep=keep):
155
- api.delete_file(
156
- path_in_repo=old_path,
157
- repo_id=repo_id,
158
- repo_type="dataset",
159
- token=resolved_token,
160
- commit_message=f"Prune old backup {PurePosixPath(old_path).name}",
161
- )
162
 
163
  return archive_path.name
164
 
 
10
  from typing import Iterable, Mapping
11
 
12
  try:
13
+ from huggingface_hub import CommitOperationAdd, CommitOperationDelete, HfApi, hf_hub_download
14
  from huggingface_hub.errors import EntryNotFoundError
15
  from huggingface_hub.utils import HfHubHTTPError, get_token
16
  except ImportError: # pragma: no cover - only exercised in runtime images without the dependency
17
+ CommitOperationAdd = None # type: ignore[assignment]
18
+ CommitOperationDelete = None # type: ignore[assignment]
19
  HfApi = None # type: ignore[assignment]
20
  EntryNotFoundError = Exception # type: ignore[assignment]
21
  HfHubHTTPError = Exception # type: ignore[assignment]
 
95
 
96
 
97
  def _require_hub_support() -> None:
98
+ if HfApi is None or CommitOperationAdd is None or CommitOperationDelete is None:
99
  raise RuntimeError("huggingface_hub is required for Hugging Face backup operations")
100
 
101
 
 
136
 
137
  api = HfApi(token=resolved_token)
138
  commit_suffix = f" for {space_name}" if space_name else ""
139
+ repo_paths = _iter_repo_paths(api, repo_id, resolved_token)
140
+ operations = [
141
+ CommitOperationAdd(
142
+ path_in_repo=f"backups/{archive_path.name}",
143
+ path_or_fileobj=str(archive_path),
144
+ ),
145
+ CommitOperationAdd(
146
+ path_in_repo="backups/latest.tar.gz",
147
+ path_or_fileobj=str(archive_path),
148
+ ),
149
+ ]
150
+ for old_path in select_backups_to_delete(repo_paths, keep=keep):
151
+ operations.append(CommitOperationDelete(path_in_repo=old_path))
152
+
153
+ api.create_commit(
154
  repo_id=repo_id,
155
+ operations=operations,
156
  repo_type="dataset",
157
  token=resolved_token,
158
+ commit_message=f"Update backup {archive_path.name}{commit_suffix}",
159
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
 
161
  return archive_path.name
162