GGSheng commited on
Commit
842d8e8
·
verified ·
1 Parent(s): 783de55

fix: 强制推送更新 backup.py 修复逻辑 (force-push the corrected backup.py logic)

Browse files
Files changed (1) hide show
  1. openclaw_hf/backup.py +56 -55
openclaw_hf/backup.py CHANGED
@@ -676,16 +676,13 @@ class OpenClawBackup:
676
  # Skip if very low change rate AND (fewer changed files OR smaller total size) than minimum threshold
677
  # But still perform backup if it's a scheduled full backup
678
  if changed_files < self.config.dynamic_min_changed_files and estimated_size_kb < self.config.dynamic_min_changed_size_kb and not force_full_backup:
679
- # Track skipped incremental backups to prevent chain gaps (persisted to file)
680
- current_skipped = _load_skipped_count() + 1
681
- _save_skipped_count(current_skipped)
682
 
683
- # Safety: if we've skipped too many incrementals in a row, force a backup anyway
684
- # to maintain chain freshness and prevent potential gaps
685
  max_skipped_incrementals = max(3, self.config.max_incremental_backups // 3)
686
  if current_skipped >= max_skipped_incrementals:
687
  print(f"[Dynamic Strategy] Too many skipped incrementals ({current_skipped}), forcing backup to maintain chain continuity")
688
- _clear_skipped_count()
689
  else:
690
  skip_reason = f"Very low change rate ({change_rate:.1f} files/min, {changed_files} files, {estimated_size_kb:.0f}KB), skipping incremental backup ({current_skipped}/{max_skipped_incrementals})"
691
  print(f"[Dynamic Strategy] {skip_reason}")
@@ -696,8 +693,7 @@ class OpenClawBackup:
696
  "reason": skip_reason,
697
  }
698
  else:
699
- # Normal change rate - reset skipped counter
700
- _clear_skipped_count()
701
 
702
  # Adjust compression based on size category
703
  if size_category == "large" and estimated_size_mb > 3000:
@@ -1750,20 +1746,37 @@ class OpenClawBackup:
1750
  result = []
1751
  for timestamp in sorted_timestamps:
1752
  result.extend(backups_by_time[timestamp])
1753
-
1754
  return result
1755
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1756
  def prune_old_backups(self) -> list[str]:
1757
  print("[PRUNE] Starting old backup cleanup...")
1758
- print(f"[PRUNE] Retention policy: keep_count={self.config.keep_count}")
1759
-
1760
- MIN_FULL_BACKUPS = 12
1761
 
1762
  if not hasattr(self.api, "delete_file"):
1763
  print("[PRUNE] API does not support delete_file, skipping cleanup")
1764
  return []
1765
 
1766
  keep_count = self.config.keep_count if self.config.keep_count >= 1 else 48
 
1767
 
1768
  if not hasattr(self.api, "list_repo_files"):
1769
  print("[PRUNE] API does not support list_repo_files, skipping cleanup")
@@ -1817,68 +1830,56 @@ class OpenClawBackup:
1817
 
1818
  sorted_timestamps = sorted(backups_by_time.keys(), reverse=True)
1819
 
1820
- full_backup_timestamps = []
1821
- incremental_backup_timestamps = []
1822
  for ts in sorted_timestamps:
1823
- archive_name = Path(backups_by_time[ts][0]).name
1824
- if ".part-" in archive_name:
1825
- base_name = archive_name.rsplit(".part-", 1)[0]
1826
- else:
1827
- base_name = archive_name
1828
- if base_name.endswith(".tar.gz"):
1829
- meta_name = base_name[:-7] + ".meta.json"
1830
- else:
1831
- meta_name = base_name + ".meta.json"
1832
- if path_prefix_slash:
1833
- meta_path = f"{path_prefix_slash}{meta_name}"
1834
- else:
1835
- meta_path = meta_name
1836
- if meta_path in files:
1837
  try:
1838
- cached_meta = self._get_cached_meta(meta_path)
1839
  if cached_meta is not None:
1840
  meta = cached_meta
1841
  else:
1842
- local_meta = self._download_backup(meta_path, self.config.work_dir, repo_id=self.config.dataset_repo)
1843
  with open(local_meta, "r") as f:
1844
  meta = json.load(f)
1845
- self._set_cached_meta(meta_path, meta)
1846
  if meta.get("backup_type") == "full":
1847
- full_backup_timestamps.append(ts)
1848
- else:
1849
- incremental_backup_timestamps.append(ts)
1850
  except Exception:
1851
  pass
1852
 
1853
- print(f"[PRUNE] Full backup timestamps identified: {len(full_backup_timestamps)}")
1854
- print(f"[PRUNE] Incremental backup timestamps identified: {len(incremental_backup_timestamps)}")
1855
 
1856
- MIN_FULL_BACKUPS = 12
 
 
1857
 
1858
- cutoff_timestamp = None
1859
- if len(full_backup_timestamps) >= MIN_FULL_BACKUPS:
1860
- cutoff_timestamp = full_backup_timestamps[MIN_FULL_BACKUPS - 1]
1861
- print(f"[PRUNE] Minimum {MIN_FULL_BACKUPS} full backups required, cutoff at: {cutoff_timestamp}")
1862
- elif full_backup_timestamps:
1863
- cutoff_timestamp = full_backup_timestamps[-1]
1864
- print(f"[PRUNE] Only {len(full_backup_timestamps)} full backups exist, oldest full backup: {cutoff_timestamp}")
1865
 
1866
- timestamps_to_delete = []
1867
- if cutoff_timestamp is not None:
1868
- for ts in sorted_timestamps:
1869
- if ts < cutoff_timestamp:
1870
- timestamps_to_delete.append(ts)
1871
- else:
1872
- timestamps_to_delete = sorted_timestamps[keep_count:]
1873
 
1874
- if len(sorted_timestamps) <= keep_count and not timestamps_to_delete:
1875
- print(f"[PRUNE] Total backups ({len(sorted_timestamps)}) <= keep_count ({keep_count}), no pruning needed")
1876
- return []
 
 
 
 
 
 
1877
 
1878
  if not timestamps_to_delete:
1879
- print(f"[PRUNE] All older backups are protected (minimum {MIN_FULL_BACKUPS} full backups required)")
1880
  return []
1881
 
 
 
 
1882
  # Limit batch size to avoid rate limiting (max 50 files per cleanup)
1883
  max_files_per_cleanup = 50
1884
  total_files_to_delete = 0
 
676
  # Skip if very low change rate AND (fewer changed files OR smaller total size) than minimum threshold
677
  # But still perform backup if it's a scheduled full backup
678
  if changed_files < self.config.dynamic_min_changed_files and estimated_size_kb < self.config.dynamic_min_changed_size_kb and not force_full_backup:
679
+ current_skipped = self._load_skipped_count() + 1
680
+ self._save_skipped_count(current_skipped)
 
681
 
 
 
682
  max_skipped_incrementals = max(3, self.config.max_incremental_backups // 3)
683
  if current_skipped >= max_skipped_incrementals:
684
  print(f"[Dynamic Strategy] Too many skipped incrementals ({current_skipped}), forcing backup to maintain chain continuity")
685
+ self._clear_skipped_count()
686
  else:
687
  skip_reason = f"Very low change rate ({change_rate:.1f} files/min, {changed_files} files, {estimated_size_kb:.0f}KB), skipping incremental backup ({current_skipped}/{max_skipped_incrementals})"
688
  print(f"[Dynamic Strategy] {skip_reason}")
 
693
  "reason": skip_reason,
694
  }
695
  else:
696
+ self._clear_skipped_count()
 
697
 
698
  # Adjust compression based on size category
699
  if size_category == "large" and estimated_size_mb > 3000:
 
1746
  result = []
1747
  for timestamp in sorted_timestamps:
1748
  result.extend(backups_by_time[timestamp])
1749
+
1750
  return result
1751
 
1752
+ def _find_meta_file_for_timestamp(self, timestamp: str, files: list, path_prefix_slash: str) -> str | None:
1753
+ """Find the meta.json file path for a backup timestamp.
1754
+
1755
+ Args:
1756
+ timestamp: The backup timestamp (e.g., "20260425-013501")
1757
+ files: List of files in the repository
1758
+ path_prefix_slash: Path prefix with trailing slash
1759
+
1760
+ Returns:
1761
+ The meta file path if found, None otherwise
1762
+ """
1763
+ for path in files:
1764
+ if not isinstance(path, str):
1765
+ continue
1766
+ if timestamp in path and path.endswith(".meta.json"):
1767
+ return path
1768
+ return None
1769
+
1770
  def prune_old_backups(self) -> list[str]:
1771
  print("[PRUNE] Starting old backup cleanup...")
1772
+ print(f"[PRUNE] Retention policy: keep_count={self.config.keep_count}, min_full_backups=12")
 
 
1773
 
1774
  if not hasattr(self.api, "delete_file"):
1775
  print("[PRUNE] API does not support delete_file, skipping cleanup")
1776
  return []
1777
 
1778
  keep_count = self.config.keep_count if self.config.keep_count >= 1 else 48
1779
+ min_full_backups = 12
1780
 
1781
  if not hasattr(self.api, "list_repo_files"):
1782
  print("[PRUNE] API does not support list_repo_files, skipping cleanup")
 
1830
 
1831
  sorted_timestamps = sorted(backups_by_time.keys(), reverse=True)
1832
 
1833
+ full_backup_timestamps = set()
 
1834
  for ts in sorted_timestamps:
1835
+ meta_file = self._find_meta_file_for_timestamp(ts, files, path_prefix_slash)
1836
+ if meta_file:
 
 
 
 
 
 
 
 
 
 
 
 
1837
  try:
1838
+ cached_meta = self._get_cached_meta(meta_file)
1839
  if cached_meta is not None:
1840
  meta = cached_meta
1841
  else:
1842
+ local_meta = self._download_backup(meta_file, self.config.work_dir)
1843
  with open(local_meta, "r") as f:
1844
  meta = json.load(f)
1845
+ self._set_cached_meta(meta_file, meta)
1846
  if meta.get("backup_type") == "full":
1847
+ full_backup_timestamps.add(ts)
 
 
1848
  except Exception:
1849
  pass
1850
 
1851
+ print(f"[PRUNE] Identified {len(full_backup_timestamps)} full backup(s), ensuring minimum 12 are kept")
 
1852
 
1853
+ if len(full_backup_timestamps) < min_full_backups:
1854
+ print(f"[PRUNE] Not enough full backups ({len(full_backup_timestamps)}) to meet minimum {min_full_backups}, skipping cleanup")
1855
+ return []
1856
 
1857
+ kept_full_backups_list = sorted(full_backup_timestamps, reverse=True)[:min_full_backups]
1858
+ kept_full_backups = set(kept_full_backups_list)
1859
+ oldest_kept_full = kept_full_backups_list[-1] if kept_full_backups_list else None
1860
+ print(f"[PRUNE] Will keep {len(kept_full_backups)} newest full backups: {kept_full_backups_list[:3]}...")
1861
+ if oldest_kept_full:
1862
+ print(f"[PRUNE] Oldest kept full backup: {oldest_kept_full}")
 
1863
 
1864
+ old_full_timestamps = [ts for ts in sorted_timestamps if ts in full_backup_timestamps and ts not in kept_full_backups]
 
 
 
 
 
 
1865
 
1866
+ incremental_timestamps = [ts for ts in sorted_timestamps if ts not in full_backup_timestamps]
1867
+ timestamps_to_delete_candidates = list(reversed(old_full_timestamps))
1868
+
1869
+ for ts in reversed(incremental_timestamps):
1870
+ if oldest_kept_full and ts >= oldest_kept_full:
1871
+ continue
1872
+ timestamps_to_delete_candidates.append(ts)
1873
+
1874
+ timestamps_to_delete = timestamps_to_delete_candidates[:keep_count]
1875
 
1876
  if not timestamps_to_delete:
1877
+ print("[PRUNE] No backups to delete after applying retention policy")
1878
  return []
1879
 
1880
+ print(f"[PRUNE] Will delete {len(timestamps_to_delete)} backup(s): {len([t for t in timestamps_to_delete if t in full_backup_timestamps])} full, {len([t for t in timestamps_to_delete if t not in full_backup_timestamps])} incremental")
1881
+ print(f"[PRUNE] Oldest backup to delete: {timestamps_to_delete[-1]}")
1882
+
1883
  # Limit batch size to avoid rate limiting (max 50 files per cleanup)
1884
  max_files_per_cleanup = 50
1885
  total_files_to_delete = 0