File size: 4,632 Bytes
5365372
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#!/usr/bin/env python3
"""
HuggingClaw Workspace Sync — HuggingFace Hub based backup
Uses huggingface_hub Python library instead of git for more reliable
HF Dataset operations (handles auth, LFS, retries automatically).

Falls back to git-based sync if HF_USERNAME or HF_TOKEN are not set.
"""

import os
import sys
import time
import signal
import subprocess
from pathlib import Path

# Directory whose contents are backed up.
WORKSPACE = Path("/home/node/.openclaw/workspace")

# Settings read once from the environment when the module is loaded.
INTERVAL = int(os.getenv("SYNC_INTERVAL", "600"))  # seconds between sync attempts
HF_TOKEN = os.getenv("HF_TOKEN", "")
HF_USERNAME = os.getenv("HF_USERNAME", "")
BACKUP_DATASET = os.getenv("BACKUP_DATASET_NAME", "huggingclaw-backup")

# Main-loop flag; signal_handler sets it to False to request a stop.
running = True

def signal_handler(sig, frame):
    """Signal callback: ask the main loop to stop by clearing ``running``.

    Args:
        sig: Number of the signal that was delivered (not used).
        frame: Stack frame that was interrupted (not used).
    """
    global running
    del sig, frame  # required by the signal-handler signature, unused here
    running = False

# Install signal_handler for SIGTERM and SIGINT so either signal clears `running`.
signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler)


def has_changes():
    """Report whether the workspace differs from its last git commit.

    Stages everything with ``git add -A`` (this modifies the git index as a
    side effect) and then runs ``git diff --cached --quiet`` inside WORKSPACE.

    Returns:
        True if ``git diff`` exits with a non-zero status. Status 1 means
        staged differences exist; any other non-zero status (a git error)
        is also reported as True. False if the index is clean, or if git
        could not be run at all, in which case the reason is printed.
    """
    try:
        subprocess.run(["git", "add", "-A"], cwd=WORKSPACE, capture_output=True)
        result = subprocess.run(
            ["git", "diff", "--cached", "--quiet"],
            cwd=WORKSPACE, capture_output=True
        )
    except Exception as e:
        # Best-effort check: keep the sync loop alive, but say why nothing
        # will be synced instead of failing silently on every interval.
        print(f"  ⚠️  Could not check workspace for changes: {e}")
        return False
    return result.returncode != 0


def sync_with_hf_hub():
    """Upload the workspace folder to a HuggingFace dataset repository.

    The target is the dataset ``{HF_USERNAME}/{BACKUP_DATASET}``. If looking
    it up raises, the dataset is created as private first. WORKSPACE is then
    uploaded in a single "Auto-sync <UTC timestamp>" commit, skipping ``.git``.

    Returns:
        True once the upload call returns. False if ``huggingface_hub``
        cannot be imported, the dataset could not be created, or any other
        exception is raised; a message is printed in each of those cases.
    """
    try:
        from huggingface_hub import HfApi, upload_folder

        client = HfApi(token=HF_TOKEN)
        target = f"{HF_USERNAME}/{BACKUP_DATASET}"

        # Any lookup failure is treated as "dataset does not exist yet".
        try:
            client.repo_info(repo_id=target, repo_type="dataset")
            dataset_missing = False
        except Exception:
            dataset_missing = True

        if dataset_missing:
            print(f"  πŸ“ Creating dataset {target}...")
            try:
                client.create_repo(repo_id=target, repo_type="dataset", private=True)
                print(f"  βœ… Dataset created: {target}")
            except Exception as err:
                print(f"  ⚠️  Could not create dataset: {err}")
                return False

        # Push the whole workspace, leaving git metadata out of the backup.
        stamp = time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())
        upload_folder(
            repo_id=target,
            repo_type="dataset",
            folder_path=str(WORKSPACE),
            ignore_patterns=[".git/*", ".git"],
            commit_message=f"Auto-sync {stamp}",
            token=HF_TOKEN,
        )
        return True

    except ImportError:
        print("  ⚠️  huggingface_hub not installed, falling back to git")
        return False
    except Exception as err:
        print(f"  ⚠️  HF Hub sync failed: {err}")
        return False


def sync_with_git():
    """Commit and push the workspace with plain git (fallback path).

    Runs ``git add -A``, ``git commit -m "Auto-sync <UTC timestamp>"`` and
    ``git push origin main`` inside WORKSPACE, in that order. Only the exit
    status of the push decides the result; the add and commit outcomes are
    ignored.

    Returns:
        True if the push exits with status 0. False if it exits non-zero or
        if any exception is raised while running the commands.
    """
    try:
        stamp = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        steps = (
            ["git", "add", "-A"],
            ["git", "commit", "-m", f"Auto-sync {stamp}"],
            ["git", "push", "origin", "main"],
        )
        for argv in steps:
            completed = subprocess.run(argv, cwd=WORKSPACE, capture_output=True)
        # After the loop `completed` holds the result of the final step, the push.
        return completed.returncode == 0
    except Exception:
        return False


def _sleep_while_running(seconds, step=1.0):
    """Sleep for up to *seconds*, waking early once ``running`` is cleared.

    ``time.sleep`` resumes after a signal handler returns, so one long sleep
    would delay shutdown until it finished. Sleeping in short slices lets a
    stop request be noticed within about *step* seconds.

    Returns:
        True if the full duration elapsed with ``running`` still set,
        False if a stop was requested before or during the wait.
    """
    deadline = time.monotonic() + seconds
    while running:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return True
        time.sleep(min(step, remaining))
    return False


def main():
    """Run the periodic workspace backup loop until a stop is requested.

    After an initial 30-second wait, exits if WORKSPACE does not exist, or if
    neither HF credentials nor a ``.git`` directory are available. Otherwise,
    every INTERVAL seconds it checks ``has_changes()`` and, when there are
    changes, syncs through ``sync_with_hf_hub()`` (falling back to
    ``sync_with_git()`` on failure) or through ``sync_with_git()`` alone when
    HF credentials are not set. The outcome of each attempt is printed.
    """
    # Wait for workspace to initialize (stop immediately if asked to)
    if not _sleep_while_running(30):
        return

    if not WORKSPACE.exists():
        print("πŸ“ Workspace sync: workspace not found, exiting.")
        return

    use_hf_hub = bool(HF_TOKEN and HF_USERNAME)

    if use_hf_hub:
        print(f"πŸ”„ Workspace sync started (huggingface_hub): every {INTERVAL}s β†’ {HF_USERNAME}/{BACKUP_DATASET}")
    else:
        git_dir = WORKSPACE / ".git"
        if not git_dir.exists():
            print("πŸ“ Workspace sync: no git repo and no HF credentials, skipping.")
            return
        print(f"πŸ”„ Workspace sync started (git): every {INTERVAL}s")

    # Each iteration waits one interval; the wait returns False on shutdown.
    while _sleep_while_running(INTERVAL):
        if not has_changes():
            continue

        ts = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())

        if use_hf_hub:
            if sync_with_hf_hub():
                print(f"πŸ”„ Workspace sync (hf_hub): pushed changes ({ts})")
            elif sync_with_git():
                # Fallback to git
                print(f"πŸ”„ Workspace sync (git fallback): pushed changes ({ts})")
            else:
                print(f"πŸ”„ Workspace sync: failed ({ts}), will retry")
        elif sync_with_git():
            print(f"πŸ”„ Workspace sync (git): pushed changes ({ts})")
        else:
            print(f"πŸ”„ Workspace sync: push failed ({ts}), will retry")

# Start the sync loop only when this file is executed as a script.
if __name__ == "__main__":
    main()