File size: 6,394 Bytes
5365372
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51ec4bc
5365372
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51ec4bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5365372
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51ec4bc
 
5365372
 
 
 
51ec4bc
5365372
 
 
 
51ec4bc
5365372
51ec4bc
 
 
 
5365372
 
 
51ec4bc
5365372
51ec4bc
 
 
 
5365372
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
#!/usr/bin/env python3
"""
HuggingClaw Workspace Sync — HuggingFace Hub based backup
Uses huggingface_hub Python library instead of git for more reliable
HF Dataset operations (handles auth, LFS, retries automatically).

Falls back to git-based sync if HF_USERNAME or HF_TOKEN are not set.
"""

import os
import sys
import time
import signal
import subprocess
from pathlib import Path

# Directory whose contents are backed up.
WORKSPACE = Path("/home/node/.openclaw/workspace")

# Seconds between sync attempts, read from SYNC_INTERVAL. An empty,
# non-numeric or non-positive value would either crash at import time or make
# the sync loop spin/raise in time.sleep, so those fall back to the default.
_DEFAULT_INTERVAL = 600
try:
    INTERVAL = int(os.environ.get("SYNC_INTERVAL", str(_DEFAULT_INTERVAL)))
    if INTERVAL <= 0:
        raise ValueError(f"interval must be positive, got {INTERVAL}")
except ValueError as _interval_error:
    print(f"  ⚠️ Invalid SYNC_INTERVAL ({_interval_error}); using {_DEFAULT_INTERVAL}s")
    INTERVAL = _DEFAULT_INTERVAL

# Hugging Face credentials and target dataset name; empty strings when unset.
HF_TOKEN = os.environ.get("HF_TOKEN", "")
HF_USERNAME = os.environ.get("HF_USERNAME", "")
BACKUP_DATASET = os.environ.get("BACKUP_DATASET_NAME", "huggingclaw-backup")

# Optional URL that receives a JSON POST when a sync fails; empty disables it.
WEBHOOK_URL = os.environ.get("WEBHOOK_URL", "")

# Main-loop flag; cleared by the signal handler to request shutdown.
running = True

def signal_handler(sig, frame):
    """Request a graceful shutdown by clearing the module-level ``running`` flag.

    Args:
        sig: Signal number delivered to the process (unused).
        frame: Interrupted stack frame (unused).
    """
    global running
    running = False


# Route both termination and keyboard-interrupt signals to the same handler.
for _signum in (signal.SIGTERM, signal.SIGINT):
    signal.signal(_signum, signal_handler)
del _signum


def has_changes():
    """Check whether the workspace has changes that are not yet committed.

    Stages everything with ``git add -A`` (a deliberate side effect on the
    index) and then runs ``git diff --cached --quiet`` in ``WORKSPACE``.

    Returns:
        True when the diff command exits non-zero. Any non-zero exit counts,
        so a git error status is also reported as "changed". False when it
        exits zero, or when git could not be launched at all (for example the
        executable or the workspace directory is missing); that failure is
        logged rather than silently swallowed.
    """
    try:
        subprocess.run(["git", "add", "-A"], cwd=WORKSPACE, capture_output=True)
        diff = subprocess.run(
            ["git", "diff", "--cached", "--quiet"],
            cwd=WORKSPACE, capture_output=True
        )
    except (OSError, subprocess.SubprocessError) as e:
        print(f"  ⚠️ Could not check workspace for changes: {e}")
        return False
    return diff.returncode != 0

def write_sync_status(status, message=""):
    """Write sync status to file for the health server dashboard.

    The payload is a JSON object with ``status``, a UTC ``timestamp`` and
    ``message``, stored at ``/tmp/sync-status.json``. It is written to a
    sibling temp file first and then moved into place with ``os.replace`` so
    a reader never observes a truncated or half-written file.

    Args:
        status: Short state label stored under the ``status`` key.
        message: Optional human-readable detail; defaults to an empty string.

    Failures are logged and otherwise ignored: status reporting is
    best-effort and must not interrupt the sync loop.
    """
    target = "/tmp/sync-status.json"
    scratch = f"{target}.tmp"
    try:
        import json
        data = {
            "status": status,
            "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
            "message": message
        }
        with open(scratch, "w") as f:
            json.dump(data, f)
        # Atomic swap: readers see either the previous file or the new one.
        os.replace(scratch, target)
    except Exception as e:
        print(f"  ⚠️ Could not write sync status: {e}")

def trigger_webhook(event, status, message):
    """Trigger webhook notification.

    POSTs a JSON object with ``event``, ``status``, ``message`` and a UTC
    ``timestamp`` to ``WEBHOOK_URL``. Does nothing when ``WEBHOOK_URL`` is
    empty.

    Args:
        event: Event name stored under the ``event`` key.
        status: Outcome label stored under the ``status`` key.
        message: Human-readable detail stored under the ``message`` key.

    Delivery is best-effort: any failure is logged and swallowed so a broken
    webhook can never stop the sync loop.
    """
    if not WEBHOOK_URL:
        return
    try:
        import urllib.request
        import json
        data = json.dumps({
            "event": event,
            "status": status,
            "message": message,
            "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        }).encode('utf-8')
        req = urllib.request.Request(WEBHOOK_URL, data=data, headers={'Content-Type': 'application/json'})
        # Close the response explicitly so the connection is released
        # immediately instead of lingering until garbage collection.
        with urllib.request.urlopen(req, timeout=10):
            pass
    except Exception as e:
        print(f"  ⚠️ Webhook delivery failed: {e}")

def sync_with_hf_hub():
    """Sync workspace using huggingface_hub library.

    Looks up the dataset repo ``{HF_USERNAME}/{BACKUP_DATASET}``, tries to
    create it as a private dataset when the lookup fails, then uploads the
    whole ``WORKSPACE`` folder (excluding ``.git``) as one commit.

    Returns:
        True when the upload call completed without raising; False when
        ``huggingface_hub`` is not installed, the dataset could not be
        created, or any other error occurred. Errors are logged, not raised.
    """
    try:
        from huggingface_hub import HfApi, upload_folder

        api = HfApi(token=HF_TOKEN)
        repo_id = f"{HF_USERNAME}/{BACKUP_DATASET}"

        # Ensure dataset exists
        try:
            api.repo_info(repo_id=repo_id, repo_type="dataset")
        except Exception:
            print(f"  📁 Creating dataset {repo_id}...")
            try:
                # The lookup above can fail for reasons other than "missing"
                # (e.g. a transient network error); exist_ok keeps creation
                # from failing when the repo is in fact already there.
                api.create_repo(
                    repo_id=repo_id, repo_type="dataset", private=True, exist_ok=True
                )
                print(f"  ✅ Dataset created: {repo_id}")
            except Exception as e:
                print(f"  ⚠️  Could not create dataset: {e}")
                return False

        # Upload workspace
        upload_folder(
            folder_path=str(WORKSPACE),
            repo_id=repo_id,
            repo_type="dataset",
            token=HF_TOKEN,
            commit_message=f"Auto-sync {time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())}",
            ignore_patterns=[".git/*", ".git"],
        )
        return True

    except ImportError:
        print("  ⚠️  huggingface_hub not installed, falling back to git")
        return False
    except Exception as e:
        print(f"  ⚠️  HF Hub sync failed: {e}")
        return False


def sync_with_git():
    """Fallback: sync workspace using git.

    Stages everything, commits with an ``Auto-sync <UTC timestamp>`` message
    and pushes to ``origin main``, all inside ``WORKSPACE``.

    Returns:
        True when ``git push`` exits zero, False otherwise (including when git
        cannot be launched). Failures are logged so they can be diagnosed.
    """
    try:
        ts = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        subprocess.run(["git", "add", "-A"], cwd=WORKSPACE, capture_output=True)
        # The commit's exit status is intentionally not checked; the push
        # result alone decides the outcome, as before.
        subprocess.run(
            ["git", "commit", "-m", f"Auto-sync {ts}"],
            cwd=WORKSPACE, capture_output=True
        )
        push = subprocess.run(
            ["git", "push", "origin", "main"],
            cwd=WORKSPACE, capture_output=True
        )
    except (OSError, subprocess.SubprocessError) as e:
        print(f"  ⚠️  git sync could not run: {e}")
        return False

    if push.returncode != 0:
        detail = push.stderr.decode("utf-8", errors="replace").strip()
        print(f"  ⚠️  git push failed (exit {push.returncode}): {detail}")
        return False
    return True


def _wait_while_running(seconds, poll=1.0):
    """Sleep for up to *seconds*, returning early once shutdown is requested.

    The signal handler only clears the module-level ``running`` flag, and
    ``time.sleep`` resumes its wait after a handler returns, so a single long
    sleep would postpone shutdown by up to the whole duration. Sleeping in
    short slices keeps SIGTERM/SIGINT handling responsive.

    Returns:
        True if the full wait elapsed while still running, False if shutdown
        was requested before or during the wait.
    """
    deadline = time.monotonic() + seconds
    while running:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return True
        time.sleep(min(poll, remaining))
    return False


def main():
    """Run the periodic workspace sync loop until shutdown is requested.

    After an initial 30 second grace period, exits early when the workspace
    directory is missing, or when neither HF credentials nor a ``.git``
    directory are available. Otherwise, every ``INTERVAL`` seconds it checks
    for changes and pushes them: via huggingface_hub when both ``HF_TOKEN``
    and ``HF_USERNAME`` are set (falling back to git if that fails), or via
    git alone. Each attempt updates the sync status file; a failed attempt
    also triggers the webhook.
    """
    # Wait for workspace to initialize
    if not _wait_while_running(30):
        return

    if not WORKSPACE.exists():
        print("📁 Workspace sync: workspace not found, exiting.")
        return

    use_hf_hub = bool(HF_TOKEN and HF_USERNAME)

    if use_hf_hub:
        print(f"🔄 Workspace sync started (huggingface_hub): every {INTERVAL}s → {HF_USERNAME}/{BACKUP_DATASET}")
    else:
        git_dir = WORKSPACE / ".git"
        if not git_dir.exists():
            print("📁 Workspace sync: no git repo and no HF credentials, skipping.")
            return
        print(f"🔄 Workspace sync started (git): every {INTERVAL}s")

    while _wait_while_running(INTERVAL):
        if not has_changes():
            continue

        ts = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())

        write_sync_status("syncing", f"Starting sync at {ts}")

        if use_hf_hub:
            if sync_with_hf_hub():
                print(f"🔄 Workspace sync (hf_hub): pushed changes ({ts})")
                write_sync_status("success", "Successfully pushed to HF Hub")
                continue
            # Hub upload failed: fall back to git
            via, failure = "git fallback", "failed"
        else:
            via, failure = "git", "push failed"

        if sync_with_git():
            print(f"🔄 Workspace sync ({via}): pushed changes ({ts})")
            write_sync_status("success", f"Successfully pushed via {via}")
        else:
            msg = f"Workspace sync: {failure} ({ts}), will retry"
            print(f"🔄 {msg}")
            write_sync_status("error", msg)
            trigger_webhook("sync", "error", msg)


# Start the sync loop only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()