File size: 8,330 Bytes
5365372
 
 
 
 
 
 
 
 
 
 
 
 
969345a
5365372
 
 
 
969345a
 
 
 
5365372
7cd1716
5365372
 
 
51ec4bc
5365372
 
 
 
 
 
 
 
 
 
 
969345a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5365372
 
 
 
 
 
 
 
 
 
 
 
51ec4bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5365372
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7cd1716
 
 
 
 
 
 
 
5365372
969345a
 
5365372
 
 
 
 
 
 
 
 
 
969345a
 
5365372
 
 
 
51ec4bc
 
5365372
 
 
 
51ec4bc
5365372
 
 
 
51ec4bc
5365372
51ec4bc
 
 
 
5365372
 
 
51ec4bc
5365372
51ec4bc
 
 
 
5365372
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
#!/usr/bin/env python3
"""
HuggingClaw Workspace Sync — HuggingFace Hub based backup
Uses huggingface_hub Python library instead of git for more reliable
HF Dataset operations (handles auth, LFS, retries automatically).

Falls back to git-based sync if HF_USERNAME or HF_TOKEN are not set.
"""

import os
import sys
import time
import signal
import shutil
import subprocess
from pathlib import Path

def _env_int(name, default, minimum=0):
    """Read an integer setting from the environment, tolerating bad values.

    Args:
        name: Environment variable to read.
        default: Value returned when the variable is unset, blank, or not a
            valid integer.
        minimum: Lower bound applied to a successfully parsed value.

    Returns:
        The parsed integer clamped to ``minimum``, or ``default``.
    """
    raw = os.environ.get(name, "").strip()
    if not raw:
        # Unset and empty (e.g. `-e SYNC_INTERVAL=`) are treated the same way.
        return default
    try:
        value = int(raw)
    except ValueError:
        print(f"  Ignoring invalid {name}={raw!r}; using default {default}")
        return default
    return max(value, minimum)


# Workspace directory that gets mirrored to the backup target.
WORKSPACE = Path("/home/node/.openclaw/workspace")
# Hidden folder inside the workspace that carries extra persistent state.
STATE_DIR = WORKSPACE / ".huggingclaw-state"
# Live WhatsApp credentials (copy source) and their in-workspace mirror.
WHATSAPP_CREDS_DIR = Path("/home/node/.openclaw/credentials/whatsapp/default")
WHATSAPP_BACKUP_DIR = STATE_DIR / "credentials" / "whatsapp" / "default"
# When this file exists, the mirrored credentials are deleted on the next snapshot.
RESET_MARKER = WORKSPACE / ".reset_credentials"
# Seconds between sync passes; at least 1 so the loop never spins.
INTERVAL = _env_int("SYNC_INTERVAL", 600, minimum=1)
# Seconds to wait before the first snapshot; never negative.
INITIAL_DELAY = _env_int("SYNC_START_DELAY", 10, minimum=0)
# Hub mode is used only when both the token and the username are non-empty.
HF_TOKEN = os.environ.get("HF_TOKEN", "")
HF_USERNAME = os.environ.get("HF_USERNAME", "")
# Dataset name; the full repo id is "<HF_USERNAME>/<BACKUP_DATASET>".
BACKUP_DATASET = os.environ.get("BACKUP_DATASET_NAME", "huggingclaw-backup")
# Optional endpoint notified on sync failures; empty disables notifications.
WEBHOOK_URL = os.environ.get("WEBHOOK_URL", "")

# Loop flag checked by the sync loop in main(); cleared by the handler below.
running = True


def signal_handler(sig, frame):
    """Request shutdown by clearing the module-level ``running`` flag.

    Both arguments are supplied by the ``signal`` machinery and are ignored.
    """
    global running
    running = False


# Interrupt (Ctrl-C) and termination requests share the same handler.
signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGTERM, signal_handler)


def count_files(path: Path) -> int:
    """Return how many regular files live anywhere below ``path``.

    A path that does not exist yields 0. Directories themselves are not
    counted, only the files found while walking the tree recursively.
    """
    if not path.exists():
        return 0

    total = 0
    for entry in path.rglob("*"):
        if entry.is_file():
            total += 1
    return total


def snapshot_state_into_workspace() -> None:
    """
    Copy the live WhatsApp credentials into the workspace state folder.

    The copy lands in WHATSAPP_BACKUP_DIR (inside the workspace), so it is
    carried along by whatever syncs the workspace; the live credentials
    directory itself is only read, never modified.

    If RESET_MARKER exists, the backed-up copy is deleted instead, the marker
    is removed, and no new snapshot is taken on this call. Any error is
    reported on stdout and swallowed.
    """
    try:
        STATE_DIR.mkdir(parents=True, exist_ok=True)

        # A reset request wins over everything else: drop the backup and stop.
        if RESET_MARKER.exists():
            if WHATSAPP_BACKUP_DIR.exists():
                shutil.rmtree(WHATSAPP_BACKUP_DIR, ignore_errors=True)
                print("🧹 Removed backed-up WhatsApp credentials after reset request.")
            RESET_MARKER.unlink(missing_ok=True)
            return

        if not WHATSAPP_CREDS_DIR.exists():
            return

        file_count = count_files(WHATSAPP_CREDS_DIR)
        if file_count >= 2:
            # Replace the previous snapshot wholesale so stale files do not linger.
            WHATSAPP_BACKUP_DIR.parent.mkdir(parents=True, exist_ok=True)
            if WHATSAPP_BACKUP_DIR.exists():
                shutil.rmtree(WHATSAPP_BACKUP_DIR, ignore_errors=True)
            shutil.copytree(WHATSAPP_CREDS_DIR, WHATSAPP_BACKUP_DIR)
        elif file_count > 0:
            # Fewer than two files is treated as an incomplete credential set.
            print(f"πŸ“¦ WhatsApp backup skipped: credentials incomplete ({file_count} files).")
    except Exception as e:
        print(f"  ⚠️ Could not snapshot WhatsApp state: {e}")


def has_changes():
    """Report whether the workspace differs from its last git commit.

    Stages everything with ``git add -A`` (a side effect on the index) and
    then runs ``git diff --cached --quiet``. Returns True when that command
    exits with a non-zero status, and False when it exits with 0 or when
    running git raises an exception.
    """
    try:
        # Stage first so new and deleted files are part of the cached diff.
        subprocess.run(["git", "add", "-A"], cwd=WORKSPACE, capture_output=True)
        diff = subprocess.run(
            ["git", "diff", "--cached", "--quiet"],
            cwd=WORKSPACE,
            capture_output=True,
        )
    except Exception:
        return False
    return diff.returncode != 0

def write_sync_status(status, message="", path="/tmp/sync-status.json"):
    """Write sync status to a JSON file for the health server dashboard.

    The payload is written to a temporary sibling file and then moved over
    the target with ``os.replace``, so a reader never observes a truncated
    or half-written document.

    Args:
        status: Short state label stored under the "status" key.
        message: Optional human-readable detail stored under "message".
        path: Destination file; defaults to the location used so far.

    Failures are reported on stdout and otherwise ignored, because status
    reporting must not interrupt the sync loop.
    """
    try:
        import json
        data = {
            "status": status,
            "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
            "message": message
        }
        tmp_path = f"{path}.tmp"
        with open(tmp_path, "w") as f:
            json.dump(data, f)
        # Swap the finished file into place in a single step.
        os.replace(tmp_path, path)
    except Exception as e:
        print(f"  ⚠️ Could not write sync status: {e}")

def trigger_webhook(event, status, message):
    """Send a JSON notification to WEBHOOK_URL, if one is configured.

    Args:
        event: Event name stored under the "event" key.
        status: Outcome label stored under the "status" key.
        message: Free-form detail stored under the "message" key.

    The request carries a JSON body (so urllib issues a POST) and uses a
    10-second timeout. Delivery is best-effort: any failure is printed and
    swallowed so that notification problems never break the sync loop.
    """
    if not WEBHOOK_URL:
        return
    try:
        import urllib.request
        import json
        data = json.dumps({
            "event": event,
            "status": status,
            "message": message,
            "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        }).encode('utf-8')
        req = urllib.request.Request(WEBHOOK_URL, data=data, headers={'Content-Type': 'application/json'})
        # Close the response right away; otherwise each call leaves its
        # connection open until the garbage collector gets to it.
        with urllib.request.urlopen(req, timeout=10):
            pass
    except Exception as e:
        print(f"  ⚠️ Webhook delivery failed: {e}")

def sync_with_hf_hub():
    """Upload the workspace folder to a dataset repo via huggingface_hub.

    The target repo id is "<HF_USERNAME>/<BACKUP_DATASET>". If looking the
    repo up fails for any reason, a private dataset with that id is created
    first. The ``.git`` directory is excluded from the upload.

    Returns:
        True when the upload call completes; False when the library is
        missing, the dataset cannot be created, or any other error occurs.
    """
    try:
        from huggingface_hub import HfApi, upload_folder

        repo_id = f"{HF_USERNAME}/{BACKUP_DATASET}"
        api = HfApi(token=HF_TOKEN)
        target = {"repo_id": repo_id, "repo_type": "dataset"}

        # A failed lookup is taken to mean the dataset still has to be created.
        try:
            api.repo_info(**target)
        except Exception:
            print(f"  πŸ“ Creating dataset {repo_id}...")
            try:
                api.create_repo(private=True, **target)
            except Exception as e:
                print(f"  ⚠️  Could not create dataset: {e}")
                return False
            else:
                print(f"  βœ… Dataset created: {repo_id}")

        # Push the whole workspace, leaving git metadata behind.
        upload_folder(
            token=HF_TOKEN,
            folder_path=str(WORKSPACE),
            commit_message=f"Auto-sync {time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())}",
            ignore_patterns=[".git/*", ".git"],
            **target,
        )
        return True

    except ImportError:
        print("  ⚠️  huggingface_hub not installed, falling back to git")
        return False
    except Exception as e:
        print(f"  ⚠️  HF Hub sync failed: {e}")
        return False


def sync_with_git():
    """Fallback: stage, commit and push the workspace with git.

    Runs ``git add -A``, ``git commit`` and ``git push origin main`` in the
    workspace directory. Only the push decides the outcome: the exit codes
    of the add and commit steps are not inspected.

    Returns:
        True when the push exits with status 0; False when it exits with a
        non-zero status or when running git raises an exception.
    """
    def git(*args):
        # Every git call runs inside the workspace with its output captured.
        return subprocess.run(["git", *args], cwd=WORKSPACE, capture_output=True)

    try:
        ts = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        git("add", "-A")
        git("commit", "-m", f"Auto-sync {ts}")
        pushed = git("push", "origin", "main")
        return pushed.returncode == 0
    except Exception:
        return False


def _sleep_while_running(seconds, step=1.0):
    """Sleep for up to ``seconds``, returning early once ``running`` is cleared.

    ``time.sleep`` resumes sleeping after a signal handler returns (PEP 475),
    so one long sleep would delay shutdown until the whole duration elapsed.
    Sleeping in slices of at most ``step`` seconds lets the caller notice a
    SIGTERM/SIGINT promptly.
    """
    deadline = time.monotonic() + seconds
    while running:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return
        time.sleep(min(step, remaining))


def main():
    """Run the periodic workspace backup loop until a shutdown is requested.

    Mode selection:
      * huggingface_hub mode when both HF_TOKEN and HF_USERNAME are set,
        with git as a fallback whenever the hub upload fails;
      * git-only mode when the workspace contains a ``.git`` directory;
      * otherwise (or when the workspace is missing) the function returns
        immediately.

    Each pass snapshots the WhatsApp state, skips the upload when
    ``has_changes()`` reports nothing new, records the outcome through
    ``write_sync_status`` and fires the webhook on failure.
    """
    if not WORKSPACE.exists():
        print("πŸ“ Workspace sync: workspace not found, exiting.")
        return

    use_hf_hub = bool(HF_TOKEN and HF_USERNAME)
    git_dir = WORKSPACE / ".git"

    if not use_hf_hub and not git_dir.exists():
        print("πŸ“ Workspace sync: no git repo and no HF credentials, skipping.")
        return

    # Give the gateway a short head start before the first sync probe.
    _sleep_while_running(INITIAL_DELAY)

    snapshot_state_into_workspace()

    if use_hf_hub:
        print(f"πŸ”„ Workspace sync started (huggingface_hub): every {INTERVAL}s β†’ {HF_USERNAME}/{BACKUP_DATASET}")
    else:
        print(f"πŸ”„ Workspace sync started (git): every {INTERVAL}s")

    while running:
        # Interruptible wait: returns within about a second of a stop signal.
        _sleep_while_running(INTERVAL)
        if not running:
            break

        snapshot_state_into_workspace()

        if not has_changes():
            continue

        ts = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())

        write_sync_status("syncing", f"Starting sync at {ts}")

        if use_hf_hub:
            if sync_with_hf_hub():
                print(f"πŸ”„ Workspace sync (hf_hub): pushed changes ({ts})")
                write_sync_status("success", "Successfully pushed to HF Hub")
            elif sync_with_git():
                # Hub upload failed; the git remote served as the fallback.
                print(f"πŸ”„ Workspace sync (git fallback): pushed changes ({ts})")
                write_sync_status("success", "Successfully pushed via git fallback")
            else:
                msg = f"Workspace sync: failed ({ts}), will retry"
                print(f"πŸ”„ {msg}")
                write_sync_status("error", msg)
                trigger_webhook("sync", "error", msg)
        else:
            if sync_with_git():
                print(f"πŸ”„ Workspace sync (git): pushed changes ({ts})")
                write_sync_status("success", "Successfully pushed via git")
            else:
                msg = f"Workspace sync: push failed ({ts}), will retry"
                print(f"πŸ”„ {msg}")
                write_sync_status("error", msg)
                trigger_webhook("sync", "error", msg)


if __name__ == "__main__":
    main()