Spaces:
Running
Running
Merge pull request #15 from anurag162008/codex/investigate-changes-in-openclaw.json-backup
Browse files
Improve workspace sync (config watch/settle, locking, restart sync) and key-rotator logging
- openclaw-sync.py +27 -1
- start.sh +94 -45
openclaw-sync.py
CHANGED
|
@@ -7,6 +7,7 @@ credentials inside a private HF dataset without embedding HF tokens in git
|
|
| 7 |
remotes or requiring a manual HF_USERNAME secret.
|
| 8 |
"""
|
| 9 |
|
|
|
|
| 10 |
import hashlib
|
| 11 |
import json
|
| 12 |
import logging
|
|
@@ -37,6 +38,7 @@ OPENCLAW_HOME = Path("/home/node/.openclaw")
|
|
| 37 |
OPENCLAW_CONFIG_FILE = OPENCLAW_HOME / "openclaw.json"
|
| 38 |
WORKSPACE = OPENCLAW_HOME / "workspace"
|
| 39 |
STATUS_FILE = Path("/tmp/sync-status.json")
|
|
|
|
| 40 |
INTERVAL = int(os.environ.get("SYNC_INTERVAL", "180"))
|
| 41 |
INITIAL_DELAY = int(os.environ.get("SYNC_START_DELAY", "10"))
|
| 42 |
CONFIG_WATCH_INTERVAL = max(
|
|
@@ -392,7 +394,7 @@ def restore_workspace() -> bool:
|
|
| 392 |
return False
|
| 393 |
|
| 394 |
|
| 395 |
-
def sync_once(
|
| 396 |
last_fingerprint: str | None = None,
|
| 397 |
last_marker: tuple[int, int, int] | None = None,
|
| 398 |
) -> tuple[str, tuple[int, int, int]]:
|
|
@@ -440,6 +442,19 @@ def sync_once(
|
|
| 440 |
return (current_fingerprint, current_marker)
|
| 441 |
|
| 442 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 443 |
def handle_signal(_sig, _frame) -> None:
|
| 444 |
STOP_EVENT.set()
|
| 445 |
|
|
@@ -583,6 +598,17 @@ def main() -> int:
|
|
| 583 |
write_status("error", f"Shutdown sync failed: {exc}")
|
| 584 |
print(f"Workspace sync: shutdown sync failed: {exc}")
|
| 585 |
return 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 586 |
if command == "loop":
|
| 587 |
return loop()
|
| 588 |
|
|
|
|
| 7 |
remotes or requiring a manual HF_USERNAME secret.
|
| 8 |
"""
|
| 9 |
|
| 10 |
+
import fcntl
|
| 11 |
import hashlib
|
| 12 |
import json
|
| 13 |
import logging
|
|
|
|
| 38 |
OPENCLAW_CONFIG_FILE = OPENCLAW_HOME / "openclaw.json"
|
| 39 |
WORKSPACE = OPENCLAW_HOME / "workspace"
|
| 40 |
STATUS_FILE = Path("/tmp/sync-status.json")
|
| 41 |
+
SYNC_LOCK_FILE = Path("/tmp/huggingclaw-sync.lock")
|
| 42 |
INTERVAL = int(os.environ.get("SYNC_INTERVAL", "180"))
|
| 43 |
INITIAL_DELAY = int(os.environ.get("SYNC_START_DELAY", "10"))
|
| 44 |
CONFIG_WATCH_INTERVAL = max(
|
|
|
|
| 394 |
return False
|
| 395 |
|
| 396 |
|
| 397 |
+
def _sync_once_unlocked(
|
| 398 |
last_fingerprint: str | None = None,
|
| 399 |
last_marker: tuple[int, int, int] | None = None,
|
| 400 |
) -> tuple[str, tuple[int, int, int]]:
|
|
|
|
| 442 |
return (current_fingerprint, current_marker)
|
| 443 |
|
| 444 |
|
| 445 |
+
def sync_once(
|
| 446 |
+
last_fingerprint: str | None = None,
|
| 447 |
+
last_marker: tuple[int, int, int] | None = None,
|
| 448 |
+
) -> tuple[str, tuple[int, int, int]]:
|
| 449 |
+
SYNC_LOCK_FILE.parent.mkdir(parents=True, exist_ok=True)
|
| 450 |
+
with SYNC_LOCK_FILE.open("w", encoding="utf-8") as lock_handle:
|
| 451 |
+
fcntl.flock(lock_handle, fcntl.LOCK_EX)
|
| 452 |
+
try:
|
| 453 |
+
return _sync_once_unlocked(last_fingerprint, last_marker)
|
| 454 |
+
finally:
|
| 455 |
+
fcntl.flock(lock_handle, fcntl.LOCK_UN)
|
| 456 |
+
|
| 457 |
+
|
| 458 |
def handle_signal(_sig, _frame) -> None:
|
| 459 |
STOP_EVENT.set()
|
| 460 |
|
|
|
|
| 598 |
write_status("error", f"Shutdown sync failed: {exc}")
|
| 599 |
print(f"Workspace sync: shutdown sync failed: {exc}")
|
| 600 |
return 1
|
| 601 |
+
if command == "sync-once-settled":
|
| 602 |
+
try:
|
| 603 |
+
trigger, _ = wait_for_config_settle(file_marker(OPENCLAW_CONFIG_FILE))
|
| 604 |
+
if trigger == "stopped":
|
| 605 |
+
return 1
|
| 606 |
+
sync_once()
|
| 607 |
+
return 0
|
| 608 |
+
except Exception as exc:
|
| 609 |
+
write_status("error", f"Settled sync failed: {exc}")
|
| 610 |
+
print(f"Workspace sync: settled sync failed: {exc}")
|
| 611 |
+
return 1
|
| 612 |
if command == "loop":
|
| 613 |
return loop()
|
| 614 |
|
start.sh
CHANGED
|
@@ -1018,59 +1018,108 @@ hc_finish_startup_commands
|
|
| 1018 |
sync_installed_plugins_into_allow
|
| 1019 |
|
| 1020 |
# ── Launch gateway ──
|
| 1021 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1022 |
|
| 1023 |
-
|
| 1024 |
-
|
| 1025 |
-
GATEWAY_ARGS+=(--verbose)
|
| 1026 |
-
echo "Gateway verbose logging enabled (GATEWAY_VERBOSE=1)"
|
| 1027 |
-
fi
|
| 1028 |
|
| 1029 |
-
|
| 1030 |
-
|
| 1031 |
-
# openclaw — fine for passing to `wait` (waits for the whole pipeline to
|
| 1032 |
-
# finish), but kill -0 on it is uninformative. We probe TCP instead.
|
| 1033 |
-
stdbuf -oL -eL openclaw "${GATEWAY_ARGS[@]}" 2>&1 | tee -a /home/node/.openclaw/gateway.log &
|
| 1034 |
-
GATEWAY_PID=$!
|
| 1035 |
-
|
| 1036 |
-
# Poll for the gateway to start listening on 7860. OpenClaw can take 20-30s
|
| 1037 |
-
# on cold start (plugin install + auto-restore). Bail out early if the
|
| 1038 |
-
# pipeline died.
|
| 1039 |
-
GATEWAY_READY_TIMEOUT="${GATEWAY_READY_TIMEOUT:-90}"
|
| 1040 |
-
ready=false
|
| 1041 |
-
for ((i=0; i<GATEWAY_READY_TIMEOUT; i++)); do
|
| 1042 |
-
if (echo > /dev/tcp/127.0.0.1/7860) 2>/dev/null; then
|
| 1043 |
-
ready=true
|
| 1044 |
-
break
|
| 1045 |
-
fi
|
| 1046 |
-
if ! kill -0 "$GATEWAY_PID" 2>/dev/null; then
|
| 1047 |
-
break
|
| 1048 |
fi
|
| 1049 |
-
sleep 1
|
| 1050 |
-
done
|
| 1051 |
|
| 1052 |
-
|
| 1053 |
-
|
| 1054 |
-
|
| 1055 |
-
|
| 1056 |
-
|
| 1057 |
-
|
| 1058 |
-
|
|
|
|
|
|
|
|
|
|
| 1059 |
|
| 1060 |
-
# 11. Start WhatsApp Guardian after the gateway is accepting connections
|
| 1061 |
-
if [ "$WHATSAPP_ENABLED_NORMALIZED" = "true" ]; then
|
| 1062 |
node /home/node/app/wa-guardian.js &
|
| 1063 |
GUARDIAN_PID=$!
|
| 1064 |
echo "WhatsApp Guardian started (PID: $GUARDIAN_PID)"
|
| 1065 |
-
|
| 1066 |
|
| 1067 |
-
|
| 1068 |
-
|
| 1069 |
|
| 1070 |
-
|
| 1071 |
-
if [
|
| 1072 |
-
|
| 1073 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1074 |
|
| 1075 |
-
#
|
| 1076 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1018 |
sync_installed_plugins_into_allow
|
| 1019 |
|
| 1020 |
# ── Launch gateway ──
|
| 1021 |
+
GATEWAY_RESTART_DELAY="${GATEWAY_RESTART_DELAY:-2}"
|
| 1022 |
+
GATEWAY_MAX_RESTARTS="${GATEWAY_MAX_RESTARTS:-0}"
|
| 1023 |
+
GATEWAY_RESTART_COUNT=0
|
| 1024 |
+
SYNC_LOOP_PID=""
|
| 1025 |
+
GUARDIAN_PID=""
|
| 1026 |
+
|
| 1027 |
+
sync_before_gateway_restart() {
|
| 1028 |
+
[ -n "${HF_TOKEN:-}" ] || return 0
|
| 1029 |
+
[ -f "/home/node/app/openclaw-sync.py" ] || return 0
|
| 1030 |
+
|
| 1031 |
+
echo "Gateway stopped; saving latest OpenClaw state before restart..."
|
| 1032 |
+
python3 /home/node/app/openclaw-sync.py sync-once-settled || \
|
| 1033 |
+
echo "Warning: could not sync settled state before gateway restart"
|
| 1034 |
+
}
|
| 1035 |
|
| 1036 |
+
start_background_sync_once() {
|
| 1037 |
+
[ -n "${HF_TOKEN:-}" ] || return 0
|
|
|
|
|
|
|
|
|
|
| 1038 |
|
| 1039 |
+
if [ -n "$SYNC_LOOP_PID" ] && kill -0 "$SYNC_LOOP_PID" 2>/dev/null; then
|
| 1040 |
+
return 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1041 |
fi
|
|
|
|
|
|
|
| 1042 |
|
| 1043 |
+
python3 -u /home/node/app/openclaw-sync.py loop &
|
| 1044 |
+
SYNC_LOOP_PID=$!
|
| 1045 |
+
}
|
| 1046 |
+
|
| 1047 |
+
start_guardian_once() {
|
| 1048 |
+
[ "$WHATSAPP_ENABLED_NORMALIZED" = "true" ] || return 0
|
| 1049 |
+
|
| 1050 |
+
if [ -n "$GUARDIAN_PID" ] && kill -0 "$GUARDIAN_PID" 2>/dev/null; then
|
| 1051 |
+
return 0
|
| 1052 |
+
fi
|
| 1053 |
|
|
|
|
|
|
|
| 1054 |
node /home/node/app/wa-guardian.js &
|
| 1055 |
GUARDIAN_PID=$!
|
| 1056 |
echo "WhatsApp Guardian started (PID: $GUARDIAN_PID)"
|
| 1057 |
+
}
|
| 1058 |
|
| 1059 |
+
while true; do
|
| 1060 |
+
echo "Launching OpenClaw gateway on port 7860..."
|
| 1061 |
|
| 1062 |
+
GATEWAY_ARGS=(gateway run --port 7860 --bind lan)
|
| 1063 |
+
if [ "${GATEWAY_VERBOSE:-0}" = "1" ]; then
|
| 1064 |
+
GATEWAY_ARGS+=(--verbose)
|
| 1065 |
+
echo "Gateway verbose logging enabled (GATEWAY_VERBOSE=1)"
|
| 1066 |
+
fi
|
| 1067 |
+
|
| 1068 |
+
# Use stdbuf -oL -eL to ensure logs are not buffered and appear immediately
|
| 1069 |
+
# in the console. NOTE: $! captures the LAST pipeline element (tee), not
|
| 1070 |
+
# openclaw — fine for passing to `wait` (waits for the whole pipeline to
|
| 1071 |
+
# finish), but kill -0 on it is uninformative. We probe TCP instead.
|
| 1072 |
+
stdbuf -oL -eL openclaw "${GATEWAY_ARGS[@]}" 2>&1 | tee -a /home/node/.openclaw/gateway.log &
|
| 1073 |
+
GATEWAY_PID=$!
|
| 1074 |
+
|
| 1075 |
+
# Poll for the gateway to start listening on 7860. OpenClaw can take 20-30s
|
| 1076 |
+
# on cold start (plugin install + auto-restore). Bail out early if the
|
| 1077 |
+
# pipeline died.
|
| 1078 |
+
GATEWAY_READY_TIMEOUT="${GATEWAY_READY_TIMEOUT:-90}"
|
| 1079 |
+
ready=false
|
| 1080 |
+
for ((i=0; i<GATEWAY_READY_TIMEOUT; i++)); do
|
| 1081 |
+
if (echo > /dev/tcp/127.0.0.1/7860) 2>/dev/null; then
|
| 1082 |
+
ready=true
|
| 1083 |
+
break
|
| 1084 |
+
fi
|
| 1085 |
+
if ! kill -0 "$GATEWAY_PID" 2>/dev/null; then
|
| 1086 |
+
break
|
| 1087 |
+
fi
|
| 1088 |
+
sleep 1
|
| 1089 |
+
done
|
| 1090 |
+
|
| 1091 |
+
if [ "$ready" != "true" ]; then
|
| 1092 |
+
echo ""
|
| 1093 |
+
echo "Gateway failed to start. Last 30 lines of log:"
|
| 1094 |
+
echo "ββββββββββββββββββββββββββββββββββββββββββββ"
|
| 1095 |
+
tail -30 /home/node/.openclaw/gateway.log
|
| 1096 |
+
exit 1
|
| 1097 |
+
fi
|
| 1098 |
+
|
| 1099 |
+
# 11. Start WhatsApp Guardian after the gateway is accepting connections
|
| 1100 |
+
start_guardian_once
|
| 1101 |
|
| 1102 |
+
# 11.5 Warm up the managed browser so first browser actions have a live tab
|
| 1103 |
+
warmup_browser
|
| 1104 |
+
|
| 1105 |
+
# 12. Start Workspace Sync after startup settles. Keep only one loop active;
|
| 1106 |
+
# config edits can make OpenClaw exit/reload, and the gateway loop below will
|
| 1107 |
+
# relaunch it without rerunning all startup code.
|
| 1108 |
+
start_background_sync_once
|
| 1109 |
+
|
| 1110 |
+
set +e
|
| 1111 |
+
wait "$GATEWAY_PID"
|
| 1112 |
+
GATEWAY_EXIT_CODE=$?
|
| 1113 |
+
set -e
|
| 1114 |
+
|
| 1115 |
+
sync_before_gateway_restart
|
| 1116 |
+
|
| 1117 |
+
GATEWAY_RESTART_COUNT=$((GATEWAY_RESTART_COUNT + 1))
|
| 1118 |
+
if [ "$GATEWAY_MAX_RESTARTS" != "0" ] && [ "$GATEWAY_RESTART_COUNT" -ge "$GATEWAY_MAX_RESTARTS" ]; then
|
| 1119 |
+
echo "Gateway exited with code ${GATEWAY_EXIT_CODE}; restart limit (${GATEWAY_MAX_RESTARTS}) reached."
|
| 1120 |
+
exit "$GATEWAY_EXIT_CODE"
|
| 1121 |
+
fi
|
| 1122 |
+
|
| 1123 |
+
echo "Gateway exited with code ${GATEWAY_EXIT_CODE}; restarting in ${GATEWAY_RESTART_DELAY}s..."
|
| 1124 |
+
sleep "$GATEWAY_RESTART_DELAY"
|
| 1125 |
+
done
|