anurag008w committed on
Commit
3999915
·
unverified ·
2 Parent(s): f60e9d3 ad8f514

Merge pull request #15 from anurag162008/codex/investigate-changes-in-openclaw.json-backup

Browse files

Improve workspace sync (config watch/settle, locking, restart sync) and key-rotator logging

Files changed (2) hide show
  1. openclaw-sync.py +27 -1
  2. start.sh +94 -45
openclaw-sync.py CHANGED
@@ -7,6 +7,7 @@ credentials inside a private HF dataset without embedding HF tokens in git
7
  remotes or requiring a manual HF_USERNAME secret.
8
  """
9
 
 
10
  import hashlib
11
  import json
12
  import logging
@@ -37,6 +38,7 @@ OPENCLAW_HOME = Path("/home/node/.openclaw")
37
  OPENCLAW_CONFIG_FILE = OPENCLAW_HOME / "openclaw.json"
38
  WORKSPACE = OPENCLAW_HOME / "workspace"
39
  STATUS_FILE = Path("/tmp/sync-status.json")
 
40
  INTERVAL = int(os.environ.get("SYNC_INTERVAL", "180"))
41
  INITIAL_DELAY = int(os.environ.get("SYNC_START_DELAY", "10"))
42
  CONFIG_WATCH_INTERVAL = max(
@@ -392,7 +394,7 @@ def restore_workspace() -> bool:
392
  return False
393
 
394
 
395
- def sync_once(
396
  last_fingerprint: str | None = None,
397
  last_marker: tuple[int, int, int] | None = None,
398
  ) -> tuple[str, tuple[int, int, int]]:
@@ -440,6 +442,19 @@ def sync_once(
440
  return (current_fingerprint, current_marker)
441
 
442
 
 
 
 
 
 
 
 
 
 
 
 
 
 
443
  def handle_signal(_sig, _frame) -> None:
444
  STOP_EVENT.set()
445
 
@@ -583,6 +598,17 @@ def main() -> int:
583
  write_status("error", f"Shutdown sync failed: {exc}")
584
  print(f"Workspace sync: shutdown sync failed: {exc}")
585
  return 1
 
 
 
 
 
 
 
 
 
 
 
586
  if command == "loop":
587
  return loop()
588
 
 
7
  remotes or requiring a manual HF_USERNAME secret.
8
  """
9
 
10
+ import fcntl
11
  import hashlib
12
  import json
13
  import logging
 
38
  OPENCLAW_CONFIG_FILE = OPENCLAW_HOME / "openclaw.json"
39
  WORKSPACE = OPENCLAW_HOME / "workspace"
40
  STATUS_FILE = Path("/tmp/sync-status.json")
41
+ SYNC_LOCK_FILE = Path("/tmp/huggingclaw-sync.lock")
42
  INTERVAL = int(os.environ.get("SYNC_INTERVAL", "180"))
43
  INITIAL_DELAY = int(os.environ.get("SYNC_START_DELAY", "10"))
44
  CONFIG_WATCH_INTERVAL = max(
 
394
  return False
395
 
396
 
397
+ def _sync_once_unlocked(
398
  last_fingerprint: str | None = None,
399
  last_marker: tuple[int, int, int] | None = None,
400
  ) -> tuple[str, tuple[int, int, int]]:
 
442
  return (current_fingerprint, current_marker)
443
 
444
 
445
+ def sync_once(
446
+ last_fingerprint: str | None = None,
447
+ last_marker: tuple[int, int, int] | None = None,
448
+ ) -> tuple[str, tuple[int, int, int]]:
449
+ SYNC_LOCK_FILE.parent.mkdir(parents=True, exist_ok=True)
450
+ with SYNC_LOCK_FILE.open("w", encoding="utf-8") as lock_handle:
451
+ fcntl.flock(lock_handle, fcntl.LOCK_EX)
452
+ try:
453
+ return _sync_once_unlocked(last_fingerprint, last_marker)
454
+ finally:
455
+ fcntl.flock(lock_handle, fcntl.LOCK_UN)
456
+
457
+
458
  def handle_signal(_sig, _frame) -> None:
459
  STOP_EVENT.set()
460
 
 
598
  write_status("error", f"Shutdown sync failed: {exc}")
599
  print(f"Workspace sync: shutdown sync failed: {exc}")
600
  return 1
601
+ if command == "sync-once-settled":
602
+ try:
603
+ trigger, _ = wait_for_config_settle(file_marker(OPENCLAW_CONFIG_FILE))
604
+ if trigger == "stopped":
605
+ return 1
606
+ sync_once()
607
+ return 0
608
+ except Exception as exc:
609
+ write_status("error", f"Settled sync failed: {exc}")
610
+ print(f"Workspace sync: settled sync failed: {exc}")
611
+ return 1
612
  if command == "loop":
613
  return loop()
614
 
start.sh CHANGED
@@ -1018,59 +1018,108 @@ hc_finish_startup_commands
1018
  sync_installed_plugins_into_allow
1019
 
1020
  # ── Launch gateway ──
1021
- echo "Launching OpenClaw gateway on port 7860..."
 
 
 
 
 
 
 
 
 
 
 
 
 
1022
 
1023
- GATEWAY_ARGS=(gateway run --port 7860 --bind lan)
1024
- if [ "${GATEWAY_VERBOSE:-0}" = "1" ]; then
1025
- GATEWAY_ARGS+=(--verbose)
1026
- echo "Gateway verbose logging enabled (GATEWAY_VERBOSE=1)"
1027
- fi
1028
 
1029
- # Use stdbuf -oL -eL to ensure logs are not buffered and appear immediately
1030
- # in the console. NOTE: $! captures the LAST pipeline element (tee), not
1031
- # openclaw — fine for passing to `wait` (waits for the whole pipeline to
1032
- # finish), but kill -0 on it is uninformative. We probe TCP instead.
1033
- stdbuf -oL -eL openclaw "${GATEWAY_ARGS[@]}" 2>&1 | tee -a /home/node/.openclaw/gateway.log &
1034
- GATEWAY_PID=$!
1035
-
1036
- # Poll for the gateway to start listening on 7860. OpenClaw can take 20-30s
1037
- # on cold start (plugin install + auto-restore). Bail out early if the
1038
- # pipeline died.
1039
- GATEWAY_READY_TIMEOUT="${GATEWAY_READY_TIMEOUT:-90}"
1040
- ready=false
1041
- for ((i=0; i<GATEWAY_READY_TIMEOUT; i++)); do
1042
- if (echo > /dev/tcp/127.0.0.1/7860) 2>/dev/null; then
1043
- ready=true
1044
- break
1045
- fi
1046
- if ! kill -0 "$GATEWAY_PID" 2>/dev/null; then
1047
- break
1048
  fi
1049
- sleep 1
1050
- done
1051
 
1052
- if [ "$ready" != "true" ]; then
1053
- echo ""
1054
- echo "Gateway failed to start. Last 30 lines of log:"
1055
- echo "────────────────────────────────────────────"
1056
- tail -30 /home/node/.openclaw/gateway.log
1057
- exit 1
1058
- fi
 
 
 
1059
 
1060
- # 11. Start WhatsApp Guardian after the gateway is accepting connections
1061
- if [ "$WHATSAPP_ENABLED_NORMALIZED" = "true" ]; then
1062
  node /home/node/app/wa-guardian.js &
1063
  GUARDIAN_PID=$!
1064
  echo "WhatsApp Guardian started (PID: $GUARDIAN_PID)"
1065
- fi
1066
 
1067
- # 11.5 Warm up the managed browser so first browser actions have a live tab
1068
- warmup_browser
1069
 
1070
- # 12. Start Workspace Sync after startup settles
1071
- if [ -n "${HF_TOKEN:-}" ]; then
1072
- python3 -u /home/node/app/openclaw-sync.py loop &
1073
- fi
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1074
 
1075
- # Wait for gateway (allows trap to fire)
1076
- wait $GATEWAY_PID
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1018
  sync_installed_plugins_into_allow
1019
 
1020
  # ── Launch gateway ──
1021
+ GATEWAY_RESTART_DELAY="${GATEWAY_RESTART_DELAY:-2}"
1022
+ GATEWAY_MAX_RESTARTS="${GATEWAY_MAX_RESTARTS:-0}"
1023
+ GATEWAY_RESTART_COUNT=0
1024
+ SYNC_LOOP_PID=""
1025
+ GUARDIAN_PID=""
1026
+
1027
+ sync_before_gateway_restart() {
1028
+ [ -n "${HF_TOKEN:-}" ] || return 0
1029
+ [ -f "/home/node/app/openclaw-sync.py" ] || return 0
1030
+
1031
+ echo "Gateway stopped; saving latest OpenClaw state before restart..."
1032
+ python3 /home/node/app/openclaw-sync.py sync-once-settled || \
1033
+ echo "Warning: could not sync settled state before gateway restart"
1034
+ }
1035
 
1036
+ start_background_sync_once() {
1037
+ [ -n "${HF_TOKEN:-}" ] || return 0
 
 
 
1038
 
1039
+ if [ -n "$SYNC_LOOP_PID" ] && kill -0 "$SYNC_LOOP_PID" 2>/dev/null; then
1040
+ return 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1041
  fi
 
 
1042
 
1043
+ python3 -u /home/node/app/openclaw-sync.py loop &
1044
+ SYNC_LOOP_PID=$!
1045
+ }
1046
+
1047
+ start_guardian_once() {
1048
+ [ "$WHATSAPP_ENABLED_NORMALIZED" = "true" ] || return 0
1049
+
1050
+ if [ -n "$GUARDIAN_PID" ] && kill -0 "$GUARDIAN_PID" 2>/dev/null; then
1051
+ return 0
1052
+ fi
1053
 
 
 
1054
  node /home/node/app/wa-guardian.js &
1055
  GUARDIAN_PID=$!
1056
  echo "WhatsApp Guardian started (PID: $GUARDIAN_PID)"
1057
+ }
1058
 
1059
+ while true; do
1060
+ echo "Launching OpenClaw gateway on port 7860..."
1061
 
1062
+ GATEWAY_ARGS=(gateway run --port 7860 --bind lan)
1063
+ if [ "${GATEWAY_VERBOSE:-0}" = "1" ]; then
1064
+ GATEWAY_ARGS+=(--verbose)
1065
+ echo "Gateway verbose logging enabled (GATEWAY_VERBOSE=1)"
1066
+ fi
1067
+
1068
+ # Use stdbuf -oL -eL to ensure logs are not buffered and appear immediately
1069
+ # in the console. NOTE: $! captures the LAST pipeline element (tee), not
1070
+ # openclaw — fine for passing to `wait` (waits for the whole pipeline to
1071
+ # finish), but kill -0 on it is uninformative. We probe TCP instead.
1072
+ stdbuf -oL -eL openclaw "${GATEWAY_ARGS[@]}" 2>&1 | tee -a /home/node/.openclaw/gateway.log &
1073
+ GATEWAY_PID=$!
1074
+
1075
+ # Poll for the gateway to start listening on 7860. OpenClaw can take 20-30s
1076
+ # on cold start (plugin install + auto-restore). Bail out early if the
1077
+ # pipeline died.
1078
+ GATEWAY_READY_TIMEOUT="${GATEWAY_READY_TIMEOUT:-90}"
1079
+ ready=false
1080
+ for ((i=0; i<GATEWAY_READY_TIMEOUT; i++)); do
1081
+ if (echo > /dev/tcp/127.0.0.1/7860) 2>/dev/null; then
1082
+ ready=true
1083
+ break
1084
+ fi
1085
+ if ! kill -0 "$GATEWAY_PID" 2>/dev/null; then
1086
+ break
1087
+ fi
1088
+ sleep 1
1089
+ done
1090
+
1091
+ if [ "$ready" != "true" ]; then
1092
+ echo ""
1093
+ echo "Gateway failed to start. Last 30 lines of log:"
1094
+ echo "────────────────────────────────────────────"
1095
+ tail -30 /home/node/.openclaw/gateway.log
1096
+ exit 1
1097
+ fi
1098
+
1099
+ # 11. Start WhatsApp Guardian after the gateway is accepting connections
1100
+ start_guardian_once
1101
 
1102
+ # 11.5 Warm up the managed browser so first browser actions have a live tab
1103
+ warmup_browser
1104
+
1105
+ # 12. Start Workspace Sync after startup settles. Keep only one loop active;
1106
+ # config edits can make OpenClaw exit/reload, and the gateway loop below will
1107
+ # relaunch it without rerunning all startup code.
1108
+ start_background_sync_once
1109
+
1110
+ set +e
1111
+ wait "$GATEWAY_PID"
1112
+ GATEWAY_EXIT_CODE=$?
1113
+ set -e
1114
+
1115
+ sync_before_gateway_restart
1116
+
1117
+ GATEWAY_RESTART_COUNT=$((GATEWAY_RESTART_COUNT + 1))
1118
+ if [ "$GATEWAY_MAX_RESTARTS" != "0" ] && [ "$GATEWAY_RESTART_COUNT" -ge "$GATEWAY_MAX_RESTARTS" ]; then
1119
+ echo "Gateway exited with code ${GATEWAY_EXIT_CODE}; restart limit (${GATEWAY_MAX_RESTARTS}) reached."
1120
+ exit "$GATEWAY_EXIT_CODE"
1121
+ fi
1122
+
1123
+ echo "Gateway exited with code ${GATEWAY_EXIT_CODE}; restarting in ${GATEWAY_RESTART_DELAY}s..."
1124
+ sleep "$GATEWAY_RESTART_DELAY"
1125
+ done