heiyuheiyu committed on
Commit
eece7de
·
verified ·
1 Parent(s): 7165cf6

Upload Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +46 -177
Dockerfile CHANGED
@@ -72,7 +72,6 @@ repo_id = os.getenv("HF_DATASET")
72
  token = os.getenv("HF_TOKEN")
73
 
74
  OPENCLAW_LOCAL = "/root/.openclaw"
75
- SCATTER_REMOTE_PREFIX = "openclaw/" # 散裝文件在 Dataset 中的前綴
76
 
77
  # ── 通用工具函數 ──────────────────────────────────────────────────────────────
78
 
@@ -121,12 +120,26 @@ def restore():
121
  if not repo_id or not token:
122
  print("Skip Restore: HF_DATASET or HF_TOKEN not set")
123
  return
 
 
 
 
 
 
 
 
 
 
124
  try:
125
  all_files = list(api.list_repo_files(repo_id=repo_id, repo_type="dataset", token=token))
126
 
127
- # ── 1. 恢復 tar.gz 備份 ──────────────────────────────────────────────
128
  # 備份範圍:/root/.openclaw 下所有文件及文件夾(保持完整目錄結構)
129
  # tar 包內 arcname 直接對應 .openclaw/ 下的相對路徑,解壓目標為 /root/.openclaw/
 
 
 
 
130
  now = datetime.now()
131
  for i in range(5):
132
  day = (now - timedelta(days=i)).strftime("%Y-%m-%d")
@@ -136,113 +149,18 @@ def restore():
136
  path = hf_hub_download(repo_id=repo_id, filename=name, repo_type="dataset", token=token)
137
  os.makedirs(OPENCLAW_LOCAL, exist_ok=True)
138
  with tarfile.open(path, "r:gz") as tar:
139
- tar.extractall(path=OPENCLAW_LOCAL)
 
 
 
 
140
  print(f"Success: Restored from {name}")
141
-
142
- # ── RESTORE_SKIP:解壓後刪除指定條目,讓後續腳本用默認值重新生成 ──
143
- # 格式:逗號分隔,填寫相對於 /root/.openclaw 的路徑
144
- # 支持多級子目錄,例如:RESTORE_SKIP=agents/main/sessions,cron,extensions/foo/bar
145
- # 若路徑不存在則忽略,不影響腳本繼續運行
146
- # 留空或不設置 = 全部恢復(默認行為)
147
- # 注意:用完後清空此變量,否則每次重啟都會跳過恢復
148
- skip_set = _parse_skip_list("RESTORE_SKIP")
149
- for entry in skip_set:
150
- target = os.path.join(OPENCLAW_LOCAL, entry)
151
- if os.path.isdir(target):
152
- shutil.rmtree(target)
153
- print(f"Restore skip (RESTORE_SKIP): removed dir {entry}")
154
- elif os.path.isfile(target):
155
- os.remove(target)
156
- print(f"Restore skip (RESTORE_SKIP): removed file {entry}")
157
- else:
158
- print(f"Restore skip (RESTORE_SKIP): not found, skipped {entry}")
159
  break
160
 
161
- # ── 2. 恢復散裝文件 ──────────────────────────────────────────────────
162
- # 備份範圍:/root/.openclaw 下所有文件及文件夾(保持完整目錄結構)
163
- # Dataset 中存儲路徑:openclaw/<相對於 .openclaw 的路徑>
164
- scatter_files = [f for f in all_files if f.startswith(SCATTER_REMOTE_PREFIX)]
165
- if scatter_files:
166
- os.makedirs(OPENCLAW_LOCAL, exist_ok=True)
167
- for remote_path in scatter_files:
168
- rel = remote_path[len(SCATTER_REMOTE_PREFIX):]
169
- if not rel:
170
- continue
171
- local_path = os.path.join(OPENCLAW_LOCAL, rel)
172
- print(f"Restoring file: {rel}")
173
- dl = hf_hub_download(repo_id=repo_id, filename=remote_path, repo_type="dataset", token=token)
174
- os.makedirs(os.path.dirname(local_path), exist_ok=True)
175
- shutil.copy2(dl, local_path)
176
- print(f"Scatter restore: {len(scatter_files)} file(s) restored.")
177
- else:
178
- print("No scatter files found in Dataset, skipping scatter restore.")
179
-
180
  except Exception as e:
181
  print(f"Restore Error: {e}")
182
 
183
- # ── restore_workspace() ───────────────────────────────────────────────────────
184
 
185
- def restore_workspace():
186
- """
187
- 從 HuggingFace Dataset 的 openclaw/ 前綴下拉取最新文件到本地 /root/.openclaw/。
188
- 與 restore() 的區別:
189
- - 不處理 tar.gz 備份
190
- - 專門用於"用戶在 Dataset 網頁上編輯了文件後,讓容器立刻感知更新"的場景
191
- - 由後台定時循環每 30 分鐘自動調用一次
192
- - 也可以通過釘釘發指令手動觸發:python3 /usr/local/bin/sync.py restore_workspace
193
- 衝突保護邏輯:
194
- - Dataset 上的版本始終優先(網頁手動編輯的版本會覆蓋本地)
195
- - 這與 backup() 的邏輯相反:backup() 是本地新則上傳,restore_workspace() 是遠端新則下載
196
- - 若本地文件比遠端更新(本地有未備份的修改),跳過下載,保留本地版本
197
- """
198
- if not repo_id or not token:
199
- print("Skip restore_workspace: HF_DATASET or HF_TOKEN not set")
200
- return
201
- try:
202
- all_files = list(api.list_repo_files(
203
- repo_id=repo_id, repo_type="dataset", token=token
204
- ))
205
- scatter_files = [f for f in all_files if f.startswith(SCATTER_REMOTE_PREFIX)]
206
- if not scatter_files:
207
- print("restore_workspace: No scatter files found in Dataset, skipping.")
208
- return
209
- os.makedirs(OPENCLAW_LOCAL, exist_ok=True)
210
- updated = 0
211
- skipped = 0
212
- for remote_path in scatter_files:
213
- rel = remote_path[len(SCATTER_REMOTE_PREFIX):]
214
- if not rel:
215
- continue
216
- local_path = os.path.join(OPENCLAW_LOCAL, rel)
217
- try:
218
- remote_info = api.list_repo_tree(
219
- repo_id=repo_id, repo_type="dataset", token=token,
220
- path_in_repo=SCATTER_REMOTE_PREFIX.rstrip("/"), recursive=True
221
- )
222
- remote_mtime = None
223
- for item in remote_info:
224
- if hasattr(item, "path") and item.path == remote_path:
225
- if item.last_commit and item.last_commit.date:
226
- remote_mtime = item.last_commit.date.timestamp()
227
- break
228
- if remote_mtime and os.path.exists(local_path):
229
- local_mtime = os.path.getmtime(local_path)
230
- if local_mtime >= remote_mtime:
231
- skipped += 1
232
- continue
233
- except Exception:
234
- pass
235
- dl = hf_hub_download(
236
- repo_id=repo_id, filename=remote_path,
237
- repo_type="dataset", token=token
238
- )
239
- os.makedirs(os.path.dirname(local_path), exist_ok=True)
240
- shutil.copy2(dl, local_path)
241
- print(f"restore_workspace: updated {rel}")
242
- updated += 1
243
- print(f"restore_workspace: {updated} updated, {skipped} already up-to-date.")
244
- except Exception as e:
245
- print(f"restore_workspace Error: {e}")
246
 
247
  # ── backup() ──────────────────────────────────────────────────────────────────
248
 
@@ -251,7 +169,7 @@ def backup():
251
  print("Skip Backup: HF_DATASET or HF_TOKEN not set")
252
  return
253
 
254
- # ── 1. tar.gz 打包備份 ───────────────────────────────────────────────────
255
  # 備份範圍:/root/.openclaw 下所有文件及文件夾(完整目錄結構)
256
  # tar 包解壓目標為 /root/.openclaw/,arcname 對應其內部相對路徑
257
  #
@@ -259,6 +177,11 @@ def backup():
259
  # 格式:逗號分隔,支持多級子目錄
260
  # 例如:BACKUP_TAR_SKIP=agents/main/sessions,cron,extensions/foo/bar
261
  # 留空或不設置 = 備份全部
 
 
 
 
 
262
  try:
263
  tar_skip_set = _parse_skip_list("BACKUP_TAR_SKIP")
264
  day = datetime.now().strftime("%Y-%m-%d")
@@ -272,76 +195,9 @@ def backup():
272
  except Exception as e:
273
  print(f"Backup tar.gz Error: {e}")
274
 
275
- # ── 2. 散裝文件備份(逐個上傳,跳過 Dataset 上更新的文件)────────────────
276
- # 備份範圍:/root/.openclaw 下所有文件及文件夾(保持完整目錄結構)
277
- # Dataset 中存儲路徑:openclaw/<相對於 .openclaw 的路徑>
278
- #
279
- # BACKUP_SCATTER_SKIP:散裝備份時跳過的文件/目錄(相對於 /root/.openclaw)
280
- # 格式:逗號分隔,支持多級子目錄
281
- # 例如:BACKUP_SCATTER_SKIP=agents/main/sessions,cron,workspace/tmp
282
- # 留空或不設置 = 備份全部
283
- #
284
- # 核心改動:原來逐個 upload_file 每個文件產生1個commit,
285
- # 很快觸發 HF 免費帳號 128 commits/小時限制。
286
- # 改為:把需要上傳的文件複製到臨時目錄,再用 upload_folder 一次性
287
- # 合成單個 commit 上傳,徹底解決 429 rate limit 問題。
288
- try:
289
- scatter_skip_set = _parse_skip_list("BACKUP_SCATTER_SKIP")
290
-
291
- # 獲取 Dataset 上已有文件的最後修改時間(用於跳過 Dataset 更新的文件)
292
- remote_mtimes = {}
293
- try:
294
- repo_info = api.list_repo_tree(
295
- repo_id=repo_id, repo_type="dataset", token=token,
296
- path_in_repo=SCATTER_REMOTE_PREFIX.rstrip("/"), recursive=True
297
- )
298
- for item in repo_info:
299
- if hasattr(item, "path") and hasattr(item, "last_commit"):
300
- rel = item.path[len(SCATTER_REMOTE_PREFIX):]
301
- if item.last_commit and item.last_commit.date:
302
- remote_mtimes[rel] = item.last_commit.date.timestamp()
303
- except Exception:
304
- pass
305
-
306
- # 把需要上傳的文件複製到臨時目錄,保持目錄結構,再一次性 upload_folder
307
- import tempfile
308
- with tempfile.TemporaryDirectory() as tmpdir:
309
- uploaded = 0
310
- skipped = 0
311
- for abs_path, rel_to_base in _walk_local(OPENCLAW_LOCAL, skip_set=scatter_skip_set):
312
- local_mtime = os.path.getmtime(abs_path)
313
- remote_mtime = remote_mtimes.get(rel_to_base)
314
- if remote_mtime and remote_mtime > local_mtime:
315
- print(f"Scatter skip (Dataset newer): {rel_to_base}")
316
- skipped += 1
317
- continue
318
- dst = os.path.join(tmpdir, rel_to_base)
319
- os.makedirs(os.path.dirname(dst), exist_ok=True)
320
- shutil.copy2(abs_path, dst)
321
- uploaded += 1
322
-
323
- if uploaded > 0:
324
- # upload_folder 將 tmpdir 下所有文件合成單個 commit 上傳
325
- # path_in_repo 指定遠端存放前綴(openclaw/)
326
- api.upload_folder(
327
- folder_path=tmpdir,
328
- path_in_repo=SCATTER_REMOTE_PREFIX.rstrip("/"),
329
- repo_id=repo_id,
330
- repo_type="dataset",
331
- token=token,
332
- commit_message=f"Scatter backup {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
333
- )
334
- print(f"Scatter backup: {uploaded} file(s) uploaded in single commit, {skipped} skipped (Dataset newer).")
335
- else:
336
- print(f"Scatter backup: nothing to upload ({skipped} skipped, Dataset newer).")
337
- except Exception as e:
338
- print(f"Scatter Backup Error: {e}")
339
-
340
  if __name__ == "__main__":
341
  if len(sys.argv) > 1 and sys.argv[1] == "backup":
342
  backup()
343
- elif len(sys.argv) > 1 and sys.argv[1] == "restore_workspace":
344
- restore_workspace()
345
  else:
346
  restore()
347
  EOF
@@ -402,10 +258,26 @@ RUN cat <<'EOF' > /usr/local/bin/start-openclaw
402
  #!/bin/bash
403
  set -e
404
 
405
- mkdir -p /root/.openclaw/sessions
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
406
 
407
- # 執行恢復邏輯
408
- python3 /usr/local/bin/sync.py restore
409
 
410
  # provider1 model2~5 fallback 到自身 model1
411
  provide1_models_id2="${provide1_models_id2:-$provide1_models_id1}"
@@ -705,9 +577,6 @@ fi
705
  # 增量備份循環(每 30 分鐘後台運行)
706
  (while true; do sleep 1800; python3 /usr/local/bin/sync.py backup; done) &
707
 
708
- # Workspace 同步循環(每 30 分鐘後台運行)
709
- (while true; do sleep 1800; python3 /usr/local/bin/sync.py restore_workspace; done) &
710
-
711
  # 把 HF Space Secrets 裡的 GEMINI key 寫入 auth-profiles.json
712
  mkdir -p /root/.openclaw/agents/main/agent
713
  python3 - << 'PYEOF'
@@ -901,7 +770,7 @@ CS_PORT = int(os.environ.get("CODE_SERVER_PORT", "13337"))
901
  CS_PASSWORD = os.environ.get("CODE_SERVER_PASSWORD", "changeme123!")
902
  CS_USER_DATA_DIR = "/root/.code-server"
903
  CS_EXTENSIONS_DIR= "/root/.code-server/extensions"
904
- CS_WORKSPACE = "/"
905
  CS_LOG = "/root/.openclaw/logs/code-server.log"
906
  IDLE_MINUTES = int(os.environ.get("IDE_IDLE_MINUTES", "30"))
907
  LAST_ACCESS_FILE = "/tmp/cs-last-access"
 
72
  token = os.getenv("HF_TOKEN")
73
 
74
  OPENCLAW_LOCAL = "/root/.openclaw"
 
75
 
76
  # ── 通用工具函數 ──────────────────────────────────────────────────────────────
77
 
 
120
  if not repo_id or not token:
121
  print("Skip Restore: HF_DATASET or HF_TOKEN not set")
122
  return
123
+
124
+ # RESTORE_SKIP=all:不恢復任何 tar.gz 內文件到本地,直接返回
125
+ restore_skip_raw = os.getenv("RESTORE_SKIP", "").strip()
126
+ if restore_skip_raw == "all":
127
+ print("Restore skip: RESTORE_SKIP=all, skipping all restore.")
128
+ return
129
+
130
+ # 解析跳過列表(用於在解壓時逐條目跳過,不是解壓後刪除)
131
+ skip_set = _parse_skip_list("RESTORE_SKIP")
132
+
133
  try:
134
  all_files = list(api.list_repo_files(repo_id=repo_id, repo_type="dataset", token=token))
135
 
136
+ # ── tar.gz 備份恢復 ──────────────────────────────────────────────
137
  # 備份範圍:/root/.openclaw 下所有文件及文件夾(保持完整目錄結構)
138
  # tar 包內 arcname 直接對應 .openclaw/ 下的相對路徑,解壓目標為 /root/.openclaw/
139
+ # RESTORE_SKIP:逗號分隔,填寫相對於 /root/.openclaw 的路徑
140
+ # 支持多級子目錄,例如:RESTORE_SKIP=agents/main/sessions,cron,extensions/foo/bar
141
+ # 留空或不設置 = 全部恢復(默認行為)
142
+ # RESTORE_SKIP=all = 跳過全部恢復(已在上方提前返回)
143
  now = datetime.now()
144
  for i in range(5):
145
  day = (now - timedelta(days=i)).strftime("%Y-%m-%d")
 
149
  path = hf_hub_download(repo_id=repo_id, filename=name, repo_type="dataset", token=token)
150
  os.makedirs(OPENCLAW_LOCAL, exist_ok=True)
151
  with tarfile.open(path, "r:gz") as tar:
152
+ for member in tar.getmembers():
153
+ if _is_skipped(member.name, skip_set):
154
+ print(f"Restore skip (RESTORE_SKIP): skipping {member.name}")
155
+ continue
156
+ tar.extract(member, path=OPENCLAW_LOCAL)
157
  print(f"Success: Restored from {name}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
  break
159
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  except Exception as e:
161
  print(f"Restore Error: {e}")
162
 
 
163
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
 
165
  # ── backup() ──────────────────────────────────────────────────────────────────
166
 
 
169
  print("Skip Backup: HF_DATASET or HF_TOKEN not set")
170
  return
171
 
172
+ # ── tar.gz 打包備份 ──────────────────────────────────────────────────────
173
  # 備份範圍:/root/.openclaw 下所有文件及文件夾(完整目錄結構)
174
  # tar 包解壓目標為 /root/.openclaw/,arcname 對應其內部相對路徑
175
  #
 
177
  # 格式:逗號分隔,支持多級子目錄
178
  # 例如:BACKUP_TAR_SKIP=agents/main/sessions,cron,extensions/foo/bar
179
  # 留空或不設置 = 備份全部
180
+ # BACKUP_TAR_SKIP=all = 不執行 tar.gz 備份,不生成任何 tar.gz 備份文件
181
+ backup_tar_skip_raw = os.getenv("BACKUP_TAR_SKIP", "").strip()
182
+ if backup_tar_skip_raw == "all":
183
+ print("Backup tar.gz skip: BACKUP_TAR_SKIP=all, skipping tar.gz backup.")
184
+ return
185
  try:
186
  tar_skip_set = _parse_skip_list("BACKUP_TAR_SKIP")
187
  day = datetime.now().strftime("%Y-%m-%d")
 
195
  except Exception as e:
196
  print(f"Backup tar.gz Error: {e}")
197
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
  if __name__ == "__main__":
199
  if len(sys.argv) > 1 and sys.argv[1] == "backup":
200
  backup()
 
 
201
  else:
202
  restore()
203
  EOF
 
258
  #!/bin/bash
259
  set -e
260
 
261
+ # ── restore() 觸發條件判斷 ──────────────────────────────────────────────────
262
+ # HF Space 磁盤是 ephemeral 的,每次容器重啟(首次部署、factory rebuild、普通 restart)
263
+ # /root/.openclaw 都從空狀態開始。平台本身無法區分三種重啟場景。
264
+ # 注意:start-openclaw-code-server 在調用本腳本之前會執行
265
+ # mkdir -p /root/.openclaw/logs
266
+ # 因此不能以目錄是否存在或為空作為判定依據。
267
+ #
268
+ # 改用 openclaw.json 作為判定標誌:
269
+ # - 該文件由本腳本在 restore() 之後的配置寫入步驟生成
270
+ # - mkdir -p /root/.openclaw/logs 不會創建它
271
+ # - 首次部署 / 任何形式的容器重建:文件不存在 → 執行 restore()
272
+ # - 運行中再次調用本腳本(文件已存在) → 跳過 restore()
273
+ if [ ! -f /root/.openclaw/openclaw.json ]; then
274
+ echo "=== First deploy / rebuild detected: running restore() ==="
275
+ python3 /usr/local/bin/sync.py restore
276
+ else
277
+ echo "=== openclaw.json exists, skipping restore() ==="
278
+ fi
279
 
280
+ mkdir -p /root/.openclaw/sessions
 
281
 
282
  # provider1 model2~5 fallback 到自身 model1
283
  provide1_models_id2="${provide1_models_id2:-$provide1_models_id1}"
 
577
  # 增量備份循環(每 30 分鐘後台運行)
578
  (while true; do sleep 1800; python3 /usr/local/bin/sync.py backup; done) &
579
 
 
 
 
580
  # 把 HF Space Secrets 裡的 GEMINI key 寫入 auth-profiles.json
581
  mkdir -p /root/.openclaw/agents/main/agent
582
  python3 - << 'PYEOF'
 
770
  CS_PASSWORD = os.environ.get("CODE_SERVER_PASSWORD", "changeme123!")
771
  CS_USER_DATA_DIR = "/root/.code-server"
772
  CS_EXTENSIONS_DIR= "/root/.code-server/extensions"
773
+ CS_WORKSPACE = "/root/.openclaw"
774
  CS_LOG = "/root/.openclaw/logs/code-server.log"
775
  IDLE_MINUTES = int(os.environ.get("IDE_IDLE_MINUTES", "30"))
776
  LAST_ACCESS_FILE = "/tmp/cs-last-access"