gallyg committed on
Commit
b2e8697
·
verified ·
1 Parent(s): 98e80c7

Create sync_run.py

Browse files
Files changed (1) hide show
  1. sync_run.py +56 -0
sync_run.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import time
import subprocess
import threading
from huggingface_hub import HfApi, snapshot_download

# Configuration (read from the environment at startup)
TOKEN = os.getenv("HF_TOKEN")        # Hugging Face access token
DATASET_ID = os.getenv("DATASET_ID") # dataset repo used as the persistence backend
LOCAL_DIR = "data" # directory holding the project's accounts and database

# Single authenticated client, shared by the backup loop below.
api = HfApi(token=TOKEN)
13
+
14
def download_data():
    """Restore persisted state from the Dataset repo into LOCAL_DIR.

    Best-effort: a failed download (e.g. the repo is empty on the very
    first run) is reported but never raises.
    """
    if not DATASET_ID:
        # No persistence backend configured — start with a fresh local dir.
        print("未配置 DATASET_ID,跳过下载")
        return

    print(f"正在从 {DATASET_ID} 同步数据...")
    try:
        snapshot_download(
            repo_id=DATASET_ID,
            repo_type="dataset",
            local_dir=LOCAL_DIR,
            token=TOKEN,
        )
    except Exception as e:
        # Expected on the first run, when the dataset repo has no content yet.
        print(f"下载失败(可能是首次运行,仓库为空): {e}")
    else:
        print("同步完成")
30
+
31
def upload_data():
    """Periodically back up LOCAL_DIR to the Dataset repo (runs forever).

    Intended to run in a daemon thread; failures are logged and the loop
    keeps going so one bad upload never stops future backups.
    """
    while True:
        time.sleep(300)  # back up every 5 minutes
        if not DATASET_ID:
            continue
        try:
            api.upload_folder(
                folder_path=LOCAL_DIR,
                repo_id=DATASET_ID,
                repo_type="dataset",
            )
        except Exception as e:
            print(f"备份失败: {e}")
        else:
            print("备份成功至 Dataset")
45
+
46
if __name__ == "__main__":
    import sys

    # 1. Restore persisted state before the app starts.
    download_data()

    # 2. Start the periodic backup loop; daemon=True so the thread dies
    #    with the main process instead of keeping it alive on shutdown.
    backup_thread = threading.Thread(target=upload_data, daemon=True)
    backup_thread.start()

    # 3. Run the original web UI (must match the Dockerfile start command).
    #    subprocess.run blocks here as the foreground task while the backup
    #    thread works in the background. sys.executable guarantees the same
    #    interpreter as this wrapper — a bare "python" on PATH may be a
    #    different (or missing) interpreter inside the container.
    subprocess.run([sys.executable, "webui.py", "--host", "0.0.0.0", "--port", "7860"])