gallyg committed on
Commit
09dada1
·
verified ·
1 Parent(s): 5a09aa9

Update sync_run.py

Browse files
Files changed (1) hide show
  1. sync_run.py +56 -45
sync_run.py CHANGED
@@ -7,69 +7,80 @@ from huggingface_hub import HfApi, snapshot_download
7
  # 配置
8
  TOKEN = os.getenv("HF_TOKEN")
9
  DATASET_ID = os.getenv("DATASET_ID")
10
- # 需要备份的文件夹列表
11
- SYNC_FOLDERS = ["data", "output"]
 
 
12
 
13
  api = HfApi(token=TOKEN)
14
 
15
  def download_data():
16
- """初始化:从 Dataset 下载所有备份文件"""
17
  if not DATASET_ID:
18
- print("[System] 未配置 DATASET_ID,跳过下载。")
19
  return
20
- for folder in SYNC_FOLDERS:
21
- try:
22
- print(f"[System] 正在同步云端 {folder} 文件夹...")
23
- snapshot_download(
24
- repo_id=DATASET_ID,
25
- repo_type="dataset",
26
- local_dir=folder,
27
- allow_patterns=f"{folder}/*", # 只下载对应文件夹内容
28
- token=TOKEN
29
- )
30
- except Exception as e:
31
- print(f"[System] 下载 {folder} 失败 (可能仓库为空): {e}")
 
32
 
33
  def upload_data():
34
- """定时任务:上传本地修改到 Dataset"""
35
  while True:
36
- time.sleep(300) # 每 5 分钟检查一次
 
37
  if not DATASET_ID:
38
  continue
39
 
40
- print(f"[Backup] {time.strftime('%Y-%m-%d %H:%M:%S')} 开始检查文件变动...")
41
- for folder in SYNC_FOLDERS:
42
- if not os.path.exists(folder) or not os.listdir(folder):
43
- continue
44
-
45
- try:
46
- # upload_folder 会自动比对哈希值,只有文件变了才会真的上传
47
- api.upload_folder(
48
- folder_path=folder,
49
- path_in_repo=folder, # 在 Dataset 中对应的路径
50
- repo_id=DATASET_ID,
51
- repo_type="dataset",
52
- commit_message=f"Scheduled backup of {folder}",
53
- run_as_future=True # 异步运行,不阻塞
54
- )
55
- except Exception as e:
56
- # 忽略 "No files have been modified" 这种不算错误的错误
57
- if "No files have been modified" not in str(e):
58
- print(f"[Backup] {folder} 上传失败: {e}")
59
- print(f"[Backup] 检查完毕。")
 
 
 
 
 
60
 
61
  if __name__ == "__main__":
62
- # 创建本地文件夹防止报错
63
- for f in SYNC_FOLDERS:
64
- os.makedirs(f, exist_ok=True)
65
 
66
- # 1. 启动前先下载
67
  download_data()
68
 
69
- # 2. 启动定时备份线程
70
  backup_thread = threading.Thread(target=upload_data, daemon=True)
71
  backup_thread.start()
72
 
73
- # 3. 运行 网页UI
74
- print("[System] 正在启动 网页UI...")
 
 
75
  subprocess.run(["python", "webui.py", "--host", "0.0.0.0", "--port", "7860"])
 
7
  # 配置
8
  TOKEN = os.getenv("HF_TOKEN")
9
  DATASET_ID = os.getenv("DATASET_ID")
10
+
11
+ # 定义需要监控的目录和文件类型
12
+ SYNC_FOLDERS = ["data", "output", "config"]
13
+ SYNC_EXTENSIONS = [".json", ".db", ".yaml", ".yml", ".txt", ".env"]
14
 
15
  api = HfApi(token=TOKEN)
16
 
17
  def download_data():
18
+ """启动前:从 Dataset 下载所有备份"""
19
  if not DATASET_ID:
20
+ print("[System] 警告: 未配置 DATASET_ID")
21
  return
22
+ try:
23
+ print(f"[System] 正在从 Dataset ({DATASET_ID}) 拉取持久化数据...")
24
+ # 下载到当前目录,允许覆盖,但排除代码文件防止版本冲突
25
+ snapshot_download(
26
+ repo_id=DATASET_ID,
27
+ repo_type="dataset",
28
+ local_dir=".",
29
+ token=TOKEN,
30
+ ignore_patterns=["*.py", "Dockerfile", "requirements.txt", "logs/*", ".git/*"]
31
+ )
32
+ print("[System] 数据拉取完成。")
33
+ except Exception as e:
34
+ print(f"[System] 拉取失败 (首次运行或网络问题): {e}")
35
 
36
  def upload_data():
37
+ """运行中:定时将改动上传 Dataset"""
38
  while True:
39
+ # 每 2 分钟备份一次(缩短时间,防止改完密码还没备份就休眠了)
40
+ time.sleep(120)
41
  if not DATASET_ID:
42
  continue
43
 
44
+ try:
45
+ # 扫描当前目录下所有符合条件的文件进行上传
46
+ # 我们直接上传整个根目录,但通过 allow_patterns 过滤出我们要的文件
47
+ api.upload_folder(
48
+ folder_path=".",
49
+ repo_id=DATASET_ID,
50
+ repo_type="dataset",
51
+ commit_message="Auto-backup data and configs",
52
+ allow_patterns=[
53
+ "data/**",
54
+ "output/**",
55
+ "config/**",
56
+ "*.json",
57
+ "*.db",
58
+ "*.yaml",
59
+ "*.yml",
60
+ ".env"
61
+ ],
62
+ ignore_patterns=["logs/**", "__pycache__/**"],
63
+ run_as_future=True
64
+ )
65
+ print(f"[Backup] {time.strftime('%H:%M:%S')} 备份同步任务已提交")
66
+ except Exception as e:
67
+ if "No files have been modified" not in str(e):
68
+ print(f"[Backup] 备份出错: {e}")
69
 
70
  if __name__ == "__main__":
71
+ # 确保基础目录存在
72
+ for folder in SYNC_FOLDERS:
73
+ os.makedirs(folder, exist_ok=True)
74
 
75
+ # 1. 启动前同步云端数据到本地
76
  download_data()
77
 
78
+ # 2. 启动后台备份线程
79
  backup_thread = threading.Thread(target=upload_data, daemon=True)
80
  backup_thread.start()
81
 
82
+ # 3. 启动主程序
83
+ # 注意:根据你的日志,程序默认端口是 8000,但 HF 要求 7860
84
+ # 我们强制它运行在 7860
85
+ print("[System] 正在启动 OpenAI/Codex CLI...")
86
  subprocess.run(["python", "webui.py", "--host", "0.0.0.0", "--port", "7860"])