Spaces:
gallyg
/
Configuration error

gallyg committed on
Commit
5a09aa9
·
verified ·
1 Parent(s): 085c4b4

Update sync_run.py

Browse files
Files changed (1) hide show
  1. sync_run.py +42 -23
sync_run.py CHANGED
@@ -7,43 +7,62 @@ from huggingface_hub import HfApi, snapshot_download
7
  # 配置
8
  TOKEN = os.getenv("HF_TOKEN")
9
  DATASET_ID = os.getenv("DATASET_ID")
10
- LOCAL_DIR = "data" # 项目存储账号和数据库目录
 
11
 
12
  api = HfApi(token=TOKEN)
13
 
14
  def download_data():
15
- """从 Dataset 下载数据到本地"""
16
  if not DATASET_ID:
17
- print("未配置 DATASET_ID,跳过下载")
18
  return
19
- try:
20
- print(f"正在从 {DATASET_ID} 同步数据...")
21
- snapshot_download(
22
- repo_id=DATASET_ID,
23
- repo_type="dataset",
24
- local_dir=LOCAL_DIR,
25
- token=TOKEN
26
- )
27
- print("同步完成")
28
- except Exception as e:
29
- print(f"下载失败(可能是首次运行,仓库为空): {e}")
 
30
 
31
  def upload_data():
32
- """定时上传数据到 Dataset"""
33
  while True:
34
- time.sleep(300) # 每 5 分钟备份一次
35
- if DATASET_ID:
 
 
 
 
 
 
 
36
  try:
 
37
  api.upload_folder(
38
- folder_path=LOCAL_DIR,
 
39
  repo_id=DATASET_ID,
40
- repo_type="dataset"
 
 
41
  )
42
- print("备份成功至 Dataset")
43
  except Exception as e:
44
- print(f"备份失败: {e}")
 
 
 
45
 
46
  if __name__ == "__main__":
 
 
 
 
47
  # 1. 启动前先下载
48
  download_data()
49
 
@@ -51,6 +70,6 @@ if __name__ == "__main__":
51
  backup_thread = threading.Thread(target=upload_data, daemon=True)
52
  backup_thread.start()
53
 
54
- # 3. 运行原有的 网页UI (注意:此处要和你的 Dockerfile 启动命令一致)
55
- # 使用 subprocess 运行,防止主进程阻塞
56
  subprocess.run(["python", "webui.py", "--host", "0.0.0.0", "--port", "7860"])
 
7
  # 配置
8
  TOKEN = os.getenv("HF_TOKEN")
9
  DATASET_ID = os.getenv("DATASET_ID")
10
+ # 需要备份文件夹列表
11
+ SYNC_FOLDERS = ["data", "output"]
12
 
13
  api = HfApi(token=TOKEN)
14
 
15
def download_data():
    """Initial sync: download every backed-up folder from the Dataset repo.

    Does nothing when DATASET_ID is not configured. Each folder is fetched
    independently so one failure (e.g. a brand-new, empty repo) does not
    abort the rest.
    """
    if not DATASET_ID:
        print("[System] 未配置 DATASET_ID,跳过下载")
        return
    for folder in SYNC_FOLDERS:
        try:
            print(f"[System] 正在同步云端 {folder} 文件夹...")
            # BUG FIX: local_dir must be "." rather than `folder`.
            # snapshot_download preserves repo-relative paths under local_dir,
            # so local_dir=folder plus allow_patterns=f"{folder}/*" would nest
            # files into "<folder>/<folder>/..." — a layout upload_data (which
            # pushes the local folder to path_in_repo=folder) would never see.
            snapshot_download(
                repo_id=DATASET_ID,
                repo_type="dataset",
                local_dir=".",
                allow_patterns=f"{folder}/*",  # only fetch this folder's files
                token=TOKEN,
            )
        except Exception as e:
            # Expected on first run against an empty repo; log and continue.
            print(f"[System] 下载 {folder} 失败 (可能仓库为空): {e}")
32
 
33
def upload_data():
    """Background loop: every 5 minutes, push changed folders to the Dataset.

    Intended to run forever inside a daemon thread. upload_folder compares
    file hashes server-side, so unchanged folders create no new commit.
    """
    while True:
        time.sleep(300)  # check every 5 minutes
        if not DATASET_ID:
            continue

        print(f"[Backup] {time.strftime('%Y-%m-%d %H:%M:%S')} 开始检查文件变动...")
        futures = []
        for folder in SYNC_FOLDERS:
            # Nothing to upload for missing or empty folders.
            if not os.path.exists(folder) or not os.listdir(folder):
                continue
            try:
                # run_as_future=True returns immediately; uploads for all
                # folders proceed in parallel and are reaped below.
                future = api.upload_folder(
                    folder_path=folder,
                    path_in_repo=folder,  # mirror the folder name inside the Dataset
                    repo_id=DATASET_ID,
                    repo_type="dataset",
                    commit_message=f"Scheduled backup of {folder}",
                    run_as_future=True
                )
                futures.append((folder, future))
            except Exception as e:
                print(f"[Backup] {folder} 上传失败: {e}")

        # BUG FIX: with run_as_future=True the original try/except wrapped only
        # the (non-blocking) call, so upload errors — which are raised from the
        # Future, not the call — could never be caught and were silently
        # discarded. Reap each future so failures are actually reported.
        for folder, future in futures:
            try:
                future.result()
            except Exception as e:
                # "No files have been modified" is a no-op, not an error.
                if "No files have been modified" not in str(e):
                    print(f"[Backup] {folder} 上传失败: {e}")
        print(f"[Backup] 检查完毕。")
60
 
61
if __name__ == "__main__":
    # Make sure every sync target exists locally, so later os.listdir calls
    # and the web UI's own writes don't fail on a fresh container.
    for folder_name in SYNC_FOLDERS:
        os.makedirs(folder_name, exist_ok=True)

    # 1. Pull the latest backup before anything touches the data.
    download_data()

    backup_thread = threading.Thread(target=upload_data, daemon=True)
    backup_thread.start()

    # 3. Hand the foreground over to the web UI process.
    print("[System] 正在启动 网页UI...")
    subprocess.run(["python", "webui.py", "--host", "0.0.0.0", "--port", "7860"])