File size: 3,041 Bytes
b2e8697
 
 
 
 
 
 
 
 
09dada1
 
 
 
b2e8697
 
 
 
09dada1
b2e8697
09dada1
b2e8697
09dada1
 
 
 
 
 
 
 
 
 
 
 
 
b2e8697
 
09dada1
b2e8697
09dada1
7103a30
5a09aa9
 
 
09dada1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b2e8697
 
09dada1
 
 
5a09aa9
09dada1
b2e8697
 
09dada1
b2e8697
 
 
09dada1
 
 
 
b2e8697
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import os
import time
import subprocess
import threading
from huggingface_hub import HfApi, snapshot_download

# --- Configuration (read from the environment) ---
TOKEN = os.getenv("HF_TOKEN")        # Hugging Face access token; None if unset
DATASET_ID = os.getenv("DATASET_ID") # backup dataset repo id, e.g. "user/name"

# Directories and file extensions to be synchronized.
# NOTE(review): SYNC_EXTENSIONS is not referenced anywhere in this file's
# visible code — confirm it is consumed elsewhere or fold it into the
# allow_patterns used by upload_data().
SYNC_FOLDERS = ["data", "output", "config"]
SYNC_EXTENSIONS = [".json", ".db", ".yaml", ".yml", ".txt", ".env"]

# Shared Hub client used by the background backup loop.
api = HfApi(token=TOKEN)

def download_data():
    """Pre-start sync: pull the persisted backup from the Hub dataset.

    Downloads the whole dataset snapshot into the current directory,
    excluding code/build files so a stale backup can never overwrite the
    running application. Any failure (first run, network trouble) is
    logged and ignored — the app still starts with an empty state.
    """
    if not DATASET_ID:
        print("[System] 警告: 未配置 DATASET_ID")
        return

    print(f"[System] 正在从 Dataset ({DATASET_ID}) 拉取持久化数据...")
    # Never let the backup clobber source code or logs.
    excluded = ["*.py", "Dockerfile", "requirements.txt", "logs/*", ".git/*"]
    try:
        snapshot_download(
            repo_id=DATASET_ID,
            repo_type="dataset",
            local_dir=".",
            token=TOKEN,
            ignore_patterns=excluded,
        )
    except Exception as e:
        print(f"[System] 拉取失败 (首次运行或网络问题): {e}")
    else:
        print("[System] 数据拉取完成。")

def upload_data(interval_seconds=120):
    """Background loop: periodically push local changes to the Hub dataset.

    Intended to run forever inside a daemon thread. Every *interval_seconds*
    it submits a non-blocking upload of the data/output/config folders plus
    root-level config files, selected via allow/ignore patterns.

    Args:
        interval_seconds: delay between backup attempts, in seconds.
            Defaults to 120 (2 minutes).

    BUG FIX: the original comment promised a 2-minute cadence ("so a changed
    password is backed up before the Space sleeps") but the code slept
    1800 s (30 minutes). The sleep now matches the documented intent, and
    the interval is parameterized for callers that want a different cadence.
    """
    while True:
        # Sleep first so we never upload an unchanged tree right at startup.
        time.sleep(interval_seconds)
        if not DATASET_ID:
            continue

        try:
            # Upload from the repo root; allow_patterns filters down to the
            # data/config files we actually want persisted.
            api.upload_folder(
                folder_path=".",
                repo_id=DATASET_ID,
                repo_type="dataset",
                commit_message="Auto-backup data and configs",
                allow_patterns=[
                    "data/**",
                    "output/**",
                    "config/**",
                    "*.json",
                    "*.db",
                    "*.yaml",
                    "*.yml",
                    ".env"
                ],
                ignore_patterns=["logs/**", "__pycache__/**"],
                run_as_future=True  # non-blocking: submit and keep looping
            )
            print(f"[Backup] {time.strftime('%H:%M:%S')} 备份同步任务已提交")
        except Exception as e:
            # "No files have been modified" is the Hub's no-op signal, not
            # an error — only report genuine failures.
            if "No files have been modified" not in str(e):
                print(f"[Backup] 备份出错: {e}")

if __name__ == "__main__":
    # Make sure the base directories exist before syncing into them.
    for folder in SYNC_FOLDERS:
        os.makedirs(folder, exist_ok=True)

    # 1. Before startup: pull the cloud backup down to the local filesystem.
    download_data()

    # 2. Start the background backup thread (daemon: exits with the process,
    #    so a pending upload may be dropped on shutdown).
    backup_thread = threading.Thread(target=upload_data, daemon=True)
    backup_thread.start()

    # 3. Launch the main application (blocks until webui.py exits).
    # NOTE: the app defaults to port 8000, but Hugging Face Spaces requires
    # 7860, so it is forced onto 7860 here.
    print("[System] 正在启动 OpenAI/Codex CLI...")
    subprocess.run(["python", "webui.py", "--host", "0.0.0.0", "--port", "7860"])