import os
import time
import subprocess
import threading
from huggingface_hub import HfApi, snapshot_download
# Configuration (both values come from the environment, e.g. HF Space secrets)
TOKEN = os.getenv("HF_TOKEN")          # Hugging Face access token
DATASET_ID = os.getenv("DATASET_ID")   # backup dataset repo id, e.g. "user/backup"
# Directories and file extensions to monitor for backup.
# NOTE(review): SYNC_EXTENSIONS is not referenced by the visible code
# (upload_data hard-codes its own allow_patterns) — verify intended use.
SYNC_FOLDERS = ["data", "output", "config"]
SYNC_EXTENSIONS = [".json", ".db", ".yaml", ".yml", ".txt", ".env"]
api = HfApi(token=TOKEN)
def download_data():
    """Pre-start restore: pull all persisted files from the Dataset repo.

    Best-effort — on a first run (empty repo) or a network failure the
    error is logged and the app starts with whatever is on local disk.
    """
    if not DATASET_ID:
        print("[System] 警告: 未配置 DATASET_ID")
        return
    # Exclude code/build files so a stale backup can never overwrite
    # the currently deployed application version.
    skip_patterns = ["*.py", "Dockerfile", "requirements.txt", "logs/*", ".git/*"]
    try:
        print(f"[System] 正在从 Dataset ({DATASET_ID}) 拉取持久化数据...")
        snapshot_download(
            repo_id=DATASET_ID,
            repo_type="dataset",
            token=TOKEN,
            local_dir=".",
            ignore_patterns=skip_patterns,
        )
        print("[System] 数据拉取完成。")
    except Exception as err:
        print(f"[System] 拉取失败 (首次运行或网络问题): {err}")
def upload_data(interval=1800):
    """Background loop: periodically upload changed files to the Dataset repo.

    Runs forever; intended to be started in a daemon thread.

    Args:
        interval: Seconds between backup attempts (default 1800 = 30 minutes).
            NOTE: the original comment claimed "every 2 minutes" while the
            code slept 1800s — the comment was wrong; the 30-minute default
            is kept to preserve behavior, and the interval is now a parameter.
    """
    while True:
        # Sleep first: download_data() has just synced at startup, so there
        # is nothing new to upload immediately.
        time.sleep(interval)
        if not DATASET_ID:
            continue
        try:
            # Upload the whole working directory, filtered down to data and
            # config files via allow_patterns; logs and caches are excluded.
            api.upload_folder(
                folder_path=".",
                repo_id=DATASET_ID,
                repo_type="dataset",
                commit_message="Auto-backup data and configs",
                allow_patterns=[
                    "data/**",
                    "output/**",
                    "config/**",
                    "*.json",
                    "*.db",
                    "*.yaml",
                    "*.yml",
                    ".env",
                ],
                ignore_patterns=["logs/**", "__pycache__/**"],
                run_as_future=True,  # non-blocking: commit runs on a worker thread
            )
            print(f"[Backup] {time.strftime('%H:%M:%S')} 备份同步任务已提交")
        except Exception as e:
            # "No files have been modified" is the expected no-op case;
            # only surface real errors.
            if "No files have been modified" not in str(e):
                print(f"[Backup] 备份出错: {e}")
if __name__ == "__main__":
    # Make sure every monitored directory exists before any sync runs.
    for folder in SYNC_FOLDERS:
        os.makedirs(folder, exist_ok=True)
    # 1. Restore cloud-persisted data to local disk before the app starts.
    download_data()
    # 2. Start the periodic backup loop as a daemon thread so it exits
    #    together with the main process.
    backup_thread = threading.Thread(target=upload_data, daemon=True)
    backup_thread.start()
    # 3. Launch the main application. It defaults to port 8000, but
    #    HF Spaces requires the web server on 7860, so force that port.
    print("[System] 正在启动 OpenAI/Codex CLI...")
    subprocess.run(["python", "webui.py", "--host", "0.0.0.0", "--port", "7860"])