import os
import time
import subprocess
import threading

from huggingface_hub import HfApi, snapshot_download

# Configuration
TOKEN = os.getenv("HF_TOKEN")
DATASET_ID = os.getenv("DATASET_ID")

# Directories and file types to keep in sync
SYNC_FOLDERS = ["data", "output", "config"]
SYNC_EXTENSIONS = [".json", ".db", ".yaml", ".yml", ".txt", ".env"]

api = HfApi(token=TOKEN)


def download_data():
    """Before startup: pull all backups from the Dataset."""
    if not DATASET_ID:
        print("[System] Warning: DATASET_ID is not configured")
        return
    try:
        print(f"[System] Pulling persisted data from Dataset ({DATASET_ID})...")
        # Download into the current directory, overwriting local files,
        # but exclude code files to avoid version conflicts.
        snapshot_download(
            repo_id=DATASET_ID,
            repo_type="dataset",
            local_dir=".",
            token=TOKEN,
            ignore_patterns=["*.py", "Dockerfile", "requirements.txt", "logs/*", ".git/*"],
        )
        print("[System] Data pull complete.")
    except Exception as e:
        print(f"[System] Pull failed (first run or network issue): {e}")


def upload_data():
    """While running: periodically upload changes to the Dataset."""
    while True:
        # Back up every 2 minutes (a short interval, so a fresh change such as
        # a password update is backed up before the Space hibernates).
        time.sleep(120)
        if not DATASET_ID:
            continue
        try:
            # Upload the whole working directory, using allow_patterns to
            # pick out only the files we want.
            api.upload_folder(
                folder_path=".",
                repo_id=DATASET_ID,
                repo_type="dataset",
                commit_message="Auto-backup data and configs",
                allow_patterns=[
                    "data/**", "output/**", "config/**",
                    "*.json", "*.db", "*.yaml", "*.yml", ".env",
                ],
                ignore_patterns=["logs/**", "__pycache__/**"],
                # Run the commit in a background thread. Note that with
                # run_as_future=True, upload errors surface on the returned
                # Future, so the except below only catches submission failures.
                run_as_future=True,
            )
            print(f"[Backup] {time.strftime('%H:%M:%S')} backup sync task submitted")
        except Exception as e:
            if "No files have been modified" not in str(e):
                print(f"[Backup] Backup error: {e}")


if __name__ == "__main__":
    # Make sure the base directories exist.
    for folder in SYNC_FOLDERS:
        os.makedirs(folder, exist_ok=True)

    # 1. Sync cloud data to local before startup.
    download_data()

    # 2. Start the background backup thread.
    backup_thread = threading.Thread(target=upload_data, daemon=True)
    backup_thread.start()

    # 3. Launch the main program.
    # Note: per your logs, the app defaults to port 8000, but HF Spaces
    # expects 7860, so we force it to listen on 7860.
    print("[System] Starting OpenAI/Codex CLI...")
    subprocess.run(["python", "webui.py", "--host", "0.0.0.0", "--port", "7860"])