| import os |
| import time |
| import subprocess |
| import threading |
| from huggingface_hub import HfApi, snapshot_download |
|
|
| |
# Hub credentials and target repo, both read from the environment at import time.
TOKEN = os.getenv("HF_TOKEN")
DATASET_ID = os.getenv("DATASET_ID")


# Directories created at startup and mirrored to the dataset on each backup pass.
SYNC_FOLDERS = ["data", "output", "config"]
# NOTE(review): SYNC_EXTENSIONS is not referenced anywhere in this file —
# upload_folder() below hardcodes its own allow_patterns. Presumably these two
# lists are meant to stay in sync; confirm before relying on this constant.
SYNC_EXTENSIONS = [".json", ".db", ".yaml", ".yml", ".txt", ".env"]


# Shared Hub client used by the backup loop; token may be None for public repos.
api = HfApi(token=TOKEN)
|
|
def download_data():
    """Restore persisted data from the Hub dataset before the app starts.

    Best-effort: missing DATASET_ID or a failed download only logs a
    message, so a first run (no snapshot yet) still boots normally.
    """
    if not DATASET_ID:
        print("[System] 警告: 未配置 DATASET_ID")
        return

    print(f"[System] 正在从 Dataset ({DATASET_ID}) 拉取持久化数据...")
    # Code and build files live in the image, not the backup — skip them.
    skip_patterns = ["*.py", "Dockerfile", "requirements.txt", "logs/*", ".git/*"]
    try:
        snapshot_download(
            repo_id=DATASET_ID,
            repo_type="dataset",
            local_dir=".",
            token=TOKEN,
            ignore_patterns=skip_patterns,
        )
    except Exception as exc:
        # First run has no snapshot and transient network errors should
        # not block startup, so failure here is non-fatal.
        print(f"[System] 拉取失败 (首次运行或网络问题): {exc}")
    else:
        print("[System] 数据拉取完成。")
|
|
def upload_data():
    """Periodically back up data/config changes to the Hub dataset.

    Intended to run forever inside a daemon thread: waits 30 minutes
    between passes, then uploads the whitelisted folders and file types
    to DATASET_ID. Returns immediately if DATASET_ID is not configured.
    """
    if not DATASET_ID:
        # DATASET_ID is read from the environment once at import time and
        # can never appear later, so exit the thread instead of sleeping
        # forever in a loop that can do nothing.
        return

    while True:
        # Sleep first so a fresh container does not immediately re-upload
        # the snapshot it just downloaded.
        time.sleep(1800)
        try:
            # Upload synchronously. The original passed run_as_future=True
            # and discarded the Future, which meant upload errors were
            # silently swallowed and the except-branch below was dead code.
            # This thread has nothing else to do, so blocking is fine.
            api.upload_folder(
                folder_path=".",
                repo_id=DATASET_ID,
                repo_type="dataset",
                commit_message="Auto-backup data and configs",
                allow_patterns=[
                    "data/**",
                    "output/**",
                    "config/**",
                    "*.json",
                    "*.db",
                    "*.yaml",
                    "*.yml",
                    ".env",
                ],
                ignore_patterns=["logs/**", "__pycache__/**"],
            )
            print(f"[Backup] {time.strftime('%H:%M:%S')} 备份同步任务已提交")
        except Exception as e:
            # "No files have been modified" just means nothing changed since
            # the last pass — only report genuine failures.
            if "No files have been modified" not in str(e):
                print(f"[Backup] 备份出错: {e}")
|
|
if __name__ == "__main__":
    # Guarantee every synced directory exists, even on a brand-new container.
    for sync_dir in SYNC_FOLDERS:
        os.makedirs(sync_dir, exist_ok=True)

    # Restore the previous backup before the app touches any local state.
    download_data()

    # Kick off periodic backups in the background; daemon=True lets the
    # process exit without waiting on the sync thread.
    threading.Thread(target=upload_data, daemon=True).start()

    print("[System] 正在启动 OpenAI/Codex CLI...")
    subprocess.run(["python", "webui.py", "--host", "0.0.0.0", "--port", "7860"])