"""
Restore OpenCode data from a Hugging Face Dataset repository into ~/.local/share/opencode.
Requires the environment variables HF_TOKEN and OPENCODE_DATASET_REPO.
"""
| import os |
| import re |
| import shutil |
| import sys |
|
|
| def _normalize_repo_id(value): |
| """接受 repo_id 或完整 URL,返回 namespace/repo_name。""" |
| if not value or not value.strip(): |
| return None |
| value = value.strip() |
| |
| m = re.search(r"(?:huggingface\.co/datasets/|^)([\w.-]+/[\w.-]+)/?$", value) |
| if m: |
| return m.group(1) |
| |
| if "/" in value: |
| return value |
| return None |
|
|
def _replace_entry(src, dst):
    """Copy ``src`` (file or directory) to ``dst``, replacing what is there."""
    if os.path.isdir(src):
        # copytree needs a free destination: drop an old directory, or an old
        # file/symlink that rmtree would not be able to remove.
        if os.path.isdir(dst) and not os.path.islink(dst):
            shutil.rmtree(dst, ignore_errors=True)
        elif os.path.lexists(dst):
            os.remove(dst)
        shutil.copytree(src, dst)
    else:
        # copy2 into an existing directory would nest the file inside it
        # instead of replacing the entry, so clear the directory first.
        if os.path.isdir(dst) and not os.path.islink(dst):
            shutil.rmtree(dst)
        shutil.copy2(src, dst)


def main():
    """Restore OpenCode data from a Hugging Face dataset repository.

    Reads ``HF_TOKEN`` and ``OPENCODE_DATASET_REPO`` from the environment,
    downloads a snapshot of the dataset into a temporary directory next to
    ``~/.local/share/opencode`` and copies its top-level entries into that
    data directory, replacing entries of the same name.

    The restore is skipped (returning 0) when the token or repo id is missing,
    when ``huggingface_hub`` is not installed, when listing the repo fails, or
    when the repo holds nothing besides ``.gitattributes``.

    Returns:
        Always 0.

    Raises:
        Any exception raised by the download or the copy step propagates; the
        temporary directory is removed in either case.
    """
    token = os.environ.get("HF_TOKEN")
    raw = os.environ.get("OPENCODE_DATASET_REPO")
    repo_id = _normalize_repo_id(raw)
    data_dir = os.path.expanduser("~/.local/share/opencode")

    if not token or not repo_id:
        return 0

    # Imported lazily so a missing optional dependency only skips the restore.
    try:
        from huggingface_hub import HfApi, snapshot_download
    except ImportError:
        print("restore: huggingface_hub not installed, skip restore", file=sys.stderr)
        return 0

    try:
        api = HfApi(token=token)
        files = api.list_repo_files(repo_id, repo_type="dataset")
    except Exception as e:
        print(f"restore: list repo failed ({e}), skip restore", file=sys.stderr)
        return 0

    # Top-level names that really exist in the repo. Only these are copied, so
    # helper folders the download creates inside the target directory (such as
    # the ``.cache`` metadata folder) never leak into the data directory.
    repo_entries = {path.split("/", 1)[0] for path in (files or ())}
    repo_entries.discard(".gitattributes")
    if not repo_entries:
        return 0

    os.makedirs(data_dir, exist_ok=True)
    tmp_dir = data_dir + ".restore_tmp"
    # A previous run that was killed may have left stale files behind.
    shutil.rmtree(tmp_dir, ignore_errors=True)
    try:
        snapshot_download(
            repo_id=repo_id,
            repo_type="dataset",
            local_dir=tmp_dir,
            token=token,
        )
        for name in os.listdir(tmp_dir):
            if name not in repo_entries:
                continue
            _replace_entry(os.path.join(tmp_dir, name), os.path.join(data_dir, name))
    finally:
        if os.path.isdir(tmp_dir):
            shutil.rmtree(tmp_dir, ignore_errors=True)
    return 0
|
|
# Script entry point: run the restore and use its return value as exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
|