| |
"""HuggingFace shared storage tool - Python version.

Uploads and downloads project files to and from an HF Storage Space.

Environment variables:
    HF_STORAGE_REPO  Storage repository id (format: username/repo)
    HF_TOKEN         HuggingFace token (write permission required)
    HF_API_URL       HF API base URL (default: https://huggingface.co)
"""
|
|
| import argparse |
| import json |
| import os |
| import sys |
| from pathlib import Path |
| from typing import Optional |
|
|
# huggingface_hub is the one third-party dependency of this script. When the
# package is not installed, print an install hint and exit with status 1
# instead of showing a traceback.
try:
    from huggingface_hub import HfApi, hf_hub_download
    from huggingface_hub.utils import HfHubHTTPError
except ModuleNotFoundError:
    print("错误: 需要安装 huggingface_hub")
    print("安装: pip install huggingface_hub")
    sys.exit(1)
|
|
|
|
class HFStorage:
    """File storage helper backed by a HuggingFace Space repository.

    Every repository call uses ``repo_type="space"``. Progress and errors are
    printed; methods report failure through their return value instead of
    raising. Only ``__init__`` raises, when configuration is missing.
    """

    def __init__(self, repo: Optional[str] = None, token: Optional[str] = None):
        """Resolve configuration and create the API client.

        The environment variables take precedence; the arguments are used as
        fallbacks when the corresponding variable is unset or blank.

        Args:
            repo: Fallback repository id (``username/repo``) for
                ``HF_STORAGE_REPO``.
            token: Fallback access token for ``HF_TOKEN``.

        Raises:
            ValueError: If no repository id or no token can be resolved.
        """
        # os.getenv(name, default) returns "" for a variable that is set but
        # empty, which would hide the explicit argument, so chain with `or`.
        env_repo = (os.getenv("HF_STORAGE_REPO") or "").strip()
        env_token = (os.getenv("HF_TOKEN") or "").strip()
        self.repo = env_repo or (repo or "").strip()
        self.token = env_token or (token or "").strip()
        if not self.repo:
            raise ValueError("HF_STORAGE_REPO 未设置")
        if not self.token:
            raise ValueError("HF_TOKEN 未设置")
        # Optional custom endpoint; None lets huggingface_hub use its default.
        self.endpoint = (os.getenv("HF_API_URL") or "").strip() or None
        self.api = HfApi(endpoint=self.endpoint, token=self.token)

    @staticmethod
    def _clean_remote_path(path: Optional[str]) -> str:
        """Return *path* without leading/trailing slashes ("" for None)."""
        return (path or "").strip("/")

    def _upload(self, local_path: Path, dest_path: str) -> None:
        """Upload one local file to *dest_path* in the Space; raises on failure."""
        self.api.upload_file(
            path_or_fileobj=str(local_path),
            path_in_repo=dest_path,
            repo_id=self.repo,
            repo_type="space",
        )

    def upload_file(self, file_path: str, dest: Optional[str] = None) -> bool:
        """Upload a single file.

        Args:
            file_path: Local file to upload.
            dest: Path inside the repository; defaults to the file's name.

        Returns:
            True on success, False if the file is missing or the upload fails.
        """
        local_path = Path(file_path)
        # is_file() also rejects directories, which can never be uploaded
        # through the single-file API.
        if not local_path.is_file():
            print(f"错误: 文件不存在: {file_path}")
            return False

        dest_path = dest or local_path.name
        print(f"上传: {local_path} -> {self.repo}/{dest_path}")

        try:
            self._upload(local_path, dest_path)
        except Exception as e:
            print(f" 失败: {e}")
            return False
        print(" 成功!")
        return True

    def upload_directory(self, dir_path: str, prefix: str = "") -> dict:
        """Upload every file below a directory, one upload per file.

        Args:
            dir_path: Local directory to walk recursively.
            prefix: Optional remote directory the files are placed under.

        Returns:
            A dict with ``success`` and ``failed`` counters and a ``files``
            list holding one record per attempted file. All three keys are
            present even when the directory does not exist.
        """
        results = {"success": 0, "failed": 0, "files": []}
        local_dir = Path(dir_path)
        if not local_dir.is_dir():
            print(f"错误: 目录不存在: {dir_path}")
            return results

        prefix = self._clean_remote_path(prefix)

        for file_path in local_dir.rglob("*"):
            if not file_path.is_file():
                continue
            rel_path = file_path.relative_to(local_dir)
            # Repository paths always use "/", whatever the local separator.
            remote_rel = rel_path.as_posix()
            dest_path = f"{prefix}/{remote_rel}" if prefix else remote_rel

            print(f"上传: {rel_path} -> {dest_path}")
            try:
                self._upload(file_path, dest_path)
            except Exception as e:
                results["failed"] += 1
                results["files"].append({"local": str(rel_path), "remote": dest_path, "status": "failed", "error": str(e)})
                print(f" 失败: {e}")
            else:
                results["success"] += 1
                results["files"].append({"local": str(rel_path), "remote": dest_path, "status": "success"})

        return results

    def download_file(self, filename: str, local_path: str = ".") -> Optional[Path]:
        """Download one file from the Space.

        Args:
            filename: Path of the file inside the repository.
            local_path: Local directory passed to ``hf_hub_download`` as
                ``local_dir``.

        Returns:
            The path of the downloaded file, or None on failure.
        """
        print(f"下载: {self.repo}/{filename} -> {local_path}")

        # Only pass `endpoint` when configured so the call stays identical to
        # the default one otherwise.
        extra = {"endpoint": self.endpoint} if self.endpoint else {}
        try:
            path = hf_hub_download(
                repo_id=self.repo,
                filename=filename,
                # Without this the hub looks in a model repository, while all
                # other operations of this class work on a Space.
                repo_type="space",
                local_dir=local_path,
                token=self.token,
                **extra,
            )
            print(f" 成功: {path}")
            return Path(path)
        except HfHubHTTPError as e:
            # Compare against None explicitly: a requests.Response is falsy
            # for 4xx/5xx statuses, so a plain truthiness test hides the 404.
            if e.response is not None and e.response.status_code == 404:
                print(f" 文件不存在: {filename}")
            else:
                print(f" 失败: {e}")
            return None
        except Exception as e:
            print(f" 失败: {e}")
            return None

    def list_files(self, path: str = "") -> list:
        """List repository files, optionally restricted to one directory.

        Args:
            path: Remote file or directory; empty lists the whole repository.

        Returns:
            The matching repository paths, or an empty list on failure.
        """
        print(f"列出: {self.repo}{'/' + path if path else ''}")

        wanted = self._clean_remote_path(path)
        try:
            files = self.api.list_repo_files(repo_id=self.repo, repo_type="space")
            if wanted:
                # Match on a path boundary so "src" does not also select
                # "src2/...".
                files = [f for f in files if f == wanted or f.startswith(wanted + "/")]

            for f in files:
                print(f" {f}")

            return list(files)
        except Exception as e:
            print(f" 失败: {e}")
            return []

    def sync(self, local_dir: str, remote_dir: str = "") -> dict:
        """Upload the local files that do not yet exist in the repository.

        Existence is decided by path only; file contents are not compared.

        Args:
            local_dir: Local directory to walk recursively.
            remote_dir: Remote directory to sync into; empty means the root.

        Returns:
            A dict with ``success``, ``failed`` and ``skipped`` counters.
        """
        results = {"success": 0, "failed": 0, "skipped": 0}
        local_path = Path(local_dir)
        if not local_path.is_dir():
            print(f"错误: 目录不存在: {local_dir}")
            return results

        # "/public" and "public/" must both match repository paths, which
        # never start or end with a slash.
        remote_dir = self._clean_remote_path(remote_dir)
        print(f"同步: {local_dir} -> {self.repo}{'/' + remote_dir if remote_dir else ''}")

        remote_prefix = f"{remote_dir}/" if remote_dir else ""
        remote_files = set()
        try:
            for f in self.api.list_repo_files(repo_id=self.repo, repo_type="space"):
                if f.startswith(remote_prefix):
                    remote_files.add(f[len(remote_prefix):])
        except Exception as e:
            # Best effort: with no listing, every local file is uploaded.
            print(f" 获取远程文件列表失败: {e}")

        for file_path in local_path.rglob("*"):
            if not file_path.is_file():
                continue
            # POSIX form so the key is comparable with repository paths.
            rel_path = file_path.relative_to(local_path).as_posix()
            dest_path = f"{remote_dir}/{rel_path}" if remote_dir else rel_path

            if rel_path in remote_files:
                print(f" 跳过(已存在): {rel_path}")
                results["skipped"] += 1
                continue

            print(f" 上传: {rel_path} -> {dest_path}")
            try:
                self._upload(file_path, dest_path)
            except Exception as e:
                results["failed"] += 1
                print(f" 失败: {e}")
            else:
                results["success"] += 1

        return results

    def delete_file(self, filename: str) -> bool:
        """Delete one file from the Space.

        Returns:
            True on success, False when the API call raises.
        """
        print(f"删除: {self.repo}/{filename}")

        try:
            self.api.delete_file(
                repo_id=self.repo,
                path_in_repo=filename,
                repo_type="space",
            )
        except Exception as e:
            print(f" 失败: {e}")
            return False
        print(" 成功!")
        return True

    def get_info(self) -> dict:
        """Return basic repository metadata, or an empty dict on failure.

        The result has the keys ``id``, ``name`` (same value as ``id``),
        ``private`` and ``created_at`` (string or None).
        """
        try:
            info = self.api.repo_info(repo_id=self.repo, repo_type="space")
            return {
                "id": info.id,
                "name": info.id,
                "private": info.private,
                "created_at": str(info.created_at) if info.created_at else None,
            }
        except Exception as e:
            print(f"获取信息失败: {e}")
            return {}
|
|
|
|
def main():
    """Parse the command line and run the requested storage command.

    With no sub-command the help text is printed and the function returns
    without needing any credentials. Otherwise an ``HFStorage`` is built from
    the environment and the command is dispatched to it.

    Exits with status 1 (via ``sys.exit``) when the configuration is missing
    or when the executed command reports a failure.
    """
    parser = argparse.ArgumentParser(
        description="HuggingFace 公共存储工具",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
示例:
# 设置环境变量
export HF_STORAGE_REPO="username/storage"
export HF_TOKEN="hf_xxxx"

# 上传文件
hf-storage.py upload ./build.tar.gz

# 上传目录
hf-storage.py upload-dir ./src

# 下载文件
hf-storage.py download ./build.tar.gz

# 列出文件
hf-storage.py list

# 同步目录
hf-storage.py sync ./dist /public
""",
    )
    sub = parser.add_subparsers(dest="command", help="命令")

    # upload: single file
    p_upload = sub.add_parser("upload", help="上传单个文件")
    p_upload.add_argument("file", help="本地文件路径")
    p_upload.add_argument("dest", nargs="?", help="远程目标路径")

    # upload-dir: whole directory
    p_upload_dir = sub.add_parser("upload-dir", help="上传整个目录")
    p_upload_dir.add_argument("dir", help="本地目录路径")
    p_upload_dir.add_argument("prefix", nargs="?", help="远程目录前缀")

    # download: single file
    p_download = sub.add_parser("download", help="下载文件")
    p_download.add_argument("file", help="远程文件路径")
    p_download.add_argument("path", nargs="?", default=".", help="本地保存路径")

    # list: repository listing
    p_list = sub.add_parser("list", help="列出文件")
    p_list.add_argument("path", nargs="?", default="", help="列出特定路径下的文件")

    # sync: upload files missing remotely
    p_sync = sub.add_parser("sync", help="同步目录")
    p_sync.add_argument("local_dir", help="本地目录")
    p_sync.add_argument("remote_dir", nargs="?", default="", help="远程目录")

    # delete: single remote file
    p_delete = sub.add_parser("delete", help="删除文件")
    p_delete.add_argument("file", help="要删除的远程文件路径")

    # info: repository metadata (takes no arguments)
    sub.add_parser("info", help="获取存储库信息")

    args = parser.parse_args()

    # Show help before touching the configuration, so the usage text is
    # available even when the environment variables are not set yet.
    if args.command is None:
        parser.print_help()
        return

    try:
        storage = HFStorage()
    except ValueError as e:
        print(f"错误: {e}")
        print("请设置 HF_STORAGE_REPO 和 HF_TOKEN 环境变量")
        sys.exit(1)

    ok = True
    if args.command == "upload":
        ok = storage.upload_file(args.file, args.dest)
    elif args.command == "upload-dir":
        results = storage.upload_directory(args.dir, args.prefix or "")
        print(f"\n上传完成: 成功 {results['success']}, 失败 {results['failed']}")
        ok = results["failed"] == 0
    elif args.command == "download":
        ok = storage.download_file(args.file, args.path) is not None
    elif args.command == "list":
        # An empty listing is indistinguishable from a failed one, so the
        # exit status is left untouched here.
        storage.list_files(args.path)
    elif args.command == "sync":
        results = storage.sync(args.local_dir, args.remote_dir or "")
        print(f"\n同步完成: 成功 {results['success']}, 失败 {results['failed']}, 跳过 {results['skipped']}")
        ok = results["failed"] == 0
    elif args.command == "delete":
        ok = storage.delete_file(args.file)
    elif args.command == "info":
        info = storage.get_info()
        print(json.dumps(info, indent=2))
        ok = bool(info)

    # Let shell scripts and CI detect a failed operation.
    if not ok:
        sys.exit(1)
|
|
|
|
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|