#!/usr/bin/env python3
"""HuggingFace shared-storage helper (Python version).

Uploads, downloads, lists, syncs and deletes project files in a HuggingFace
Space repository that is used as a storage bucket.

Environment variables:
    HF_STORAGE_REPO  Repository name, formatted as ``username/repo``.
    HF_TOKEN         HuggingFace token (needs write permission).
    HF_API_URL       HF API endpoint (default: https://huggingface.co).
"""

import argparse
import json
import os
import sys
from pathlib import Path
from typing import Optional

try:
    from huggingface_hub import HfApi, hf_hub_download
    from huggingface_hub.utils import HfHubHTTPError
except ModuleNotFoundError:
    print("错误: 需要安装 huggingface_hub")
    print("安装: pip install huggingface_hub")
    sys.exit(1)

# Every repository operation in this tool targets a Space repository.
_REPO_TYPE = "space"


def _normalize_remote(path: Optional[str]) -> str:
    """Return *path* without leading/trailing slashes ("" for None/empty).

    Repository paths are relative, so a user-supplied "/public" has to become
    "public" before it can be compared with the remote file listing.
    """
    return (path or "").strip("/")


class HFStorage:
    """Thin wrapper around ``HfApi`` for using a Space repo as file storage.

    All methods report progress on stdout and signal failure through their
    return value instead of raising, so they can be driven directly by the
    command-line interface in ``main``.
    """

    def __init__(self, repo: Optional[str] = None, token: Optional[str] = None):
        """Resolve credentials and create the API client.

        Args:
            repo: Repository id (``username/repo``). Falls back to the
                ``HF_STORAGE_REPO`` environment variable when empty.
            token: Access token. Falls back to ``HF_TOKEN`` when empty.

        Raises:
            ValueError: If the repository or the token cannot be resolved.
        """
        # Explicit arguments win over the environment; the environment is
        # only the fallback (previously it silently overrode the arguments).
        self.repo = (repo or "").strip() or os.getenv("HF_STORAGE_REPO", "").strip()
        self.token = (token or "").strip() or os.getenv("HF_TOKEN", "").strip()

        if not self.repo:
            raise ValueError("HF_STORAGE_REPO 未设置")
        if not self.token:
            raise ValueError("HF_TOKEN 未设置")

        # None lets huggingface_hub use its own default endpoint.
        self.endpoint = os.getenv("HF_API_URL", "").strip() or None
        self.api = HfApi(endpoint=self.endpoint, token=self.token)

    def _upload(self, local_file: Path, dest_path: str) -> None:
        """Upload one local file to *dest_path* in the repo; raises on error."""
        self.api.upload_file(
            path_or_fileobj=str(local_file),
            path_in_repo=dest_path,
            repo_id=self.repo,
            repo_type=_REPO_TYPE,
        )

    def _remote_files(self) -> list:
        """Return every file path in the repo; raises on error."""
        return list(self.api.list_repo_files(repo_id=self.repo, repo_type=_REPO_TYPE))

    def upload_file(self, file_path: str, dest: Optional[str] = None) -> bool:
        """Upload a single file.

        Args:
            file_path: Local file to upload.
            dest: Path inside the repo; defaults to the local file name.

        Returns:
            True on success, False if the file is missing or the upload fails.
        """
        local_path = Path(file_path)
        # is_file() rather than exists(): a directory cannot be uploaded here.
        if not local_path.is_file():
            print(f"错误: 文件不存在: {file_path}")
            return False

        dest_path = _normalize_remote(dest) or local_path.name
        print(f"上传: {local_path} -> {self.repo}/{dest_path}")

        try:
            self._upload(local_path, dest_path)
        except Exception as e:  # report any hub/network error to the user
            print(f" 失败: {e}")
            return False
        print(" 成功!")
        return True

    def upload_directory(self, dir_path: str, prefix: str = "") -> dict:
        """Upload every file below a directory, preserving relative paths.

        Args:
            dir_path: Local directory to walk recursively.
            prefix: Optional remote directory the files are placed under.

        Returns:
            A dict with ``success`` and ``failed`` counters and a ``files``
            list holding one record (local, remote, status[, error]) per file.
            All three keys are present even when the directory is missing.
        """
        results = {"success": 0, "failed": 0, "files": []}
        local_dir = Path(dir_path)
        if not local_dir.is_dir():
            print(f"错误: 目录不存在: {dir_path}")
            return results

        prefix = _normalize_remote(prefix)

        for file_path in sorted(local_dir.rglob("*")):
            if not file_path.is_file():
                continue
            # as_posix(): repo paths always use "/", even on Windows.
            rel_path = file_path.relative_to(local_dir).as_posix()
            dest_path = f"{prefix}/{rel_path}" if prefix else rel_path
            print(f"上传: {rel_path} -> {dest_path}")
            try:
                self._upload(file_path, dest_path)
            except Exception as e:  # keep going; one bad file must not abort
                results["failed"] += 1
                results["files"].append(
                    {"local": rel_path, "remote": dest_path, "status": "failed", "error": str(e)}
                )
                print(f" 失败: {e}")
            else:
                results["success"] += 1
                results["files"].append(
                    {"local": rel_path, "remote": dest_path, "status": "success"}
                )

        return results

    def download_file(self, filename: str, local_path: str = ".") -> Optional[Path]:
        """Download one file from the repo.

        Args:
            filename: Path of the file inside the repo.
            local_path: Local directory the file is saved under.

        Returns:
            The path of the downloaded file, or None on any failure.
        """
        print(f"下载: {self.repo}/{filename} -> {local_path}")
        # Only pass a custom endpoint when one is configured.
        extra = {"endpoint": self.endpoint} if self.endpoint else {}
        try:
            path = hf_hub_download(
                repo_id=self.repo,
                filename=filename,
                # Without repo_type the hub looks in a *model* repo.
                repo_type=_REPO_TYPE,
                local_dir=local_path,
                token=self.token,
                **extra,
            )
        except HfHubHTTPError as e:
            response = getattr(e, "response", None)
            # Compare with None explicitly: a requests.Response is falsy for
            # 4xx/5xx statuses, so a truthiness test never sees the 404.
            if response is not None and response.status_code == 404:
                print(f" 文件不存在: {filename}")
            else:
                print(f" 失败: {e}")
            return None
        except Exception as e:
            print(f" 失败: {e}")
            return None
        print(f" 成功: {path}")
        return Path(path)

    def list_files(self, path: str = "") -> list:
        """Print and return the repo files whose path starts with *path*.

        The match is a plain string prefix; pass a trailing "/" to restrict
        the listing to one directory. A leading "/" is ignored.

        Returns:
            The matching file paths, or an empty list on failure.
        """
        prefix = (path or "").lstrip("/")
        print(f"列出: {self.repo}{'/' + prefix if prefix else ''}")
        try:
            files = self._remote_files()
        except Exception as e:
            print(f" 失败: {e}")
            return []

        if prefix:
            files = [f for f in files if f.startswith(prefix)]
        for f in files:
            print(f" {f}")
        return files

    def sync(self, local_dir: str, remote_dir: str = "") -> dict:
        """Upload the local files that do not yet exist in the repo.

        Existence is decided by path only; file contents are not compared.

        Args:
            local_dir: Local directory to walk recursively.
            remote_dir: Optional remote directory to sync into.

        Returns:
            A dict with ``success``, ``failed`` and ``skipped`` counters.
        """
        results = {"success": 0, "failed": 0, "skipped": 0}
        local_path = Path(local_dir)
        if not local_path.is_dir():
            print(f"错误: 目录不存在: {local_dir}")
            return results

        remote_dir = _normalize_remote(remote_dir)
        print(f"同步: {local_dir} -> {self.repo}{'/' + remote_dir if remote_dir else ''}")

        # Fetch the remote listing, keyed relative to remote_dir. If this
        # fails the set stays empty and every local file is uploaded
        # (best effort, same as before).
        remote_files = set()
        try:
            listing = self._remote_files()
        except Exception as e:
            print(f" 获取远程文件列表失败: {e}")
        else:
            if remote_dir:
                marker = remote_dir + "/"
                remote_files = {f[len(marker):] for f in listing if f.startswith(marker)}
            else:
                remote_files = set(listing)

        for file_path in sorted(local_path.rglob("*")):
            if not file_path.is_file():
                continue
            # as_posix() so the key matches the "/"-separated remote listing.
            rel_path = file_path.relative_to(local_path).as_posix()
            dest_path = f"{remote_dir}/{rel_path}" if remote_dir else rel_path

            if rel_path in remote_files:
                print(f" 跳过(已存在): {rel_path}")
                results["skipped"] += 1
                continue

            print(f" 上传: {rel_path} -> {dest_path}")
            try:
                self._upload(file_path, dest_path)
            except Exception as e:
                results["failed"] += 1
                print(f" 失败: {e}")
            else:
                results["success"] += 1

        return results

    def delete_file(self, filename: str) -> bool:
        """Delete one file from the repo; return True on success."""
        print(f"删除: {self.repo}/{filename}")
        try:
            self.api.delete_file(
                repo_id=self.repo,
                path_in_repo=filename,
                repo_type=_REPO_TYPE,
            )
        except Exception as e:
            print(f" 失败: {e}")
            return False
        print(" 成功!")
        return True

    def get_info(self) -> dict:
        """Return basic repo metadata, or an empty dict on failure.

        The dict has the keys ``id``, ``name`` (currently the same value as
        ``id``), ``private`` and ``created_at`` (string or None).
        """
        try:
            info = self.api.repo_info(repo_id=self.repo, repo_type=_REPO_TYPE)
            return {
                "id": info.id,
                "name": info.id,
                "private": info.private,
                "created_at": str(info.created_at) if info.created_at else None,
            }
        except Exception as e:
            print(f"获取信息失败: {e}")
            return {}


def main() -> int:
    """Parse the command line, run the requested command, return an exit code.

    Returns:
        0 on success (or when only the help text is shown), 1 when the
        requested operation reported a failure.
    """
    parser = argparse.ArgumentParser(
        description="HuggingFace 公共存储工具",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
示例:
  # 设置环境变量
  export HF_STORAGE_REPO="username/storage"
  export HF_TOKEN="hf_xxxx"

  # 上传文件
  hf-storage.py upload ./build.tar.gz

  # 上传目录
  hf-storage.py upload-dir ./src

  # 下载文件
  hf-storage.py download ./build.tar.gz

  # 列出文件
  hf-storage.py list

  # 同步目录
  hf-storage.py sync ./dist /public
""",
    )

    sub = parser.add_subparsers(dest="command", help="命令")

    # upload
    p_upload = sub.add_parser("upload", help="上传单个文件")
    p_upload.add_argument("file", help="本地文件路径")
    p_upload.add_argument("dest", nargs="?", help="远程目标路径")

    # upload-dir
    p_upload_dir = sub.add_parser("upload-dir", help="上传整个目录")
    p_upload_dir.add_argument("dir", help="本地目录路径")
    p_upload_dir.add_argument("prefix", nargs="?", help="远程目录前缀")

    # download
    p_download = sub.add_parser("download", help="下载文件")
    p_download.add_argument("file", help="远程文件路径")
    p_download.add_argument("path", nargs="?", default=".", help="本地保存路径")

    # list
    p_list = sub.add_parser("list", help="列出文件")
    p_list.add_argument("path", nargs="?", default="", help="列出特定路径下的文件")

    # sync
    p_sync = sub.add_parser("sync", help="同步目录")
    p_sync.add_argument("local_dir", help="本地目录")
    p_sync.add_argument("remote_dir", nargs="?", default="", help="远程目录")

    # delete
    p_delete = sub.add_parser("delete", help="删除文件")
    p_delete.add_argument("file", help="要删除的远程文件路径")

    # info
    sub.add_parser("info", help="获取存储库信息")

    args = parser.parse_args()

    # Show help before touching credentials, so running the tool without a
    # command works even when the environment is not configured yet.
    if not args.command:
        parser.print_help()
        return 0

    try:
        storage = HFStorage()
    except ValueError as e:
        print(f"错误: {e}")
        print("请设置 HF_STORAGE_REPO 和 HF_TOKEN 环境变量")
        sys.exit(1)

    ok = True
    if args.command == "upload":
        ok = storage.upload_file(args.file, args.dest)
    elif args.command == "upload-dir":
        results = storage.upload_directory(args.dir, args.prefix or "")
        print(f"\n上传完成: 成功 {results['success']}, 失败 {results['failed']}")
        ok = results["failed"] == 0
    elif args.command == "download":
        ok = storage.download_file(args.file, args.path) is not None
    elif args.command == "list":
        storage.list_files(args.path)
    elif args.command == "sync":
        results = storage.sync(args.local_dir, args.remote_dir or "")
        print(
            f"\n同步完成: 成功 {results['success']}, "
            f"失败 {results['failed']}, 跳过 {results['skipped']}"
        )
        ok = results["failed"] == 0
    elif args.command == "delete":
        ok = storage.delete_file(args.file)
    elif args.command == "info":
        info = storage.get_info()
        print(json.dumps(info, indent=2))
        ok = bool(info)

    # Propagate failures to the shell so scripts and CI can react to them.
    return 0 if ok else 1


if __name__ == "__main__":
    sys.exit(main())