action / scripts /hf-storage.py
GGSheng's picture
feat: deploy Gemma 4 to hf space
020c337 verified
#!/usr/bin/env python3
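"""HuggingFace shared storage tool - Python version.

Uploads and downloads project files to and from an HF Storage Space.

Environment variables:
    HF_STORAGE_REPO  Storage repository name (format: username/repo)
    HF_TOKEN         HuggingFace token (write permission required)
    HF_API_URL       HF API base URL (default: https://huggingface.co)
                     NOTE(review): no code visible in this script reads
                     this variable - confirm whether it is still supported.
"""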
import argparse
import json
import os
import sys
from pathlib import Path
from typing import Optional
try:
from huggingface_hub import HfApi, hf_hub_download
from huggingface_hub.utils import HfHubHTTPError
except ModuleNotFoundError:
print("错误: 需要安装 huggingface_hub")
print("安装: pip install huggingface_hub")
sys.exit(1)
class HFStorage:
    """File storage helper backed by a HuggingFace Space repository.

    Every repository call is made with ``repo_type="space"``. Methods report
    progress and failures with ``print`` and signal failure through their
    return value instead of raising; only ``__init__`` raises.
    """

    # Repository type used for every Hub call made by this class.
    _REPO_TYPE = "space"

    def __init__(self, repo: Optional[str] = None, token: Optional[str] = None):
        """Resolve the repository id and token, then create the API client.

        Args:
            repo: Repository id (``username/repo``). Used when the
                ``HF_STORAGE_REPO`` environment variable is unset or blank.
            token: Access token. Used when ``HF_TOKEN`` is unset or blank.

        Raises:
            ValueError: If no repository id or no token can be resolved.
        """
        # The environment still takes precedence, but a blank variable must
        # not mask a value that was passed in explicitly.
        self.repo = (os.getenv("HF_STORAGE_REPO") or "").strip() or (repo or "").strip()
        self.token = (os.getenv("HF_TOKEN") or "").strip() or (token or "").strip()
        if not self.repo:
            raise ValueError("HF_STORAGE_REPO 未设置")
        if not self.token:
            raise ValueError("HF_TOKEN 未设置")
        self.api = HfApi(token=self.token)

    @staticmethod
    def _remote_path(prefix: str, rel_path: str) -> str:
        """Join a remote directory prefix and a POSIX-style relative path."""
        prefix = prefix.strip("/")
        return f"{prefix}/{rel_path}" if prefix else rel_path

    def _upload(self, local_path: Path, dest_path: str) -> None:
        """Upload one local file to ``dest_path`` in the Space; errors propagate."""
        self.api.upload_file(
            path_or_fileobj=str(local_path),
            path_in_repo=dest_path,
            repo_id=self.repo,
            repo_type=self._REPO_TYPE,
        )

    def upload_file(self, file_path: str, dest: Optional[str] = None) -> bool:
        """Upload a single file.

        Args:
            file_path: Local path of the file to upload.
            dest: Path inside the repository; defaults to the local file name.

        Returns:
            True on success, False if the path does not exist or the upload
            raised.
        """
        local_path = Path(file_path)
        if not local_path.exists():
            print(f"错误: 文件不存在: {file_path}")
            return False
        dest_path = dest or local_path.name
        print(f"上传: {local_path} -> {self.repo}/{dest_path}")
        try:
            self._upload(local_path, dest_path)
        except Exception as e:
            print(f" 失败: {e}")
            return False
        print(" 成功!")
        return True

    def upload_directory(self, dir_path: str, prefix: str = "") -> dict:
        """Upload every file below a directory, one upload call per file.

        Args:
            dir_path: Local directory to walk recursively.
            prefix: Remote directory to place the files under; leading and
                trailing slashes are ignored.

        Returns:
            A dict with ``success`` and ``failed`` counters and a ``files``
            list holding one record per attempted file. The same shape is
            returned (with zero counts) when ``dir_path`` is not a directory.
        """
        results = {"success": 0, "failed": 0, "files": []}
        local_dir = Path(dir_path)
        if not local_dir.is_dir():
            print(f"错误: 目录不存在: {dir_path}")
            return results
        # Sorted so the upload order does not depend on the filesystem.
        for file_path in sorted(local_dir.rglob("*")):
            if not file_path.is_file():
                continue
            rel_path = file_path.relative_to(local_dir)
            # Repository paths always use "/", whatever the local OS uses.
            dest_path = self._remote_path(prefix, rel_path.as_posix())
            print(f"上传: {rel_path} -> {dest_path}")
            try:
                self._upload(file_path, dest_path)
            except Exception as e:
                results["failed"] += 1
                results["files"].append(
                    {"local": str(rel_path), "remote": dest_path, "status": "failed", "error": str(e)}
                )
                print(f" 失败: {e}")
            else:
                results["success"] += 1
                results["files"].append(
                    {"local": str(rel_path), "remote": dest_path, "status": "success"}
                )
        return results

    def download_file(self, filename: str, local_path: str = ".") -> Optional[Path]:
        """Download one file from the Space.

        Args:
            filename: Path of the file inside the repository.
            local_path: Local directory handed to ``hf_hub_download`` as
                ``local_dir``.

        Returns:
            The path reported by ``hf_hub_download``, or None on any failure.
        """
        print(f"下载: {self.repo}/{filename} -> {local_path}")
        try:
            path = hf_hub_download(
                repo_id=self.repo,
                filename=filename,
                # Must match the repo type used by the upload/list calls.
                repo_type=self._REPO_TYPE,
                local_dir=local_path,
                token=self.token,
            )
        except HfHubHTTPError as e:
            response = getattr(e, "response", None)
            # Compare against None explicitly: a response object for an
            # error status can evaluate as falsy.
            if response is not None and response.status_code == 404:
                print(f" 文件不存在: {filename}")
            else:
                print(f" 失败: {e}")
            return None
        except Exception as e:
            print(f" 失败: {e}")
            return None
        print(f" 成功: {path}")
        return Path(path)

    def list_files(self, path: str = "") -> list:
        """Print and return the repository files, optionally below ``path``.

        Args:
            path: Remote file or directory to restrict the listing to.
                Matching is per path component, so ``src`` selects ``src``
                and ``src/...`` but not ``src2/...``. Empty lists everything.

        Returns:
            The matching repository paths, or an empty list on failure.
        """
        print(f"列出: {self.repo}{'/' + path if path else ''}")
        try:
            files = list(self.api.list_repo_files(repo_id=self.repo, repo_type=self._REPO_TYPE))
        except Exception as e:
            print(f" 失败: {e}")
            return []
        wanted = path.strip("/")
        if wanted:
            files = [f for f in files if f == wanted or f.startswith(wanted + "/")]
        for f in files:
            print(f" {f}")
        return files

    def sync(self, local_dir: str, remote_dir: str = "") -> dict:
        """Upload the local files that are missing from the repository.

        A file is skipped when a file with the same relative path already
        exists remotely; contents are not compared. If the remote listing
        cannot be fetched, the error is printed and every file is uploaded.

        Args:
            local_dir: Local directory to walk recursively.
            remote_dir: Remote directory to mirror into; leading and trailing
                slashes are ignored, so ``/public`` and ``public`` are equal.

        Returns:
            A dict with ``success``, ``failed`` and ``skipped`` counters.
        """
        results = {"success": 0, "failed": 0, "skipped": 0}
        local_path = Path(local_dir)
        if not local_path.is_dir():
            print(f"错误: 目录不存在: {local_dir}")
            return results
        # Repository paths never start with "/", so normalise before matching.
        remote_dir = remote_dir.strip("/")
        print(f"同步: {local_dir} -> {self.repo}{'/' + remote_dir if remote_dir else ''}")

        # Collect the remote paths relative to remote_dir.
        remote_files = set()
        try:
            for f in self.api.list_repo_files(repo_id=self.repo, repo_type=self._REPO_TYPE):
                if not remote_dir:
                    remote_files.add(f)
                elif f.startswith(remote_dir + "/"):
                    remote_files.add(f[len(remote_dir) + 1:])
        except Exception as e:
            print(f" 获取远程文件列表失败: {e}")

        for file_path in sorted(local_path.rglob("*")):
            if not file_path.is_file():
                continue
            # POSIX form so the key matches the repository listing on any OS.
            rel_path = file_path.relative_to(local_path).as_posix()
            if rel_path in remote_files:
                print(f" 跳过(已存在): {rel_path}")
                results["skipped"] += 1
                continue
            dest_path = self._remote_path(remote_dir, rel_path)
            print(f" 上传: {rel_path} -> {dest_path}")
            try:
                self._upload(file_path, dest_path)
            except Exception as e:
                results["failed"] += 1
                print(f" 失败: {e}")
            else:
                results["success"] += 1
        return results

    def delete_file(self, filename: str) -> bool:
        """Delete one file from the repository.

        Args:
            filename: Path of the file inside the repository.

        Returns:
            True on success, False if the delete call raised.
        """
        print(f"删除: {self.repo}/{filename}")
        try:
            self.api.delete_file(
                repo_id=self.repo,
                path_in_repo=filename,
                repo_type=self._REPO_TYPE,
            )
        except Exception as e:
            print(f" 失败: {e}")
            return False
        print(" 成功!")
        return True

    def get_info(self) -> dict:
        """Return basic repository metadata.

        Returns:
            A dict with ``id``, ``name`` (both taken from the repository id),
            ``private`` and ``created_at`` (string or None), or an empty dict
            when the lookup fails.
        """
        try:
            info = self.api.repo_info(repo_id=self.repo, repo_type=self._REPO_TYPE)
            return {
                "id": info.id,
                "name": info.id,
                "private": info.private,
                "created_at": str(info.created_at) if info.created_at else None,
            }
        except Exception as e:
            print(f"获取信息失败: {e}")
            return {}
def main():
    """Parse the command line, run the chosen sub-command and set the exit status.

    Without a sub-command the help text is printed and no credentials are
    required. The process exits with status 1 when the credentials are
    missing or when the selected operation reports a failure, so calling
    scripts can detect errors. ``list`` never fails the process because an
    empty listing and a failed listing are indistinguishable from its return
    value.
    """
    parser = argparse.ArgumentParser(
        description="HuggingFace 公共存储工具",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
示例:
# 设置环境变量
export HF_STORAGE_REPO="username/storage"
export HF_TOKEN="hf_xxxx"
# 上传文件
hf-storage.py upload ./build.tar.gz
# 上传目录
hf-storage.py upload-dir ./src
# 下载文件
hf-storage.py download ./build.tar.gz
# 列出文件
hf-storage.py list
# 同步目录
hf-storage.py sync ./dist /public
"""
    )
    sub = parser.add_subparsers(dest="command", help="命令")

    # upload
    p_upload = sub.add_parser("upload", help="上传单个文件")
    p_upload.add_argument("file", help="本地文件路径")
    p_upload.add_argument("dest", nargs="?", help="远程目标路径")

    # upload-dir
    p_upload_dir = sub.add_parser("upload-dir", help="上传整个目录")
    p_upload_dir.add_argument("dir", help="本地目录路径")
    p_upload_dir.add_argument("prefix", nargs="?", help="远程目录前缀")

    # download
    p_download = sub.add_parser("download", help="下载文件")
    p_download.add_argument("file", help="远程文件路径")
    p_download.add_argument("path", nargs="?", default=".", help="本地保存路径")

    # list
    p_list = sub.add_parser("list", help="列出文件")
    p_list.add_argument("path", nargs="?", default="", help="列出特定路径下的文件")

    # sync
    p_sync = sub.add_parser("sync", help="同步目录")
    p_sync.add_argument("local_dir", help="本地目录")
    p_sync.add_argument("remote_dir", nargs="?", default="", help="远程目录")

    # delete
    p_delete = sub.add_parser("delete", help="删除文件")
    p_delete.add_argument("file", help="要删除的远程文件路径")

    # info (takes no arguments)
    sub.add_parser("info", help="获取存储库信息")

    args = parser.parse_args()

    # Showing usage needs no credentials, so handle it before building the client.
    if not args.command:
        parser.print_help()
        return

    try:
        storage = HFStorage()
    except ValueError as e:
        print(f"错误: {e}")
        print("请设置 HF_STORAGE_REPO 和 HF_TOKEN 环境变量")
        sys.exit(1)

    ok = True
    if args.command == "upload":
        ok = storage.upload_file(args.file, args.dest)
    elif args.command == "upload-dir":
        results = storage.upload_directory(args.dir, args.prefix or "")
        print(f"\n上传完成: 成功 {results['success']}, 失败 {results['failed']}")
        ok = results["failed"] == 0
    elif args.command == "download":
        ok = storage.download_file(args.file, args.path) is not None
    elif args.command == "list":
        storage.list_files(args.path)
    elif args.command == "sync":
        results = storage.sync(args.local_dir, args.remote_dir or "")
        print(f"\n同步完成: 成功 {results['success']}, 失败 {results['failed']}, 跳过 {results['skipped']}")
        ok = results["failed"] == 0
    elif args.command == "delete":
        ok = storage.delete_file(args.file)
    elif args.command == "info":
        info = storage.get_info()
        print(json.dumps(info, indent=2))
        # get_info returns an empty dict when the lookup failed.
        ok = bool(info)

    if not ok:
        sys.exit(1)


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()