File size: 10,393 Bytes
3b47d98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
#!/usr/bin/env python3
"""HuggingFace 公共存储工具 - Python 版本

用于上传/下载项目文件到 HF Storage Space

环境变量:
    HF_STORAGE_REPO    存储库名称 (格式: username/repo)
    HF_TOKEN           HuggingFace Token (需要 write 权限)
    HF_API_URL         HF API 地址 (默认: https://huggingface.co)
"""

import argparse
import json
import os
import sys
from pathlib import Path
from typing import Optional

try:
    from huggingface_hub import HfApi, hf_hub_download
    from huggingface_hub.utils import HfHubHTTPError
except ModuleNotFoundError:
    print("错误: 需要安装 huggingface_hub")
    print("安装: pip install huggingface_hub")
    sys.exit(1)


class HFStorage:
    """Use a HuggingFace Space repository as simple file storage.

    Wraps ``HfApi`` / ``hf_hub_download`` with upload, download, list, sync,
    delete and info operations. Every operation addresses the repository as a
    ``space`` repo. Methods report progress and errors with ``print`` and
    signal failure through their return value instead of raising.

    Attributes:
        repo: Repository id in ``username/repo`` form.
        token: HuggingFace access token used for API calls.
        api: ``HfApi`` client created with ``token``.
    """

    # Every repository call in this class targets a Space repo.
    _REPO_TYPE = "space"

    def __init__(self, repo: Optional[str] = None, token: Optional[str] = None):
        """Resolve the repository and token, then create the API client.

        The ``HF_STORAGE_REPO`` / ``HF_TOKEN`` environment variables take
        precedence; the arguments are used when the corresponding variable is
        unset or blank.

        Args:
            repo: Fallback repository id (``username/repo``).
            token: Fallback access token.

        Raises:
            ValueError: If no repository or no token can be resolved.
        """
        self.repo = self._setting("HF_STORAGE_REPO", repo)
        self.token = self._setting("HF_TOKEN", token)
        if not self.repo:
            raise ValueError("HF_STORAGE_REPO 未设置")
        if not self.token:
            raise ValueError("HF_TOKEN 未设置")
        self.api = HfApi(token=self.token)

    @staticmethod
    def _setting(env_name: str, fallback: Optional[str]) -> str:
        """Return the stripped env value, or the stripped fallback if it is blank."""
        value = os.getenv(env_name, "").strip()
        return value or (fallback or "").strip()

    @staticmethod
    def _clean_remote_dir(remote_dir: Optional[str]) -> str:
        """Drop leading/trailing slashes so repo paths never start with or double a '/'."""
        return (remote_dir or "").strip("/")

    @staticmethod
    def _remote_path(remote_dir: str, rel_path: str) -> str:
        """Join an already-cleaned remote directory and a POSIX relative path."""
        return f"{remote_dir}/{rel_path}" if remote_dir else rel_path

    @staticmethod
    def _is_under(remote_file: str, remote_dir: str) -> bool:
        """Tell whether ``remote_file`` is ``remote_dir`` itself or lies below it."""
        if not remote_dir:
            return True
        return remote_file == remote_dir or remote_file.startswith(remote_dir + "/")

    @staticmethod
    def _local_files(local_dir: Path):
        """Yield ``(path, path relative to local_dir)`` for every file below ``local_dir``."""
        for candidate in local_dir.rglob("*"):
            if candidate.is_file():
                yield candidate, candidate.relative_to(local_dir)

    def _upload(self, local_path: Path, dest_path: str) -> None:
        """Upload one local file to ``dest_path`` in the repo; API errors propagate."""
        self.api.upload_file(
            path_or_fileobj=str(local_path),
            path_in_repo=dest_path,
            repo_id=self.repo,
            repo_type=self._REPO_TYPE,
        )

    def upload_file(self, file_path: str, dest: Optional[str] = None) -> bool:
        """Upload a single file.

        Args:
            file_path: Local file to upload.
            dest: Path inside the repo; defaults to the local file name.

        Returns:
            True on success, False if the local path is not a file or the
            upload fails.
        """
        local_path = Path(file_path)
        # is_file() (not exists()) so a directory is rejected up front.
        if not local_path.is_file():
            print(f"错误: 文件不存在: {file_path}")
            return False

        dest_path = dest or local_path.name
        print(f"上传: {local_path} -> {self.repo}/{dest_path}")

        try:
            self._upload(local_path, dest_path)
        except Exception as e:
            print(f"  失败: {e}")
            return False
        print("  成功!")
        return True

    def upload_directory(self, dir_path: str, prefix: str = "") -> dict:
        """Upload every file below a directory, one upload call per file.

        Args:
            dir_path: Local directory to walk recursively.
            prefix: Optional remote directory to place the files under.

        Returns:
            A dict with ``success`` and ``failed`` counters and a ``files``
            list holding one record per attempted file. All three keys are
            present even when ``dir_path`` is not a directory.
        """
        results = {"success": 0, "failed": 0, "files": []}

        local_dir = Path(dir_path)
        if not local_dir.is_dir():
            print(f"错误: 目录不存在: {dir_path}")
            return results

        remote_dir = self._clean_remote_dir(prefix)

        for file_path, rel_path in self._local_files(local_dir):
            # Repo paths always use '/', whatever the local OS separator is.
            dest_path = self._remote_path(remote_dir, rel_path.as_posix())
            record = {"local": str(rel_path), "remote": dest_path}

            print(f"上传: {rel_path} -> {dest_path}")
            try:
                self._upload(file_path, dest_path)
            except Exception as e:
                results["failed"] += 1
                record.update(status="failed", error=str(e))
                print(f"  失败: {e}")
            else:
                results["success"] += 1
                record["status"] = "success"
            results["files"].append(record)

        return results

    def download_file(self, filename: str, local_path: str = ".") -> Optional[Path]:
        """Download one file from the repo.

        Args:
            filename: Path of the file inside the repo.
            local_path: Local directory passed to ``hf_hub_download`` as
                ``local_dir``.

        Returns:
            The path of the downloaded file, or None on any failure.
        """
        print(f"下载: {self.repo}/{filename} -> {local_path}")

        try:
            path = hf_hub_download(
                repo_id=self.repo,
                filename=filename,
                # Without this the hub looks in a *model* repo of the same id.
                repo_type=self._REPO_TYPE,
                local_dir=local_path,
                token=self.token,
            )
        except HfHubHTTPError as e:
            # Compare against None explicitly: a requests.Response is falsy
            # for 4xx/5xx, so a truthiness test would hide the 404 branch.
            response = getattr(e, "response", None)
            if response is not None and response.status_code == 404:
                print(f"  文件不存在: {filename}")
            else:
                print(f"  失败: {e}")
            return None
        except Exception as e:
            print(f"  失败: {e}")
            return None

        print(f"  成功: {path}")
        return Path(path)

    def list_files(self, path: str = "") -> list:
        """List repo files, optionally restricted to one remote path.

        Args:
            path: Remote directory (or exact file path) to restrict the
                listing to. Matching respects path boundaries, so ``src``
                does not match ``src2/...``.

        Returns:
            The matching repo paths, or an empty list if the listing fails.
        """
        remote_dir = self._clean_remote_dir(path)
        print(f"列出: {self.repo}{'/' + remote_dir if remote_dir else ''}")

        try:
            files = [
                f
                for f in self.api.list_repo_files(
                    repo_id=self.repo, repo_type=self._REPO_TYPE
                )
                if self._is_under(f, remote_dir)
            ]
        except Exception as e:
            print(f"  失败: {e}")
            return []

        for f in files:
            print(f"  {f}")
        return files

    def sync(self, local_dir: str, remote_dir: str = "") -> dict:
        """Upload local files that are missing from the remote directory.

        Files are compared by relative path only; a remote file with the same
        path is skipped without comparing contents. If the remote listing
        cannot be fetched, every local file is uploaded.

        Args:
            local_dir: Local directory to walk recursively.
            remote_dir: Remote directory to mirror into (repo root if empty).

        Returns:
            A dict with ``success``, ``failed`` and ``skipped`` counters.
        """
        results = {"success": 0, "failed": 0, "skipped": 0}

        local_path = Path(local_dir)
        if not local_path.is_dir():
            print(f"错误: 目录不存在: {local_dir}")
            return results

        remote_dir = self._clean_remote_dir(remote_dir)
        print(f"同步: {local_dir} -> {self.repo}{'/' + remote_dir if remote_dir else ''}")

        # Remote paths relative to remote_dir, used for the existence check.
        remote_prefix = remote_dir + "/" if remote_dir else ""
        remote_files = set()
        try:
            remote_files = {
                f[len(remote_prefix):]
                for f in self.api.list_repo_files(
                    repo_id=self.repo, repo_type=self._REPO_TYPE
                )
                if f.startswith(remote_prefix)
            }
        except Exception as e:
            # Best effort: continue with an empty set, i.e. upload everything.
            print(f"  获取远程文件列表失败: {e}")

        for file_path, rel_path in self._local_files(local_path):
            # POSIX form so the key matches repo paths on every platform.
            rel_key = rel_path.as_posix()

            if rel_key in remote_files:
                print(f"  跳过(已存在): {rel_path}")
                results["skipped"] += 1
                continue

            dest_path = self._remote_path(remote_dir, rel_key)
            print(f"  上传: {rel_path} -> {dest_path}")
            try:
                self._upload(file_path, dest_path)
            except Exception as e:
                results["failed"] += 1
                print(f"    失败: {e}")
            else:
                results["success"] += 1

        return results

    def delete_file(self, filename: str) -> bool:
        """Delete one file from the repo.

        Args:
            filename: Path of the file inside the repo.

        Returns:
            True on success, False if the API call fails.
        """
        print(f"删除: {self.repo}/{filename}")

        try:
            self.api.delete_file(
                repo_id=self.repo,
                path_in_repo=filename,
                repo_type=self._REPO_TYPE,
            )
        except Exception as e:
            print(f"  失败: {e}")
            return False
        print("  成功!")
        return True

    def get_info(self) -> dict:
        """Fetch basic repository metadata.

        Returns:
            A dict with ``id``, ``name`` (same value as ``id``), ``private``
            and ``created_at`` (string or None), or an empty dict if the API
            call fails.
        """
        try:
            info = self.api.repo_info(repo_id=self.repo, repo_type=self._REPO_TYPE)
            return {
                "id": info.id,
                "name": info.id,
                "private": info.private,
                "created_at": str(info.created_at) if info.created_at else None,
            }
        except Exception as e:
            print(f"获取信息失败: {e}")
            return {}


def main():
    """Parse the command line and run the requested storage command.

    With no command, prints the usage text and returns without requiring any
    credentials. Otherwise builds an ``HFStorage`` from the environment and
    dispatches to the matching method.

    Exits with status 1 when the repository/token settings are missing or
    when the executed command reports a failure, so shell scripts and CI jobs
    can detect errors.
    """
    parser = argparse.ArgumentParser(
        description="HuggingFace 公共存储工具",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
示例:
  # 设置环境变量
  export HF_STORAGE_REPO="username/storage"
  export HF_TOKEN="hf_xxxx"

  # 上传文件
  hf-storage.py upload ./build.tar.gz

  # 上传目录
  hf-storage.py upload-dir ./src

  # 下载文件
  hf-storage.py download build.tar.gz

  # 列出文件
  hf-storage.py list

  # 同步目录
  hf-storage.py sync ./dist public
        """
    )
    sub = parser.add_subparsers(dest="command", help="命令")

    # upload
    p_upload = sub.add_parser("upload", help="上传单个文件")
    p_upload.add_argument("file", help="本地文件路径")
    p_upload.add_argument("dest", nargs="?", help="远程目标路径")

    # upload-dir
    p_upload_dir = sub.add_parser("upload-dir", help="上传整个目录")
    p_upload_dir.add_argument("dir", help="本地目录路径")
    p_upload_dir.add_argument("prefix", nargs="?", help="远程目录前缀")

    # download
    p_download = sub.add_parser("download", help="下载文件")
    p_download.add_argument("file", help="远程文件路径")
    p_download.add_argument("path", nargs="?", default=".", help="本地保存路径")

    # list
    p_list = sub.add_parser("list", help="列出文件")
    p_list.add_argument("path", nargs="?", default="", help="列出特定路径下的文件")

    # sync
    p_sync = sub.add_parser("sync", help="同步目录")
    p_sync.add_argument("local_dir", help="本地目录")
    p_sync.add_argument("remote_dir", nargs="?", default="", help="远程目录")

    # delete
    p_delete = sub.add_parser("delete", help="删除文件")
    p_delete.add_argument("file", help="要删除的远程文件路径")

    # info (takes no arguments)
    sub.add_parser("info", help="获取存储库信息")

    args = parser.parse_args()

    # Show usage before asking for credentials so a bare invocation always
    # produces help rather than a configuration error.
    if not args.command:
        parser.print_help()
        return

    try:
        storage = HFStorage()
    except ValueError as e:
        print(f"错误: {e}")
        print("请设置 HF_STORAGE_REPO 和 HF_TOKEN 环境变量")
        sys.exit(1)

    ok = True
    if args.command == "upload":
        ok = storage.upload_file(args.file, args.dest)
    elif args.command == "upload-dir":
        results = storage.upload_directory(args.dir, args.prefix or "")
        print(f"\n上传完成: 成功 {results['success']}, 失败 {results['failed']}")
        ok = results["failed"] == 0
    elif args.command == "download":
        ok = storage.download_file(args.file, args.path) is not None
    elif args.command == "list":
        # An empty list may mean "no files" or "listing failed"; the return
        # value cannot tell them apart, so this command never fails the run.
        storage.list_files(args.path)
    elif args.command == "sync":
        results = storage.sync(args.local_dir, args.remote_dir or "")
        print(f"\n同步完成: 成功 {results['success']}, 失败 {results['failed']}, 跳过 {results['skipped']}")
        ok = results["failed"] == 0
    elif args.command == "delete":
        ok = storage.delete_file(args.file)
    elif args.command == "info":
        info = storage.get_info()
        print(json.dumps(info, indent=2))
        # get_info returns an empty dict when the API call failed.
        ok = bool(info)

    if not ok:
        sys.exit(1)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()