#!/usr/bin/env python3
"""
Resumable Hugging Face download script.

Mirror: hf-mirror.com
Target: MMInstruction/M3IT
"""
import os
import sys

# Point the hub client at the domestic mirror. This assignment is kept ahead
# of the huggingface_hub imports so the setting is in place when the library
# is loaded.
os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"

from huggingface_hub import snapshot_download
from huggingface_hub import hf_hub_download
import huggingface_hub
# Repository to fetch from the hub.
REPO_ID = "MMInstruction/M3IT"
# Directory on the local machine that receives the downloaded files.
LOCAL_DIR = "/workspace/xiaobin/dataset"
# Kind of hub repository; M3IT is a dataset.
REPO_TYPE = "dataset"
def download():
    """Download a full snapshot of ``REPO_ID`` into ``LOCAL_DIR``.

    Prints the configured endpoint, repository id and target directory,
    creates the target directory if it does not exist, and then calls
    ``snapshot_download``.  Any ``Exception`` raised by the download is
    printed together with a hint about ``REPO_TYPE`` and the process exits
    with status 1.
    """
    print(f"镜像站: {os.environ['HF_ENDPOINT']}")
    print(f"下载仓库: {REPO_ID}")
    print(f"保存目录: {LOCAL_DIR}")
    print("-" * 50)

    os.makedirs(LOCAL_DIR, exist_ok=True)

    try:
        # NOTE(review): newer huggingface_hub releases appear to deprecate
        # `local_dir_use_symlinks` and `resume_download` -- confirm against
        # the installed version before removing them.
        snapshot_download(
            repo_id=REPO_ID,
            repo_type=REPO_TYPE,
            local_dir=LOCAL_DIR,
            local_dir_use_symlinks=False,  # copy files directly, no symlinks
            resume_download=True,  # resume interrupted downloads
            ignore_patterns=["*.gitattributes"],
        )
        print("\n下载完成!")
    except Exception as e:
        # Script-level boundary: report the failure and exit non-zero
        # instead of showing a traceback.
        print(f"\n出错: {e}")
        print("提示: 如果是模型仓库,请将 REPO_TYPE 改为 'model' 后重试")
        sys.exit(1)
# Run the download only when the file is executed as a script.
if __name__ == "__main__":
    download()