#!/usr/bin/env python3
"""Resumable Hugging Face download script.

Mirror endpoint: hf-mirror.com
Target repository: MMInstruction/M3IT
"""

import os
import re
import sys

# Route Hub traffic through the hf-mirror.com mirror.  This assignment must
# stay above the huggingface_hub imports below: the library picks up
# HF_ENDPOINT when it is first imported, so setting it later has no effect.
os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"

from huggingface_hub import snapshot_download  # noqa: E402
from huggingface_hub import hf_hub_download  # noqa: E402,F401
import huggingface_hub  # noqa: E402

REPO_ID = "MMInstruction/M3IT"
LOCAL_DIR = "/workspace/xiaobin/dataset"
REPO_TYPE = "dataset"  # M3IT is a dataset repository


def _legacy_download_kwargs() -> dict:
    """Return the extra keyword arguments that only old huggingface_hub needs.

    ``local_dir_use_symlinks`` and ``resume_download`` are deprecated in
    recent huggingface_hub releases (downloads into ``local_dir`` no longer
    use symlinks and always resume) and are documented as slated for removal.
    Passing them unconditionally therefore triggers deprecation warnings and
    would raise ``TypeError`` on a release that drops them.  They are only
    supplied to releases older than 0.23, where they are still required to
    get real files and resumable downloads.

    Returns:
        A dict of keyword arguments to splat into ``snapshot_download``;
        empty for huggingface_hub >= 0.23 or when the version string cannot
        be parsed (in which case a modern release is assumed).
    """
    version = getattr(huggingface_hub, "__version__", "")
    parsed = re.match(r"(\d+)\.(\d+)", version)
    if parsed is None or (int(parsed.group(1)), int(parsed.group(2))) >= (0, 23):
        return {}
    return {
        "local_dir_use_symlinks": False,  # copy real files, no symlinks
        "resume_download": True,  # continue interrupted downloads
    }


def download() -> None:
    """Download the whole REPO_ID repository snapshot into LOCAL_DIR.

    Prints the mirror, repository and destination, makes sure the destination
    directory exists, then fetches every file except ``*.gitattributes``.
    Re-running the script after an interruption continues the download.

    Raises:
        SystemExit: with exit status 1 when the download fails for any
            reason; the error and a hint are written to stderr first.
    """
    print(f"镜像站: {os.environ['HF_ENDPOINT']}")
    print(f"下载仓库: {REPO_ID}")
    print(f"保存目录: {LOCAL_DIR}")
    print("-" * 50)

    os.makedirs(LOCAL_DIR, exist_ok=True)

    try:
        snapshot_download(
            repo_id=REPO_ID,
            repo_type=REPO_TYPE,
            local_dir=LOCAL_DIR,
            ignore_patterns=["*.gitattributes"],
            **_legacy_download_kwargs()
        )
    except Exception as e:
        # Top-level boundary of a CLI script: report on stderr and exit
        # non-zero so shells and schedulers can detect the failure.
        print(f"\n出错: {e}", file=sys.stderr)
        print("提示: 如果是模型仓库,请将 REPO_TYPE 改为 'model' 后重试", file=sys.stderr)
        sys.exit(1)
    else:
        print("\n下载完成!")


if __name__ == "__main__":
    download()