ICL / download_hf.py
Lekr0's picture
Add files using upload-large-folder tool
90afcf2 verified
#!/usr/bin/env python3
"""
Hugging Face 断点续传下载脚本
镜像站: hf-mirror.com
目标: MMInstruction/M3IT
"""
import os
import sys
# Route Hub requests through the hf-mirror.com mirror (a mirror site for
# users in mainland China).
# NOTE(review): this assignment is placed before the huggingface_hub imports
# below, presumably because the library reads HF_ENDPOINT at import time --
# confirm before reordering these lines.
os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
from huggingface_hub import snapshot_download
from huggingface_hub import hf_hub_download
import huggingface_hub
# Identifier of the Hub repository that download() fetches.
REPO_ID = "MMInstruction/M3IT"
# Local directory that download() creates and passes as the download target.
LOCAL_DIR = "/workspace/xiaobin/dataset"
# Repository kind passed to snapshot_download; M3IT is a dataset. The error
# hint in download() suggests switching this to 'model' for model repos.
REPO_TYPE = "dataset"  # M3IT is a dataset
def download():
    """Download a snapshot of ``REPO_ID`` into ``LOCAL_DIR``.

    Prints the configured endpoint, repository and destination, makes sure
    the destination directory exists, then hands the transfer over to
    ``snapshot_download``.

    Any ``Exception`` raised during the download is reported on stdout
    together with a hint about ``REPO_TYPE``, after which the process exits
    with status 1 (``SystemExit``). Returns ``None`` on success.
    """
    banner = (
        f"镜像站: {os.environ['HF_ENDPOINT']}",
        f"下载仓库: {REPO_ID}",
        f"保存目录: {LOCAL_DIR}",
        "-" * 50,
    )
    for banner_line in banner:
        print(banner_line)

    # Create the target directory up front; an existing directory is fine.
    os.makedirs(LOCAL_DIR, exist_ok=True)

    snapshot_options = {
        "repo_id": REPO_ID,
        "repo_type": REPO_TYPE,
        "local_dir": LOCAL_DIR,
        # Copy the files directly instead of using symlinks.
        "local_dir_use_symlinks": False,
        # Ask for interrupted downloads to be resumed.
        "resume_download": True,
        "ignore_patterns": ["*.gitattributes"],
    }

    try:
        snapshot_download(**snapshot_options)
        print("\n下载完成!")
    except Exception as err:
        # Top-level boundary of the script: report the failure and exit
        # with a non-zero status.
        print(f"\n出错: {err}")
        print("提示: 如果是模型仓库,请将 REPO_TYPE 改为 'model' 后重试")
        sys.exit(1)
# Script entry point: start the download only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    download()