| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| """Contains command to download files from the Hub with the CLI. |
| |
| Usage: |
| hf download --help |
| |
| # Download file |
| hf download gpt2 config.json |
| |
| # Download entire repo |
| hf download fffiloni/zeroscope --repo-type=space --revision=refs/pr/78 |
| |
| # Download repo with filters |
| hf download gpt2 --include="*.safetensors" |
| |
| # Download with token |
| hf download Wauplin/private-model --token=hf_*** |
| |
| # Download quietly (no progress bar, no warnings, only the returned path) |
| hf download gpt2 config.json --quiet |
| |
| # Download to local dir |
| hf download gpt2 --local-dir=./models/gpt2 |
| |
| # Download a subfolder |
| hf download HuggingFaceM4/FineVision art/ --repo-type=dataset |
| """ |
|
|
| import warnings |
| from typing import Annotated |
|
|
| import typer |
|
|
| from huggingface_hub._snapshot_download import snapshot_download |
| from huggingface_hub.errors import CLIError |
| from huggingface_hub.file_download import DryRunFileInfo, hf_hub_download |
| from huggingface_hub.utils import _format_size |
|
|
| from ._cli_utils import FormatWithAutoOpt, RepoIdArg, RepoTypeOpt, RevisionOpt, TokenOpt |
| from ._output import OutputFormatWithAuto, out |
|
|
|
|
# Sample `hf download` invocations, one per supported usage pattern.
# NOTE(review): presumably rendered in the command's help/epilog; the consumer
# of this list is not visible in this file.
DOWNLOAD_EXAMPLES: list[str] = [
    # Whole repository.
    "hf download meta-llama/Llama-3.2-1B-Instruct",
    # Explicit list of files.
    "hf download meta-llama/Llama-3.2-1B-Instruct config.json tokenizer.json",
    # Glob-based filtering.
    'hf download meta-llama/Llama-3.2-1B-Instruct --include "*.safetensors" --exclude "*.bin"',
    # Download into a local directory.
    "hf download meta-llama/Llama-3.2-1B-Instruct --local-dir ./models/llama",
    # Subfolder of a dataset repo (trailing "/" marks a folder).
    "hf download HuggingFaceM4/FineVision art/ --repo-type dataset",
]
|
|
|
|
def download(
    repo_id: RepoIdArg,
    filenames: Annotated[
        list[str] | None,
        typer.Argument(
            help="Files to download (e.g. `config.json`, `data/metadata.jsonl`).",
        ),
    ] = None,
    repo_type: RepoTypeOpt = RepoTypeOpt.model,
    revision: RevisionOpt = None,
    include: Annotated[
        list[str] | None,
        typer.Option(
            help="Glob patterns to include from files to download. eg: *.json",
        ),
    ] = None,
    exclude: Annotated[
        list[str] | None,
        typer.Option(
            help="Glob patterns to exclude from files to download.",
        ),
    ] = None,
    cache_dir: Annotated[
        str | None,
        typer.Option(
            help="Directory where to save files.",
        ),
    ] = None,
    local_dir: Annotated[
        str | None,
        typer.Option(
            help="If set, the downloaded file will be placed under this directory. Check out https://huggingface.co/docs/huggingface_hub/guides/download#download-files-to-a-local-folder for more details.",
        ),
    ] = None,
    force_download: Annotated[
        bool,
        typer.Option(
            help="If True, the files will be downloaded even if they are already cached.",
        ),
    ] = False,
    dry_run: Annotated[
        bool,
        typer.Option(
            help="If True, perform a dry run without actually downloading the file.",
        ),
    ] = False,
    token: TokenOpt = None,
    max_workers: Annotated[
        int,
        typer.Option(
            help="Maximum number of workers to use for downloading files. Default is 8.",
        ),
    ] = 8,
    format: FormatWithAutoOpt = OutputFormatWithAuto.auto,
) -> None:
    """Download files from the Hub."""
    # NOTE(review): the docstring above is kept to a single line on purpose —
    # presumably the CLI framework surfaces it as the command's help text.
    #
    # Overview:
    #   1. `_fetch` turns the positional arguments and `--include`/`--exclude`
    #      into either one `hf_hub_download` call or one `snapshot_download` call.
    #   2. `_report` prints the returned path, or a dry-run summary plus table.

    def _fetch() -> str | DryRunFileInfo | list[DryRunFileInfo]:
        """Run the download (or dry run) and return whatever the Hub helper returned."""
        requested = filenames or []

        # A positional argument ending with "/" designates a subfolder; it is
        # converted into a recursive glob. Everything else is a plain filename.
        folder_args: list[str] = []
        file_args: list[str] = []
        for entry in requested:
            if entry.endswith("/"):
                folder_args.append(entry)
            else:
                file_args.append(entry)
        folder_globs = [entry.rstrip("/") + "/**" for entry in folder_args]

        # Subfolder arguments are themselves include-patterns, so combining
        # them with explicit `--include` / `--exclude` is rejected outright.
        if folder_globs:
            first_folder = folder_args[0]
            if include:
                raise CLIError(
                    f"Cannot combine subfolder argument ('{first_folder}') with `--include`. "
                    f'Please use `--include "{first_folder}*"` instead.'
                )
            if exclude:
                raise CLIError(
                    f"Cannot combine subfolder argument ('{first_folder}') with `--exclude`. "
                    f'Please use `--include "{first_folder}*"` with `--exclude` instead.'
                )

        # Explicit filenames take precedence over glob filters: warn, don't fail.
        if file_args:
            if include:
                warnings.warn("Ignoring `--include` since filenames have being explicitly set.")
            if exclude:
                warnings.warn("Ignoring `--exclude` since filenames have being explicitly set.")

        # Exactly one plain file and no subfolder: single-file download path.
        if len(file_args) == 1 and not folder_globs:
            return hf_hub_download(
                repo_id=repo_id,
                filename=file_args[0],
                repo_type=repo_type.value,
                revision=revision,
                cache_dir=cache_dir,
                local_dir=local_dir,
                force_download=force_download,
                token=token,
                library_name="huggingface-cli",
                dry_run=dry_run,
            )

        # Every other case goes through `snapshot_download`.
        if file_args or folder_globs:
            # Positional arguments act as the allow-list; nothing is ignored.
            allow_patterns = file_args + folder_globs
            ignore_patterns = None
        else:
            # No positional arguments: rely solely on the user's glob filters.
            allow_patterns = include
            ignore_patterns = exclude

        return snapshot_download(
            repo_id=repo_id,
            repo_type=repo_type.value,
            revision=revision,
            allow_patterns=allow_patterns,
            ignore_patterns=ignore_patterns,
            cache_dir=cache_dir,
            local_dir=local_dir,
            force_download=force_download,
            token=token,
            library_name="huggingface-cli",
            max_workers=max_workers,
            dry_run=dry_run,
        )

    def _report(outcome: str | DryRunFileInfo | list[DryRunFileInfo]) -> None:
        """Print a download path, or a summary line and per-file table for a dry run."""
        if isinstance(outcome, str):
            out.result("Downloaded", path=outcome)
            return

        # Normalise the single-file dry-run result to the list form.
        infos = [outcome] if isinstance(outcome, DryRunFileInfo) else outcome
        pending = [info for info in infos if info.will_download]
        pending_bytes = sum(info.file_size for info in pending)
        out.text(
            f"[dry-run] Will download {len(pending)} files"
            f" (out of {len(infos)})"
            f" totalling {_format_size(pending_bytes)}."
        )

        # One row per file, sorted by name; files that will not be downloaded
        # show "-" instead of a size.
        rows = []
        for info in sorted(infos, key=lambda item: item.filename):
            size_label = _format_size(info.file_size) if info.will_download else "-"
            rows.append({"file": info.filename, "size": size_label})
        out.table(rows)

    _report(_fetch())
|
|