# Copyright 2023-present, the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains command to download files from the Hub with the CLI.

Usage:
    hf download --help

    # Download file
    hf download gpt2 config.json

    # Download entire repo
    hf download fffiloni/zeroscope --repo-type=space --revision=refs/pr/78

    # Download repo with filters
    hf download gpt2 --include="*.safetensors"

    # Download with token
    hf download Wauplin/private-model --token=hf_***

    # Download quietly (no progress bar, no warnings, only the returned path)
    hf download gpt2 config.json --quiet

    # Download to local dir
    hf download gpt2 --local-dir=./models/gpt2

    # Download a subfolder
    hf download HuggingFaceM4/FineVision art/ --repo-type=dataset
"""

import warnings
from typing import Annotated

import typer

from huggingface_hub._snapshot_download import snapshot_download
from huggingface_hub.errors import CLIError
from huggingface_hub.file_download import DryRunFileInfo, hf_hub_download
from huggingface_hub.utils import _format_size

from ._cli_utils import FormatWithAutoOpt, RepoIdArg, RepoTypeOpt, RevisionOpt, TokenOpt
from ._output import OutputFormatWithAuto, out


# Example invocations shown in the CLI help output.
DOWNLOAD_EXAMPLES = [
    "hf download meta-llama/Llama-3.2-1B-Instruct",
    "hf download meta-llama/Llama-3.2-1B-Instruct config.json tokenizer.json",
    'hf download meta-llama/Llama-3.2-1B-Instruct --include "*.safetensors" --exclude "*.bin"',
    "hf download meta-llama/Llama-3.2-1B-Instruct --local-dir ./models/llama",
    "hf download HuggingFaceM4/FineVision art/ --repo-type dataset",
]


def download(
    repo_id: RepoIdArg,
    filenames: Annotated[
        list[str] | None,
        typer.Argument(
            help="Files to download (e.g. `config.json`, `data/metadata.jsonl`).",
        ),
    ] = None,
    repo_type: RepoTypeOpt = RepoTypeOpt.model,
    revision: RevisionOpt = None,
    include: Annotated[
        list[str] | None,
        typer.Option(
            help="Glob patterns to include from files to download. eg: *.json",
        ),
    ] = None,
    exclude: Annotated[
        list[str] | None,
        typer.Option(
            help="Glob patterns to exclude from files to download.",
        ),
    ] = None,
    cache_dir: Annotated[
        str | None,
        typer.Option(
            help="Directory where to save files.",
        ),
    ] = None,
    local_dir: Annotated[
        str | None,
        typer.Option(
            help="If set, the downloaded file will be placed under this directory. Check out https://huggingface.co/docs/huggingface_hub/guides/download#download-files-to-a-local-folder for more details.",
        ),
    ] = None,
    force_download: Annotated[
        bool,
        typer.Option(
            help="If True, the files will be downloaded even if they are already cached.",
        ),
    ] = False,
    dry_run: Annotated[
        bool,
        typer.Option(
            help="If True, perform a dry run without actually downloading the file.",
        ),
    ] = False,
    token: TokenOpt = None,
    max_workers: Annotated[
        int,
        typer.Option(
            help="Maximum number of workers to use for downloading files. Default is 8.",
        ),
    ] = 8,
    format: FormatWithAutoOpt = OutputFormatWithAuto.auto,
) -> None:
    """Download files from the Hub.

    Dispatches to `hf_hub_download` for a single file and to `snapshot_download`
    otherwise (multiple files, subfolders, or include/exclude patterns), then
    prints either the resulting path or a dry-run summary table.
    """

    def run_download() -> str | DryRunFileInfo | list[DryRunFileInfo]:
        """Resolve the CLI arguments into the appropriate download call and return its result."""
        filenames_list = filenames if filenames is not None else []

        # Separate subfolder patterns (ending with '/') from regular filenames.
        # Subfolders like "art/" are converted to include patterns like "art/**".
        subfolders = [f for f in filenames_list if f.endswith("/")]
        subfolder_patterns = [f"{f.rstrip('/')}/**" for f in subfolders]
        regular_filenames = [f for f in filenames_list if not f.endswith("/")]

        # Error if subfolder patterns are combined with --include/--exclude.
        # Guide user to use --include instead of subfolder argument.
        if len(subfolder_patterns) > 0:
            if include is not None and len(include) > 0:
                raise CLIError(
                    f"Cannot combine subfolder argument ('{subfolders[0]}') with `--include`. "
                    f'Please use `--include "{subfolders[0]}*"` instead.'
                )
            if exclude is not None and len(exclude) > 0:
                raise CLIError(
                    f"Cannot combine subfolder argument ('{subfolders[0]}') with `--exclude`. "
                    f'Please use `--include "{subfolders[0]}*"` with `--exclude` instead.'
                )

        # Warn user if patterns are ignored (only if regular filenames are provided).
        if len(regular_filenames) > 0:
            if include is not None and len(include) > 0:
                warnings.warn("Ignoring `--include` since filenames have been explicitly set.")
            if exclude is not None and len(exclude) > 0:
                warnings.warn("Ignoring `--exclude` since filenames have been explicitly set.")

        # Single file to download (not a subfolder): use `hf_hub_download`.
        if len(regular_filenames) == 1 and len(subfolder_patterns) == 0:
            return hf_hub_download(
                repo_id=repo_id,
                repo_type=repo_type.value,
                revision=revision,
                filename=regular_filenames[0],
                cache_dir=cache_dir,
                force_download=force_download,
                token=token,
                local_dir=local_dir,
                library_name="huggingface-cli",
                dry_run=dry_run,
            )

        # Otherwise: use `snapshot_download` to ensure all files come from the same revision.
        if len(regular_filenames) == 0 and len(subfolder_patterns) == 0:
            # No filenames provided: use include/exclude patterns.
            allow_patterns = include
            ignore_patterns = exclude
        else:
            # Combine regular filenames and subfolder patterns as allow_patterns.
            allow_patterns = regular_filenames + subfolder_patterns
            ignore_patterns = None

        return snapshot_download(
            repo_id=repo_id,
            repo_type=repo_type.value,
            revision=revision,
            allow_patterns=allow_patterns,
            ignore_patterns=ignore_patterns,
            force_download=force_download,
            cache_dir=cache_dir,
            token=token,
            local_dir=local_dir,
            library_name="huggingface-cli",
            max_workers=max_workers,
            dry_run=dry_run,
        )

    def _print_result(result: str | DryRunFileInfo | list[DryRunFileInfo]) -> None:
        """Print the downloaded path, or a dry-run summary (count, total size, per-file table)."""
        if isinstance(result, str):
            out.result("Downloaded", path=result)
            return

        # Print dry run info.
        if isinstance(result, DryRunFileInfo):
            result = [result]
        will_download = [r for r in result if r.will_download]
        out.text(
            f"[dry-run] Will download {len(will_download)} files"
            f" (out of {len(result)})"
            f" totalling {_format_size(sum(r.file_size for r in will_download))}."
        )
        items = [
            {
                "file": info.filename,
                "size": _format_size(info.file_size) if info.will_download else "-",
            }
            for info in sorted(result, key=lambda x: x.filename)
        ]
        out.table(items)

    _print_result(run_download())