# Copyright 2025 The HuggingFace Team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Contains commands to interact with papers on the Hugging Face Hub. Usage: # list daily papers (most recently submitted) hf papers ls # list trending papers hf papers ls --sort=trending # list papers from a specific date, ordered by upvotes hf papers ls --date=2025-01-23 # list today's papers, ordered by upvotes hf papers ls --date=today # list papers from a specific week hf papers ls --week=2025-W09 # list papers by a specific submitter hf papers ls --submitter=someuser # search papers hf papers search "vision language" # get info about a paper hf papers info 2502.08025 # read a paper as markdown hf papers read 2502.08025 """ import datetime import enum from typing import Annotated, get_args import typer from huggingface_hub.errors import CLIError, HfHubHTTPError from huggingface_hub.hf_api import DailyPapersSort_T from ._cli_utils import ( FormatWithAutoOpt, LimitOpt, TokenOpt, api_object_to_dict, get_hf_api, typer_factory, ) from ._output import OutputFormatWithAuto, out _SORT_OPTIONS = get_args(DailyPapersSort_T) PaperSortEnum = enum.Enum("PaperSortEnum", {s: s for s in _SORT_OPTIONS}, type=str) # type: ignore[misc] def _parse_date(value: str | None) -> str | None: """Parse date option, converting 'today' to current date.""" if value is None: return None if value.lower() == "today": return datetime.date.today().isoformat() return value papers_cli = typer_factory(help="Interact with papers on the Hub.") @papers_cli.command( "list | ls", examples=[ "hf papers ls", "hf papers ls --sort trending", "hf papers ls --date 2025-01-23", "hf papers ls --week 2025-W09", "hf papers ls --submitter akhaliq", "hf papers ls --format json", ], ) def papers_ls( date: Annotated[ str | None, typer.Option( help="Date in ISO format (YYYY-MM-DD) or 'today'.", callback=_parse_date, ), ] = None, week: Annotated[ str | None, typer.Option(help="ISO week to filter by, e.g. '2025-W09'."), ] = None, month: Annotated[ str | None, typer.Option(help="Month to filter by in ISO format (YYYY-MM), e.g. '2025-02'."), ] = None, submitter: Annotated[ str | None, typer.Option(help="Filter by username of the submitter."), ] = None, sort: Annotated[ PaperSortEnum | None, typer.Option(help="Sort results."), ] = None, limit: LimitOpt = 50, format: FormatWithAutoOpt = OutputFormatWithAuto.auto, token: TokenOpt = None, ) -> None: """List daily papers on the Hub.""" api = get_hf_api(token=token) sort_key = sort.value if sort else None results = [] for paper_info in api.list_daily_papers( date=date, week=week, month=month, submitter=submitter, sort=sort_key, limit=limit, ): item = api_object_to_dict(paper_info) submitted_by = item.get("submitted_by") or {} item["submitted_by_name"] = submitted_by.get("fullname") or submitted_by.get("username") or "" results.append(item) out.table( results, headers=["id", "title", "upvotes", "comments", "published_at", "submitted_by_name"], alignments={"upvotes": "right", "comments": "right"}, ) @papers_cli.command( "search", examples=[ 'hf papers search "vision language"', 'hf papers search "attention mechanism" --limit 10', 'hf papers search "diffusion" --format json', ], ) def papers_search( query: Annotated[str, typer.Argument(help="Search query string.")], limit: LimitOpt = 20, format: FormatWithAutoOpt = OutputFormatWithAuto.auto, token: TokenOpt = None, ) -> None: """Search papers on the Hub.""" api = get_hf_api(token=token) results = [api_object_to_dict(paper_info) for paper_info in api.list_papers(query=query, limit=limit)] out.table(results, headers=["id", "title", "summary", "upvotes", "published_at"], alignments={"upvotes": "right"}) @papers_cli.command( "info", examples=[ "hf papers info 2601.15621", ], ) def papers_info( paper_id: Annotated[str, typer.Argument(help="The arXiv paper ID (e.g. '2502.08025').")], format: FormatWithAutoOpt = OutputFormatWithAuto.auto, token: TokenOpt = None, ) -> None: """Get info about a paper on the Hub.""" api = get_hf_api(token=token) try: info = api.paper_info(id=paper_id) except HfHubHTTPError as e: if e.response.status_code == 404: raise CLIError(f"Paper '{paper_id}' not found on the Hub.") from e raise out.dict(info) @papers_cli.command( "read", examples=[ "hf papers read 2601.15621", ], ) def papers_read( paper_id: Annotated[str, typer.Argument(help="The arXiv paper ID (e.g. '2502.08025').")], token: TokenOpt = None, ) -> None: """Read a paper as markdown.""" api = get_hf_api(token=token) try: content = api.read_paper(id=paper_id) except HfHubHTTPError as e: if e.response.status_code == 404: raise CLIError(f"Paper '{paper_id}' not found on the Hub.") from e raise out.text(content)