| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| """Contains commands to interact with papers on the Hugging Face Hub. |
| |
| Usage: |
| # list daily papers (most recently submitted) |
| hf papers ls |
| |
| # list trending papers |
| hf papers ls --sort=trending |
| |
| # list papers from a specific date, ordered by upvotes |
| hf papers ls --date=2025-01-23 |
| |
| # list today's papers, ordered by upvotes |
| hf papers ls --date=today |
| |
| # list papers from a specific week |
| hf papers ls --week=2025-W09 |
| |
| # list papers by a specific submitter |
| hf papers ls --submitter=someuser |
| |
| # search papers |
| hf papers search "vision language" |
| |
| # get info about a paper |
| hf papers info 2502.08025 |
| |
| # read a paper as markdown |
| hf papers read 2502.08025 |
| """ |
|
|
| import datetime |
| import enum |
| from typing import Annotated, get_args |
|
|
| import typer |
|
|
| from huggingface_hub.errors import CLIError, HfHubHTTPError |
| from huggingface_hub.hf_api import DailyPapersSort_T |
|
|
| from ._cli_utils import ( |
| FormatWithAutoOpt, |
| LimitOpt, |
| TokenOpt, |
| api_object_to_dict, |
| get_hf_api, |
| typer_factory, |
| ) |
| from ._output import OutputFormatWithAuto, out |
|
|
|
|
# Allowed sort keys, read from the type arguments of `DailyPapersSort_T`.
_SORT_OPTIONS = get_args(DailyPapersSort_T)
# String-valued enum whose member names and values are both the sort key itself;
# used below as the type of the `--sort` option.
PaperSortEnum = enum.Enum("PaperSortEnum", dict(zip(_SORT_OPTIONS, _SORT_OPTIONS)), type=str)
|
|
|
|
| def _parse_date(value: str | None) -> str | None: |
| """Parse date option, converting 'today' to current date.""" |
| if value is None: |
| return None |
| if value.lower() == "today": |
| return datetime.date.today().isoformat() |
| return value |
|
|
|
|
# Typer app for the `hf papers` command group; the commands defined below
# (list/ls, search, info, read) register themselves on it via `@papers_cli.command`.
papers_cli = typer_factory(help="Interact with papers on the Hub.")
|
|
|
|
@papers_cli.command(
    "list | ls",
    examples=[
        "hf papers ls",
        "hf papers ls --sort trending",
        "hf papers ls --date 2025-01-23",
        "hf papers ls --week 2025-W09",
        "hf papers ls --submitter akhaliq",
        "hf papers ls --format json",
    ],
)
def papers_ls(
    date: Annotated[
        str | None,
        typer.Option(
            help="Date in ISO format (YYYY-MM-DD) or 'today'.",
            callback=_parse_date,
        ),
    ] = None,
    week: Annotated[
        str | None,
        typer.Option(help="ISO week to filter by, e.g. '2025-W09'."),
    ] = None,
    month: Annotated[
        str | None,
        typer.Option(help="Month to filter by in ISO format (YYYY-MM), e.g. '2025-02'."),
    ] = None,
    submitter: Annotated[
        str | None,
        typer.Option(help="Filter by username of the submitter."),
    ] = None,
    sort: Annotated[
        PaperSortEnum | None,
        typer.Option(help="Sort results."),
    ] = None,
    limit: LimitOpt = 50,
    format: FormatWithAutoOpt = OutputFormatWithAuto.auto,
    token: TokenOpt = None,
) -> None:
    """List daily papers on the Hub."""
    # NOTE(review): `format` is not read in this body; presumably the option
    # itself configures `out` -- confirm in `_cli_utils`.

    def _to_row(paper_info) -> dict:
        # Flatten the submitter into a single display column: full name first,
        # then username, then an empty string when neither is available.
        row = api_object_to_dict(paper_info)
        author = row.get("submitted_by") or {}
        row["submitted_by_name"] = author.get("fullname") or author.get("username") or ""
        return row

    api = get_hf_api(token=token)
    papers = api.list_daily_papers(
        date=date,
        week=week,
        month=month,
        submitter=submitter,
        # The API receives the raw sort string rather than the enum member.
        sort=sort.value if sort else None,
        limit=limit,
    )
    rows = [_to_row(paper_info) for paper_info in papers]
    out.table(
        rows,
        headers=["id", "title", "upvotes", "comments", "published_at", "submitted_by_name"],
        alignments={"upvotes": "right", "comments": "right"},
    )
|
|
|
|
@papers_cli.command(
    "search",
    examples=[
        'hf papers search "vision language"',
        'hf papers search "attention mechanism" --limit 10',
        'hf papers search "diffusion" --format json',
    ],
)
def papers_search(
    query: Annotated[str, typer.Argument(help="Search query string.")],
    limit: LimitOpt = 20,
    format: FormatWithAutoOpt = OutputFormatWithAuto.auto,
    token: TokenOpt = None,
) -> None:
    """Search papers on the Hub."""
    # NOTE(review): `format` is not read in this body; presumably the option
    # itself configures `out` -- confirm in `_cli_utils`.
    api = get_hf_api(token=token)
    matches = api.list_papers(query=query, limit=limit)
    # Convert every returned paper object into a plain dict row for the table.
    rows = list(map(api_object_to_dict, matches))
    out.table(
        rows,
        headers=["id", "title", "summary", "upvotes", "published_at"],
        alignments={"upvotes": "right"},
    )
|
|
|
|
@papers_cli.command(
    "info",
    examples=[
        "hf papers info 2601.15621",
    ],
)
def papers_info(
    paper_id: Annotated[str, typer.Argument(help="The arXiv paper ID (e.g. '2502.08025').")],
    format: FormatWithAutoOpt = OutputFormatWithAuto.auto,
    token: TokenOpt = None,
) -> None:
    """Get info about a paper on the Hub."""
    # NOTE(review): `format` is not read in this body; presumably the option
    # itself configures `out` -- confirm in `_cli_utils`.
    api = get_hf_api(token=token)
    try:
        info = api.paper_info(id=paper_id)
    except HfHubHTTPError as err:
        # Only a 404 is translated into a CLI error; any other HTTP failure
        # propagates unchanged.
        if err.response.status_code != 404:
            raise
        raise CLIError(f"Paper '{paper_id}' not found on the Hub.") from err
    else:
        out.dict(info)
|
|
|
|
@papers_cli.command(
    "read",
    examples=[
        "hf papers read 2601.15621",
    ],
)
def papers_read(
    paper_id: Annotated[str, typer.Argument(help="The arXiv paper ID (e.g. '2502.08025').")],
    token: TokenOpt = None,
) -> None:
    """Read a paper as markdown."""
    api = get_hf_api(token=token)
    try:
        markdown = api.read_paper(id=paper_id)
    except HfHubHTTPError as err:
        not_found = err.response.status_code == 404
        if not_found:
            # Turn a missing paper into a CLI error, keeping the HTTP error as the cause.
            raise CLIError(f"Paper '{paper_id}' not found on the Hub.") from err
        # Every other HTTP failure is re-raised as-is.
        raise
    else:
        out.text(markdown)
|
|