Spaces:

ycwhencpp
/

train-new

Paused

File size: 5,969 Bytes

5e9fb2f

# Copyright 2025 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains commands to interact with papers on the Hugging Face Hub.

Usage:
    # list daily papers (most recently submitted)
    hf papers ls

    # list trending papers
    hf papers ls --sort=trending

    # list papers from a specific date, ordered by upvotes
    hf papers ls --date=2025-01-23

    # list today's papers, ordered by upvotes
    hf papers ls --date=today

    # list papers from a specific week
    hf papers ls --week=2025-W09

    # list papers from a specific month
    hf papers ls --month=2025-02

    # list papers by a specific submitter
    hf papers ls --submitter=someuser

    # search papers
    hf papers search "vision language"

    # get info about a paper
    hf papers info 2502.08025

    # read a paper as markdown
    hf papers read 2502.08025
"""

import datetime
import enum
from typing import Annotated, get_args

import typer

from huggingface_hub.errors import CLIError, HfHubHTTPError
from huggingface_hub.hf_api import DailyPapersSort_T

from ._cli_utils import (
    FormatWithAutoOpt,
    LimitOpt,
    TokenOpt,
    api_object_to_dict,
    get_hf_api,
    typer_factory,
)
from ._output import OutputFormatWithAuto, out


# Type arguments of `DailyPapersSort_T` (presumably a Literal of sort-key strings — confirm in hf_api).
_SORT_OPTIONS = get_args(DailyPapersSort_T)
# String-valued enum built through the functional API: each member's name and value are the same sort key.
PaperSortEnum = enum.Enum("PaperSortEnum", dict(zip(_SORT_OPTIONS, _SORT_OPTIONS)), type=str)  # type: ignore[misc]


def _parse_date(value: str | None) -> str | None:
    """Parse date option, converting 'today' to current date."""
    if value is None:
        return None
    if value.lower() == "today":
        return datetime.date.today().isoformat()
    return value


# Command-group object returned by `typer_factory`; every command defined below
# registers itself on it through the `@papers_cli.command(...)` decorator.
papers_cli = typer_factory(help="Interact with papers on the Hub.")


@papers_cli.command(
    "list | ls",
    examples=[
        "hf papers ls",
        "hf papers ls --sort trending",
        "hf papers ls --date 2025-01-23",
        "hf papers ls --week 2025-W09",
        "hf papers ls --submitter akhaliq",
        "hf papers ls --format json",
    ],
)
def papers_ls(
    date: Annotated[
        str | None,
        typer.Option(
            help="Date in ISO format (YYYY-MM-DD) or 'today'.",
            callback=_parse_date,
        ),
    ] = None,
    week: Annotated[
        str | None,
        typer.Option(help="ISO week to filter by, e.g. '2025-W09'."),
    ] = None,
    month: Annotated[
        str | None,
        typer.Option(help="Month to filter by in ISO format (YYYY-MM), e.g. '2025-02'."),
    ] = None,
    submitter: Annotated[
        str | None,
        typer.Option(help="Filter by username of the submitter."),
    ] = None,
    sort: Annotated[
        PaperSortEnum | None,
        typer.Option(help="Sort results."),
    ] = None,
    limit: LimitOpt = 50,
    format: FormatWithAutoOpt = OutputFormatWithAuto.auto,
    token: TokenOpt = None,
) -> None:
    """List daily papers on the Hub."""
    # NOTE(review): `format` is not read in this body; presumably its option
    # type configures `out` before the command runs — confirm in `_cli_utils`.
    hub_api = get_hf_api(token=token)

    # The API receives the enum's plain value, or None when --sort was omitted.
    sort_key = None
    if sort:
        sort_key = sort.value

    daily_papers = hub_api.list_daily_papers(
        date=date,
        week=week,
        month=month,
        submitter=submitter,
        sort=sort_key,
        limit=limit,
    )

    rows = []
    for paper in daily_papers:
        row = api_object_to_dict(paper)
        # Flatten the nested submitter record into one display column:
        # full name first, then username, then an empty string.
        submitter_info = row.get("submitted_by") or {}
        display_name = submitter_info.get("fullname") or submitter_info.get("username") or ""
        row["submitted_by_name"] = display_name
        rows.append(row)

    out.table(
        rows,
        headers=["id", "title", "upvotes", "comments", "published_at", "submitted_by_name"],
        alignments={"upvotes": "right", "comments": "right"},
    )


@papers_cli.command(
    "search",
    examples=[
        'hf papers search "vision language"',
        'hf papers search "attention mechanism" --limit 10',
        'hf papers search "diffusion" --format json',
    ],
)
def papers_search(
    query: Annotated[str, typer.Argument(help="Search query string.")],
    limit: LimitOpt = 20,
    format: FormatWithAutoOpt = OutputFormatWithAuto.auto,
    token: TokenOpt = None,
) -> None:
    """Search papers on the Hub."""
    # NOTE(review): `format` is not read in this body; presumably its option
    # type configures `out` before the command runs — confirm in `_cli_utils`.
    hub_api = get_hf_api(token=token)
    matches = hub_api.list_papers(query=query, limit=limit)
    # Convert every returned paper object into a plain dict row for the table.
    rows = list(map(api_object_to_dict, matches))
    out.table(
        rows,
        headers=["id", "title", "summary", "upvotes", "published_at"],
        alignments={"upvotes": "right"},
    )


@papers_cli.command(
    "info",
    examples=[
        "hf papers info 2601.15621",
    ],
)
def papers_info(
    paper_id: Annotated[str, typer.Argument(help="The arXiv paper ID (e.g. '2502.08025').")],
    format: FormatWithAutoOpt = OutputFormatWithAuto.auto,
    token: TokenOpt = None,
) -> None:
    """Get info about a paper on the Hub."""
    # NOTE(review): `format` is not read in this body; presumably its option
    # type configures `out` before the command runs — confirm in `_cli_utils`.
    hub_api = get_hf_api(token=token)
    try:
        paper = hub_api.paper_info(id=paper_id)
    except HfHubHTTPError as err:
        # Only a 404 is translated into a CLI error; any other HTTP failure
        # propagates unchanged.
        if err.response.status_code != 404:
            raise
        raise CLIError(f"Paper '{paper_id}' not found on the Hub.") from err
    out.dict(paper)


@papers_cli.command(
    "read",
    examples=[
        "hf papers read 2601.15621",
    ],
)
def papers_read(
    paper_id: Annotated[str, typer.Argument(help="The arXiv paper ID (e.g. '2502.08025').")],
    token: TokenOpt = None,
) -> None:
    """Read a paper as markdown."""
    hub_api = get_hf_api(token=token)
    try:
        markdown = hub_api.read_paper(id=paper_id)
    except HfHubHTTPError as err:
        # Only a 404 is translated into a CLI error; any other HTTP failure
        # propagates unchanged.
        if err.response.status_code != 404:
            raise
        raise CLIError(f"Paper '{paper_id}' not found on the Hub.") from err
    out.text(markdown)