File size: 5,969 Bytes
5e9fb2f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
# Copyright 2025 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains commands to interact with papers on the Hugging Face Hub.

Usage:
    # list daily papers (most recently submitted)
    hf papers ls

    # list trending papers
    hf papers ls --sort=trending

    # list papers from a specific date, ordered by upvotes
    hf papers ls --date=2025-01-23

    # list today's papers, ordered by upvotes
    hf papers ls --date=today

    # list papers from a specific week
    hf papers ls --week=2025-W09

    # list papers by a specific submitter
    hf papers ls --submitter=someuser

    # search papers
    hf papers search "vision language"

    # get info about a paper
    hf papers info 2502.08025

    # read a paper as markdown
    hf papers read 2502.08025
"""

import datetime
import enum
from typing import Annotated, get_args

import typer

from huggingface_hub.errors import CLIError, HfHubHTTPError
from huggingface_hub.hf_api import DailyPapersSort_T

from ._cli_utils import (
    FormatWithAutoOpt,
    LimitOpt,
    TokenOpt,
    api_object_to_dict,
    get_hf_api,
    typer_factory,
)
from ._output import OutputFormatWithAuto, out


# Sort keys accepted by `--sort`, read from the type arguments of `DailyPapersSort_T`.
_SORT_OPTIONS = get_args(DailyPapersSort_T)
# String-valued enum in which every member's name and value are the same sort key.
PaperSortEnum = enum.Enum("PaperSortEnum", dict(zip(_SORT_OPTIONS, _SORT_OPTIONS)), type=str)  # type: ignore[misc]


def _parse_date(value: str | None) -> str | None:
    """Parse date option, converting 'today' to current date."""
    if value is None:
        return None
    if value.lower() == "today":
        return datetime.date.today().isoformat()
    return value


# Command group for `hf papers`, built by `typer_factory`; each command below registers
# itself on it through the `@papers_cli.command(...)` decorator.
papers_cli = typer_factory(help="Interact with papers on the Hub.")


@papers_cli.command(
    "list | ls",
    examples=[
        "hf papers ls",
        "hf papers ls --sort trending",
        "hf papers ls --date 2025-01-23",
        "hf papers ls --week 2025-W09",
        "hf papers ls --submitter akhaliq",
        "hf papers ls --format json",
    ],
)
def papers_ls(
    date: Annotated[
        str | None,
        typer.Option(
            help="Date in ISO format (YYYY-MM-DD) or 'today'.",
            callback=_parse_date,
        ),
    ] = None,
    week: Annotated[
        str | None,
        typer.Option(help="ISO week to filter by, e.g. '2025-W09'."),
    ] = None,
    month: Annotated[
        str | None,
        typer.Option(help="Month to filter by in ISO format (YYYY-MM), e.g. '2025-02'."),
    ] = None,
    submitter: Annotated[
        str | None,
        typer.Option(help="Filter by username of the submitter."),
    ] = None,
    sort: Annotated[
        PaperSortEnum | None,
        typer.Option(help="Sort results."),
    ] = None,
    limit: LimitOpt = 50,
    format: FormatWithAutoOpt = OutputFormatWithAuto.auto,
    token: TokenOpt = None,
) -> None:
    """List daily papers on the Hub."""
    # NOTE(review): `format` is never read in this body — presumably the option itself
    # configures `out`; confirm against `FormatWithAutoOpt` in `_cli_utils`.
    hub_api = get_hf_api(token=token)

    # The API receives the enum's raw string value, or None when no sort was given.
    sort_key = None
    if sort:
        sort_key = sort.value

    daily_papers = hub_api.list_daily_papers(
        date=date,
        week=week,
        month=month,
        submitter=submitter,
        sort=sort_key,
        limit=limit,
    )

    rows = []
    for paper in daily_papers:
        row = api_object_to_dict(paper)
        # Flatten the submitter into one display column: full name first, then
        # username, then an empty string when neither is available.
        submitter_info = row.get("submitted_by") or {}
        row["submitted_by_name"] = submitter_info.get("fullname") or submitter_info.get("username") or ""
        rows.append(row)

    column_names = ["id", "title", "upvotes", "comments", "published_at", "submitted_by_name"]
    column_alignments = {"upvotes": "right", "comments": "right"}
    out.table(rows, headers=column_names, alignments=column_alignments)


@papers_cli.command(
    "search",
    examples=[
        'hf papers search "vision language"',
        'hf papers search "attention mechanism" --limit 10',
        'hf papers search "diffusion" --format json',
    ],
)
def papers_search(
    query: Annotated[str, typer.Argument(help="Search query string.")],
    limit: LimitOpt = 20,
    format: FormatWithAutoOpt = OutputFormatWithAuto.auto,
    token: TokenOpt = None,
) -> None:
    """Search papers on the Hub."""
    # NOTE(review): `format` is never read in this body — presumably the option itself
    # configures `out`; confirm against `FormatWithAutoOpt` in `_cli_utils`.
    hub_api = get_hf_api(token=token)
    matches = hub_api.list_papers(query=query, limit=limit)
    # Convert every returned paper object into a plain dict row for the table.
    rows = list(map(api_object_to_dict, matches))
    out.table(
        rows,
        headers=["id", "title", "summary", "upvotes", "published_at"],
        alignments={"upvotes": "right"},
    )


@papers_cli.command(
    "info",
    examples=[
        "hf papers info 2601.15621",
    ],
)
def papers_info(
    paper_id: Annotated[str, typer.Argument(help="The arXiv paper ID (e.g. '2502.08025').")],
    format: FormatWithAutoOpt = OutputFormatWithAuto.auto,
    token: TokenOpt = None,
) -> None:
    """Get info about a paper on the Hub."""
    # NOTE(review): `format` is never read in this body — presumably the option itself
    # configures `out`; confirm against `FormatWithAutoOpt` in `_cli_utils`.
    hub_api = get_hf_api(token=token)
    try:
        paper = hub_api.paper_info(id=paper_id)
    except HfHubHTTPError as http_err:
        # Only a 404 is translated into a CLI error; every other HTTP failure
        # propagates unchanged.
        if http_err.response.status_code != 404:
            raise
        raise CLIError(f"Paper '{paper_id}' not found on the Hub.") from http_err
    else:
        out.dict(paper)


@papers_cli.command(
    "read",
    examples=[
        "hf papers read 2601.15621",
    ],
)
def papers_read(
    paper_id: Annotated[str, typer.Argument(help="The arXiv paper ID (e.g. '2502.08025').")],
    token: TokenOpt = None,
) -> None:
    """Read a paper as markdown."""
    hub_api = get_hf_api(token=token)
    try:
        markdown = hub_api.read_paper(id=paper_id)
    except HfHubHTTPError as http_err:
        # Only a 404 is translated into a CLI error; every other HTTP failure
        # propagates unchanged.
        if http_err.response.status_code != 404:
            raise
        raise CLIError(f"Paper '{paper_id}' not found on the Hub.") from http_err
    else:
        out.text(markdown)