akseljoonas HF Staff committed on
Commit
b95477e
·
1 Parent(s): d1d8c1f

fix: use correct HF API filter param and improve integration tests

Browse files
agent/tools/papers_tool.py CHANGED
@@ -144,6 +144,13 @@ def _find_section(sections: list[dict], query: str) -> dict | None:
144
  # ---------------------------------------------------------------------------
145
 
146
 
 
 
 
 
 
 
 
147
  def _truncate(text: str, max_len: int) -> str:
148
  if len(text) <= max_len:
149
  return text
@@ -272,7 +279,7 @@ def _format_datasets(datasets: list, arxiv_id: str, sort: str) -> str:
272
  ds_id = ds.get("id", "unknown")
273
  downloads = ds.get("downloads", 0)
274
  likes = ds.get("likes", 0)
275
- desc = _truncate(ds.get("description") or "", MAX_SUMMARY_LEN)
276
  tags = ds.get("tags") or []
277
  interesting = [t for t in tags if not t.startswith(("arxiv:", "region:"))][:5]
278
 
@@ -550,7 +557,7 @@ async def _op_find_datasets(args: dict[str, Any], limit: int) -> ToolResult:
550
  resp = await client.get(
551
  f"{HF_API}/datasets",
552
  params={
553
- "other": f"arxiv:{arxiv_id}",
554
  "limit": limit,
555
  "sort": sort_key,
556
  "direction": -1,
@@ -585,7 +592,7 @@ async def _op_find_models(args: dict[str, Any], limit: int) -> ToolResult:
585
  resp = await client.get(
586
  f"{HF_API}/models",
587
  params={
588
- "other": f"arxiv:{arxiv_id}",
589
  "limit": limit,
590
  "sort": sort_key,
591
  "direction": -1,
@@ -645,7 +652,7 @@ async def _op_find_all_resources(args: dict[str, Any], limit: int) -> ToolResult
645
  client.get(
646
  f"{HF_API}/datasets",
647
  params={
648
- "other": f"arxiv:{arxiv_id}",
649
  "limit": per_cat,
650
  "sort": "downloads",
651
  "direction": -1,
@@ -654,7 +661,7 @@ async def _op_find_all_resources(args: dict[str, Any], limit: int) -> ToolResult
654
  client.get(
655
  f"{HF_API}/models",
656
  params={
657
- "other": f"arxiv:{arxiv_id}",
658
  "limit": per_cat,
659
  "sort": "downloads",
660
  "direction": -1,
 
144
  # ---------------------------------------------------------------------------
145
 
146
 
147
+ def _clean_description(text: str) -> str:
148
+ """Replace runs of tabs with a single space, collapse consecutive newlines into one, and trim HF API descriptions."""
149
+ text = re.sub(r"[\t]+", " ", text)
150
+ text = re.sub(r"\n{2,}", "\n", text)
151
+ return text.strip()
152
+
153
+
154
  def _truncate(text: str, max_len: int) -> str:
155
  if len(text) <= max_len:
156
  return text
 
279
  ds_id = ds.get("id", "unknown")
280
  downloads = ds.get("downloads", 0)
281
  likes = ds.get("likes", 0)
282
+ desc = _truncate(_clean_description(ds.get("description") or ""), MAX_SUMMARY_LEN)
283
  tags = ds.get("tags") or []
284
  interesting = [t for t in tags if not t.startswith(("arxiv:", "region:"))][:5]
285
 
 
557
  resp = await client.get(
558
  f"{HF_API}/datasets",
559
  params={
560
+ "filter": f"arxiv:{arxiv_id}",
561
  "limit": limit,
562
  "sort": sort_key,
563
  "direction": -1,
 
592
  resp = await client.get(
593
  f"{HF_API}/models",
594
  params={
595
+ "filter": f"arxiv:{arxiv_id}",
596
  "limit": limit,
597
  "sort": sort_key,
598
  "direction": -1,
 
652
  client.get(
653
  f"{HF_API}/datasets",
654
  params={
655
+ "filter": f"arxiv:{arxiv_id}",
656
  "limit": per_cat,
657
  "sort": "downloads",
658
  "direction": -1,
 
661
  client.get(
662
  f"{HF_API}/models",
663
  params={
664
+ "filter": f"arxiv:{arxiv_id}",
665
  "limit": per_cat,
666
  "sort": "downloads",
667
  "direction": -1,
tests/integration/tools/test_papers_integration.py CHANGED
@@ -2,8 +2,11 @@
2
  """
3
  Integration tests for HF Papers Tool
4
  Tests with real HF and arXiv APIs — all endpoints are public, no auth required.
 
 
5
  """
6
  import asyncio
 
7
  import sys
8
 
9
  sys.path.insert(0, ".")
@@ -15,252 +18,553 @@ GREEN = "\033[92m"
15
  YELLOW = "\033[93m"
16
  RED = "\033[91m"
17
  BLUE = "\033[94m"
 
18
  RESET = "\033[0m"
19
 
 
 
 
20
 
21
  def print_test(msg):
 
22
  print(f"{BLUE}[TEST]{RESET} {msg}")
 
23
 
24
 
25
  def print_success(msg):
26
- print(f"{GREEN}βœ“{RESET} {msg}")
27
-
28
-
29
- def print_warning(msg):
30
- print(f"{YELLOW}⚠{RESET} {msg}")
31
 
32
 
33
  def print_error(msg):
34
- print(f"{RED}βœ—{RESET} {msg}")
35
 
36
 
37
- def print_snippet(output, length=600):
38
- """Print a snippet of raw test output."""
39
- out = output[:length].replace("\n", "\\n")
40
- if len(output) > length:
41
- out += "..."
42
- print(f"{YELLOW}[RAW OUTPUT SNIPPET]{RESET} {out}")
 
43
 
44
 
45
- passed = 0
46
- failed = 0
 
 
 
 
 
 
 
 
 
47
 
48
 
49
- async def run_tool(args: dict) -> tuple[str, bool]:
50
- """Call the handler and return (output, success)."""
51
  return await hf_papers_handler(args)
52
 
53
 
54
- async def check(name: str, args: dict, *, expect_success: bool = True, expect_in: list[str] | None = None) -> str:
55
- """Run a tool call, validate, and track pass/fail.
56
- Prints a snippet of raw output of each test."""
57
- global passed, failed
58
- print_test(name)
59
- output, success = await run_tool(args)
60
- print_snippet(output)
61
 
62
- if success != expect_success:
63
- print_error(f"Expected success={expect_success}, got {success}")
64
- print(f" Output: {output[:300]}")
65
- failed += 1
66
- return output
67
 
68
- if expect_in:
69
- missing = [s for s in expect_in if s.lower() not in output.lower()]
70
- if missing:
71
- print_error(f"Missing expected strings: {missing}")
72
- print(f" Output: {output[:300]}")
73
- failed += 1
74
- return output
75
 
76
- print_success(f"OK ({len(output)} chars)")
77
- passed += 1
78
- return output
 
79
 
 
 
 
80
 
81
- # ---------------------------------------------------------------------------
82
- # Test suites
83
- # ---------------------------------------------------------------------------
 
 
 
84
 
 
 
 
85
 
86
- async def test_paper_discovery():
87
- print(f"\n{YELLOW}{'=' * 70}{RESET}")
88
- print(f"{YELLOW}Test Suite 1: Paper Discovery{RESET}")
89
- print(f"{YELLOW}{'=' * 70}{RESET}\n")
90
 
91
- # Trending papers
92
- output = await check(
93
- "trending (limit=3)",
94
- {"operation": "trending", "limit": 3},
95
- expect_in=["Trending Papers"],
96
- )
97
 
98
- # Trending with keyword filter
99
- await check(
100
- "trending with query='language'",
101
- {"operation": "trending", "query": "language", "limit": 5},
102
- )
103
 
104
- # Search
105
- await check(
106
- "search 'direct preference optimization'",
107
- {"operation": "search", "query": "direct preference optimization", "limit": 3},
108
- expect_in=["preference"],
109
- )
110
 
111
- # Paper details (DPO paper)
112
- await check(
113
- "paper_details for 2305.18290 (DPO paper)",
114
- {"operation": "paper_details", "arxiv_id": "2305.18290"},
115
- expect_in=["2305.18290", "Direct Preference"],
116
- )
 
 
 
117
 
 
118
 
119
- async def test_read_paper():
120
- print(f"\n{YELLOW}{'=' * 70}{RESET}")
121
- print(f"{YELLOW}Test Suite 2: Read Paper{RESET}")
122
- print(f"{YELLOW}{'=' * 70}{RESET}\n")
123
 
124
- # Read paper TOC (no section specified)
125
- output = await check(
126
- "read_paper TOC for 2305.18290",
127
- {"operation": "read_paper", "arxiv_id": "2305.18290"},
128
- expect_in=["Sections", "Abstract"],
129
  )
 
 
 
 
 
130
 
131
- # Read specific section by number
132
- await check(
133
- "read_paper section='4' (DPO paper)",
134
- {"operation": "read_paper", "arxiv_id": "2305.18290", "section": "4"},
 
 
 
135
  )
136
 
137
- # Read specific section by name
138
- await check(
139
- "read_paper section='Experiments'",
140
- {"operation": "read_paper", "arxiv_id": "2305.18290", "section": "Experiments"},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  )
142
 
143
- # Fallback for a paper that might not have HTML
144
- # Using a very old paper ID — may or may not have HTML
145
- await check(
146
- "read_paper fallback (old paper 1706.03762 β€” Attention Is All You Need)",
147
- {"operation": "read_paper", "arxiv_id": "1706.03762"},
148
- expect_in=["Attention"],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  )
 
 
 
 
 
 
 
 
150
 
 
 
151
 
152
- async def test_linked_resources():
153
- print(f"\n{YELLOW}{'=' * 70}{RESET}")
154
- print(f"{YELLOW}Test Suite 3: Linked Resources{RESET}")
155
- print(f"{YELLOW}{'=' * 70}{RESET}\n")
156
 
157
- # Find datasets linked to DPO paper
158
- await check(
159
- "find_datasets for 2305.18290",
160
- {"operation": "find_datasets", "arxiv_id": "2305.18290", "limit": 5},
 
 
 
161
  )
 
 
 
 
162
 
163
- # Find models linked to DPO paper
164
- await check(
165
- "find_models for 2305.18290",
166
- {"operation": "find_models", "arxiv_id": "2305.18290", "limit": 5},
 
167
  )
168
 
169
- # Find collections
170
- await check(
171
- "find_collections for 2305.18290",
172
- {"operation": "find_collections", "arxiv_id": "2305.18290"},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
  )
 
 
 
 
 
 
 
174
 
175
- # Find all resources (parallel fan-out)
176
- await check(
177
- "find_all_resources for 2305.18290",
178
- {"operation": "find_all_resources", "arxiv_id": "2305.18290"},
179
- expect_in=["Datasets", "Models", "Collections"],
 
 
 
 
180
  )
 
 
 
 
 
 
 
 
 
 
 
 
181
 
 
 
 
182
 
183
- async def test_edge_cases():
184
- print(f"\n{YELLOW}{'=' * 70}{RESET}")
185
- print(f"{YELLOW}Test Suite 4: Edge Cases{RESET}")
186
- print(f"{YELLOW}{'=' * 70}{RESET}\n")
187
 
188
- # Search with no results
189
- await check(
190
- "search gibberish query",
191
- {"operation": "search", "query": "xyzzyplugh_nonexistent_9999"},
192
- expect_in=["No papers found"],
 
 
193
  )
 
 
 
 
 
 
 
194
 
195
- # Missing required param
196
- await check(
197
- "search without query β†’ error",
198
- {"operation": "search"},
199
- expect_success=False,
200
- expect_in=["required"],
201
  )
 
 
 
 
 
 
 
 
 
 
 
202
 
203
- # Missing arxiv_id
204
- await check(
205
- "find_datasets without arxiv_id β†’ error",
206
- {"operation": "find_datasets"},
207
- expect_success=False,
208
- expect_in=["required"],
 
 
 
 
 
 
209
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
 
211
- # Invalid arxiv_id
212
- await check(
213
- "paper_details with nonexistent ID",
214
- {"operation": "paper_details", "arxiv_id": "0000.00000"},
215
- expect_success=False,
216
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
 
218
- # Invalid operation
219
- await check(
220
- "invalid operation β†’ error",
221
- {"operation": "nonexistent_op"},
222
- expect_success=False,
223
- expect_in=["Unknown operation"],
 
 
 
 
 
 
224
  )
 
 
 
 
 
 
 
 
225
 
226
- # read_paper with nonexistent section
227
- await check(
228
- "read_paper with bad section name",
229
- {"operation": "read_paper", "arxiv_id": "2305.18290", "section": "Nonexistent Section XYZ"},
230
- expect_success=False,
231
- expect_in=["not found"],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
232
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
233
 
234
 
235
  async def main():
236
  print("=" * 70)
237
  print(f"{BLUE}HF Papers Tool β€” Integration Tests{RESET}")
 
238
  print("=" * 70)
239
- print(f"{BLUE}All APIs are public, no authentication required.{RESET}\n")
240
-
241
- try:
242
- await test_paper_discovery()
243
- await test_read_paper()
244
- await test_linked_resources()
245
- await test_edge_cases()
246
- except Exception as e:
247
- print_error(f"Test suite crashed: {e}")
248
- import traceback
249
- traceback.print_exc()
250
- sys.exit(1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
251
 
252
  # Summary
253
  print(f"\n{'=' * 70}")
254
- total = passed + failed
255
- if failed == 0:
256
- print(f"{GREEN}βœ“ All {total} tests passed!{RESET}")
257
- else:
258
- print(f"{RED}βœ— {failed}/{total} tests failed{RESET}")
259
- print(f"{GREEN}βœ“ {passed}/{total} tests passed{RESET}")
260
-
 
 
 
 
261
  print(f"{'=' * 70}\n")
262
 
263
- if failed > 0:
264
  sys.exit(1)
265
 
266
 
 
2
  """
3
  Integration tests for HF Papers Tool
4
  Tests with real HF and arXiv APIs — all endpoints are public, no auth required.
5
+
6
+ Run: python tests/integration/tools/test_papers_integration.py
7
  """
8
  import asyncio
9
+ import re
10
  import sys
11
 
12
  sys.path.insert(0, ".")
 
18
  YELLOW = "\033[93m"
19
  RED = "\033[91m"
20
  BLUE = "\033[94m"
21
+ DIM = "\033[2m"
22
  RESET = "\033[0m"
23
 
24
+ assertions_passed = 0
25
+ assertions_failed = 0
26
+
27
 
28
  def print_test(msg):
29
+ print(f"\n{BLUE}{'─' * 70}{RESET}")
30
  print(f"{BLUE}[TEST]{RESET} {msg}")
31
+ print(f"{BLUE}{'─' * 70}{RESET}")
32
 
33
 
34
  def print_success(msg):
35
+ print(f"{GREEN} βœ“ {msg}{RESET}")
 
 
 
 
36
 
37
 
38
  def print_error(msg):
39
+ print(f"{RED} βœ— {msg}{RESET}")
40
 
41
 
42
+ def print_output(output: str, max_lines: int = 40):
43
+ """Print the full tool output, indented, with line limit."""
44
+ lines = output.split("\n")
45
+ for line in lines[:max_lines]:
46
+ print(f"{DIM} β”‚ {RESET}{line}")
47
+ if len(lines) > max_lines:
48
+ print(f"{DIM} β”‚ ... ({len(lines) - max_lines} more lines){RESET}")
49
 
50
 
51
+ def assert_true(condition: bool, msg: str) -> bool:
52
+ """Assert and print result. Returns True if passed."""
53
+ global assertions_passed, assertions_failed
54
+ if condition:
55
+ print_success(msg)
56
+ assertions_passed += 1
57
+ return True
58
+ else:
59
+ print_error(msg)
60
+ assertions_failed += 1
61
+ return False
62
 
63
 
64
+ async def run(args: dict) -> tuple[str, bool]:
 
65
  return await hf_papers_handler(args)
66
 
67
 
68
+ # ---------------------------------------------------------------------------
69
+ # Test Suite 1: Paper Discovery
70
+ # ---------------------------------------------------------------------------
 
 
 
 
71
 
 
 
 
 
 
72
 
73
+ async def test_trending():
74
+ print_test("trending (limit=3)")
75
+ output, success = await run({"operation": "trending", "limit": 3})
76
+ print_output(output)
 
 
 
77
 
78
+ ok = True
79
+ ok &= assert_true(success, "success=True")
80
+ ok &= assert_true("# Trending Papers" in output, "has '# Trending Papers' heading")
81
+ ok &= assert_true("Showing 3 paper(s)" in output, "shows exactly 3 papers")
82
 
83
+ # Check that each paper has an arxiv_id line
84
+ arxiv_ids = re.findall(r"\*\*arxiv_id:\*\* (\S+)", output)
85
+ ok &= assert_true(len(arxiv_ids) == 3, f"found 3 arxiv IDs: {arxiv_ids}")
86
 
87
+ # Check that IDs look valid (digits and dots)
88
+ for aid in arxiv_ids:
89
+ ok &= assert_true(
90
+ re.match(r"\d{4}\.\d{4,5}", aid) is not None,
91
+ f"arxiv_id '{aid}' looks valid (NNNN.NNNNN format)",
92
+ )
93
 
94
+ # Check each paper has an HF URL
95
+ hf_urls = re.findall(r"https://huggingface\.co/papers/\S+", output)
96
+ ok &= assert_true(len(hf_urls) == 3, f"found 3 HF paper URLs")
97
 
98
+ return ok
 
 
 
99
 
 
 
 
 
 
 
100
 
101
+ async def test_trending_with_query():
102
+ print_test("trending with query='language' (limit=5)")
103
+ output, success = await run({"operation": "trending", "query": "language", "limit": 5})
104
+ print_output(output)
 
105
 
106
+ ok = True
107
+ ok &= assert_true(success, "success=True")
108
+ ok &= assert_true("Filtered by: 'language'" in output, "shows filter applied")
 
 
 
109
 
110
+ # The filter may return 0-5 results depending on today's papers
111
+ match = re.search(r"Showing (\d+) paper\(s\)", output)
112
+ ok &= assert_true(match is not None, "has 'Showing N paper(s)' line")
113
+ if match:
114
+ count = int(match.group(1))
115
+ ok &= assert_true(count <= 5, f"returned {count} papers (within limit)")
116
+ # If we got results, just report the count (whether they mention 'language' is not checked here)
117
+ if count > 0:
118
+ print_success(f"got {count} filtered results")
119
 
120
+ return ok
121
 
 
 
 
 
122
 
123
+ async def test_search():
124
+ print_test("search 'direct preference optimization' (limit=3)")
125
+ output, success = await run(
126
+ {"operation": "search", "query": "direct preference optimization", "limit": 3}
 
127
  )
128
+ print_output(output)
129
+
130
+ ok = True
131
+ ok &= assert_true(success, "success=True")
132
+ ok &= assert_true("Papers matching" in output, "has matching header")
133
 
134
+ arxiv_ids = re.findall(r"\*\*arxiv_id:\*\* (\S+)", output)
135
+ ok &= assert_true(len(arxiv_ids) == 3, f"found 3 results: {arxiv_ids}")
136
+
137
+ # At least one result should mention "preference" in title or summary
138
+ ok &= assert_true(
139
+ "preference" in output.lower(),
140
+ "results mention 'preference' (relevant to query)",
141
  )
142
 
143
+ return ok
144
+
145
+
146
+ async def test_paper_details():
147
+ print_test("paper_details for 2305.18290 (DPO paper)")
148
+ output, success = await run({"operation": "paper_details", "arxiv_id": "2305.18290"})
149
+ print_output(output)
150
+
151
+ ok = True
152
+ ok &= assert_true(success, "success=True")
153
+ ok &= assert_true("Direct Preference Optimization" in output, "title contains 'Direct Preference Optimization'")
154
+ ok &= assert_true("2305.18290" in output, "contains arxiv_id")
155
+ ok &= assert_true("https://arxiv.org/abs/2305.18290" in output, "has arxiv URL")
156
+ ok &= assert_true("https://huggingface.co/papers/2305.18290" in output, "has HF URL")
157
+ ok &= assert_true("**Authors:**" in output, "has authors section")
158
+ ok &= assert_true("**upvotes:**" in output, "has upvotes")
159
+
160
+ # Check for abstract or AI summary
161
+ ok &= assert_true(
162
+ "## Abstract" in output or "## AI Summary" in output,
163
+ "has Abstract or AI Summary section",
164
  )
165
 
166
+ # Check for next steps hint
167
+ ok &= assert_true("read_paper" in output, "mentions read_paper as next step")
168
+ ok &= assert_true("find_all_resources" in output, "mentions find_all_resources as next step")
169
+
170
+ return ok
171
+
172
+
173
+ # ---------------------------------------------------------------------------
174
+ # Test Suite 2: Read Paper
175
+ # ---------------------------------------------------------------------------
176
+
177
+
178
+ async def test_read_paper_toc():
179
+ print_test("read_paper TOC for 2305.18290 (no section β†’ should return abstract + sections)")
180
+ output, success = await run({"operation": "read_paper", "arxiv_id": "2305.18290"})
181
+ print_output(output)
182
+
183
+ ok = True
184
+ ok &= assert_true(success, "success=True")
185
+ ok &= assert_true("## Abstract" in output, "has Abstract section")
186
+ ok &= assert_true("## Sections" in output, "has Sections heading (TOC)")
187
+
188
+ # Check that sections are listed with bold titles
189
+ section_titles = re.findall(r"- \*\*(.+?)\*\*:", output)
190
+ ok &= assert_true(len(section_titles) >= 5, f"found {len(section_titles)} sections (expect >=5 for a full paper)")
191
+ if section_titles:
192
+ print_success(f"sections found: {section_titles[:5]}{'...' if len(section_titles) > 5 else ''}")
193
+
194
+ # Check that expected DPO paper sections are present
195
+ section_text = " ".join(section_titles).lower()
196
+ ok &= assert_true("introduction" in section_text, "'Introduction' section present")
197
+ ok &= assert_true("experiment" in section_text, "'Experiment' section present")
198
+
199
+ # Check for the tip about reading specific sections
200
+ ok &= assert_true("section=" in output, "has tip about using section parameter")
201
+
202
+ # Check the abstract has actual content (not empty)
203
+ abstract_match = re.search(r"## Abstract\n(.+?)(?:\n##|\n\*\*Tip)", output, re.DOTALL)
204
+ if abstract_match:
205
+ abstract_text = abstract_match.group(1).strip()
206
+ ok &= assert_true(len(abstract_text) > 100, f"abstract has real content ({len(abstract_text)} chars)")
207
+ else:
208
+ ok &= assert_true(False, "could extract abstract text")
209
+
210
+ return ok
211
+
212
+
213
+ async def test_read_paper_section_by_number():
214
+ print_test("read_paper section='4' for 2305.18290")
215
+ output, success = await run(
216
+ {"operation": "read_paper", "arxiv_id": "2305.18290", "section": "4"}
217
  )
218
+ print_output(output, max_lines=30)
219
+
220
+ ok = True
221
+ ok &= assert_true(success, "success=True")
222
+ ok &= assert_true("https://arxiv.org/abs/2305.18290" in output, "has arxiv URL")
223
+
224
+ # Should have a section heading at top
225
+ ok &= assert_true(output.startswith("# "), "starts with heading")
226
 
227
+ # Should have substantial content
228
+ ok &= assert_true(len(output) > 500, f"section has substantial content ({len(output)} chars)")
229
 
230
+ # Should NOT have TOC structure (this is a single section, not the TOC)
231
+ ok &= assert_true("## Sections" not in output, "is a single section (not TOC)")
 
 
232
 
233
+ return ok
234
+
235
+
236
+ async def test_read_paper_section_by_name():
237
+ print_test("read_paper section='Experiments' for 2305.18290")
238
+ output, success = await run(
239
+ {"operation": "read_paper", "arxiv_id": "2305.18290", "section": "Experiments"}
240
  )
241
+ print_output(output, max_lines=30)
242
+
243
+ ok = True
244
+ ok &= assert_true(success, "success=True")
245
 
246
+ # Title should contain "Experiments"
247
+ first_line = output.split("\n")[0]
248
+ ok &= assert_true(
249
+ "experiment" in first_line.lower(),
250
+ f"heading contains 'Experiments': '{first_line}'",
251
  )
252
 
253
+ ok &= assert_true(len(output) > 500, f"section has substantial content ({len(output)} chars)")
254
+
255
+ return ok
256
+
257
+
258
+ async def test_read_paper_old_paper():
259
+ print_test("read_paper for 1706.03762 (Attention Is All You Need β€” 2017 paper)")
260
+ output, success = await run({"operation": "read_paper", "arxiv_id": "1706.03762"})
261
+ print_output(output, max_lines=30)
262
+
263
+ ok = True
264
+ ok &= assert_true(success, "success=True")
265
+ ok &= assert_true("attention" in output.lower(), "mentions 'attention' (relevant content)")
266
+
267
+ # Either we get sections (HTML available) or abstract fallback
268
+ has_sections = "## Sections" in output
269
+ has_abstract_fallback = "HTML version not available" in output
270
+ ok &= assert_true(
271
+ has_sections or has_abstract_fallback or "## Abstract" in output,
272
+ "got either full sections, or abstract fallback",
273
  )
274
+ if has_sections:
275
+ print_success("HTML version available β€” got full sections")
276
+ elif has_abstract_fallback:
277
+ print_success("HTML not available β€” graceful fallback to abstract")
278
+
279
+ return ok
280
+
281
 
282
+ # ---------------------------------------------------------------------------
283
+ # Test Suite 3: Linked Resources
284
+ # ---------------------------------------------------------------------------
285
+
286
+
287
+ async def test_find_datasets():
288
+ print_test("find_datasets for 2305.18290 (limit=5, sort=downloads)")
289
+ output, success = await run(
290
+ {"operation": "find_datasets", "arxiv_id": "2305.18290", "limit": 5}
291
  )
292
+ print_output(output)
293
+
294
+ ok = True
295
+ ok &= assert_true(success, "success=True")
296
+ ok &= assert_true("Datasets linked to paper 2305.18290" in output, "has correct heading")
297
+ ok &= assert_true("sorted by downloads" in output, "sorted by downloads (default)")
298
+
299
+ # Check we got dataset entries with IDs
300
+ dataset_ids = re.findall(r"\[([^\]]+)\]\(https://huggingface\.co/datasets/", output)
301
+ ok &= assert_true(len(dataset_ids) > 0, f"found {len(dataset_ids)} dataset links")
302
+ if dataset_ids:
303
+ print_success(f"dataset IDs: {dataset_ids}")
304
 
305
+ # Check download counts are present
306
+ downloads = re.findall(r"Downloads: ([\d,]+)", output)
307
+ ok &= assert_true(len(downloads) > 0, f"found download counts: {downloads}")
308
 
309
+ # Check for inspect hint
310
+ ok &= assert_true("hf_inspect_dataset" in output, "has inspect dataset hint")
 
 
311
 
312
+ return ok
313
+
314
+
315
+ async def test_find_datasets_sort_likes():
316
+ print_test("find_datasets for 2305.18290 (sort=likes, limit=3)")
317
+ output, success = await run(
318
+ {"operation": "find_datasets", "arxiv_id": "2305.18290", "limit": 3, "sort": "likes"}
319
  )
320
+ print_output(output)
321
+
322
+ ok = True
323
+ ok &= assert_true(success, "success=True")
324
+ ok &= assert_true("sorted by likes" in output, "sorted by likes")
325
+
326
+ return ok
327
 
328
+
329
+ async def test_find_models():
330
+ print_test("find_models for 2305.18290 (limit=5)")
331
+ output, success = await run(
332
+ {"operation": "find_models", "arxiv_id": "2305.18290", "limit": 5}
 
333
  )
334
+ print_output(output)
335
+
336
+ ok = True
337
+ ok &= assert_true(success, "success=True")
338
+ ok &= assert_true("Models linked to paper 2305.18290" in output, "has correct heading")
339
+
340
+ # Check model links
341
+ model_ids = re.findall(r"\[([^\]]+)\]\(https://huggingface\.co/", output)
342
+ ok &= assert_true(len(model_ids) > 0, f"found {len(model_ids)} model links")
343
+ if model_ids:
344
+ print_success(f"model IDs: {model_ids}")
345
 
346
+ # Check for pipeline_tag / library info
347
+ has_task = "Task:" in output
348
+ has_library = "Library:" in output
349
+ ok &= assert_true(has_task or has_library, "has Task or Library metadata")
350
+
351
+ return ok
352
+
353
+
354
+ async def test_find_collections():
355
+ print_test("find_collections for 2305.18290")
356
+ output, success = await run(
357
+ {"operation": "find_collections", "arxiv_id": "2305.18290"}
358
  )
359
+ print_output(output)
360
+
361
+ ok = True
362
+ ok &= assert_true(success, "success=True")
363
+ ok &= assert_true("Collections containing paper" in output, "has correct heading")
364
+
365
+ # Check collection entries
366
+ collection_urls = re.findall(r"https://huggingface\.co/collections/\S+", output)
367
+ ok &= assert_true(len(collection_urls) > 0, f"found {len(collection_urls)} collection URLs")
368
+
369
+ # Check for metadata
370
+ ok &= assert_true("Upvotes:" in output, "has upvote counts")
371
+ ok &= assert_true("Items:" in output, "has item counts")
372
+
373
+ return ok
374
 
375
+
376
+ async def test_find_all_resources():
377
+ print_test("find_all_resources for 2305.18290 (parallel fan-out)")
378
+ output, success = await run(
379
+ {"operation": "find_all_resources", "arxiv_id": "2305.18290"}
380
  )
381
+ print_output(output)
382
+
383
+ ok = True
384
+ ok &= assert_true(success, "success=True")
385
+ ok &= assert_true("# Resources linked to paper 2305.18290" in output, "has unified heading")
386
+ ok &= assert_true("https://huggingface.co/papers/2305.18290" in output, "has paper URL")
387
+
388
+ # All three sections should be present
389
+ ok &= assert_true("## Datasets" in output, "has Datasets section")
390
+ ok &= assert_true("## Models" in output, "has Models section")
391
+ ok &= assert_true("## Collections" in output, "has Collections section")
392
+
393
+ # Check that sections have actual entries (not just "None found")
394
+ ok &= assert_true("downloads)" in output, "datasets/models have download counts")
395
 
396
+ return ok
397
+
398
+
399
+ # ---------------------------------------------------------------------------
400
+ # Test Suite 4: Edge Cases
401
+ # ---------------------------------------------------------------------------
402
+
403
+
404
+ async def test_search_no_results():
405
+ print_test("search with gibberish query β†’ should return empty gracefully")
406
+ output, success = await run(
407
+ {"operation": "search", "query": "xyzzyplugh_nonexistent_topic_9999"}
408
  )
409
+ print_output(output)
410
+
411
+ ok = True
412
+ ok &= assert_true(success, "success=True (empty results is not an error)")
413
+ ok &= assert_true("No papers found" in output, "says 'No papers found'")
414
+
415
+ return ok
416
+
417
 
418
+ async def test_missing_query():
419
+ print_test("search without query β†’ should error")
420
+ output, success = await run({"operation": "search"})
421
+ print_output(output)
422
+
423
+ ok = True
424
+ ok &= assert_true(not success, "success=False (missing required param)")
425
+ ok &= assert_true("required" in output.lower(), "error mentions 'required'")
426
+
427
+ return ok
428
+
429
+
430
+ async def test_missing_arxiv_id():
431
+ print_test("find_datasets without arxiv_id β†’ should error")
432
+ output, success = await run({"operation": "find_datasets"})
433
+ print_output(output)
434
+
435
+ ok = True
436
+ ok &= assert_true(not success, "success=False")
437
+ ok &= assert_true("required" in output.lower(), "error mentions 'required'")
438
+
439
+ return ok
440
+
441
+
442
+ async def test_invalid_arxiv_id():
443
+ print_test("paper_details with nonexistent arxiv ID")
444
+ output, success = await run({"operation": "paper_details", "arxiv_id": "0000.00000"})
445
+ print_output(output)
446
+
447
+ ok = True
448
+ ok &= assert_true(not success, "success=False (API returns error)")
449
+
450
+ return ok
451
+
452
+
453
+ async def test_invalid_operation():
454
+ print_test("invalid operation name β†’ should error")
455
+ output, success = await run({"operation": "nonexistent_op"})
456
+ print_output(output)
457
+
458
+ ok = True
459
+ ok &= assert_true(not success, "success=False")
460
+ ok &= assert_true("Unknown operation" in output, "says 'Unknown operation'")
461
+ ok &= assert_true("trending" in output, "lists valid operations")
462
+
463
+ return ok
464
+
465
+
466
+ async def test_read_paper_bad_section():
467
+ print_test("read_paper with nonexistent section β†’ should error with available sections")
468
+ output, success = await run(
469
+ {"operation": "read_paper", "arxiv_id": "2305.18290", "section": "Nonexistent Section XYZ"}
470
  )
471
+ print_output(output)
472
+
473
+ ok = True
474
+ ok &= assert_true(not success, "success=False")
475
+ ok &= assert_true("not found" in output.lower(), "says section 'not found'")
476
+ ok &= assert_true("Introduction" in output, "lists available sections (includes Introduction)")
477
+
478
+ return ok
479
+
480
+
481
+ # ---------------------------------------------------------------------------
482
+ # Main
483
+ # ---------------------------------------------------------------------------
484
 
485
 
486
  async def main():
487
  print("=" * 70)
488
  print(f"{BLUE}HF Papers Tool β€” Integration Tests{RESET}")
489
+ print(f"{BLUE}All APIs are public, no authentication required.{RESET}")
490
  print("=" * 70)
491
+
492
+ all_tests = [
493
+ # Suite 1: Paper Discovery
494
+ ("Paper Discovery", [
495
+ test_trending,
496
+ test_trending_with_query,
497
+ test_search,
498
+ test_paper_details,
499
+ ]),
500
+ # Suite 2: Read Paper
501
+ ("Read Paper", [
502
+ test_read_paper_toc,
503
+ test_read_paper_section_by_number,
504
+ test_read_paper_section_by_name,
505
+ test_read_paper_old_paper,
506
+ ]),
507
+ # Suite 3: Linked Resources
508
+ ("Linked Resources", [
509
+ test_find_datasets,
510
+ test_find_datasets_sort_likes,
511
+ test_find_models,
512
+ test_find_collections,
513
+ test_find_all_resources,
514
+ ]),
515
+ # Suite 4: Edge Cases
516
+ ("Edge Cases", [
517
+ test_search_no_results,
518
+ test_missing_query,
519
+ test_missing_arxiv_id,
520
+ test_invalid_arxiv_id,
521
+ test_invalid_operation,
522
+ test_read_paper_bad_section,
523
+ ]),
524
+ ]
525
+
526
+ global assertions_passed, assertions_failed
527
+ suite_results = []
528
+
529
+ for suite_name, tests in all_tests:
530
+ print(f"\n{YELLOW}{'=' * 70}{RESET}")
531
+ print(f"{YELLOW}Test Suite: {suite_name} ({len(tests)} tests){RESET}")
532
+ print(f"{YELLOW}{'=' * 70}{RESET}")
533
+
534
+ suite_pass = 0
535
+ suite_fail = 0
536
+
537
+ for test_fn in tests:
538
+ try:
539
+ test_ok = await test_fn()
540
+ if test_ok:
541
+ suite_pass += 1
542
+ else:
543
+ suite_fail += 1
544
+ except Exception as e:
545
+ print_error(f"CRASHED: {e}")
546
+ import traceback
547
+ traceback.print_exc()
548
+ suite_fail += 1
549
+
550
+ suite_results.append((suite_name, suite_pass, suite_fail))
551
 
552
  # Summary
553
  print(f"\n{'=' * 70}")
554
+ print(f"{BLUE}Summary{RESET}")
555
+ print(f"{'=' * 70}")
556
+ for suite_name, sp, sf in suite_results:
557
+ icon = f"{GREEN}βœ“{RESET}" if sf == 0 else f"{RED}βœ—{RESET}"
558
+ print(f" {icon} {suite_name}: {sp}/{sp + sf} tests passed")
559
+
560
+ print(f"{'─' * 70}")
561
+ total_tests = sum(sp + sf for _, sp, sf in suite_results)
562
+ total_failed = sum(sf for _, _, sf in suite_results)
563
+ print(f" Assertions: {assertions_passed} passed, {assertions_failed} failed")
564
+ print(f" Tests: {total_tests - total_failed}/{total_tests} passed")
565
  print(f"{'=' * 70}\n")
566
 
567
+ if total_failed > 0 or assertions_failed > 0:
568
  sys.exit(1)
569
 
570