akseljoonas HF Staff commited on
Commit
caa9017
·
1 Parent(s): 1d719d4

unified ref tag and improved read_file

Browse files
agent/tools/github_find_examples.py CHANGED
@@ -106,7 +106,7 @@ def _get_repo_tree(org: str, repo: str, token: str) -> tuple[List[Dict[str, Any]
106
  files = [
107
  {
108
  "path": item["path"],
109
- "sha": item["sha"],
110
  "size": item.get("size", 0),
111
  "url": f"https://github.com/{full_repo}/blob/{default_branch}/{item['path']}",
112
  }
@@ -338,8 +338,12 @@ def find_examples(
338
 
339
  for i, file in enumerate(results, 1):
340
  lines.append(f"{i}. **{file['path']}** (score: {file['score']})")
341
- lines.append(f" Size: {file['size']:,} bytes | SHA: {file['sha'][:7]}")
342
  lines.append(f" URL: {file['url']}")
 
 
 
 
343
  lines.append("")
344
 
345
  return {
 
106
  files = [
107
  {
108
  "path": item["path"],
109
+ "ref": item["sha"],
110
  "size": item.get("size", 0),
111
  "url": f"https://github.com/{full_repo}/blob/{default_branch}/{item['path']}",
112
  }
 
338
 
339
  for i, file in enumerate(results, 1):
340
  lines.append(f"{i}. **{file['path']}** (score: {file['score']})")
341
+ lines.append(f" Size: {file['size']:,} bytes | Ref: {file['ref'][:7]}")
342
  lines.append(f" URL: {file['url']}")
343
+
344
+ # Copyable parameters for read_file tool
345
+ read_params = f"{{'repo': '{org}/{repo}', 'path': '{file['path']}'}}"
346
+ lines.append(f" To read, use: {read_params}")
347
  lines.append("")
348
 
349
  return {
agent/tools/github_list_repos.py CHANGED
@@ -186,6 +186,9 @@ def list_repos(
186
  lines.append(f" URL: {repo['html_url']}")
187
  if repo["topics"]:
188
  lines.append(f" Topics: {', '.join(repo['topics'][:5])}")
 
 
 
189
  lines.append("")
190
 
191
  return {
 
186
  lines.append(f" URL: {repo['html_url']}")
187
  if repo["topics"]:
188
  lines.append(f" Topics: {', '.join(repo['topics'][:5])}")
189
+
190
+ # Copyable parameters for other tools
191
+ lines.append(f" Use in tools: {{'repo': '{repo['full_name']}'}}")
192
  lines.append("")
193
 
194
  return {
agent/tools/github_read_file.py CHANGED
@@ -5,14 +5,65 @@ Fetch exact file contents with metadata, supporting line ranges for efficient re
5
  """
6
 
7
  import base64
 
8
  import os
9
  from typing import Any, Dict, Optional
10
 
 
11
  import requests
 
 
12
 
13
  from agent.tools.types import ToolResult
14
 
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  def read_file(
17
  repo: str,
18
  path: str,
@@ -126,16 +177,14 @@ def read_file(
126
  }
127
  content = raw_response.text
128
 
129
- # Get metadata
130
- file_sha = data.get("sha")
131
- file_size = data.get("size", 0)
132
 
133
  # Process line ranges
134
  lines = content.split("\n")
135
  total_lines = len(lines)
136
 
137
  truncated = False
138
- message = None
139
 
140
  if line_start is None and line_end is None:
141
  # No range specified
@@ -143,7 +192,6 @@ def read_file(
143
  line_start = 1
144
  line_end = 300
145
  truncated = True
146
- message = f"File has {total_lines} lines. Showing first 300 lines. Use line_start and line_end to view more."
147
  else:
148
  line_start = 1
149
  line_end = total_lines
@@ -170,21 +218,19 @@ def read_file(
170
  selected_content = "\n".join(selected_lines)
171
 
172
  # Format output
173
- lines_output = [f"**File: {repo}/{path}**"]
174
- lines_output.append(f"SHA: {file_sha}")
175
- lines_output.append(f"Size: {file_size:,} bytes")
176
- lines_output.append(
177
- f"Lines: {line_start}-{line_end} of {total_lines} total lines"
178
- )
179
  if ref and ref != "HEAD":
180
  lines_output.append(f"Ref: {ref}")
181
- if truncated and message:
182
- lines_output.append(f"⚠️ {message}")
183
- lines_output.append("\n**Content:**")
184
  lines_output.append("```")
185
  lines_output.append(selected_content)
186
  lines_output.append("```")
187
-
 
 
 
188
  return {
189
  "formatted": "\n".join(lines_output),
190
  "totalResults": 1,
@@ -210,12 +256,11 @@ GITHUB_READ_FILE_TOOL_SPEC = {
210
  "- Auto-truncates large files to 300 lines (with warning)\n"
211
  "- Works with any branch, tag, or commit SHA\n"
212
  "- Returns file metadata (SHA, size, line count)\n"
213
- "- Handles both small and large files efficiently\n\n"
214
  "## Examples:\n\n"
215
  "**Read entire README:**\n"
216
- "{'repo': 'facebook/react', 'path': 'README.md'}\n\n"
217
  "**Read specific line range:**\n"
218
- "{'repo': 'torvalds/linux', 'path': 'kernel/sched/core.c', 'line_start': 100, 'line_end': 150}\n\n"
219
  "**Read from specific branch:**\n"
220
  "{'repo': 'python/cpython', 'path': 'Lib/ast.py', 'ref': 'main', 'line_start': 1, 'line_end': 50}\n\n"
221
  "**Read from specific commit:**\n"
 
5
  """
6
 
7
  import base64
8
+ import json
9
  import os
10
  from typing import Any, Dict, Optional
11
 
12
+ import nbformat
13
  import requests
14
+ from nbconvert import MarkdownExporter
15
+ from nbconvert.preprocessors import ClearOutputPreprocessor, TagRemovePreprocessor
16
 
17
  from agent.tools.types import ToolResult
18
 
19
 
20
+ def _convert_ipynb_to_markdown(content: str) -> str:
21
+ """
22
+ Convert Jupyter notebook JSON to LLM-friendly Markdown.
23
+
24
+ Args:
25
+ content: Raw notebook JSON string
26
+
27
+ Returns:
28
+ Converted Markdown string
29
+ """
30
+ try:
31
+ # Parse notebook JSON
32
+ nb_dict = json.loads(content)
33
+
34
+ # Normalize cell sources (can be string or list of strings)
35
+ if "cells" in nb_dict:
36
+ for cell in nb_dict["cells"]:
37
+ if "source" in cell and isinstance(cell["source"], list):
38
+ cell["source"] = "".join(cell["source"])
39
+
40
+ # Read notebook with explicit version
41
+ nb = nbformat.reads(json.dumps(nb_dict), as_version=4)
42
+
43
+ # Strip outputs for LLM readability (outputs can be noisy/large)
44
+ clear = ClearOutputPreprocessor()
45
+ nb, _ = clear.preprocess(nb, {})
46
+
47
+ # Optionally remove cells tagged with "hide" or similar
48
+ remove = TagRemovePreprocessor(
49
+ remove_cell_tags={"hide", "hidden", "remove"},
50
+ remove_input_tags=set(),
51
+ remove_all_outputs_tags=set(),
52
+ )
53
+ nb, _ = remove.preprocess(nb, {})
54
+
55
+ # Convert to markdown
56
+ exporter = MarkdownExporter()
57
+ markdown, _ = exporter.from_notebook_node(nb)
58
+
59
+ return markdown
60
+
61
+ except json.JSONDecodeError:
62
+ return content
63
+ except Exception:
64
+ return content
65
+
66
+
67
  def read_file(
68
  repo: str,
69
  path: str,
 
177
  }
178
  content = raw_response.text
179
 
180
+ if path.lower().endswith(".ipynb"):
181
+ content = _convert_ipynb_to_markdown(content)
 
182
 
183
  # Process line ranges
184
  lines = content.split("\n")
185
  total_lines = len(lines)
186
 
187
  truncated = False
 
188
 
189
  if line_start is None and line_end is None:
190
  # No range specified
 
192
  line_start = 1
193
  line_end = 300
194
  truncated = True
 
195
  else:
196
  line_start = 1
197
  line_end = total_lines
 
218
  selected_content = "\n".join(selected_lines)
219
 
220
  # Format output
221
+ lines_output = [f"**Reading file from repo: {repo}, path: {path}**"]
222
+
 
 
 
 
223
  if ref and ref != "HEAD":
224
  lines_output.append(f"Ref: {ref}")
225
+
226
+ lines_output.append("\n**File content:")
 
227
  lines_output.append("```")
228
  lines_output.append(selected_content)
229
  lines_output.append("```")
230
+ if truncated:
231
+ lines_output.append(
232
+ f"Currently showing lines {line_start}-{line_end} out of {total_lines} total lines. Use line_start and line_end to view more lines."
233
+ )
234
  return {
235
  "formatted": "\n".join(lines_output),
236
  "totalResults": 1,
 
256
  "- Auto-truncates large files to 300 lines (with warning)\n"
257
  "- Works with any branch, tag, or commit SHA\n"
258
  "- Returns file metadata (SHA, size, line count)\n"
 
259
  "## Examples:\n\n"
260
  "**Read entire README:**\n"
261
+ "{'repo': 'huggingface/transformers', 'path': 'README.md'}\n\n"
262
  "**Read specific line range:**\n"
263
+ "{'repo': 'huggingface/trl', 'path': '/examples/scripts/grpo_vlm.py', 'line_start': 100, 'line_end': 150}\n\n"
264
  "**Read from specific branch:**\n"
265
  "{'repo': 'python/cpython', 'path': 'Lib/ast.py', 'ref': 'main', 'line_start': 1, 'line_end': 50}\n\n"
266
  "**Read from specific commit:**\n"
agent/tools/github_search_code.py CHANGED
@@ -215,6 +215,10 @@ def search_code(
215
  )
216
  lines_output.append(f" URL: {match['url']}")
217
 
 
 
 
 
218
  # Show snippet (first 5 lines)
219
  snippet_lines = match["snippet"].split("\n")[:5]
220
  if snippet_lines:
 
215
  )
216
  lines_output.append(f" URL: {match['url']}")
217
 
218
+ # Copyable parameters for read_file tool
219
+ read_params = f"{{'repo': '{match['repo']}', 'path': '{match['path']}', 'ref': '{match['ref'][:7]}'}}"
220
+ lines_output.append(f" To read, use: {read_params}")
221
+
222
  # Show snippet (first 5 lines)
223
  snippet_lines = match["snippet"].split("\n")[:5]
224
  if snippet_lines:
pyproject.toml CHANGED
@@ -24,4 +24,7 @@ dependencies = [
24
  "ipykernel>=7.1.0",
25
  "ipywidgets>=8.1.8",
26
  "thefuzz>=0.22.1",
 
 
 
27
  ]
 
24
  "ipykernel>=7.1.0",
25
  "ipywidgets>=8.1.8",
26
  "thefuzz>=0.22.1",
27
+ "nbconvert>=7.16.6",
28
+ "nbformat>=5.10.4",
29
+ "markitdown[all,docx,outlook,pdf,pptx,xls,xlsx]>=0.1.4",
30
  ]
uv.lock CHANGED
The diff for this file is too large to render. See raw diff