Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
Commit ·
ccbe2d2
1
Parent(s): b70fed7
poc github tools
Browse files- agent/core/agent_loop.py +8 -4
- agent/core/tools.py +33 -1
- agent/main.py +12 -6
- agent/tools/__init__.py +32 -0
- agent/tools/github_find_examples.py +524 -0
- agent/tools/github_list_repos.py +324 -0
- agent/tools/github_read_file.py +392 -0
- agent/tools/github_search_code.py +381 -0
- agent/tools/jobs_tool.py +19 -5
agent/core/agent_loop.py
CHANGED
|
@@ -25,9 +25,15 @@ def _validate_tool_args(tool_args: dict) -> tuple[bool, str | None]:
|
|
| 25 |
args = tool_args.get("args", {})
|
| 26 |
# Sometimes LLM passes args as string instead of dict
|
| 27 |
if isinstance(args, str):
|
| 28 |
-
return
|
|
|
|
|
|
|
|
|
|
| 29 |
if not isinstance(args, dict) and args is not None:
|
| 30 |
-
return
|
|
|
|
|
|
|
|
|
|
| 31 |
return True, None
|
| 32 |
|
| 33 |
|
|
@@ -38,8 +44,6 @@ def _needs_approval(tool_name: str, tool_args: dict) -> bool:
|
|
| 38 |
if not args_valid:
|
| 39 |
return False
|
| 40 |
|
| 41 |
-
args = tool_args.get("args", {})
|
| 42 |
-
|
| 43 |
if tool_name == "hf_jobs":
|
| 44 |
# Check if it's a run or uv operation
|
| 45 |
operation = tool_args.get("operation", "")
|
|
|
|
| 25 |
args = tool_args.get("args", {})
|
| 26 |
# Sometimes LLM passes args as string instead of dict
|
| 27 |
if isinstance(args, str):
|
| 28 |
+
return (
|
| 29 |
+
False,
|
| 30 |
+
f"Tool call error: 'args' must be a JSON object, not a string. You passed: {repr(args)}",
|
| 31 |
+
)
|
| 32 |
if not isinstance(args, dict) and args is not None:
|
| 33 |
+
return (
|
| 34 |
+
False,
|
| 35 |
+
f"Tool call error: 'args' must be a JSON object. You passed type: {type(args).__name__}",
|
| 36 |
+
)
|
| 37 |
return True, None
|
| 38 |
|
| 39 |
|
|
|
|
| 44 |
if not args_valid:
|
| 45 |
return False
|
| 46 |
|
|
|
|
|
|
|
| 47 |
if tool_name == "hf_jobs":
|
| 48 |
# Check if it's a run or uv operation
|
| 49 |
operation = tool_args.get("operation", "")
|
agent/core/tools.py
CHANGED
|
@@ -19,6 +19,13 @@ from agent.tools.docs_tools import (
|
|
| 19 |
explore_hf_docs_handler,
|
| 20 |
hf_docs_fetch_handler,
|
| 21 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, hf_jobs_handler
|
| 23 |
from agent.tools.plan_tool import PLAN_TOOL_SPEC, plan_tool_handler
|
| 24 |
from agent.tools.private_hf_repo_tools import (
|
|
@@ -224,7 +231,7 @@ class ToolRouter:
|
|
| 224 |
def create_builtin_tools() -> list[ToolSpec]:
|
| 225 |
"""Create built-in tool specifications"""
|
| 226 |
print(
|
| 227 |
-
f"Creating built-in tools: {EXPLORE_HF_DOCS_TOOL_SPEC['name']}, {HF_DOCS_FETCH_TOOL_SPEC['name']}, {PLAN_TOOL_SPEC['name']}, {HF_JOBS_TOOL_SPEC['name']}, {PRIVATE_HF_REPO_TOOL_SPEC['name']}, {UTILS_TOOL_SPEC['name']}"
|
| 228 |
)
|
| 229 |
# in order of importance
|
| 230 |
return [
|
|
@@ -266,4 +273,29 @@ def create_builtin_tools() -> list[ToolSpec]:
|
|
| 266 |
parameters=UTILS_TOOL_SPEC["parameters"],
|
| 267 |
handler=utils_handler,
|
| 268 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 269 |
]
|
|
|
|
| 19 |
explore_hf_docs_handler,
|
| 20 |
hf_docs_fetch_handler,
|
| 21 |
)
|
| 22 |
+
from agent.tools.github_find_examples import (
|
| 23 |
+
FIND_EXAMPLES_TOOL_SPEC,
|
| 24 |
+
find_examples_handler,
|
| 25 |
+
)
|
| 26 |
+
from agent.tools.github_list_repos import LIST_REPOS_TOOL_SPEC, list_repos_handler
|
| 27 |
+
from agent.tools.github_read_file import READ_FILE_TOOL_SPEC, read_file_handler
|
| 28 |
+
from agent.tools.github_search_code import SEARCH_CODE_TOOL_SPEC, search_code_handler
|
| 29 |
from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, hf_jobs_handler
|
| 30 |
from agent.tools.plan_tool import PLAN_TOOL_SPEC, plan_tool_handler
|
| 31 |
from agent.tools.private_hf_repo_tools import (
|
|
|
|
| 231 |
def create_builtin_tools() -> list[ToolSpec]:
|
| 232 |
"""Create built-in tool specifications"""
|
| 233 |
print(
|
| 234 |
+
f"Creating built-in tools: {EXPLORE_HF_DOCS_TOOL_SPEC['name']}, {HF_DOCS_FETCH_TOOL_SPEC['name']}, {PLAN_TOOL_SPEC['name']}, {HF_JOBS_TOOL_SPEC['name']}, {PRIVATE_HF_REPO_TOOL_SPEC['name']}, {UTILS_TOOL_SPEC['name']}, {FIND_EXAMPLES_TOOL_SPEC['name']}, {READ_FILE_TOOL_SPEC['name']}, {LIST_REPOS_TOOL_SPEC['name']}, {SEARCH_CODE_TOOL_SPEC['name']}"
|
| 235 |
)
|
| 236 |
# in order of importance
|
| 237 |
return [
|
|
|
|
| 273 |
parameters=UTILS_TOOL_SPEC["parameters"],
|
| 274 |
handler=utils_handler,
|
| 275 |
),
|
| 276 |
+
# GitHub tools - 4 separate tools
|
| 277 |
+
ToolSpec(
|
| 278 |
+
name=FIND_EXAMPLES_TOOL_SPEC["name"],
|
| 279 |
+
description=FIND_EXAMPLES_TOOL_SPEC["description"],
|
| 280 |
+
parameters=FIND_EXAMPLES_TOOL_SPEC["parameters"],
|
| 281 |
+
handler=find_examples_handler,
|
| 282 |
+
),
|
| 283 |
+
ToolSpec(
|
| 284 |
+
name=READ_FILE_TOOL_SPEC["name"],
|
| 285 |
+
description=READ_FILE_TOOL_SPEC["description"],
|
| 286 |
+
parameters=READ_FILE_TOOL_SPEC["parameters"],
|
| 287 |
+
handler=read_file_handler,
|
| 288 |
+
),
|
| 289 |
+
ToolSpec(
|
| 290 |
+
name=LIST_REPOS_TOOL_SPEC["name"],
|
| 291 |
+
description=LIST_REPOS_TOOL_SPEC["description"],
|
| 292 |
+
parameters=LIST_REPOS_TOOL_SPEC["parameters"],
|
| 293 |
+
handler=list_repos_handler,
|
| 294 |
+
),
|
| 295 |
+
ToolSpec(
|
| 296 |
+
name=SEARCH_CODE_TOOL_SPEC["name"],
|
| 297 |
+
description=SEARCH_CODE_TOOL_SPEC["description"],
|
| 298 |
+
parameters=SEARCH_CODE_TOOL_SPEC["parameters"],
|
| 299 |
+
handler=search_code_handler,
|
| 300 |
+
),
|
| 301 |
]
|
agent/main.py
CHANGED
|
@@ -222,11 +222,15 @@ async def event_listener(
|
|
| 222 |
|
| 223 |
# Build repo URL
|
| 224 |
type_path = "" if repo_type == "model" else f"{repo_type}s"
|
| 225 |
-
repo_url =
|
|
|
|
|
|
|
|
|
|
|
|
|
| 226 |
|
| 227 |
print(f"Repository: {repo_id}")
|
| 228 |
print(f"Type: {repo_type}")
|
| 229 |
-
print(
|
| 230 |
print(f"URL: {repo_url}")
|
| 231 |
|
| 232 |
# Show file preview for upload_file operation
|
|
@@ -237,9 +241,9 @@ async def event_listener(
|
|
| 237 |
|
| 238 |
if isinstance(file_content, str):
|
| 239 |
# Calculate metrics
|
| 240 |
-
all_lines = file_content.split(
|
| 241 |
line_count = len(all_lines)
|
| 242 |
-
size_bytes = len(file_content.encode(
|
| 243 |
size_kb = size_bytes / 1024
|
| 244 |
size_mb = size_kb / 1024
|
| 245 |
|
|
@@ -251,8 +255,10 @@ async def event_listener(
|
|
| 251 |
|
| 252 |
# Show preview
|
| 253 |
preview_lines = all_lines[:5]
|
| 254 |
-
preview =
|
| 255 |
-
print(
|
|
|
|
|
|
|
| 256 |
if len(all_lines) > 5:
|
| 257 |
print("...")
|
| 258 |
|
|
|
|
| 222 |
|
| 223 |
# Build repo URL
|
| 224 |
type_path = "" if repo_type == "model" else f"{repo_type}s"
|
| 225 |
+
repo_url = (
|
| 226 |
+
f"https://huggingface.co/{type_path}/{repo_id}".replace(
|
| 227 |
+
"//", "/"
|
| 228 |
+
)
|
| 229 |
+
)
|
| 230 |
|
| 231 |
print(f"Repository: {repo_id}")
|
| 232 |
print(f"Type: {repo_type}")
|
| 233 |
+
print("Private: Yes")
|
| 234 |
print(f"URL: {repo_url}")
|
| 235 |
|
| 236 |
# Show file preview for upload_file operation
|
|
|
|
| 241 |
|
| 242 |
if isinstance(file_content, str):
|
| 243 |
# Calculate metrics
|
| 244 |
+
all_lines = file_content.split("\n")
|
| 245 |
line_count = len(all_lines)
|
| 246 |
+
size_bytes = len(file_content.encode("utf-8"))
|
| 247 |
size_kb = size_bytes / 1024
|
| 248 |
size_mb = size_kb / 1024
|
| 249 |
|
|
|
|
| 255 |
|
| 256 |
# Show preview
|
| 257 |
preview_lines = all_lines[:5]
|
| 258 |
+
preview = "\n".join(preview_lines)
|
| 259 |
+
print(
|
| 260 |
+
f"Content preview (first 5 lines):\n{preview}"
|
| 261 |
+
)
|
| 262 |
if len(all_lines) > 5:
|
| 263 |
print("...")
|
| 264 |
|
agent/tools/__init__.py
CHANGED
|
@@ -2,6 +2,26 @@
|
|
| 2 |
Hugging Face tools for the agent
|
| 3 |
"""
|
| 4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, HfJobsTool, hf_jobs_handler
|
| 6 |
from agent.tools.types import ToolResult
|
| 7 |
|
|
@@ -10,4 +30,16 @@ __all__ = [
|
|
| 10 |
"HF_JOBS_TOOL_SPEC",
|
| 11 |
"hf_jobs_handler",
|
| 12 |
"HfJobsTool",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
]
|
|
|
|
| 2 |
Hugging Face tools for the agent
|
| 3 |
"""
|
| 4 |
|
| 5 |
+
from agent.tools.github_find_examples import (
|
| 6 |
+
FIND_EXAMPLES_TOOL_SPEC,
|
| 7 |
+
FindExamplesTool,
|
| 8 |
+
find_examples_handler,
|
| 9 |
+
)
|
| 10 |
+
from agent.tools.github_list_repos import (
|
| 11 |
+
LIST_REPOS_TOOL_SPEC,
|
| 12 |
+
ListReposTool,
|
| 13 |
+
list_repos_handler,
|
| 14 |
+
)
|
| 15 |
+
from agent.tools.github_read_file import (
|
| 16 |
+
READ_FILE_TOOL_SPEC,
|
| 17 |
+
ReadFileTool,
|
| 18 |
+
read_file_handler,
|
| 19 |
+
)
|
| 20 |
+
from agent.tools.github_search_code import (
|
| 21 |
+
SEARCH_CODE_TOOL_SPEC,
|
| 22 |
+
SearchCodeTool,
|
| 23 |
+
search_code_handler,
|
| 24 |
+
)
|
| 25 |
from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, HfJobsTool, hf_jobs_handler
|
| 26 |
from agent.tools.types import ToolResult
|
| 27 |
|
|
|
|
| 30 |
"HF_JOBS_TOOL_SPEC",
|
| 31 |
"hf_jobs_handler",
|
| 32 |
"HfJobsTool",
|
| 33 |
+
"FIND_EXAMPLES_TOOL_SPEC",
|
| 34 |
+
"find_examples_handler",
|
| 35 |
+
"FindExamplesTool",
|
| 36 |
+
"READ_FILE_TOOL_SPEC",
|
| 37 |
+
"read_file_handler",
|
| 38 |
+
"ReadFileTool",
|
| 39 |
+
"LIST_REPOS_TOOL_SPEC",
|
| 40 |
+
"list_repos_handler",
|
| 41 |
+
"ListReposTool",
|
| 42 |
+
"SEARCH_CODE_TOOL_SPEC",
|
| 43 |
+
"search_code_handler",
|
| 44 |
+
"SearchCodeTool",
|
| 45 |
]
|
agent/tools/github_find_examples.py
ADDED
|
@@ -0,0 +1,524 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
GitHub Find Examples Tool
|
| 3 |
+
|
| 4 |
+
Finds examples, guides, and tutorials for a library using deterministic queries and heuristics.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import asyncio
|
| 8 |
+
import math
|
| 9 |
+
import os
|
| 10 |
+
from dataclasses import asdict, dataclass
|
| 11 |
+
from datetime import datetime, timedelta
|
| 12 |
+
from typing import Any, Dict, List, Optional
|
| 13 |
+
|
| 14 |
+
try:
|
| 15 |
+
import requests
|
| 16 |
+
except ImportError:
|
| 17 |
+
raise ImportError(
|
| 18 |
+
"requests library is required. Install with: pip install requests"
|
| 19 |
+
)
|
| 20 |
+
|
| 21 |
+
from agent.tools.types import ToolResult
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
@dataclass
class Example:
    """An example file with metadata and relevance score."""

    # Repository full name, e.g. "huggingface/transformers".
    repo: str
    # File path within the repository.
    path: str
    # Git blob SHA of the file (from the search API result).
    ref: str
    # HTML URL of the file on github.com.
    url: str
    # Heuristic relevance score computed by _score_and_rank (higher is better).
    score: float
    # Human-readable summary of which heuristics contributed to the score.
    reason: str
    # Star count of the containing repository (0 if metadata fetch failed).
    repo_stars: int
    # ISO-8601 timestamp of the repo's last update ("" if unknown).
    repo_updated: str
    # File size in bytes as reported by the search API.
    file_size: int

    def to_dict(self) -> dict:
        """Return a plain-dict representation of this example."""
        return asdict(self)
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
class GitHubAPIError(Exception):
    """Raised for GitHub API failures (missing token, bad responses)."""
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
# Path-based scoring weights.
# Substring match against the lowercased file path; only the FIRST matching
# pattern contributes (see _score_and_rank), so order here matters.
PATH_SCORES = {
    "README.md": 100,
    "readme.md": 100,
    "docs/": 80,
    "doc/": 80,
    "examples/": 90,
    "example/": 90,
    "notebooks/": 70,
    "notebook/": 70,
    "tutorials/": 85,
    "tutorial/": 85,
    "guides/": 85,
    "guide/": 85,
    "tests/": 40,
    "test/": 40,
    "demos/": 75,
    "demo/": 75,
    "samples/": 75,
    "sample/": 75,
}

# Content-based scoring keywords.
# Matched (case-insensitively) against the text fragments returned by the
# code-search API; every matching keyword contributes its points.
CONTENT_KEYWORDS = {
    'if __name__ == "__main__"': 50,
    "if __name__ == '__main__'": 50,
    "quickstart": 60,
    "quick start": 60,
    "getting started": 60,
    "tutorial": 50,
    "example usage": 55,
    "usage example": 55,
    "how to use": 45,
    "basic example": 50,
    "simple example": 50,
}

# File extension preferences.
# Small tiebreaker bonus; only the first matching suffix counts.
PREFERRED_EXTENSIONS = {
    ".py": 10,
    ".ipynb": 15,
    ".md": 20,
    ".rst": 10,
    ".js": 10,
    ".ts": 10,
    ".go": 10,
    ".java": 10,
    ".cpp": 10,
    ".c": 10,
}
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def _get_github_token() -> str:
|
| 101 |
+
"""Get GitHub token from environment."""
|
| 102 |
+
token = os.environ.get("GITHUB_TOKEN")
|
| 103 |
+
if not token:
|
| 104 |
+
raise GitHubAPIError(
|
| 105 |
+
"GITHUB_TOKEN environment variable is required. "
|
| 106 |
+
"Set it with: export GITHUB_TOKEN=your_token_here"
|
| 107 |
+
)
|
| 108 |
+
return token
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
def _execute_search(query: str, token: str, limit: int = 20) -> List[Dict[str, Any]]:
    """Execute a GitHub code search query.

    Paginates through https://api.github.com/search/code until `limit`
    items are collected or results run out. Best-effort: any non-200
    response or exception stops pagination and returns whatever was
    gathered so far (possibly empty).

    Args:
        query: Full search query string (GitHub code-search syntax).
        token: GitHub API bearer token.
        limit: Maximum number of items to return.

    Returns:
        List of dicts with keys: repo, path, sha, url, size, text_matches.
    """
    # text-match media type makes the API include matching fragments,
    # which _score_and_rank uses for content-keyword scoring.
    headers = {
        "Accept": "application/vnd.github.text-match+json",
        "X-GitHub-Api-Version": "2022-11-28",
        "Authorization": f"Bearer {token}",
    }

    results = []
    page = 1
    per_page = min(100, limit)  # 100 is the API's per-page maximum

    try:
        while len(results) < limit:
            params = {"q": query, "per_page": per_page, "page": page}
            url = "https://api.github.com/search/code"
            response = requests.get(url, headers=headers, params=params, timeout=30)

            if response.status_code != 200:
                # Includes rate-limit (403/429) responses — silently stop.
                break

            data = response.json()
            items = data.get("items", [])

            if not items:
                break

            for item in items:
                results.append(
                    {
                        "repo": item.get("repository", {}).get("full_name", ""),
                        "path": item.get("path", ""),
                        "sha": item.get("sha", ""),
                        "url": item.get("html_url", ""),
                        "size": item.get("size", 0),
                        "text_matches": item.get("text_matches", []),
                    }
                )

            # A short page means there are no further results.
            if len(results) >= limit or len(items) < per_page:
                break

            page += 1

    # NOTE(review): deliberately swallows network/JSON errors so a search
    # failure degrades to "no results" rather than crashing the tool.
    except Exception:
        pass

    return results[:limit]
|
| 159 |
+
|
| 160 |
+
|
| 161 |
+
def _fetch_repo_metadata(repos: List[str], token: str) -> Dict[str, Dict[str, Any]]:
|
| 162 |
+
"""Fetch metadata for repositories."""
|
| 163 |
+
headers = {
|
| 164 |
+
"Accept": "application/vnd.github+json",
|
| 165 |
+
"X-GitHub-Api-Version": "2022-11-28",
|
| 166 |
+
"Authorization": f"Bearer {token}",
|
| 167 |
+
}
|
| 168 |
+
|
| 169 |
+
metadata = {}
|
| 170 |
+
|
| 171 |
+
for repo in repos:
|
| 172 |
+
try:
|
| 173 |
+
url = f"https://api.github.com/repos/{repo}"
|
| 174 |
+
response = requests.get(url, headers=headers, timeout=10)
|
| 175 |
+
|
| 176 |
+
if response.status_code == 200:
|
| 177 |
+
data = response.json()
|
| 178 |
+
metadata[repo] = {
|
| 179 |
+
"stars": data.get("stargazers_count", 0),
|
| 180 |
+
"updated_at": data.get("updated_at", ""),
|
| 181 |
+
"description": data.get("description", ""),
|
| 182 |
+
}
|
| 183 |
+
except:
|
| 184 |
+
continue
|
| 185 |
+
|
| 186 |
+
return metadata
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
def _score_and_rank(
    results: List[Dict[str, Any]], library: str, token: str
) -> List[Example]:
    """Score results based on heuristics and rank them.

    Combines path, file-extension, content-keyword, repo-popularity,
    recency, filename, and size signals into one relevance score.

    Args:
        results: Raw search hits from _execute_search.
        library: Library name the caller searched for (not currently used
            in scoring; kept for interface stability).
        token: GitHub API bearer token, used to fetch repo metadata.

    Returns:
        Example objects sorted by descending score.
    """
    repos = list(set(r["repo"] for r in results))
    repo_metadata = _fetch_repo_metadata(repos, token)

    scored_examples = []

    for result in results:
        repo = result["repo"]
        path = result["path"]

        score = 0.0
        reasons = []

        # Path-based scoring — only the first matching pattern counts.
        path_lower = path.lower()
        for pattern, points in PATH_SCORES.items():
            if pattern.lower() in path_lower:
                score += points
                reasons.append(f"in {pattern}")
                break

        # File extension scoring — first matching suffix only.
        for ext, points in PREFERRED_EXTENSIONS.items():
            if path_lower.endswith(ext):
                score += points
                break

        # Content-based scoring over the search API's text-match fragments.
        text_content = ""
        for match in result.get("text_matches", []):
            text_content += match.get("fragment", "").lower() + " "

        for keyword, points in CONTENT_KEYWORDS.items():
            if keyword.lower() in text_content:
                score += points
                reasons.append(f"contains '{keyword}'")

        # Repo-based scoring
        metadata = repo_metadata.get(repo, {})
        stars = metadata.get("stars", 0)
        updated = metadata.get("updated_at", "")

        if stars > 0:
            # Log scale so mega-repos don't drown out path/content signals.
            star_score = math.log10(stars + 1) * 10
            score += star_score

        # Recency bonus for repos updated within the last ~6 months.
        if updated:
            try:
                updated_date = datetime.fromisoformat(updated.replace("Z", "+00:00"))
                if datetime.now(updated_date.tzinfo) - updated_date < timedelta(
                    days=180
                ):
                    score += 20
                    reasons.append("recently updated")
            # Narrowed from a bare `except:`; fromisoformat raises ValueError
            # on malformed timestamps (TypeError on non-string input).
            except (ValueError, TypeError):
                pass

        # Filename quality
        filename = path.split("/")[-1].lower()
        if any(
            word in filename
            for word in ["example", "tutorial", "guide", "quickstart", "demo"]
        ):
            score += 30
            reasons.append("descriptive filename")

        # Size penalty: halve the score of files over ~100 KB.
        if result["size"] > 100000:
            score *= 0.5
            reasons.append("large file")

        example = Example(
            repo=repo,
            path=path,
            ref=result["sha"],
            url=result["url"],
            score=score,
            reason=", ".join(reasons) if reasons else "matches library",
            repo_stars=stars,
            repo_updated=updated,
            file_size=result["size"],
        )

        scored_examples.append(example)

    scored_examples.sort(key=lambda x: x.score, reverse=True)
    return scored_examples
|
| 280 |
+
|
| 281 |
+
|
| 282 |
+
def _search_by_path(
    library: str, org: str, repo_scope: Optional[str], token: str
) -> List[Dict[str, Any]]:
    """Search for the library inside example/tutorial/docs directories.

    Issues one code-search query per well-known directory name, scoped to
    either the whole org or a single repo when `repo_scope` is given.
    """
    scope = f"repo:{org}/{repo_scope}" if repo_scope else f"org:{org}"
    hits: List[Dict[str, Any]] = []
    for directory in (
        "examples/",
        "example/",
        "docs/",
        "tutorials/",
        "notebooks/",
        "guides/",
    ):
        hits.extend(
            _execute_search(f"{scope} {library} path:{directory}", token, limit=20)
        )
    return hits
|
| 305 |
+
|
| 306 |
+
|
| 307 |
+
def _search_by_content(
    library: str, org: str, repo_scope: Optional[str], token: str
) -> List[Dict[str, Any]]:
    """Search for the library alongside example-like content phrases.

    Issues one code-search query per phrase, scoped to either the whole
    org or a single repo when `repo_scope` is given.
    """
    scope = f"repo:{org}/{repo_scope}" if repo_scope else f"org:{org}"
    hits: List[Dict[str, Any]] = []
    for phrase in ("if __name__", "quickstart", "tutorial", "usage example"):
        hits.extend(
            _execute_search(f"{scope} {library} {phrase}", token, limit=15)
        )
    return hits
|
| 328 |
+
|
| 329 |
+
|
| 330 |
+
def _search_readmes(
    library: str, org: str, repo_scope: Optional[str], token: str
) -> List[Dict[str, Any]]:
    """Search for mentions of the library inside README files."""
    scope = f"repo:{org}/{repo_scope}" if repo_scope else f"org:{org}"
    return _execute_search(f"{scope} {library} filename:README", token, limit=20)
|
| 340 |
+
|
| 341 |
+
|
| 342 |
+
def find_examples(
    library: str,
    org: str = "huggingface",
    repo_scope: Optional[str] = None,
    max_results: int = 10,
) -> List[Example]:
    """
    Find examples, guides, and tutorials for a library using deterministic queries.

    Runs three query playbooks (well-known directories, content phrases,
    READMEs), de-duplicates the hits, and ranks them heuristically:
    - Prefers README.md, docs/**, examples/**, notebooks/**, tests/**
    - Prefers files with if __name__ == "__main__", "quickstart", "tutorial"
    - Prefers repos with higher stars and more recent updates

    Args:
        library: Library name to search for (e.g., "transformers", "torch")
        org: GitHub organization to search in (default: "huggingface")
        repo_scope: Optional specific repository (e.g., "transformers")
        max_results: Maximum number of results to return (default: 10)

    Returns:
        List of Example objects, ranked by relevance score
    """
    token = _get_github_token()

    candidates: List[Dict[str, Any]] = []
    for strategy in (_search_by_path, _search_by_content, _search_readmes):
        candidates.extend(strategy(library, org, repo_scope, token))

    # De-duplicate on (repo, path); setdefault keeps the first occurrence,
    # matching the original seen-set behavior.
    deduped: Dict[tuple, Dict[str, Any]] = {}
    for candidate in candidates:
        deduped.setdefault((candidate["repo"], candidate["path"]), candidate)

    ranked = _score_and_rank(list(deduped.values()), library, token)
    return ranked[:max_results]
|
| 383 |
+
|
| 384 |
+
|
| 385 |
+
async def _async_call(func, *args, **kwargs):
|
| 386 |
+
"""Wrap synchronous calls for async context."""
|
| 387 |
+
return await asyncio.to_thread(func, *args, **kwargs)
|
| 388 |
+
|
| 389 |
+
|
| 390 |
+
def _format_examples_table(examples: List[Example]) -> str:
    """Render the ranked examples as a markdown table (or a placeholder)."""
    if not examples:
        return "No examples found."

    def _clip_location(text):
        # Paths longer than 60 chars are shortened with an ellipsis.
        return text if len(text) <= 60 else text[:57] + "..."

    def _clip_reason(text):
        # Reasons of 40+ chars are shortened with an ellipsis.
        return text if len(text) < 40 else text[:37] + "..."

    def _row(rank, item):
        location = _clip_location(item.repo + "/" + item.path)
        return (
            f"| {rank} | {location} | {item.score:.1f} "
            f"| {item.repo_stars:,} | {_clip_reason(item.reason)} |"
        )

    header = [
        "| Rank | File | Score | Stars | Reason |",
        "|------|------|-------|-------|--------|",
    ]
    rows = [_row(rank, item) for rank, item in enumerate(examples, 1)]
    return "\n".join(header + rows)
|
| 410 |
+
|
| 411 |
+
|
| 412 |
+
class FindExamplesTool:
    """Tool for finding examples and tutorials for libraries.

    Wraps find_examples for the agent tool router: validates parameters,
    runs the blocking search in a worker thread, and formats the ranked
    results as a markdown table with read_file suggestions.
    """

    @staticmethod
    def _error(message: str) -> ToolResult:
        """Build the standard error payload (zero results, isError set)."""
        return {
            "formatted": message,
            "totalResults": 0,
            "resultsShared": 0,
            "isError": True,
        }

    async def execute(self, params: Dict[str, Any]) -> ToolResult:
        """Execute find_examples operation.

        Args:
            params: Tool arguments; requires 'library', optionally 'org'
                (default 'huggingface'), 'repo_scope', and 'max_results'
                (default 10).

        Returns:
            ToolResult dict with 'formatted' markdown, result counts, and
            'isError' set on failure.
        """
        library = params.get("library")
        if not library:
            return self._error("Error: 'library' parameter is required")

        org = params.get("org", "huggingface")
        repo_scope = params.get("repo_scope")
        max_results = params.get("max_results", 10)

        try:
            examples = await _async_call(
                find_examples,
                library=library,
                org=org,
                repo_scope=repo_scope,
                max_results=max_results,
            )

            if not examples:
                # Not flagged as an error: an empty result set is a valid answer.
                return {
                    "formatted": f"No examples found for '{library}' in {org}",
                    "totalResults": 0,
                    "resultsShared": 0,
                }

            table = _format_examples_table(examples)
            response = f"**Found {len(examples)} examples for '{library}' in {org}:**\n\n{table}"

            # Add URLs and suggest using read_file
            response += "\n\n**Top examples (use read_file to view):**\n"
            for i, ex in enumerate(examples[:3], 1):
                response += f"{i}. [{ex.repo}/{ex.path}]({ex.url})\n"
                response += f"   Use: read_file(repo='{ex.repo}', path='{ex.path}')\n"

            return {
                "formatted": response,
                "totalResults": len(examples),
                "resultsShared": len(examples),
            }

        except GitHubAPIError as e:
            return self._error(f"GitHub API Error: {str(e)}")
        except Exception as e:
            return self._error(f"Error: {str(e)}")
|
| 475 |
+
|
| 476 |
+
|
| 477 |
+
# Tool specification consumed by the agent tool router (JSON-schema style
# parameters; the description is shown to the LLM).
FIND_EXAMPLES_TOOL_SPEC = {
    "name": "find_examples",
    "description": (
        "Find examples, guides, and tutorials for a library using deterministic queries and heuristics.\n\n"
        "Uses best practices retrieval without semantic search:\n"
        "- Prefers README.md, docs/**, examples/**, notebooks/**, tests/**\n"
        "- Prefers files with if __name__ == '__main__', 'quickstart', 'tutorial', 'usage'\n"
        "- Prefers repos with higher stars and more recent updates\n\n"
        "Returns a ranked list of canonical example files.\n\n"
        "Examples:\n"
        "- Find transformers examples: {'library': 'transformers', 'org': 'huggingface', 'max_results': 5}\n"
        "- Find torch examples in specific repo: {'library': 'torch', 'org': 'pytorch', 'repo_scope': 'examples'}\n\n"
        "Use read_file tool to view the content of returned files.\n\n"
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "library": {
                "type": "string",
                "description": "Library name to search for (e.g., 'transformers', 'torch', 'react')",
            },
            "org": {
                "type": "string",
                "description": "GitHub organization to search in (default: 'huggingface')",
            },
            "repo_scope": {
                "type": "string",
                "description": "Optional specific repository to search within",
            },
            "max_results": {
                "type": "integer",
                "description": "Maximum number of results to return (default: 10)",
            },
        },
        "required": ["library"],
    },
}
|
| 515 |
+
|
| 516 |
+
|
| 517 |
+
async def find_examples_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
    """Route a find_examples call for the agent tool router.

    Returns a (formatted text, success flag) pair; any exception is
    converted into an error message with success=False.
    """
    try:
        outcome = await FindExamplesTool().execute(arguments)
        succeeded = not outcome.get("isError", False)
        return outcome["formatted"], succeeded
    except Exception as exc:
        return f"Error executing find_examples: {str(exc)}", False
|
agent/tools/github_list_repos.py
ADDED
|
@@ -0,0 +1,324 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
GitHub List Repos Tool
|
| 3 |
+
|
| 4 |
+
Lists repositories for a user or organization with sorting options.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import asyncio
|
| 8 |
+
import os
|
| 9 |
+
from dataclasses import asdict, dataclass
|
| 10 |
+
from typing import Any, Dict, List, Literal, Optional
|
| 11 |
+
|
| 12 |
+
try:
|
| 13 |
+
import requests
|
| 14 |
+
except ImportError:
|
| 15 |
+
raise ImportError(
|
| 16 |
+
"requests library is required. Install with: pip install requests"
|
| 17 |
+
)
|
| 18 |
+
|
| 19 |
+
from agent.tools.types import ToolResult
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
@dataclass
class Repository:
    """Snapshot of one GitHub repository as returned by the Search API."""

    id: int  # numeric GitHub repository id
    name: str  # short name, e.g. "transformers"
    full_name: str  # "owner/name" slug
    description: Optional[str]  # None when the repo has no description
    html_url: str  # browser URL of the repository
    language: Optional[str]  # primary language, or None if not detected
    stars: int  # mapped from the API's stargazers_count
    forks: int  # mapped from the API's forks_count
    open_issues: int  # mapped from the API's open_issues_count
    private: bool
    fork: bool  # True when this repo is a fork of another
    archived: bool
    default_branch: str  # falls back to "main" when absent in the API payload
    created_at: Optional[str] = None  # timestamp string as returned by the API
    updated_at: Optional[str] = None  # timestamp string as returned by the API
    topics: Optional[List[str]] = None  # repository topic labels

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict copy of this record (via dataclasses.asdict)."""
        return asdict(self)
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
class GitHubAPIError(Exception):
    """Signals a failed GitHub API request (bad status, missing token, or transport error)."""
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def _get_github_token() -> str:
|
| 54 |
+
"""Get GitHub token from environment."""
|
| 55 |
+
token = os.environ.get("GITHUB_TOKEN")
|
| 56 |
+
if not token:
|
| 57 |
+
raise GitHubAPIError(
|
| 58 |
+
"GITHUB_TOKEN environment variable is required. "
|
| 59 |
+
"Set it with: export GITHUB_TOKEN=your_token_here"
|
| 60 |
+
)
|
| 61 |
+
return token
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def _repo_from_item(item: Dict[str, Any]) -> Repository:
    """Convert one search-result item (a dict) into a Repository record."""
    return Repository(
        id=item.get("id"),
        name=item.get("name"),
        full_name=item.get("full_name"),
        description=item.get("description"),
        html_url=item.get("html_url"),
        language=item.get("language"),
        stars=item.get("stargazers_count", 0),
        forks=item.get("forks_count", 0),
        open_issues=item.get("open_issues_count", 0),
        private=item.get("private", False),
        fork=item.get("fork", False),
        archived=item.get("archived", False),
        default_branch=item.get("default_branch", "main"),
        created_at=item.get("created_at"),
        updated_at=item.get("updated_at"),
        topics=item.get("topics", []),
    )


def _fetch_repositories(
    query: str, sort: str, order: str, limit: Optional[int], token: str
) -> List[Repository]:
    """Fetch repositories from the GitHub Search API, paginating as needed.

    Args:
        query: Search qualifier string (e.g. "org:huggingface").
        sort: Sort field accepted by the API ("stars", "forks", ...).
        order: "asc" or "desc".
        limit: Optional cap on the number of repositories returned.
        token: GitHub API token used for authentication.

    Returns:
        Up to ``limit`` Repository records (all available results if no limit).

    Raises:
        GitHubAPIError: If the very first request fails (auth, rate limit,
            transport). Failures on later pages degrade gracefully and return
            the partial results collected so far.
    """
    headers = {
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28",
        "Authorization": f"Bearer {token}",
    }
    url = "https://api.github.com/search/repositories"

    all_repos: List[Repository] = []
    page = 1
    # The Search API serves at most 100 items per page.
    per_page = min(100, limit) if limit else 100

    while True:
        params = {
            "q": query,
            "sort": sort,
            "order": order,
            "page": page,
            "per_page": per_page,
        }

        try:
            response = requests.get(url, headers=headers, params=params, timeout=30)
            if response.status_code != 200:
                if not all_repos:
                    # Surface auth / rate-limit problems instead of silently
                    # returning an empty list that looks like "no repositories".
                    raise GitHubAPIError(
                        f"GitHub search failed with HTTP {response.status_code}"
                    )
                break
            data = response.json()
        except requests.exceptions.RequestException as e:
            if not all_repos:
                raise GitHubAPIError(f"Failed to connect to GitHub API: {e}")
            break  # best-effort: keep what we already collected

        items = data.get("items", [])
        if not items:
            break

        all_repos.extend(_repo_from_item(item) for item in items)

        if limit and len(all_repos) >= limit:
            all_repos = all_repos[:limit]
            break

        if len(all_repos) >= data.get("total_count", 0):
            break

        # The Search API never serves more than 1000 results per query.
        if page * per_page >= 1000:
            break

        page += 1

    return all_repos
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
def list_repos(
    owner: str,
    owner_type: Literal["user", "org"] = "org",
    sort: Literal["stars", "forks", "updated", "created"] = "stars",
    order: Literal["asc", "desc"] = "desc",
    limit: Optional[int] = None,
) -> List[Repository]:
    """List repositories owned by a GitHub user or organization.

    Thin wrapper over the GitHub Search API
    (https://api.github.com/search/repositories) that builds the
    ``org:<owner>`` / ``user:<owner>`` qualifier and delegates fetching
    and pagination to ``_fetch_repositories``.

    Args:
        owner: GitHub username or organization name.
        owner_type: "user" or "org" (default "org"); selects the qualifier.
        sort: Sort field — "stars", "forks", "updated", or "created".
        order: Sort order — "asc" or "desc".
        limit: Optional cap on the number of repositories returned.

    Returns:
        List of Repository records.
    """
    token = _get_github_token()
    qualifier = "org" if owner_type == "org" else "user"
    return _fetch_repositories(
        query=f"{qualifier}:{owner}",
        sort=sort,
        order=order,
        limit=limit,
        token=token,
    )
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
async def _async_call(func, *args, **kwargs):
|
| 179 |
+
"""Wrap synchronous calls for async context."""
|
| 180 |
+
return await asyncio.to_thread(func, *args, **kwargs)
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
def _format_repos_table(repos: List[Repository]) -> str:
    """Render repositories as a GitHub-flavored markdown table.

    Descriptions longer than 50 characters are truncated with an ellipsis,
    and pipe characters in cell text are escaped so a description (or repo
    name) containing ``|`` cannot break the table layout.

    Args:
        repos: Repository records to render.

    Returns:
        A markdown table string, or "No repositories found." for an empty list.
    """
    if not repos:
        return "No repositories found."

    lines = [
        "| Repo | Stars | Forks | Language | Description |",
        "|------|-------|-------|----------|-------------|",
    ]

    for repo in repos:
        desc = repo.description or "N/A"
        if len(desc) > 50:
            desc = desc[:47] + "..."
        # Escape pipes AFTER truncation so the visible width stays <= 50.
        desc = desc.replace("|", "\\|")
        lang = repo.language or "N/A"
        lines.append(
            f"| {repo.full_name} | {repo.stars:,} | {repo.forks:,} | {lang} | {desc} |"
        )

    return "\n".join(lines)
|
| 203 |
+
|
| 204 |
+
|
| 205 |
+
class ListReposTool:
    """Tool for listing GitHub repositories."""

    @staticmethod
    def _error(message: str) -> ToolResult:
        """Build the standard error-shaped tool result."""
        return {
            "formatted": message,
            "totalResults": 0,
            "resultsShared": 0,
            "isError": True,
        }

    async def execute(self, params: Dict[str, Any]) -> ToolResult:
        """Execute list_repos operation and format the results as markdown."""
        owner = params.get("owner")
        if not owner:
            return self._error("Error: 'owner' parameter is required")

        owner_type = params.get("owner_type", "org")
        sort = params.get("sort", "stars")
        order = params.get("order", "desc")
        limit = params.get("limit")

        try:
            repos = await _async_call(
                list_repos,
                owner=owner,
                owner_type=owner_type,
                sort=sort,
                order=order,
                limit=limit,
            )

            if not repos:
                return {
                    "formatted": f"No repositories found for {owner}",
                    "totalResults": 0,
                    "resultsShared": 0,
                }

            header = f"**Found {len(repos)} repositories for {owner} (sorted by {sort}, {order}):**"
            top_lines = [
                f"{i}. [{repo.full_name}]({repo.html_url}) - ⭐ {repo.stars:,}\n"
                for i, repo in enumerate(repos[:5], 1)
            ]
            response = (
                header
                + "\n\n"
                + _format_repos_table(repos)
                + "\n\n**Top repositories:**\n"
                + "".join(top_lines)
            )

            return {
                "formatted": response,
                "totalResults": len(repos),
                "resultsShared": len(repos),
            }

        except GitHubAPIError as e:
            return self._error(f"GitHub API Error: {str(e)}")
        except Exception as e:
            return self._error(f"Error: {str(e)}")
|
| 271 |
+
|
| 272 |
+
|
| 273 |
+
# Tool specification: name, LLM-facing description, and JSON-schema
# parameters for the list_repos tool.
LIST_REPOS_TOOL_SPEC = {
    "name": "list_repos",
    "description": (
        "List repositories for a user or organization with sorting options.\n\n"
        "Backed by GitHub Search API: https://api.github.com/search/repositories?q=org:huggingface&sort=stars&order=desc\n\n"
        "Examples:\n"
        "- Top 10 starred repos: {'owner': 'huggingface', 'sort': 'stars', 'limit': 10}\n"
        "- Recently updated: {'owner': 'microsoft', 'sort': 'updated', 'order': 'desc', 'limit': 5}\n"
        "- User repos: {'owner': 'torvalds', 'owner_type': 'user', 'sort': 'stars'}\n"
        "- All repos: {'owner': 'pytorch', 'sort': 'forks'}\n\n"
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "owner": {
                "type": "string",
                "description": "GitHub username or organization name (e.g., 'huggingface', 'torvalds')",
            },
            "owner_type": {
                "type": "string",
                "enum": ["user", "org"],
                "description": "Whether the owner is a 'user' or 'org' (default: 'org')",
            },
            "sort": {
                "type": "string",
                "enum": ["stars", "forks", "updated", "created"],
                "description": "Sort field: 'stars', 'forks', 'updated', or 'created' (default: 'stars')",
            },
            "order": {
                "type": "string",
                "enum": ["asc", "desc"],
                "description": "Sort order: 'asc' or 'desc' (default: 'desc')",
            },
            "limit": {
                "type": "integer",
                "description": "Maximum number of repositories to return (default: no limit, returns all)",
            },
        },
        "required": ["owner"],
    },
}
|
| 315 |
+
|
| 316 |
+
|
| 317 |
+
async def list_repos_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
    """Route a list_repos invocation from the agent to ListReposTool.

    Returns a (formatted_text, success) pair; any exception raised while
    constructing or running the tool is converted into an error message
    with success=False.
    """
    try:
        outcome = await ListReposTool().execute(arguments)
        return outcome["formatted"], not outcome.get("isError", False)
    except Exception as e:
        return f"Error executing list_repos: {str(e)}", False
|
agent/tools/github_read_file.py
ADDED
|
@@ -0,0 +1,392 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
GitHub Read File Tool
|
| 3 |
+
|
| 4 |
+
Reads file contents from a GitHub repository with line range support.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import asyncio
|
| 8 |
+
import base64
|
| 9 |
+
import os
|
| 10 |
+
from dataclasses import asdict, dataclass
|
| 11 |
+
from typing import Any, Dict, Optional, Tuple
|
| 12 |
+
|
| 13 |
+
try:
|
| 14 |
+
import requests
|
| 15 |
+
except ImportError:
|
| 16 |
+
raise ImportError(
|
| 17 |
+
"requests library is required. Install with: pip install requests"
|
| 18 |
+
)
|
| 19 |
+
|
| 20 |
+
from agent.tools.types import ToolResult
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
@dataclass
class FileContents:
    """Selected slice of a repository file plus the metadata used to fetch it."""

    content: str  # text of lines line_start..line_end (inclusive)
    sha: str  # blob SHA of the file from the contents API
    path: str  # path of the file inside the repository
    size: int  # file size in bytes as reported by the API
    last_modified: Optional[str]  # date of the last commit touching the file, if known
    last_commit_sha: Optional[str]  # SHA of that commit, if known
    line_start: int  # first returned line (1-indexed, inclusive)
    line_end: int  # last returned line (1-indexed, inclusive)
    total_lines: int  # line count of the whole file
    truncated: bool  # True when the default 300-line cap cut the output
    message: Optional[str] = None  # human-readable truncation hint, if any

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict copy of this record (via dataclasses.asdict)."""
        return asdict(self)
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
class GitHubAPIError(Exception):
    """Signals a failed GitHub API request (bad status, missing token, or transport error)."""
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def _get_github_token() -> str:
|
| 50 |
+
"""Get GitHub token from environment."""
|
| 51 |
+
token = os.environ.get("GITHUB_TOKEN")
|
| 52 |
+
if not token:
|
| 53 |
+
raise GitHubAPIError(
|
| 54 |
+
"GITHUB_TOKEN environment variable is required. "
|
| 55 |
+
"Set it with: export GITHUB_TOKEN=your_token_here"
|
| 56 |
+
)
|
| 57 |
+
return token
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def _fetch_raw_content(owner: str, repo: str, path: str, ref: str, token: str) -> str:
    """Download a file's raw text via the contents API raw media type.

    Used by ``_fetch_file_contents`` as a fallback when the JSON response
    carries no inline base64 content.

    Raises:
        GitHubAPIError: If the request does not return HTTP 200.
    """
    response = requests.get(
        f"https://api.github.com/repos/{owner}/{repo}/contents/{path}",
        headers={
            "Accept": "application/vnd.github.raw",
            "X-GitHub-Api-Version": "2022-11-28",
            "Authorization": f"Bearer {token}",
        },
        params={"ref": ref},
        timeout=30,
    )
    if response.status_code != 200:
        raise GitHubAPIError(
            f"Failed to fetch raw content: HTTP {response.status_code}"
        )
    return response.text
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def _get_last_commit_info(
    owner: str, repo: str, path: str, ref: Optional[str], token: str
) -> Tuple[Optional[str], Optional[str]]:
    """Look up the most recent commit that touched *path*.

    Commit metadata is treated as best-effort decoration: any lookup
    failure yields (None, None) rather than an exception.

    Args:
        owner: Repository owner login.
        repo: Repository name.
        path: File path inside the repository.
        ref: Optional branch/tag/SHA; "HEAD" or None means the default branch.
        token: GitHub API token.

    Returns:
        (commit_date, commit_sha) of the latest commit for the file, or
        (None, None) when the lookup fails.
    """
    headers = {
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28",
        "Authorization": f"Bearer {token}",
    }

    url = f"https://api.github.com/repos/{owner}/{repo}/commits"
    params = {"path": path, "per_page": 1}
    if ref and ref != "HEAD":
        params["sha"] = ref

    try:
        response = requests.get(url, headers=headers, params=params, timeout=30)
        if response.status_code == 200:
            commits = response.json()
            if commits:
                commit = commits[0]
                commit_date = commit.get("commit", {}).get("committer", {}).get("date")
                return commit_date, commit.get("sha")
    # A bare ``except:`` here would also swallow KeyboardInterrupt/SystemExit;
    # catch only the failures this lookup can realistically produce
    # (transport errors and non-JSON bodies).
    except (requests.exceptions.RequestException, ValueError):
        pass

    return None, None
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
def _resolve_line_range(
    total_lines: int, line_start: Optional[int], line_end: Optional[int]
) -> Tuple[int, int, bool, Optional[str]]:
    """Clamp the requested line window to the file's bounds.

    With no explicit range, files longer than 300 lines are truncated to the
    first 300 and a hint message is produced.

    Returns:
        (line_start, line_end, truncated, message) with both bounds 1-indexed
        and clamped to [1, total_lines].

    Raises:
        ValueError: If the clamped line_start exceeds line_end.
    """
    truncated = False
    message = None

    if line_start is None and line_end is None:
        if total_lines > 300:
            line_start, line_end = 1, 300
            truncated = True
            message = (
                f"File has {total_lines} lines. Returned only the first 300 lines. "
                f"To view more, use the line_start and line_end parameters."
            )
        else:
            line_start, line_end = 1, total_lines
    else:
        if line_start is None:
            line_start = 1
        if line_end is None:
            line_end = total_lines

    line_start = max(line_start, 1)
    line_end = min(line_end, total_lines)
    if line_start > line_end:
        raise ValueError(
            f"line_start ({line_start}) cannot be greater than line_end ({line_end})"
        )
    return line_start, line_end, truncated, message


def _fetch_file_contents(
    owner: str,
    repo: str,
    path: str,
    ref: str,
    line_start: Optional[int],
    line_end: Optional[int],
    token: str,
) -> FileContents:
    """Fetch a file from the GitHub contents API and select a line window.

    Args:
        owner: Repository owner login.
        repo: Repository name.
        path: File path inside the repository.
        ref: Branch, tag, or commit SHA ("HEAD" means the default branch).
        line_start: Optional 1-indexed first line to return (inclusive).
        line_end: Optional 1-indexed last line to return (inclusive).
        token: GitHub API token.

    Returns:
        FileContents with the selected lines plus file/commit metadata.

    Raises:
        GitHubAPIError: For missing files, non-file paths, or API/transport errors.
        ValueError: If the clamped line_start exceeds line_end.
    """
    headers = {
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28",
        "Authorization": f"Bearer {token}",
    }

    url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}"
    params = {}
    if ref and ref != "HEAD":
        params["ref"] = ref

    try:
        response = requests.get(url, headers=headers, params=params, timeout=30)

        if response.status_code == 404:
            raise GitHubAPIError(
                f"File not found: {path} in {owner}/{repo} (ref: {ref})"
            )

        if response.status_code != 200:
            error_msg = f"GitHub API error (status {response.status_code})"
            try:
                error_data = response.json()
                if "message" in error_data:
                    error_msg += f": {error_data['message']}"
            except ValueError:
                # Error body was not JSON; report the bare status code.
                # (Narrowed from a bare ``except:`` that also swallowed
                # KeyboardInterrupt/SystemExit.)
                pass
            raise GitHubAPIError(error_msg)

        data = response.json()

        if data.get("type") != "file":
            raise GitHubAPIError(
                f"Path {path} is not a file (type: {data.get('type')})"
            )

        file_sha = data.get("sha")
        file_size = data.get("size", 0)

        content_b64 = data.get("content", "")
        if content_b64:
            # The API wraps the base64 payload with whitespace; strip it
            # before decoding.
            content_b64 = content_b64.replace("\n", "").replace(" ", "")
            content = base64.b64decode(content_b64).decode("utf-8", errors="replace")
        else:
            # No inline content in the JSON response; fall back to the raw
            # media type.
            content = _fetch_raw_content(owner, repo, path, ref or "HEAD", token)

    except requests.exceptions.RequestException as e:
        raise GitHubAPIError(f"Failed to connect to GitHub API: {e}")

    # Best-effort commit metadata; (None, None) on failure.
    last_modified, last_commit_sha = _get_last_commit_info(
        owner, repo, path, ref, token
    )

    lines = content.split("\n")
    total_lines = len(lines)
    line_start, line_end, truncated, message = _resolve_line_range(
        total_lines, line_start, line_end
    )

    selected_content = "\n".join(lines[line_start - 1 : line_end])

    return FileContents(
        content=selected_content,
        sha=file_sha,
        path=path,
        size=file_size,
        last_modified=last_modified,
        last_commit_sha=last_commit_sha,
        line_start=line_start,
        line_end=line_end,
        total_lines=total_lines,
        truncated=truncated,
        message=message,
    )
|
| 230 |
+
|
| 231 |
+
|
| 232 |
+
def read_file(
    repo: str,
    path: str,
    ref: str = "HEAD",
    line_start: Optional[int] = None,
    line_end: Optional[int] = None,
) -> FileContents:
    """
    Read file contents from a GitHub repository.

    Returns raw file text plus metadata (commit SHA, last modified).
    If the file is more than 300 lines and no line range is specified,
    only the first 300 lines are returned together with a hint message.

    Args:
        repo: Repository in format "owner/repo" (e.g., "huggingface/transformers")
        path: Path to file in repository (e.g., "README.md")
        ref: Git reference - branch name, tag, or commit SHA (default: "HEAD")
        line_start: Starting line number (1-indexed, inclusive)
        line_end: Ending line number (1-indexed, inclusive)

    Returns:
        FileContents object with content and metadata

    Raises:
        ValueError: If *repo* is not a non-empty "owner/repo" pair.
        GitHubAPIError: On missing token or API failures.
    """
    # Reject not just a missing "/" but also empty owner/repo halves
    # ("/x", "x/"), which would otherwise produce a nonsense API URL.
    owner, _, repo_name = repo.partition("/")
    if not owner or not repo_name:
        raise ValueError("repo must be in format 'owner/repo'")

    token = _get_github_token()

    return _fetch_file_contents(
        owner=owner,
        repo=repo_name,
        path=path,
        ref=ref,
        line_start=line_start,
        line_end=line_end,
        token=token,
    )
|
| 271 |
+
|
| 272 |
+
|
| 273 |
+
async def _async_call(func, *args, **kwargs):
|
| 274 |
+
"""Wrap synchronous calls for async context."""
|
| 275 |
+
return await asyncio.to_thread(func, *args, **kwargs)
|
| 276 |
+
|
| 277 |
+
|
| 278 |
+
class ReadFileTool:
    """Tool for reading files from GitHub repositories."""

    @staticmethod
    def _error(message: str) -> ToolResult:
        """Build the standard error-shaped tool result."""
        return {
            "formatted": message,
            "totalResults": 0,
            "resultsShared": 0,
            "isError": True,
        }

    async def execute(self, params: Dict[str, Any]) -> ToolResult:
        """Execute read_file operation and format the result as markdown."""
        repo = params.get("repo")
        path = params.get("path")
        if not repo or not path:
            return self._error("Error: 'repo' and 'path' parameters are required")

        try:
            fc = await _async_call(
                read_file,
                repo=repo,
                path=path,
                ref=params.get("ref", "HEAD"),
                line_start=params.get("line_start"),
                line_end=params.get("line_end"),
            )

            parts = [
                f"**File: {fc.path}**\n",
                f"**Repo: {repo}**\n",
                f"**Lines:** {fc.line_start}-{fc.line_end} of {fc.total_lines}\n",
                f"**SHA:** {fc.sha}\n",
            ]
            if fc.last_modified:
                parts.append(f"**Last modified:** {fc.last_modified}\n")
            if fc.message:
                parts.append(f"\n⚠️ {fc.message}\n")
            parts.append(f"\n```\n{fc.content}\n```")

            return {
                "formatted": "".join(parts),
                "totalResults": 1,
                "resultsShared": 1,
            }

        except GitHubAPIError as e:
            return self._error(f"GitHub API Error: {str(e)}")
        except Exception as e:
            return self._error(f"Error: {str(e)}")
|
| 341 |
+
|
| 342 |
+
|
| 343 |
+
# Tool specification: name, LLM-facing description, and JSON-schema
# parameters for the read_file tool.
READ_FILE_TOOL_SPEC = {
    "name": "read_file",
    "description": (
        "Read file contents from a GitHub repository.\n\n"
        "Returns raw file text plus metadata (commit SHA, last modified).\n"
        "If file is more than 300 lines, returns only the first 300 lines and includes line_start and line_end indexes.\n"
        "Use line_start and line_end parameters to view specific line ranges.\n\n"
        "Examples:\n"
        "- Read README: {'repo': 'huggingface/transformers', 'path': 'README.md'}\n"
        "- Read specific lines: {'repo': 'huggingface/transformers', 'path': 'src/transformers/__init__.py', 'line_start': 1, 'line_end': 50}\n"
        "- Read from branch: {'repo': 'torvalds/linux', 'path': 'MAINTAINERS', 'ref': 'master', 'line_start': 1, 'line_end': 20}\n\n"
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "repo": {
                "type": "string",
                "description": "Repository in format 'owner/repo' (e.g., 'huggingface/transformers')",
            },
            "path": {
                "type": "string",
                "description": "Path to file in repository (e.g., 'README.md', 'src/main.py')",
            },
            "ref": {
                "type": "string",
                "description": "Git reference: branch name, tag, or commit SHA (default: 'HEAD')",
            },
            "line_start": {
                "type": "integer",
                "description": "Starting line number (1-indexed, inclusive). Use to read specific range.",
            },
            "line_end": {
                "type": "integer",
                "description": "Ending line number (1-indexed, inclusive). Use to read specific range.",
            },
        },
        "required": ["repo", "path"],
    },
}
|
| 383 |
+
|
| 384 |
+
|
| 385 |
+
async def read_file_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
    """Route a read_file invocation from the agent to ReadFileTool.

    Returns a (formatted_text, success) pair; any exception raised while
    constructing or running the tool is converted into an error message
    with success=False.
    """
    try:
        outcome = await ReadFileTool().execute(arguments)
        return outcome["formatted"], not outcome.get("isError", False)
    except Exception as e:
        return f"Error executing read_file: {str(e)}", False
|
agent/tools/github_search_code.py
ADDED
|
@@ -0,0 +1,381 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
GitHub Search Code Tool
|
| 3 |
+
|
| 4 |
+
Searches code across GitHub with glob filtering and line-level results.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import asyncio
|
| 8 |
+
import fnmatch
|
| 9 |
+
import os
|
| 10 |
+
import re
|
| 11 |
+
from dataclasses import asdict, dataclass
|
| 12 |
+
from typing import Any, Dict, List, Optional, Tuple
|
| 13 |
+
|
| 14 |
+
try:
|
| 15 |
+
import requests
|
| 16 |
+
except ImportError:
|
| 17 |
+
raise ImportError(
|
| 18 |
+
"requests library is required. Install with: pip install requests"
|
| 19 |
+
)
|
| 20 |
+
|
| 21 |
+
from agent.tools.types import ToolResult
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
@dataclass
class CodeMatch:
    """One search hit: where it lives and what the matching code looks like."""

    # Repository in "owner/name" form.
    repo: str
    # File path within the repository.
    path: str
    # Git ref (blob SHA) the match was found at.
    ref: str
    # Estimated first/last line of the snippet (search API gives no real numbers).
    line_start: int
    line_end: int
    # The matched code fragment, whitespace-stripped.
    snippet: str

    def to_dict(self):
        """Serialize this match to a plain dictionary."""
        return asdict(self)
| 37 |
+
|
| 38 |
+
|
| 39 |
+
class GitHubAPIError(Exception):
    """Raised when a GitHub API call fails or cannot be made."""
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def _get_github_token() -> str:
|
| 46 |
+
"""Get GitHub token from environment."""
|
| 47 |
+
token = os.environ.get("GITHUB_TOKEN")
|
| 48 |
+
if not token:
|
| 49 |
+
raise GitHubAPIError(
|
| 50 |
+
"GITHUB_TOKEN environment variable is required. "
|
| 51 |
+
"Set it with: export GITHUB_TOKEN=your_token_here"
|
| 52 |
+
)
|
| 53 |
+
return token
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def _build_github_query(
|
| 57 |
+
query: str, repo_glob: Optional[str], path_glob: Optional[str], regex: bool
|
| 58 |
+
) -> str:
|
| 59 |
+
"""Build GitHub search query string from parameters."""
|
| 60 |
+
parts = []
|
| 61 |
+
|
| 62 |
+
if regex:
|
| 63 |
+
parts.append(f"/{query}/")
|
| 64 |
+
else:
|
| 65 |
+
if " " in query:
|
| 66 |
+
parts.append(f'"{query}"')
|
| 67 |
+
else:
|
| 68 |
+
parts.append(query)
|
| 69 |
+
|
| 70 |
+
if repo_glob:
|
| 71 |
+
if "/" in repo_glob:
|
| 72 |
+
parts.append(f"repo:{repo_glob}")
|
| 73 |
+
else:
|
| 74 |
+
parts.append(f"user:{repo_glob}")
|
| 75 |
+
|
| 76 |
+
if path_glob:
|
| 77 |
+
if "*" not in path_glob and "?" not in path_glob:
|
| 78 |
+
parts.append(f"path:{path_glob}")
|
| 79 |
+
elif path_glob.startswith("*."):
|
| 80 |
+
ext = path_glob[2:]
|
| 81 |
+
parts.append(f"extension:{ext}")
|
| 82 |
+
elif "/" not in path_glob and "*" in path_glob:
|
| 83 |
+
parts.append(f"filename:{path_glob}")
|
| 84 |
+
else:
|
| 85 |
+
if "." in path_glob:
|
| 86 |
+
ext_match = re.search(r"\*\.(\w+)", path_glob)
|
| 87 |
+
if ext_match:
|
| 88 |
+
parts.append(f"extension:{ext_match.group(1)}")
|
| 89 |
+
|
| 90 |
+
return " ".join(parts)
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
def _fetch_code_search_results(
    query: str, token: str, max_results: int
) -> List[Dict[str, Any]]:
    """Page through the GitHub code-search API, collecting raw result items.

    Best-effort: any network error or non-200 response stops pagination and
    returns whatever has been gathered so far.
    """
    # The text-match media type asks GitHub to include matched fragments.
    headers = {
        "Accept": "application/vnd.github.text-match+json",
        "X-GitHub-Api-Version": "2022-11-28",
        "Authorization": f"Bearer {token}",
    }
    endpoint = "https://api.github.com/search/code"
    page_size = min(100, max_results)  # GitHub caps per_page at 100

    collected: List[Dict[str, Any]] = []
    page = 1
    while len(collected) < max_results:
        try:
            response = requests.get(
                endpoint,
                headers=headers,
                params={"q": query, "page": page, "per_page": page_size},
                timeout=30,
            )
            if response.status_code != 200:
                # Rate limit, bad query, auth failure: stop paging.
                break
            payload = response.json()
        except requests.exceptions.RequestException:
            break

        items = payload.get("items", [])
        if not items:
            break
        collected.extend(items)

        # No point requesting another page past the reported total.
        if len(collected) >= payload.get("total_count", 0):
            break
        page += 1

    return collected[:max_results]
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
def _glob_match(text: str, pattern: str) -> bool:
|
| 142 |
+
"""Check if text matches glob pattern, supporting ** for multi-level paths."""
|
| 143 |
+
if "**" in pattern:
|
| 144 |
+
regex_pattern = pattern.replace("**", "<<<DOUBLESTAR>>>")
|
| 145 |
+
regex_pattern = fnmatch.translate(regex_pattern)
|
| 146 |
+
regex_pattern = regex_pattern.replace("<<<DOUBLESTAR>>>", ".*")
|
| 147 |
+
return re.match(regex_pattern, text) is not None
|
| 148 |
+
else:
|
| 149 |
+
return fnmatch.fnmatch(text, pattern)
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
def _estimate_line_numbers(fragment: str) -> Tuple[int, int]:
|
| 153 |
+
"""Estimate line numbers from a code fragment."""
|
| 154 |
+
lines = fragment.split("\n")
|
| 155 |
+
line_count = len([line for line in lines if line.strip()])
|
| 156 |
+
return 1, line_count
|
| 157 |
+
|
| 158 |
+
|
| 159 |
+
def _parse_results_to_matches(
    raw_results: List[Dict[str, Any]],
    repo_glob: Optional[str],
    path_glob: Optional[str],
) -> List[CodeMatch]:
    """Convert raw GitHub search items into CodeMatch records.

    Re-applies repo/path globs client-side (GitHub qualifiers are coarser
    than full glob syntax) and emits one match per text fragment.
    """
    matches: List[CodeMatch] = []

    for item in raw_results:
        repo_name = item.get("repository", {}).get("full_name", "unknown/unknown")
        file_path = item.get("path", "")
        sha = item.get("sha", "unknown")

        # Client-side refinement of the glob filters.
        if repo_glob and not _glob_match(repo_name, repo_glob):
            continue
        if path_glob and not _glob_match(file_path, path_glob):
            continue

        fragments = [tm.get("fragment", "") for tm in item.get("text_matches", [])]
        if fragments:
            for fragment in fragments:
                start, end = _estimate_line_numbers(fragment)
                matches.append(
                    CodeMatch(
                        repo=repo_name,
                        path=file_path,
                        ref=sha,
                        line_start=start,
                        line_end=end,
                        snippet=fragment.strip(),
                    )
                )
        else:
            # API omitted the text-match payload; record the hit anyway.
            matches.append(
                CodeMatch(
                    repo=repo_name,
                    path=file_path,
                    ref=sha,
                    line_start=1,
                    line_end=1,
                    snippet="<match found, but snippet not available>",
                )
            )

    return matches
|
| 206 |
+
|
| 207 |
+
|
| 208 |
+
def search_code(
    query: str,
    repo_glob: Optional[str] = None,
    path_glob: Optional[str] = None,
    regex: bool = False,
    max_results: int = 100,
) -> List[CodeMatch]:
    """Search for code across GitHub with glob filtering and line-level results.

    Args:
        query: Search term or pattern to find in code.
        repo_glob: Glob pattern to filter repositories (e.g., "github/*").
        path_glob: Glob pattern to filter file paths (e.g., "*.py", "src/**/*.js").
        regex: If True, treat query as a regular expression.
        max_results: Maximum number of results to return (default: 100).

    Returns:
        List of CodeMatch objects (repo, path, ref, line range, snippet).

    Raises:
        GitHubAPIError: if no GITHUB_TOKEN is configured.
    """
    token = _get_github_token()  # fail fast when credentials are missing
    github_query = _build_github_query(query, repo_glob, path_glob, regex)
    raw_results = _fetch_code_search_results(github_query, token, max_results)
    return _parse_results_to_matches(raw_results, repo_glob, path_glob)
|
| 237 |
+
|
| 238 |
+
|
| 239 |
+
async def _async_call(func, *args, **kwargs):
|
| 240 |
+
"""Wrap synchronous calls for async context."""
|
| 241 |
+
return await asyncio.to_thread(func, *args, **kwargs)
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
def _format_code_matches(matches: List[CodeMatch]) -> str:
|
| 245 |
+
"""Format code matches."""
|
| 246 |
+
if not matches:
|
| 247 |
+
return "No matches found."
|
| 248 |
+
|
| 249 |
+
lines = []
|
| 250 |
+
for i, match in enumerate(matches, 1):
|
| 251 |
+
lines.append(f"**{i}. {match.repo}/{match.path}:{match.line_start}**")
|
| 252 |
+
lines.append("```")
|
| 253 |
+
# Show first 5 lines of snippet
|
| 254 |
+
snippet_lines = match.snippet.split("\n")[:5]
|
| 255 |
+
lines.extend(snippet_lines)
|
| 256 |
+
if len(match.snippet.split("\n")) > 5:
|
| 257 |
+
lines.append("...")
|
| 258 |
+
lines.append("```")
|
| 259 |
+
lines.append("")
|
| 260 |
+
|
| 261 |
+
return "\n".join(lines)
|
| 262 |
+
|
| 263 |
+
|
| 264 |
+
class SearchCodeTool:
    """Tool for searching code across GitHub.

    Wraps the synchronous ``search_code`` helper for async callers and
    formats results (and errors) into the agent's ToolResult shape.
    """

    async def execute(self, params: Dict[str, Any]) -> ToolResult:
        """Execute a search_code operation.

        Args:
            params: Tool arguments. Requires 'query'; optionally accepts
                'repo_glob', 'path_glob', 'regex', and 'max_results'.

        Returns:
            ToolResult dict with 'formatted' text, result counts, and an
            'isError' flag set on failure.
        """
        query = params.get("query")
        if not query:
            return {
                "formatted": "Error: 'query' parameter is required",
                "totalResults": 0,
                "resultsShared": 0,
                "isError": True,
            }

        try:
            # Run the blocking HTTP search off the event loop.
            matches = await _async_call(
                search_code,
                query=query,
                repo_glob=params.get("repo_glob"),
                path_glob=params.get("path_glob"),
                regex=params.get("regex", False),
                max_results=params.get("max_results", 100),
            )

            if not matches:
                return {
                    "formatted": "No matches found",
                    "totalResults": 0,
                    "resultsShared": 0,
                }

            formatted = _format_code_matches(matches)
            response = f"**Found {len(matches)} code matches:**\n\n{formatted}"

            # Point the agent at the full file for the top hit.
            # (matches is guaranteed non-empty past the early return above,
            # so the previous redundant `if matches:` guard was removed.)
            top_match = matches[0]
            response += "\n**To view full file, use:**\n"
            response += (
                f"read_file(repo='{top_match.repo}', path='{top_match.path}')"
            )

            return {
                "formatted": response,
                "totalResults": len(matches),
                # Snippet previews are capped, so report at most 10 shared.
                "resultsShared": min(len(matches), 10),
            }

        except GitHubAPIError as e:
            # Configuration/auth problems (e.g. missing GITHUB_TOKEN).
            return {
                "formatted": f"GitHub API Error: {str(e)}",
                "totalResults": 0,
                "resultsShared": 0,
                "isError": True,
            }
        except Exception as e:
            # Catch-all boundary: surface the error to the agent loop
            # instead of crashing it.
            return {
                "formatted": f"Error: {str(e)}",
                "totalResults": 0,
                "resultsShared": 0,
                "isError": True,
            }
|
| 331 |
+
|
| 332 |
+
|
| 333 |
+
# Tool specification consumed by the agent's tool router / LLM function-calling
# layer: name, human-readable description (with usage examples), and a
# JSON-Schema description of the accepted parameters.
SEARCH_CODE_TOOL_SPEC = {
    "name": "search_code",
    # Description shown to the model; the inline examples mirror the
    # parameter schema below.
    "description": (
        "Search code across GitHub with glob filtering and line-level results.\n\n"
        "Returns: repo, path, ref, line_start, line_end, snippet\n\n"
        "Examples:\n"
        "- Search Python functions: {'query': 'def train', 'path_glob': '*.py', 'repo_glob': 'huggingface/*'}\n"
        "- Search TODO comments: {'query': 'TODO', 'repo_glob': 'github/*', 'max_results': 10}\n"
        "- Regex search: {'query': r'func Test\\w+', 'path_glob': '*.go', 'regex': True}\n"
        "- Search in specific repo: {'query': 'HfApi', 'repo_glob': 'huggingface/huggingface_hub', 'path_glob': '*.py'}\n\n"
    ),
    # JSON Schema for the tool arguments; only 'query' is mandatory.
    "parameters": {
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": "Search term or pattern to find in code",
            },
            "repo_glob": {
                "type": "string",
                "description": "Glob pattern to filter repositories (e.g., 'github/*', 'facebook/react')",
            },
            "path_glob": {
                "type": "string",
                "description": "Glob pattern to filter file paths (e.g., '*.py', 'src/**/*.js', 'test_*.py')",
            },
            "regex": {
                "type": "boolean",
                "description": "Treat query as regular expression (default: false)",
            },
            "max_results": {
                "type": "integer",
                "description": "Maximum number of results to return (default: 100)",
            },
        },
        "required": ["query"],
    },
}
|
| 372 |
+
|
| 373 |
+
|
| 374 |
+
async def search_code_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
    """Adapter for the agent tool router: returns (message, success)."""
    try:
        outcome = await SearchCodeTool().execute(arguments)
        return outcome["formatted"], not outcome.get("isError", False)
    except Exception as e:
        return f"Error executing search_code: {str(e)}", False
|
agent/tools/jobs_tool.py
CHANGED
|
@@ -40,6 +40,20 @@ GPU_FLAVORS = [
|
|
| 40 |
"h100",
|
| 41 |
"h100x8",
|
| 42 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
SPECIALIZED_FLAVORS = ["inf2x6"]
|
| 44 |
ALL_FLAVORS = CPU_FLAVORS + GPU_FLAVORS + SPECIALIZED_FLAVORS
|
| 45 |
|
|
@@ -741,12 +755,12 @@ HF_JOBS_TOOL_SPEC = {
|
|
| 741 |
"1. **Python mode:** Provide 'script' + 'dependencies' → auto-handles pip install\n"
|
| 742 |
"2. **Docker mode:** Provide 'image' + 'command' → full control\n"
|
| 743 |
"(script and command are mutually exclusive)\n\n"
|
| 744 |
-
"## Hardware:\n"
|
| 745 |
-
"CPU:
|
| 746 |
-
"GPU:
|
| 747 |
"## Examples:\n\n"
|
| 748 |
"**Fine-tune LLM and push to Hub:**\n"
|
| 749 |
-
"{'operation': 'run', 'script': 'from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer\\nmodel = AutoModelForCausalLM.from_pretrained(\"
|
| 750 |
"**Generate dataset daily and upload:**\n"
|
| 751 |
"{'operation': 'scheduled run', 'script': 'from datasets import Dataset\\nimport pandas as pd\\n# scrape/generate data\\ndf = pd.DataFrame(data)\\nds = Dataset.from_pandas(df)\\nds.push_to_hub(\"user-name/daily-dataset\")', 'dependencies': ['datasets', 'pandas'], 'schedule': '@daily'}\n\n"
|
| 752 |
"**Run custom training with Docker:**\n"
|
|
@@ -807,7 +821,7 @@ HF_JOBS_TOOL_SPEC = {
|
|
| 807 |
# Hardware and environment
|
| 808 |
"hardware_flavor": {
|
| 809 |
"type": "string",
|
| 810 |
-
"description": "Hardware type. CPU:
|
| 811 |
},
|
| 812 |
"timeout": {
|
| 813 |
"type": "string",
|
|
|
|
| 40 |
"h100",
|
| 41 |
"h100x8",
|
| 42 |
]
|
| 43 |
+
|
| 44 |
+
# Detailed specs for display (vCPU/RAM/GPU VRAM)
|
| 45 |
+
CPU_FLAVORS_DESC = (
|
| 46 |
+
"cpu-basic(2vCPU/16GB), cpu-upgrade(8vCPU/32GB), cpu-performance, cpu-xl"
|
| 47 |
+
)
|
| 48 |
+
GPU_FLAVORS_DESC = (
|
| 49 |
+
"t4-small(4vCPU/15GB/GPU 16GB), t4-medium(8vCPU/30GB/GPU 16GB), "
|
| 50 |
+
"l4x1(8vCPU/30GB/GPU 24GB), l4x4(48vCPU/186GB/GPU 96GB), "
|
| 51 |
+
"l40sx1(8vCPU/62GB/GPU 48GB), l40sx4(48vCPU/382GB/GPU 192GB), l40sx8(192vCPU/1534GB/GPU 384GB), "
|
| 52 |
+
"a10g-small(4vCPU/14GB/GPU 24GB), a10g-large(12vCPU/46GB/GPU 24GB), "
|
| 53 |
+
"a10g-largex2(24vCPU/92GB/GPU 48GB), a10g-largex4(48vCPU/184GB/GPU 96GB), "
|
| 54 |
+
"a100-large(12vCPU/142GB/GPU 80GB), h100(23vCPU/240GB/GPU 80GB), h100x8(184vCPU/1920GB/GPU 640GB), "
|
| 55 |
+
"zero-a10g(dynamic alloc)"
|
| 56 |
+
)
|
| 57 |
SPECIALIZED_FLAVORS = ["inf2x6"]
|
| 58 |
ALL_FLAVORS = CPU_FLAVORS + GPU_FLAVORS + SPECIALIZED_FLAVORS
|
| 59 |
|
|
|
|
| 755 |
"1. **Python mode:** Provide 'script' + 'dependencies' → auto-handles pip install\n"
|
| 756 |
"2. **Docker mode:** Provide 'image' + 'command' → full control\n"
|
| 757 |
"(script and command are mutually exclusive)\n\n"
|
| 758 |
+
"## Available Hardware (vCPU/RAM/GPU):\n"
|
| 759 |
+
f"CPU: {CPU_FLAVORS_DESC}\n"
|
| 760 |
+
f"GPU: {GPU_FLAVORS_DESC}\n"
|
| 761 |
"## Examples:\n\n"
|
| 762 |
"**Fine-tune LLM and push to Hub:**\n"
|
| 763 |
+
"{'operation': 'run', 'script': 'from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer\\nmodel = AutoModelForCausalLM.from_pretrained(\"Qwen/Qwen3-4B-Thinking-2507\")\\n# ... training code ...\\nmodel.push_to_hub(\"user-name/my-finetuned-model\")', 'dependencies': ['transformers', 'torch', 'datasets'], 'hardware_flavor': 'a10g-large', 'timeout': '4h', 'env': {'CUSTOM_VAR': 'value'}}\n\n"
|
| 764 |
"**Generate dataset daily and upload:**\n"
|
| 765 |
"{'operation': 'scheduled run', 'script': 'from datasets import Dataset\\nimport pandas as pd\\n# scrape/generate data\\ndf = pd.DataFrame(data)\\nds = Dataset.from_pandas(df)\\nds.push_to_hub(\"user-name/daily-dataset\")', 'dependencies': ['datasets', 'pandas'], 'schedule': '@daily'}\n\n"
|
| 766 |
"**Run custom training with Docker:**\n"
|
|
|
|
| 821 |
# Hardware and environment
|
| 822 |
"hardware_flavor": {
|
| 823 |
"type": "string",
|
| 824 |
+
"description": f"Hardware type. Available CPU flavors: {CPU_FLAVORS}. Available GPU flavors: {GPU_FLAVORS}. Use with 'run'/'scheduled run'.",
|
| 825 |
},
|
| 826 |
"timeout": {
|
| 827 |
"type": "string",
|