Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
Commit ·
64a9ca9
1
Parent(s): c747f40
loading date in system prompt, search code update (still not there)
Browse files- agent/context_manager/manager.py +12 -0
- agent/core/tools.py +18 -18
- agent/prompts/system_prompt.yaml +3 -10
- agent/tools/github_search_code.py +167 -55
- agent/tools/utilities.py +2 -2
- agent/tools/utils_tools.py +5 -8
agent/context_manager/manager.py
CHANGED
|
@@ -2,6 +2,8 @@
|
|
| 2 |
Context management for conversation history
|
| 3 |
"""
|
| 4 |
|
|
|
|
|
|
|
| 5 |
from pathlib import Path
|
| 6 |
from typing import Any
|
| 7 |
|
|
@@ -42,10 +44,20 @@ class ContextManager:
|
|
| 42 |
prompt_data = yaml.safe_load(f)
|
| 43 |
template_str = prompt_data.get("system_prompt", "")
|
| 44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
template = Template(template_str)
|
| 46 |
return template.render(
|
| 47 |
tools=tool_specs,
|
| 48 |
num_tools=len(tool_specs),
|
|
|
|
|
|
|
|
|
|
| 49 |
)
|
| 50 |
|
| 51 |
def add_message(self, message: Message, token_count: int = None) -> None:
|
|
|
|
| 2 |
Context management for conversation history
|
| 3 |
"""
|
| 4 |
|
| 5 |
+
import zoneinfo
|
| 6 |
+
from datetime import datetime
|
| 7 |
from pathlib import Path
|
| 8 |
from typing import Any
|
| 9 |
|
|
|
|
| 44 |
prompt_data = yaml.safe_load(f)
|
| 45 |
template_str = prompt_data.get("system_prompt", "")
|
| 46 |
|
| 47 |
+
# Get current date and time
|
| 48 |
+
tz = zoneinfo.ZoneInfo("Europe/Paris")
|
| 49 |
+
now = datetime.now(tz)
|
| 50 |
+
current_date = now.strftime("%d-%m-%Y")
|
| 51 |
+
current_time = now.strftime("%H:%M:%S.%f")[:-3]
|
| 52 |
+
current_timezone = f"{now.strftime('%Z')} (UTC{now.strftime('%z')[:3]}:{now.strftime('%z')[3:]})"
|
| 53 |
+
|
| 54 |
template = Template(template_str)
|
| 55 |
return template.render(
|
| 56 |
tools=tool_specs,
|
| 57 |
num_tools=len(tool_specs),
|
| 58 |
+
current_date=current_date,
|
| 59 |
+
current_time=current_time,
|
| 60 |
+
current_timezone=current_timezone,
|
| 61 |
)
|
| 62 |
|
| 63 |
def add_message(self, message: Message, token_count: int = None) -> None:
|
agent/core/tools.py
CHANGED
|
@@ -31,17 +31,15 @@ from agent.tools.github_read_file import (
|
|
| 31 |
GITHUB_READ_FILE_TOOL_SPEC,
|
| 32 |
github_read_file_handler,
|
| 33 |
)
|
| 34 |
-
from agent.tools.github_search_code import (
|
| 35 |
-
GITHUB_SEARCH_CODE_TOOL_SPEC,
|
| 36 |
-
github_search_code_handler,
|
| 37 |
-
)
|
| 38 |
from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, hf_jobs_handler
|
| 39 |
from agent.tools.plan_tool import PLAN_TOOL_SPEC, plan_tool_handler
|
| 40 |
from agent.tools.private_hf_repo_tools import (
|
| 41 |
PRIVATE_HF_REPO_TOOL_SPEC,
|
| 42 |
private_hf_repo_handler,
|
| 43 |
)
|
| 44 |
-
|
|
|
|
|
|
|
| 45 |
|
| 46 |
# Suppress aiohttp deprecation warning
|
| 47 |
warnings.filterwarnings(
|
|
@@ -240,7 +238,7 @@ class ToolRouter:
|
|
| 240 |
def create_builtin_tools() -> list[ToolSpec]:
|
| 241 |
"""Create built-in tool specifications"""
|
| 242 |
print(
|
| 243 |
-
f"Creating built-in tools: {EXPLORE_HF_DOCS_TOOL_SPEC['name']}, {HF_DOCS_FETCH_TOOL_SPEC['name']}, {PLAN_TOOL_SPEC['name']}, {HF_JOBS_TOOL_SPEC['name']}, {PRIVATE_HF_REPO_TOOL_SPEC['name']}, {
|
| 244 |
)
|
| 245 |
# in order of importance
|
| 246 |
return [
|
|
@@ -276,19 +274,21 @@ def create_builtin_tools() -> list[ToolSpec]:
|
|
| 276 |
parameters=PRIVATE_HF_REPO_TOOL_SPEC["parameters"],
|
| 277 |
handler=private_hf_repo_handler,
|
| 278 |
),
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
|
|
|
| 285 |
# GitHub tools
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
|
|
|
| 292 |
ToolSpec(
|
| 293 |
name=GITHUB_FIND_EXAMPLES_TOOL_SPEC["name"],
|
| 294 |
description=GITHUB_FIND_EXAMPLES_TOOL_SPEC["description"],
|
|
|
|
| 31 |
GITHUB_READ_FILE_TOOL_SPEC,
|
| 32 |
github_read_file_handler,
|
| 33 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, hf_jobs_handler
|
| 35 |
from agent.tools.plan_tool import PLAN_TOOL_SPEC, plan_tool_handler
|
| 36 |
from agent.tools.private_hf_repo_tools import (
|
| 37 |
PRIVATE_HF_REPO_TOOL_SPEC,
|
| 38 |
private_hf_repo_handler,
|
| 39 |
)
|
| 40 |
+
|
| 41 |
+
# NOTE: Utils tool disabled - date/time now loaded into system prompt at initialization
|
| 42 |
+
# from agent.tools.utils_tools import UTILS_TOOL_SPEC, utils_handler
|
| 43 |
|
| 44 |
# Suppress aiohttp deprecation warning
|
| 45 |
warnings.filterwarnings(
|
|
|
|
| 238 |
def create_builtin_tools() -> list[ToolSpec]:
|
| 239 |
"""Create built-in tool specifications"""
|
| 240 |
print(
|
| 241 |
+
f"Creating built-in tools: {EXPLORE_HF_DOCS_TOOL_SPEC['name']}, {HF_DOCS_FETCH_TOOL_SPEC['name']}, {PLAN_TOOL_SPEC['name']}, {HF_JOBS_TOOL_SPEC['name']}, {PRIVATE_HF_REPO_TOOL_SPEC['name']}, {GITHUB_FIND_EXAMPLES_TOOL_SPEC['name']}, {GITHUB_LIST_REPOS_TOOL_SPEC['name']}, {GITHUB_READ_FILE_TOOL_SPEC['name']}"
|
| 242 |
)
|
| 243 |
# in order of importance
|
| 244 |
return [
|
|
|
|
| 274 |
parameters=PRIVATE_HF_REPO_TOOL_SPEC["parameters"],
|
| 275 |
handler=private_hf_repo_handler,
|
| 276 |
),
|
| 277 |
+
# NOTE: Utils tool disabled - date/time now loaded into system prompt at initialization (fewer tool calls = more reliability)
|
| 278 |
+
# ToolSpec(
|
| 279 |
+
# name=UTILS_TOOL_SPEC["name"],
|
| 280 |
+
# description=UTILS_TOOL_SPEC["description"],
|
| 281 |
+
# parameters=UTILS_TOOL_SPEC["parameters"],
|
| 282 |
+
# handler=utils_handler,
|
| 283 |
+
# ),
|
| 284 |
# GitHub tools
|
| 285 |
+
# NOTE: Github search code tool disabled - a bit buggy
|
| 286 |
+
# ToolSpec(
|
| 287 |
+
# name=GITHUB_SEARCH_CODE_TOOL_SPEC["name"],
|
| 288 |
+
# description=GITHUB_SEARCH_CODE_TOOL_SPEC["description"],
|
| 289 |
+
# parameters=GITHUB_SEARCH_CODE_TOOL_SPEC["parameters"],
|
| 290 |
+
# handler=github_search_code_handler,
|
| 291 |
+
# ),
|
| 292 |
ToolSpec(
|
| 293 |
name=GITHUB_FIND_EXAMPLES_TOOL_SPEC["name"],
|
| 294 |
description=GITHUB_FIND_EXAMPLES_TOOL_SPEC["description"],
|
agent/prompts/system_prompt.yaml
CHANGED
|
@@ -1,6 +1,8 @@
|
|
| 1 |
system_prompt: |
|
| 2 |
You are HF Agent, a powerful AI assistant for Machine Learning Engineering, particularly training Large Language Models. You have access to {{ num_tools }} tools for interacting with Hugging Face Hub and performing ML tasks.
|
| 3 |
-
|
|
|
|
|
|
|
| 4 |
# Task Approach
|
| 5 |
|
| 6 |
**CRITICAL: Research First, Then Implement**
|
|
@@ -32,15 +34,6 @@ system_prompt: |
|
|
| 32 |
2. Don't be shy to ask questions if needed.
|
| 33 |
3. Don't be overly talkative, explaining everything after a task ended.
|
| 34 |
|
| 35 |
-
# Available Tools
|
| 36 |
-
|
| 37 |
-
You have access to the following categories of tools:
|
| 38 |
-
|
| 39 |
-
- Hugging Face Hub: Search and interact with models, datasets, papers, and documentation
|
| 40 |
-
- Spaces: Use and discover ML applications
|
| 41 |
-
- Jobs: Manage compute jobs for training and inference
|
| 42 |
-
- Image Generation: Generate and transform images
|
| 43 |
-
- Planning : a planning/to-do tool.
|
| 44 |
|
| 45 |
# Conventions
|
| 46 |
|
|
|
|
| 1 |
system_prompt: |
|
| 2 |
You are HF Agent, a powerful AI assistant for Machine Learning Engineering, particularly training Large Language Models. You have access to {{ num_tools }} tools for interacting with Hugging Face Hub and performing ML tasks.
|
| 3 |
+
|
| 4 |
+
_Current Time: **{{ current_date }} {{ current_time }} ({{ current_timezone }})**_
|
| 5 |
+
|
| 6 |
# Task Approach
|
| 7 |
|
| 8 |
**CRITICAL: Research First, Then Implement**
|
|
|
|
| 34 |
2. Don't be shy to ask questions if needed.
|
| 35 |
3. Don't be overly talkative, explaining everything after a task ended.
|
| 36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
# Conventions
|
| 39 |
|
agent/tools/github_search_code.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
"""
|
| 2 |
-
GitHub Code Search Tool - Search code across GitHub with
|
| 3 |
|
| 4 |
-
|
| 5 |
"""
|
| 6 |
|
| 7 |
import fnmatch
|
|
@@ -24,20 +24,124 @@ def _glob_match(text: str, pattern: str) -> bool:
|
|
| 24 |
return fnmatch.fnmatch(text, pattern)
|
| 25 |
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
def search_code(
|
| 28 |
query: str,
|
| 29 |
-
|
| 30 |
-
|
| 31 |
regex: bool = False,
|
| 32 |
max_results: int = 20,
|
| 33 |
) -> ToolResult:
|
| 34 |
"""
|
| 35 |
-
Search for code across GitHub with
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
Args:
|
| 38 |
query: Search term or pattern to find in code
|
| 39 |
-
|
| 40 |
-
|
| 41 |
regex: If True, treat query as regular expression
|
| 42 |
max_results: Maximum number of results to return (default 20)
|
| 43 |
|
|
@@ -53,35 +157,24 @@ def search_code(
|
|
| 53 |
"isError": True,
|
| 54 |
}
|
| 55 |
|
| 56 |
-
# Build GitHub query
|
| 57 |
query_parts = []
|
| 58 |
|
|
|
|
| 59 |
if regex:
|
| 60 |
query_parts.append(f"/{query}/")
|
| 61 |
else:
|
| 62 |
query_parts.append(f'"{query}"' if " " in query else query)
|
| 63 |
|
| 64 |
-
#
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
if "*" not in path_glob and "?" not in path_glob:
|
| 74 |
-
query_parts.append(f"path:{path_glob}")
|
| 75 |
-
elif path_glob.startswith("*."):
|
| 76 |
-
ext = path_glob[2:]
|
| 77 |
-
query_parts.append(f"extension:{ext}")
|
| 78 |
-
elif "/" not in path_glob and "*" in path_glob:
|
| 79 |
-
query_parts.append(f"filename:{path_glob}")
|
| 80 |
-
else:
|
| 81 |
-
# Complex pattern, extract extension if possible
|
| 82 |
-
ext_match = re.search(r"\*\.(\w+)", path_glob)
|
| 83 |
-
if ext_match:
|
| 84 |
-
query_parts.append(f"extension:{ext_match.group(1)}")
|
| 85 |
|
| 86 |
github_query = " ".join(query_parts)
|
| 87 |
|
|
@@ -145,10 +238,10 @@ def search_code(
|
|
| 145 |
file_path = item.get("path", "")
|
| 146 |
sha = item.get("sha", "")
|
| 147 |
|
| 148 |
-
# Apply client-side
|
| 149 |
-
if
|
| 150 |
continue
|
| 151 |
-
if
|
| 152 |
continue
|
| 153 |
|
| 154 |
# Extract text matches
|
|
@@ -241,24 +334,43 @@ def search_code(
|
|
| 241 |
GITHUB_SEARCH_CODE_TOOL_SPEC = {
|
| 242 |
"name": "search_code",
|
| 243 |
"description": (
|
| 244 |
-
"Search for code patterns across GitHub with
|
| 245 |
-
"
|
| 246 |
-
"
|
| 247 |
-
"-
|
| 248 |
-
"-
|
| 249 |
-
"-
|
| 250 |
-
"-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 251 |
"## Examples:\n\n"
|
| 252 |
-
"**Search for
|
| 253 |
-
"
|
| 254 |
-
"
|
| 255 |
-
"
|
| 256 |
-
"**
|
| 257 |
-
"
|
| 258 |
-
"
|
| 259 |
-
"
|
| 260 |
-
"**
|
| 261 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 262 |
"Perfect for finding code patterns, learning from examples, or exploring implementations."
|
| 263 |
),
|
| 264 |
"parameters": {
|
|
@@ -268,13 +380,13 @@ GITHUB_SEARCH_CODE_TOOL_SPEC = {
|
|
| 268 |
"type": "string",
|
| 269 |
"description": "Search term or pattern to find in code. Required.",
|
| 270 |
},
|
| 271 |
-
"
|
| 272 |
"type": "string",
|
| 273 |
-
"description": "
|
| 274 |
},
|
| 275 |
-
"
|
| 276 |
"type": "string",
|
| 277 |
-
"description": "
|
| 278 |
},
|
| 279 |
"regex": {
|
| 280 |
"type": "boolean",
|
|
@@ -295,8 +407,8 @@ async def github_search_code_handler(arguments: Dict[str, Any]) -> tuple[str, bo
|
|
| 295 |
try:
|
| 296 |
result = search_code(
|
| 297 |
query=arguments["query"],
|
| 298 |
-
|
| 299 |
-
|
| 300 |
regex=arguments.get("regex", False),
|
| 301 |
max_results=arguments.get("max_results", 20),
|
| 302 |
)
|
|
|
|
| 1 |
"""
|
| 2 |
+
GitHub Code Search Tool - Search code across GitHub with intelligent filtering
|
| 3 |
|
| 4 |
+
Maps user-friendly patterns to GitHub's Code Search API capabilities.
|
| 5 |
"""
|
| 6 |
|
| 7 |
import fnmatch
|
|
|
|
| 24 |
return fnmatch.fnmatch(text, pattern)
|
| 25 |
|
| 26 |
|
| 27 |
+
def _parse_repo_filter(repo_pattern: str) -> tuple[Optional[str], Optional[str]]:
|
| 28 |
+
"""
|
| 29 |
+
Parse repository pattern into GitHub API filter and client-side glob pattern.
|
| 30 |
+
|
| 31 |
+
Returns: (api_filter, client_glob)
|
| 32 |
+
- api_filter: GitHub API filter string (e.g., "org:huggingface")
|
| 33 |
+
- client_glob: Pattern for client-side filtering (e.g., "huggingface/trl*")
|
| 34 |
+
|
| 35 |
+
Examples:
|
| 36 |
+
"huggingface/trl" β ("repo:huggingface/trl", None)
|
| 37 |
+
"huggingface/*" β ("org:huggingface", "huggingface/*")
|
| 38 |
+
"huggingface/trl*" β ("org:huggingface", "huggingface/trl*")
|
| 39 |
+
"huggingface" β ("org:huggingface", None)
|
| 40 |
+
"*/*" β (None, "*/*")
|
| 41 |
+
"""
|
| 42 |
+
if not repo_pattern:
|
| 43 |
+
return None, None
|
| 44 |
+
|
| 45 |
+
# Pattern: owner/repo (exact match)
|
| 46 |
+
if "/" in repo_pattern and "*" not in repo_pattern and "?" not in repo_pattern:
|
| 47 |
+
return f"repo:{repo_pattern}", None
|
| 48 |
+
|
| 49 |
+
# Pattern: owner/* or owner/prefix* (org + client filter)
|
| 50 |
+
if "/" in repo_pattern and ("*" in repo_pattern or "?" in repo_pattern):
|
| 51 |
+
org_name = repo_pattern.split("/")[0]
|
| 52 |
+
if "*" not in org_name and "?" not in org_name:
|
| 53 |
+
return f"org:{org_name}", repo_pattern
|
| 54 |
+
# Org name has wildcards - can't filter server-side
|
| 55 |
+
return None, repo_pattern
|
| 56 |
+
|
| 57 |
+
# Pattern: owner (just org name, no wildcards)
|
| 58 |
+
if "*" not in repo_pattern and "?" not in repo_pattern:
|
| 59 |
+
return f"org:{repo_pattern}", None
|
| 60 |
+
|
| 61 |
+
# Pattern: */* or other complex patterns (client-side only)
|
| 62 |
+
return None, repo_pattern
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def _parse_path_filter(path_pattern: str) -> tuple[Optional[str], Optional[str]]:
|
| 66 |
+
"""
|
| 67 |
+
Parse path pattern into GitHub API filter and client-side glob pattern.
|
| 68 |
+
|
| 69 |
+
Returns: (api_filter, client_glob)
|
| 70 |
+
|
| 71 |
+
Examples:
|
| 72 |
+
"*.py" β ("extension:py", None)
|
| 73 |
+
"**/*.py" β ("extension:py", None)
|
| 74 |
+
"src/**/*.py" β ("extension:py", "src/**/*.py")
|
| 75 |
+
"test_*.py" β ("extension:py", "test_*.py")
|
| 76 |
+
"src/main.py" β ("path:src/main.py", None)
|
| 77 |
+
"""
|
| 78 |
+
if not path_pattern:
|
| 79 |
+
return None, None
|
| 80 |
+
|
| 81 |
+
# Exact path (no wildcards)
|
| 82 |
+
if "*" not in path_pattern and "?" not in path_pattern:
|
| 83 |
+
return f"path:{path_pattern}", None
|
| 84 |
+
|
| 85 |
+
# Extract extension if present
|
| 86 |
+
ext_match = re.search(r"\*\.(\w+)$", path_pattern)
|
| 87 |
+
if ext_match:
|
| 88 |
+
extension = ext_match.group(1)
|
| 89 |
+
api_filter = f"extension:{extension}"
|
| 90 |
+
|
| 91 |
+
# Check if there's a directory prefix that needs client-side filtering
|
| 92 |
+
# e.g., "src/**/*.py" needs client filter, "**/*.py" doesn't
|
| 93 |
+
if path_pattern in [f"*.{extension}", f"**/*.{extension}"]:
|
| 94 |
+
# Simple patterns - API filter is enough
|
| 95 |
+
return api_filter, None
|
| 96 |
+
else:
|
| 97 |
+
# Complex pattern - need client-side filter too
|
| 98 |
+
return api_filter, path_pattern
|
| 99 |
+
|
| 100 |
+
# Pattern like "test_*.py" or "README*" - use filename with client filter
|
| 101 |
+
# GitHub's filename: doesn't support wildcards, so we rely on client-side
|
| 102 |
+
if "/" not in path_pattern:
|
| 103 |
+
# Try to extract extension for API filtering
|
| 104 |
+
if "." in path_pattern:
|
| 105 |
+
parts = path_pattern.rsplit(".", 1)
|
| 106 |
+
if "*" not in parts[-1] and "?" not in parts[-1]:
|
| 107 |
+
# Extension is clean
|
| 108 |
+
return f"extension:{parts[-1]}", path_pattern
|
| 109 |
+
# No extension or complex - client-side only
|
| 110 |
+
return None, path_pattern
|
| 111 |
+
|
| 112 |
+
# Complex path pattern - client-side only
|
| 113 |
+
return None, path_pattern
|
| 114 |
+
|
| 115 |
+
|
| 116 |
def search_code(
|
| 117 |
query: str,
|
| 118 |
+
repo_pattern: Optional[str] = None,
|
| 119 |
+
path_pattern: Optional[str] = None,
|
| 120 |
regex: bool = False,
|
| 121 |
max_results: int = 20,
|
| 122 |
) -> ToolResult:
|
| 123 |
"""
|
| 124 |
+
Search for code across GitHub with intelligent pattern matching.
|
| 125 |
+
|
| 126 |
+
This tool intelligently maps user patterns to GitHub's Code Search API capabilities:
|
| 127 |
+
|
| 128 |
+
Repository Patterns:
|
| 129 |
+
- "owner/repo" β Searches exact repository
|
| 130 |
+
- "owner/*" or "owner" β Searches all repos in organization
|
| 131 |
+
- "*/*" β Searches all GitHub (no repo filter)
|
| 132 |
+
- Wildcards trigger client-side filtering when needed
|
| 133 |
+
|
| 134 |
+
Path Patterns:
|
| 135 |
+
- "*.py" β Searches all Python files
|
| 136 |
+
- "**/*.js" β Searches all JavaScript files (any directory)
|
| 137 |
+
- "src/**/*.py" β Python files in src/ (uses client-side filtering)
|
| 138 |
+
- "test_*.py" β Files matching pattern (client-side filtering)
|
| 139 |
+
- "path/to/file.py" β Exact file path
|
| 140 |
|
| 141 |
Args:
|
| 142 |
query: Search term or pattern to find in code
|
| 143 |
+
repo_pattern: Repository pattern (e.g., "huggingface/trl", "huggingface/*", "huggingface")
|
| 144 |
+
path_pattern: File path pattern (e.g., "*.py", "src/**/*.js")
|
| 145 |
regex: If True, treat query as regular expression
|
| 146 |
max_results: Maximum number of results to return (default 20)
|
| 147 |
|
|
|
|
| 157 |
"isError": True,
|
| 158 |
}
|
| 159 |
|
| 160 |
+
# Build GitHub API query
|
| 161 |
query_parts = []
|
| 162 |
|
| 163 |
+
# Add search term
|
| 164 |
if regex:
|
| 165 |
query_parts.append(f"/{query}/")
|
| 166 |
else:
|
| 167 |
query_parts.append(f'"{query}"' if " " in query else query)
|
| 168 |
|
| 169 |
+
# Parse repository filter
|
| 170 |
+
repo_api_filter, repo_client_glob = _parse_repo_filter(repo_pattern)
|
| 171 |
+
if repo_api_filter:
|
| 172 |
+
query_parts.append(repo_api_filter)
|
| 173 |
+
|
| 174 |
+
# Parse path filter
|
| 175 |
+
path_api_filter, path_client_glob = _parse_path_filter(path_pattern)
|
| 176 |
+
if path_api_filter:
|
| 177 |
+
query_parts.append(path_api_filter)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
|
| 179 |
github_query = " ".join(query_parts)
|
| 180 |
|
|
|
|
| 238 |
file_path = item.get("path", "")
|
| 239 |
sha = item.get("sha", "")
|
| 240 |
|
| 241 |
+
# Apply client-side filtering
|
| 242 |
+
if repo_client_glob and not _glob_match(repo_name, repo_client_glob):
|
| 243 |
continue
|
| 244 |
+
if path_client_glob and not _glob_match(file_path, path_client_glob):
|
| 245 |
continue
|
| 246 |
|
| 247 |
# Extract text matches
|
|
|
|
| 334 |
GITHUB_SEARCH_CODE_TOOL_SPEC = {
|
| 335 |
"name": "search_code",
|
| 336 |
"description": (
|
| 337 |
+
"Search for code patterns across GitHub with intelligent pattern matching.\n\n"
|
| 338 |
+
"This tool automatically maps your patterns to GitHub's Code Search API:\n\n"
|
| 339 |
+
"## Repository Patterns:\n"
|
| 340 |
+
"- **Exact repo**: `'huggingface/trl'` β Searches only that repo\n"
|
| 341 |
+
"- **Organization**: `'huggingface'` or `'huggingface/*'` β All repos in org\n"
|
| 342 |
+
"- **All repos**: `'*/*'` or omit β Searches all GitHub\n"
|
| 343 |
+
"- Wildcards like `'huggingface/trl*'` automatically use client-side filtering\n\n"
|
| 344 |
+
"## Path Patterns:\n"
|
| 345 |
+
"- **Extension**: `'*.py'` or `'**/*.py'` β All Python files\n"
|
| 346 |
+
"- **Directory**: `'src/**/*.js'` β JavaScript files in src/ (client-filtered)\n"
|
| 347 |
+
"- **Pattern**: `'test_*.py'` β Files matching pattern (client-filtered)\n"
|
| 348 |
+
"- **Exact path**: `'README.md'` β Specific file\n\n"
|
| 349 |
+
"## How It Works:\n"
|
| 350 |
+
"1. Converts patterns to GitHub API filters (server-side, fast)\n"
|
| 351 |
+
"2. Falls back to client-side filtering for complex patterns\n"
|
| 352 |
+
"3. Returns code snippets with line numbers and URLs\n\n"
|
| 353 |
"## Examples:\n\n"
|
| 354 |
+
"**Search for function in specific repo:**\n"
|
| 355 |
+
"```python\n"
|
| 356 |
+
"{'query': 'def train', 'repo_pattern': 'huggingface/trl', 'path_pattern': '*.py'}\n"
|
| 357 |
+
"```\n\n"
|
| 358 |
+
"**Search across entire organization:**\n"
|
| 359 |
+
"```python\n"
|
| 360 |
+
"{'query': 'GRPOTrainer', 'repo_pattern': 'huggingface', 'path_pattern': '*.py'}\n"
|
| 361 |
+
"```\n\n"
|
| 362 |
+
"**Search specific directory pattern:**\n"
|
| 363 |
+
"```python\n"
|
| 364 |
+
"{'query': 'TODO', 'repo_pattern': 'facebook/react', 'path_pattern': 'src/**/*.js'}\n"
|
| 365 |
+
"```\n\n"
|
| 366 |
+
"**Regex search across GitHub:**\n"
|
| 367 |
+
"```python\n"
|
| 368 |
+
"{'query': r'class \\w+Trainer', 'path_pattern': '*.py', 'regex': True}\n"
|
| 369 |
+
"```\n\n"
|
| 370 |
+
"**Search all repos (no filter):**\n"
|
| 371 |
+
"```python\n"
|
| 372 |
+
"{'query': 'import transformers', 'path_pattern': '*.py', 'max_results': 50}\n"
|
| 373 |
+
"```\n\n"
|
| 374 |
"Perfect for finding code patterns, learning from examples, or exploring implementations."
|
| 375 |
),
|
| 376 |
"parameters": {
|
|
|
|
| 380 |
"type": "string",
|
| 381 |
"description": "Search term or pattern to find in code. Required.",
|
| 382 |
},
|
| 383 |
+
"repo_pattern": {
|
| 384 |
"type": "string",
|
| 385 |
+
"description": "Repository pattern: 'owner/repo' (exact), 'owner' (org), 'owner/*' (org with filter), '*/*' (all). Optional.",
|
| 386 |
},
|
| 387 |
+
"path_pattern": {
|
| 388 |
"type": "string",
|
| 389 |
+
"description": "File path pattern: '*.ext' (extension), 'dir/**/*.ext' (directory), 'pattern*.ext' (name pattern). Optional.",
|
| 390 |
},
|
| 391 |
"regex": {
|
| 392 |
"type": "boolean",
|
|
|
|
| 407 |
try:
|
| 408 |
result = search_code(
|
| 409 |
query=arguments["query"],
|
| 410 |
+
repo_pattern=arguments.get("repo_pattern"),
|
| 411 |
+
path_pattern=arguments.get("path_pattern"),
|
| 412 |
regex=arguments.get("regex", False),
|
| 413 |
max_results=arguments.get("max_results", 20),
|
| 414 |
)
|
agent/tools/utilities.py
CHANGED
|
@@ -2,8 +2,10 @@
|
|
| 2 |
Utility functions for Hugging Face tools
|
| 3 |
|
| 4 |
Ported from: hf-mcp-server/packages/mcp/src/jobs/formatters.ts
|
|
|
|
| 5 |
"""
|
| 6 |
|
|
|
|
| 7 |
from datetime import datetime
|
| 8 |
from typing import Any, Dict, List, Optional
|
| 9 |
|
|
@@ -126,7 +128,6 @@ def format_scheduled_jobs_table(jobs: List[Dict[str, Any]]) -> str:
|
|
| 126 |
|
| 127 |
def format_job_details(jobs: Any) -> str:
|
| 128 |
"""Format job details as JSON in a markdown code block"""
|
| 129 |
-
import json
|
| 130 |
|
| 131 |
job_array = jobs if isinstance(jobs, list) else [jobs]
|
| 132 |
json_str = json.dumps(job_array, indent=2)
|
|
@@ -135,7 +136,6 @@ def format_job_details(jobs: Any) -> str:
|
|
| 135 |
|
| 136 |
def format_scheduled_job_details(jobs: Any) -> str:
|
| 137 |
"""Format scheduled job details as JSON in a markdown code block"""
|
| 138 |
-
import json
|
| 139 |
|
| 140 |
job_array = jobs if isinstance(jobs, list) else [jobs]
|
| 141 |
json_str = json.dumps(job_array, indent=2)
|
|
|
|
| 2 |
Utility functions for Hugging Face tools
|
| 3 |
|
| 4 |
Ported from: hf-mcp-server/packages/mcp/src/jobs/formatters.ts
|
| 5 |
+
Includes GPU memory validation for job submissions
|
| 6 |
"""
|
| 7 |
|
| 8 |
+
import json
|
| 9 |
from datetime import datetime
|
| 10 |
from typing import Any, Dict, List, Optional
|
| 11 |
|
|
|
|
| 128 |
|
| 129 |
def format_job_details(jobs: Any) -> str:
|
| 130 |
"""Format job details as JSON in a markdown code block"""
|
|
|
|
| 131 |
|
| 132 |
job_array = jobs if isinstance(jobs, list) else [jobs]
|
| 133 |
json_str = json.dumps(job_array, indent=2)
|
|
|
|
| 136 |
|
| 137 |
def format_scheduled_job_details(jobs: Any) -> str:
|
| 138 |
"""Format scheduled job details as JSON in a markdown code block"""
|
|
|
|
| 139 |
|
| 140 |
job_array = jobs if isinstance(jobs, list) else [jobs]
|
| 141 |
json_str = json.dumps(job_array, indent=2)
|
agent/tools/utils_tools.py
CHANGED
|
@@ -4,14 +4,9 @@ Utils Tools - General utility operations
|
|
| 4 |
Provides system information like current date/time with timezone support.
|
| 5 |
"""
|
| 6 |
|
| 7 |
-
import
|
| 8 |
from datetime import datetime
|
| 9 |
-
from typing import Any, Dict, Literal
|
| 10 |
-
|
| 11 |
-
try:
|
| 12 |
-
import zoneinfo
|
| 13 |
-
except ImportError:
|
| 14 |
-
from backports import zoneinfo
|
| 15 |
|
| 16 |
from agent.tools.types import ToolResult
|
| 17 |
|
|
@@ -123,7 +118,9 @@ Common timezones: Europe/Paris, America/New_York, America/Los_Angeles, Asia/Toky
|
|
| 123 |
date_str = now.strftime("%d-%m-%Y")
|
| 124 |
|
| 125 |
# Format time as HH:MM:SS.mmm
|
| 126 |
-
time_str = now.strftime("%H:%M:%S.%f")[
|
|
|
|
|
|
|
| 127 |
|
| 128 |
# Get timezone abbreviation/offset
|
| 129 |
tz_offset = now.strftime("%z")
|
|
|
|
| 4 |
Provides system information like current date/time with timezone support.
|
| 5 |
"""
|
| 6 |
|
| 7 |
+
import zoneinfo
|
| 8 |
from datetime import datetime
|
| 9 |
+
from typing import Any, Dict, Literal
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
from agent.tools.types import ToolResult
|
| 12 |
|
|
|
|
| 118 |
date_str = now.strftime("%d-%m-%Y")
|
| 119 |
|
| 120 |
# Format time as HH:MM:SS.mmm
|
| 121 |
+
time_str = now.strftime("%H:%M:%S.%f")[
|
| 122 |
+
:-3
|
| 123 |
+
] # Remove last 3 digits to keep only milliseconds
|
| 124 |
|
| 125 |
# Get timezone abbreviation/offset
|
| 126 |
tz_offset = now.strftime("%z")
|