Henri Bonamy committed on
Commit
0c21cf1
·
1 Parent(s): 706d95d

Added tools to create private repos, view them, and upload files to them. Added a clock tool.

Browse files
agent/context_manager/manager.py CHANGED
@@ -52,7 +52,6 @@ class ContextManager:
52
  """Add a message to the history"""
53
  if token_count:
54
  self.context_length = token_count
55
- print(f"DEBUG : token_count = {self.context_length}")
56
  self.items.append(message)
57
 
58
  def get_messages(self) -> list[Message]:
 
52
  """Add a message to the history"""
53
  if token_count:
54
  self.context_length = token_count
 
55
  self.items.append(message)
56
 
57
  def get_messages(self) -> list[Message]:
agent/core/agent_loop.py CHANGED
@@ -15,14 +15,42 @@ from agent.core.tools import ToolRouter
15
  ToolCall = ChatCompletionMessageToolCall
16
 
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  def _needs_approval(tool_name: str, tool_args: dict) -> bool:
19
  """Check if a tool call requires user approval before execution"""
20
- if tool_name != "hf_jobs":
 
 
21
  return False
22
 
23
- # Check if it's a run or uv operation
24
- operation = tool_args.get("operation", "")
25
- return operation in ["run", "uv"]
 
 
 
 
 
 
 
 
 
 
26
 
27
 
28
  class Handlers:
@@ -130,16 +158,23 @@ class Handlers:
130
  # Return early - wait for EXEC_APPROVAL operation
131
  return None
132
 
133
- await session.send_event(
134
- Event(
135
- event_type="tool_call",
136
- data={"tool": tool_name, "arguments": tool_args},
 
 
 
 
 
 
 
 
137
  )
138
- )
139
 
140
- output, success = await session.tool_router.call_tool(
141
- tool_name, tool_args
142
- )
143
 
144
  # Add tool result to history
145
  tool_msg = Message(
 
15
  ToolCall = ChatCompletionMessageToolCall
16
 
17
 
18
+ def _validate_tool_args(tool_args: dict) -> tuple[bool, str | None]:
19
+ """
20
+ Validate tool arguments structure.
21
+
22
+ Returns:
23
+ (is_valid, error_message)
24
+ """
25
+ args = tool_args.get("args", {})
26
+ # Sometimes LLM passes args as string instead of dict
27
+ if isinstance(args, str):
28
+ return False, f"Tool call error: 'args' must be a JSON object, not a string. You passed: {repr(args)}"
29
+ if not isinstance(args, dict) and args is not None:
30
+ return False, f"Tool call error: 'args' must be a JSON object. You passed type: {type(args).__name__}"
31
+ return True, None
32
+
33
+
34
def _needs_approval(tool_name: str, tool_args: dict) -> bool:
    """Check if a tool call requires user approval before execution.

    Args:
        tool_name: Name of the tool being called.
        tool_args: Decoded tool-call arguments (``operation`` is read from here).

    Returns:
        True when the tool/operation pair is one that must be approved by the
        user first; False otherwise, including when the arguments are malformed.
    """
    # If args are malformed, skip approval (validation error will be shown later)
    args_valid, _ = _validate_tool_args(tool_args)
    if not args_valid:
        return False

    # Operations gated behind approval, per tool:
    #   hf_jobs          - run / uv operations
    #   hf_private_repos - repo creation and file uploads
    gated_operations = {
        "hf_jobs": ("run", "uv"),
        "hf_private_repos": ("create_repo", "upload_file"),
    }
    gated = gated_operations.get(tool_name)
    if gated is None:
        return False
    return tool_args.get("operation", "") in gated
54
 
55
 
56
  class Handlers:
 
158
  # Return early - wait for EXEC_APPROVAL operation
159
  return None
160
 
161
+ # Validate tool arguments before calling
162
+ args_valid, error_msg = _validate_tool_args(tool_args)
163
+ if not args_valid:
164
+ # Return error to agent instead of calling tool
165
+ output = error_msg
166
+ success = False
167
+ else:
168
+ await session.send_event(
169
+ Event(
170
+ event_type="tool_call",
171
+ data={"tool": tool_name, "arguments": tool_args},
172
+ )
173
  )
 
174
 
175
+ output, success = await session.tool_router.call_tool(
176
+ tool_name, tool_args
177
+ )
178
 
179
  # Add tool result to history
180
  tool_msg = Message(
agent/core/tools.py CHANGED
@@ -13,9 +13,14 @@ from lmnr import observe
13
  from mcp.types import EmbeddedResource, ImageContent, TextContent
14
 
15
  from agent.config import MCPServerConfig
 
 
 
 
16
  from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, hf_jobs_handler
17
  from agent.tools.plan_tool import PLAN_TOOL_SPEC, plan_tool_handler
18
  from agent.tools.search_docs_tool import SEARCH_DOCS_TOOL_SPEC, search_docs_handler
 
19
 
20
  # Suppress aiohttp deprecation warning
21
  warnings.filterwarnings(
@@ -189,7 +194,7 @@ class ToolRouter:
189
  def create_builtin_tools() -> list[ToolSpec]:
190
  """Create built-in tool specifications"""
191
  print(
192
- f"Creating built-in tools: {HF_JOBS_TOOL_SPEC['name']}, {SEARCH_DOCS_TOOL_SPEC['name']}, {PLAN_TOOL_SPEC['name']}"
193
  )
194
  return [
195
  ToolSpec(
@@ -198,6 +203,12 @@ def create_builtin_tools() -> list[ToolSpec]:
198
  parameters=HF_JOBS_TOOL_SPEC["parameters"],
199
  handler=hf_jobs_handler,
200
  ),
 
 
 
 
 
 
201
  ToolSpec(
202
  name=SEARCH_DOCS_TOOL_SPEC["name"],
203
  description=SEARCH_DOCS_TOOL_SPEC["description"],
@@ -210,4 +221,10 @@ def create_builtin_tools() -> list[ToolSpec]:
210
  parameters=PLAN_TOOL_SPEC["parameters"],
211
  handler=plan_tool_handler,
212
  ),
 
 
 
 
 
 
213
  ]
 
13
  from mcp.types import EmbeddedResource, ImageContent, TextContent
14
 
15
  from agent.config import MCPServerConfig
16
+ from agent.tools.private_hf_repo_tools import (
17
+ PRIVATE_HF_REPO_TOOL_SPEC,
18
+ private_hf_repo_handler,
19
+ )
20
  from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, hf_jobs_handler
21
  from agent.tools.plan_tool import PLAN_TOOL_SPEC, plan_tool_handler
22
  from agent.tools.search_docs_tool import SEARCH_DOCS_TOOL_SPEC, search_docs_handler
23
+ from agent.tools.utils_tools import UTILS_TOOL_SPEC, utils_handler
24
 
25
  # Suppress aiohttp deprecation warning
26
  warnings.filterwarnings(
 
194
  def create_builtin_tools() -> list[ToolSpec]:
195
  """Create built-in tool specifications"""
196
  print(
197
+ f"Creating built-in tools: {HF_JOBS_TOOL_SPEC['name']}, {PRIVATE_HF_REPO_TOOL_SPEC['name']}, {SEARCH_DOCS_TOOL_SPEC['name']}, {PLAN_TOOL_SPEC['name']}, {UTILS_TOOL_SPEC['name']}"
198
  )
199
  return [
200
  ToolSpec(
 
203
  parameters=HF_JOBS_TOOL_SPEC["parameters"],
204
  handler=hf_jobs_handler,
205
  ),
206
+ ToolSpec(
207
+ name=PRIVATE_HF_REPO_TOOL_SPEC["name"],
208
+ description=PRIVATE_HF_REPO_TOOL_SPEC["description"],
209
+ parameters=PRIVATE_HF_REPO_TOOL_SPEC["parameters"],
210
+ handler=private_hf_repo_handler,
211
+ ),
212
  ToolSpec(
213
  name=SEARCH_DOCS_TOOL_SPEC["name"],
214
  description=SEARCH_DOCS_TOOL_SPEC["description"],
 
221
  parameters=PLAN_TOOL_SPEC["parameters"],
222
  handler=plan_tool_handler,
223
  ),
224
+ ToolSpec(
225
+ name=UTILS_TOOL_SPEC["name"],
226
+ description=UTILS_TOOL_SPEC["description"],
227
+ parameters=UTILS_TOOL_SPEC["parameters"],
228
+ handler=utils_handler,
229
+ ),
230
  ]
agent/main.py CHANGED
@@ -29,6 +29,16 @@ from agent.utils.terminal_display import (
29
 
30
  litellm.drop_params = True
31
 
 
 
 
 
 
 
 
 
 
 
32
  lmnr_api_key = os.environ.get("LMNR_API_KEY")
33
  if lmnr_api_key:
34
  try:
@@ -121,7 +131,7 @@ async def event_listener(
121
  arguments = event.data.get("arguments", {}) if event.data else {}
122
 
123
  operation = arguments.get("operation", "")
124
- args = arguments.get("args", {})
125
 
126
  print(f"\nOperation: {operation}")
127
 
@@ -137,19 +147,64 @@ async def event_listener(
137
  print(f"Docker image: {image}")
138
  print(f"Command: {command}")
139
 
140
- # Common parameters
141
- flavor = args.get("flavor", "cpu-basic")
142
- detached = args.get("detached", False)
143
- print(f"Hardware: {flavor}")
144
- print(f"Detached mode: {detached}")
145
-
146
- secrets = args.get("secrets", [])
147
- if secrets:
148
- print(f"Secrets: {', '.join(secrets)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
 
150
  # Get user decision
151
  print("\n" + format_separator())
152
- print(format_header("JOB EXECUTION APPROVAL REQUIRED"))
 
 
 
 
 
 
153
  print(format_separator())
154
  loop = asyncio.get_event_loop()
155
  response = await loop.run_in_executor(
 
29
 
30
  litellm.drop_params = True
31
 
32
+
33
+ def _safe_get_args(arguments: dict) -> dict:
34
+ """Safely extract args dict from arguments, handling cases where LLM passes string."""
35
+ args = arguments.get("args", {})
36
+ # Sometimes LLM passes args as string instead of dict
37
+ if isinstance(args, str):
38
+ return {}
39
+ return args if isinstance(args, dict) else {}
40
+
41
+
42
  lmnr_api_key = os.environ.get("LMNR_API_KEY")
43
  if lmnr_api_key:
44
  try:
 
131
  arguments = event.data.get("arguments", {}) if event.data else {}
132
 
133
  operation = arguments.get("operation", "")
134
+ args = _safe_get_args(arguments)
135
 
136
  print(f"\nOperation: {operation}")
137
 
 
147
  print(f"Docker image: {image}")
148
  print(f"Command: {command}")
149
 
150
+ # Common parameters
151
+ flavor = args.get("flavor", "cpu-basic")
152
+ detached = args.get("detached", False)
153
+ print(f"Hardware: {flavor}")
154
+ print(f"Detached mode: {detached}")
155
+
156
+ secrets = args.get("secrets", [])
157
+ if secrets:
158
+ print(f"Secrets: {', '.join(secrets)}")
159
+ elif operation in ["create_repo", "upload_file"]:
160
+ repo_id = args.get("repo_id", "")
161
+ repo_type = args.get("repo_type", "dataset")
162
+
163
+ # Build repo URL
164
+ type_path = "" if repo_type == "model" else f"{repo_type}s"
165
+ repo_url = f"https://huggingface.co/{type_path}/{repo_id}".replace("//", "/")
166
+
167
+ print(f"Repository: {repo_id}")
168
+ print(f"Type: {repo_type}")
169
+ print(f"Private: Yes")
170
+ print(f"URL: {repo_url}")
171
+
172
+ # Show file preview for upload_file operation
173
+ if operation == "upload_file":
174
+ path_in_repo = args.get("path_in_repo", "")
175
+ file_content = args.get("file_content", "")
176
+ print(f"File: {path_in_repo}")
177
+
178
+ if isinstance(file_content, str):
179
+ # Calculate metrics
180
+ all_lines = file_content.split('\n')
181
+ line_count = len(all_lines)
182
+ size_bytes = len(file_content.encode('utf-8'))
183
+ size_kb = size_bytes / 1024
184
+ size_mb = size_kb / 1024
185
+
186
+ print(f"Line count: {line_count}")
187
+ if size_kb < 1024:
188
+ print(f"Size: {size_kb:.2f} KB")
189
+ else:
190
+ print(f"Size: {size_mb:.2f} MB")
191
+
192
+ # Show preview
193
+ preview_lines = all_lines[:5]
194
+ preview = '\n'.join(preview_lines)
195
+ print(f"Content preview (first 5 lines):\n{preview}")
196
+ if len(all_lines) > 5:
197
+ print("...")
198
 
199
  # Get user decision
200
  print("\n" + format_separator())
201
+ if tool_name == "hf_jobs":
202
+ header_text = "JOB EXECUTION APPROVAL REQUIRED"
203
+ elif operation == "upload_file":
204
+ header_text = "FILE UPLOAD APPROVAL REQUIRED"
205
+ else:
206
+ header_text = "REPO CREATION APPROVAL REQUIRED"
207
+ print(format_header(header_text))
208
  print(format_separator())
209
  loop = asyncio.get_event_loop()
210
  response = await loop.run_in_executor(
agent/tools/jobs_tool.py CHANGED
@@ -74,7 +74,6 @@ def _substitute_hf_token(params: Dict[str, Any] | None) -> Dict[str, Any] | None
74
  Returns:
75
  Dictionary with HF_TOKEN value substituted from environment
76
  """
77
- print("DEBUG !! : ", params)
78
  if params is None:
79
  return None
80
 
@@ -912,6 +911,7 @@ HF_JOBS_TOOL_SPEC = {
912
  "Manage Hugging Face CPU/GPU compute jobs. Run commands in Docker containers, "
913
  "execute Python scripts with UV. List, schedule and monitor jobs/logs. "
914
  "Example hardware/flavor: cpu-basic, cpu-performance, t4-medium. "
 
915
  "Call this tool with no operation for full usage instructions and examples."
916
  ),
917
  "parameters": {
 
74
  Returns:
75
  Dictionary with HF_TOKEN value substituted from environment
76
  """
 
77
  if params is None:
78
  return None
79
 
 
911
  "Manage Hugging Face CPU/GPU compute jobs. Run commands in Docker containers, "
912
  "execute Python scripts with UV. List, schedule and monitor jobs/logs. "
913
  "Example hardware/flavor: cpu-basic, cpu-performance, t4-medium. "
914
+ "After job completion, if needed or asked by the user, use hf_private_repos tool to store scripts/logs/results to Hub."
915
  "Call this tool with no operation for full usage instructions and examples."
916
  ),
917
  "parameters": {
agent/tools/private_hf_repo_tools.py ADDED
@@ -0,0 +1,606 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Private HF Repos Tool - Manage private Hugging Face repositories
3
+
4
+ PRIMARY USE: Store job outputs, training scripts, and logs from HF Jobs.
5
+ Since job results are ephemeral, this tool provides persistent storage in private repos.
6
+
7
+ SECONDARY USE: Read back stored files and list repo contents.
8
+ """
9
+
10
+ import asyncio
11
+ from typing import Any, Dict, Literal, Optional
12
+
13
+ from huggingface_hub import HfApi, hf_hub_download
14
+ from huggingface_hub.utils import HfHubHTTPError
15
+
16
+ from agent.tools.types import ToolResult
17
+
18
+ # Operation names
19
+ OperationType = Literal["upload_file", "create_repo", "check_repo", "list_files", "read_file"]
20
+
21
+
22
+ async def _async_call(func, *args, **kwargs):
23
+ """Wrap synchronous HfApi calls for async context."""
24
+ return await asyncio.to_thread(func, *args, **kwargs)
25
+
26
+
27
+ def _build_repo_url(repo_id: str, repo_type: str = "dataset") -> str:
28
+ """Build the Hub URL for a repository."""
29
+ type_path = "" if repo_type == "model" else f"{repo_type}s"
30
+ return f"https://huggingface.co/{type_path}/{repo_id}".replace("//", "/")
31
+
32
+
33
+ def _content_to_bytes(content: str | bytes) -> bytes:
34
+ """Convert string or bytes content to bytes."""
35
+ if isinstance(content, str):
36
+ return content.encode('utf-8')
37
+ return content
38
+
39
+
40
class PrivateHfRepoTool:
    """Tool for managing private Hugging Face repositories.

    Every operation returns a result dict with ``formatted``, ``totalResults``
    and ``resultsShared`` keys. Failures are reported by additionally setting
    ``isError`` to True rather than by raising.
    """

    def __init__(self, hf_token: Optional[str] = None):
        """Create the tool around an ``HfApi`` client built with *hf_token*."""
        self.api = HfApi(token=hf_token)

    @staticmethod
    def _error(message: str) -> ToolResult:
        """Build the error-shaped result shared by every failure path."""
        return {
            "formatted": message,
            "totalResults": 0,
            "resultsShared": 0,
            "isError": True,
        }

    async def execute(self, params: Dict[str, Any]) -> ToolResult:
        """Execute the operation named in *params*.

        Args:
            params: Dict with an optional ``operation`` name and an optional
                ``args`` dict holding the operation-specific arguments.

        Returns:
            The operation's result; the usage instructions when no operation
            is given; an error result for unknown operations, malformed input
            or exceptions escaping an operation.
        """
        operation = params.get("operation")
        args = params.get("args")
        if args is None:
            # A missing or explicit-null 'args' behaves like an empty argument set
            args = {}

        # If no operation provided, return usage instructions
        if not operation:
            return self._show_help()

        # Reject malformed input here; otherwise .lower()/.get() below would
        # raise before the try block and bypass the error-result contract.
        if not isinstance(operation, str):
            return self._error(
                f"'operation' must be a string. You passed type: {type(operation).__name__}"
            )
        if not isinstance(args, dict):
            return self._error(
                f"'args' must be a JSON object. You passed type: {type(args).__name__}"
            )

        # Normalize operation name
        operation = operation.lower()

        # Check if help is requested
        if args.get("help"):
            return self._show_operation_help(operation)

        # Route to appropriate handler
        handlers = {
            "upload_file": self._upload_file,
            "create_repo": self._create_repo,
            "check_repo": self._check_repo,
            "list_files": self._list_files,
            "read_file": self._read_file,
        }
        handler = handlers.get(operation)
        if handler is None:
            return self._error(
                f'Unknown operation: "{operation}"\n\n'
                "Available operations: upload_file, create_repo, check_repo, list_files, read_file\n\n"
                "Call this tool with no operation for full usage instructions."
            )

        try:
            return await handler(args)
        except HfHubHTTPError as e:
            return self._error(f"API Error: {e}")
        except Exception as e:
            return self._error(f"Error executing {operation}: {e}")

    def _show_help(self) -> ToolResult:
        """Show usage instructions when tool is called with no arguments."""
        usage_text = """# Private HF Repos Tool

**PRIMARY USE:** Store job outputs, scripts, and logs from HF Jobs to private repos.
Since job results are ephemeral, use this tool for persistent storage.

**SECONDARY USE:** Read back stored files and list repo contents.

## Available Commands

### Write Operations
- **upload_file** - Upload file content to a repository
- **create_repo** - Create a new private repository

### Read Operations
- **list_files** - List all files in a repository
- **read_file** - Read content of a specific file from a repository
- **check_repo** - Check if a repository exists

## Examples

### Upload a script to a dataset repo
Call this tool with:
```json
{
  "operation": "upload_file",
  "args": {
    "file_content": "import pandas as pd\\nprint('Hello from HF!')",
    "path_in_repo": "scripts/hello.py",
    "repo_id": "my-dataset",
    "repo_type": "dataset",
    "create_if_missing": true,
    "commit_message": "Add hello script"
  }
}
```

### Upload logs from a job
Call this tool with:
```json
{
  "operation": "upload_file",
  "args": {
    "file_content": "Job started...\\nJob completed successfully!",
    "path_in_repo": "jobs/job-abc123/logs.txt",
    "repo_id": "job-results",
    "create_if_missing": true
  }
}
```

### Create a repository
Call this tool with:
```json
{
  "operation": "create_repo",
  "args": {
    "repo_id": "my-results",
    "repo_type": "dataset"
  }
}
```
Note: Repositories are always created as private.

### Check if a repository exists
Call this tool with:
```json
{
  "operation": "check_repo",
  "args": {
    "repo_id": "my-dataset",
    "repo_type": "dataset"
  }
}
```

### List files in a repository
Call this tool with:
```json
{
  "operation": "list_files",
  "args": {
    "repo_id": "job-results",
    "repo_type": "dataset"
  }
}
```

### Read a file from a repository
Call this tool with:
```json
{
  "operation": "read_file",
  "args": {
    "repo_id": "job-results",
    "path_in_repo": "jobs/job-abc123/script.py",
    "repo_type": "dataset"
  }
}
```

## Repository Types

- **dataset** (default) - For storing data, results, logs, scripts
- **model** - For ML models and related artifacts
- **space** - For Spaces and applications

## Tips

- **Content-based**: Pass file content directly as strings or bytes, not file paths
- **Repo ID format**: Use just the repo name (e.g., "my-dataset"). Username is automatically inferred from HF_TOKEN
- **Automatic repo creation**: Set `create_if_missing: true` to auto-create repos (requires user approval)
- **Organization**: Use path_in_repo to organize files (e.g., "jobs/job-123/script.py")
- **After jobs**: Upload job scripts and logs after compute jobs complete for reproducibility
"""
        return {"formatted": usage_text, "totalResults": 1, "resultsShared": 1}

    def _show_operation_help(self, operation: str) -> ToolResult:
        """Show help for a specific operation."""
        help_text = f"Help for operation: {operation}\n\nCall with appropriate arguments. Use the main help for examples."
        return {"formatted": help_text, "totalResults": 1, "resultsShared": 1}

    async def _upload_file(self, args: Dict[str, Any]) -> ToolResult:
        """Upload file content to a Hub repository.

        Required args: ``file_content``, ``path_in_repo``, ``repo_id``.
        Optional args: ``repo_type`` (default ``"dataset"``),
        ``create_if_missing`` (default False), ``commit_message``.
        """
        file_content = args.get("file_content")
        path_in_repo = args.get("path_in_repo")
        repo_id = args.get("repo_id")

        # Validate required arguments; the first missing one is reported
        required = (
            ("file_content", file_content),
            ("path_in_repo", path_in_repo),
            ("repo_id", repo_id),
        )
        for name, value in required:
            if not value:
                return self._error(f"{name} is required")

        repo_type = args.get("repo_type", "dataset")
        create_if_missing = args.get("create_if_missing", False)

        # Check if repo exists
        try:
            repo_exists = await _async_call(
                self.api.repo_exists, repo_id=repo_id, repo_type=repo_type
            )
        except Exception as e:
            return self._error(f"Failed to check repository: {e}")

        if not repo_exists:
            if not create_if_missing:
                return self._error(
                    f"Repository {repo_id} does not exist. Set create_if_missing: true to create it."
                )
            # Create repo if needed
            created = await self._create_repo(
                {
                    "repo_id": repo_id,
                    "repo_type": repo_type,
                    "private": True,
                }
            )
            if created.get("isError"):
                # Surface the creation failure itself instead of attempting an
                # upload into a repository that does not exist.
                return created

        # Convert content to bytes
        file_bytes = _content_to_bytes(file_content)

        # Upload file
        try:
            await _async_call(
                self.api.upload_file,
                path_or_fileobj=file_bytes,
                path_in_repo=path_in_repo,
                repo_id=repo_id,
                repo_type=repo_type,
                commit_message=args.get("commit_message", f"Upload {path_in_repo}"),
            )
        except Exception as e:
            return self._error(f"Failed to upload file: {e}")

        repo_url = _build_repo_url(repo_id, repo_type)
        file_url = f"{repo_url}/blob/main/{path_in_repo}"

        response = f"""✓ File uploaded successfully!

**Repository:** {repo_id}
**File:** {path_in_repo}
**View at:** {file_url}
**Browse repo:** {repo_url}"""

        return {"formatted": response, "totalResults": 1, "resultsShared": 1}

    async def _create_repo(self, args: Dict[str, Any]) -> ToolResult:
        """Create a new Hub repository (always private).

        Required args: ``repo_id``. Optional: ``repo_type`` (default
        ``"dataset"``). An already-existing repository is reported as a
        non-error result.
        """
        repo_id = args.get("repo_id")
        if not repo_id:
            return self._error("repo_id is required")

        repo_type = args.get("repo_type", "dataset")

        try:
            # Check if repo already exists
            repo_exists = await _async_call(
                self.api.repo_exists, repo_id=repo_id, repo_type=repo_type
            )

            if repo_exists:
                repo_url = _build_repo_url(repo_id, repo_type)
                return {
                    "formatted": f"Repository {repo_id} already exists.\n**View at:** {repo_url}",
                    "totalResults": 1,
                    "resultsShared": 1,
                }

            # Create repository
            repo_url = await _async_call(
                self.api.create_repo,
                repo_id=repo_id,
                repo_type=repo_type,
                private=True,  # Always create private repos
                exist_ok=True,
            )
        except Exception as e:
            return self._error(f"Failed to create repository: {e}")

        response = f"""✓ Repository created successfully!

**Repository:** {repo_id}
**Type:** {repo_type}
**Private:** Yes
**View at:** {repo_url}"""

        return {"formatted": response, "totalResults": 1, "resultsShared": 1}

    async def _check_repo(self, args: Dict[str, Any]) -> ToolResult:
        """Check if a Hub repository exists.

        Required args: ``repo_id``. Optional: ``repo_type`` (default
        ``"dataset"``). A missing repository is a non-error result with zero
        totals and a hint on how to create it.
        """
        repo_id = args.get("repo_id")
        if not repo_id:
            return self._error("repo_id is required")

        repo_type = args.get("repo_type", "dataset")

        try:
            repo_exists = await _async_call(
                self.api.repo_exists, repo_id=repo_id, repo_type=repo_type
            )

            if repo_exists:
                repo_url = _build_repo_url(repo_id, repo_type)
                response = f"""✓ Repository exists!

**Repository:** {repo_id}
**Type:** {repo_type}
**View at:** {repo_url}"""
            else:
                response = f"""Repository does not exist: {repo_id}

To create it, call this tool with:
```json
{{
  "operation": "create_repo",
  "args": {{
    "repo_id": "{repo_id}",
    "repo_type": "{repo_type}"
  }}
}}
```"""
        except Exception as e:
            return self._error(f"Failed to check repository: {e}")

        found = 1 if repo_exists else 0
        return {
            "formatted": response,
            "totalResults": found,
            "resultsShared": found,
        }

    async def _list_files(self, args: Dict[str, Any]) -> ToolResult:
        """List all files in a Hub repository.

        Required args: ``repo_id``. Optional: ``repo_type`` (default
        ``"dataset"``). File names are listed in sorted order.
        """
        repo_id = args.get("repo_id")
        if not repo_id:
            return self._error("repo_id is required")

        repo_type = args.get("repo_type", "dataset")

        try:
            # List all files in the repository
            files = await _async_call(
                self.api.list_repo_files, repo_id=repo_id, repo_type=repo_type
            )

            if not files:
                return {
                    "formatted": f"No files found in repository: {repo_id}",
                    "totalResults": 0,
                    "resultsShared": 0,
                }

            # Format file list
            file_list = "\n".join(f"- {f}" for f in sorted(files))
            repo_url = _build_repo_url(repo_id, repo_type)

            response = f"""✓ Files in repository: {repo_id}

**Total files:** {len(files)}
**Repository URL:** {repo_url}

**Files:**
{file_list}"""

            return {
                "formatted": response,
                "totalResults": len(files),
                "resultsShared": len(files),
            }
        except Exception as e:
            return self._error(f"Failed to list files: {e}")

    async def _read_file(self, args: Dict[str, Any]) -> ToolResult:
        """Read content of a specific file from a Hub repository.

        Required args: ``repo_id``, ``path_in_repo``. Optional: ``repo_type``
        (default ``"dataset"``). Files that are not valid UTF-8 are reported
        by size only.
        """
        repo_id = args.get("repo_id")
        path_in_repo = args.get("path_in_repo")

        if not repo_id:
            return self._error("repo_id is required")
        if not path_in_repo:
            return self._error("path_in_repo is required")

        repo_type = args.get("repo_type", "dataset")

        # Download file to cache. Kept separate from the read below so the
        # binary fallback never runs without a resolved file_path.
        try:
            file_path = await _async_call(
                hf_hub_download,
                repo_id=repo_id,
                filename=path_in_repo,
                repo_type=repo_type,
                token=self.api.token,
            )
        except Exception as e:
            return self._error(f"Failed to read file: {e}")

        # Read file content
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                content = f.read()
        except UnicodeDecodeError:
            # If file is binary, return size info instead
            try:
                with open(file_path, "rb") as f:
                    binary_content = f.read()
            except Exception as e:
                return self._error(f"Failed to read binary file: {e}")
            return {
                "formatted": f"File is binary ({len(binary_content)} bytes). Cannot display as text.",
                "totalResults": 1,
                "resultsShared": 1,
            }
        except Exception as e:
            return self._error(f"Failed to read file: {e}")

        repo_url = _build_repo_url(repo_id, repo_type)
        file_url = f"{repo_url}/blob/main/{path_in_repo}"

        response = f"""✓ File read successfully!

**Repository:** {repo_id}
**File:** {path_in_repo}
**Size:** {len(content)} characters
**View at:** {file_url}

**Content:**
```
{content}
```"""

        return {"formatted": response, "totalResults": 1, "resultsShared": 1}
562
+
563
+
564
+ # Tool specification for agent registration
565
PRIVATE_HF_REPO_TOOL_SPEC = dict(
    # Name under which the tool is registered
    name="hf_private_repos",
    # Sentences are joined with single spaces to form the description string
    description=" ".join(
        (
            "Manage private Hugging Face repositories.",
            "PRIMARY USE: Store job outputs, scripts, and logs from HF Jobs (ephemeral results need persistent storage).",
            "SECONDARY USE: Read back stored files and list repo contents.",
            "Pass file content as strings/bytes (no filesystem needed).",
            "Call with no operation for full usage instructions.",
        )
    ),
    # JSON-schema style description of the accepted arguments
    parameters=dict(
        type="object",
        properties=dict(
            operation=dict(
                type="string",
                enum=["upload_file", "create_repo", "check_repo", "list_files", "read_file"],
                description="Operation to execute. Valid values: [upload_file, create_repo, check_repo, list_files, read_file]",
            ),
            args=dict(
                type="object",
                description=" ".join(
                    (
                        "Operation-specific arguments as a JSON object.",
                        "Write ops: file_content (string/bytes), path_in_repo (string), repo_id (string),",
                        "repo_type (dataset/model/space), create_if_missing (boolean), commit_message (string).",
                        "Read ops: repo_id (string), path_in_repo (for read_file), repo_type (optional).",
                    )
                ),
                additionalProperties=True,
            ),
        ),
    ),
)
597
+
598
+
599
async def private_hf_repo_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
    """Adapter between the agent tool router and ``PrivateHfRepoTool``.

    Args:
        arguments: Tool-call arguments, passed to ``PrivateHfRepoTool.execute``.

    Returns:
        (output_text, success) - ``success`` is False when the result is
        flagged ``isError`` or when running the tool raised.
    """
    try:
        outcome = await PrivateHfRepoTool().execute(arguments)
        text = outcome["formatted"]
        failed = outcome.get("isError", False)
        return text, not failed
    except Exception as exc:
        return f"Error executing Private HF Repo tool: {str(exc)}", False
agent/tools/utils_tools.py ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Utils Tools - General utility operations
3
+
4
+ Provides system information like current date/time with timezone support.
5
+ """
6
+
7
+ import asyncio
8
+ from datetime import datetime
9
+ from typing import Any, Dict, Literal, Optional
10
+
11
+ try:
12
+ import zoneinfo
13
+ except ImportError:
14
+ from backports import zoneinfo
15
+
16
+ from agent.tools.types import ToolResult
17
+
18
# Closed set of operation names that UtilsTool.execute dispatches on.
OperationType = Literal["get_datetime"]
20
+
21
+
22
class UtilsTool:
    """Tool for general utility operations.

    Currently exposes a single operation, ``get_datetime``, which reports the
    current date and time in a requested IANA timezone. Every method returns a
    result dict with ``formatted``, ``totalResults`` and ``resultsShared``
    keys; failures additionally carry ``isError: True``.
    """

    @staticmethod
    def _error_result(message: str) -> ToolResult:
        """Build the error-shaped result dict shared by all failure paths."""
        return {
            "formatted": message,
            "totalResults": 0,
            "resultsShared": 0,
            "isError": True,
        }

    async def execute(self, params: Dict[str, Any]) -> ToolResult:
        """Execute the specified utility operation.

        Args:
            params: Tool-call payload. ``operation`` names the action and
                ``args`` holds its arguments; a missing or ``None`` ``args``
                is treated as an empty object.

        Returns:
            The operation's result, the usage text when no operation is
            given, or an error result for an unknown operation, malformed
            ``args``, or an exception raised by the operation.
        """
        operation = params.get("operation")

        # If no operation provided, return usage instructions
        if not operation:
            return self._show_help()

        # The model sometimes sends null or a string instead of an object;
        # answer with a readable error rather than an AttributeError.
        args = params.get("args")
        if args is None:
            args = {}
        if not isinstance(args, dict):
            return self._error_result(
                'Invalid "args": expected a JSON object, '
                f"got {type(args).__name__}"
            )

        # Normalize operation name (tolerates non-string values and padding)
        operation = str(operation).strip().lower()

        # Check if help is requested
        if args.get("help"):
            return self._show_operation_help(operation)

        try:
            # Route to appropriate handler
            if operation == "get_datetime":
                return await self._get_datetime(args)
        except Exception as e:
            return self._error_result(f"Error executing {operation}: {str(e)}")

        return self._error_result(
            f'Unknown operation: "{operation}"\n\n'
            "Available operations: get_datetime\n\n"
            "Call this tool with no operation for full usage instructions."
        )

    def _show_help(self) -> ToolResult:
        """Show usage instructions when tool is called with no arguments."""
        usage_text = """# Utils Tool

Utility operations for system information.

## Available Commands

- **get_datetime** - Get current date and time with timezone support

## Examples

### Get current date and time (Paris timezone by default)
Call this tool with:
```json
{
  "operation": "get_datetime",
  "args": {}
}
```

### Get current date and time in a specific timezone
Call this tool with:
```json
{
  "operation": "get_datetime",
  "args": {
    "timezone": "America/New_York"
  }
}
```

Common timezones: Europe/Paris, America/New_York, America/Los_Angeles, Asia/Tokyo, UTC

## Tips

- **Default timezone**: Paris (Europe/Paris)
- **Date format**: dd-mm-yyyy
- **Time format**: HH:MM:SS.mmm (24-hour format with milliseconds)
- **Timezone names**: Use IANA timezone database names (e.g., "Europe/Paris", "UTC")
"""
        return {"formatted": usage_text, "totalResults": 1, "resultsShared": 1}

    def _show_operation_help(self, operation: str) -> ToolResult:
        """Show help for a specific operation."""
        help_text = f"Help for operation: {operation}\n\nCall with appropriate arguments. Use the main help for examples."
        return {"formatted": help_text, "totalResults": 1, "resultsShared": 1}

    async def _get_datetime(self, args: Dict[str, Any]) -> ToolResult:
        """Get current date and time with timezone support.

        Args:
            args: Operation arguments. ``timezone`` is an IANA zone name; when
                it is missing, ``None`` or empty, ``Europe/Paris`` is used.

        Returns:
            A result whose text lists the date (dd-mm-yyyy), the time with
            milliseconds, the zone with its UTC offset, the ISO-8601 form and
            the Unix timestamp; or an error result for an unusable timezone.
        """
        timezone_name = args.get("timezone") or "Europe/Paris"

        try:
            # Get timezone object
            tz = zoneinfo.ZoneInfo(timezone_name)
        except (zoneinfo.ZoneInfoNotFoundError, ValueError, TypeError, OSError):
            # Besides "not found", ZoneInfo rejects absolute or non-normalised
            # keys (ValueError), non-string keys (TypeError) and keys naming a
            # directory such as "Europe" (OSError). All mean the same thing to
            # the caller: the timezone name is not usable.
            return self._error_result(
                f"Invalid timezone: {timezone_name}\n\n"
                "Use IANA timezone database names like:\n"
                "- Europe/Paris\n"
                "- America/New_York\n"
                "- Asia/Tokyo\n"
                "- UTC\n\n"
                "See: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones"
            )

        try:
            # Get current datetime in specified timezone
            now = datetime.now(tz)

            # Format date as dd-mm-yyyy
            date_str = now.strftime("%d-%m-%Y")

            # %f yields microseconds (6 digits); drop the last 3 for milliseconds
            time_str = now.strftime("%H:%M:%S.%f")[:-3]

            # Get timezone abbreviation/offset ("%z" is "+HHMM"; shown as "+HH:MM")
            tz_offset = now.strftime("%z")
            tz_name = now.strftime("%Z")

            response = f"""✓ Current date and time

**Date:** {date_str}
**Time:** {time_str}
**Timezone:** {timezone_name} ({tz_name}, UTC{tz_offset[:3]}:{tz_offset[3:]})

**ISO Format:** {now.isoformat()}
**Unix Timestamp:** {int(now.timestamp())}"""

            return {"formatted": response, "totalResults": 1, "resultsShared": 1}
        except Exception as e:
            return self._error_result(f"Failed to get date/time: {str(e)}")
163
+
164
+
165
# Function-calling schema advertised to the LLM for the utils tool.
# "operation" selects the action; "args" carries its optional arguments.
UTILS_TOOL_SPEC = {
    "name": "utils",
    "description": (
        "Utility operations for system information. "
        "Get current date (dd-mm-yyyy) and time (HH:MM:SS.mmm) "
        "with timezone support. "
        "Default timezone: Paris (Europe/Paris). "
        "Call with no operation for full usage instructions."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "operation": {
                "type": "string",
                "enum": ["get_datetime"],
                "description": "Operation to execute. Valid values: [get_datetime]",
            },
            "args": {
                "type": "object",
                "description": (
                    "Operation-specific arguments as a JSON object. "
                    "For get_datetime: timezone "
                    "(string, optional, default: Europe/Paris). "
                    "Use IANA timezone names like "
                    "'America/New_York', 'Asia/Tokyo', 'UTC'."
                ),
                # Left open so further per-operation keys need no schema change.
                "additionalProperties": True,
            },
        },
    },
}
194
+
195
+
196
async def utils_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
    """Adapt ``UtilsTool`` to the tool router's ``(text, success)`` contract.

    Args:
        arguments: Raw tool-call payload, forwarded unchanged to
            ``UtilsTool.execute``.

    Returns:
        The tool's ``"formatted"`` text together with ``True`` unless the
        result carries a truthy ``"isError"`` flag. Any exception raised while
        running the tool is reported as an error string with ``False``.
    """
    try:
        outcome = await UtilsTool().execute(arguments)
        text = outcome["formatted"]
        failed = outcome.get("isError", False)
    except Exception as e:
        # Boundary handler: the router expects a result tuple, never a raise.
        return f"Error executing Utils tool: {str(e)}", False
    return text, not failed
test_mcp_connection.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Test script to check MCP server connection"""
2
+ import asyncio
3
+ from pathlib import Path
4
+ from agent.config import load_config
5
+ from agent.core.tools import ToolRouter
6
+
7
+
8
async def test_mcp():
    """Smoke-check MCP server initialization and list the tools that result.

    Loads ``configs/main_agent_config.json`` from the directory containing
    this script, builds a ``ToolRouter`` from its ``mcpServers`` mapping and
    enters the router's async context. On success it prints the total tool
    count, the built-in tools, and up to five of the remaining (MCP) tools.
    Exceptions raised inside the router context are printed with a traceback
    instead of propagating; config-loading errors are not caught.
    """
    # Names treated as built-in when splitting the tool list.
    # NOTE(review): "utils" is the name declared by UTILS_TOOL_SPEC; this
    # assumes the router registers it alongside the others — confirm.
    builtin_names = {
        "hf_jobs",
        "hf_private_repos",
        "hf_doc_search",
        "plan_tool",
        "utils",
    }

    print("Loading config...")
    config_path = Path(__file__).parent / "configs" / "main_agent_config.json"
    config = load_config(config_path)

    print(f"MCP Servers configured: {list(config.mcpServers.keys())}")
    print("\nInitializing ToolRouter...")

    tool_router = ToolRouter(config.mcpServers)

    print("Entering async context (this will init MCP servers)...")
    try:
        async with tool_router as router:
            print("✓ MCP initialization successful!")
            tools = router.get_tool_specs_for_llm()
            print(f"\nTotal tools available: {len(tools)}")

            # Single pass keyed on the tool name; avoids comparing whole
            # spec dicts against each other to find the non-built-in ones.
            builtin = []
            mcp = []
            for spec in tools:
                if spec["function"]["name"] in builtin_names:
                    builtin.append(spec)
                else:
                    mcp.append(spec)

            print(f"Built-in tools: {len(builtin)}")
            for tool in builtin:
                print(f"  - {tool['function']['name']}")

            print(f"\nMCP tools: {len(mcp)}")
            for tool in mcp[:5]:  # Show first 5
                print(f"  - {tool['function']['name']}")
            if len(mcp) > 5:
                print(f"  ... and {len(mcp) - 5} more")

    except Exception as e:
        # Diagnostic script: report the failure in full rather than crash.
        print(f"✗ Error during MCP initialization: {e}")
        import traceback

        traceback.print_exc()
42
+
43
+
44
if __name__ == "__main__":
    # Script entry point: run the MCP connection check on a fresh event loop.
    asyncio.run(test_mcp())