Henri Bonamy committed on
Commit
2e66e57
·
2 Parent(s): 71477dab70fed7

Merge pull request #8 from huggingface/private-repo-tool

Browse files
agent/context_manager/manager.py CHANGED
@@ -52,7 +52,6 @@ class ContextManager:
52
  """Add a message to the history"""
53
  if token_count:
54
  self.context_length = token_count
55
- print(f"DEBUG : token_count = {self.context_length}")
56
  self.items.append(message)
57
 
58
  def get_messages(self) -> list[Message]:
 
52
  """Add a message to the history"""
53
  if token_count:
54
  self.context_length = token_count
 
55
  self.items.append(message)
56
 
57
  def get_messages(self) -> list[Message]:
agent/core/agent_loop.py CHANGED
@@ -15,14 +15,42 @@ from agent.core.tools import ToolRouter
15
  ToolCall = ChatCompletionMessageToolCall
16
 
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  def _needs_approval(tool_name: str, tool_args: dict) -> bool:
19
  """Check if a tool call requires user approval before execution"""
20
- if tool_name != "hf_jobs":
 
 
21
  return False
22
 
23
- # Check if it's a run or uv operation
24
- operation = tool_args.get("operation", "")
25
- return operation in ["run", "uv"]
 
 
 
 
 
 
 
 
 
 
26
 
27
 
28
  class Handlers:
@@ -121,16 +149,23 @@ class Handlers:
121
  tool_name = tc.function.name
122
  tool_args = json.loads(tc.function.arguments)
123
 
124
- await session.send_event(
125
- Event(
126
- event_type="tool_call",
127
- data={"tool": tool_name, "arguments": tool_args},
 
 
 
 
 
 
 
 
128
  )
129
- )
130
 
131
- output, success = await session.tool_router.call_tool(
132
- tool_name, tool_args
133
- )
134
 
135
  # Add tool result to history
136
  tool_msg = Message(
 
15
  ToolCall = ChatCompletionMessageToolCall
16
 
17
 
18
+ def _validate_tool_args(tool_args: dict) -> tuple[bool, str | None]:
19
+ """
20
+ Validate tool arguments structure.
21
+
22
+ Returns:
23
+ (is_valid, error_message)
24
+ """
25
+ args = tool_args.get("args", {})
26
+ # Sometimes LLM passes args as string instead of dict
27
+ if isinstance(args, str):
28
+ return False, f"Tool call error: 'args' must be a JSON object, not a string. You passed: {repr(args)}"
29
+ if not isinstance(args, dict) and args is not None:
30
+ return False, f"Tool call error: 'args' must be a JSON object. You passed type: {type(args).__name__}"
31
+ return True, None
32
+
33
+
34
def _needs_approval(tool_name: str, tool_args: dict) -> bool:
    """Check if a tool call requires user approval before execution.

    Args:
        tool_name: Name of the tool the model wants to call.
        tool_args: Decoded arguments of that call; only ``operation`` is
            inspected here.

    Returns:
        ``True`` for ``hf_jobs`` operations ``run``/``uv`` and for
        ``hf_private_repos`` operations ``create_repo``/``upload_file``;
        ``False`` for everything else, including calls whose ``args`` fail
        validation.
    """
    # If args are malformed, skip approval (validation error will be shown later)
    args_valid, _ = _validate_tool_args(tool_args)
    if not args_valid:
        return False

    operation = tool_args.get("operation", "")

    if tool_name == "hf_jobs":
        # Only operations that launch a job need a human in the loop.
        return operation in ["run", "uv"]

    if tool_name == "hf_private_repos":
        # Repo creation and file uploads require approval
        return operation in ["create_repo", "upload_file"]

    return False
54
 
55
 
56
  class Handlers:
 
149
  tool_name = tc.function.name
150
  tool_args = json.loads(tc.function.arguments)
151
 
152
+ # Validate tool arguments before calling
153
+ args_valid, error_msg = _validate_tool_args(tool_args)
154
+ if not args_valid:
155
+ # Return error to agent instead of calling tool
156
+ output = error_msg
157
+ success = False
158
+ else:
159
+ await session.send_event(
160
+ Event(
161
+ event_type="tool_call",
162
+ data={"tool": tool_name, "arguments": tool_args},
163
+ )
164
  )
 
165
 
166
+ output, success = await session.tool_router.call_tool(
167
+ tool_name, tool_args
168
+ )
169
 
170
  # Add tool result to history
171
  tool_msg = Message(
agent/core/tools.py CHANGED
@@ -21,6 +21,11 @@ from agent.tools.docs_tools import (
21
  )
22
  from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, hf_jobs_handler
23
  from agent.tools.plan_tool import PLAN_TOOL_SPEC, plan_tool_handler
 
 
 
 
 
24
 
25
  # Suppress aiohttp deprecation warning
26
  warnings.filterwarnings(
@@ -219,7 +224,7 @@ class ToolRouter:
219
  def create_builtin_tools() -> list[ToolSpec]:
220
  """Create built-in tool specifications"""
221
  print(
222
- f"Creating built-in tools: {EXPLORE_HF_DOCS_TOOL_SPEC['name']}, {HF_DOCS_FETCH_TOOL_SPEC['name']}, {PLAN_TOOL_SPEC['name']}, {HF_JOBS_TOOL_SPEC['name']}"
223
  )
224
  # in order of importance
225
  return [
@@ -249,4 +254,16 @@ def create_builtin_tools() -> list[ToolSpec]:
249
  parameters=HF_JOBS_TOOL_SPEC["parameters"],
250
  handler=hf_jobs_handler,
251
  ),
 
 
 
 
 
 
 
 
 
 
 
 
252
  ]
 
21
  )
22
  from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, hf_jobs_handler
23
  from agent.tools.plan_tool import PLAN_TOOL_SPEC, plan_tool_handler
24
+ from agent.tools.private_hf_repo_tools import (
25
+ PRIVATE_HF_REPO_TOOL_SPEC,
26
+ private_hf_repo_handler,
27
+ )
28
+ from agent.tools.utils_tools import UTILS_TOOL_SPEC, utils_handler
29
 
30
  # Suppress aiohttp deprecation warning
31
  warnings.filterwarnings(
 
224
  def create_builtin_tools() -> list[ToolSpec]:
225
  """Create built-in tool specifications"""
226
  print(
227
+ f"Creating built-in tools: {EXPLORE_HF_DOCS_TOOL_SPEC['name']}, {HF_DOCS_FETCH_TOOL_SPEC['name']}, {PLAN_TOOL_SPEC['name']}, {HF_JOBS_TOOL_SPEC['name']}, {PRIVATE_HF_REPO_TOOL_SPEC['name']}, {UTILS_TOOL_SPEC['name']}"
228
  )
229
  # in order of importance
230
  return [
 
254
  parameters=HF_JOBS_TOOL_SPEC["parameters"],
255
  handler=hf_jobs_handler,
256
  ),
257
+ ToolSpec(
258
+ name=PRIVATE_HF_REPO_TOOL_SPEC["name"],
259
+ description=PRIVATE_HF_REPO_TOOL_SPEC["description"],
260
+ parameters=PRIVATE_HF_REPO_TOOL_SPEC["parameters"],
261
+ handler=private_hf_repo_handler,
262
+ ),
263
+ ToolSpec(
264
+ name=UTILS_TOOL_SPEC["name"],
265
+ description=UTILS_TOOL_SPEC["description"],
266
+ parameters=UTILS_TOOL_SPEC["parameters"],
267
+ handler=utils_handler,
268
+ ),
269
  ]
agent/main.py CHANGED
@@ -30,6 +30,16 @@ from agent.utils.terminal_display import (
30
 
31
  litellm.drop_params = True
32
 
 
 
 
 
 
 
 
 
 
 
33
  lmnr_api_key = os.environ.get("LMNR_API_KEY")
34
  if lmnr_api_key:
35
  try:
@@ -125,7 +135,7 @@ async def event_listener(
125
  print("\n" + format_separator())
126
  print(
127
  format_header(
128
- f"JOB EXECUTION APPROVAL REQUIRED ({count} job{'s' if count != 1 else ''})"
129
  )
130
  )
131
  print(format_separator())
@@ -148,60 +158,107 @@ async def event_listener(
148
 
149
  operation = arguments.get("operation", "")
150
 
151
- print(f"\n[Job {i}/{count}]")
 
152
  print(f"Operation: {operation}")
153
 
154
- # Check if this is Python mode (script) or Docker mode (command)
155
- script = arguments.get("script")
156
- command = arguments.get("command")
157
-
158
- if script:
159
- # Python mode
160
- dependencies = arguments.get("dependencies", [])
161
- python_version = arguments.get("python")
162
- script_args = arguments.get("script_args", [])
163
-
164
- # Show script (truncate if too long)
165
- script_display = (
166
- script if len(script) < 200 else script[:200] + "..."
167
- )
168
- print(f"Script: {script_display}")
169
- if dependencies:
170
- print(f"Dependencies: {', '.join(dependencies)}")
171
- if python_version:
172
- print(f"Python version: {python_version}")
173
- if script_args:
174
- print(f"Script args: {' '.join(script_args)}")
175
- elif command:
176
- # Docker mode
177
- image = arguments.get("image", "python:3.12")
178
- command_str = (
179
- " ".join(command)
180
- if isinstance(command, list)
181
- else str(command)
182
- )
183
- print(f"Docker image: {image}")
184
- print(f"Command: {command_str}")
185
-
186
- # Common parameters
187
- hardware_flavor = arguments.get("hardware_flavor", "cpu-basic")
188
- timeout = arguments.get("timeout", "30m")
189
- env = arguments.get("env", {})
190
- schedule = arguments.get("schedule")
191
-
192
- print(f"Hardware: {hardware_flavor}")
193
- print(f"Timeout: {timeout}")
194
-
195
- if env:
196
- env_keys = ", ".join(env.keys())
197
- print(f"Environment variables: {env_keys}")
198
-
199
- if schedule:
200
- print(f"Schedule: {schedule}")
201
-
202
- # Get user decision for this job
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
  response = await prompt_session.prompt_async(
204
- f"Approve job {i}? (y=yes, n=no, or provide feedback to reject): "
205
  )
206
 
207
  response = response.strip()
 
30
 
31
  litellm.drop_params = True
32
 
33
+
34
def _safe_get_args(arguments: dict) -> dict:
    """Return ``arguments["args"]`` when it is a dict, otherwise an empty dict.

    The LLM sometimes passes ``args`` as a string (or another non-object
    value); callers only want to do ``.get`` lookups, so anything that is not
    a dict is replaced by ``{}``.
    """
    candidate = arguments.get("args", {})
    if isinstance(candidate, dict):
        return candidate
    return {}
41
+
42
+
43
  lmnr_api_key = os.environ.get("LMNR_API_KEY")
44
  if lmnr_api_key:
45
  try:
 
135
  print("\n" + format_separator())
136
  print(
137
  format_header(
138
+ f"APPROVAL REQUIRED ({count} item{'s' if count != 1 else ''})"
139
  )
140
  )
141
  print(format_separator())
 
158
 
159
  operation = arguments.get("operation", "")
160
 
161
+ print(f"\n[Item {i}/{count}]")
162
+ print(f"Tool: {tool_name}")
163
  print(f"Operation: {operation}")
164
 
165
+ # Handle different tool types
166
+ if tool_name == "hf_jobs":
167
+ # Check if this is Python mode (script) or Docker mode (command)
168
+ script = arguments.get("script")
169
+ command = arguments.get("command")
170
+
171
+ if script:
172
+ # Python mode
173
+ dependencies = arguments.get("dependencies", [])
174
+ python_version = arguments.get("python")
175
+ script_args = arguments.get("script_args", [])
176
+
177
+ # Show script (truncate if too long)
178
+ script_display = (
179
+ script if len(script) < 200 else script[:200] + "..."
180
+ )
181
+ print(f"Script: {script_display}")
182
+ if dependencies:
183
+ print(f"Dependencies: {', '.join(dependencies)}")
184
+ if python_version:
185
+ print(f"Python version: {python_version}")
186
+ if script_args:
187
+ print(f"Script args: {' '.join(script_args)}")
188
+ elif command:
189
+ # Docker mode
190
+ image = arguments.get("image", "python:3.12")
191
+ command_str = (
192
+ " ".join(command)
193
+ if isinstance(command, list)
194
+ else str(command)
195
+ )
196
+ print(f"Docker image: {image}")
197
+ print(f"Command: {command_str}")
198
+
199
+ # Common parameters for jobs
200
+ hardware_flavor = arguments.get("hardware_flavor", "cpu-basic")
201
+ timeout = arguments.get("timeout", "30m")
202
+ env = arguments.get("env", {})
203
+ schedule = arguments.get("schedule")
204
+
205
+ print(f"Hardware: {hardware_flavor}")
206
+ print(f"Timeout: {timeout}")
207
+
208
+ if env:
209
+ env_keys = ", ".join(env.keys())
210
+ print(f"Environment variables: {env_keys}")
211
+
212
+ if schedule:
213
+ print(f"Schedule: {schedule}")
214
+
215
+ elif tool_name == "hf_private_repos":
216
+ # Handle private repo operations
217
+ args = _safe_get_args(arguments)
218
+
219
+ if operation in ["create_repo", "upload_file"]:
220
+ repo_id = args.get("repo_id", "")
221
+ repo_type = args.get("repo_type", "dataset")
222
+
223
+ # Build repo URL
224
+ type_path = "" if repo_type == "model" else f"{repo_type}s"
225
+ repo_url = f"https://huggingface.co/{type_path}/{repo_id}".replace("//", "/")
226
+
227
+ print(f"Repository: {repo_id}")
228
+ print(f"Type: {repo_type}")
229
+ print(f"Private: Yes")
230
+ print(f"URL: {repo_url}")
231
+
232
+ # Show file preview for upload_file operation
233
+ if operation == "upload_file":
234
+ path_in_repo = args.get("path_in_repo", "")
235
+ file_content = args.get("file_content", "")
236
+ print(f"File: {path_in_repo}")
237
+
238
+ if isinstance(file_content, str):
239
+ # Calculate metrics
240
+ all_lines = file_content.split('\n')
241
+ line_count = len(all_lines)
242
+ size_bytes = len(file_content.encode('utf-8'))
243
+ size_kb = size_bytes / 1024
244
+ size_mb = size_kb / 1024
245
+
246
+ print(f"Line count: {line_count}")
247
+ if size_kb < 1024:
248
+ print(f"Size: {size_kb:.2f} KB")
249
+ else:
250
+ print(f"Size: {size_mb:.2f} MB")
251
+
252
+ # Show preview
253
+ preview_lines = all_lines[:5]
254
+ preview = '\n'.join(preview_lines)
255
+ print(f"Content preview (first 5 lines):\n{preview}")
256
+ if len(all_lines) > 5:
257
+ print("...")
258
+
259
+ # Get user decision for this item
260
  response = await prompt_session.prompt_async(
261
+ f"Approve item {i}? (y=yes, n=no, or provide feedback to reject): "
262
  )
263
 
264
  response = response.strip()
agent/tools/jobs_tool.py CHANGED
@@ -756,7 +756,9 @@ HF_JOBS_TOOL_SPEC = {
756
  "{'operation': 'logs', 'job_id': 'xxx'} - stream logs\n"
757
  "{'operation': 'cancel', 'job_id': 'xxx'} - stop job\n\n"
758
  "## CRITICAL: Files are ephemeral!\n"
759
- "Everything created during execution is DELETED when job finishes. Always .push_to_hub() your outputs (models, datasets, artifacts) in the script."
 
 
760
  ),
761
  "parameters": {
762
  "type": "object",
 
756
  "{'operation': 'logs', 'job_id': 'xxx'} - stream logs\n"
757
  "{'operation': 'cancel', 'job_id': 'xxx'} - stop job\n\n"
758
  "## CRITICAL: Files are ephemeral!\n"
759
+ "Everything created during execution is DELETED when job finishes. Always .push_to_hub() your outputs (models, datasets, artifacts) in the script.\n\n"
760
+ "## After job completion:\n"
761
+ "If needed or asked by the user, use hf_private_repos tool to store scripts/logs/results to Hub for persistent storage."
762
  ),
763
  "parameters": {
764
  "type": "object",
agent/tools/private_hf_repo_tools.py ADDED
@@ -0,0 +1,606 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Private HF Repos Tool - Manage private Hugging Face repositories
3
+
4
+ PRIMARY USE: Store job outputs, training scripts, and logs from HF Jobs.
5
+ Since job results are ephemeral, this tool provides persistent storage in private repos.
6
+
7
+ SECONDARY USE: Read back stored files and list repo contents.
8
+ """
9
+
10
+ import asyncio
11
+ from typing import Any, Dict, Literal, Optional
12
+
13
+ from huggingface_hub import HfApi, hf_hub_download
14
+ from huggingface_hub.utils import HfHubHTTPError
15
+
16
+ from agent.tools.types import ToolResult
17
+
18
+ # Operation names
19
+ OperationType = Literal["upload_file", "create_repo", "check_repo", "list_files", "read_file"]
20
+
21
+
22
async def _async_call(func, *args, **kwargs):
    """Run the synchronous callable *func* via ``asyncio.to_thread`` and return its result.

    Positional and keyword arguments are forwarded to *func* unchanged.
    """
    outcome = await asyncio.to_thread(func, *args, **kwargs)
    return outcome
25
+
26
+
27
def _build_repo_url(repo_id: str, repo_type: str = "dataset") -> str:
    """Build the Hub URL for a repository.

    Args:
        repo_id: Repository identifier appended to the URL path.
        repo_type: ``"model"`` repos sit directly under the host; any other
            type is placed under a pluralised prefix (``datasets/``,
            ``spaces/``).

    Returns:
        The full ``https://huggingface.co/...`` URL.
    """
    base = "https://huggingface.co"
    # Assemble the path segment by segment rather than collapsing "//" after
    # the fact: a blanket replace also rewrites the "https://" scheme.
    if repo_type == "model":
        return f"{base}/{repo_id}"
    return f"{base}/{repo_type}s/{repo_id}"
31
+
32
+
33
+ def _content_to_bytes(content: str | bytes) -> bytes:
34
+ """Convert string or bytes content to bytes."""
35
+ if isinstance(content, str):
36
+ return content.encode('utf-8')
37
+ return content
38
+
39
+
40
class PrivateHfRepoTool:
    """Tool for managing private Hugging Face repositories.

    ``execute`` is the single entry point: it reads ``operation`` and ``args``
    from the incoming params and dispatches to one of the private operation
    coroutines. Every method returns a dict with ``formatted``,
    ``totalResults`` and ``resultsShared`` keys, plus ``isError: True`` on
    failure.
    """

    def __init__(self, hf_token: Optional[str] = None):
        """Create the Hub client, forwarding *hf_token* to ``HfApi``."""
        self.api = HfApi(token=hf_token)

    @staticmethod
    def _error_result(message: str) -> ToolResult:
        """Build the standard failure payload carrying *message*."""
        return {
            "formatted": message,
            "totalResults": 0,
            "resultsShared": 0,
            "isError": True,
        }

    async def execute(self, params: Dict[str, Any]) -> ToolResult:
        """Execute the operation named in *params*.

        Args:
            params: Mapping with an ``operation`` name and an optional
                ``args`` object holding the operation-specific arguments.

        Returns:
            The operation's result, the usage text when no operation is
            given, or an error payload for an unknown operation, malformed
            ``args`` or any exception raised by the operation.
        """
        operation = params.get("operation")

        # If no operation provided, return usage instructions
        if not operation:
            return self._show_help()

        # A missing or null ``args`` means "no arguments"; anything that is
        # not a mapping is rejected here so the ``.get`` lookups below and in
        # the operation handlers cannot fail on it.
        raw_args = params.get("args")
        args = {} if raw_args is None else raw_args
        if not isinstance(args, dict):
            return self._error_result(
                f"'args' must be a JSON object. You passed type: {type(args).__name__}"
            )

        # Normalize operation name (str() keeps a non-string value from raising)
        operation = str(operation).lower()

        # Check if help is requested
        if args.get("help"):
            return self._show_operation_help(operation)

        handlers = {
            "upload_file": self._upload_file,
            "create_repo": self._create_repo,
            "check_repo": self._check_repo,
            "list_files": self._list_files,
            "read_file": self._read_file,
        }
        handler = handlers.get(operation)
        if handler is None:
            return self._error_result(
                f'Unknown operation: "{operation}"\n\n'
                "Available operations: upload_file, create_repo, check_repo, list_files, read_file\n\n"
                "Call this tool with no operation for full usage instructions."
            )

        try:
            return await handler(args)
        except HfHubHTTPError as e:
            return self._error_result(f"API Error: {str(e)}")
        except Exception as e:
            return self._error_result(f"Error executing {operation}: {str(e)}")

    def _show_help(self) -> ToolResult:
        """Show usage instructions when tool is called with no arguments."""
        usage_text = """# Private HF Repos Tool

**PRIMARY USE:** Store job outputs, scripts, and logs from HF Jobs to private repos.
Since job results are ephemeral, use this tool for persistent storage.

**SECONDARY USE:** Read back stored files and list repo contents.

## Available Commands

### Write Operations
- **upload_file** - Upload file content to a repository
- **create_repo** - Create a new private repository

### Read Operations
- **list_files** - List all files in a repository
- **read_file** - Read content of a specific file from a repository
- **check_repo** - Check if a repository exists

## Examples

### Upload a script to a dataset repo
Call this tool with:
```json
{
"operation": "upload_file",
"args": {
"file_content": "import pandas as pd\\nprint('Hello from HF!')",
"path_in_repo": "scripts/hello.py",
"repo_id": "my-dataset",
"repo_type": "dataset",
"create_if_missing": true,
"commit_message": "Add hello script"
}
}
```

### Upload logs from a job
Call this tool with:
```json
{
"operation": "upload_file",
"args": {
"file_content": "Job started...\\nJob completed successfully!",
"path_in_repo": "jobs/job-abc123/logs.txt",
"repo_id": "job-results",
"create_if_missing": true
}
}
```

### Create a repository
Call this tool with:
```json
{
"operation": "create_repo",
"args": {
"repo_id": "my-results",
"repo_type": "dataset"
}
}
```
Note: Repositories are always created as private.

### Check if a repository exists
Call this tool with:
```json
{
"operation": "check_repo",
"args": {
"repo_id": "my-dataset",
"repo_type": "dataset"
}
}
```

### List files in a repository
Call this tool with:
```json
{
"operation": "list_files",
"args": {
"repo_id": "job-results",
"repo_type": "dataset"
}
}
```

### Read a file from a repository
Call this tool with:
```json
{
"operation": "read_file",
"args": {
"repo_id": "job-results",
"path_in_repo": "jobs/job-abc123/script.py",
"repo_type": "dataset"
}
}
```

## Repository Types

- **dataset** (default) - For storing data, results, logs, scripts
- **model** - For ML models and related artifacts
- **space** - For Spaces and applications

## Tips

- **Content-based**: Pass file content directly as strings or bytes, not file paths
- **Repo ID format**: Use just the repo name (e.g., "my-dataset"). Username is automatically inferred from HF_TOKEN
- **Automatic repo creation**: Set `create_if_missing: true` to auto-create repos (requires user approval)
- **Organization**: Use path_in_repo to organize files (e.g., "jobs/job-123/script.py")
- **After jobs**: Upload job scripts and logs after compute jobs complete for reproducibility
"""
        return {"formatted": usage_text, "totalResults": 1, "resultsShared": 1}

    def _show_operation_help(self, operation: str) -> ToolResult:
        """Show help for a specific operation."""
        help_text = f"Help for operation: {operation}\n\nCall with appropriate arguments. Use the main help for examples."
        return {"formatted": help_text, "totalResults": 1, "resultsShared": 1}

    async def _upload_file(self, args: Dict[str, Any]) -> ToolResult:
        """Upload file content to a Hub repository.

        Reads ``file_content``, ``path_in_repo`` and ``repo_id`` (all
        required) plus optional ``repo_type`` (default ``"dataset"``),
        ``create_if_missing`` (default ``False``) and ``commit_message``.
        Returns a success payload with links, or an error payload when
        validation, the existence check, repo creation or the upload fails.
        """
        file_content = args.get("file_content")
        path_in_repo = args.get("path_in_repo")
        repo_id = args.get("repo_id")

        # Validate required arguments, reporting the first one that is missing.
        required = (
            ("file_content", file_content),
            ("path_in_repo", path_in_repo),
            ("repo_id", repo_id),
        )
        for field_name, value in required:
            if not value:
                return self._error_result(f"{field_name} is required")

        repo_type = args.get("repo_type", "dataset")
        create_if_missing = args.get("create_if_missing", False)

        # Check if repo exists
        try:
            repo_exists = await _async_call(
                self.api.repo_exists, repo_id=repo_id, repo_type=repo_type
            )
        except Exception as e:
            return self._error_result(f"Failed to check repository: {str(e)}")

        if not repo_exists:
            if not create_if_missing:
                return self._error_result(
                    f"Repository {repo_id} does not exist. Set create_if_missing: true to create it."
                )
            # Create repo if needed. _create_repo reports failures through its
            # return value, so surface them instead of attempting an upload
            # into a repository that was never created.
            created = await self._create_repo(
                {
                    "repo_id": repo_id,
                    "repo_type": repo_type,
                    "private": True,
                }
            )
            if created.get("isError"):
                return created

        # Convert content to bytes
        file_bytes = _content_to_bytes(file_content)

        # Upload file
        try:
            await _async_call(
                self.api.upload_file,
                path_or_fileobj=file_bytes,
                path_in_repo=path_in_repo,
                repo_id=repo_id,
                repo_type=repo_type,
                commit_message=args.get("commit_message", f"Upload {path_in_repo}"),
            )

            repo_url = _build_repo_url(repo_id, repo_type)
            file_url = f"{repo_url}/blob/main/{path_in_repo}"

            response = f"""✓ File uploaded successfully!

**Repository:** {repo_id}
**File:** {path_in_repo}
**View at:** {file_url}
**Browse repo:** {repo_url}"""

            return {"formatted": response, "totalResults": 1, "resultsShared": 1}

        except Exception as e:
            return self._error_result(f"Failed to upload file: {str(e)}")

    async def _create_repo(self, args: Dict[str, Any]) -> ToolResult:
        """Create a new Hub repository.

        Reads ``repo_id`` (required) and ``repo_type`` (default
        ``"dataset"``). The repository is always created private; any
        ``private`` value in *args* is ignored. An already-existing
        repository is reported as a non-error result.
        """
        repo_id = args.get("repo_id")

        if not repo_id:
            return self._error_result("repo_id is required")

        repo_type = args.get("repo_type", "dataset")
        private = True  # Always create private repos

        try:
            # Check if repo already exists
            repo_exists = await _async_call(
                self.api.repo_exists, repo_id=repo_id, repo_type=repo_type
            )

            if repo_exists:
                repo_url = _build_repo_url(repo_id, repo_type)
                return {
                    "formatted": f"Repository {repo_id} already exists.\n**View at:** {repo_url}",
                    "totalResults": 1,
                    "resultsShared": 1,
                }

            # Create repository
            repo_url = await _async_call(
                self.api.create_repo,
                repo_id=repo_id,
                repo_type=repo_type,
                private=private,
                exist_ok=True,
            )

            response = f"""✓ Repository created successfully!

**Repository:** {repo_id}
**Type:** {repo_type}
**Private:** Yes
**View at:** {repo_url}"""

            return {"formatted": response, "totalResults": 1, "resultsShared": 1}

        except Exception as e:
            return self._error_result(f"Failed to create repository: {str(e)}")

    async def _check_repo(self, args: Dict[str, Any]) -> ToolResult:
        """Check if a Hub repository exists.

        Reads ``repo_id`` (required) and ``repo_type`` (default
        ``"dataset"``). A missing repository is not an error: the result then
        carries zero counts and a ready-to-use ``create_repo`` example.
        """
        repo_id = args.get("repo_id")

        if not repo_id:
            return self._error_result("repo_id is required")

        repo_type = args.get("repo_type", "dataset")

        try:
            repo_exists = await _async_call(
                self.api.repo_exists, repo_id=repo_id, repo_type=repo_type
            )

            if repo_exists:
                repo_url = _build_repo_url(repo_id, repo_type)
                response = f"""✓ Repository exists!

**Repository:** {repo_id}
**Type:** {repo_type}
**View at:** {repo_url}"""
            else:
                response = f"""Repository does not exist: {repo_id}

To create it, call this tool with:
```json
{{
"operation": "create_repo",
"args": {{
"repo_id": "{repo_id}",
"repo_type": "{repo_type}"
}}
}}
```"""

            return {
                "formatted": response,
                "totalResults": 1 if repo_exists else 0,
                "resultsShared": 1 if repo_exists else 0,
            }

        except Exception as e:
            return self._error_result(f"Failed to check repository: {str(e)}")

    async def _list_files(self, args: Dict[str, Any]) -> ToolResult:
        """List all files in a Hub repository.

        Reads ``repo_id`` (required) and ``repo_type`` (default
        ``"dataset"``). File names are returned sorted, one per line.
        """
        repo_id = args.get("repo_id")

        if not repo_id:
            return self._error_result("repo_id is required")

        repo_type = args.get("repo_type", "dataset")

        try:
            # List all files in the repository
            files = await _async_call(
                self.api.list_repo_files, repo_id=repo_id, repo_type=repo_type
            )

            if not files:
                return {
                    "formatted": f"No files found in repository: {repo_id}",
                    "totalResults": 0,
                    "resultsShared": 0,
                }

            # Format file list
            file_list = "\n".join(f"- {f}" for f in sorted(files))
            repo_url = _build_repo_url(repo_id, repo_type)

            response = f"""✓ Files in repository: {repo_id}

**Total files:** {len(files)}
**Repository URL:** {repo_url}

**Files:**
{file_list}"""

            return {
                "formatted": response,
                "totalResults": len(files),
                "resultsShared": len(files),
            }

        except Exception as e:
            return self._error_result(f"Failed to list files: {str(e)}")

    async def _read_file(self, args: Dict[str, Any]) -> ToolResult:
        """Read content of a specific file from a Hub repository.

        Reads ``repo_id`` and ``path_in_repo`` (both required) and
        ``repo_type`` (default ``"dataset"``). The file is fetched with
        ``hf_hub_download`` and decoded as UTF-8; a file that does not decode
        is reported by size only.
        """
        repo_id = args.get("repo_id")
        path_in_repo = args.get("path_in_repo")

        if not repo_id:
            return self._error_result("repo_id is required")

        if not path_in_repo:
            return self._error_result("path_in_repo is required")

        repo_type = args.get("repo_type", "dataset")

        try:
            # Download the file and get back the local path to read from
            file_path = await _async_call(
                hf_hub_download,
                repo_id=repo_id,
                filename=path_in_repo,
                repo_type=repo_type,
                token=self.api.token,
            )

            # Read file content
            with open(file_path, "r", encoding="utf-8") as f:
                content = f.read()

            repo_url = _build_repo_url(repo_id, repo_type)
            file_url = f"{repo_url}/blob/main/{path_in_repo}"

            response = f"""✓ File read successfully!

**Repository:** {repo_id}
**File:** {path_in_repo}
**Size:** {len(content)} characters
**View at:** {file_url}

**Content:**
```
{content}
```"""

            return {"formatted": response, "totalResults": 1, "resultsShared": 1}

        except UnicodeDecodeError:
            # If file is binary, return size info instead. file_path is bound
            # here because only the text read above can raise this error.
            try:
                with open(file_path, "rb") as f:
                    binary_content = f.read()

                return {
                    "formatted": f"File is binary ({len(binary_content)} bytes). Cannot display as text.",
                    "totalResults": 1,
                    "resultsShared": 1,
                }
            except Exception as e:
                return self._error_result(f"Failed to read binary file: {str(e)}")
        except Exception as e:
            return self._error_result(f"Failed to read file: {str(e)}")
562
+
563
+
564
# Tool specification handed to the agent's tool registry: the public tool
# name, the description shown to the model, and the JSON-schema of its
# parameters (an ``operation`` selector plus a free-form ``args`` object).
PRIVATE_HF_REPO_TOOL_SPEC = {
    "name": "hf_private_repos",
    "description": " ".join(
        (
            "Manage private Hugging Face repositories.",
            "PRIMARY USE: Store job outputs, scripts, and logs from HF Jobs (ephemeral results need persistent storage).",
            "SECONDARY USE: Read back stored files and list repo contents.",
            "Pass file content as strings/bytes (no filesystem needed).",
            "Call with no operation for full usage instructions.",
        )
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "operation": {
                "type": "string",
                "enum": [
                    "upload_file",
                    "create_repo",
                    "check_repo",
                    "list_files",
                    "read_file",
                ],
                "description": "Operation to execute. Valid values: [upload_file, create_repo, check_repo, list_files, read_file]",
            },
            "args": {
                "type": "object",
                "description": " ".join(
                    (
                        "Operation-specific arguments as a JSON object.",
                        "Write ops: file_content (string/bytes), path_in_repo (string), repo_id (string),",
                        "repo_type (dataset/model/space), create_if_missing (boolean), commit_message (string).",
                        "Read ops: repo_id (string), path_in_repo (for read_file), repo_type (optional).",
                    )
                ),
                "additionalProperties": True,
            },
        },
    },
}
597
+
598
+
599
async def private_hf_repo_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
    """Adapt ``PrivateHfRepoTool`` to the tool router's handler signature.

    Builds a fresh tool instance, runs ``execute`` with *arguments* and
    returns ``(text, success)`` where ``success`` is the negation of the
    result's ``isError`` flag. Any exception is turned into a failure tuple.
    """
    try:
        outcome = await PrivateHfRepoTool().execute(arguments)
        text = outcome["formatted"]
        succeeded = not outcome.get("isError", False)
        return text, succeeded
    except Exception as e:
        return f"Error executing Private HF Repo tool: {str(e)}", False
agent/tools/utils_tools.py ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Utils Tools - General utility operations
3
+
4
+ Provides system information like current date/time with timezone support.
5
+ """
6
+
7
+ import asyncio
8
+ from datetime import datetime
9
+ from typing import Any, Dict, Literal, Optional
10
+
11
+ try:
12
+ import zoneinfo
13
+ except ImportError:
14
+ from backports import zoneinfo
15
+
16
+ from agent.tools.types import ToolResult
17
+
18
# Operation names understood by UtilsTool.execute (currently only "get_datetime").
OperationType = Literal["get_datetime"]
20
+
21
+
22
class UtilsTool:
    """Tool for general utility operations.

    The only operation currently routed is ``get_datetime``. Every method
    returns a result dict with ``formatted``, ``totalResults`` and
    ``resultsShared`` keys; failures additionally set ``isError`` to True
    rather than raising.
    """

    # Timezone applied when the caller supplies none (or a null/empty value).
    DEFAULT_TIMEZONE = "Europe/Paris"

    async def execute(self, params: Dict[str, Any]) -> ToolResult:
        """Execute the specified utility operation.

        Args:
            params: Tool-call payload. ``operation`` selects the operation;
                ``args`` (optional) carries its arguments as a dict.

        Returns:
            The operation's result, the usage text when no operation is
            given, or an error result for an unknown operation / malformed
            payload.
        """
        operation = params.get("operation")

        # If no operation provided, return usage instructions
        if not operation:
            return self._show_help()

        # A non-string operation can never match; report it instead of
        # letting ``.lower()`` raise AttributeError.
        if not isinstance(operation, str):
            return self._unknown_operation(operation)

        args = params.get("args")
        if args is None:
            # An explicit null is treated exactly like an omitted "args".
            args = {}
        elif not isinstance(args, dict):
            # Callers sometimes send "args" as a string; say so clearly
            # instead of failing later on ``args.get``.
            return self._error(
                f'Invalid "args": expected a JSON object, got {type(args).__name__}.\n\n'
                "Call this tool with no operation for full usage instructions."
            )

        # Normalize operation name
        operation = operation.strip().lower()

        # Check if help is requested
        if args.get("help"):
            return self._show_operation_help(operation)

        try:
            # Route to appropriate handler
            if operation == "get_datetime":
                return await self._get_datetime(args)
            return self._unknown_operation(operation)
        except Exception as e:
            return self._error(f"Error executing {operation}: {str(e)}")

    @staticmethod
    def _error(message: str) -> ToolResult:
        """Build a failure result carrying *message*."""
        return {
            "formatted": message,
            "totalResults": 0,
            "resultsShared": 0,
            "isError": True,
        }

    def _unknown_operation(self, operation: Any) -> ToolResult:
        """Build the failure result for an operation name that is not routed."""
        return self._error(
            f'Unknown operation: "{operation}"\n\n'
            "Available operations: get_datetime\n\n"
            "Call this tool with no operation for full usage instructions."
        )

    def _show_help(self) -> ToolResult:
        """Show usage instructions when tool is called with no arguments."""
        usage_text = """# Utils Tool

Utility operations for system information.

## Available Commands

- **get_datetime** - Get current date and time with timezone support

## Examples

### Get current date and time (Paris timezone by default)
Call this tool with:
```json
{
  "operation": "get_datetime",
  "args": {}
}
```

### Get current date and time in a specific timezone
Call this tool with:
```json
{
  "operation": "get_datetime",
  "args": {
    "timezone": "America/New_York"
  }
}
```

Common timezones: Europe/Paris, America/New_York, America/Los_Angeles, Asia/Tokyo, UTC

## Tips

- **Default timezone**: Paris (Europe/Paris)
- **Date format**: dd-mm-yyyy
- **Time format**: HH:MM:SS.mmm (24-hour format with milliseconds)
- **Timezone names**: Use IANA timezone database names (e.g., "Europe/Paris", "UTC")
"""
        return {"formatted": usage_text, "totalResults": 1, "resultsShared": 1}

    def _show_operation_help(self, operation: str) -> ToolResult:
        """Show help for a specific operation."""
        help_text = f"Help for operation: {operation}\n\nCall with appropriate arguments. Use the main help for examples."
        return {"formatted": help_text, "totalResults": 1, "resultsShared": 1}

    async def _get_datetime(self, args: Dict[str, Any]) -> ToolResult:
        """Get current date and time with timezone support.

        Args:
            args: May contain ``timezone``, an IANA zone name. A missing,
                null or empty value falls back to ``DEFAULT_TIMEZONE``.

        Returns:
            A result whose text lists the date (dd-mm-yyyy), time
            (HH:MM:SS.mmm), zone name/offset, ISO-8601 form and Unix
            timestamp; or an error result when the zone cannot be resolved.
        """
        timezone_name = args.get("timezone") or self.DEFAULT_TIMEZONE

        # Only the zone lookup lives in this ``try`` so that every way a bad
        # key can fail (unknown key, path-like key -> ValueError, non-string
        # -> TypeError, directory-like key -> OSError on some versions) is
        # reported uniformly as an invalid timezone.
        try:
            tz = zoneinfo.ZoneInfo(timezone_name)
        except (zoneinfo.ZoneInfoNotFoundError, ValueError, TypeError, OSError):
            return self._error(
                f"Invalid timezone: {timezone_name}\n\n"
                "Use IANA timezone database names like:\n"
                "- Europe/Paris\n"
                "- America/New_York\n"
                "- Asia/Tokyo\n"
                "- UTC\n\n"
                "See: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones"
            )

        try:
            # Get current datetime in specified timezone
            now = datetime.now(tz)

            # Format date as dd-mm-yyyy
            date_str = now.strftime("%d-%m-%Y")

            # %f yields microseconds (6 digits); dropping the last 3 leaves
            # milliseconds.
            time_str = now.strftime("%H:%M:%S.%f")[:-3]

            # Offset arrives as +HHMM; it is split below to render +HH:MM.
            tz_offset = now.strftime("%z")
            tz_name = now.strftime("%Z")

            response = "\n".join(
                [
                    "✓ Current date and time",
                    "",
                    f"**Date:** {date_str}",
                    f"**Time:** {time_str}",
                    f"**Timezone:** {timezone_name} ({tz_name}, UTC{tz_offset[:3]}:{tz_offset[3:]})",
                    "",
                    f"**ISO Format:** {now.isoformat()}",
                    f"**Unix Timestamp:** {int(now.timestamp())}",
                ]
            )
            return {"formatted": response, "totalResults": 1, "resultsShared": 1}
        except Exception as e:
            return self._error(f"Failed to get date/time: {str(e)}")
163
+
164
+
165
# Tool specification for agent registration.
# The operation names are listed once and reused for both the schema ``enum``
# and the human-readable description so the two stay in sync.
_UTILS_OPERATIONS = ("get_datetime",)

UTILS_TOOL_SPEC = {
    "name": "utils",
    # Sentences are joined with single spaces into one description string.
    "description": " ".join(
        [
            "Utility operations for system information.",
            "Get current date (dd-mm-yyyy) and time (HH:MM:SS.mmm) with timezone support.",
            "Default timezone: Paris (Europe/Paris).",
            "Call with no operation for full usage instructions.",
        ]
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "operation": {
                "type": "string",
                "enum": list(_UTILS_OPERATIONS),
                "description": (
                    "Operation to execute. Valid values: "
                    f"[{', '.join(_UTILS_OPERATIONS)}]"
                ),
            },
            "args": {
                "type": "object",
                "description": " ".join(
                    [
                        "Operation-specific arguments as a JSON object.",
                        "For get_datetime: timezone (string, optional, default: Europe/Paris).",
                        "Use IANA timezone names like 'America/New_York', 'Asia/Tokyo', 'UTC'.",
                    ]
                ),
                "additionalProperties": True,
            },
        },
    },
}
194
+
195
+
196
async def utils_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
    """Adapt ``UtilsTool`` to the tool router's ``(text, success)`` shape.

    Args:
        arguments: Raw tool-call payload, forwarded unchanged to
            ``UtilsTool.execute``.

    Returns:
        The result's ``formatted`` text paired with a success flag, which is
        False when the result carries a truthy ``isError``. Any exception
        (including one raised while constructing the tool) is converted into
        an error string with a False flag instead of propagating.
    """
    try:
        outcome = await UtilsTool().execute(arguments)
        text = outcome["formatted"]
        failed = outcome.get("isError", False)
        return text, not failed
    except Exception as exc:
        return f"Error executing Utils tool: {exc!s}", False