Spaces:

smolagents
/

ml-intern

Running on CPU Upgrade

App Files Files Community

Henri Bonamy committed on Jan 5

Commit

2e66e57

2 Parent(s): 71477da b70fed7

Merge pull request #8 from huggingface/private-repo-tool

Browse files

Files changed (7) hide show

agent/context_manager/manager.py +0 -1
agent/core/agent_loop.py +47 -12
agent/core/tools.py +18 -1
agent/main.py +109 -52
agent/tools/jobs_tool.py +3 -1
agent/tools/private_hf_repo_tools.py +606 -0
agent/tools/utils_tools.py +203 -0

agent/context_manager/manager.py CHANGED Viewed

@@ -52,7 +52,6 @@ class ContextManager:
         """Add a message to the history"""
         if token_count:
             self.context_length = token_count
-            print(f"DEBUG : token_count = {self.context_length}")
         self.items.append(message)
     def get_messages(self) -> list[Message]:

         """Add a message to the history"""
         if token_count:
             self.context_length = token_count
         self.items.append(message)
     def get_messages(self) -> list[Message]:

agent/core/agent_loop.py CHANGED Viewed

@@ -15,14 +15,42 @@ from agent.core.tools import ToolRouter
 ToolCall = ChatCompletionMessageToolCall
 def _needs_approval(tool_name: str, tool_args: dict) -> bool:
     """Check if a tool call requires user approval before execution"""
-    if tool_name != "hf_jobs":
         return False
-    # Check if it's a run or uv operation
-    operation = tool_args.get("operation", "")
-    return operation in ["run", "uv"]
 class Handlers:
@@ -121,16 +149,23 @@ class Handlers:
                     tool_name = tc.function.name
                     tool_args = json.loads(tc.function.arguments)
-                    await session.send_event(
-                        Event(
-                            event_type="tool_call",
-                            data={"tool": tool_name, "arguments": tool_args},
                         )
-                    )
-                    output, success = await session.tool_router.call_tool(
-                        tool_name, tool_args
-                    )
                     # Add tool result to history
                     tool_msg = Message(

 ToolCall = ChatCompletionMessageToolCall
+def _validate_tool_args(tool_args: dict) -> tuple[bool, str | None]:
+    """
+    Validate tool arguments structure.
+    Returns:
+        (is_valid, error_message)
+    """
+    args = tool_args.get("args", {})
+    # Sometimes LLM passes args as string instead of dict
+    if isinstance(args, str):
+        return False, f"Tool call error: 'args' must be a JSON object, not a string. You passed: {repr(args)}"
+    if not isinstance(args, dict) and args is not None:
+        return False, f"Tool call error: 'args' must be a JSON object. You passed type: {type(args).__name__}"
+    return True, None
 def _needs_approval(tool_name: str, tool_args: dict) -> bool:
     """Check if a tool call requires user approval before execution"""
+    # If args are malformed, skip approval (validation error will be shown later)
+    args_valid, _ = _validate_tool_args(tool_args)
+    if not args_valid:
         return False
+    args = tool_args.get("args", {})
+    if tool_name == "hf_jobs":
+        # Check if it's a run or uv operation
+        operation = tool_args.get("operation", "")
+        return operation in ["run", "uv"]
+    if tool_name == "hf_private_repos":
+        # Repo creation and file uploads require approval
+        operation = tool_args.get("operation", "")
+        return operation in ["create_repo", "upload_file"]
+    return False
 class Handlers:
                     tool_name = tc.function.name
                     tool_args = json.loads(tc.function.arguments)
+                    # Validate tool arguments before calling
+                    args_valid, error_msg = _validate_tool_args(tool_args)
+                    if not args_valid:
+                        # Return error to agent instead of calling tool
+                        output = error_msg
+                        success = False
+                    else:
+                        await session.send_event(
+                            Event(
+                                event_type="tool_call",
+                                data={"tool": tool_name, "arguments": tool_args},
+                            )
                         )
+                        output, success = await session.tool_router.call_tool(
+                            tool_name, tool_args
+                        )
                     # Add tool result to history
                     tool_msg = Message(

agent/core/tools.py CHANGED Viewed

@@ -21,6 +21,11 @@ from agent.tools.docs_tools import (
 )
 from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, hf_jobs_handler
 from agent.tools.plan_tool import PLAN_TOOL_SPEC, plan_tool_handler
 # Suppress aiohttp deprecation warning
 warnings.filterwarnings(
@@ -219,7 +224,7 @@ class ToolRouter:
 def create_builtin_tools() -> list[ToolSpec]:
     """Create built-in tool specifications"""
     print(
-        f"Creating built-in tools: {EXPLORE_HF_DOCS_TOOL_SPEC['name']}, {HF_DOCS_FETCH_TOOL_SPEC['name']}, {PLAN_TOOL_SPEC['name']}, {HF_JOBS_TOOL_SPEC['name']}"
     )
     # in order of importance
     return [
@@ -249,4 +254,16 @@ def create_builtin_tools() -> list[ToolSpec]:
             parameters=HF_JOBS_TOOL_SPEC["parameters"],
             handler=hf_jobs_handler,
         ),
     ]

 )
 from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, hf_jobs_handler
 from agent.tools.plan_tool import PLAN_TOOL_SPEC, plan_tool_handler
+from agent.tools.private_hf_repo_tools import (
+    PRIVATE_HF_REPO_TOOL_SPEC,
+    private_hf_repo_handler,
+)
+from agent.tools.utils_tools import UTILS_TOOL_SPEC, utils_handler
 # Suppress aiohttp deprecation warning
 warnings.filterwarnings(
 def create_builtin_tools() -> list[ToolSpec]:
     """Create built-in tool specifications"""
     print(
+        f"Creating built-in tools: {EXPLORE_HF_DOCS_TOOL_SPEC['name']}, {HF_DOCS_FETCH_TOOL_SPEC['name']}, {PLAN_TOOL_SPEC['name']}, {HF_JOBS_TOOL_SPEC['name']}, {PRIVATE_HF_REPO_TOOL_SPEC['name']}, {UTILS_TOOL_SPEC['name']}"
     )
     # in order of importance
     return [
             parameters=HF_JOBS_TOOL_SPEC["parameters"],
             handler=hf_jobs_handler,
         ),
+        ToolSpec(
+            name=PRIVATE_HF_REPO_TOOL_SPEC["name"],
+            description=PRIVATE_HF_REPO_TOOL_SPEC["description"],
+            parameters=PRIVATE_HF_REPO_TOOL_SPEC["parameters"],
+            handler=private_hf_repo_handler,
+        ),
+        ToolSpec(
+            name=UTILS_TOOL_SPEC["name"],
+            description=UTILS_TOOL_SPEC["description"],
+            parameters=UTILS_TOOL_SPEC["parameters"],
+            handler=utils_handler,
+        ),
     ]

agent/main.py CHANGED Viewed

@@ -30,6 +30,16 @@ from agent.utils.terminal_display import (
 litellm.drop_params = True
 lmnr_api_key = os.environ.get("LMNR_API_KEY")
 if lmnr_api_key:
     try:
@@ -125,7 +135,7 @@ async def event_listener(
                 print("\n" + format_separator())
                 print(
                     format_header(
-                        f"JOB EXECUTION APPROVAL REQUIRED ({count} job{'s' if count != 1 else ''})"
                     )
                 )
                 print(format_separator())
@@ -148,60 +158,107 @@ async def event_listener(
                     operation = arguments.get("operation", "")
-                    print(f"\n[Job {i}/{count}]")
                     print(f"Operation: {operation}")
-                    # Check if this is Python mode (script) or Docker mode (command)
-                    script = arguments.get("script")
-                    command = arguments.get("command")
-                    if script:
-                        # Python mode
-                        dependencies = arguments.get("dependencies", [])
-                        python_version = arguments.get("python")
-                        script_args = arguments.get("script_args", [])
-                        # Show script (truncate if too long)
-                        script_display = (
-                            script if len(script) < 200 else script[:200] + "..."
-                        )
-                        print(f"Script: {script_display}")
-                        if dependencies:
-                            print(f"Dependencies: {', '.join(dependencies)}")
-                        if python_version:
-                            print(f"Python version: {python_version}")
-                        if script_args:
-                            print(f"Script args: {' '.join(script_args)}")
-                    elif command:
-                        # Docker mode
-                        image = arguments.get("image", "python:3.12")
-                        command_str = (
-                            " ".join(command)
-                            if isinstance(command, list)
-                            else str(command)
-                        )
-                        print(f"Docker image: {image}")
-                        print(f"Command: {command_str}")
-                    # Common parameters
-                    hardware_flavor = arguments.get("hardware_flavor", "cpu-basic")
-                    timeout = arguments.get("timeout", "30m")
-                    env = arguments.get("env", {})
-                    schedule = arguments.get("schedule")
-                    print(f"Hardware: {hardware_flavor}")
-                    print(f"Timeout: {timeout}")
-                    if env:
-                        env_keys = ", ".join(env.keys())
-                        print(f"Environment variables: {env_keys}")
-                    if schedule:
-                        print(f"Schedule: {schedule}")
-                    # Get user decision for this job
                     response = await prompt_session.prompt_async(
-                        f"Approve job {i}? (y=yes, n=no, or provide feedback to reject): "
                     )
                     response = response.strip()

 litellm.drop_params = True
+def _safe_get_args(arguments: dict) -> dict:
+    """Safely extract args dict from arguments, handling cases where LLM passes string."""
+    args = arguments.get("args", {})
+    # Sometimes LLM passes args as string instead of dict
+    if isinstance(args, str):
+        return {}
+    return args if isinstance(args, dict) else {}
 lmnr_api_key = os.environ.get("LMNR_API_KEY")
 if lmnr_api_key:
     try:
                 print("\n" + format_separator())
                 print(
                     format_header(
+                        f"APPROVAL REQUIRED ({count} item{'s' if count != 1 else ''})"
                     )
                 )
                 print(format_separator())
                     operation = arguments.get("operation", "")
+                    print(f"\n[Item {i}/{count}]")
+                    print(f"Tool: {tool_name}")
                     print(f"Operation: {operation}")
+                    # Handle different tool types
+                    if tool_name == "hf_jobs":
+                        # Check if this is Python mode (script) or Docker mode (command)
+                        script = arguments.get("script")
+                        command = arguments.get("command")
+                        if script:
+                            # Python mode
+                            dependencies = arguments.get("dependencies", [])
+                            python_version = arguments.get("python")
+                            script_args = arguments.get("script_args", [])
+                            # Show script (truncate if too long)
+                            script_display = (
+                                script if len(script) < 200 else script[:200] + "..."
+                            )
+                            print(f"Script: {script_display}")
+                            if dependencies:
+                                print(f"Dependencies: {', '.join(dependencies)}")
+                            if python_version:
+                                print(f"Python version: {python_version}")
+                            if script_args:
+                                print(f"Script args: {' '.join(script_args)}")
+                        elif command:
+                            # Docker mode
+                            image = arguments.get("image", "python:3.12")
+                            command_str = (
+                                " ".join(command)
+                                if isinstance(command, list)
+                                else str(command)
+                            )
+                            print(f"Docker image: {image}")
+                            print(f"Command: {command_str}")
+                        # Common parameters for jobs
+                        hardware_flavor = arguments.get("hardware_flavor", "cpu-basic")
+                        timeout = arguments.get("timeout", "30m")
+                        env = arguments.get("env", {})
+                        schedule = arguments.get("schedule")
+                        print(f"Hardware: {hardware_flavor}")
+                        print(f"Timeout: {timeout}")
+                        if env:
+                            env_keys = ", ".join(env.keys())
+                            print(f"Environment variables: {env_keys}")
+                        if schedule:
+                            print(f"Schedule: {schedule}")
+                    elif tool_name == "hf_private_repos":
+                        # Handle private repo operations
+                        args = _safe_get_args(arguments)
+                        if operation in ["create_repo", "upload_file"]:
+                            repo_id = args.get("repo_id", "")
+                            repo_type = args.get("repo_type", "dataset")
+                            # Build repo URL
+                            type_path = "" if repo_type == "model" else f"{repo_type}s"
+                            repo_url = f"https://huggingface.co/{type_path}/{repo_id}".replace("//", "/")
+                            print(f"Repository: {repo_id}")
+                            print(f"Type: {repo_type}")
+                            print(f"Private: Yes")
+                            print(f"URL: {repo_url}")
+                            # Show file preview for upload_file operation
+                            if operation == "upload_file":
+                                path_in_repo = args.get("path_in_repo", "")
+                                file_content = args.get("file_content", "")
+                                print(f"File: {path_in_repo}")
+                                if isinstance(file_content, str):
+                                    # Calculate metrics
+                                    all_lines = file_content.split('\n')
+                                    line_count = len(all_lines)
+                                    size_bytes = len(file_content.encode('utf-8'))
+                                    size_kb = size_bytes / 1024
+                                    size_mb = size_kb / 1024
+                                    print(f"Line count: {line_count}")
+                                    if size_kb < 1024:
+                                        print(f"Size: {size_kb:.2f} KB")
+                                    else:
+                                        print(f"Size: {size_mb:.2f} MB")
+                                    # Show preview
+                                    preview_lines = all_lines[:5]
+                                    preview = '\n'.join(preview_lines)
+                                    print(f"Content preview (first 5 lines):\n{preview}")
+                                    if len(all_lines) > 5:
+                                        print("...")
+                    # Get user decision for this item
                     response = await prompt_session.prompt_async(
+                        f"Approve item {i}? (y=yes, n=no, or provide feedback to reject): "
                     )
                     response = response.strip()

agent/tools/jobs_tool.py CHANGED Viewed

@@ -756,7 +756,9 @@ HF_JOBS_TOOL_SPEC = {
         "{'operation': 'logs', 'job_id': 'xxx'} - stream logs\n"
         "{'operation': 'cancel', 'job_id': 'xxx'} - stop job\n\n"
         "## CRITICAL: Files are ephemeral!\n"
-        "Everything created during execution is DELETED when job finishes. Always .push_to_hub() your outputs (models, datasets, artifacts) in the script."
     ),
     "parameters": {
         "type": "object",

         "{'operation': 'logs', 'job_id': 'xxx'} - stream logs\n"
         "{'operation': 'cancel', 'job_id': 'xxx'} - stop job\n\n"
         "## CRITICAL: Files are ephemeral!\n"
+        "Everything created during execution is DELETED when job finishes. Always .push_to_hub() your outputs (models, datasets, artifacts) in the script.\n\n"
+        "## After job completion:\n"
+        "If needed or asked by the user, use hf_private_repos tool to store scripts/logs/results to Hub for persistent storage."
     ),
     "parameters": {
         "type": "object",

agent/tools/private_hf_repo_tools.py ADDED Viewed

	@@ -0,0 +1,606 @@

+"""
+Private HF Repos Tool - Manage private Hugging Face repositories
+PRIMARY USE: Store job outputs, training scripts, and logs from HF Jobs.
+Since job results are ephemeral, this tool provides persistent storage in private repos.
+SECONDARY USE: Read back stored files and list repo contents.
+"""
+import asyncio
+from typing import Any, Dict, Literal, Optional
+from huggingface_hub import HfApi, hf_hub_download
+from huggingface_hub.utils import HfHubHTTPError
+from agent.tools.types import ToolResult
+# Operation names
+OperationType = Literal["upload_file", "create_repo", "check_repo", "list_files", "read_file"]
+async def _async_call(func, *args, **kwargs):
+    """Wrap synchronous HfApi calls for async context."""
+    return await asyncio.to_thread(func, *args, **kwargs)
+def _build_repo_url(repo_id: str, repo_type: str = "dataset") -> str:
+    """Build the Hub URL for a repository."""
+    type_path = "" if repo_type == "model" else f"{repo_type}s"
+    return f"https://huggingface.co/{type_path}/{repo_id}".replace("//", "/")
+def _content_to_bytes(content: str | bytes) -> bytes:
+    """Convert string or bytes content to bytes."""
+    if isinstance(content, str):
+        return content.encode('utf-8')
+    return content
+class PrivateHfRepoTool:
+    """Tool for managing private Hugging Face repositories."""
+    def __init__(self, hf_token: Optional[str] = None):
+        """Create the tool around an ``HfApi`` client.
+
+        Args:
+            hf_token: Token handed to ``HfApi``; None is passed through unchanged.
+        """
+        self.api = HfApi(token=hf_token)
+    async def execute(self, params: Dict[str, Any]) -> ToolResult:
+        """Execute the specified upload operation."""
+        operation = params.get("operation")
+        args = params.get("args", {})
+        # If no operation provided, return usage instructions
+        if not operation:
+            return self._show_help()
+        # Normalize operation name
+        operation = operation.lower()
+        # Check if help is requested
+        if args.get("help"):
+            return self._show_operation_help(operation)
+        try:
+            # Route to appropriate handler
+            if operation == "upload_file":
+                return await self._upload_file(args)
+            elif operation == "create_repo":
+                return await self._create_repo(args)
+            elif operation == "check_repo":
+                return await self._check_repo(args)
+            elif operation == "list_files":
+                return await self._list_files(args)
+            elif operation == "read_file":
+                return await self._read_file(args)
+            else:
+                return {
+                    "formatted": f'Unknown operation: "{operation}"\n\n'
+                    "Available operations: upload_file, create_repo, check_repo, list_files, read_file\n\n"
+                    "Call this tool with no operation for full usage instructions.",
+                    "totalResults": 0,
+                    "resultsShared": 0,
+                    "isError": True,
+                }
+        except HfHubHTTPError as e:
+            return {
+                "formatted": f"API Error: {str(e)}",
+                "totalResults": 0,
+                "resultsShared": 0,
+                "isError": True,
+            }
+        except Exception as e:
+            return {
+                "formatted": f"Error executing {operation}: {str(e)}",
+                "totalResults": 0,
+                "resultsShared": 0,
+                "isError": True,
+            }
+    def _show_help(self) -> ToolResult:
+        """Return the full usage guide for this tool.
+
+        This is what ``execute`` returns when it is called without an
+        operation. The guide is a fixed Markdown text listing the operations,
+        one JSON example per operation, the repository types and usage tips.
+
+        Returns:
+            A ToolResult whose "formatted" field holds the guide, with
+            "totalResults" and "resultsShared" both set to 1.
+        """
+        # Double backslashes keep a literal "\n" inside the JSON examples
+        # rather than inserting a real line break into the help text.
+        usage_text = """# Private HF Repos Tool
+**PRIMARY USE:** Store job outputs, scripts, and logs from HF Jobs to private repos.
+Since job results are ephemeral, use this tool for persistent storage.
+**SECONDARY USE:** Read back stored files and list repo contents.
+## Available Commands
+### Write Operations
+- **upload_file** - Upload file content to a repository
+- **create_repo** - Create a new private repository
+### Read Operations
+- **list_files** - List all files in a repository
+- **read_file** - Read content of a specific file from a repository
+- **check_repo** - Check if a repository exists
+## Examples
+### Upload a script to a dataset repo
+Call this tool with:
+```json
+{
+  "operation": "upload_file",
+  "args": {
+    "file_content": "import pandas as pd\\nprint('Hello from HF!')",
+    "path_in_repo": "scripts/hello.py",
+    "repo_id": "my-dataset",
+    "repo_type": "dataset",
+    "create_if_missing": true,
+    "commit_message": "Add hello script"
+  }
+}
+```
+### Upload logs from a job
+Call this tool with:
+```json
+{
+  "operation": "upload_file",
+  "args": {
+    "file_content": "Job started...\\nJob completed successfully!",
+    "path_in_repo": "jobs/job-abc123/logs.txt",
+    "repo_id": "job-results",
+    "create_if_missing": true
+  }
+}
+```
+### Create a repository
+Call this tool with:
+```json
+{
+  "operation": "create_repo",
+  "args": {
+    "repo_id": "my-results",
+    "repo_type": "dataset"
+  }
+}
+```
+Note: Repositories are always created as private.
+### Check if a repository exists
+Call this tool with:
+```json
+{
+  "operation": "check_repo",
+  "args": {
+    "repo_id": "my-dataset",
+    "repo_type": "dataset"
+  }
+}
+```
+### List files in a repository
+Call this tool with:
+```json
+{
+  "operation": "list_files",
+  "args": {
+    "repo_id": "job-results",
+    "repo_type": "dataset"
+  }
+}
+```
+### Read a file from a repository
+Call this tool with:
+```json
+{
+  "operation": "read_file",
+  "args": {
+    "repo_id": "job-results",
+    "path_in_repo": "jobs/job-abc123/script.py",
+    "repo_type": "dataset"
+  }
+}
+```
+## Repository Types
+- **dataset** (default) - For storing data, results, logs, scripts
+- **model** - For ML models and related artifacts
+- **space** - For Spaces and applications
+## Tips
+- **Content-based**: Pass file content directly as strings or bytes, not file paths
+- **Repo ID format**: Use just the repo name (e.g., "my-dataset"). Username is automatically inferred from HF_TOKEN
+- **Automatic repo creation**: Set `create_if_missing: true` to auto-create repos (requires user approval)
+- **Organization**: Use path_in_repo to organize files (e.g., "jobs/job-123/script.py")
+- **After jobs**: Upload job scripts and logs after compute jobs complete for reproducibility
+"""
+        return {"formatted": usage_text, "totalResults": 1, "resultsShared": 1}
+    def _show_operation_help(self, operation: str) -> ToolResult:
+        """Show help for a specific operation."""
+        help_text = f"Help for operation: {operation}\n\nCall with appropriate arguments. Use the main help for examples."
+        return {"formatted": help_text, "totalResults": 1, "resultsShared": 1}
+    async def _upload_file(self, args: Dict[str, Any]) -> ToolResult:
+        """Upload file content to a Hub repository."""
+        # Validate required arguments
+        file_content = args.get("file_content")
+        path_in_repo = args.get("path_in_repo")
+        repo_id = args.get("repo_id")
+        if not file_content:
+            return {
+                "formatted": "file_content is required",
+                "totalResults": 0,
+                "resultsShared": 0,
+                "isError": True,
+            }
+        if not path_in_repo:
+            return {
+                "formatted": "path_in_repo is required",
+                "totalResults": 0,
+                "resultsShared": 0,
+                "isError": True,
+            }
+        if not repo_id:
+            return {
+                "formatted": "repo_id is required",
+                "totalResults": 0,
+                "resultsShared": 0,
+                "isError": True,
+            }
+        repo_type = args.get("repo_type", "dataset")
+        create_if_missing = args.get("create_if_missing", False)
+        # Check if repo exists
+        try:
+            repo_exists = await _async_call(
+                self.api.repo_exists, repo_id=repo_id, repo_type=repo_type
+            )
+            # Create repo if needed
+            if not repo_exists and create_if_missing:
+                await self._create_repo(
+                    {
+                        "repo_id": repo_id,
+                        "repo_type": repo_type,
+                        "private": True,
+                    }
+                )
+            elif not repo_exists:
+                return {
+                    "formatted": f"Repository {repo_id} does not exist. Set create_if_missing: true to create it.",
+                    "totalResults": 0,
+                    "resultsShared": 0,
+                    "isError": True,
+                }
+        except Exception as e:
+            return {
+                "formatted": f"Failed to check repository: {str(e)}",
+                "totalResults": 0,
+                "resultsShared": 0,
+                "isError": True,
+            }
+        # Convert content to bytes
+        file_bytes = _content_to_bytes(file_content)
+        # Upload file
+        try:
+            await _async_call(
+                self.api.upload_file,
+                path_or_fileobj=file_bytes,
+                path_in_repo=path_in_repo,
+                repo_id=repo_id,
+                repo_type=repo_type,
+                commit_message=args.get("commit_message", f"Upload {path_in_repo}"),
+            )
+            repo_url = _build_repo_url(repo_id, repo_type)
+            file_url = f"{repo_url}/blob/main/{path_in_repo}"
+            response = f"""✓ File uploaded successfully!
+**Repository:** {repo_id}
+**File:** {path_in_repo}
+**View at:** {file_url}
+**Browse repo:** {repo_url}"""
+            return {"formatted": response, "totalResults": 1, "resultsShared": 1}
+        except Exception as e:
+            return {
+                "formatted": f"Failed to upload file: {str(e)}",
+                "totalResults": 0,
+                "resultsShared": 0,
+                "isError": True,
+            }
+    async def _create_repo(self, args: Dict[str, Any]) -> ToolResult:
+        """Create a new Hub repository."""
+        repo_id = args.get("repo_id")
+        if not repo_id:
+            return {
+                "formatted": "repo_id is required",
+                "totalResults": 0,
+                "resultsShared": 0,
+                "isError": True,
+            }
+        repo_type = args.get("repo_type", "dataset")
+        private = True  # Always create private repos
+        try:
+            # Check if repo already exists
+            repo_exists = await _async_call(
+                self.api.repo_exists, repo_id=repo_id, repo_type=repo_type
+            )
+            if repo_exists:
+                repo_url = _build_repo_url(repo_id, repo_type)
+                return {
+                    "formatted": f"Repository {repo_id} already exists.\n**View at:** {repo_url}",
+                    "totalResults": 1,
+                    "resultsShared": 1,
+                }
+            # Create repository
+            repo_url = await _async_call(
+                self.api.create_repo,
+                repo_id=repo_id,
+                repo_type=repo_type,
+                private=private,
+                exist_ok=True,
+            )
+            response = f"""✓ Repository created successfully!
+**Repository:** {repo_id}
+**Type:** {repo_type}
+**Private:** Yes
+**View at:** {repo_url}"""
+            return {"formatted": response, "totalResults": 1, "resultsShared": 1}
+        except Exception as e:
+            return {
+                "formatted": f"Failed to create repository: {str(e)}",
+                "totalResults": 0,
+                "resultsShared": 0,
+                "isError": True,
+            }
+    async def _check_repo(self, args: Dict[str, Any]) -> ToolResult:
+        """Check if a Hub repository exists."""
+        repo_id = args.get("repo_id")
+        if not repo_id:
+            return {
+                "formatted": "repo_id is required",
+                "totalResults": 0,
+                "resultsShared": 0,
+                "isError": True,
+            }
+        repo_type = args.get("repo_type", "dataset")
+        try:
+            repo_exists = await _async_call(
+                self.api.repo_exists, repo_id=repo_id, repo_type=repo_type
+            )
+            if repo_exists:
+                repo_url = _build_repo_url(repo_id, repo_type)
+                response = f"""✓ Repository exists!
+**Repository:** {repo_id}
+**Type:** {repo_type}
+**View at:** {repo_url}"""
+            else:
+                response = f"""Repository does not exist: {repo_id}
+To create it, call this tool with:
+```json
+{{
+  "operation": "create_repo",
+  "args": {{
+    "repo_id": "{repo_id}",
+    "repo_type": "{repo_type}"
+  }}
+}}
+```"""
+            return {
+                "formatted": response,
+                "totalResults": 1 if repo_exists else 0,
+                "resultsShared": 1 if repo_exists else 0,
+            }
+        except Exception as e:
+            return {
+                "formatted": f"Failed to check repository: {str(e)}",
+                "totalResults": 0,
+                "resultsShared": 0,
+                "isError": True,
+            }
+    async def _list_files(self, args: Dict[str, Any]) -> ToolResult:
+        """Enumerate the file paths stored in a Hub repository.
+        Reads ``repo_id`` (required) and ``repo_type`` (defaults to "dataset")
+        from ``args``. On success the ``formatted`` text is a sorted bullet list
+        of paths; a missing ``repo_id`` or any exception raised while listing
+        produces a result flagged with ``isError``.
+        """
+        repo_id = args.get("repo_id")
+        if not repo_id:
+            return {
+                "formatted": "repo_id is required",
+                "totalResults": 0,
+                "resultsShared": 0,
+                "isError": True,
+            }
+        repo_type = args.get("repo_type", "dataset")
+        try:
+            paths = await _async_call(
+                self.api.list_repo_files, repo_id=repo_id, repo_type=repo_type
+            )
+            if paths:
+                # One markdown bullet per path, in sorted order
+                bullets = [f"- {path}" for path in sorted(paths)]
+                total = len(paths)
+                summary = [
+                    f"✓ Files in repository: {repo_id}",
+                    f"**Total files:** {total}",
+                    f"**Repository URL:** {_build_repo_url(repo_id, repo_type)}",
+                    "**Files:**",
+                ]
+                return {
+                    "formatted": "\n".join(summary + bullets),
+                    "totalResults": total,
+                    "resultsShared": total,
+                }
+            # An empty listing is a normal (non-error) outcome
+            return {
+                "formatted": f"No files found in repository: {repo_id}",
+                "totalResults": 0,
+                "resultsShared": 0,
+            }
+        except Exception as exc:
+            return {
+                "formatted": f"Failed to list files: {exc!s}",
+                "totalResults": 0,
+                "resultsShared": 0,
+                "isError": True,
+            }
+    async def _read_file(self, args: Dict[str, Any]) -> ToolResult:
+        """Download one file from a Hub repository and return its text content.
+        Args:
+            args: ``repo_id`` and ``path_in_repo`` are required; ``repo_type``
+                defaults to "dataset".
+        Returns:
+            A result whose ``formatted`` text embeds the file decoded as UTF-8.
+            When the file is not valid UTF-8, a non-error notice with its size
+            in bytes is returned instead. Missing arguments and download/read
+            failures produce a result flagged with ``isError``.
+        """
+        import os  # local import: only the binary fallback below needs it
+        repo_id = args.get("repo_id")
+        path_in_repo = args.get("path_in_repo")
+        if not repo_id:
+            return {
+                "formatted": "repo_id is required",
+                "totalResults": 0,
+                "resultsShared": 0,
+                "isError": True,
+            }
+        if not path_in_repo:
+            return {
+                "formatted": "path_in_repo is required",
+                "totalResults": 0,
+                "resultsShared": 0,
+                "isError": True,
+            }
+        repo_type = args.get("repo_type", "dataset")
+        try:
+            # Download file to cache and read it
+            file_path = await _async_call(
+                hf_hub_download,
+                repo_id=repo_id,
+                filename=path_in_repo,
+                repo_type=repo_type,
+                token=self.api.token,
+            )
+            # Read file content
+            with open(file_path, "r", encoding="utf-8") as f:
+                content = f.read()
+            repo_url = _build_repo_url(repo_id, repo_type)
+            file_url = f"{repo_url}/blob/main/{path_in_repo}"
+            response = f"""✓ File read successfully!
+**Repository:** {repo_id}
+**File:** {path_in_repo}
+**Size:** {len(content)} characters
+**View at:** {file_url}
+**Content:**
+```
+{content}
+```"""
+            return {"formatted": response, "totalResults": 1, "resultsShared": 1}
+        except UnicodeDecodeError:
+            # Binary file: report its size only. Ask the filesystem for the size
+            # instead of loading the whole (possibly very large) file into memory.
+            try:
+                size_in_bytes = os.path.getsize(file_path)
+                return {
+                    "formatted": f"File is binary ({size_in_bytes} bytes). Cannot display as text.",
+                    "totalResults": 1,
+                    "resultsShared": 1,
+                }
+            except Exception as e:
+                return {
+                    "formatted": f"Failed to read binary file: {str(e)}",
+                    "totalResults": 0,
+                    "resultsShared": 0,
+                    "isError": True,
+                }
+        except Exception as e:
+            return {
+                "formatted": f"Failed to read file: {str(e)}",
+                "totalResults": 0,
+                "resultsShared": 0,
+                "isError": True,
+            }
+# Tool schema used when registering the private-repo tool with the agent
+PRIVATE_HF_REPO_TOOL_SPEC = {
+    "name": "hf_private_repos",
+    # Sentences are joined with single spaces into one description string
+    "description": " ".join(
+        [
+            "Manage private Hugging Face repositories.",
+            "PRIMARY USE: Store job outputs, scripts, and logs from HF Jobs (ephemeral results need persistent storage).",
+            "SECONDARY USE: Read back stored files and list repo contents.",
+            "Pass file content as strings/bytes (no filesystem needed).",
+            "Call with no operation for full usage instructions.",
+        ]
+    ),
+    "parameters": {
+        "type": "object",
+        "properties": {
+            # Selects which operation the tool runs
+            "operation": {
+                "type": "string",
+                "enum": [
+                    "upload_file",
+                    "create_repo",
+                    "check_repo",
+                    "list_files",
+                    "read_file",
+                ],
+                "description": "Operation to execute. Valid values: [upload_file, create_repo, check_repo, list_files, read_file]",
+            },
+            # Free-form object; the accepted keys depend on the operation
+            "args": {
+                "type": "object",
+                "description": " ".join(
+                    [
+                        "Operation-specific arguments as a JSON object.",
+                        "Write ops: file_content (string/bytes), path_in_repo (string), repo_id (string),",
+                        "repo_type (dataset/model/space), create_if_missing (boolean), commit_message (string).",
+                        "Read ops: repo_id (string), path_in_repo (for read_file), repo_type (optional).",
+                    ]
+                ),
+                "additionalProperties": True,
+            },
+        },
+    },
+}
+async def private_hf_repo_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
+    """Adapt ``PrivateHfRepoTool`` to the agent tool router's call convention.
+    Builds a fresh tool, runs it with ``arguments`` and returns a
+    ``(text, success)`` pair, where ``success`` is false when the result
+    carries a truthy ``isError``. Any exception is reported as a failed call
+    rather than propagated.
+    """
+    try:
+        outcome = await PrivateHfRepoTool().execute(arguments)
+        text = outcome["formatted"]
+        failed = outcome.get("isError", False)
+        return text, not failed
+    except Exception as exc:
+        return f"Error executing Private HF Repo tool: {exc!s}", False

agent/tools/utils_tools.py ADDED Viewed

	@@ -0,0 +1,203 @@

+"""
+Utils Tools - General utility operations
+Provides system information like current date/time with timezone support.
+"""
+import asyncio
+from datetime import datetime
+from typing import Any, Dict, Literal, Optional
+try:
+    import zoneinfo
+except ImportError:
+    from backports import zoneinfo
+from agent.tools.types import ToolResult
+# Operation names accepted by UtilsTool.execute (currently only "get_datetime")
+OperationType = Literal["get_datetime"]
+class UtilsTool:
+    """Tool for general utility operations.
+    The only operation currently implemented is ``get_datetime``. Every method
+    returns a result dict with the keys ``formatted``, ``totalResults`` and
+    ``resultsShared``, plus ``isError`` set to True on failure.
+    """
+    # Timezone used when the caller supplies none (or supplies null / "")
+    DEFAULT_TIMEZONE = "Europe/Paris"
+    async def execute(self, params: Dict[str, Any]) -> ToolResult:
+        """Execute the specified utility operation.
+        Args:
+            params: Tool-call payload. ``operation`` selects the handler
+                (case-insensitive); ``args`` carries operation-specific
+                arguments and may be omitted or None.
+        Returns:
+            The handler's result; the usage text when no operation is given;
+            or an error result for an unknown operation, an ``args`` value
+            that is not an object, or an unexpected handler failure.
+        """
+        operation = params.get("operation")
+        # If no operation provided, return usage instructions
+        if not operation:
+            return self._show_help()
+        # Normalize operation name; str() keeps a non-string value from raising
+        operation = str(operation).lower()
+        # An explicit "args": null must behave like an omitted "args"
+        args = params.get("args")
+        if args is None:
+            args = {}
+        if not isinstance(args, dict):
+            return self._error(
+                f'Invalid "args" for {operation}: expected a JSON object'
+            )
+        # Check if help is requested
+        if args.get("help"):
+            return self._show_operation_help(operation)
+        try:
+            # Route to appropriate handler
+            if operation == "get_datetime":
+                return await self._get_datetime(args)
+            return self._error(
+                f'Unknown operation: "{operation}"\n\n'
+                "Available operations: get_datetime\n\n"
+                "Call this tool with no operation for full usage instructions."
+            )
+        except Exception as e:
+            return self._error(f"Error executing {operation}: {str(e)}")
+    @staticmethod
+    def _error(message: str) -> ToolResult:
+        """Build a failed result whose ``formatted`` text is ``message``."""
+        return {
+            "formatted": message,
+            "totalResults": 0,
+            "resultsShared": 0,
+            "isError": True,
+        }
+    def _show_help(self) -> ToolResult:
+        """Show usage instructions when tool is called with no arguments."""
+        usage_text = """# Utils Tool
+Utility operations for system information.
+## Available Commands
+- **get_datetime** - Get current date and time with timezone support
+## Examples
+### Get current date and time (Paris timezone by default)
+Call this tool with:
+```json
+{
+  "operation": "get_datetime",
+  "args": {}
+}
+```
+### Get current date and time in a specific timezone
+Call this tool with:
+```json
+{
+  "operation": "get_datetime",
+  "args": {
+    "timezone": "America/New_York"
+  }
+}
+```
+Common timezones: Europe/Paris, America/New_York, America/Los_Angeles, Asia/Tokyo, UTC
+## Tips
+- **Default timezone**: Paris (Europe/Paris)
+- **Date format**: dd-mm-yyyy
+- **Time format**: HH:MM:SS.mmm (24-hour format with milliseconds)
+- **Timezone names**: Use IANA timezone database names (e.g., "Europe/Paris", "UTC")
+"""
+        return {"formatted": usage_text, "totalResults": 1, "resultsShared": 1}
+    def _show_operation_help(self, operation: str) -> ToolResult:
+        """Show help for a specific operation."""
+        help_text = f"Help for operation: {operation}\n\nCall with appropriate arguments. Use the main help for examples."
+        return {"formatted": help_text, "totalResults": 1, "resultsShared": 1}
+    async def _get_datetime(self, args: Dict[str, Any]) -> ToolResult:
+        """Get current date and time with timezone support.
+        Args:
+            args: Optional ``timezone`` key holding an IANA timezone name; a
+                missing, None or empty value selects ``DEFAULT_TIMEZONE``.
+        Returns:
+            A result with the date (dd-mm-yyyy), time (HH:MM:SS.mmm), UTC
+            offset, ISO-8601 string and Unix timestamp, or an error result
+            when the timezone cannot be resolved.
+        """
+        timezone_name = args.get("timezone") or self.DEFAULT_TIMEZONE
+        try:
+            # Get timezone object
+            tz = zoneinfo.ZoneInfo(timezone_name)
+        except (zoneinfo.ZoneInfoNotFoundError, ValueError, TypeError):
+            # ValueError: malformed key (absolute or non-normalized path);
+            # TypeError: key is not a string. Both get the same guidance as
+            # an unknown timezone name.
+            return self._error(
+                f"Invalid timezone: {timezone_name}\n\n"
+                "Use IANA timezone database names like:\n"
+                "- Europe/Paris\n"
+                "- America/New_York\n"
+                "- Asia/Tokyo\n"
+                "- UTC\n\n"
+                "See: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones"
+            )
+        try:
+            # Get current datetime in specified timezone
+            now = datetime.now(tz)
+            # Format date as dd-mm-yyyy
+            date_str = now.strftime("%d-%m-%Y")
+            # Format time as HH:MM:SS.mmm (drop the last 3 of the 6 microsecond digits)
+            time_str = now.strftime("%H:%M:%S.%f")[:-3]
+            # Get timezone abbreviation/offset; %z yields "+HHMM", shown as "+HH:MM"
+            tz_offset = now.strftime("%z")
+            tz_name = now.strftime("%Z")
+            response = f"""✓ Current date and time
+**Date:** {date_str}
+**Time:** {time_str}
+**Timezone:** {timezone_name} ({tz_name}, UTC{tz_offset[:3]}:{tz_offset[3:]})
+**ISO Format:** {now.isoformat()}
+**Unix Timestamp:** {int(now.timestamp())}"""
+            return {"formatted": response, "totalResults": 1, "resultsShared": 1}
+        except Exception as e:
+            return self._error(f"Failed to get date/time: {str(e)}")
+# Tool schema used when registering the utils tool with the agent
+UTILS_TOOL_SPEC = {
+    "name": "utils",
+    # Sentences are joined with single spaces into one description string
+    "description": " ".join(
+        [
+            "Utility operations for system information.",
+            "Get current date (dd-mm-yyyy) and time (HH:MM:SS.mmm) with timezone support.",
+            "Default timezone: Paris (Europe/Paris).",
+            "Call with no operation for full usage instructions.",
+        ]
+    ),
+    "parameters": {
+        "type": "object",
+        "properties": {
+            # Selects which operation the tool runs
+            "operation": {
+                "type": "string",
+                "enum": ["get_datetime"],
+                "description": "Operation to execute. Valid values: [get_datetime]",
+            },
+            # Free-form object; the accepted keys depend on the operation
+            "args": {
+                "type": "object",
+                "description": " ".join(
+                    [
+                        "Operation-specific arguments as a JSON object.",
+                        "For get_datetime: timezone (string, optional, default: Europe/Paris).",
+                        "Use IANA timezone names like 'America/New_York', 'Asia/Tokyo', 'UTC'.",
+                    ]
+                ),
+                "additionalProperties": True,
+            },
+        },
+    },
+}
+async def utils_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
+    """Adapt ``UtilsTool`` to the agent tool router's call convention.
+    Builds a fresh tool, runs it with ``arguments`` and returns a
+    ``(text, success)`` pair, where ``success`` is false when the result
+    carries a truthy ``isError``. Any exception is reported as a failed call
+    rather than propagated.
+    """
+    try:
+        outcome = await UtilsTool().execute(arguments)
+        text = outcome["formatted"]
+        failed = outcome.get("isError", False)
+        return text, not failed
+    except Exception as exc:
+        return f"Error executing Utils tool: {exc!s}", False