Spaces:

smolagents
/

ml-intern

Running on CPU Upgrade

App Files Files Community

Henri Bonamy committed on Jan 3

Commit

0c21cf1

1 Parent(s): 706d95d

Added tools to create repos, view them, and upload files to them (private). Added a clock tool.

Browse files

Files changed (8) hide show

agent/context_manager/manager.py +0 -1
agent/core/agent_loop.py +47 -12
agent/core/tools.py +18 -1
agent/main.py +66 -11
agent/tools/jobs_tool.py +1 -1
agent/tools/private_hf_repo_tools.py +606 -0
agent/tools/utils_tools.py +203 -0
test_mcp_connection.py +45 -0

agent/context_manager/manager.py CHANGED Viewed

@@ -52,7 +52,6 @@ class ContextManager:
         """Add a message to the history"""
         if token_count:
             self.context_length = token_count
-            print(f"DEBUG : token_count = {self.context_length}")
         self.items.append(message)
     def get_messages(self) -> list[Message]:

         """Add a message to the history"""
         if token_count:
             self.context_length = token_count
         self.items.append(message)
     def get_messages(self) -> list[Message]:

agent/core/agent_loop.py CHANGED Viewed

@@ -15,14 +15,42 @@ from agent.core.tools import ToolRouter
 ToolCall = ChatCompletionMessageToolCall
 def _needs_approval(tool_name: str, tool_args: dict) -> bool:
     """Check if a tool call requires user approval before execution"""
-    if tool_name != "hf_jobs":
         return False
-    # Check if it's a run or uv operation
-    operation = tool_args.get("operation", "")
-    return operation in ["run", "uv"]
 class Handlers:
@@ -130,16 +158,23 @@ class Handlers:
                         # Return early - wait for EXEC_APPROVAL operation
                         return None
-                    await session.send_event(
-                        Event(
-                            event_type="tool_call",
-                            data={"tool": tool_name, "arguments": tool_args},
                         )
-                    )
-                    output, success = await session.tool_router.call_tool(
-                        tool_name, tool_args
-                    )
                     # Add tool result to history
                     tool_msg = Message(

 ToolCall = ChatCompletionMessageToolCall
+def _validate_tool_args(tool_args: dict) -> tuple[bool, str | None]:
+    """
+    Validate the structure of the nested "args" entry of a tool call.
+
+    Only the value stored under the "args" key is inspected. A missing key
+    (treated as an empty dict) and an explicit None are both accepted.
+
+    Args:
+        tool_args: Raw tool-call arguments.
+
+    Returns:
+        (is_valid, error_message): error_message is None when the arguments
+        are valid, otherwise a message describing what was passed instead.
+    """
+    args = tool_args.get("args", {})
+    # Sometimes LLM passes args as string instead of dict
+    if isinstance(args, str):
+        return False, f"Tool call error: 'args' must be a JSON object, not a string. You passed: {repr(args)}"
+    # Any other non-dict value is rejected as well; None is let through
+    if not isinstance(args, dict) and args is not None:
+        return False, f"Tool call error: 'args' must be a JSON object. You passed type: {type(args).__name__}"
+    return True, None
 def _needs_approval(tool_name: str, tool_args: dict) -> bool:
     """Check if a tool call requires user approval before execution.
+
+    Args:
+        tool_name: Name of the tool the model wants to call.
+        tool_args: Raw tool-call arguments; the "operation" entry decides
+            whether approval is needed.
+
+    Returns:
+        True for hf_jobs "run"/"uv" and for hf_private_repos
+        "create_repo"/"upload_file"; False for everything else, including
+        calls whose "args" entry is malformed.
+    """
+    # If args are malformed, skip approval (validation error will be shown later)
+    args_valid, _ = _validate_tool_args(tool_args)
+    if not args_valid:
         return False
+    operation = tool_args.get("operation", "")
+    # The hf_private_repos tool lowercases the operation before dispatching,
+    # so compare case-insensitively; otherwise "Upload_File" would be executed
+    # without ever asking the user.
+    if isinstance(operation, str):
+        operation = operation.lower()
+    if tool_name == "hf_jobs":
+        # Running a job (plain or via uv) needs approval
+        return operation in ("run", "uv")
+    if tool_name == "hf_private_repos":
+        # Repo creation and file uploads require approval
+        return operation in ("create_repo", "upload_file")
+    return False
 class Handlers:
                         # Return early - wait for EXEC_APPROVAL operation
                         return None
+                    # Validate tool arguments before calling
+                    args_valid, error_msg = _validate_tool_args(tool_args)
+                    if not args_valid:
+                        # Return error to agent instead of calling tool
+                        output = error_msg
+                        success = False
+                    else:
+                        await session.send_event(
+                            Event(
+                                event_type="tool_call",
+                                data={"tool": tool_name, "arguments": tool_args},
+                            )
                         )
+                        output, success = await session.tool_router.call_tool(
+                            tool_name, tool_args
+                        )
                     # Add tool result to history
                     tool_msg = Message(

agent/core/tools.py CHANGED Viewed

@@ -13,9 +13,14 @@ from lmnr import observe
 from mcp.types import EmbeddedResource, ImageContent, TextContent
 from agent.config import MCPServerConfig
 from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, hf_jobs_handler
 from agent.tools.plan_tool import PLAN_TOOL_SPEC, plan_tool_handler
 from agent.tools.search_docs_tool import SEARCH_DOCS_TOOL_SPEC, search_docs_handler
 # Suppress aiohttp deprecation warning
 warnings.filterwarnings(
@@ -189,7 +194,7 @@ class ToolRouter:
 def create_builtin_tools() -> list[ToolSpec]:
     """Create built-in tool specifications"""
     print(
-        f"Creating built-in tools: {HF_JOBS_TOOL_SPEC['name']}, {SEARCH_DOCS_TOOL_SPEC['name']}, {PLAN_TOOL_SPEC['name']}"
     )
     return [
         ToolSpec(
@@ -198,6 +203,12 @@ def create_builtin_tools() -> list[ToolSpec]:
             parameters=HF_JOBS_TOOL_SPEC["parameters"],
             handler=hf_jobs_handler,
         ),
         ToolSpec(
             name=SEARCH_DOCS_TOOL_SPEC["name"],
             description=SEARCH_DOCS_TOOL_SPEC["description"],
@@ -210,4 +221,10 @@ def create_builtin_tools() -> list[ToolSpec]:
             parameters=PLAN_TOOL_SPEC["parameters"],
             handler=plan_tool_handler,
         ),
     ]

 from mcp.types import EmbeddedResource, ImageContent, TextContent
 from agent.config import MCPServerConfig
+from agent.tools.private_hf_repo_tools import (
+    PRIVATE_HF_REPO_TOOL_SPEC,
+    private_hf_repo_handler,
+)
 from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, hf_jobs_handler
 from agent.tools.plan_tool import PLAN_TOOL_SPEC, plan_tool_handler
 from agent.tools.search_docs_tool import SEARCH_DOCS_TOOL_SPEC, search_docs_handler
+from agent.tools.utils_tools import UTILS_TOOL_SPEC, utils_handler
 # Suppress aiohttp deprecation warning
 warnings.filterwarnings(
 def create_builtin_tools() -> list[ToolSpec]:
     """Create built-in tool specifications"""
     print(
+        f"Creating built-in tools: {HF_JOBS_TOOL_SPEC['name']}, {PRIVATE_HF_REPO_TOOL_SPEC['name']}, {SEARCH_DOCS_TOOL_SPEC['name']}, {PLAN_TOOL_SPEC['name']}, {UTILS_TOOL_SPEC['name']}"
     )
     return [
         ToolSpec(
             parameters=HF_JOBS_TOOL_SPEC["parameters"],
             handler=hf_jobs_handler,
         ),
+        ToolSpec(
+            name=PRIVATE_HF_REPO_TOOL_SPEC["name"],
+            description=PRIVATE_HF_REPO_TOOL_SPEC["description"],
+            parameters=PRIVATE_HF_REPO_TOOL_SPEC["parameters"],
+            handler=private_hf_repo_handler,
+        ),
         ToolSpec(
             name=SEARCH_DOCS_TOOL_SPEC["name"],
             description=SEARCH_DOCS_TOOL_SPEC["description"],
             parameters=PLAN_TOOL_SPEC["parameters"],
             handler=plan_tool_handler,
         ),
+        ToolSpec(
+            name=UTILS_TOOL_SPEC["name"],
+            description=UTILS_TOOL_SPEC["description"],
+            parameters=UTILS_TOOL_SPEC["parameters"],
+            handler=utils_handler,
+        ),
     ]

agent/main.py CHANGED Viewed

@@ -29,6 +29,16 @@ from agent.utils.terminal_display import (
 litellm.drop_params = True
 lmnr_api_key = os.environ.get("LMNR_API_KEY")
 if lmnr_api_key:
     try:
@@ -121,7 +131,7 @@ async def event_listener(
                 arguments = event.data.get("arguments", {}) if event.data else {}
                 operation = arguments.get("operation", "")
-                args = arguments.get("args", {})
                 print(f"\nOperation: {operation}")
@@ -137,19 +147,64 @@ async def event_listener(
                     print(f"Docker image: {image}")
                     print(f"Command: {command}")
-                # Common parameters
-                flavor = args.get("flavor", "cpu-basic")
-                detached = args.get("detached", False)
-                print(f"Hardware: {flavor}")
-                print(f"Detached mode: {detached}")
-                secrets = args.get("secrets", [])
-                if secrets:
-                    print(f"Secrets: {', '.join(secrets)}")
                 # Get user decision
                 print("\n" + format_separator())
-                print(format_header("JOB EXECUTION APPROVAL REQUIRED"))
                 print(format_separator())
                 loop = asyncio.get_event_loop()
                 response = await loop.run_in_executor(

 litellm.drop_params = True
+def _safe_get_args(arguments: dict) -> dict:
+    """Return the nested "args" mapping of a tool call, or {} if it is not a dict.
+
+    The LLM sometimes sends "args" as a string (or another non-dict value);
+    callers only want something they can safely call .get() on.
+    """
+    candidate = arguments.get("args", {})
+    if not isinstance(candidate, dict):
+        # Strings, None, lists, ... are all replaced by an empty mapping
+        return {}
+    return candidate
 lmnr_api_key = os.environ.get("LMNR_API_KEY")
 if lmnr_api_key:
     try:
                 arguments = event.data.get("arguments", {}) if event.data else {}
                 operation = arguments.get("operation", "")
+                args = _safe_get_args(arguments)
                 print(f"\nOperation: {operation}")
                     print(f"Docker image: {image}")
                     print(f"Command: {command}")
+                    # Common parameters
+                    flavor = args.get("flavor", "cpu-basic")
+                    detached = args.get("detached", False)
+                    print(f"Hardware: {flavor}")
+                    print(f"Detached mode: {detached}")
+                    secrets = args.get("secrets", [])
+                    if secrets:
+                        print(f"Secrets: {', '.join(secrets)}")
+                elif operation in ["create_repo", "upload_file"]:
+                    repo_id = args.get("repo_id", "")
+                    repo_type = args.get("repo_type", "dataset")
+                    # Build repo URL
+                    type_path = "" if repo_type == "model" else f"{repo_type}s"
+                    repo_url = f"https://huggingface.co/{type_path}/{repo_id}".replace("//", "/")
+                    print(f"Repository: {repo_id}")
+                    print(f"Type: {repo_type}")
+                    print(f"Private: Yes")
+                    print(f"URL: {repo_url}")
+                    # Show file preview for upload_file operation
+                    if operation == "upload_file":
+                        path_in_repo = args.get("path_in_repo", "")
+                        file_content = args.get("file_content", "")
+                        print(f"File: {path_in_repo}")
+                        if isinstance(file_content, str):
+                            # Calculate metrics
+                            all_lines = file_content.split('\n')
+                            line_count = len(all_lines)
+                            size_bytes = len(file_content.encode('utf-8'))
+                            size_kb = size_bytes / 1024
+                            size_mb = size_kb / 1024
+                            print(f"Line count: {line_count}")
+                            if size_kb < 1024:
+                                print(f"Size: {size_kb:.2f} KB")
+                            else:
+                                print(f"Size: {size_mb:.2f} MB")
+                            # Show preview
+                            preview_lines = all_lines[:5]
+                            preview = '\n'.join(preview_lines)
+                            print(f"Content preview (first 5 lines):\n{preview}")
+                            if len(all_lines) > 5:
+                                print("...")
                 # Get user decision
                 print("\n" + format_separator())
+                if tool_name == "hf_jobs":
+                    header_text = "JOB EXECUTION APPROVAL REQUIRED"
+                elif operation == "upload_file":
+                    header_text = "FILE UPLOAD APPROVAL REQUIRED"
+                else:
+                    header_text = "REPO CREATION APPROVAL REQUIRED"
+                print(format_header(header_text))
                 print(format_separator())
                 loop = asyncio.get_event_loop()
                 response = await loop.run_in_executor(

agent/tools/jobs_tool.py CHANGED Viewed

@@ -74,7 +74,6 @@ def _substitute_hf_token(params: Dict[str, Any] | None) -> Dict[str, Any] | None
     Returns:
         Dictionary with HF_TOKEN value substituted from environment
     """
-    print("DEBUG !! : ", params)
     if params is None:
         return None
@@ -912,6 +911,7 @@ HF_JOBS_TOOL_SPEC = {
         "Manage Hugging Face CPU/GPU compute jobs. Run commands in Docker containers, "
         "execute Python scripts with UV. List, schedule and monitor jobs/logs. "
         "Example hardware/flavor: cpu-basic, cpu-performance, t4-medium. "
         "Call this tool with no operation for full usage instructions and examples."
     ),
     "parameters": {

     Returns:
         Dictionary with HF_TOKEN value substituted from environment
     """
     if params is None:
         return None
         "Manage Hugging Face CPU/GPU compute jobs. Run commands in Docker containers, "
         "execute Python scripts with UV. List, schedule and monitor jobs/logs. "
         "Example hardware/flavor: cpu-basic, cpu-performance, t4-medium. "
+        "After job completion, if needed or asked by the user, use hf_private_repos tool to store scripts/logs/results to Hub."
         "Call this tool with no operation for full usage instructions and examples."
     ),
     "parameters": {

agent/tools/private_hf_repo_tools.py ADDED Viewed

	@@ -0,0 +1,606 @@

+"""
+Private HF Repos Tool - Manage private Hugging Face repositories
+PRIMARY USE: Store job outputs, training scripts, and logs from HF Jobs.
+Since job results are ephemeral, this tool provides persistent storage in private repos.
+SECONDARY USE: Read back stored files and list repo contents.
+"""
+import asyncio
+from typing import Any, Dict, Literal, Optional
+from huggingface_hub import HfApi, hf_hub_download
+from huggingface_hub.utils import HfHubHTTPError
+from agent.tools.types import ToolResult
+# Operation names
+OperationType = Literal["upload_file", "create_repo", "check_repo", "list_files", "read_file"]
+async def _async_call(func, *args, **kwargs):
+    """Wrap synchronous HfApi calls for async context.
+
+    Calls func(*args, **kwargs) through asyncio.to_thread and returns the
+    awaited result.
+    """
+    return await asyncio.to_thread(func, *args, **kwargs)
+def _build_repo_url(repo_id: str, repo_type: str = "dataset") -> str:
+    """Build the Hub URL for a repository.
+
+    Args:
+        repo_id: Repository identifier, inserted into the URL as given.
+        repo_type: "model" yields a URL without a type segment; any other
+            value is pluralized into the path (e.g. "datasets", "spaces").
+
+    Returns:
+        The https://huggingface.co/... URL of the repository.
+    """
+    # Branch instead of building ".../{type_path}/{repo_id}" and collapsing
+    # "//" afterwards: that cleanup also mangled the scheme into "https:/".
+    if repo_type == "model":
+        return f"https://huggingface.co/{repo_id}"
+    return f"https://huggingface.co/{repo_type}s/{repo_id}"
+def _content_to_bytes(content: str | bytes) -> bytes:
+    """Return content as bytes: str is UTF-8 encoded, anything else is passed through."""
+    return content.encode('utf-8') if isinstance(content, str) else content
+class PrivateHfRepoTool:
+    """Tool for managing private Hugging Face repositories."""
+    def __init__(self, hf_token: Optional[str] = None):
+        """Create the HfApi client used by every operation.
+
+        Args:
+            hf_token: Token forwarded to HfApi; None leaves token resolution
+                to HfApi itself.
+        """
+        self.api = HfApi(token=hf_token)
+    async def execute(self, params: Dict[str, Any]) -> ToolResult:
+        """Run the requested repository operation and return its result.
+
+        Args:
+            params: Tool-call payload with an "operation" name and an optional
+                "args" object holding the operation-specific arguments.
+
+        Returns:
+            The handler's ToolResult. Usage instructions are returned when no
+            operation is given; malformed payloads, unknown operations and
+            exceptions raised by a handler yield a result with "isError": True.
+        """
+        operation = params.get("operation")
+        # Treat an explicit null "args" like a missing one
+        args = params.get("args") or {}
+        # If no operation provided, return usage instructions
+        if not operation:
+            return self._show_help()
+        # Reject malformed payloads up front: .lower() and .get() below would
+        # otherwise raise AttributeError before any error handling applies
+        if not isinstance(operation, str) or not isinstance(args, dict):
+            return {
+                "formatted": "'operation' must be a string and 'args' must be a JSON object.",
+                "totalResults": 0,
+                "resultsShared": 0,
+                "isError": True,
+            }
+        # Normalize operation name
+        operation = operation.lower()
+        # Check if help is requested
+        if args.get("help"):
+            return self._show_operation_help(operation)
+        # Route to appropriate handler
+        handlers = {
+            "upload_file": self._upload_file,
+            "create_repo": self._create_repo,
+            "check_repo": self._check_repo,
+            "list_files": self._list_files,
+            "read_file": self._read_file,
+        }
+        handler = handlers.get(operation)
+        if handler is None:
+            return {
+                "formatted": f'Unknown operation: "{operation}"\n\n'
+                "Available operations: upload_file, create_repo, check_repo, list_files, read_file\n\n"
+                "Call this tool with no operation for full usage instructions.",
+                "totalResults": 0,
+                "resultsShared": 0,
+                "isError": True,
+            }
+        try:
+            return await handler(args)
+        except HfHubHTTPError as e:
+            return {
+                "formatted": f"API Error: {str(e)}",
+                "totalResults": 0,
+                "resultsShared": 0,
+                "isError": True,
+            }
+        except Exception as e:
+            return {
+                "formatted": f"Error executing {operation}: {str(e)}",
+                "totalResults": 0,
+                "resultsShared": 0,
+                "isError": True,
+            }
+    def _show_help(self) -> ToolResult:
+        """Show usage instructions when tool is called with no arguments."""
+        usage_text = """# Private HF Repos Tool
+**PRIMARY USE:** Store job outputs, scripts, and logs from HF Jobs to private repos.
+Since job results are ephemeral, use this tool for persistent storage.
+**SECONDARY USE:** Read back stored files and list repo contents.
+## Available Commands
+### Write Operations
+- **upload_file** - Upload file content to a repository
+- **create_repo** - Create a new private repository
+### Read Operations
+- **list_files** - List all files in a repository
+- **read_file** - Read content of a specific file from a repository
+- **check_repo** - Check if a repository exists
+## Examples
+### Upload a script to a dataset repo
+Call this tool with:
+```json
+{
+  "operation": "upload_file",
+  "args": {
+    "file_content": "import pandas as pd\\nprint('Hello from HF!')",
+    "path_in_repo": "scripts/hello.py",
+    "repo_id": "my-dataset",
+    "repo_type": "dataset",
+    "create_if_missing": true,
+    "commit_message": "Add hello script"
+  }
+}
+```
+### Upload logs from a job
+Call this tool with:
+```json
+{
+  "operation": "upload_file",
+  "args": {
+    "file_content": "Job started...\\nJob completed successfully!",
+    "path_in_repo": "jobs/job-abc123/logs.txt",
+    "repo_id": "job-results",
+    "create_if_missing": true
+  }
+}
+```
+### Create a repository
+Call this tool with:
+```json
+{
+  "operation": "create_repo",
+  "args": {
+    "repo_id": "my-results",
+    "repo_type": "dataset"
+  }
+}
+```
+Note: Repositories are always created as private.
+### Check if a repository exists
+Call this tool with:
+```json
+{
+  "operation": "check_repo",
+  "args": {
+    "repo_id": "my-dataset",
+    "repo_type": "dataset"
+  }
+}
+```
+### List files in a repository
+Call this tool with:
+```json
+{
+  "operation": "list_files",
+  "args": {
+    "repo_id": "job-results",
+    "repo_type": "dataset"
+  }
+}
+```
+### Read a file from a repository
+Call this tool with:
+```json
+{
+  "operation": "read_file",
+  "args": {
+    "repo_id": "job-results",
+    "path_in_repo": "jobs/job-abc123/script.py",
+    "repo_type": "dataset"
+  }
+}
+```
+## Repository Types
+- **dataset** (default) - For storing data, results, logs, scripts
+- **model** - For ML models and related artifacts
+- **space** - For Spaces and applications
+## Tips
+- **Content-based**: Pass file content directly as strings or bytes, not file paths
+- **Repo ID format**: Use just the repo name (e.g., "my-dataset"). Username is automatically inferred from HF_TOKEN
+- **Automatic repo creation**: Set `create_if_missing: true` to auto-create repos (requires user approval)
+- **Organization**: Use path_in_repo to organize files (e.g., "jobs/job-123/script.py")
+- **After jobs**: Upload job scripts and logs after compute jobs complete for reproducibility
+"""
+        return {"formatted": usage_text, "totalResults": 1, "resultsShared": 1}
+    def _show_operation_help(self, operation: str) -> ToolResult:
+        """Show help for a specific operation.
+
+        The text is generic: it only echoes the operation name and points to
+        the main help; no per-operation details are produced.
+        """
+        help_text = f"Help for operation: {operation}\n\nCall with appropriate arguments. Use the main help for examples."
+        return {"formatted": help_text, "totalResults": 1, "resultsShared": 1}
+    async def _upload_file(self, args: Dict[str, Any]) -> ToolResult:
+        """Upload file content to a Hub repository.
+
+        Args:
+            args: Operation arguments. Required: "file_content" (str or bytes),
+                "path_in_repo", "repo_id". Optional: "repo_type" (default
+                "dataset"), "create_if_missing" (default False) and
+                "commit_message" (default "Upload <path_in_repo>").
+
+        Returns:
+            A ToolResult with links to the uploaded file on success, or one
+            with "isError": True when an argument is missing, the repository
+            does not exist (or could not be created), or the upload fails.
+        """
+        # Validate required arguments, reporting the first one that is missing
+        for required in ("file_content", "path_in_repo", "repo_id"):
+            if not args.get(required):
+                return {
+                    "formatted": f"{required} is required",
+                    "totalResults": 0,
+                    "resultsShared": 0,
+                    "isError": True,
+                }
+        file_content = args["file_content"]
+        path_in_repo = args["path_in_repo"]
+        repo_id = args["repo_id"]
+        repo_type = args.get("repo_type", "dataset")
+        create_if_missing = args.get("create_if_missing", False)
+        # Check if repo exists
+        try:
+            repo_exists = await _async_call(
+                self.api.repo_exists, repo_id=repo_id, repo_type=repo_type
+            )
+            # Create repo if needed
+            if not repo_exists and create_if_missing:
+                created = await self._create_repo(
+                    {"repo_id": repo_id, "repo_type": repo_type}
+                )
+                # Surface a failed creation directly instead of attempting an
+                # upload into a repository that does not exist
+                if created.get("isError", False):
+                    return created
+            elif not repo_exists:
+                return {
+                    "formatted": f"Repository {repo_id} does not exist. Set create_if_missing: true to create it.",
+                    "totalResults": 0,
+                    "resultsShared": 0,
+                    "isError": True,
+                }
+        except Exception as e:
+            return {
+                "formatted": f"Failed to check repository: {str(e)}",
+                "totalResults": 0,
+                "resultsShared": 0,
+                "isError": True,
+            }
+        # Convert content to bytes
+        file_bytes = _content_to_bytes(file_content)
+        # Upload file
+        try:
+            await _async_call(
+                self.api.upload_file,
+                path_or_fileobj=file_bytes,
+                path_in_repo=path_in_repo,
+                repo_id=repo_id,
+                repo_type=repo_type,
+                commit_message=args.get("commit_message", f"Upload {path_in_repo}"),
+            )
+            repo_url = _build_repo_url(repo_id, repo_type)
+            file_url = f"{repo_url}/blob/main/{path_in_repo}"
+            response = f"""✓ File uploaded successfully!
+**Repository:** {repo_id}
+**File:** {path_in_repo}
+**View at:** {file_url}
+**Browse repo:** {repo_url}"""
+            return {"formatted": response, "totalResults": 1, "resultsShared": 1}
+        except Exception as e:
+            return {
+                "formatted": f"Failed to upload file: {str(e)}",
+                "totalResults": 0,
+                "resultsShared": 0,
+                "isError": True,
+            }
+    async def _create_repo(self, args: Dict[str, Any]) -> ToolResult:
+        """Create a new private Hub repository.
+
+        Args:
+            args: Operation arguments. Required: "repo_id". Optional:
+                "repo_type" (default "dataset"). A "private" entry, if
+                present, is not read.
+
+        Returns:
+            A ToolResult describing the new repository, a non-error result
+            when the repository already exists, or a result with
+            "isError": True when "repo_id" is missing or an API call fails.
+        """
+        repo_id = args.get("repo_id")
+        if not repo_id:
+            return {
+                "formatted": "repo_id is required",
+                "totalResults": 0,
+                "resultsShared": 0,
+                "isError": True,
+            }
+        repo_type = args.get("repo_type", "dataset")
+        private = True  # Always create private repos
+        try:
+            # Check if repo already exists
+            repo_exists = await _async_call(
+                self.api.repo_exists, repo_id=repo_id, repo_type=repo_type
+            )
+            if repo_exists:
+                # Already there: report it as a success, not an error
+                repo_url = _build_repo_url(repo_id, repo_type)
+                return {
+                    "formatted": f"Repository {repo_id} already exists.\n**View at:** {repo_url}",
+                    "totalResults": 1,
+                    "resultsShared": 1,
+                }
+            # Create repository
+            # The value returned by create_repo is interpolated into the
+            # message below as the repository URL
+            repo_url = await _async_call(
+                self.api.create_repo,
+                repo_id=repo_id,
+                repo_type=repo_type,
+                private=private,
+                exist_ok=True,
+            )
+            response = f"""✓ Repository created successfully!
+**Repository:** {repo_id}
+**Type:** {repo_type}
+**Private:** Yes
+**View at:** {repo_url}"""
+            return {"formatted": response, "totalResults": 1, "resultsShared": 1}
+        except Exception as e:
+            return {
+                "formatted": f"Failed to create repository: {str(e)}",
+                "totalResults": 0,
+                "resultsShared": 0,
+                "isError": True,
+            }
+    async def _check_repo(self, args: Dict[str, Any]) -> ToolResult:
+        """Check if a Hub repository exists.
+
+        Args:
+            args: Operation arguments. Required: "repo_id". Optional:
+                "repo_type" (default "dataset").
+
+        Returns:
+            A ToolResult whose counts are 1 when the repository exists and 0
+            otherwise; a missing repository is not flagged as an error and the
+            message includes a ready-to-use create_repo call. "isError": True
+            is set only when "repo_id" is missing or the lookup raises.
+        """
+        repo_id = args.get("repo_id")
+        if not repo_id:
+            return {
+                "formatted": "repo_id is required",
+                "totalResults": 0,
+                "resultsShared": 0,
+                "isError": True,
+            }
+        repo_type = args.get("repo_type", "dataset")
+        try:
+            repo_exists = await _async_call(
+                self.api.repo_exists, repo_id=repo_id, repo_type=repo_type
+            )
+            if repo_exists:
+                repo_url = _build_repo_url(repo_id, repo_type)
+                response = f"""✓ Repository exists!
+**Repository:** {repo_id}
+**Type:** {repo_type}
+**View at:** {repo_url}"""
+            else:
+                # Doubled braces are literal braces in the f-string (JSON example)
+                response = f"""Repository does not exist: {repo_id}
+To create it, call this tool with:
+```json
+{{
+  "operation": "create_repo",
+  "args": {{
+    "repo_id": "{repo_id}",
+    "repo_type": "{repo_type}"
+  }}
+}}
+```"""
+            return {
+                "formatted": response,
+                "totalResults": 1 if repo_exists else 0,
+                "resultsShared": 1 if repo_exists else 0,
+            }
+        except Exception as e:
+            return {
+                "formatted": f"Failed to check repository: {str(e)}",
+                "totalResults": 0,
+                "resultsShared": 0,
+                "isError": True,
+            }
+    async def _list_files(self, args: Dict[str, Any]) -> ToolResult:
+        """List all files in a Hub repository.
+
+        Args:
+            args: Operation arguments. Required: "repo_id". Optional:
+                "repo_type" (default "dataset").
+
+        Returns:
+            A ToolResult with the sorted file list and the file count in both
+            counters; an empty repository gives a non-error result with zero
+            counts. "isError": True is set when "repo_id" is missing or the
+            listing raises.
+        """
+        repo_id = args.get("repo_id")
+        if not repo_id:
+            return {
+                "formatted": "repo_id is required",
+                "totalResults": 0,
+                "resultsShared": 0,
+                "isError": True,
+            }
+        repo_type = args.get("repo_type", "dataset")
+        try:
+            # List all files in the repository
+            files = await _async_call(
+                self.api.list_repo_files, repo_id=repo_id, repo_type=repo_type
+            )
+            if not files:
+                return {
+                    "formatted": f"No files found in repository: {repo_id}",
+                    "totalResults": 0,
+                    "resultsShared": 0,
+                }
+            # Format file list
+            file_list = "\n".join(f"- {f}" for f in sorted(files))
+            repo_url = _build_repo_url(repo_id, repo_type)
+            response = f"""✓ Files in repository: {repo_id}
+**Total files:** {len(files)}
+**Repository URL:** {repo_url}
+**Files:**
+{file_list}"""
+            return {
+                "formatted": response,
+                "totalResults": len(files),
+                "resultsShared": len(files),
+            }
+        except Exception as e:
+            return {
+                "formatted": f"Failed to list files: {str(e)}",
+                "totalResults": 0,
+                "resultsShared": 0,
+                "isError": True,
+            }
+    async def _read_file(self, args: Dict[str, Any]) -> ToolResult:
+        """Read content of a specific file from a Hub repository.
+
+        Args:
+            args: Operation arguments. Required: "repo_id", "path_in_repo".
+                Optional: "repo_type" (default "dataset").
+
+        Returns:
+            A ToolResult embedding the file's text. A file that cannot be
+            decoded as UTF-8 gives a non-error result reporting only its size
+            in bytes. "isError": True is set when a required argument is
+            missing or the download/read fails.
+        """
+        repo_id = args.get("repo_id")
+        path_in_repo = args.get("path_in_repo")
+        if not repo_id:
+            return {
+                "formatted": "repo_id is required",
+                "totalResults": 0,
+                "resultsShared": 0,
+                "isError": True,
+            }
+        if not path_in_repo:
+            return {
+                "formatted": "path_in_repo is required",
+                "totalResults": 0,
+                "resultsShared": 0,
+                "isError": True,
+            }
+        repo_type = args.get("repo_type", "dataset")
+        try:
+            # Download file to cache and read it
+            file_path = await _async_call(
+                hf_hub_download,
+                repo_id=repo_id,
+                filename=path_in_repo,
+                repo_type=repo_type,
+                token=self.api.token,
+            )
+            # Read file content
+            # NOTE(review): this open()/read() is not wrapped in _async_call,
+            # unlike the download above
+            with open(file_path, "r", encoding="utf-8") as f:
+                content = f.read()
+            repo_url = _build_repo_url(repo_id, repo_type)
+            file_url = f"{repo_url}/blob/main/{path_in_repo}"
+            # Size is reported in decoded characters, not bytes
+            response = f"""✓ File read successfully!
+**Repository:** {repo_id}
+**File:** {path_in_repo}
+**Size:** {len(content)} characters
+**View at:** {file_url}
+**Content:**
+```
+{content}
+```"""
+            return {"formatted": response, "totalResults": 1, "resultsShared": 1}
+        except UnicodeDecodeError:
+            # If file is binary, return size info instead
+            # file_path is bound here: within the try, only the text read
+            # (after the download) decodes file data
+            try:
+                with open(file_path, "rb") as f:
+                    binary_content = f.read()
+                return {
+                    "formatted": f"File is binary ({len(binary_content)} bytes). Cannot display as text.",
+                    "totalResults": 1,
+                    "resultsShared": 1,
+                }
+            except Exception as e:
+                return {
+                    "formatted": f"Failed to read binary file: {str(e)}",
+                    "totalResults": 0,
+                    "resultsShared": 0,
+                    "isError": True,
+                }
+        except Exception as e:
+            return {
+                "formatted": f"Failed to read file: {str(e)}",
+                "totalResults": 0,
+                "resultsShared": 0,
+                "isError": True,
+            }
+# Tool specification for agent registration.
+# Schema for the `hf_private_repos` tool: an `operation` selector plus a
+# free-form `args` object. No `required` list is declared, which matches the
+# description's "Call with no operation for full usage instructions".
+PRIVATE_HF_REPO_TOOL_SPEC = {
+    "name": "hf_private_repos",
+    "description": (
+        "Manage private Hugging Face repositories. "
+        "PRIMARY USE: Store job outputs, scripts, and logs from HF Jobs (ephemeral results need persistent storage). "
+        "SECONDARY USE: Read back stored files and list repo contents. "
+        "Pass file content as strings/bytes (no filesystem needed). "
+        "Call with no operation for full usage instructions."
+    ),
+    "parameters": {
+        "type": "object",
+        "properties": {
+            # The enum and the human-readable list in the description name the same five operations.
+            "operation": {
+                "type": "string",
+                "enum": ["upload_file", "create_repo", "check_repo", "list_files", "read_file"],
+                "description": (
+                    "Operation to execute. Valid values: [upload_file, create_repo, check_repo, list_files, read_file]"
+                ),
+            },
+            # Per-operation keys are described in prose only; the schema itself
+            # does not enumerate them and accepts any additional property.
+            "args": {
+                "type": "object",
+                "description": (
+                    "Operation-specific arguments as a JSON object. "
+                    "Write ops: file_content (string/bytes), path_in_repo (string), repo_id (string), "
+                    "repo_type (dataset/model/space), create_if_missing (boolean), commit_message (string). "
+                    "Read ops: repo_id (string), path_in_repo (for read_file), repo_type (optional)."
+                ),
+                "additionalProperties": True,
+            },
+        },
+    },
+}
+async def private_hf_repo_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
+    """Handler for agent tool router."""
+    try:
+        tool = PrivateHfRepoTool()
+        result = await tool.execute(arguments)
+        return result["formatted"], not result.get("isError", False)
+    except Exception as e:
+        return f"Error executing Private HF Repo tool: {str(e)}", False

agent/tools/utils_tools.py ADDED Viewed

	@@ -0,0 +1,203 @@

+"""
+Utils Tools - General utility operations
+Provides system information like current date/time with timezone support.
+"""
+import asyncio
+from datetime import datetime
+from typing import Any, Dict, Literal, Optional
+try:
+    import zoneinfo
+except ImportError:
+    from backports import zoneinfo
+from agent.tools.types import ToolResult
+# Literal type listing the operation names of this tool; "get_datetime" is the
+# only one, matching the single branch dispatched by UtilsTool.execute.
+OperationType = Literal["get_datetime"]
+class UtilsTool:
+    """Tool for general utility operations."""
+    async def execute(self, params: Dict[str, Any]) -> ToolResult:
+        """Execute the specified utility operation."""
+        operation = params.get("operation")
+        args = params.get("args", {})
+        # If no operation provided, return usage instructions
+        if not operation:
+            return self._show_help()
+        # Normalize operation name
+        operation = operation.lower()
+        # Check if help is requested
+        if args.get("help"):
+            return self._show_operation_help(operation)
+        try:
+            # Route to appropriate handler
+            if operation == "get_datetime":
+                return await self._get_datetime(args)
+            else:
+                return {
+                    "formatted": f'Unknown operation: "{operation}"\n\n'
+                    "Available operations: get_datetime\n\n"
+                    "Call this tool with no operation for full usage instructions.",
+                    "totalResults": 0,
+                    "resultsShared": 0,
+                    "isError": True,
+                }
+        except Exception as e:
+            return {
+                "formatted": f"Error executing {operation}: {str(e)}",
+                "totalResults": 0,
+                "resultsShared": 0,
+                "isError": True,
+            }
+    def _show_help(self) -> ToolResult:
+        """Show usage instructions when tool is called with no arguments."""
+        usage_text = """# Utils Tool
+Utility operations for system information.
+## Available Commands
+- **get_datetime** - Get current date and time with timezone support
+## Examples
+### Get current date and time (Paris timezone by default)
+Call this tool with:
+```json
+{
+  "operation": "get_datetime",
+  "args": {}
+}
+```
+### Get current date and time in a specific timezone
+Call this tool with:
+```json
+{
+  "operation": "get_datetime",
+  "args": {
+    "timezone": "America/New_York"
+  }
+}
+```
+Common timezones: Europe/Paris, America/New_York, America/Los_Angeles, Asia/Tokyo, UTC
+## Tips
+- **Default timezone**: Paris (Europe/Paris)
+- **Date format**: dd-mm-yyyy
+- **Time format**: HH:MM:SS.mmm (24-hour format with milliseconds)
+- **Timezone names**: Use IANA timezone database names (e.g., "Europe/Paris", "UTC")
+"""
+        return {"formatted": usage_text, "totalResults": 1, "resultsShared": 1}
+    def _show_operation_help(self, operation: str) -> ToolResult:
+        """Show help for a specific operation."""
+        help_text = f"Help for operation: {operation}\n\nCall with appropriate arguments. Use the main help for examples."
+        return {"formatted": help_text, "totalResults": 1, "resultsShared": 1}
+    async def _get_datetime(self, args: Dict[str, Any]) -> ToolResult:
+        """Get current date and time with timezone support."""
+        timezone_name = args.get("timezone", "Europe/Paris")
+        try:
+            # Get timezone object
+            tz = zoneinfo.ZoneInfo(timezone_name)
+            # Get current datetime in specified timezone
+            now = datetime.now(tz)
+            # Format date as dd-mm-yyyy
+            date_str = now.strftime("%d-%m-%Y")
+            # Format time as HH:MM:SS.mmm
+            time_str = now.strftime("%H:%M:%S.%f")[:-3]  # Remove last 3 digits to keep only milliseconds
+            # Get timezone abbreviation/offset
+            tz_offset = now.strftime("%z")
+            tz_name = now.strftime("%Z")
+            response = f"""✓ Current date and time
+**Date:** {date_str}
+**Time:** {time_str}
+**Timezone:** {timezone_name} ({tz_name}, UTC{tz_offset[:3]}:{tz_offset[3:]})
+**ISO Format:** {now.isoformat()}
+**Unix Timestamp:** {int(now.timestamp())}"""
+            return {"formatted": response, "totalResults": 1, "resultsShared": 1}
+        except zoneinfo.ZoneInfoNotFoundError:
+            return {
+                "formatted": f"Invalid timezone: {timezone_name}\n\n"
+                "Use IANA timezone database names like:\n"
+                "- Europe/Paris\n"
+                "- America/New_York\n"
+                "- Asia/Tokyo\n"
+                "- UTC\n\n"
+                "See: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones",
+                "totalResults": 0,
+                "resultsShared": 0,
+                "isError": True,
+            }
+        except Exception as e:
+            return {
+                "formatted": f"Failed to get date/time: {str(e)}",
+                "totalResults": 0,
+                "resultsShared": 0,
+                "isError": True,
+            }
+# Tool specification for agent registration.
+# Schema for the `utils` tool: an `operation` selector plus a free-form `args`
+# object. No `required` list is declared, which matches the description's
+# "Call with no operation for full usage instructions".
+UTILS_TOOL_SPEC = {
+    "name": "utils",
+    "description": (
+        "Utility operations for system information. "
+        "Get current date (dd-mm-yyyy) and time (HH:MM:SS.mmm) with timezone support. "
+        "Default timezone: Paris (Europe/Paris). "
+        "Call with no operation for full usage instructions."
+    ),
+    "parameters": {
+        "type": "object",
+        "properties": {
+            # Single-value enum: get_datetime is the only operation offered.
+            "operation": {
+                "type": "string",
+                "enum": ["get_datetime"],
+                "description": "Operation to execute. Valid values: [get_datetime]",
+            },
+            # Argument keys are described in prose only; the schema accepts any additional property.
+            "args": {
+                "type": "object",
+                "description": (
+                    "Operation-specific arguments as a JSON object. "
+                    "For get_datetime: timezone (string, optional, default: Europe/Paris). "
+                    "Use IANA timezone names like 'America/New_York', 'Asia/Tokyo', 'UTC'."
+                ),
+                "additionalProperties": True,
+            },
+        },
+    },
+}
+async def utils_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
+    """Handler for agent tool router."""
+    try:
+        tool = UtilsTool()
+        result = await tool.execute(arguments)
+        return result["formatted"], not result.get("isError", False)
+    except Exception as e:
+        return f"Error executing Utils tool: {str(e)}", False

test_mcp_connection.py ADDED Viewed

	@@ -0,0 +1,45 @@

+"""Test script to check MCP server connection"""
+import asyncio
+from pathlib import Path
+from agent.config import load_config
+from agent.core.tools import ToolRouter
+async def test_mcp():
+    print("Loading config...")
+    config_path = Path(__file__).parent / "configs" / "main_agent_config.json"
+    config = load_config(config_path)
+    print(f"MCP Servers configured: {list(config.mcpServers.keys())}")
+    print(f"\nInitializing ToolRouter...")
+    tool_router = ToolRouter(config.mcpServers)
+    print("Entering async context (this will init MCP servers)...")
+    try:
+        async with tool_router as router:
+            print("✓ MCP initialization successful!")
+            tools = router.get_tool_specs_for_llm()
+            print(f"\nTotal tools available: {len(tools)}")
+            builtin = [t for t in tools if t['function']['name'] in ['hf_jobs', 'hf_private_repos', 'hf_doc_search', 'plan_tool']]
+            mcp = [t for t in tools if t not in builtin]
+            print(f"Built-in tools: {len(builtin)}")
+            for tool in builtin:
+                print(f"  - {tool['function']['name']}")
+            print(f"\nMCP tools: {len(mcp)}")
+            for tool in mcp[:5]:  # Show first 5
+                print(f"  - {tool['function']['name']}")
+            if len(mcp) > 5:
+                print(f"  ... and {len(mcp) - 5} more")
+    except Exception as e:
+        print(f"✗ Error during MCP initialization: {e}")
+        import traceback
+        traceback.print_exc()
+# Run the connection check only when this file is executed as a script.
+if __name__ == "__main__":
+    asyncio.run(test_mcp())