Spaces:

smolagents
/

ml-intern

Running on CPU Upgrade

App Files Files Community

Aksel Joonas Reedi committed on Jan 3

Commit

3765ba2

2 Parent(s): 1598bb4 b307ff7

main agent search has better vibes

Browse files

Files changed (9) hide show

agent/core/tools.py +44 -6
agent/prompts/search_docs_system_prompt.yaml +0 -36
agent/prompts/system_prompt.yaml +8 -3
agent/tools/__init__.py +0 -3
agent/tools/{_search_agent_tools.py → docs_tools.py} +3 -3
agent/tools/jobs_tool.py +15 -16
agent/tools/search_docs_tool.py +0 -272
configs/_subagent_config_search_agent.json +0 -12
run_search_agent.py +0 -162

agent/core/tools.py CHANGED Viewed

@@ -13,9 +13,14 @@ from lmnr import observe
 from mcp.types import EmbeddedResource, ImageContent, TextContent
 from agent.config import MCPServerConfig
 from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, hf_jobs_handler
 from agent.tools.plan_tool import PLAN_TOOL_SPEC, plan_tool_handler
-from agent.tools.search_docs_tool import SEARCH_DOCS_TOOL_SPEC, search_docs_handler
 # Suppress aiohttp deprecation warning
 warnings.filterwarnings(
@@ -122,6 +127,27 @@ class ToolRouter:
                 )
             )
     def get_tool_specs_for_llm(self) -> list[dict[str, Any]]:
         """Get tool specifications in OpenAI format"""
         specs = []
@@ -145,6 +171,10 @@ class ToolRouter:
             await self.register_mcp_tools()
             self._mcp_initialized = True
         print(f"MCP initialized: {self._mcp_initialized}")
         return self
     async def __aexit__(self, exc_type, exc, tb) -> None:
@@ -189,16 +219,24 @@ class ToolRouter:
 def create_builtin_tools() -> list[ToolSpec]:
     """Create built-in tool specifications"""
     print(
-        f"Creating built-in tools: {SEARCH_DOCS_TOOL_SPEC['name']}, {PLAN_TOOL_SPEC['name']}, {HF_JOBS_TOOL_SPEC['name']}"
     )
     # in order of importance
     return [
         ToolSpec(
-            name=SEARCH_DOCS_TOOL_SPEC["name"],
-            description=SEARCH_DOCS_TOOL_SPEC["description"],
-            parameters=SEARCH_DOCS_TOOL_SPEC["parameters"],
-            handler=search_docs_handler,
         ),
         ToolSpec(
             name=PLAN_TOOL_SPEC["name"],
             description=PLAN_TOOL_SPEC["description"],

 from mcp.types import EmbeddedResource, ImageContent, TextContent
 from agent.config import MCPServerConfig
+from agent.tools.docs_tools import (
+    EXPLORE_HF_DOCS_TOOL_SPEC,
+    HF_DOCS_FETCH_TOOL_SPEC,
+    explore_hf_docs_handler,
+    hf_docs_fetch_handler,
+)
 from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, hf_jobs_handler
 from agent.tools.plan_tool import PLAN_TOOL_SPEC, plan_tool_handler
 # Suppress aiohttp deprecation warning
 warnings.filterwarnings(
                 )
             )
+    async def register_openapi_tool(self) -> None:
+        """Register the OpenAPI search tool (requires async initialization)"""
+        from agent.tools.docs_tools import (
+            _get_api_search_tool_spec,
+            search_openapi_handler,
+        )
+        print("Registering OpenAPI search tool...")
+        # Register search_hf_api_endpoints with dynamic spec
+        openapi_spec = await _get_api_search_tool_spec()
+        self.register_tool(
+            ToolSpec(
+                name=openapi_spec["name"],
+                description=openapi_spec["description"],
+                parameters=openapi_spec["parameters"],
+                handler=search_openapi_handler,
+            )
+        )
+        print(f"Registered: {openapi_spec['name']}")
     def get_tool_specs_for_llm(self) -> list[dict[str, Any]]:
         """Get tool specifications in OpenAI format"""
         specs = []
             await self.register_mcp_tools()
             self._mcp_initialized = True
         print(f"MCP initialized: {self._mcp_initialized}")
+        # Register OpenAPI tool (requires async initialization)
+        await self.register_openapi_tool()
         return self
     async def __aexit__(self, exc_type, exc, tb) -> None:
 def create_builtin_tools() -> list[ToolSpec]:
     """Create built-in tool specifications"""
     print(
+        f"Creating built-in tools: {EXPLORE_HF_DOCS_TOOL_SPEC['name']}, {HF_DOCS_FETCH_TOOL_SPEC['name']}, {PLAN_TOOL_SPEC['name']}, {HF_JOBS_TOOL_SPEC['name']}"
     )
     # in order of importance
     return [
+        # Documentation search tools
+        ToolSpec(
+            name=EXPLORE_HF_DOCS_TOOL_SPEC["name"],
+            description=EXPLORE_HF_DOCS_TOOL_SPEC["description"],
+            parameters=EXPLORE_HF_DOCS_TOOL_SPEC["parameters"],
+            handler=explore_hf_docs_handler,
+        ),
         ToolSpec(
+            name=HF_DOCS_FETCH_TOOL_SPEC["name"],
+            description=HF_DOCS_FETCH_TOOL_SPEC["description"],
+            parameters=HF_DOCS_FETCH_TOOL_SPEC["parameters"],
+            handler=hf_docs_fetch_handler,
         ),
+        # Planning and job management tools
         ToolSpec(
             name=PLAN_TOOL_SPEC["name"],
             description=PLAN_TOOL_SPEC["description"],

agent/prompts/search_docs_system_prompt.yaml DELETED Viewed

@@ -1,36 +0,0 @@
-search_docs_system_prompt: |
-  You are a specialized documentation search agent. Your task is to comprehensively search and synthesize information from Hugging Face documentation. You are queried by a main agent who has to build a solution to a user. You have to give the best and the most comprehensive guidance on how to solve the user's task.
-  # Search Strategy
-  You must search thoroughly before synthesizing results. Follow this approach:
-  1. **Query Analysis**: Identify the core concepts and intent of the original user query and the search query passed by the LLM.
-  2. **Initial Search**: Start with a broad search capturing the main topic
-  3. **Iterative Refinement**: Run multiple searches to go deeper into topics. If you see links to other pages, also look into those pages for best information - first-pass results often miss key details
-  4. **You must get to the end truth**: You must get to the bottom of the truth for this search query. You CAN NOT say that somebody should look up documentation. You must look it up yourself and give the best answer you can including code snippets and relevant information. You are teaching the main agent how to solve the user's task and have to give ALL relevant information on how to do it.
-  # Quality metrics:
-  - You are optimizing for the minimum viable way to solve the user request reusing as much as possible from already available code from your research. Opt for reliability and reusability. Hugging Face has a lot of best practices laid out in the documentation and you must pass these to the main agent.
-  # Useful links:
-  - code examples for trl (covers most LLM training tasks): https://github.com/huggingface/trl/tree/main/examples/scripts and https://github.com/huggingface/trl/tree/main/trl/scripts
-  # Response Guidelines
-  After gathering results, synthesize them following these principles:
-  1. **Analyze Relevance**: Evaluate which results directly answer the query
-  2. **Synthesize**: Combine information from multiple sources when applicable
-  3. **Prioritize**: Present information in order of relevance
-  4. **Cite Sources**: Find and pass the relevant code and other snippets from the analyzed articles for the main agent to read.
-  5. **Acknowledge Gaps**: If documents don't fully answer the query, explicitly state this
-  6. **Handle Conflicts**: If sources contradict, note this and explain your reasoning
-  # Constraints
-  - Only provide information found in the documentation
-  - Do not make assumptions beyond what the sources state
-  - If information is not found, say so clearly rather than guessing
-  - Focus on giving the best practices and comprehensive guidance on how to solve the user's task. Include all relevant code snippets without edits from the docs and simplest ways on how to solve the user's task.

agent/prompts/system_prompt.yaml CHANGED Viewed

@@ -6,8 +6,11 @@ system_prompt: |
   **CRITICAL: Research First, Then Implement**
   For ANY implementation task (training, fine-tuning, inference, data processing, etc.):
-  1. **FIRST**: Use `research_solution` to search HF documentation and find the recommended approach
      - This is MANDATORY before writing any code or making implementation decisions
      - Research what libraries to use, find code examples, understand best practices
      - Skip ONLY for simple factual questions (e.g., "What is LoRA?")
@@ -41,13 +44,15 @@ system_prompt: |
   # Conventions
-  - **ALWAYS use `research_solution` BEFORE implementing** any ML workflow (training, inference, data processing, etc.) - This is non-negotiable
   - Never assume you know the correct library, method, or approach - you must verify with documentation first
   - Base your implementation on researched best practices, not general knowledge or assumptions
   - Always search Hugging Face Hub for existing resources before suggesting custom implementations
   - Keep in mind that a space is a repo, so you can create a space directly by uploading files that way. Repos should also be used to store files permanently : post-execution, files from jobs are not available.
   - To run jobs, you must always pass the whole content of the file to execute. No files are available on the server. Your local files and remote files are entirely separate scopes.
-  - To access, create, or modify private Hub assets (spaces, private models, datasets, collections), pass `secrets: {% raw %}{{ "HF_TOKEN": "$HF_TOKEN" }}{% endraw %}` along with the jobs parameters. This is important. Without it, you will encounter authentification issues. Do not assume the user is connected on the jobs' server.
   - When referencing models, datasets, or papers, include direct links from search results
   - Before processing any dataset: inspect its actual structure first using the mcp__hf-mcp-server__hub_repo_details tool. Never assume column names: verify them beforehand.
   - Follow ML best practices: proper train/val/test splits, reproducibility, evaluation metrics

   **CRITICAL: Research First, Then Implement**
   For ANY implementation task (training, fine-tuning, inference, data processing, etc.):
+  1. **FIRST**: Search HF documentation to find the recommended approach
      - This is MANDATORY before writing any code or making implementation decisions
+     - Use `explore_hf_docs` to discover documentation structure for relevant libraries (e.g., "trl", "transformers", "diffusers")
+     - Use `fetch_hf_docs` to retrieve full content from specific documentation pages
+     - Use `search_hf_api_endpoints` to find API endpoints with usage examples
      - Research what libraries to use, find code examples, understand best practices
      - Skip ONLY for simple factual questions (e.g., "What is LoRA?")
   # Conventions
+  - **ALWAYS search documentation BEFORE implementing** any ML workflow (training, inference, data processing, etc.) - This is non-negotiable
+  - Use `explore_hf_docs`, `fetch_hf_docs`, and `search_hf_api_endpoints` to research the correct approach
   - Never assume you know the correct library, method, or approach - you must verify with documentation first
   - Base your implementation on researched best practices, not general knowledge or assumptions
   - Always search Hugging Face Hub for existing resources before suggesting custom implementations
   - Keep in mind that a space is a repo, so you can create a space directly by uploading files that way. Repos should also be used to store files permanently : post-execution, files from jobs are not available.
   - To run jobs, you must always pass the whole content of the file to execute. No files are available on the server. Your local files and remote files are entirely separate scopes.
+  - The HF_TOKEN is automatically loaded from the environment variables and passed to jobs; you do not need to provide it in the job parameters.
   - When referencing models, datasets, or papers, include direct links from search results
   - Before processing any dataset: inspect its actual structure first using the mcp__hf-mcp-server__hub_repo_details tool. Never assume column names: verify them beforehand.
   - Follow ML best practices: proper train/val/test splits, reproducibility, evaluation metrics

agent/tools/__init__.py CHANGED Viewed

@@ -3,7 +3,6 @@ Hugging Face tools for the agent
 """
 from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, HfJobsTool, hf_jobs_handler
-from agent.tools.search_docs_tool import SEARCH_DOCS_TOOL_SPEC, search_docs_handler
 from agent.tools.types import ToolResult
 __all__ = [
@@ -11,6 +10,4 @@ __all__ = [
     "HF_JOBS_TOOL_SPEC",
     "hf_jobs_handler",
     "HfJobsTool",
-    "SEARCH_DOCS_TOOL_SPEC",
-    "search_docs_handler",
 ]

 """
 from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, HfJobsTool, hf_jobs_handler
 from agent.tools.types import ToolResult
 __all__ = [
     "HF_JOBS_TOOL_SPEC",
     "hf_jobs_handler",
     "HfJobsTool",
 ]

agent/tools/{_search_agent_tools.py → docs_tools.py} RENAMED Viewed

@@ -1,6 +1,6 @@
 """
-Tools available to the search sub-agent
-These tools are used by the search sub-agent spawned by search_docs_tool
 """
 import asyncio
@@ -553,7 +553,7 @@ async def hf_docs_fetch_handler(arguments: dict[str, Any]) -> tuple[str, bool]:
         return f"Error fetching documentation: {str(e)}", False
-# Tool specifications for the search sub-agent
 EXPLORE_HF_DOCS_TOOL_SPEC = {
     "name": "explore_hf_docs",

 """
+Documentation search tools for the HF Agent
+Tools for exploring and fetching HuggingFace documentation and API specifications
 """
 import asyncio
         return f"Error fetching documentation: {str(e)}", False
+# Tool specifications for documentation search
 EXPLORE_HF_DOCS_TOOL_SPEC = {
     "name": "explore_hf_docs",

agent/tools/jobs_tool.py CHANGED Viewed

@@ -10,7 +10,7 @@ import os
 from typing import Any, Dict, Literal, Optional
 from huggingface_hub import HfApi
-from huggingface_hub.utils import HfHubHTTPError, get_token_to_send
 from agent.tools.types import ToolResult
 from agent.tools.utilities import (
@@ -63,20 +63,19 @@ UV_DEFAULT_IMAGE = "ghcr.io/astral-sh/uv:python3.12-bookworm"
 def _add_environment_variables(params: Dict[str, Any] | None) -> Dict[str, Any]:
-    """
-    Automatically adds selected environment variables to the parameters passed by LLM.
-    Args:
-        params: Dictionary that may contain "HF_TOKEN" and other environment variables as keys
-    Returns:
-        Dictionary with environment variables added
-    """
-    result = {"HF_TOKEN": get_token_to_send(os.environ.get("HF_TOKEN", ""))}
-    if params:
-        result.update(params)
     return result
@@ -747,9 +746,9 @@ HF_JOBS_TOOL_SPEC = {
         "GPU: t4-small, t4-medium, l4x1, a10g-small, a10g-large, a100-large, h100\n\n"
         "## Examples:\n\n"
         "**Fine-tune LLM and push to Hub:**\n"
-        "{'operation': 'run', 'script': 'from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer\\nmodel = AutoModelForCausalLM.from_pretrained(\"gpt2\")\\n# ... training code ...\\nmodel.push_to_hub(\"my-finetuned-model\")', 'dependencies': ['transformers', 'torch', 'datasets'], 'hardware_flavor': 'a10g-large', 'timeout': '4h', 'secrets': {'HF_TOKEN': '$HF_TOKEN'}}\n\n"
         "**Generate dataset daily and upload:**\n"
-        "{'operation': 'scheduled run', 'script': 'from datasets import Dataset\\nimport pandas as pd\\n# scrape/generate data\\ndf = pd.DataFrame(data)\\nds = Dataset.from_pandas(df)\\nds.push_to_hub(\"daily-dataset\")', 'dependencies': ['datasets', 'pandas'], 'schedule': '@daily', 'secrets': {'HF_TOKEN': '$HF_TOKEN'}}\n\n"
         "**Run custom training with Docker:**\n"
         "{'operation': 'run', 'image': 'pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime', 'command': ['python', 'train.py', '--epochs', '10'], 'hardware_flavor': 'a100-large'}\n\n"
         "**Monitor jobs:**\n"
@@ -812,9 +811,9 @@ HF_JOBS_TOOL_SPEC = {
                 "type": "string",
                 "description": "Max runtime. Examples: '30m', '2h', '4h'. Default: '30m'. Important for long training jobs. Use with 'run'/'scheduled run'.",
             },
-            "secrets": {
                 "type": "object",
-                "description": "Environment variables (private). Format: {'KEY': 'VALUE'}. Use {'HF_TOKEN': '$HF_TOKEN'} for Hub auth. Use with 'run'/'scheduled run'.",
             },
             # Job management parameters
             "job_id": {

 from typing import Any, Dict, Literal, Optional
 from huggingface_hub import HfApi
+from huggingface_hub.utils import HfHubHTTPError
 from agent.tools.types import ToolResult
 from agent.tools.utilities import (
 def _add_environment_variables(params: Dict[str, Any] | None) -> Dict[str, Any]:
+    token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN") or ""
+    # Start with user-provided env vars, then force-set token last
+    result = dict(params or {})
+    # If the caller passed HF_TOKEN="$HF_TOKEN", ignore it.
+    if result.get("HF_TOKEN", "").strip().startswith("$"):
+        result.pop("HF_TOKEN", None)
+    # Set both names to be safe (different libs check different vars)
+    if token:
+        result["HF_TOKEN"] = token
+        result["HUGGINGFACE_HUB_TOKEN"] = token
     return result
         "GPU: t4-small, t4-medium, l4x1, a10g-small, a10g-large, a100-large, h100\n\n"
         "## Examples:\n\n"
         "**Fine-tune LLM and push to Hub:**\n"
+        "{'operation': 'run', 'script': 'from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer\\nmodel = AutoModelForCausalLM.from_pretrained(\"gpt2\")\\n# ... training code ...\\nmodel.push_to_hub(\"user-name/my-finetuned-model\")', 'dependencies': ['transformers', 'torch', 'datasets'], 'hardware_flavor': 'a10g-large', 'timeout': '4h', 'env': {'CUSTOM_VAR': 'value'}}\n\n"
         "**Generate dataset daily and upload:**\n"
+        "{'operation': 'scheduled run', 'script': 'from datasets import Dataset\\nimport pandas as pd\\n# scrape/generate data\\ndf = pd.DataFrame(data)\\nds = Dataset.from_pandas(df)\\nds.push_to_hub(\"user-name/daily-dataset\")', 'dependencies': ['datasets', 'pandas'], 'schedule': '@daily'}\n\n"
         "**Run custom training with Docker:**\n"
         "{'operation': 'run', 'image': 'pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime', 'command': ['python', 'train.py', '--epochs', '10'], 'hardware_flavor': 'a100-large'}\n\n"
         "**Monitor jobs:**\n"
                 "type": "string",
                 "description": "Max runtime. Examples: '30m', '2h', '4h'. Default: '30m'. Important for long training jobs. Use with 'run'/'scheduled run'.",
             },
+            "env": {
                 "type": "object",
+                "description": "Environment variables. Format: {'KEY': 'VALUE'}. HF_TOKEN is automatically included from your auth. Use with 'run'/'scheduled run'.",
             },
             # Job management parameters
             "job_id": {

agent/tools/search_docs_tool.py DELETED Viewed

@@ -1,272 +0,0 @@
-"""
-Search documentation tool that spawns a sub-agent
-The sub-agent has its own agent loop and set of specialized search tools
-"""
-import asyncio
-from typing import Any
-from litellm.utils import get_max_tokens
-from agent.core.session import Session
-async def create_search_tool_router(github_mcp_config: dict[str, Any] | None = None):
-    """
-    Create a ToolRouter instance for the search sub-agent
-    Async because OpenAPI tool needs to fetch and parse spec at initialization
-    Args:
-        github_mcp_config: Optional GitHub MCP server configuration
-    """
-    # Import at runtime to avoid circular dependency
-    from fastmcp import Client
-    from agent.core.tools import ToolRouter
-    # List of allowed GitHub MCP tools
-    ALLOWED_GITHUB_TOOLS = {
-        "list_pull_requests",
-        "list_issues",
-        "search_code",
-        "search_issues",
-        "search_repositories",
-        "search_users",
-        "get_pull_request_status",
-        "get_pull_request_reviews",
-        "get_pull_request",
-        "get_issue",
-        "get_file_contents",
-    }
-    class SearchDocsToolRouter(ToolRouter):
-        """Specialized ToolRouter for the search sub-agent"""
-        def __init__(self, github_mcp_config: dict[str, Any] | None = None):
-            self.tools: dict[str, Any] = {}
-            self.mcp_servers: dict[str, dict[str, Any]] = {}
-            self._mcp_initialized = False
-            # Initialize MCP client with GitHub server if provided
-            if github_mcp_config:
-                self.mcp_client = Client({"mcpServers": github_mcp_config})
-            else:
-                self.mcp_client = None
-        async def initialize_tools(self):
-            """Initialize tools asynchronously"""
-            tools = await make_search_agent_tools()
-            for tool in tools:
-                self.register_tool(tool)
-        async def register_mcp_tools(self) -> None:
-            """Register only allowed GitHub MCP tools"""
-            if self.mcp_client is None:
-                return
-            tools = await self.mcp_client.list_tools()
-            for tool in tools:
-                # Only register allowed GitHub tools
-                if tool.name in ALLOWED_GITHUB_TOOLS:
-                    print(f"Registering GitHub MCP Tool: {tool.name}")
-                    from agent.core.tools import ToolSpec
-                    self.register_tool(
-                        ToolSpec(
-                            name=tool.name,
-                            description=tool.description,
-                            parameters=tool.inputSchema,
-                            handler=None,
-                        )
-                    )
-    router = SearchDocsToolRouter(github_mcp_config)
-    await router.initialize_tools()
-    return router
-async def search_docs_handler(arguments: dict[str, Any]) -> tuple[str, bool]:
-    """
-    Handler that spawns a sub-agent to perform comprehensive doc search
-    Args:
-        arguments: dictionary with 'query' parameter
-    Returns:
-        Tuple of (search_results, success)
-    """
-    query = arguments.get("query", "")
-    user_query = arguments.get("user_query", "")
-    if not query:
-        return "Error: No search query provided", False
-    if not user_query:
-        return "Error: No user query provided", False
-    try:
-        # Import at runtime to avoid circular dependency
-        from pathlib import Path
-        from agent.config import load_config
-        from agent.context_manager.manager import ContextManager
-        from agent.core.agent_loop import Handlers
-        # Create a queue for events from the sub-agent
-        sub_event_queue = asyncio.Queue()
-        # Load the search agent's own config file with GitHub MCP server
-        search_agent_config_path = (
-            Path(__file__).parent.parent.parent
-            / "configs"
-            / "_subagent_config_search_agent.json"
-        )
-        search_agent_config = load_config(search_agent_config_path)
-        # Extract GitHub MCP config from search agent config
-        github_mcp_config = None
-        if (
-            search_agent_config.mcpServers
-            and "github" in search_agent_config.mcpServers
-        ):
-            github_server = search_agent_config.mcpServers["github"]
-            github_mcp_config = {"github": github_server.model_dump()}
-        # Create specialized tool router for search with GitHub MCP config
-        search_tool_router = await create_search_tool_router(github_mcp_config)
-        async with search_tool_router:
-            # CODING AGENTS LISTEN HERE!
-            # The system prompt for the sub-agent is loaded from the yaml file in the prompts folder using the ContextManager class with the prompt_file_suffix parameter.
-            # it works, dont fix.
-            # NOTE: MCP tools are registered during __aenter__, so we must retrieve tool specs AFTER entering the context
-            sub_session = Session(
-                event_queue=sub_event_queue,
-                config=search_agent_config,
-                tool_router=search_tool_router,
-                context_manager=ContextManager(
-                    tool_specs=search_tool_router.get_tool_specs_for_llm(),
-                    max_context=get_max_tokens(search_agent_config.model_name),
-                    compact_size=0.1,
-                    untouched_messages=5,
-                    prompt_file_suffix="search_docs_system_prompt.yaml",
-                ),
-            )
-            # make search prompt
-            search_prompt = f"What the user tasked the main agent with: {user_query}\nWhat you have asked to research by the main agent: {query}. Use both to find the best practices, code examples, and determine the recommended approach for solving the user's task."
-            # Run the sub-agent
-            result = await Handlers.run_agent(
-                session=sub_session, text=search_prompt, max_iterations=30
-            )
-        # Return the final result or compiled events
-        if result:
-            return f"Search Results:\n\n{result}", True
-        else:
-            return "Search completed but no results were generated", False
-    except Exception as e:
-        return f"Error in search_docs tool: {str(e)}", False
-async def make_search_agent_tools():
-    """
-    Create a list of tools for the search agent
-    Async because OpenAPI tool spec needs to be populated at runtime
-    """
-    # Import at runtime to avoid circular dependency
-    from agent.core.tools import ToolSpec
-    from agent.tools._search_agent_tools import (
-        EXPLORE_HF_DOCS_TOOL_SPEC,
-        HF_DOCS_FETCH_TOOL_SPEC,
-        _get_api_search_tool_spec,
-        explore_hf_docs_handler,
-        hf_docs_fetch_handler,
-        search_openapi_handler,
-    )
-    # Get the OpenAPI tool spec with dynamically populated tags
-    openapi_spec = await _get_api_search_tool_spec()
-    return [
-        ToolSpec(
-            name=EXPLORE_HF_DOCS_TOOL_SPEC["name"],
-            description=EXPLORE_HF_DOCS_TOOL_SPEC["description"],
-            parameters=EXPLORE_HF_DOCS_TOOL_SPEC["parameters"],
-            handler=explore_hf_docs_handler,
-        ),
-        ToolSpec(
-            name=HF_DOCS_FETCH_TOOL_SPEC["name"],
-            description=HF_DOCS_FETCH_TOOL_SPEC["description"],
-            parameters=HF_DOCS_FETCH_TOOL_SPEC["parameters"],
-            handler=hf_docs_fetch_handler,
-        ),
-        ToolSpec(
-            name=openapi_spec["name"],
-            description=openapi_spec["description"],
-            parameters=openapi_spec["parameters"],
-            handler=search_openapi_handler,
-        ),
-    ]
-# Tool specification to be used by the main agent
-SEARCH_DOCS_TOOL_SPEC = {
-    "name": "research_solution",
-    "description": (
-        "Spawns a specialized research sub-agent to search to find best practices, locate code examples, and determine the recommended approach for solving the user's task.\n\n"
-        "SEARCH AGENT CAPABILITIES:\n"
-        "The search subagent has access to these specialized tools:\n"
-        "  - explore_hf_docs: Discovers documentation structure by parsing sidebar navigation, returns page titles, URLs, and content glimpses\n"
-        "  - fetch_hf_docs: Retrieves full markdown content from specific HF documentation pages\n"
-        "  - search_hf_api_endpoints: Searches HF OpenAPI specification by tag to find API endpoints with usage examples\n"
-        "  - GitHub tools: search_code, search_repositories, get_file_contents, list_issues, list_pull_requests (for searching HF repositories)\n"
-        "MANDATORY FIRST STEP for:\n"
-        "  - ANY task involving training, fine-tuning, or model deployment with HF libraries\n"
-        "  - Implementing ML workflows (data loading, preprocessing, training loops, inference pipelines)\n"
-        "  - Working with specific HF libraries (transformers, diffusers, trl, datasets, accelerate, etc.)\n"
-        "  - Finding the recommended/official way to accomplish ML tasks\n"
-        "  - Understanding which libraries and methods to use for a user's goal\n\n"
-        "ALSO USE for:\n"
-        "  - Verifying current API signatures, parameters, or available methods\n"
-        "  - Finding code examples and best practices from official documentation\n"
-        "  - Understanding relationships between HF libraries and components\n\n"
-        "SKIP ONLY when:\n"
-        "  - User asks simple factual questions answerable from general ML knowledge (e.g., 'What is fine-tuning?')\n"
-        "  - Task is about general Python/programming unrelated to ML or HF libraries\n"
-        "QUERY FORMAT:\n"
-        "Write queries as if delegating to an engineer. Include:\n"
-        "  - Specific library names (e.g., 'trl', 'transformers', 'diffusers')\n"
-        "  - Technical terminology from the domain (e.g., 'DPO trainer', 'GRPO', 'LoRA adapter')\n"
-        "  - Clear success criteria (e.g., 'find code example', 'verify parameter exists', 'get recommended approach')\n\n"
-        "QUERY EXAMPLES:\n"
-        "  Good: 'Find the best way to implement DPO training in trl. Get code example showing dataset format, trainer configuration, and reward model setup'\n"
-        "  Bad: 'dpo trainer'\n"
-        "  Good: 'Search transformers docs for the recommended approach to load and run quantized models with 4-bit precision. Find the specific classes and methods to use'\n"
-        "  Bad: 'quantization'\n"
-        "  Good: 'Research the best way to fine-tune a diffusion model for custom image generation. Find which library to use (diffusers/PEFT), required components, and complete training example'\n"
-        "  Bad: 'fine-tune diffusion'\n\n"
-    ),
-    "parameters": {
-        "type": "object",
-        "properties": {
-            "user_query": {
-                "type": "string",
-                "description": (
-                    "The original user query that you received. This will be used to search the documentation."
-                ),
-            },
-            "query": {
-                "type": "string",
-                "description": (
-                    "Detailed search query for the specialized agent. Must include: (1) specific library/component names, "
-                    "(2) technical terms or concepts to search for, (3) clear objective (e.g., 'find code example', "
-                    "'verify API exists', 'get implementation details'). The search agent will autonomously explore "
-                    "documentation structure, retrieve relevant pages, and compile results until the objective is met."
-                ),
-            },
-        },
-        "required": ["user_query", "query"],
-    },
-}

configs/_subagent_config_search_agent.json DELETED Viewed

@@ -1,12 +0,0 @@
-{
-  "model_name": "anthropic/claude-haiku-4-5",
-  "mcpServers": {
-    "github": {
-      "transport": "http",
-      "url": "https://api.githubcopilot.com/mcp/",
-      "headers": {
-        "Authorization": "Bearer ${GITHUB_TOKEN}"
-      }
-    }
-  }
-}

run_search_agent.py DELETED Viewed

@@ -1,162 +0,0 @@
-"""
-Standalone test script for the search sub-agent
-Run with: uv run python run_search_agent.py
-"""
-import asyncio
-from litellm.utils import get_max_tokens
-from agent.config import Config
-from agent.context_manager.manager import ContextManager
-from agent.core.agent_loop import Handlers
-from agent.core.session import Session
-from agent.tools.search_docs_tool import create_search_tool_router
-async def test_search_agent(query: str):
-    """Test the search sub-agent with a query"""
-    print(f"Testing search agent with query: {query}\n")
-    print("=" * 60)
-    # Import at runtime
-    from pathlib import Path
-    from agent.config import load_config
-    # Create event queue for the sub-agent
-    sub_event_queue = asyncio.Queue()
-    # Load the search agent's own config file with GitHub MCP server
-    search_agent_config_path = (
-        Path(__file__).parent / "configs" / "_subagent_config_search_agent.json"
-    )
-    search_agent_config = load_config(search_agent_config_path)
-    # Extract GitHub MCP config from search agent config
-    github_mcp_config = None
-    if search_agent_config.mcpServers and "github" in search_agent_config.mcpServers:
-        github_server = search_agent_config.mcpServers["github"]
-        github_mcp_config = {"github": github_server.model_dump()}
-    # Create search tool router with GitHub MCP config
-    search_tool_router = await create_search_tool_router(github_mcp_config)
-    # Create config
-    sub_config = Config(
-        model_name="anthropic/claude-haiku-4-5",
-    )
-    # Event listener to show what the sub-agent is doing
-    async def event_monitor():
-        while True:
-            try:
-                event = await asyncio.wait_for(sub_event_queue.get(), timeout=1.0)
-                if event.event_type == "assistant_message":
-                    content = event.data.get("content", "") if event.data else ""
-                    if content:
-                        print(f"\n🤖 Sub-agent: {content}\n")
-                elif event.event_type == "tool_call":
-                    tool_name = event.data.get("tool", "") if event.data else ""
-                    arguments = event.data.get("arguments", {}) if event.data else {}
-                    print(f"🔧 Tool call: {tool_name}")
-                    print(f"   Args: {arguments}")
-                elif event.event_type == "tool_output":
-                    output = event.data.get("output", "") if event.data else ""
-                    success = event.data.get("success", False) if event.data else False
-                    status = "✅" if success else "❌"
-                    print(f"{status} Tool output: {output}\n")
-                elif event.event_type == "turn_complete":
-                    print("✅ Sub-agent turn complete")
-                    break
-            except asyncio.TimeoutError:
-                # Check if agent is still running
-                continue
-            except Exception as e:
-                print(f"⚠️  Event error: {e}")
-                break
-    # Run the sub-agent and event monitor concurrently
-    async with search_tool_router:
-        # Create session with custom system prompt
-        # NOTE: MCP tools are registered during __aenter__, so we must create session AFTER entering the context
-        sub_session = Session(
-            event_queue=sub_event_queue,
-            config=sub_config,
-            tool_router=search_tool_router,
-            context_manager=ContextManager(
-                tool_specs=search_tool_router.get_tool_specs_for_llm(),
-                max_context=get_max_tokens(sub_config.model_name),
-                compact_size=0.1,
-                untouched_messages=5,
-                prompt_file_suffix="search_docs_system_prompt.yaml",
-            ),
-        )
-        monitor_task = asyncio.create_task(event_monitor())
-        result = await Handlers.run_agent(
-            session=sub_session, text=query, max_iterations=30
-        )
-        # Wait for event monitor to finish
-        await asyncio.wait_for(monitor_task, timeout=5.0)
-    print("\n" + "=" * 60)
-    print("FINAL RESULT:")
-    print("=" * 60)
-    if result:
-        print(result)
-    else:
-        print("No result returned")
-    print("=" * 60)
-async def main():
-    """Main test function"""
-    print("🧪 Search Sub-Agent Test\n")
-    # Example queries to test
-    test_queries = [
-        # "Explore the TRL documentation structure and find information about DPO trainer",
-        # "is there a way to get the logs from a served huggingface space",
-        """use exactly this call {\"tool_name\": \"search_hf_docs\", \"arguments\": {\"query\": \"vLLM offline batch inference Hugging Face models\"}}""",
-        # "How do I train GLM4.7 with a GRPO training loop with trl with llm judge as a reward model for training on hle?"
-        # "can i stream logs through the api for a served huggingface space",
-        # 'what tools do you have access to?',
-    ]
-    for i, query in enumerate(test_queries, 1):
-        print(f"\n{'=' * 60}")
-        print(f"TEST {i}/{len(test_queries)}")
-        print(f"{'=' * 60}\n")
-        try:
-            await test_search_agent(query)
-        except Exception as e:
-            print(f"\n❌ Test failed: {e}")
-            import traceback
-            traceback.print_exc()
-        if i < len(test_queries):
-            print("\n\nPress Enter to continue to next test...")
-            input()
-if __name__ == "__main__":
-    try:
-        asyncio.run(main())
-    except KeyboardInterrupt:
-        print("\n\n⚠️  Test interrupted")
-    except Exception as e:
-        print(f"\n❌ Error: {e}")
-        import traceback
-        traceback.print_exc()