Aksel Joonas Reedi committed on
Commit
907c9fd
·
2 Parent(s): eaf2575374fa24

updated jobs tool, system prompts and search capability

Browse files

Streamline agent architecture: integrate documentation search and improve job handling

agent/core/agent_loop.py CHANGED
@@ -103,32 +103,23 @@ class Handlers:
103
  Event(event_type="assistant_message", data={"content": content})
104
  )
105
 
106
- # Execute tools
 
 
 
107
  for tc in tool_calls:
108
  tool_name = tc.function.name
109
  tool_args = json.loads(tc.function.arguments)
110
 
111
- # Check if this tool requires user approval
112
  if _needs_approval(tool_name, tool_args):
113
- await session.send_event(
114
- Event(
115
- event_type="approval_required",
116
- data={
117
- "tool": tool_name,
118
- "arguments": tool_args,
119
- "tool_call_id": tc.id,
120
- },
121
- )
122
- )
123
-
124
- # Store pending approval and return early
125
- session.pending_approval = {
126
- "tool_call": tc,
127
- "arguments": tool_args,
128
- }
129
 
130
- # Return early - wait for EXEC_APPROVAL operation
131
- return None
 
 
132
 
133
  await session.send_event(
134
  Event(
@@ -161,6 +152,37 @@ class Handlers:
161
  )
162
  )
163
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  iteration += 1
165
 
166
  except Exception as e:
@@ -225,10 +247,8 @@ class Handlers:
225
  await session.send_event(Event(event_type="undo_complete"))
226
 
227
  @staticmethod
228
- async def exec_approval(
229
- session: Session, approved: bool, feedback: str | None = None
230
- ) -> None:
231
- """Handle job execution approval"""
232
  if not session.pending_approval:
233
  await session.send_event(
234
  Event(
@@ -238,12 +258,36 @@ class Handlers:
238
  )
239
  return
240
 
241
- tc = session.pending_approval["tool_call"]
242
- tool_args = session.pending_approval["arguments"]
243
- tool_name = tc.function.name
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
244
 
245
- if approved:
246
- # Execute the pending tool
 
 
 
 
 
 
247
  await session.send_event(
248
  Event(
249
  event_type="tool_call",
@@ -251,36 +295,58 @@ class Handlers:
251
  )
252
  )
253
 
254
- output, success = await session.tool_router.call_tool(tool_name, tool_args)
 
 
255
 
256
- # Add tool result to context
257
- tool_msg = Message(
258
- role="tool",
259
- content=output,
260
- tool_call_id=tc.id,
261
- name=tool_name,
 
262
  )
263
- session.context_manager.add_message(tool_msg)
264
 
265
- await session.send_event(
266
- Event(
267
- event_type="tool_output",
268
- data={
269
- "tool": tool_name,
270
- "output": output,
271
- "success": success,
272
- },
 
 
 
 
 
 
 
273
  )
274
- )
275
- else:
276
- # User rejected - add cancellation message to context
277
- cancellation_msg = "Job execution cancelled by user"
278
- if feedback:
279
- cancellation_msg += f". User feedback: {feedback}"
 
 
 
 
 
 
 
 
 
 
 
 
 
280
 
281
  tool_msg = Message(
282
  role="tool",
283
- content=cancellation_msg,
284
  tool_call_id=tc.id,
285
  name=tool_name,
286
  )
@@ -291,7 +357,7 @@ class Handlers:
291
  event_type="tool_output",
292
  data={
293
  "tool": tool_name,
294
- "output": cancellation_msg,
295
  "success": False,
296
  },
297
  )
@@ -300,7 +366,7 @@ class Handlers:
300
  # Clear pending approval
301
  session.pending_approval = None
302
 
303
- # Continue agent loop with empty input to process the tool result
304
  await Handlers.run_agent(session, "")
305
 
306
  @staticmethod
@@ -339,9 +405,8 @@ async def process_submission(session: Session, submission) -> bool:
339
  return True
340
 
341
  if op.op_type == OpType.EXEC_APPROVAL:
342
- approved = op.data.get("approved", False) if op.data else False
343
- feedback = op.data.get("feedback") if op.data else None
344
- await Handlers.exec_approval(session, approved, feedback)
345
  return True
346
 
347
  if op.op_type == OpType.SHUTDOWN:
 
103
  Event(event_type="assistant_message", data={"content": content})
104
  )
105
 
106
+ # Separate tools into those requiring approval and those that don't
107
+ approval_required_tools = []
108
+ non_approval_tools = []
109
+
110
  for tc in tool_calls:
111
  tool_name = tc.function.name
112
  tool_args = json.loads(tc.function.arguments)
113
 
 
114
  if _needs_approval(tool_name, tool_args):
115
+ approval_required_tools.append(tc)
116
+ else:
117
+ non_approval_tools.append(tc)
 
 
 
 
 
 
 
 
 
 
 
 
 
118
 
119
+ # Execute non-approval tools first
120
+ for tc in non_approval_tools:
121
+ tool_name = tc.function.name
122
+ tool_args = json.loads(tc.function.arguments)
123
 
124
  await session.send_event(
125
  Event(
 
152
  )
153
  )
154
 
155
+ # If there are tools requiring approval, ask for batch approval
156
+ if approval_required_tools:
157
+ # Prepare batch approval data
158
+ tools_data = []
159
+ for tc in approval_required_tools:
160
+ tool_name = tc.function.name
161
+ tool_args = json.loads(tc.function.arguments)
162
+ tools_data.append({
163
+ "tool": tool_name,
164
+ "arguments": tool_args,
165
+ "tool_call_id": tc.id,
166
+ })
167
+
168
+ await session.send_event(
169
+ Event(
170
+ event_type="approval_required",
171
+ data={
172
+ "tools": tools_data, # Batch of tools
173
+ "count": len(tools_data),
174
+ },
175
+ )
176
+ )
177
+
178
+ # Store all approval-requiring tools
179
+ session.pending_approval = {
180
+ "tool_calls": approval_required_tools,
181
+ }
182
+
183
+ # Return early - wait for EXEC_APPROVAL operation
184
+ return None
185
+
186
  iteration += 1
187
 
188
  except Exception as e:
 
247
  await session.send_event(Event(event_type="undo_complete"))
248
 
249
  @staticmethod
250
+ async def exec_approval(session: Session, approvals: list[dict]) -> None:
251
+ """Handle batch job execution approval"""
 
 
252
  if not session.pending_approval:
253
  await session.send_event(
254
  Event(
 
258
  )
259
  return
260
 
261
+ tool_calls = session.pending_approval.get("tool_calls", [])
262
+ if not tool_calls:
263
+ await session.send_event(
264
+ Event(
265
+ event_type="error",
266
+ data={"error": "No pending tool calls found"},
267
+ )
268
+ )
269
+ return
270
+
271
+ # Create a map of tool_call_id -> approval decision
272
+ approval_map = {a["tool_call_id"]: a for a in approvals}
273
+
274
+ # Separate approved and rejected tool calls
275
+ approved_tasks = []
276
+ rejected_tasks = []
277
+
278
+ for tc in tool_calls:
279
+ tool_name = tc.function.name
280
+ tool_args = json.loads(tc.function.arguments)
281
+ approval_decision = approval_map.get(tc.id, {"approved": False})
282
 
283
+ if approval_decision.get("approved", False):
284
+ approved_tasks.append((tc, tool_name, tool_args))
285
+ else:
286
+ rejected_tasks.append((tc, tool_name, approval_decision))
287
+
288
+ # Execute all approved tools concurrently
289
+ async def execute_tool(tc, tool_name, tool_args):
290
+ """Execute a single tool and return its result"""
291
  await session.send_event(
292
  Event(
293
  event_type="tool_call",
 
295
  )
296
  )
297
 
298
+ output, success = await session.tool_router.call_tool(
299
+ tool_name, tool_args
300
+ )
301
 
302
+ return (tc, tool_name, output, success)
303
+
304
+ # Execute all approved tools concurrently and wait for ALL to complete
305
+ if approved_tasks:
306
+ results = await asyncio.gather(
307
+ *[execute_tool(tc, tool_name, tool_args) for tc, tool_name, tool_args in approved_tasks],
308
+ return_exceptions=True
309
  )
 
310
 
311
+ # Process results and add to context
312
+ for result in results:
313
+ if isinstance(result, Exception):
314
+ # Handle execution error
315
+ print(f"Tool execution error: {result}")
316
+ continue
317
+
318
+ tc, tool_name, output, success = result
319
+
320
+ # Add tool result to context
321
+ tool_msg = Message(
322
+ role="tool",
323
+ content=output,
324
+ tool_call_id=tc.id,
325
+ name=tool_name,
326
  )
327
+ session.context_manager.add_message(tool_msg)
328
+
329
+ await session.send_event(
330
+ Event(
331
+ event_type="tool_output",
332
+ data={
333
+ "tool": tool_name,
334
+ "output": output,
335
+ "success": success,
336
+ },
337
+ )
338
+ )
339
+
340
+ # Process rejected tools
341
+ for tc, tool_name, approval_decision in rejected_tasks:
342
+ rejection_msg = "Job execution cancelled by user"
343
+ user_feedback = approval_decision.get("feedback")
344
+ if user_feedback:
345
+ rejection_msg += f". User feedback: {user_feedback}"
346
 
347
  tool_msg = Message(
348
  role="tool",
349
+ content=rejection_msg,
350
  tool_call_id=tc.id,
351
  name=tool_name,
352
  )
 
357
  event_type="tool_output",
358
  data={
359
  "tool": tool_name,
360
+ "output": rejection_msg,
361
  "success": False,
362
  },
363
  )
 
366
  # Clear pending approval
367
  session.pending_approval = None
368
 
369
+ # Continue agent loop with empty input to process the tool results
370
  await Handlers.run_agent(session, "")
371
 
372
  @staticmethod
 
405
  return True
406
 
407
  if op.op_type == OpType.EXEC_APPROVAL:
408
+ approvals = op.data.get("approvals", []) if op.data else []
409
+ await Handlers.exec_approval(session, approvals)
 
410
  return True
411
 
412
  if op.op_type == OpType.SHUTDOWN:
agent/core/tools.py CHANGED
@@ -13,9 +13,14 @@ from lmnr import observe
13
  from mcp.types import EmbeddedResource, ImageContent, TextContent
14
 
15
  from agent.config import MCPServerConfig
 
 
 
 
 
 
16
  from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, hf_jobs_handler
17
  from agent.tools.plan_tool import PLAN_TOOL_SPEC, plan_tool_handler
18
- from agent.tools.search_docs_tool import SEARCH_DOCS_TOOL_SPEC, search_docs_handler
19
 
20
  # Suppress aiohttp deprecation warning
21
  warnings.filterwarnings(
@@ -122,6 +127,27 @@ class ToolRouter:
122
  )
123
  )
124
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  def get_tool_specs_for_llm(self) -> list[dict[str, Any]]:
126
  """Get tool specifications in OpenAI format"""
127
  specs = []
@@ -145,6 +171,10 @@ class ToolRouter:
145
  await self.register_mcp_tools()
146
  self._mcp_initialized = True
147
  print(f"MCP initialized: {self._mcp_initialized}")
 
 
 
 
148
  return self
149
 
150
  async def __aexit__(self, exc_type, exc, tb) -> None:
@@ -189,25 +219,34 @@ class ToolRouter:
189
  def create_builtin_tools() -> list[ToolSpec]:
190
  """Create built-in tool specifications"""
191
  print(
192
- f"Creating built-in tools: {HF_JOBS_TOOL_SPEC['name']}, {SEARCH_DOCS_TOOL_SPEC['name']}, {PLAN_TOOL_SPEC['name']}"
193
  )
 
194
  return [
 
195
  ToolSpec(
196
- name=HF_JOBS_TOOL_SPEC["name"],
197
- description=HF_JOBS_TOOL_SPEC["description"],
198
- parameters=HF_JOBS_TOOL_SPEC["parameters"],
199
- handler=hf_jobs_handler,
200
  ),
201
  ToolSpec(
202
- name=SEARCH_DOCS_TOOL_SPEC["name"],
203
- description=SEARCH_DOCS_TOOL_SPEC["description"],
204
- parameters=SEARCH_DOCS_TOOL_SPEC["parameters"],
205
- handler=search_docs_handler,
206
  ),
 
207
  ToolSpec(
208
  name=PLAN_TOOL_SPEC["name"],
209
  description=PLAN_TOOL_SPEC["description"],
210
  parameters=PLAN_TOOL_SPEC["parameters"],
211
  handler=plan_tool_handler,
212
  ),
 
 
 
 
 
 
213
  ]
 
13
  from mcp.types import EmbeddedResource, ImageContent, TextContent
14
 
15
  from agent.config import MCPServerConfig
16
+ from agent.tools.docs_tools import (
17
+ EXPLORE_HF_DOCS_TOOL_SPEC,
18
+ HF_DOCS_FETCH_TOOL_SPEC,
19
+ explore_hf_docs_handler,
20
+ hf_docs_fetch_handler,
21
+ )
22
  from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, hf_jobs_handler
23
  from agent.tools.plan_tool import PLAN_TOOL_SPEC, plan_tool_handler
 
24
 
25
  # Suppress aiohttp deprecation warning
26
  warnings.filterwarnings(
 
127
  )
128
  )
129
 
130
+ async def register_openapi_tool(self) -> None:
131
+ """Register the OpenAPI search tool (requires async initialization)"""
132
+ from agent.tools.docs_tools import (
133
+ _get_api_search_tool_spec,
134
+ search_openapi_handler,
135
+ )
136
+
137
+ print("Registering OpenAPI search tool...")
138
+
139
+ # Register search_hf_api_endpoints with dynamic spec
140
+ openapi_spec = await _get_api_search_tool_spec()
141
+ self.register_tool(
142
+ ToolSpec(
143
+ name=openapi_spec["name"],
144
+ description=openapi_spec["description"],
145
+ parameters=openapi_spec["parameters"],
146
+ handler=search_openapi_handler,
147
+ )
148
+ )
149
+ print(f"Registered: {openapi_spec['name']}")
150
+
151
  def get_tool_specs_for_llm(self) -> list[dict[str, Any]]:
152
  """Get tool specifications in OpenAI format"""
153
  specs = []
 
171
  await self.register_mcp_tools()
172
  self._mcp_initialized = True
173
  print(f"MCP initialized: {self._mcp_initialized}")
174
+
175
+ # Register OpenAPI tool (requires async initialization)
176
+ await self.register_openapi_tool()
177
+
178
  return self
179
 
180
  async def __aexit__(self, exc_type, exc, tb) -> None:
 
219
  def create_builtin_tools() -> list[ToolSpec]:
220
  """Create built-in tool specifications"""
221
  print(
222
+ f"Creating built-in tools: {EXPLORE_HF_DOCS_TOOL_SPEC['name']}, {HF_DOCS_FETCH_TOOL_SPEC['name']}, {PLAN_TOOL_SPEC['name']}, {HF_JOBS_TOOL_SPEC['name']}"
223
  )
224
+ # in order of importance
225
  return [
226
+ # Documentation search tools
227
  ToolSpec(
228
+ name=EXPLORE_HF_DOCS_TOOL_SPEC["name"],
229
+ description=EXPLORE_HF_DOCS_TOOL_SPEC["description"],
230
+ parameters=EXPLORE_HF_DOCS_TOOL_SPEC["parameters"],
231
+ handler=explore_hf_docs_handler,
232
  ),
233
  ToolSpec(
234
+ name=HF_DOCS_FETCH_TOOL_SPEC["name"],
235
+ description=HF_DOCS_FETCH_TOOL_SPEC["description"],
236
+ parameters=HF_DOCS_FETCH_TOOL_SPEC["parameters"],
237
+ handler=hf_docs_fetch_handler,
238
  ),
239
+ # Planning and job management tools
240
  ToolSpec(
241
  name=PLAN_TOOL_SPEC["name"],
242
  description=PLAN_TOOL_SPEC["description"],
243
  parameters=PLAN_TOOL_SPEC["parameters"],
244
  handler=plan_tool_handler,
245
  ),
246
+ ToolSpec(
247
+ name=HF_JOBS_TOOL_SPEC["name"],
248
+ description=HF_JOBS_TOOL_SPEC["description"],
249
+ parameters=HF_JOBS_TOOL_SPEC["parameters"],
250
+ handler=hf_jobs_handler,
251
+ ),
252
  ]
agent/main.py CHANGED
@@ -116,61 +116,101 @@ async def event_listener(
116
  new_tokens = event.data.get("new_tokens", 0) if event.data else 0
117
  print(f"Compacted context: {old_tokens} → {new_tokens} tokens")
118
  elif event.event_type == "approval_required":
119
- # Display job details and prompt for approval
120
- tool_name = event.data.get("tool", "") if event.data else ""
121
- arguments = event.data.get("arguments", {}) if event.data else {}
122
 
123
- operation = arguments.get("operation", "")
124
- args = arguments.get("args", {})
125
-
126
- print(f"\nOperation: {operation}")
127
-
128
- if operation == "uv":
129
- script = args.get("script", "")
130
- dependencies = args.get("dependencies", [])
131
- print(f"Script to run:\n{script}")
132
- if dependencies:
133
- print(f"Dependencies: {', '.join(dependencies)}")
134
- elif operation == "run":
135
- image = args.get("image", "")
136
- command = args.get("command", "")
137
- print(f"Docker image: {image}")
138
- print(f"Command: {command}")
139
-
140
- # Common parameters
141
- flavor = args.get("flavor", "cpu-basic")
142
- detached = args.get("detached", False)
143
- print(f"Hardware: {flavor}")
144
- print(f"Detached mode: {detached}")
145
-
146
- secrets = args.get("secrets", [])
147
- if secrets:
148
- print(f"Secrets: {', '.join(secrets)}")
149
-
150
- # Get user decision
151
  print("\n" + format_separator())
152
- print(format_header("JOB EXECUTION APPROVAL REQUIRED"))
153
- print(format_separator())
154
- loop = asyncio.get_event_loop()
155
- response = await loop.run_in_executor(
156
- None,
157
- input,
158
- "Approve? (y=yes, n=no, or provide feedback to reject): ",
159
  )
 
160
 
161
- response = response.strip()
162
- approved = response.lower() in ["y", "yes"]
163
- feedback = (
164
- None if approved or response.lower() in ["n", "no"] else response
165
- )
166
 
167
- # Submit approval
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  submission_id[0] += 1
169
  approval_submission = Submission(
170
  id=f"approval_{submission_id[0]}",
171
  operation=Operation(
172
  op_type=OpType.EXEC_APPROVAL,
173
- data={"approved": approved, "feedback": feedback},
174
  ),
175
  )
176
  await submission_queue.put(approval_submission)
 
116
  new_tokens = event.data.get("new_tokens", 0) if event.data else 0
117
  print(f"Compacted context: {old_tokens} → {new_tokens} tokens")
118
  elif event.event_type == "approval_required":
119
+ # Handle batch approval format
120
+ tools_data = event.data.get("tools", []) if event.data else []
121
+ count = event.data.get("count", 0) if event.data else 0
122
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  print("\n" + format_separator())
124
+ print(
125
+ format_header(
126
+ f"JOB EXECUTION APPROVAL REQUIRED ({count} job{'s' if count != 1 else ''})"
127
+ )
 
 
 
128
  )
129
+ print(format_separator())
130
 
131
+ approvals = []
132
+ loop = asyncio.get_event_loop()
 
 
 
133
 
134
+ # Ask for approval for each tool
135
+ for i, tool_info in enumerate(tools_data, 1):
136
+ tool_name = tool_info.get("tool", "")
137
+ arguments = tool_info.get("arguments", {})
138
+ tool_call_id = tool_info.get("tool_call_id", "")
139
+
140
+ # Handle case where arguments might be a JSON string
141
+ if isinstance(arguments, str):
142
+ try:
143
+ arguments = json.loads(arguments)
144
+ except json.JSONDecodeError:
145
+ print(f"Warning: Failed to parse arguments for {tool_name}")
146
+ arguments = {}
147
+
148
+ operation = arguments.get("operation", "")
149
+ args = arguments.get("args", {})
150
+
151
+ # Handle case where args might be a JSON string
152
+ if isinstance(args, str):
153
+ try:
154
+ args = json.loads(args)
155
+ except json.JSONDecodeError:
156
+ print(f"Warning: Failed to parse args for {tool_name}")
157
+ args = {}
158
+
159
+ print(f"\n[Job {i}/{count}]")
160
+ print(f"Operation: {operation}")
161
+
162
+ if operation == "uv":
163
+ script = args.get("script", "")
164
+ dependencies = args.get("dependencies", [])
165
+ print("Script:\n" + script)
166
+ if dependencies:
167
+ print(f"Dependencies: {', '.join(dependencies)}")
168
+ elif operation == "run":
169
+ image = args.get("image", "")
170
+ command = args.get("command", "")
171
+ print(f"Docker image: {image}")
172
+ print(f"Command: {command}")
173
+
174
+ # Common parameters
175
+ flavor = args.get("flavor", "cpu-basic")
176
+ detached = args.get("detached", False)
177
+ print(f"Hardware: {flavor}")
178
+ print(f"Detached mode: {detached}")
179
+
180
+ secrets = args.get("secrets", [])
181
+ if secrets:
182
+ print(f"Secrets: {', '.join(secrets)}")
183
+
184
+ # Get user decision for this job
185
+ response = await loop.run_in_executor(
186
+ None,
187
+ input,
188
+ f"Approve job {i}? (y=yes, n=no, or provide feedback to reject): ",
189
+ )
190
+
191
+ response = response.strip()
192
+ approved = response.lower() in ["y", "yes"]
193
+ feedback = (
194
+ None
195
+ if approved or response.lower() in ["n", "no"]
196
+ else response
197
+ )
198
+
199
+ approvals.append(
200
+ {
201
+ "tool_call_id": tool_call_id,
202
+ "approved": approved,
203
+ "feedback": feedback,
204
+ }
205
+ )
206
+
207
+ # Submit batch approval
208
  submission_id[0] += 1
209
  approval_submission = Submission(
210
  id=f"approval_{submission_id[0]}",
211
  operation=Operation(
212
  op_type=OpType.EXEC_APPROVAL,
213
+ data={"approvals": approvals},
214
  ),
215
  )
216
  await submission_queue.put(approval_submission)
agent/prompts/search_docs_system_prompt.yaml DELETED
@@ -1,38 +0,0 @@
1
- search_docs_system_prompt: |
2
- You are a specialized documentation search agent. Your task is to comprehensively search and synthesize information from Hugging Face documentation.
3
-
4
- # Search Strategy
5
-
6
- You must search thoroughly before synthesizing results. Follow this approach:
7
-
8
- 1. **Query Analysis**: Identify the core concepts and intent of the query
9
- 2. **Initial Search**: Start with a broad search capturing the main topic
10
- 3. **Iterative Refinement**: Run multiple searches to go deeper into topics. You will see parsed HTML pages, also look into links on the html pages for best information - first-pass results often miss key details
11
- 4. **You must get to the end truth**: You must get to the bottom of the truth for this search query. You CAN NOT say that somebody should look up documentation. You must look it up yourself and give the best answer you can.
12
-
13
- ## Query Formulation Best Practices
14
-
15
- - Add relevant synonyms and related technical terms
16
- - Remove filler words, focus on searchable concepts
17
- - Break complex questions into focused sub-queries
18
- - Include domain-specific terminology when applicable
19
- - Try both specific terms and general related terms
20
-
21
- # Response Guidelines
22
-
23
- After gathering results, synthesize them following these principles:
24
-
25
- 1. **Analyze Relevance**: Evaluate which results directly answer the query
26
- 2. **Synthesize**: Combine information from multiple sources when applicable
27
- 3. **Prioritize**: Present information in order of relevance
28
- 4. **Cite Sources**: Reference which documents you're drawing from especially include relevant code samples and links to the code samples.
29
- 5. **Acknowledge Gaps**: If documents don't fully answer the query, explicitly state this
30
- 6. **Handle Conflicts**: If sources contradict, note this and explain your reasoning
31
- 7. **Be Concise**: Provide a clear, direct answer without unnecessary elaboration
32
-
33
- # Constraints
34
-
35
- - Only provide information found in the documentation
36
- - Do not make assumptions beyond what the sources state
37
- - If information is not found, say so clearly rather than guessing
38
- - Focus on answering the query directly
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agent/prompts/system_prompt.yaml CHANGED
@@ -3,10 +3,24 @@ system_prompt: |
3
 
4
  # Task Approach
5
 
6
- 1. Always formulate a plan. Pass the todos to the PlanTool. Update the plan as progress is made.
7
- 2. Search for relevant models, datasets, and documentation on Hugging Face Hub.
8
- 3. Use all available tools to complete the task. Leverage existing resources before creating new ones.
9
- 4. Invoke multiple independent tools simultaneously for efficiency
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  # Autonomy / Subordinate trade-off.
12
 
@@ -28,68 +42,18 @@ system_prompt: |
28
  - Image Generation: Generate and transform images
29
  - Planning: a planning/to-do tool.
30
 
31
- # Examples
32
-
33
- <example>
34
- <user>Find the best text generation models</user>
35
- <response>[uses mcp__hf-mcp-server__model_search with task="text-generation" and sort="trendingScore"]
36
-
37
- Top trending text generation models:
38
- - meta-llama/Llama-3.1-405B-Instruct
39
- - mistralai/Mistral-Large-2
40
- </response>
41
- </example>
42
-
43
- <example>
44
- <user>Search for papers about reinforcement learning from human feedback</user>
45
- <response>[uses mcp__hf-mcp-server__paper_search with query="reinforcement learning from human feedback"]
46
-
47
- Found 5 relevant papers on RLHF including "Training language models to follow instructions with human feedback" (Ouyang et al.)
48
- </response>
49
- </example>
50
-
51
- <example>
52
- <user>Find datasets for sentiment analysis</user>
53
- <response>[uses mcp__hf-mcp-server__dataset_search with query="sentiment analysis" and tags for task_categories]
54
-
55
- Top sentiment analysis datasets:
56
- - stanfordnlp/imdb (25k reviews)
57
- - tweet_eval (sentiment task)
58
- </response>
59
- </example>
60
-
61
- <example>
62
- <user>How do I use the transformers library for text generation?</user>
63
- <response>[uses mcp__hf-mcp-server__hf_doc_search with query="text generation transformers"]
64
-
65
- [provides concise answer based on documentation]
66
- </response>
67
- </example>
68
-
69
- <example>
70
- <user>Generate an image of a sunset over mountains</user>
71
- <response>[uses mcp__hf-mcp-server__gr1_flux1_schnell_infer with prompt="sunset over mountains"]
72
-
73
- [returns generated image]
74
- </response>
75
- </example>
76
-
77
- <example>
78
- <user>Get details about the bert-base-uncased model</user>
79
- <response>[uses mcp__hf-mcp-server__hub_repo_details with repo_ids=["google-bert/bert-base-uncased"]]
80
-
81
- BERT base uncased: 110M parameters, trained on English Wikipedia and BookCorpus, commonly used for text classification and NER.
82
- </response>
83
- </example>
84
-
85
  # Conventions
86
 
 
 
 
 
87
  - Always search Hugging Face Hub for existing resources before suggesting custom implementations
88
  - Keep in mind that a space is a repo, so you can create a space directly by uploading files that way. Repos should also be used to store files permanently : post-execution, files from jobs are not available.
89
  - To run jobs, you must always pass the whole content of the file to execute. No files are available on server. Your local files and distant files are entirely separate scopes.
90
 - - To access, create, or modify private Hub assets (spaces, private models, datasets, collections), pass `secrets: {% raw %}{{ "HF_TOKEN": "$HF_TOKEN" }}{% endraw %}` along with the jobs parameters. This is important. Without it, you will encounter authentication issues. Do not assume the user is connected on the jobs' server.
 
91
  - When referencing models, datasets, or papers, include direct links from search results
92
- - Never assume a library is available - check documentation first
93
  - Before processing any dataset: inspect its actual structure first using the mcp__hf-mcp-server__hub_repo_details tool. Never assume column names: verify them beforehand.
94
  - Follow ML best practices: proper train/val/test splits, reproducibility, evaluation metrics
95
  - Unless absolutely necessary, don't ask user for action. This does not apply to follow-up questions you have.
@@ -107,13 +71,3 @@ system_prompt: |
107
  - Explain what you're doing for non-trivial operations
108
 
109
  Answer the user's question directly without elaboration unless they ask for detail. One word answers are best when appropriate.
110
-
111
- <example>
112
- <user>What's the state-of-the-art model for image classification?</user>
113
- <response>EVA-CLIP-18B or ConvNeXt-XXLarge depending on your constraints</response>
114
- </example>
115
-
116
- <example>
117
- <user>How many parameters does GPT-3 have?</user>
118
- <response>175 billion</response>
119
- </example>
 
3
 
4
  # Task Approach
5
 
6
+ **CRITICAL: Research First, Then Implement**
7
+
8
+ For ANY implementation task (training, fine-tuning, inference, data processing, etc.):
9
+ 1. **FIRST**: Search HF documentation to find the recommended approach
10
+ - This is MANDATORY before writing any code or making implementation decisions
11
+ - Use `explore_hf_docs` to discover documentation structure for relevant libraries (e.g., "trl", "transformers", "diffusers")
12
+ - Use `fetch_hf_docs` to retrieve full content from specific documentation pages
13
+ - Use `search_hf_api_endpoints` to find API endpoints with usage examples
14
+ - Research what libraries to use, find code examples, understand best practices
15
+ - Skip ONLY for simple factual questions (e.g., "What is LoRA?")
16
+
17
+ 2. **THEN**: Formulate a plan based on research findings. Pass todos to the PlanTool. Update as progress is made.
18
+
19
+ 3. **FINALLY**: Implement using researched approaches
20
+ - Search for relevant models/datasets on HF Hub
21
+ - Use all available tools to complete the task
22
+ - Leverage existing resources before creating new ones
23
+ - Invoke multiple independent tools simultaneously for efficiency
24
 
25
  # Autonomy / Subordinate trade-off.
26
 
 
42
  - Image Generation: Generate and transform images
43
  - Planning: a planning/to-do tool.
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  # Conventions
46
 
47
+ - **ALWAYS search documentation BEFORE implementing** any ML workflow (training, inference, data processing, etc.) - This is non-negotiable
48
+ - Use `explore_hf_docs`, `fetch_hf_docs`, and `search_hf_api_endpoints` to research the correct approach
49
+ - Never assume you know the correct library, method, or approach - you must verify with documentation first
50
+ - Base your implementation on researched best practices, not general knowledge or assumptions
51
  - Always search Hugging Face Hub for existing resources before suggesting custom implementations
52
  - Keep in mind that a space is a repo, so you can create a space directly by uploading files that way. Repos should also be used to store files permanently : post-execution, files from jobs are not available.
53
  - To run jobs, you must always pass the whole content of the file to execute. No files are available on server. Your local files and distant files are entirely separate scopes.
54
+ - The HF_TOKEN is automatically loaded from the environment variables.
55
+ -
56
  - When referencing models, datasets, or papers, include direct links from search results
 
57
  - Before processing any dataset: inspect its actual structure first using the mcp__hf-mcp-server__hub_repo_details tool. Never assume column names: verify them beforehand.
58
  - Follow ML best practices: proper train/val/test splits, reproducibility, evaluation metrics
59
  - Unless absolutely necessary, don't ask user for action. This does not apply to follow-up questions you have.
 
71
  - Explain what you're doing for non-trivial operations
72
 
73
  Answer the user's question directly without elaboration unless they ask for detail. One word answers are best when appropriate.
 
 
 
 
 
 
 
 
 
 
agent/tools/__init__.py CHANGED
@@ -3,7 +3,6 @@ Hugging Face tools for the agent
3
  """
4
 
5
  from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, HfJobsTool, hf_jobs_handler
6
- from agent.tools.search_docs_tool import SEARCH_DOCS_TOOL_SPEC, search_docs_handler
7
  from agent.tools.types import ToolResult
8
 
9
  __all__ = [
@@ -11,6 +10,4 @@ __all__ = [
11
  "HF_JOBS_TOOL_SPEC",
12
  "hf_jobs_handler",
13
  "HfJobsTool",
14
- "SEARCH_DOCS_TOOL_SPEC",
15
- "search_docs_handler",
16
  ]
 
3
  """
4
 
5
  from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, HfJobsTool, hf_jobs_handler
 
6
  from agent.tools.types import ToolResult
7
 
8
  __all__ = [
 
10
  "HF_JOBS_TOOL_SPEC",
11
  "hf_jobs_handler",
12
  "HfJobsTool",
 
 
13
  ]
agent/tools/{_search_agent_tools.py → docs_tools.py} RENAMED
@@ -1,6 +1,6 @@
1
  """
2
- Tools available to the search sub-agent
3
- These tools are used by the search sub-agent spawned by search_docs_tool
4
  """
5
 
6
  import asyncio
@@ -553,7 +553,7 @@ async def hf_docs_fetch_handler(arguments: dict[str, Any]) -> tuple[str, bool]:
553
  return f"Error fetching documentation: {str(e)}", False
554
 
555
 
556
- # Tool specifications for the search sub-agent
557
 
558
  EXPLORE_HF_DOCS_TOOL_SPEC = {
559
  "name": "explore_hf_docs",
 
1
  """
2
+ Documentation search tools for the HF Agent
3
+ Tools for exploring and fetching HuggingFace documentation and API specifications
4
  """
5
 
6
  import asyncio
 
553
  return f"Error fetching documentation: {str(e)}", False
554
 
555
 
556
+ # Tool specifications for documentation search
557
 
558
  EXPLORE_HF_DOCS_TOOL_SPEC = {
559
  "name": "explore_hf_docs",
agent/tools/jobs_tool.py CHANGED
@@ -46,13 +46,11 @@ ALL_FLAVORS = CPU_FLAVORS + GPU_FLAVORS + SPECIALIZED_FLAVORS
46
  # Operation names
47
  OperationType = Literal[
48
  "run",
49
- "uv",
50
  "ps",
51
  "logs",
52
  "inspect",
53
  "cancel",
54
  "scheduled run",
55
- "scheduled uv",
56
  "scheduled ps",
57
  "scheduled inspect",
58
  "scheduled delete",
@@ -64,26 +62,20 @@ OperationType = Literal[
64
  UV_DEFAULT_IMAGE = "ghcr.io/astral-sh/uv:python3.12-bookworm"
65
 
66
 
67
- def _substitute_hf_token(params: Dict[str, Any] | None) -> Dict[str, Any] | None:
68
- """
69
- Substitute HF_TOKEN key with actual token value from environment.
70
 
71
- Args:
72
- params: Dictionary that may contain "HF_TOKEN" as a key
73
 
74
- Returns:
75
- Dictionary with HF_TOKEN value substituted from environment
76
- """
77
- print("DEBUG !! : ", params)
78
- if params is None:
79
- return None
80
 
81
- result = {}
82
- for key, value in params.items():
83
- if key == "HF_TOKEN":
84
- result[key] = os.environ.get("HF_TOKEN", "")
85
- else:
86
- result[key] = value
87
 
88
  return result
89
 
@@ -109,6 +101,8 @@ def _build_uv_command(
109
  if script_args:
110
  parts.extend(script_args)
111
 
 
 
112
  return parts
113
 
114
 
@@ -129,8 +123,6 @@ def _wrap_inline_script(
129
 
130
  def _ensure_hf_transfer_dependency(deps: list[str] | None) -> list[str]:
131
  """Ensure hf-transfer is included in the dependencies list"""
132
- if deps is None:
133
- return ["hf-transfer"]
134
 
135
  if isinstance(deps, list):
136
  deps_copy = deps.copy() # Don't modify the original
@@ -175,7 +167,7 @@ def _job_info_to_dict(job_info) -> Dict[str, Any]:
175
  "createdAt": job_info.created_at.isoformat(),
176
  "dockerImage": job_info.docker_image,
177
  "spaceId": job_info.space_id,
178
- "flavor": job_info.flavor,
179
  "owner": {"name": job_info.owner.name},
180
  }
181
 
@@ -214,7 +206,7 @@ def _scheduled_job_info_to_dict(scheduled_job_info) -> Dict[str, Any]:
214
  "dockerImage": job_spec.docker_image,
215
  "spaceId": job_spec.space_id,
216
  "command": job_spec.command or [],
217
- "flavor": job_spec.flavor or "cpu-basic",
218
  },
219
  }
220
 
@@ -229,25 +221,25 @@ class HfJobsTool:
229
  async def execute(self, params: Dict[str, Any]) -> ToolResult:
230
  """Execute the specified operation"""
231
  operation = params.get("operation")
232
- args = params.get("args", {})
233
 
234
- # If no operation provided, return usage instructions
 
 
235
  if not operation:
236
- return self._show_help()
 
 
 
 
 
237
 
238
  # Normalize operation name
239
  operation = operation.lower()
240
 
241
- # Check if help is requested
242
- if args.get("help"):
243
- return self._show_operation_help(operation)
244
-
245
  try:
246
  # Route to appropriate handler
247
  if operation == "run":
248
  return await self._run_job(args)
249
- elif operation == "uv":
250
- return await self._run_uv_job(args)
251
  elif operation == "ps":
252
  return await self._list_jobs(args)
253
  elif operation == "logs":
@@ -258,8 +250,6 @@ class HfJobsTool:
258
  return await self._cancel_job(args)
259
  elif operation == "scheduled run":
260
  return await self._scheduled_run(args)
261
- elif operation == "scheduled uv":
262
- return await self._scheduled_uv(args)
263
  elif operation == "scheduled ps":
264
  return await self._list_scheduled_jobs(args)
265
  elif operation == "scheduled inspect":
@@ -274,8 +264,8 @@ class HfJobsTool:
274
  return {
275
  "formatted": f'Unknown operation: "{operation}"\n\n'
276
  "Available operations:\n"
277
- "- run, uv, ps, logs, inspect, cancel\n"
278
- "- scheduled run, scheduled uv, scheduled ps, scheduled inspect, "
279
  "scheduled delete, scheduled suspend, scheduled resume\n\n"
280
  "Call this tool with no operation for full usage instructions.",
281
  "totalResults": 0,
@@ -298,104 +288,6 @@ class HfJobsTool:
298
  "isError": True,
299
  }
300
 
301
- def _show_help(self) -> ToolResult:
302
- """Show usage instructions when tool is called with no arguments"""
303
- cpu_flavors_list = ", ".join(CPU_FLAVORS)
304
- gpu_flavors_list = ", ".join(GPU_FLAVORS)
305
- specialized_flavors_list = ", ".join(SPECIALIZED_FLAVORS)
306
-
307
- hardware_section = f"**CPU:** {cpu_flavors_list}\n"
308
- if GPU_FLAVORS:
309
- hardware_section += f"**GPU:** {gpu_flavors_list}\n"
310
- if SPECIALIZED_FLAVORS:
311
- hardware_section += f"**Specialized:** {specialized_flavors_list}"
312
-
313
- usage_text = f"""# HuggingFace Jobs API
314
-
315
- Manage compute jobs on Hugging Face infrastructure.
316
-
317
- ## Available Commands
318
-
319
- ### Job Management
320
- - **run** - Run a job with a Docker image
321
- - **uv** - Run a Python script with UV (inline dependencies)
322
- - **ps** - List jobs
323
- - **logs** - Fetch job logs
324
- - **inspect** - Get detailed job information
325
- - **cancel** - Cancel a running job
326
-
327
- ### Scheduled Jobs
328
- - **scheduled run** - Create a scheduled job
329
- - **scheduled uv** - Create a scheduled UV job
330
- - **scheduled ps** - List scheduled jobs
331
- - **scheduled inspect** - Get scheduled job details
332
- - **scheduled delete** - Delete a scheduled job
333
- - **scheduled suspend** - Pause a scheduled job
334
- - **scheduled resume** - Resume a suspended job
335
-
336
- ## Examples
337
-
338
- ### Run a simple job
339
- Call this tool with:
340
- ```json
341
- {{
342
- "operation": "run",
343
- "args": {{
344
- "image": "python:3.12",
345
- "command": ["python", "-c", "print('Hello from HF Jobs!')"],
346
- "flavor": "cpu-basic"
347
- }}
348
- }}
349
- ```
350
-
351
- ### Run a Python script with UV
352
- Call this tool with:
353
- ```json
354
- {{
355
- "operation": "uv",
356
- "args": {{
357
- "script": "import random\\nprint(42 + random.randint(1, 5))",
358
- "dependencies": ["torch", "huggingface_hub"],
359
- "secrets": {{"HF_TOKEN": "$HF_TOKEN"}}
360
- }}
361
- }}
362
- ```
363
-
364
- ## Hardware Flavors
365
-
366
- {hardware_section}
367
-
368
- ## Command Format Guidelines
369
-
370
- **Array format (default):**
371
- - Recommended for every command—JSON keeps arguments intact (URLs with `&`, spaces, etc.)
372
- - Use `["/bin/sh", "-lc", "..."]` when you need shell operators like `&&`, `|`, or redirections
373
- - Works with any language: Python, bash, node, npm, uv, etc.
374
-
375
- **String format (simple cases only):**
376
- - Still accepted for backwards compatibility, parsed with POSIX shell semantics
377
- - Rejects shell operators and can mis-handle characters such as `&`; switch to arrays when things turn complex
378
-
379
- ### Show command-specific help
380
- Call this tool with:
381
- ```json
382
- {{"operation": "<operation>", "args": {{"help": true}}}}
383
- ```
384
-
385
- ## Tips
386
-
387
- - Jobs default to non-detached mode (stream logs until completion). Set `detach: true` to return immediately.
388
- - Prefer array commands to avoid shell parsing surprises
389
- - To access, create, or modify private Hub assets (spaces, private models, datasets, collections), pass `secrets: {{ "HF_TOKEN": "$HF_TOKEN" }}`. This is important. Without it, you will encounter authentification issues. Do not assume the user is connected on the jobs' server.
390
- - Before calling a job, think about dependencies (they must be specified), which hardware flavor to run on (choose simplest for task), and whether to include secrets.
391
- """
392
- return {"formatted": usage_text, "totalResults": 1, "resultsShared": 1}
393
-
394
- def _show_operation_help(self, operation: str) -> ToolResult:
395
- """Show help for a specific operation"""
396
- help_text = f"Help for operation: {operation}\n\nCall with appropriate arguments. Use the main help for examples."
397
- return {"formatted": help_text, "totalResults": 1, "resultsShared": 1}
398
-
399
  async def _wait_for_job_completion(
400
  self, job_id: str, namespace: Optional[str] = None
401
  ) -> tuple[str, list[str]]:
@@ -424,117 +316,69 @@ Call this tool with:
424
  return final_status, all_logs
425
 
426
  async def _run_job(self, args: Dict[str, Any]) -> ToolResult:
427
- """Run a job using HfApi.run_job()"""
428
  try:
429
- job = await _async_call(
430
- self.api.run_job,
431
- image=args.get("image", "python:3.12"),
432
- command=args.get("command"),
433
- env=_substitute_hf_token(args.get("env")),
434
- secrets=_substitute_hf_token(args.get("secrets")),
435
- flavor=args.get("flavor", "cpu-basic"),
436
- timeout=args.get("timeout", "30m"),
437
- namespace=args.get("namespace") or self.namespace,
438
- )
439
-
440
- # If detached, return immediately
441
- if args.get("detach", False):
442
- response = f"""Job started successfully!
443
-
444
- **Job ID:** {job.id}
445
- **Status:** {job.status.stage}
446
- **View at:** {job.url}
447
-
448
- To check logs, call this tool with `{{"operation": "logs", "args": {{"job_id": "{job.id}"}}}}`
449
- To inspect, call this tool with `{{"operation": "inspect", "args": {{"job_id": "{job.id}"}}}}`"""
450
- return {"formatted": response, "totalResults": 1, "resultsShared": 1}
451
-
452
- # Not detached - wait for completion and stream logs
453
- print(f"Job started: {job.url}")
454
- print("Streaming logs...\n---\n")
455
-
456
- final_status, all_logs = await self._wait_for_job_completion(
457
- job_id=job.id,
458
- namespace=args.get("namespace") or self.namespace,
459
- )
460
-
461
- # Format all logs for the agent
462
- log_text = "\n".join(all_logs) if all_logs else "(no logs)"
463
 
464
- response = f"""Job completed!
 
 
 
 
465
 
466
- **Job ID:** {job.id}
467
- **Final Status:** {final_status}
468
- **View at:** {job.url}
 
469
 
470
- **Logs:**
471
- ```
472
- {log_text}
473
- ```"""
474
- return {"formatted": response, "totalResults": 1, "resultsShared": 1}
 
 
 
 
 
 
 
475
 
476
- except Exception as e:
477
- raise Exception(f"Failed to run job: {str(e)}")
 
478
 
479
- async def _run_uv_job(self, args: Dict[str, Any]) -> ToolResult:
480
- """Run UV job with inline script support (no local files needed)"""
481
- try:
482
- script = args.get("script")
483
- if not script:
484
- raise ValueError("script is required")
485
-
486
- # Get dependencies and ensure hf-transfer is included
487
- deps = (
488
- args.get("with_deps")
489
- or args.get("dependencies")
490
- or args.get("packages")
491
- )
492
- deps = _ensure_hf_transfer_dependency(deps)
493
-
494
- # Resolve the command based on script type (URL, inline, or file)
495
- command = _resolve_uv_command(
496
- script=script,
497
- with_deps=deps,
498
- python=args.get("python"),
499
- script_args=args.get("script_args"),
500
- )
501
 
502
- # Use run_job with UV image instead of run_uv_job
503
  job = await _async_call(
504
  self.api.run_job,
505
- image=UV_DEFAULT_IMAGE,
506
  command=command,
507
- env=_substitute_hf_token(args.get("env")),
508
- secrets=_substitute_hf_token(args.get("secrets")),
509
- flavor=args.get("flavor") or args.get("hardware") or "cpu-basic",
510
  timeout=args.get("timeout", "30m"),
511
- namespace=args.get("namespace") or self.namespace,
512
  )
513
 
514
- # If detached, return immediately
515
- if args.get("detach", False):
516
- response = f"""UV Job started successfully!
517
-
518
- **Job ID:** {job.id}
519
- **Status:** {job.status.stage}
520
- **View at:** {job.url}
521
-
522
- To check logs, call this tool with `{{"operation": "logs", "args": {{"job_id": "{job.id}"}}}}`"""
523
- return {"formatted": response, "totalResults": 1, "resultsShared": 1}
524
-
525
- # Not detached - wait for completion and stream logs
526
- print(f"UV Job started: {job.url}")
527
  print("Streaming logs...\n---\n")
528
 
529
  final_status, all_logs = await self._wait_for_job_completion(
530
  job_id=job.id,
531
- namespace=args.get("namespace") or self.namespace,
532
  )
533
 
534
  # Format all logs for the agent
535
  log_text = "\n".join(all_logs) if all_logs else "(no logs)"
536
 
537
- response = f"""UV Job completed!
538
 
539
  **Job ID:** {job.id}
540
  **Final Status:** {final_status}
@@ -547,13 +391,11 @@ To check logs, call this tool with `{{"operation": "logs", "args": {{"job_id": "
547
  return {"formatted": response, "totalResults": 1, "resultsShared": 1}
548
 
549
  except Exception as e:
550
- raise Exception(f"Failed to run UV job: {str(e)}")
551
 
552
  async def _list_jobs(self, args: Dict[str, Any]) -> ToolResult:
553
  """List jobs using HfApi.list_jobs()"""
554
- jobs_list = await _async_call(
555
- self.api.list_jobs, namespace=args.get("namespace") or self.namespace
556
- )
557
 
558
  # Filter jobs
559
  if not args.get("all", False):
@@ -576,7 +418,7 @@ To check logs, call this tool with `{{"operation": "logs", "args": {{"job_id": "
576
  "resultsShared": 0,
577
  }
578
  return {
579
- "formatted": 'No running jobs found. Use `{"args": {"all": true}}` to show all jobs.',
580
  "totalResults": 0,
581
  "resultsShared": 0,
582
  }
@@ -601,9 +443,7 @@ To check logs, call this tool with `{{"operation": "logs", "args": {{"job_id": "
601
 
602
  try:
603
  # Fetch logs (returns generator, convert to list)
604
- logs_gen = self.api.fetch_job_logs(
605
- job_id=job_id, namespace=args.get("namespace") or self.namespace
606
- )
607
  logs = await _async_call(list, logs_gen)
608
 
609
  if not logs:
@@ -647,7 +487,7 @@ To check logs, call this tool with `{{"operation": "logs", "args": {{"job_id": "
647
  job = await _async_call(
648
  self.api.inspect_job,
649
  job_id=jid,
650
- namespace=args.get("namespace") or self.namespace,
651
  )
652
  jobs.append(_job_info_to_dict(job))
653
  except Exception as e:
@@ -676,108 +516,93 @@ To check logs, call this tool with `{{"operation": "logs", "args": {{"job_id": "
676
  await _async_call(
677
  self.api.cancel_job,
678
  job_id=job_id,
679
- namespace=args.get("namespace") or self.namespace,
680
  )
681
 
682
  response = f"""✓ Job {job_id} has been cancelled.
683
 
684
- To verify, call this tool with `{{"operation": "inspect", "args": {{"job_id": "{job_id}"}}}}`"""
685
 
686
  return {"formatted": response, "totalResults": 1, "resultsShared": 1}
687
 
688
  async def _scheduled_run(self, args: Dict[str, Any]) -> ToolResult:
689
- """Create scheduled job using HfApi.create_scheduled_job()"""
690
  try:
691
- scheduled_job = await _async_call(
692
- self.api.create_scheduled_job,
693
- image=args.get("image", "python:3.12"),
694
- command=args.get("command"),
695
- schedule=args.get("schedule"),
696
- env=_substitute_hf_token(args.get("env")),
697
- secrets=_substitute_hf_token(args.get("secrets")),
698
- flavor=args.get("flavor", "cpu-basic"),
699
- timeout=args.get("timeout", "30m"),
700
- namespace=args.get("namespace") or self.namespace,
701
- )
702
-
703
- scheduled_dict = _scheduled_job_info_to_dict(scheduled_job)
704
-
705
- response = f"""✓ Scheduled job created successfully!
706
-
707
- **Scheduled Job ID:** {scheduled_dict["id"]}
708
- **Schedule:** {scheduled_dict["schedule"]}
709
- **Suspended:** {"Yes" if scheduled_dict.get("suspend") else "No"}
710
- **Next Run:** {scheduled_dict.get("nextRun", "N/A")}
711
 
712
- To inspect, call this tool with `{{"operation": "scheduled inspect", "args": {{"scheduled_job_id": "{scheduled_dict["id"]}"}}}}`
713
- To list all, call this tool with `{{"operation": "scheduled ps"}}`"""
714
 
715
- return {"formatted": response, "totalResults": 1, "resultsShared": 1}
 
 
 
 
716
 
717
- except Exception as e:
718
- raise Exception(f"Failed to create scheduled job: {str(e)}")
 
 
719
 
720
- async def _scheduled_uv(self, args: Dict[str, Any]) -> ToolResult:
721
- """Create scheduled UV job with inline script support"""
722
- try:
723
- script = args.get("script")
724
- if not script:
725
- raise ValueError("script is required")
 
 
 
 
 
 
726
 
727
- schedule = args.get("schedule")
728
- if not schedule:
729
- raise ValueError("schedule is required")
730
 
731
- # Get dependencies and ensure hf-transfer is included
732
- deps = (
733
- args.get("with_deps")
734
- or args.get("dependencies")
735
- or args.get("packages")
736
- )
737
- deps = _ensure_hf_transfer_dependency(deps)
738
-
739
- # Resolve the command based on script type
740
- command = _resolve_uv_command(
741
- script=script,
742
- with_deps=deps,
743
- python=args.get("python"),
744
- script_args=args.get("script_args"),
745
- )
746
 
747
- # Use create_scheduled_job with UV image
748
  scheduled_job = await _async_call(
749
  self.api.create_scheduled_job,
750
- image=UV_DEFAULT_IMAGE,
751
  command=command,
752
  schedule=schedule,
753
- env=_substitute_hf_token(args.get("env")),
754
- secrets=_substitute_hf_token(args.get("secrets")),
755
- flavor=args.get("flavor") or args.get("hardware") or "cpu-basic",
756
  timeout=args.get("timeout", "30m"),
757
- namespace=args.get("namespace") or self.namespace,
758
  )
759
 
760
  scheduled_dict = _scheduled_job_info_to_dict(scheduled_job)
761
 
762
- response = f"""✓ Scheduled UV job created successfully!
763
 
764
  **Scheduled Job ID:** {scheduled_dict["id"]}
765
  **Schedule:** {scheduled_dict["schedule"]}
766
  **Suspended:** {"Yes" if scheduled_dict.get("suspend") else "No"}
767
  **Next Run:** {scheduled_dict.get("nextRun", "N/A")}
768
 
769
- To inspect, call this tool with `{{"operation": "scheduled inspect", "args": {{"scheduled_job_id": "{scheduled_dict["id"]}"}}}}`"""
 
770
 
771
  return {"formatted": response, "totalResults": 1, "resultsShared": 1}
772
 
773
  except Exception as e:
774
- raise Exception(f"Failed to create scheduled UV job: {str(e)}")
775
 
776
  async def _list_scheduled_jobs(self, args: Dict[str, Any]) -> ToolResult:
777
  """List scheduled jobs using HfApi.list_scheduled_jobs()"""
778
  scheduled_jobs_list = await _async_call(
779
  self.api.list_scheduled_jobs,
780
- namespace=args.get("namespace") or self.namespace,
781
  )
782
 
783
  # Filter jobs - default: hide suspended jobs unless --all is specified
@@ -797,7 +622,7 @@ To inspect, call this tool with `{{"operation": "scheduled inspect", "args": {{"
797
  "resultsShared": 0,
798
  }
799
  return {
800
- "formatted": 'No active scheduled jobs found. Use `{"args": {"all": true}}` to show suspended jobs.',
801
  "totalResults": 0,
802
  "resultsShared": 0,
803
  }
@@ -823,7 +648,7 @@ To inspect, call this tool with `{{"operation": "scheduled inspect", "args": {{"
823
  scheduled_job = await _async_call(
824
  self.api.inspect_scheduled_job,
825
  scheduled_job_id=scheduled_job_id,
826
- namespace=args.get("namespace") or self.namespace,
827
  )
828
 
829
  scheduled_dict = _scheduled_job_info_to_dict(scheduled_job)
@@ -849,7 +674,7 @@ To inspect, call this tool with `{{"operation": "scheduled inspect", "args": {{"
849
  await _async_call(
850
  self.api.delete_scheduled_job,
851
  scheduled_job_id=scheduled_job_id,
852
- namespace=args.get("namespace") or self.namespace,
853
  )
854
 
855
  return {
@@ -872,12 +697,12 @@ To inspect, call this tool with `{{"operation": "scheduled inspect", "args": {{"
872
  await _async_call(
873
  self.api.suspend_scheduled_job,
874
  scheduled_job_id=scheduled_job_id,
875
- namespace=args.get("namespace") or self.namespace,
876
  )
877
 
878
  response = f"""✓ Scheduled job {scheduled_job_id} has been suspended.
879
 
880
- To resume, call this tool with `{{"operation": "scheduled resume", "args": {{"scheduled_job_id": "{scheduled_job_id}"}}}}`"""
881
 
882
  return {"formatted": response, "totalResults": 1, "resultsShared": 1}
883
 
@@ -895,12 +720,12 @@ To resume, call this tool with `{{"operation": "scheduled resume", "args": {{"sc
895
  await _async_call(
896
  self.api.resume_scheduled_job,
897
  scheduled_job_id=scheduled_job_id,
898
- namespace=args.get("namespace") or self.namespace,
899
  )
900
 
901
  response = f"""✓ Scheduled job {scheduled_job_id} has been resumed.
902
 
903
- To inspect, call this tool with `{{"operation": "scheduled inspect", "args": {{"scheduled_job_id": "{scheduled_job_id}"}}}}`"""
904
 
905
  return {"formatted": response, "totalResults": 1, "resultsShared": 1}
906
 
@@ -909,10 +734,29 @@ To inspect, call this tool with `{{"operation": "scheduled inspect", "args": {{"
909
  HF_JOBS_TOOL_SPEC = {
910
  "name": "hf_jobs",
911
  "description": (
912
- "Manage Hugging Face CPU/GPU compute jobs. Run commands in Docker containers, "
913
- "execute Python scripts with UV. List, schedule and monitor jobs/logs. "
914
- "Example hardware/flavor: cpu-basic, cpu-performance, t4-medium. "
915
- "Call this tool with no operation for full usage instructions and examples."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
916
  ),
917
  "parameters": {
918
  "type": "object",
@@ -921,13 +765,11 @@ HF_JOBS_TOOL_SPEC = {
921
  "type": "string",
922
  "enum": [
923
  "run",
924
- "uv",
925
  "ps",
926
  "logs",
927
  "inspect",
928
  "cancel",
929
  "scheduled run",
930
- "scheduled uv",
931
  "scheduled ps",
932
  "scheduled inspect",
933
  "scheduled delete",
@@ -935,22 +777,60 @@ HF_JOBS_TOOL_SPEC = {
935
  "scheduled resume",
936
  ],
937
  "description": (
938
- "Operation to execute. Valid values: [run, uv, ps, logs, inspect, cancel, "
939
- "scheduled run, scheduled uv, scheduled ps, scheduled inspect, scheduled delete, "
940
  "scheduled suspend, scheduled resume]"
941
  ),
942
  },
943
- "args": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
944
  "type": "object",
945
- "description": (
946
- "Operation-specific arguments as a JSON object. "
947
- "Common args: script (for uv), packages/dependencies (array), "
948
- "flavor/hardware (e.g., a10g-large, cpu-basic), command (array), "
949
- "image (string), env (object), secrets (object)."
950
- ),
951
- "additionalProperties": True,
 
 
 
 
 
 
 
 
952
  },
953
  },
 
954
  },
955
  }
956
 
@@ -958,7 +838,7 @@ HF_JOBS_TOOL_SPEC = {
958
  async def hf_jobs_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
959
  """Handler for agent tool router"""
960
  try:
961
- tool = HfJobsTool()
962
  result = await tool.execute(arguments)
963
  return result["formatted"], not result.get("isError", False)
964
  except Exception as e:
 
46
  # Operation names
47
  OperationType = Literal[
48
  "run",
 
49
  "ps",
50
  "logs",
51
  "inspect",
52
  "cancel",
53
  "scheduled run",
 
54
  "scheduled ps",
55
  "scheduled inspect",
56
  "scheduled delete",
 
62
  UV_DEFAULT_IMAGE = "ghcr.io/astral-sh/uv:python3.12-bookworm"
63
 
64
 
65
+ def _add_environment_variables(params: Dict[str, Any] | None) -> Dict[str, Any]:
66
+ token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN") or ""
 
67
 
68
+ # Start with user-provided env vars, then force-set token last
69
+ result = dict(params or {})
70
 
71
+ # If the caller passed HF_TOKEN="$HF_TOKEN", ignore it.
72
+ if result.get("HF_TOKEN", "").strip().startswith("$"):
73
+ result.pop("HF_TOKEN", None)
 
 
 
74
 
75
+ # Set both names to be safe (different libs check different vars)
76
+ if token:
77
+ result["HF_TOKEN"] = token
78
+ result["HUGGINGFACE_HUB_TOKEN"] = token
 
 
79
 
80
  return result
81
 
 
101
  if script_args:
102
  parts.extend(script_args)
103
 
104
+ # add defaults
105
+ # parts.extend(["--push_to_hub"])
106
  return parts
107
 
108
 
 
123
 
124
  def _ensure_hf_transfer_dependency(deps: list[str] | None) -> list[str]:
125
  """Ensure hf-transfer is included in the dependencies list"""
 
 
126
 
127
  if isinstance(deps, list):
128
  deps_copy = deps.copy() # Don't modify the original
 
167
  "createdAt": job_info.created_at.isoformat(),
168
  "dockerImage": job_info.docker_image,
169
  "spaceId": job_info.space_id,
170
+ "hardware_flavor": job_info.flavor,
171
  "owner": {"name": job_info.owner.name},
172
  }
173
 
 
206
  "dockerImage": job_spec.docker_image,
207
  "spaceId": job_spec.space_id,
208
  "command": job_spec.command or [],
209
+ "hardware_flavor": job_spec.flavor or "cpu-basic",
210
  },
211
  }
212
 
 
221
  async def execute(self, params: Dict[str, Any]) -> ToolResult:
222
  """Execute the specified operation"""
223
  operation = params.get("operation")
 
224
 
225
+ args = params
226
+
227
+ # If no operation provided, return error
228
  if not operation:
229
+ return {
230
+ "formatted": "Error: 'operation' parameter is required. See tool description for available operations and usage examples.",
231
+ "totalResults": 0,
232
+ "resultsShared": 0,
233
+ "isError": True,
234
+ }
235
 
236
  # Normalize operation name
237
  operation = operation.lower()
238
 
 
 
 
 
239
  try:
240
  # Route to appropriate handler
241
  if operation == "run":
242
  return await self._run_job(args)
 
 
243
  elif operation == "ps":
244
  return await self._list_jobs(args)
245
  elif operation == "logs":
 
250
  return await self._cancel_job(args)
251
  elif operation == "scheduled run":
252
  return await self._scheduled_run(args)
 
 
253
  elif operation == "scheduled ps":
254
  return await self._list_scheduled_jobs(args)
255
  elif operation == "scheduled inspect":
 
264
  return {
265
  "formatted": f'Unknown operation: "{operation}"\n\n'
266
  "Available operations:\n"
267
+ "- run, ps, logs, inspect, cancel\n"
268
+ "- scheduled run, scheduled ps, scheduled inspect, "
269
  "scheduled delete, scheduled suspend, scheduled resume\n\n"
270
  "Call this tool with no operation for full usage instructions.",
271
  "totalResults": 0,
 
288
  "isError": True,
289
  }
290
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
291
  async def _wait_for_job_completion(
292
  self, job_id: str, namespace: Optional[str] = None
293
  ) -> tuple[str, list[str]]:
 
316
  return final_status, all_logs
317
 
318
  async def _run_job(self, args: Dict[str, Any]) -> ToolResult:
319
+ """Run a job using HfApi.run_job() - smart detection of Python vs Docker mode"""
320
  try:
321
+ script = args.get("script")
322
+ command = args.get("command")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
323
 
324
+ # Validate mutually exclusive parameters
325
+ if script and command:
326
+ raise ValueError(
327
+ "'script' and 'command' are mutually exclusive. Provide one or the other, not both."
328
+ )
329
 
330
+ if not script and not command:
331
+ raise ValueError(
332
+ "Either 'script' (for Python) or 'command' (for Docker) must be provided."
333
+ )
334
 
335
+ # Python mode: script provided
336
+ if script:
337
+ # Get dependencies and ensure hf-transfer is included
338
+ deps = _ensure_hf_transfer_dependency(args.get("dependencies"))
339
+
340
+ # Resolve the command based on script type (URL, inline, or file)
341
+ command = _resolve_uv_command(
342
+ script=script,
343
+ with_deps=deps,
344
+ python=args.get("python"),
345
+ script_args=args.get("script_args"),
346
+ )
347
 
348
+ # Use UV image unless overridden
349
+ image = args.get("image", UV_DEFAULT_IMAGE)
350
+ job_type = "Python"
351
 
352
+ # Docker mode: command provided
353
+ else:
354
+ image = args.get("image", "python:3.12")
355
+ job_type = "Docker"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
356
 
357
+ # Run the job
358
  job = await _async_call(
359
  self.api.run_job,
360
+ image=image,
361
  command=command,
362
+ env=args.get("env"),
363
+ secrets=_add_environment_variables(args.get("secrets")),
364
+ flavor=args.get("hardware_flavor", "cpu-basic"),
365
  timeout=args.get("timeout", "30m"),
366
+ namespace=self.namespace,
367
  )
368
 
369
+ # Wait for completion and stream logs
370
+ print(f"{job_type} job started: {job.url}")
 
 
 
 
 
 
 
 
 
 
 
371
  print("Streaming logs...\n---\n")
372
 
373
  final_status, all_logs = await self._wait_for_job_completion(
374
  job_id=job.id,
375
+ namespace=self.namespace,
376
  )
377
 
378
  # Format all logs for the agent
379
  log_text = "\n".join(all_logs) if all_logs else "(no logs)"
380
 
381
+ response = f"""{job_type} job completed!
382
 
383
  **Job ID:** {job.id}
384
  **Final Status:** {final_status}
 
391
  return {"formatted": response, "totalResults": 1, "resultsShared": 1}
392
 
393
  except Exception as e:
394
+ raise Exception(f"Failed to run job: {str(e)}")
395
 
396
  async def _list_jobs(self, args: Dict[str, Any]) -> ToolResult:
397
  """List jobs using HfApi.list_jobs()"""
398
+ jobs_list = await _async_call(self.api.list_jobs, namespace=self.namespace)
 
 
399
 
400
  # Filter jobs
401
  if not args.get("all", False):
 
418
  "resultsShared": 0,
419
  }
420
  return {
421
+ "formatted": 'No running jobs found. Use `{"operation": "ps", "all": true}` to show all jobs.',
422
  "totalResults": 0,
423
  "resultsShared": 0,
424
  }
 
443
 
444
  try:
445
  # Fetch logs (returns generator, convert to list)
446
+ logs_gen = self.api.fetch_job_logs(job_id=job_id, namespace=self.namespace)
 
 
447
  logs = await _async_call(list, logs_gen)
448
 
449
  if not logs:
 
487
  job = await _async_call(
488
  self.api.inspect_job,
489
  job_id=jid,
490
+ namespace=self.namespace,
491
  )
492
  jobs.append(_job_info_to_dict(job))
493
  except Exception as e:
 
516
  await _async_call(
517
  self.api.cancel_job,
518
  job_id=job_id,
519
+ namespace=self.namespace,
520
  )
521
 
522
  response = f"""✓ Job {job_id} has been cancelled.
523
 
524
+ To verify, call this tool with `{{"operation": "inspect", "job_id": "{job_id}"}}`"""
525
 
526
  return {"formatted": response, "totalResults": 1, "resultsShared": 1}
527
 
528
  async def _scheduled_run(self, args: Dict[str, Any]) -> ToolResult:
529
+ """Create scheduled job using HfApi.create_scheduled_job() - smart detection of Python vs Docker mode"""
530
  try:
531
+ script = args.get("script")
532
+ command = args.get("command")
533
+ schedule = args.get("schedule")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
534
 
535
+ if not schedule:
536
+ raise ValueError("schedule is required for scheduled jobs")
537
 
538
+ # Validate mutually exclusive parameters
539
+ if script and command:
540
+ raise ValueError(
541
+ "'script' and 'command' are mutually exclusive. Provide one or the other, not both."
542
+ )
543
 
544
+ if not script and not command:
545
+ raise ValueError(
546
+ "Either 'script' (for Python) or 'command' (for Docker) must be provided."
547
+ )
548
 
549
+ # Python mode: script provided
550
+ if script:
551
+ # Get dependencies and ensure hf-transfer is included
552
+ deps = _ensure_hf_transfer_dependency(args.get("dependencies"))
553
+
554
+ # Resolve the command based on script type
555
+ command = _resolve_uv_command(
556
+ script=script,
557
+ with_deps=deps,
558
+ python=args.get("python"),
559
+ script_args=args.get("script_args"),
560
+ )
561
 
562
+ # Use UV image unless overridden
563
+ image = args.get("image", UV_DEFAULT_IMAGE)
564
+ job_type = "Python"
565
 
566
+ # Docker mode: command provided
567
+ else:
568
+ image = args.get("image", "python:3.12")
569
+ job_type = "Docker"
 
 
 
 
 
 
 
 
 
 
 
570
 
571
+ # Create scheduled job
572
  scheduled_job = await _async_call(
573
  self.api.create_scheduled_job,
574
+ image=image,
575
  command=command,
576
  schedule=schedule,
577
+ env=args.get("env"),
578
+ secrets=_add_environment_variables(args.get("secrets")),
579
+ flavor=args.get("hardware_flavor", "cpu-basic"),
580
  timeout=args.get("timeout", "30m"),
581
+ namespace=self.namespace,
582
  )
583
 
584
  scheduled_dict = _scheduled_job_info_to_dict(scheduled_job)
585
 
586
+ response = f"""✓ Scheduled {job_type} job created successfully!
587
 
588
  **Scheduled Job ID:** {scheduled_dict["id"]}
589
  **Schedule:** {scheduled_dict["schedule"]}
590
  **Suspended:** {"Yes" if scheduled_dict.get("suspend") else "No"}
591
  **Next Run:** {scheduled_dict.get("nextRun", "N/A")}
592
 
593
+ To inspect, call this tool with `{{"operation": "scheduled inspect", "scheduled_job_id": "{scheduled_dict["id"]}"}}`
594
+ To list all, call this tool with `{{"operation": "scheduled ps"}}`"""
595
 
596
  return {"formatted": response, "totalResults": 1, "resultsShared": 1}
597
 
598
  except Exception as e:
599
+ raise Exception(f"Failed to create scheduled job: {str(e)}")
600
 
601
  async def _list_scheduled_jobs(self, args: Dict[str, Any]) -> ToolResult:
602
  """List scheduled jobs using HfApi.list_scheduled_jobs()"""
603
  scheduled_jobs_list = await _async_call(
604
  self.api.list_scheduled_jobs,
605
+ namespace=self.namespace,
606
  )
607
 
608
  # Filter jobs - default: hide suspended jobs unless --all is specified
 
622
  "resultsShared": 0,
623
  }
624
  return {
625
+ "formatted": 'No active scheduled jobs found. Use `{"operation": "scheduled ps", "all": true}` to show suspended jobs.',
626
  "totalResults": 0,
627
  "resultsShared": 0,
628
  }
 
648
  scheduled_job = await _async_call(
649
  self.api.inspect_scheduled_job,
650
  scheduled_job_id=scheduled_job_id,
651
+ namespace=self.namespace,
652
  )
653
 
654
  scheduled_dict = _scheduled_job_info_to_dict(scheduled_job)
 
674
  await _async_call(
675
  self.api.delete_scheduled_job,
676
  scheduled_job_id=scheduled_job_id,
677
+ namespace=self.namespace,
678
  )
679
 
680
  return {
 
697
  await _async_call(
698
  self.api.suspend_scheduled_job,
699
  scheduled_job_id=scheduled_job_id,
700
+ namespace=self.namespace,
701
  )
702
 
703
  response = f"""✓ Scheduled job {scheduled_job_id} has been suspended.
704
 
705
+ To resume, call this tool with `{{"operation": "scheduled resume", "scheduled_job_id": "{scheduled_job_id}"}}`"""
706
 
707
  return {"formatted": response, "totalResults": 1, "resultsShared": 1}
708
 
 
720
  await _async_call(
721
  self.api.resume_scheduled_job,
722
  scheduled_job_id=scheduled_job_id,
723
+ namespace=self.namespace,
724
  )
725
 
726
  response = f"""✓ Scheduled job {scheduled_job_id} has been resumed.
727
 
728
+ To inspect, call this tool with `{{"operation": "scheduled inspect", "scheduled_job_id": "{scheduled_job_id}"}}`"""
729
 
730
  return {"formatted": response, "totalResults": 1, "resultsShared": 1}
731
 
 
734
  HF_JOBS_TOOL_SPEC = {
735
  "name": "hf_jobs",
736
  "description": (
737
+ "Run Python scripts or Docker containers on HF cloud GPUs/CPUs.\n\n"
738
+ "## Operations:\n"
739
+ "run, ps, logs, inspect, cancel, scheduled run, scheduled ps, scheduled inspect, scheduled delete, scheduled suspend, scheduled resume\n\n"
740
+ "## Two modes:\n"
741
+ "1. **Python mode:** Provide 'script' + 'dependencies' → auto-handles pip install\n"
742
+ "2. **Docker mode:** Provide 'image' + 'command' → full control\n"
743
+ "(script and command are mutually exclusive)\n\n"
744
+ "## Hardware:\n"
745
+ "CPU: cpu-basic (default), cpu-upgrade, cpu-performance, cpu-xl\n"
746
+ "GPU: t4-small, t4-medium, l4x1, a10g-small, a10g-large, a100-large, h100\n\n"
747
+ "## Examples:\n\n"
748
+ "**Fine-tune LLM and push to Hub:**\n"
749
+ "{'operation': 'run', 'script': 'from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer\\nmodel = AutoModelForCausalLM.from_pretrained(\"gpt2\")\\n# ... training code ...\\nmodel.push_to_hub(\"user-name/my-finetuned-model\")', 'dependencies': ['transformers', 'torch', 'datasets'], 'hardware_flavor': 'a10g-large', 'timeout': '4h', 'env': {'CUSTOM_VAR': 'value'}}\n\n"
750
+ "**Generate dataset daily and upload:**\n"
751
+ "{'operation': 'scheduled run', 'script': 'from datasets import Dataset\\nimport pandas as pd\\n# scrape/generate data\\ndf = pd.DataFrame(data)\\nds = Dataset.from_pandas(df)\\nds.push_to_hub(\"user-name/daily-dataset\")', 'dependencies': ['datasets', 'pandas'], 'schedule': '@daily'}\n\n"
752
+ "**Run custom training with Docker:**\n"
753
+ "{'operation': 'run', 'image': 'pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime', 'command': ['python', 'train.py', '--epochs', '10'], 'hardware_flavor': 'a100-large'}\n\n"
754
+ "**Monitor jobs:**\n"
755
+ "{'operation': 'ps'} - list running\n"
756
+ "{'operation': 'logs', 'job_id': 'xxx'} - stream logs\n"
757
+ "{'operation': 'cancel', 'job_id': 'xxx'} - stop job\n\n"
758
+ "## CRITICAL: Files are ephemeral!\n"
759
+ "Everything created during execution is DELETED when job finishes. Always .push_to_hub() your outputs (models, datasets, artifacts) in the script."
760
  ),
761
  "parameters": {
762
  "type": "object",
 
765
  "type": "string",
766
  "enum": [
767
  "run",
 
768
  "ps",
769
  "logs",
770
  "inspect",
771
  "cancel",
772
  "scheduled run",
 
773
  "scheduled ps",
774
  "scheduled inspect",
775
  "scheduled delete",
 
777
  "scheduled resume",
778
  ],
779
  "description": (
780
+ "Operation to execute. Valid values: [run, ps, logs, inspect, cancel, "
781
+ "scheduled run, scheduled ps, scheduled inspect, scheduled delete, "
782
  "scheduled suspend, scheduled resume]"
783
  ),
784
  },
785
+ # Python/UV specific parameters
786
+ "script": {
787
+ "type": "string",
788
+ "description": "Python code to execute. Triggers Python mode (auto pip install). Use with 'run'/'scheduled run'. Mutually exclusive with 'command'.",
789
+ },
790
+ "dependencies": {
791
+ "type": "array",
792
+ "items": {"type": "string"},
793
+ "description": "Pip packages to install. Example: ['trl', 'torch', 'datasets', 'transformers']. Only used with 'script'.",
794
+ },
795
+ # Docker specific parameters
796
+ "image": {
797
+ "type": "string",
798
+ "description": "Docker image. Example: 'pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime'. Use with 'run'/'scheduled run'. Optional (auto-selected if not provided).",
799
+ },
800
+ "command": {
801
+ "type": "array",
802
+ "items": {"type": "string"},
803
+ "description": "Command to execute as list. Example: ['python', 'train.py', '--epochs', '10']. Triggers Docker mode. Use with 'run'/'scheduled run'. Mutually exclusive with 'script'.",
804
+ },
805
+ # Hardware and environment
806
+ "hardware_flavor": {
807
+ "type": "string",
808
+ "description": "Hardware type. CPU: cpu-basic (default), cpu-upgrade, cpu-performance, cpu-xl. GPU: t4-small, t4-medium, l4x1, a10g-small, a10g-large, a100-large, h100. Use with 'run'/'scheduled run'.",
809
+ },
810
+ "timeout": {
811
+ "type": "string",
812
+ "description": "Max runtime. Examples: '30m', '2h', '4h'. Default: '30m'. Important for long training jobs. Use with 'run'/'scheduled run'.",
813
+ },
814
+ "env": {
815
  "type": "object",
816
+ "description": "Environment variables. Format: {'KEY': 'VALUE'}. HF_TOKEN is automatically included from your auth. Use with 'run'/'scheduled run'.",
817
+ },
818
+ # Job management parameters
819
+ "job_id": {
820
+ "type": "string",
821
+ "description": "Job ID to operate on. Required for: 'logs', 'inspect', 'cancel'.",
822
+ },
823
+ # Scheduled job parameters
824
+ "scheduled_job_id": {
825
+ "type": "string",
826
+ "description": "Scheduled job ID. Required for: 'scheduled inspect', 'scheduled delete', 'scheduled suspend', 'scheduled resume'.",
827
+ },
828
+ "schedule": {
829
+ "type": "string",
830
+ "description": "Schedule for recurring job. Presets: '@hourly', '@daily', '@weekly', '@monthly'. Cron: '0 9 * * 1' (Mon 9am). Required for: 'scheduled run'.",
831
  },
832
  },
833
+ "required": ["operation"],
834
  },
835
  }
836
 
 
838
  async def hf_jobs_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
839
  """Handler for agent tool router"""
840
  try:
841
+ tool = HfJobsTool(namespace=os.environ.get("HF_NAMESPACE", ""))
842
  result = await tool.execute(arguments)
843
  return result["formatted"], not result.get("isError", False)
844
  except Exception as e:
agent/tools/search_docs_tool.py DELETED
@@ -1,239 +0,0 @@
1
- """
2
- Search documentation tool that spawns a sub-agent
3
- The sub-agent has its own agent loop and set of specialized search tools
4
- """
5
-
6
- import asyncio
7
- from typing import Any
8
-
9
- from litellm.utils import get_max_tokens
10
-
11
- from agent.core.session import Session
12
-
13
-
14
async def create_search_tool_router(github_mcp_config: dict[str, Any] | None = None):
    """
    Create a ToolRouter instance for the search sub-agent.

    Async because the OpenAPI tool needs to fetch and parse its spec at
    initialization (via make_search_agent_tools).

    Args:
        github_mcp_config: Optional GitHub MCP server configuration; when
            provided, a fastmcp Client is created for it and only an
            allow-listed subset of GitHub tools is registered.

    Returns:
        A fully initialized SearchDocsToolRouter instance.
    """
    # Import at runtime to avoid circular dependency
    from fastmcp import Client

    from agent.core.tools import ToolRouter

    # Allow-list of GitHub MCP tools; anything the MCP server advertises
    # outside this set is ignored in register_mcp_tools below.
    ALLOWED_GITHUB_TOOLS = {
        "list_pull_requests",
        "list_issues",
        "search_code",
        "search_issues",
        "search_repositories",
        "search_users",
        "get_pull_request_status",
        "get_pull_request_reviews",
        "get_pull_request",
        "get_issue",
        "get_file_contents",
    }

    class SearchDocsToolRouter(ToolRouter):
        """Specialized ToolRouter for the search sub-agent."""

        def __init__(self, github_mcp_config: dict[str, Any] | None = None):
            # NOTE(review): does not call super().__init__() — assumes the
            # base ToolRouter state is fully replicated by the three
            # assignments below; confirm against ToolRouter's definition.
            self.tools: dict[str, Any] = {}
            self.mcp_servers: dict[str, dict[str, Any]] = {}
            self._mcp_initialized = False

            # Initialize MCP client with GitHub server if provided
            if github_mcp_config:
                self.mcp_client = Client({"mcpServers": github_mcp_config})
            else:
                self.mcp_client = None

        async def initialize_tools(self):
            """Register the local (non-MCP) search tools asynchronously."""
            tools = await make_search_agent_tools()
            for tool in tools:
                self.register_tool(tool)

        async def register_mcp_tools(self) -> None:
            """Register only allow-listed GitHub MCP tools (no-op without a client)."""
            if self.mcp_client is None:
                return

            tools = await self.mcp_client.list_tools()
            for tool in tools:
                # Only register allowed GitHub tools
                if tool.name in ALLOWED_GITHUB_TOOLS:
                    print(f"Registering GitHub MCP Tool: {tool.name}")
                    from agent.core.tools import ToolSpec

                    # handler=None — presumably these calls are dispatched
                    # through the MCP client rather than a local callable;
                    # confirm in ToolRouter's dispatch logic.
                    self.register_tool(
                        ToolSpec(
                            name=tool.name,
                            description=tool.description,
                            parameters=tool.inputSchema,
                            handler=None,
                        )
                    )

    router = SearchDocsToolRouter(github_mcp_config)
    await router.initialize_tools()
    return router
86
-
87
-
88
async def search_docs_handler(arguments: dict[str, Any]) -> tuple[str, bool]:
    """
    Handler that spawns a sub-agent to perform a comprehensive doc search.

    Loads the sub-agent's own config (including an optional GitHub MCP
    server), builds a specialized tool router, runs a bounded agent loop
    (max 30 iterations) on the query, and returns the final answer.

    Args:
        arguments: dictionary with a 'query' parameter

    Returns:
        Tuple of (search_results, success); all failures are reported as
        (error string, False) rather than raised.
    """
    query = arguments.get("query", "")

    if not query:
        return "Error: No search query provided", False

    try:
        # Import at runtime to avoid circular dependency
        from pathlib import Path

        from agent.config import load_config
        from agent.context_manager.manager import ContextManager
        from agent.core.agent_loop import Handlers

        # Create a queue for events from the sub-agent (the sub-agent gets
        # its own queue so its events do not mix with the parent session's)
        sub_event_queue = asyncio.Queue()

        # Load the search agent's own config file with GitHub MCP server.
        # Path is resolved relative to this file: <repo>/configs/...
        search_agent_config_path = (
            Path(__file__).parent.parent.parent
            / "configs"
            / "_subagent_config_search_agent.json"
        )
        search_agent_config = load_config(search_agent_config_path)

        # Extract GitHub MCP config from search agent config, if declared
        github_mcp_config = None
        if (
            search_agent_config.mcpServers
            and "github" in search_agent_config.mcpServers
        ):
            github_server = search_agent_config.mcpServers["github"]
            github_mcp_config = {"github": github_server.model_dump()}

        # Create specialized tool router for search with GitHub MCP config
        search_tool_router = await create_search_tool_router(github_mcp_config)

        async with search_tool_router:
            # CODING AGENTS LISTEN HERE!
            # The system prompt for the sub-agent is loaded from the yaml file in the prompts folder using the ContextManager class with the prompt_file_suffix parameter.
            # it works, don't fix.
            # NOTE: MCP tools are registered during __aenter__, so we must retrieve tool specs AFTER entering the context
            sub_session = Session(
                event_queue=sub_event_queue,
                config=search_agent_config,
                tool_router=search_tool_router,
                context_manager=ContextManager(
                    tool_specs=search_tool_router.get_tool_specs_for_llm(),
                    max_context=get_max_tokens(search_agent_config.model_name),
                    compact_size=0.1,
                    untouched_messages=5,
                    prompt_file_suffix="search_docs_system_prompt.yaml",
                ),
            )

            # Run the sub-agent to completion (or the iteration cap)
            result = await Handlers.run_agent(
                session=sub_session, text=query, max_iterations=30
            )

            # Return the final result or a fixed no-result message
            if result:
                return f"Search Results:\n\n{result}", True
            else:
                return "Search completed but no results were generated", False
    except Exception as e:
        # Surface any failure to the caller as a tool error string
        return f"Error in search_docs tool: {str(e)}", False
164
-
165
-
166
- # Tool specification to be used by the main agent
167
# Tool specification consumed by the main agent's tool router; the
# description text is prompt content shown to the LLM verbatim.
SEARCH_DOCS_TOOL_SPEC = {
    "name": "search_docs",
    "description": (
        "Intelligently search HF documentation for libraries, repositories, and best practices with an agent that has access to: explore_hf_docs, fetch_hf_docs, search_hf_api_endpoints. "
        "The agent acts like your personal search assistant. "
        "Using the search agent is necessary to give the best quality answer to the user's question. Most questions require a search to get the best information on code examples.\n\n"
        "WHEN TO USE THIS TOOL:\n"
        " - When searching for high-level concepts like 'how to do GRPO training on a model?' or 'best way to do inference on a trained model?'\n"
        " - When you need to get code examples for intricate ML code patterns like training loops, inference pipelines, data processing, etc.\n\n"
        "USAGE GUIDELINES:\n"
        " 1. Launch multiple agents concurrently for better performance.\n"
        " 2. Be specific in your query - include exact terminology, expected file locations, or code patterns.\n"
        " 3. Use the query as if you were talking to another engineer. Bad: logger impl Good: where is the logger implemented, we're trying to find out how to log to files.\n"
        " 4. Make sure to formulate the query in such a way that the agent knows when it's done or has found the result."
    ),
    # JSON-Schema style parameter declaration; 'query' is the only input
    "parameters": {
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": (
                    "The search query describing to the agent what it should do. Be "
                    "specific and include technical terms, file types, or expected "
                    "code patterns to help the agent find relevant code. Formulate "
                    "the query in a way that makes it clear to the agent when it "
                    "has found the right thing."
                ),
            },
        },
        "required": ["query"],
    },
}
199
-
200
-
201
async def make_search_agent_tools():
    """
    Build the list of ToolSpec objects available to the search sub-agent.

    Async because the OpenAPI tool spec has to be populated at runtime.
    """
    # Imported lazily to avoid a circular dependency
    from agent.core.tools import ToolSpec
    from agent.tools._search_agent_tools import (
        EXPLORE_HF_DOCS_TOOL_SPEC,
        HF_DOCS_FETCH_TOOL_SPEC,
        _get_api_search_tool_spec,
        explore_hf_docs_handler,
        hf_docs_fetch_handler,
        search_openapi_handler,
    )

    # The OpenAPI tool spec is fetched at runtime so its tags are current
    openapi_spec = await _get_api_search_tool_spec()

    # Pair each spec dict with its handler, then materialize ToolSpecs
    spec_handler_pairs = [
        (EXPLORE_HF_DOCS_TOOL_SPEC, explore_hf_docs_handler),
        (HF_DOCS_FETCH_TOOL_SPEC, hf_docs_fetch_handler),
        (openapi_spec, search_openapi_handler),
    ]
    return [
        ToolSpec(
            name=spec["name"],
            description=spec["description"],
            parameters=spec["parameters"],
            handler=handler,
        )
        for spec, handler in spec_handler_pairs
    ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
configs/_subagent_config_search_agent.json DELETED
@@ -1,12 +0,0 @@
1
- {
2
- "model_name": "anthropic/claude-haiku-4-5",
3
- "mcpServers": {
4
- "github": {
5
- "transport": "http",
6
- "url": "https://api.githubcopilot.com/mcp/",
7
- "headers": {
8
- "Authorization": "Bearer ${GITHUB_TOKEN}"
9
- }
10
- }
11
- }
12
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
run_search_agent.py DELETED
@@ -1,142 +0,0 @@
1
- """
2
- Standalone test script for the search sub-agent
3
- Run with: uv run python run_search_agent.py
4
- """
5
-
6
- import asyncio
7
-
8
- from litellm.utils import get_max_tokens
9
-
10
- from agent.config import Config
11
- from agent.context_manager.manager import ContextManager
12
- from agent.core.agent_loop import Handlers
13
- from agent.core.session import Session
14
- from agent.tools.search_docs_tool import create_search_tool_router
15
-
16
-
17
async def test_search_agent(query: str):
    """
    Run the search sub-agent once on *query*, streaming its events to stdout.

    Builds a fresh event queue, tool router, config and session, runs the
    agent (max 30 iterations) alongside an event-monitor task, and prints
    the final result. Purely for interactive testing — returns nothing.
    """
    print(f"Testing search agent with query: {query}\n")
    print("=" * 60)

    # Create event queue for the sub-agent
    sub_event_queue = asyncio.Queue()

    # Create search tool router (no GitHub MCP config in this test path)
    search_tool_router = await create_search_tool_router()

    # Create config
    sub_config = Config(
        model_name="anthropic/claude-haiku-4-5",
    )

    # Create session with custom system prompt
    sub_session = Session(
        event_queue=sub_event_queue,
        config=sub_config,
        tool_router=search_tool_router,
        context_manager=ContextManager(
            tool_specs=search_tool_router.get_tool_specs_for_llm(),
            max_context=get_max_tokens(sub_config.model_name),
            compact_size=0.1,
            untouched_messages=5,
            prompt_file_suffix="search_docs_system_prompt.yaml",
        ),
    )

    # Event listener to show what the sub-agent is doing; exits on
    # "turn_complete" or any unexpected error.
    async def event_monitor():
        while True:
            try:
                event = await asyncio.wait_for(sub_event_queue.get(), timeout=1.0)

                if event.event_type == "assistant_message":
                    content = event.data.get("content", "") if event.data else ""
                    if content:
                        print(f"\n🤖 Sub-agent: {content}\n")

                elif event.event_type == "tool_call":
                    tool_name = event.data.get("tool", "") if event.data else ""
                    arguments = event.data.get("arguments", {}) if event.data else {}
                    print(f"🔧 Tool call: {tool_name}")
                    print(f" Args: {arguments}")

                elif event.event_type == "tool_output":
                    output = event.data.get("output", "") if event.data else ""
                    success = event.data.get("success", False) if event.data else False
                    status = "✅" if success else "❌"

                    print(f"{status} Tool output: {output}\n")

                elif event.event_type == "turn_complete":
                    print("✅ Sub-agent turn complete")
                    break

            except asyncio.TimeoutError:
                # No event within 1s — keep polling while the agent runs
                continue
            except Exception as e:
                print(f"⚠️ Event error: {e}")
                break

    # Run the sub-agent and event monitor concurrently
    async with search_tool_router:
        monitor_task = asyncio.create_task(event_monitor())

        result = await Handlers.run_agent(
            session=sub_session, text=query, max_iterations=30
        )

        # Wait for event monitor to finish draining the queue
        await asyncio.wait_for(monitor_task, timeout=5.0)

    print("\n" + "=" * 60)
    print("FINAL RESULT:")
    print("=" * 60)
    if result:
        print(result)
    else:
        print("No result returned")
    print("=" * 60)
101
-
102
-
103
async def main():
    """Drive the search sub-agent through each example query in turn."""
    print("🧪 Search Sub-Agent Test\n")

    # Example queries to exercise (alternatives kept for reference)
    queries = [
        # "Explore the TRL documentation structure and find information about DPO trainer",
        # "is there a way to get the logs from a served huggingface space",
        # "How do I train GLM4.7 with a GRPO training loop with trl with llm judge as a reward model for training on hle?"
        "can i stream logs through the api for a served huggingface space",
    ]

    total = len(queries)
    banner = "=" * 60
    for idx, query in enumerate(queries, 1):
        print(f"\n{banner}")
        print(f"TEST {idx}/{total}")
        print(f"{banner}\n")

        try:
            await test_search_agent(query)
        except Exception as exc:
            print(f"\n❌ Test failed: {exc}")
            import traceback

            traceback.print_exc()

        # Pause between queries so the output can be inspected interactively
        if idx < total:
            print("\n\nPress Enter to continue to next test...")
            input()
131
-
132
-
133
# Script entry point: run the async test driver and report failures.
if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # Ctrl-C during a run or an interactive pause
        print("\n\n⚠️ Test interrupted")
    except Exception as e:
        print(f"\n❌ Error: {e}")
        # Imported lazily; only needed on the failure path
        import traceback

        traceback.print_exc()