akseljoonas (HF Staff) committed
Commit eb92351 · 1 Parent(s): 5e8489d

Added an API search tool and refined the HF docs search tools
agent/prompts/search_docs_system_prompt.yaml CHANGED
@@ -18,288 +18,6 @@ search_docs_system_prompt: |
   - Include domain-specific terminology when applicable
   - Try both specific terms and general related terms
 
-   # Hugging Face Docs structure
-
-   - id: hub
-     url: /docs/hub
-     category: Hub & Client Libraries
-     docs on: Hub fundamentals — repos, models/datasets/spaces, auth, versioning, metadata.
-
-   - id: transformers
-     url: /docs/transformers
-     category: Core ML Libraries
-     docs on: Core model library — architectures, configs, tokenizers, training & inference APIs.
-
-   - id: diffusers
-     url: /docs/diffusers
-     category: Core ML Libraries
-     docs on: Diffusion pipelines, schedulers, fine-tuning, training, and deployment patterns.
-
-   - id: datasets
-     url: /docs/datasets
-     category: Core ML Libraries
-     docs on: Dataset loading, streaming, processing, Arrow format, Hub integration.
-
-   - id: gradio
-     url: https://www.gradio.app/docs/
-     category: Collaboration & Extras
-     docs on: UI components and demos for interacting with ML models.
-
-   - id: trackio
-     url: /docs/trackio
-     category: Collaboration & Extras
-     docs on: Experiment tracking, metrics logging, and run comparison.
-
-   - id: smolagents
-     url: /docs/smolagents
-     category: Collaboration & Extras
-     docs on: Lightweight agent abstractions and tool-using patterns.
-
-   - id: huggingface_hub
-     url: /docs/huggingface_hub
-     category: Hub & Client Libraries
-     docs on: Python client for Hub operations (auth, upload/download, repo management).
-
-   - id: huggingface.js
-     url: /docs/huggingface.js
-     category: Hub & Client Libraries
-     docs on: JS/TS client for Hub APIs in browser and Node.
-
-   - id: transformers.js
-     url: /docs/transformers.js
-     category: Core ML Libraries
-     docs on: Run Transformer models in browser/Node via WebGPU/WASM.
-
-   - id: inference-providers
-     url: /docs/inference-providers
-     category: Deployment & Inference
-     docs on: Unified interface for third-party inference backends.
-
-   - id: inference-endpoints
-     url: /docs/inference-endpoints
-     category: Deployment & Inference
-     docs on: Managed, scalable model deployments on HF infrastructure.
-
-   - id: peft
-     url: /docs/peft
-     category: Training & Optimization
-     docs on: Parameter-efficient fine-tuning methods (LoRA, adapters, etc.).
-
-   - id: accelerate
-     url: /docs/accelerate
-     category: Training & Optimization
-     docs on: Hardware-agnostic, distributed and mixed-precision training orchestration.
-
-   - id: optimum
-     url: /docs/optimum
-     category: Training & Optimization
-     docs on: Hardware-aware optimization and model export tooling.
-
-   - id: optimum-habana
-     url: /docs/optimum-habana
-     category: —
-     docs on: Training and inference on Habana Gaudi accelerators.
-
-   - id: optimum-neuron
-     url: /docs/optimum-neuron
-     category: Training & Optimization
-     docs on: Optimization workflows for AWS Inferentia/Trainium.
-
-   - id: optimum-intel
-     url: /docs/optimum-intel
-     category: —
-     docs on: Intel CPU/GPU optimizations (OpenVINO, IPEX).
-
-   - id: optimum-executorch
-     url: /docs/optimum-executorch
-     category: Training & Optimization
-     docs on: Exporting models to ExecuTorch for edge/mobile.
-
-   - id: optimum-tpu
-     url: /docs/optimum-tpu
-     category: Training & Optimization
-     docs on: TPU-specific training and optimization paths.
-
-   - id: tokenizers
-     url: /docs/tokenizers
-     category: Core ML Libraries
-     docs on: Fast tokenizer internals, training, and low-level APIs.
-
-   - id: llm-course
-     url: /learn/llm-course
-     category: —
-     docs on: End-to-end LLM concepts, training, and deployment.
-
-   - id: robotics-course
-     url: /learn/robotics-course
-     category: —
-     docs on: Learning-based robotics foundations.
-
-   - id: mcp-course
-     url: /learn/mcp-course
-     category: —
-     docs on: Model Context Protocol concepts and usage.
-
-   - id: smol-course
-     url: /learn/smol-course
-     category: —
-     docs on: Small-model and efficiency-focused workflows.
-
-   - id: agents-course
-     url: /learn/agents-course
-     category: —
-     docs on: Tool-using, planning, and multi-step agent design.
-
-   - id: deep-rl-course
-     url: /learn/deep-rl-course
-     category: —
-     docs on: Deep reinforcement learning foundations.
-
-   - id: computer-vision-course
-     url: /learn/computer-vision-course
-     category: —
-     docs on: Vision models, datasets, and pipelines.
-
-   - id: evaluate
-     url: /docs/evaluate
-     category: Core ML Libraries
-     docs on: Metrics, evaluation workflows, and training-loop integration.
-
-   - id: tasks
-     url: /tasks
-     category: Hub & Client Libraries
-     docs on: Canonical task definitions and model categorization.
-
-   - id: dataset-viewer
-     url: /docs/dataset-viewer
-     category: Hub & Client Libraries
-     docs on: Dataset preview, streaming views, and viewer internals.
-
-   - id: trl
-     url: /docs/trl
-     category: Training & Optimization
-     docs on: RLHF, DPO, PPO, and SFT utilities for LLMs.
-
-   - id: simulate
-     url: /docs/simulate
-     category: —
-     docs on: Experimental simulation tools and workflows.
-
-   - id: sagemaker
-     url: /docs/sagemaker
-     category: Deployment & Inference
-     docs on: Deploying Hugging Face models on AWS SageMaker.
-
-   - id: timm
-     url: /docs/timm
-     category: Core ML Libraries
-     docs on: Image model zoo and utilities via HF integrations.
-
-   - id: safetensors
-     url: /docs/safetensors
-     category: Training & Optimization
-     docs on: Safe, fast tensor serialization format.
-
-   - id: tgi
-     url: /docs/text-generation-inference
-     category: Deployment & Inference
-     docs on: High-throughput text generation server for LLMs.
-
-   - id: setfit
-     url: /docs/setfit
-     category: —
-     docs on: Few-shot text classification via sentence embeddings.
-
-   - id: audio-course
-     url: /learn/audio-course
-     category: —
-     docs on: Speech and audio models, datasets, and tasks.
-
-   - id: lerobot
-     url: /docs/lerobot
-     category: Collaboration & Extras
-     docs on: Robotics datasets, policies, and learning workflows.
-
-   - id: autotrain
-     url: /docs/autotrain
-     category: Collaboration & Extras
-     docs on: No/low-code model training on Hugging Face.
-
-   - id: tei
-     url: /docs/text-embeddings-inference
-     category: Deployment & Inference
-     docs on: Optimized inference server for embedding workloads.
-
-   - id: bitsandbytes
-     url: /docs/bitsandbytes
-     category: Training & Optimization
-     docs on: Quantization and memory-efficient optimizers.
-
-   - id: cookbook
-     url: /learn/cookbook
-     category: —
-     docs on: Practical, task-oriented recipes across the ecosystem.
-
-   - id: sentence_transformers
-     url: https://sbert.net/
-     category: Core ML Libraries
-     docs on: Embedding models, training recipes, similarity/search workflows.
-
-   - id: ml-games-course
-     url: /learn/ml-games-course
-     category: —
-     docs on: Game-based ML and reinforcement learning experiments.
-
-   - id: diffusion-course
-     url: /learn/diffusion-course
-     category: —
-     docs on: Diffusion model theory and hands-on practice.
-
-   - id: ml-for-3d-course
-     url: /learn/ml-for-3d-course
-     category: —
-     docs on: 3D representations, models, and learning techniques.
-
-   - id: chat-ui
-     url: /docs/chat-ui
-     category: Collaboration & Extras
-     docs on: Reference chat interfaces for LLM deployment.
-
-   - id: leaderboards
-     url: /docs/leaderboards
-     category: Collaboration & Extras
-     docs on: Evaluation leaderboards and submission mechanics.
-
-   - id: lighteval
-     url: /docs/lighteval
-     category: Training & Optimization
-     docs on: Lightweight, reproducible LLM evaluation framework.
-
-   - id: argilla
-     url: https://argilla-io.github.io/argilla/
-     category: Collaboration & Extras
-     docs on: Data annotation, feedback, and human-in-the-loop workflows.
-
-   - id: distilabel
-     url: https://distilabel.argilla.io/
-     category: Collaboration & Extras
-     docs on: Synthetic data generation and distillation pipelines.
-
-   - id: microsoft-azure
-     url: /docs/microsoft-azure
-     category: Deployment & Inference
-     docs on: Azure deployment and integration guides.
-
-   - id: kernels
-     url: /docs/kernels
-     category: Core ML Libraries
-     docs on: Lightweight execution environments and notebook-style workflows.
-
-   - id: google-cloud
-     url: /docs/google-cloud
-     category: Deployment & Inference
-     docs on: GCP deployment and serving workflows.
-
   # Response Guidelines
 
   After gathering results, synthesize them following these principles:
 
agent/tools/_search_agent_tools.py CHANGED
@@ -3,12 +3,156 @@ Tools available to the search sub-agent
  These tools are used by the search sub-agent spawned by search_docs_tool
  """
 
  import os
  from typing import Any
 
  import httpx
  from bs4 import BeautifulSoup
 
 
  async def explore_docs_structure_handler(arguments: dict[str, Any]) -> tuple[str, bool]:
      """
@@ -31,109 +175,316 @@ async def explore_docs_structure_handler(arguments: dict[str, Any]) -> tuple[str
      if not hf_token:
          return "Error: HF_TOKEN environment variable not set", False
 
-     # Build the URL for the main page (without .md to get HTML with navigation)
-     base_url = "https://huggingface.co/docs"
      endpoint = endpoint.lstrip("/")
-     url = f"{base_url}/{endpoint}"
 
      try:
-         headers = {"Authorization": f"Bearer {hf_token}"}
-
-         # Fetch the main HTML page
-         async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
-             response = await client.get(url, headers=headers)
-             response.raise_for_status()
-
-         html_content = response.text
-
-         # Parse the sidebar navigation with BeautifulSoup
-         soup = BeautifulSoup(html_content, "html.parser")
-
-         # Find the sidebar nav (contains flex-auto class)
-         sidebar = soup.find("nav", class_=lambda x: x and "flex-auto" in x)
 
-         if not sidebar:
-             return (
-                 f"Error: Could not find navigation sidebar on {url}. "
-                 "The page structure might be different.",
-                 False,
              )
 
-         # Extract all links from the sidebar
-         links = sidebar.find_all("a", href=True)
-         nav_data = []
 
-         for link in links:
-             title = link.get_text(strip=True)
-             href = link["href"]
 
-             # Make URL absolute
-             if href.startswith("/"):
-                 page_url = f"https://huggingface.co{href}"
-             else:
-                 page_url = href
 
-             nav_data.append({"title": title, "url": page_url})
 
-         if not nav_data:
-             return f"No navigation links found in sidebar at {url}", False
 
-         # Now fetch glimpses (first 200 chars) for each page
-         result_items = []
 
-         async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
-             for item in nav_data[:20]:  # Limit to first 20 to avoid too many requests
-                 md_url = f"{item['url']}.md"
-
-                 try:
-                     md_response = await client.get(md_url, headers=headers)
-                     md_response.raise_for_status()
-
-                     content = md_response.text
-                     # Get first 200 characters as glimpse
-                     glimpse = content[:200].strip()
-                     if len(content) > 200:
-                         glimpse += "..."
-
-                     result_items.append(
-                         {
-                             "title": item["title"],
-                             "url": item["url"],
-                             "md_url": md_url,
-                             "glimpse": glimpse,
-                         }
-                     )
-                 except Exception as e:
-                     # If fetching glimpse fails, include without glimpse
-                     result_items.append(
-                         {
-                             "title": item["title"],
-                             "url": item["url"],
-                             "md_url": f"{item['url']}.md",
-                             "glimpse": f"[Could not fetch glimpse: {str(e)[:50]}]",
-                         }
-                     )
-
-         # Format the results nicely
-         result = f"Documentation structure for: {url}\n\n"
-         result += f"Found {len(result_items)} pages:\n\n"
-
-         for i, item in enumerate(result_items, 1):
-             result += f"{i}. **{item['title']}**\n"
-             result += f"   URL: {item['url']}\n"
-             result += f"   Glimpse: {item['glimpse']}\n\n"
 
-         return result, True
 
      except httpx.HTTPStatusError as e:
-         return (
-             f"HTTP error fetching {url}: {e.response.status_code} - {e.response.text[:200]}",
-             False,
-         )
      except httpx.RequestError as e:
-         return f"Request error fetching {url}: {str(e)}", False
      except Exception as e:
-         return f"Error exploring docs structure: {str(e)}", False
 
 
  async def hf_docs_fetch_handler(arguments: dict[str, Any]) -> tuple[str, bool]:
@@ -146,7 +497,9 @@ async def hf_docs_fetch_handler(arguments: dict[str, Any]) -> tuple[str, bool]:
      Returns:
          Tuple of (full_markdown_content, success)
      """
      url = arguments.get("url", "")
 
      if not url:
          return "Error: No URL provided", False
@@ -168,14 +521,25 @@ async def hf_docs_fetch_handler(arguments: dict[str, Any]) -> tuple[str, bool]:
          # Make request with auth
          headers = {"Authorization": f"Bearer {hf_token}"}
 
          async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
              response = await client.get(url, headers=headers)
              response.raise_for_status()
 
          content = response.text
 
          # Return the markdown content directly
          result = f"Documentation from: {url}\n\n{content}"
          return result, True
 
      except httpx.HTTPStatusError as e:
@@ -195,8 +559,8 @@ EXPLORE_DOCS_STRUCTURE_TOOL_SPEC = {
      "name": "explore_docs_structure",
      "description": (
          "Explore the structure of HF documentation by parsing the sidebar navigation. "
-         "Provide an endpoint (e.g., 'trl', 'transformers', 'datasets') and get a list of all "
-         "documentation pages with their titles, URLs, and a 200-character glimpse of each page. "
          "Use this to discover what documentation is available before fetching specific pages."
      ),
@@ -204,9 +568,122 @@ EXPLORE_DOCS_STRUCTURE_TOOL_SPEC = {
          "properties": {
              "endpoint": {
                  "type": "string",
                  "description": (
-                     "The documentation endpoint to explore (e.g., 'trl', 'transformers', 'hub'). "
-                     "Do not include '/docs/' or leading slashes."
                  ),
              },
          },
@@ -237,3 +714,34 @@ HF_DOCS_FETCH_TOOL_SPEC = {
          "required": ["url"],
      },
  }
  These tools are used by the search sub-agent spawned by search_docs_tool
  """
 
+ import asyncio
  import os
+ import time
  from typing import Any
 
  import httpx
  from bs4 import BeautifulSoup
 
+ # Cache for OpenAPI spec to avoid repeated fetches
+ _openapi_spec_cache: dict[str, Any] | None = None
+
+
+ async def _fetch_html_page(hf_token: str, endpoint: str) -> str:
+     """Fetch the HTML page for a given endpoint"""
+     base_url = "https://huggingface.co/docs"
+     url = f"{base_url}/{endpoint}"
+     headers = {"Authorization": f"Bearer {hf_token}"}
+
+     fetch_start = time.perf_counter()
+     async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
+         response = await client.get(url, headers=headers)
+         response.raise_for_status()
+
+     fetch_time = time.perf_counter() - fetch_start
+     print(f"[DEBUG] _fetch_html_page: Fetched in {fetch_time:.2f}s")
+
+     return response.text
+
+
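`_fetch_html_page` requests the HTML rendition (not `.md`) so the sidebar navigation is present, and the handler normalizes the endpoint with `lstrip("/")` before building the URL. That construction can be sketched in isolation (a minimal mirror with a hypothetical wrapper name, not the production code):

```python
# Mirror of the URL construction in _fetch_html_page combined with the
# handler's endpoint normalization; build_docs_url is a hypothetical name.
BASE_URL = "https://huggingface.co/docs"


def build_docs_url(endpoint: str) -> str:
    """Leading slashes are stripped so '/trl' and 'trl' resolve identically."""
    return f"{BASE_URL}/{endpoint.lstrip('/')}"


print(build_docs_url("/trl"))  # https://huggingface.co/docs/trl
print(build_docs_url("transformers"))  # https://huggingface.co/docs/transformers
```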
+ def _parse_sidebar_navigation(html_content: str) -> list[dict[str, str]]:
+     """Parse the sidebar navigation and extract all links"""
+     parse_start = time.perf_counter()
+
+     soup = BeautifulSoup(html_content, "html.parser")
+     sidebar = soup.find("nav", class_=lambda x: x and "flex-auto" in x)
+
+     if not sidebar:
+         raise ValueError("Could not find navigation sidebar")
+
+     links = sidebar.find_all("a", href=True)
+     nav_data = []
+
+     for link in links:
+         title = link.get_text(strip=True)
+         href = link["href"]
+
+         # Make URL absolute
+         page_url = f"https://huggingface.co{href}" if href.startswith("/") else href
+         nav_data.append({"title": title, "url": page_url})
+
+     parse_time = time.perf_counter() - parse_start
+     print(
+         f"[DEBUG] _parse_sidebar_navigation: Parsed in {parse_time:.2f}s, found {len(nav_data)} links"
+     )
+
+     return nav_data
+
+
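The link normalization inside `_parse_sidebar_navigation` treats root-relative hrefs and absolute URLs differently. The same expression, isolated behind a hypothetical helper name for clarity:

```python
def absolutize(href: str) -> str:
    """Root-relative hrefs get the huggingface.co origin prepended;
    already-absolute URLs (e.g. the Gradio docs) pass through unchanged."""
    return f"https://huggingface.co{href}" if href.startswith("/") else href


print(absolutize("/docs/trl"))  # https://huggingface.co/docs/trl
print(absolutize("https://www.gradio.app/docs/"))  # https://www.gradio.app/docs/
```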
+ async def _fetch_single_glimpse(
+     client: httpx.AsyncClient, hf_token: str, item: dict[str, str]
+ ) -> dict[str, str]:
+     """Fetch a glimpse (first 300 chars) for a single page"""
+     md_url = f"{item['url']}.md"
+     headers = {"Authorization": f"Bearer {hf_token}"}
+
+     try:
+         response = await client.get(md_url, headers=headers)
+         response.raise_for_status()
+
+         content = response.text
+         glimpse = content[:300].strip()
+         if len(content) > 300:
+             glimpse += "..."
+
+         return {
+             "title": item["title"],
+             "url": item["url"],
+             "md_url": md_url,
+             "glimpse": glimpse,
+         }
+     except Exception as e:
+         return {
+             "title": item["title"],
+             "url": item["url"],
+             "md_url": md_url,
+             "glimpse": f"[Could not fetch glimpse: {str(e)[:50]}]",
+         }
+
+
+ async def _fetch_all_glimpses(
+     hf_token: str, nav_data: list[dict[str, str]]
+ ) -> list[dict[str, str]]:
+     """Fetch glimpses for all pages in parallel"""
+     glimpse_start = time.perf_counter()
+
+     async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
+         result_items = await asyncio.gather(
+             *[_fetch_single_glimpse(client, hf_token, item) for item in nav_data]
+         )
+
+     glimpse_time = time.perf_counter() - glimpse_start
+     print(
+         f"[DEBUG] _fetch_all_glimpses: Fetched {len(result_items)} glimpses in {glimpse_time:.2f}s"
+     )
+
+     return list(result_items)
+
+
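`_fetch_all_glimpses` relies on `asyncio.gather` returning results in input order, and `_fetch_single_glimpse` truncates each page to 300 characters. The truncation rule in isolation (a hypothetical standalone helper mirroring the logic above):

```python
def make_glimpse(content: str, limit: int = 300) -> str:
    """First `limit` characters, with an ellipsis only when text was cut."""
    glimpse = content[:limit].strip()
    if len(content) > limit:
        glimpse += "..."
    return glimpse


print(make_glimpse("short page"))  # short page
print(len(make_glimpse("x" * 1000)))  # 303 (300 chars + "...")
```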
+ def _format_exploration_results(
+     endpoint: str, result_items: list[dict[str, str]]
+ ) -> str:
+     """Format the exploration results as a readable string"""
+     base_url = "https://huggingface.co/docs"
+     url = f"{base_url}/{endpoint}"
+     result = f"Documentation structure for: {url}\n\n"
+     result += f"Found {len(result_items)} pages:\n\n"
+
+     for i, item in enumerate(result_items, 1):
+         result += f"{i}. **{item['title']}**\n"
+         result += f"   URL: {item['url']}\n"
+         result += f"   Glimpse: {item['glimpse']}\n\n"
+
+     return result
+
+
+ async def _explore_docs_structure(hf_token: str, endpoint: str) -> str:
+     """Main function to explore documentation structure"""
+     start_time = time.perf_counter()
+     print(f"[DEBUG] _explore_docs_structure: Starting for endpoint '{endpoint}'")
+
+     # Fetch HTML page
+     html_content = await _fetch_html_page(hf_token, endpoint)
+
+     # Parse navigation
+     nav_data = _parse_sidebar_navigation(html_content)
+
+     if not nav_data:
+         raise ValueError(f"No navigation links found for endpoint '{endpoint}'")
+
+     # Fetch all glimpses in parallel
+     result_items = await _fetch_all_glimpses(hf_token, nav_data)
+
+     # Format results
+     result = _format_exploration_results(endpoint, result_items)
+
+     total_time = time.perf_counter() - start_time
+     print(f"[DEBUG] _explore_docs_structure: Total time {total_time:.2f}s")
+
+     return result
+
 
  async def explore_docs_structure_handler(arguments: dict[str, Any]) -> tuple[str, bool]:
      """
 
      if not hf_token:
          return "Error: HF_TOKEN environment variable not set", False
 
      endpoint = endpoint.lstrip("/")
 
      try:
+         result = await _explore_docs_structure(hf_token, endpoint)
+         return result, True
 
+     except httpx.HTTPStatusError as e:
+         return (
+             f"HTTP error: {e.response.status_code} - {e.response.text[:200]}",
+             False,
+         )
+     except httpx.RequestError as e:
+         return f"Request error: {str(e)}", False
+     except ValueError as e:
+         return f"Error: {str(e)}", False
+     except Exception as e:
+         return f"Unexpected error: {str(e)}", False
+
+
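Every handler in this module returns a `(message, success)` tuple instead of raising, so the agent loop can decide how to surface failures to the model. A sketch of what a caller might do with that contract (the `dispatch` function is hypothetical, not part of the commit):

```python
def dispatch(result: tuple[str, bool]) -> str:
    """Route a handler result: successful output goes to the model as tool
    output; error strings are surfaced as tool errors instead of exceptions."""
    message, ok = result
    return f"tool_output: {message}" if ok else f"tool_error: {message}"


print(dispatch(("Found 12 pages", True)))  # tool_output: Found 12 pages
print(dispatch(("Error: No tag provided", False)))  # tool_error: Error: No tag provided
```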
+ async def _fetch_openapi_spec() -> dict[str, Any]:
+     """Fetch and cache the HuggingFace OpenAPI specification"""
+     global _openapi_spec_cache
+
+     if _openapi_spec_cache is not None:
+         print("[DEBUG] _fetch_openapi_spec: Using cached spec")
+         return _openapi_spec_cache
+
+     start_time = time.perf_counter()
+     print("[DEBUG] _fetch_openapi_spec: Fetching from API")
+
+     url = "https://huggingface.co/.well-known/openapi.json"
+
+     async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
+         response = await client.get(url)
+         response.raise_for_status()
+
+     spec = response.json()
+     _openapi_spec_cache = spec
+
+     fetch_time = time.perf_counter() - start_time
+     print(f"[DEBUG] _fetch_openapi_spec: Fetched and cached in {fetch_time:.2f}s")
+
+     return spec
+
+
+ def _extract_all_tags(spec: dict[str, Any]) -> list[str]:
+     """Extract all unique tags from the OpenAPI spec"""
+     tags = set()
+
+     # Get tags from the tags section
+     for tag_obj in spec.get("tags", []):
+         if "name" in tag_obj:
+             tags.add(tag_obj["name"])
+
+     # Also get tags from paths (in case some aren't in the tags section)
+     for path, path_item in spec.get("paths", {}).items():
+         for method, operation in path_item.items():
+             if method in ["get", "post", "put", "delete", "patch", "head", "options"]:
+                 for tag in operation.get("tags", []):
+                     tags.add(tag)
+
+     return sorted(list(tags))
+
+
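`_extract_all_tags` unions the spec's declared `tags` section with tags that only appear on individual operations. The same logic run against a tiny inline spec (the sample data is invented for illustration):

```python
def extract_tags(spec: dict) -> list[str]:
    """Union of declared tags and per-operation tags, sorted."""
    tags = {t["name"] for t in spec.get("tags", []) if "name" in t}
    methods = {"get", "post", "put", "delete", "patch", "head", "options"}
    for path_item in spec.get("paths", {}).values():
        for method, operation in path_item.items():
            if method in methods:
                tags.update(operation.get("tags", []))
    return sorted(tags)


# "datasets" is referenced only by an operation, not declared in "tags",
# yet it still shows up in the result.
sample_spec = {
    "tags": [{"name": "models"}],
    "paths": {"/api/datasets": {"get": {"tags": ["datasets"]}}},
}
print(extract_tags(sample_spec))  # ['datasets', 'models']
```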
+ def _search_openapi_by_tag(spec: dict[str, Any], tag: str) -> list[dict[str, Any]]:
+     """Search for API endpoints with a specific tag"""
+     results = []
+     paths = spec.get("paths", {})
+     servers = spec.get("servers", [])
+     base_url = (
+         servers[0].get("url", "https://huggingface.co")
+         if servers
+         else "https://huggingface.co"
+     )
+
+     for path, path_item in paths.items():
+         for method, operation in path_item.items():
+             if method not in [
+                 "get",
+                 "post",
+                 "put",
+                 "delete",
+                 "patch",
+                 "head",
+                 "options",
+             ]:
+                 continue
+
+             operation_tags = operation.get("tags", [])
+             if tag in operation_tags:
+                 # Extract parameters
+                 parameters = operation.get("parameters", [])
+                 request_body = operation.get("requestBody", {})
+                 responses = operation.get("responses", {})
+
+                 results.append(
+                     {
+                         "path": path,
+                         "method": method.upper(),
+                         "operationId": operation.get("operationId", ""),
+                         "summary": operation.get("summary", ""),
+                         "description": operation.get("description", ""),
+                         "parameters": parameters,
+                         "request_body": request_body,
+                         "responses": responses,
+                         "base_url": base_url,
+                     }
+                 )
+
+     return results
+
+
+ def _generate_curl_example(endpoint: dict[str, Any]) -> str:
+     """Generate a curl command example for an endpoint"""
+     method = endpoint["method"]
+     path = endpoint["path"]
+     base_url = endpoint["base_url"]
+
+     # Build the full URL with example path parameters
+     full_path = path
+     for param in endpoint.get("parameters", []):
+         if param.get("in") == "path" and param.get("required"):
+             param_name = param["name"]
+             example = param.get(
+                 "example", param.get("schema", {}).get("example", f"<{param_name}>")
              )
+             full_path = full_path.replace(f"{{{param_name}}}", str(example))
+
+     curl = f"curl -X {method} \\\n  '{base_url}{full_path}'"
+
+     # Add query parameters if any
+     query_params = [p for p in endpoint.get("parameters", []) if p.get("in") == "query"]
+     if query_params and query_params[0].get("required"):
+         param = query_params[0]
+         example = param.get("example", param.get("schema", {}).get("example", "value"))
+         curl += f"?{param['name']}={example}"
+
+     # Add headers
+     curl += " \\\n  -H 'Authorization: Bearer $HF_TOKEN'"
+
+     # Add request body if applicable
+     if method in ["POST", "PUT", "PATCH"] and endpoint.get("request_body"):
+         content = endpoint["request_body"].get("content", {})
+         if "application/json" in content:
+             curl += " \\\n  -H 'Content-Type: application/json'"
+             schema = content["application/json"].get("schema", {})
+             example = schema.get("example", "{}")
+             if isinstance(example, dict):
+                 import json
+
+                 example = json.dumps(example, indent=2)
+             curl += f" \\\n  -d '{example}'"
+
+     return curl
+
+
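The path-parameter substitution in `_generate_curl_example` falls back to a `<name>` placeholder when the spec provides no example value. That substitution step, isolated (hypothetical helper name, invented sample parameters):

```python
def fill_path_params(path: str, parameters: list[dict]) -> str:
    """Substitute required path parameters with their example values,
    falling back to a <name> placeholder when no example is given."""
    for param in parameters:
        if param.get("in") == "path" and param.get("required"):
            name = param["name"]
            example = param.get(
                "example", param.get("schema", {}).get("example", f"<{name}>")
            )
            path = path.replace(f"{{{name}}}", str(example))
    return path


with_example = [{"name": "repo_id", "in": "path", "required": True, "example": "gpt2"}]
without_example = [{"name": "repo_id", "in": "path", "required": True}]
print(fill_path_params("/api/models/{repo_id}", with_example))  # /api/models/gpt2
print(fill_path_params("/api/models/{repo_id}", without_example))  # /api/models/<repo_id>
```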
+ def _format_parameters(parameters: list[dict[str, Any]]) -> str:
+     """Format parameter information from OpenAPI spec"""
+     if not parameters:
+         return ""
+
+     # Group parameters by type
+     path_params = [p for p in parameters if p.get("in") == "path"]
+     query_params = [p for p in parameters if p.get("in") == "query"]
+     header_params = [p for p in parameters if p.get("in") == "header"]
+
+     output = []
+
+     if path_params:
+         output.append("**Path Parameters:**")
+         for param in path_params:
+             name = param.get("name", "")
+             required = " (required)" if param.get("required") else " (optional)"
+             description = param.get("description", "")
+             param_type = param.get("schema", {}).get("type", "string")
+             example = param.get("example") or param.get("schema", {}).get("example", "")
+
+             output.append(f"- `{name}` ({param_type}){required}: {description}")
+             if example:
+                 output.append(f"  Example: `{example}`")
+
+     if query_params:
+         if output:
+             output.append("")
+         output.append("**Query Parameters:**")
+         for param in query_params:
+             name = param.get("name", "")
+             required = " (required)" if param.get("required") else " (optional)"
+             description = param.get("description", "")
+             param_type = param.get("schema", {}).get("type", "string")
+             example = param.get("example") or param.get("schema", {}).get("example", "")
+
+             output.append(f"- `{name}` ({param_type}){required}: {description}")
+             if example:
+                 output.append(f"  Example: `{example}`")
+
+     if header_params:
+         if output:
+             output.append("")
+         output.append("**Header Parameters:**")
+         for param in header_params:
+             name = param.get("name", "")
+             required = " (required)" if param.get("required") else " (optional)"
+             description = param.get("description", "")
+
+             output.append(f"- `{name}`{required}: {description}")
+
+     return "\n".join(output)
+
+
+ def _format_response_info(responses: dict[str, Any]) -> str:
+     """Format response information from OpenAPI spec"""
+     if not responses:
+         return "No response information available"
+
+     output = []
+     for status_code, response_obj in list(responses.items())[:3]:  # Show first 3 status codes
+         desc = response_obj.get("description", "")
+         output.append(f"- **{status_code}**: {desc}")
+
+         content = response_obj.get("content", {})
+         if "application/json" in content:
+             schema = content["application/json"].get("schema", {})
+             if "type" in schema:
+                 output.append(f"  Returns: {schema.get('type', 'object')}")
+
+     return "\n".join(output)
+
+
+ def _format_openapi_results(results: list[dict[str, Any]], tag: str) -> str:
+     """Format OpenAPI search results as markdown with curl examples"""
+     if not results:
+         return f"No API endpoints found with tag '{tag}'"
+
+     output = f"# API Endpoints for tag: `{tag}`\n\n"
+     output += f"Found {len(results)} endpoint(s)\n\n"
+     output += "---\n\n"
+
+     for i, endpoint in enumerate(results, 1):
+         output += f"## {i}. {endpoint['method']} {endpoint['path']}\n\n"
+
+         if endpoint["summary"]:
+             output += f"**Summary:** {endpoint['summary']}\n\n"
+
+         if endpoint["description"]:
+             desc = endpoint["description"][:300]
+             if len(endpoint["description"]) > 300:
+                 desc += "..."
+             output += f"**Description:** {desc}\n\n"
 
+         # Parameters
+         params_info = _format_parameters(endpoint.get("parameters", []))
+         if params_info:
+             output += params_info + "\n\n"
+
+         # Curl example
+         output += "**Usage:**\n```bash\n"
+         output += _generate_curl_example(endpoint)
+         output += "\n```\n\n"
+
+         # Response info
+         output += "**Returns:**\n"
+         output += _format_response_info(endpoint["responses"])
+         output += "\n\n"
+
+         output += "---\n\n"
+
+     return output
+
+
+ async def search_openapi_handler(arguments: dict[str, Any]) -> tuple[str, bool]:
+     """
+     Search the HuggingFace OpenAPI specification by tag
+
+     Args:
+         arguments: Dictionary with 'tag' parameter
+
+     Returns:
+         Tuple of (search_results, success)
+     """
+     start_time = time.perf_counter()
+     tag = arguments.get("tag", "")
+     print(f"[DEBUG] search_openapi: Starting for tag '{tag}'")
+
+     if not tag:
+         return "Error: No tag provided", False
+
+     try:
+         # Fetch OpenAPI spec (cached after first fetch)
+         spec = await _fetch_openapi_spec()
+
+         # Search for endpoints with this tag
+         results = _search_openapi_by_tag(spec, tag)
+
+         # Format results
+         formatted = _format_openapi_results(results, tag)
+
+         total_time = time.perf_counter() - start_time
+         print(f"[DEBUG] search_openapi: Total time {total_time:.2f}s")
+
+         return formatted, True
+
      except httpx.HTTPStatusError as e:
+         return f"HTTP error fetching OpenAPI spec: {e.response.status_code}", False
      except httpx.RequestError as e:
+         return f"Request error: {str(e)}", False
      except Exception as e:
+         return f"Error searching OpenAPI spec: {str(e)}", False

  async def hf_docs_fetch_handler(arguments: dict[str, Any]) -> tuple[str, bool]:

      Returns:
          Tuple of (full_markdown_content, success)
      """
+     start_time = time.perf_counter()
      url = arguments.get("url", "")
+     print(f"[DEBUG] fetch_hf_docs: Starting for URL '{url}'")

      if not url:
          return "Error: No URL provided", False

          # Make request with auth
          headers = {"Authorization": f"Bearer {hf_token}"}

+         fetch_start = time.perf_counter()
          async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
              response = await client.get(url, headers=headers)
              response.raise_for_status()

+         fetch_time = time.perf_counter() - fetch_start
          content = response.text
+         content_size_kb = len(content) / 1024
+
+         print(
+             f"[DEBUG] fetch_hf_docs: Fetched {content_size_kb:.1f}KB in {fetch_time:.2f}s"
+         )

          # Return the markdown content directly
          result = f"Documentation from: {url}\n\n{content}"
+
+         total_time = time.perf_counter() - start_time
+         print(f"[DEBUG] fetch_hf_docs: Total time {total_time:.2f}s")
+
          return result, True

      except httpx.HTTPStatusError as e:
      "name": "explore_docs_structure",
      "description": (
          "Explore the structure of HF documentation by parsing the sidebar navigation. "
+         "Select an endpoint from the available options and get a list of all documentation pages "
+         "with their titles, URLs, and a 300-character glimpse of each page. "
          "Use this to discover what documentation is available before fetching specific pages."
      ),
      "parameters": {
 
          "properties": {
              "endpoint": {
                  "type": "string",
+                 "enum": [
+                     "hub",
+                     "transformers",
+                     "diffusers",
+                     "datasets",
+                     "gradio",
+                     "trackio",
+                     "smolagents",
+                     "huggingface_hub",
+                     "huggingface.js",
+                     "transformers.js",
+                     "inference-providers",
+                     "inference-endpoints",
+                     "peft",
+                     "accelerate",
+                     "optimum",
+                     "optimum-habana",
+                     "optimum-neuron",
+                     "optimum-intel",
+                     "optimum-executorch",
+                     "optimum-tpu",
+                     "tokenizers",
+                     "llm-course",
+                     "robotics-course",
+                     "mcp-course",
+                     "smol-course",
+                     "agents-course",
+                     "deep-rl-course",
+                     "computer-vision-course",
+                     "evaluate",
+                     "tasks",
+                     "dataset-viewer",
+                     "trl",
+                     "simulate",
+                     "sagemaker",
+                     "timm",
+                     "safetensors",
+                     "tgi",
+                     "setfit",
+                     "audio-course",
+                     "lerobot",
+                     "autotrain",
+                     "tei",
+                     "bitsandbytes",
+                     "cookbook",
+                     "sentence_transformers",
+                     "ml-games-course",
+                     "diffusion-course",
+                     "ml-for-3d-course",
+                     "chat-ui",
+                     "leaderboards",
+                     "lighteval",
+                     "argilla",
+                     "distilabel",
+                     "microsoft-azure",
+                     "kernels",
+                     "google-cloud",
+                 ],
                  "description": (
+                     "The documentation endpoint to explore. Each endpoint corresponds to a major section of the Hugging Face documentation:\n\n"
+                     "• hub — Hub fundamentals: repos, models/datasets/spaces, auth, versioning, metadata.\n"
+                     "• transformers — Core model library: architectures, configs, tokenizers, training & inference APIs.\n"
+                     "• diffusers — Diffusion pipelines, schedulers, fine-tuning, training, and deployment patterns.\n"
+                     "• datasets — Dataset loading, streaming, processing, Arrow format, Hub integration.\n"
+                     "• gradio — UI components and demos for interacting with ML models.\n"
+                     "• trackio — Experiment tracking, metrics logging, and run comparison.\n"
+                     "• smolagents — Lightweight agent abstractions and tool-using patterns.\n"
+                     "• huggingface_hub — Python client for Hub operations (auth, upload/download, repo management).\n"
+                     "• huggingface.js — JS/TS client for Hub APIs in browser and Node.\n"
+                     "• transformers.js — Run Transformer models in browser/Node via WebGPU/WASM.\n"
+                     "• inference-providers — Unified interface for third-party inference backends.\n"
+                     "• inference-endpoints — Managed, scalable model deployments on HF infrastructure.\n"
+                     "• peft — Parameter-efficient fine-tuning methods (LoRA, adapters, etc.).\n"
+                     "• accelerate — Hardware-agnostic, distributed and mixed-precision training orchestration.\n"
+                     "• optimum — Hardware-aware optimization and model export tooling.\n"
+                     "• optimum-habana — Training and inference on Habana Gaudi accelerators.\n"
+                     "• optimum-neuron — Optimization workflows for AWS Inferentia/Trainium.\n"
+                     "• optimum-intel — Intel CPU/GPU optimizations (OpenVINO, IPEX).\n"
+                     "• optimum-executorch — Exporting models to ExecuTorch for edge/mobile.\n"
+                     "• optimum-tpu — TPU-specific training and optimization paths.\n"
+                     "• tokenizers — Fast tokenizer internals, training, and low-level APIs.\n"
+                     "• llm-course — End-to-end LLM concepts, training, and deployment.\n"
+                     "• robotics-course — Learning-based robotics foundations.\n"
+                     "• mcp-course — Model Context Protocol concepts and usage.\n"
+                     "• smol-course — Small-model and efficiency-focused workflows.\n"
+                     "• agents-course — Tool-using, planning, and multi-step agent design.\n"
+                     "• deep-rl-course — Deep reinforcement learning foundations.\n"
+                     "• computer-vision-course — Vision models, datasets, and pipelines.\n"
+                     "• evaluate — Metrics, evaluation workflows, and training-loop integration.\n"
+                     "• tasks — Canonical task definitions and model categorization.\n"
+                     "• dataset-viewer — Dataset preview, streaming views, and viewer internals.\n"
+                     "• trl — RLHF, DPO, PPO, and SFT utilities for LLMs.\n"
+                     "• simulate — Experimental simulation tools and workflows.\n"
+                     "• sagemaker — Deploying Hugging Face models on AWS SageMaker.\n"
+                     "• timm — Image model zoo and utilities via HF integrations.\n"
+                     "• safetensors — Safe, fast tensor serialization format.\n"
+                     "• tgi — High-throughput text generation server for LLMs.\n"
+                     "• setfit — Few-shot text classification via sentence embeddings.\n"
+                     "• audio-course — Speech and audio models, datasets, and tasks.\n"
+                     "• lerobot — Robotics datasets, policies, and learning workflows.\n"
+                     "• autotrain — No/low-code model training on Hugging Face.\n"
+                     "• tei — Optimized inference server for embedding workloads.\n"
+                     "• bitsandbytes — Quantization and memory-efficient optimizers.\n"
+                     "• cookbook — Practical, task-oriented recipes across the ecosystem.\n"
+                     "• sentence_transformers — Embedding models, training recipes, similarity/search workflows.\n"
+                     "• ml-games-course — Game-based ML and reinforcement learning experiments.\n"
+                     "• diffusion-course — Diffusion model theory and hands-on practice.\n"
+                     "• ml-for-3d-course — 3D representations, models, and learning techniques.\n"
+                     "• chat-ui — Reference chat interfaces for LLM deployment.\n"
+                     "• leaderboards — Evaluation leaderboards and submission mechanics.\n"
+                     "• lighteval — Lightweight, reproducible LLM evaluation framework.\n"
+                     "• argilla — Data annotation, feedback, and human-in-the-loop workflows.\n"
+                     "• distilabel — Synthetic data generation and distillation pipelines.\n"
+                     "• microsoft-azure — Azure deployment and integration guides.\n"
+                     "• kernels — Lightweight execution environments and notebook-style workflows.\n"
+                     "• google-cloud — GCP deployment and serving workflows.\n"
                  ),
              },
          },
          "required": ["url"],
      },
  }
+
+
+ async def _get_api_search_tool_spec() -> dict[str, Any]:
+     """
+     Dynamically generate the OpenAPI tool spec with tag enum populated at runtime
+     This must be called async to fetch the OpenAPI spec and extract tags
+     """
+     spec = await _fetch_openapi_spec()
+     tags = _extract_all_tags(spec)
+
+     return {
+         "name": "search_hf_api_endpoints",
+         "description": (
+             "Search the HuggingFace OpenAPI specification by tag to find related API endpoints. "
+             "Returns all endpoints with the specified tag including curl examples showing how to use them. "
+             "Each result includes the endpoint path, summary, usage example with curl, and response information."
+         ),
+         "parameters": {
+             "type": "object",
+             "properties": {
+                 "tag": {
+                     "type": "string",
+                     "enum": tags,
+                     "description": (
+                         "The API tag to search for. Each tag groups related API endpoints."
+                     ),
+                 },
+             },
+             "required": ["tag"],
+         },
+     }
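The `_fetch_openapi_spec`, `_extract_all_tags`, and `_search_openapi_by_tag` helpers used above live elsewhere in the module. The core idea, collecting the tag vocabulary and filtering operations by tag from an OpenAPI document, can be sketched over a toy spec (function names and the flattened result shape here are assumptions for illustration):

```python
from typing import Any


def extract_all_tags(spec: dict[str, Any]) -> list[str]:
    """Collect the sorted set of tags used across all operations in an OpenAPI spec."""
    tags: set[str] = set()
    for path_item in spec.get("paths", {}).values():
        for operation in path_item.values():
            tags.update(operation.get("tags", []))
    return sorted(tags)


def search_by_tag(spec: dict[str, Any], tag: str) -> list[dict[str, Any]]:
    """Return every operation carrying the given tag, flattened for formatting."""
    results = []
    for path, path_item in spec.get("paths", {}).items():
        for method, operation in path_item.items():
            if tag in operation.get("tags", []):
                results.append({
                    "method": method.upper(),
                    "path": path,
                    "summary": operation.get("summary", ""),
                    "description": operation.get("description", ""),
                    "parameters": operation.get("parameters", []),
                    "responses": operation.get("responses", {}),
                })
    return results
```

Populating the tool spec's `"enum"` from `extract_all_tags` at startup is what keeps the tag list in sync with the live API instead of hard-coding it.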
agent/tools/search_docs_tool.py CHANGED
@@ -12,8 +12,11 @@ from agent.config import Config
  from agent.core.session import Session


- def create_search_tool_router():
-     """Create a ToolRouter instance for the search sub-agent"""
+ async def create_search_tool_router():
+     """
+     Create a ToolRouter instance for the search sub-agent
+     Async because OpenAPI tool needs to fetch and parse spec at initialization
+     """
      # Import at runtime to avoid circular dependency
      from agent.core.tools import ToolRouter

@@ -26,10 +29,15 @@ def create_search_tool_router():
              self._mcp_initialized = False
              self.mcp_client = None

-             for tool in make_search_agent_tools():
+         async def initialize_tools(self):
+             """Initialize tools asynchronously"""
+             tools = await make_search_agent_tools()
+             for tool in tools:
                  self.register_tool(tool)

-     return SearchDocsToolRouter()
+     router = SearchDocsToolRouter()
+     await router.initialize_tools()
+     return router


  async def search_docs_handler(arguments: dict[str, Any]) -> tuple[str, bool]:
@@ -56,7 +64,7 @@ async def search_docs_handler(arguments: dict[str, Any]) -> tuple[str, bool]:
      sub_event_queue = asyncio.Queue()

      # Create specialized tool router for search
-     search_tool_router = create_search_tool_router()
+     search_tool_router = await create_search_tool_router()

      # Create config for sub-agent (using same model as main agent)
      sub_config = Config(
@@ -131,19 +139,25 @@ SEARCH_DOCS_TOOL_SPEC = {


- def make_search_agent_tools():
+ async def make_search_agent_tools():
      """
      Create a list of tools for the search agent
+     Async because OpenAPI tool spec needs to be populated at runtime
      """
      # Import at runtime to avoid circular dependency
      from agent.core.tools import ToolSpec
      from agent.tools._search_agent_tools import (
          EXPLORE_DOCS_STRUCTURE_TOOL_SPEC,
          HF_DOCS_FETCH_TOOL_SPEC,
+         _get_api_search_tool_spec,
          explore_docs_structure_handler,
          hf_docs_fetch_handler,
+         search_openapi_handler,
      )

+     # Get the OpenAPI tool spec with dynamically populated tags
+     openapi_spec = await _get_api_search_tool_spec()
+
      return [
          ToolSpec(
              name=EXPLORE_DOCS_STRUCTURE_TOOL_SPEC["name"],
@@ -157,4 +171,10 @@ def make_search_agent_tools():
              parameters=HF_DOCS_FETCH_TOOL_SPEC["parameters"],
              handler=hf_docs_fetch_handler,
          ),
+         ToolSpec(
+             name=openapi_spec["name"],
+             description=openapi_spec["description"],
+             parameters=openapi_spec["parameters"],
+             handler=search_openapi_handler,
+         ),
      ]
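Because `__init__` cannot `await`, the change above moves tool registration out of the constructor and into an `initialize_tools` coroutine hidden behind an async factory. The pattern in isolation (all names here are illustrative stand-ins for the router and tool-building code):

```python
import asyncio


class Router:
    def __init__(self) -> None:
        # Constructor stays synchronous; no awaits allowed here.
        self.tools: list[str] = []

    async def initialize_tools(self) -> None:
        # Stand-in for awaiting make_search_agent_tools(), which must
        # fetch the OpenAPI spec before the tool list can be built.
        await asyncio.sleep(0)
        self.tools.append("search_hf_api_endpoints")


async def create_router() -> Router:
    """Async factory: construct synchronously, then await async setup."""
    router = Router()
    await router.initialize_tools()
    return router


router = asyncio.run(create_router())
print(router.tools)
```

Callers then switch from `create_router()` to `await create_router()`, exactly as `search_docs_handler` does in the hunk above.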
run_search_agent.py ADDED
@@ -0,0 +1,142 @@
+ """
+ Standalone test script for the search sub-agent
+ Run with: uv run python run_search_agent.py
+ """
+
+ import asyncio
+
+ from litellm.utils import get_max_tokens
+
+ from agent.config import Config
+ from agent.context_manager.manager import ContextManager
+ from agent.core.agent_loop import Handlers
+ from agent.core.session import Session
+ from agent.tools.search_docs_tool import create_search_tool_router
+
+
+ async def test_search_agent(query: str):
+     """Test the search sub-agent with a query"""
+     print(f"Testing search agent with query: {query}\n")
+     print("=" * 60)
+
+     # Create event queue for the sub-agent
+     sub_event_queue = asyncio.Queue()
+
+     # Create search tool router
+     search_tool_router = await create_search_tool_router()
+
+     # Create config
+     sub_config = Config(
+         model_name="anthropic/claude-haiku-4-5",
+     )
+
+     # Create session with custom system prompt
+     sub_session = Session(
+         event_queue=sub_event_queue,
+         config=sub_config,
+         tool_router=search_tool_router,
+         context_manager=ContextManager(
+             tool_specs=search_tool_router.get_tool_specs_for_llm(),
+             max_context=get_max_tokens(sub_config.model_name),
+             compact_size=0.1,
+             untouched_messages=5,
+             prompt_file_suffix="search_docs_system_prompt.yaml",
+         ),
+     )
+
+     # Event listener to show what the sub-agent is doing
+     async def event_monitor():
+         while True:
+             try:
+                 event = await asyncio.wait_for(sub_event_queue.get(), timeout=1.0)
+
+                 if event.event_type == "assistant_message":
+                     content = event.data.get("content", "") if event.data else ""
+                     if content:
+                         print(f"\n🤖 Sub-agent: {content}\n")
+
+                 elif event.event_type == "tool_call":
+                     tool_name = event.data.get("tool", "") if event.data else ""
+                     arguments = event.data.get("arguments", {}) if event.data else {}
+                     print(f"🔧 Tool call: {tool_name}")
+                     print(f"   Args: {arguments}")
+
+                 elif event.event_type == "tool_output":
+                     output = event.data.get("output", "") if event.data else ""
+                     success = event.data.get("success", False) if event.data else False
+                     status = "✅" if success else "❌"
+
+                     print(f"{status} Tool output: {output}\n")
+
+                 elif event.event_type == "turn_complete":
+                     print("✅ Sub-agent turn complete")
+                     break
+
+             except asyncio.TimeoutError:
+                 # Check if agent is still running
+                 continue
+             except Exception as e:
+                 print(f"⚠️ Event error: {e}")
+                 break
+
+     # Run the sub-agent and event monitor concurrently
+     async with search_tool_router:
+         monitor_task = asyncio.create_task(event_monitor())
+
+         result = await Handlers.run_agent(
+             session=sub_session, text=query, max_iterations=30
+         )
+
+         # Wait for event monitor to finish
+         await asyncio.wait_for(monitor_task, timeout=5.0)
+
+     print("\n" + "=" * 60)
+     print("FINAL RESULT:")
+     print("=" * 60)
+     if result:
+         print(result)
+     else:
+         print("No result returned")
+     print("=" * 60)
+
+
+ async def main():
+     """Main test function"""
+     print("🧪 Search Sub-Agent Test\n")
+
+     # Example queries to test
+     test_queries = [
+         # "Explore the TRL documentation structure and find information about DPO trainer",
+         # "is there a way to get the logs from a served huggingface space",
+         # "How do I train GLM4.7 with a GRPO training loop with trl with llm judge as a reward model for training on hle?"
+         "can i stream logs through the api for a served huggingface space",
+     ]
+
+     for i, query in enumerate(test_queries, 1):
+         print(f"\n{'=' * 60}")
+         print(f"TEST {i}/{len(test_queries)}")
+         print(f"{'=' * 60}\n")
+
+         try:
+             await test_search_agent(query)
+         except Exception as e:
+             print(f"\n❌ Test failed: {e}")
+             import traceback
+
+             traceback.print_exc()
+
+         if i < len(test_queries):
+             print("\n\nPress Enter to continue to next test...")
+             input()
+
+
+ if __name__ == "__main__":
+     try:
+         asyncio.run(main())
+     except KeyboardInterrupt:
+         print("\n\n⚠️ Test interrupted")
+     except Exception as e:
+         print(f"\n❌ Error: {e}")
+         import traceback
+
+         traceback.print_exc()