File size: 26,662 Bytes
5e8489d
b307ff7
 
5e8489d
 
eb92351
5e8489d
 
 
 
 
 
eb92351
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7291bab
eb92351
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5e8489d
7291bab
5e8489d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7291bab
eb92351
5e8489d
eb92351
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5e8489d
eb92351
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5e8489d
eb92351
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5e8489d
eb92351
 
5e8489d
eb92351
 
 
 
5e8489d
eb92351
 
5e8489d
eb92351
 
 
5e8489d
eb92351
 
5e8489d
eb92351
 
5e8489d
eb92351
5e8489d
 
eb92351
5e8489d
eb92351
5e8489d
eb92351
5e8489d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eb92351
5e8489d
 
 
 
 
 
 
 
 
 
 
 
 
b307ff7
5e8489d
7291bab
 
5e8489d
7291bab
eb92351
 
5e8489d
 
 
 
 
 
 
eb92351
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5e8489d
eb92351
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5e8489d
 
 
 
 
 
 
 
 
 
 
7291bab
5e8489d
7291bab
5e8489d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eb92351
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
"""
Documentation search tools for the HF Agent
Tools for exploring and fetching HuggingFace documentation and API specifications
"""

import asyncio
import os
from typing import Any

import httpx
from bs4 import BeautifulSoup

# Cache for OpenAPI spec to avoid repeated fetches
_openapi_spec_cache: dict[str, Any] | None = None


async def _fetch_html_page(hf_token: str, endpoint: str) -> str:
    """Retrieve the raw HTML of the docs landing page for *endpoint*."""
    target_url = f"https://huggingface.co/docs/{endpoint}"
    auth_headers = {"Authorization": f"Bearer {hf_token}"}

    async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
        response = await client.get(target_url, headers=auth_headers)
        response.raise_for_status()
        # Body is fully read by client.get(), so returning here is safe.
        return response.text


def _parse_sidebar_navigation(html_content: str) -> list[dict[str, str]]:
    """Extract every sidebar link as {"title", "url"} dicts with absolute URLs."""
    soup = BeautifulSoup(html_content, "html.parser")
    # The docs sidebar is the <nav> whose class list contains "flex-auto".
    nav = soup.find("nav", class_=lambda x: x and "flex-auto" in x)

    if not nav:
        raise ValueError("Could not find navigation sidebar")

    entries: list[dict[str, str]] = []
    for anchor in nav.find_all("a", href=True):
        href = anchor["href"]
        # Relative hrefs are rooted at huggingface.co; absolute ones pass through.
        absolute_url = f"https://huggingface.co{href}" if href.startswith("/") else href
        entries.append({"title": anchor.get_text(strip=True), "url": absolute_url})

    return entries


async def _fetch_single_glimpse(
    client: httpx.AsyncClient, hf_token: str, item: dict[str, str]
) -> dict[str, str]:
    """Download one doc page as markdown and return a 300-char preview of it."""
    md_url = f"{item['url']}.md"
    result = {"title": item["title"], "url": item["url"], "md_url": md_url}

    try:
        response = await client.get(
            md_url, headers={"Authorization": f"Bearer {hf_token}"}
        )
        response.raise_for_status()
        content = response.text
    except Exception as e:
        # Best effort: a single failed page must not abort the whole exploration.
        result["glimpse"] = f"[Could not fetch glimpse: {str(e)[:50]}]"
        return result

    preview = content[:300].strip()
    if len(content) > 300:
        preview += "..."
    result["glimpse"] = preview
    return result


async def _fetch_all_glimpses(
    hf_token: str, nav_data: list[dict[str, str]]
) -> list[dict[str, str]]:
    """Concurrently fetch a preview for every navigation entry."""
    async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
        tasks = [_fetch_single_glimpse(client, hf_token, entry) for entry in nav_data]
        gathered = await asyncio.gather(*tasks)

    return list(gathered)


def _format_exploration_results(
    endpoint: str, result_items: list[dict[str, str]]
) -> str:
    """Format the exploration results as a readable string"""
    base_url = "https://huggingface.co/docs"
    url = f"{base_url}/{endpoint}"
    result = f"Documentation structure for: {url}\n\n"
    result += f"Found {len(result_items)} pages:\n\n"

    for i, item in enumerate(result_items, 1):
        result += f"{i}. **{item['title']}**\n"
        result += f"   URL: {item['url']}\n"
        result += f"   Glimpse: {item['glimpse']}\n\n"

    return result


async def explore_hf_docs(hf_token: str, endpoint: str) -> str:
    """Explore a docs endpoint: parse its sidebar and summarize every page.

    Raises:
        ValueError: if the sidebar yields no links for *endpoint*.
    """
    html_content = await _fetch_html_page(hf_token, endpoint)
    nav_data = _parse_sidebar_navigation(html_content)

    if not nav_data:
        raise ValueError(f"No navigation links found for endpoint '{endpoint}'")

    # Glimpses are fetched concurrently, then rendered into one report.
    glimpses = await _fetch_all_glimpses(hf_token, nav_data)
    return _format_exploration_results(endpoint, glimpses)


async def explore_hf_docs_handler(arguments: dict[str, Any]) -> tuple[str, bool]:
    """
    Explore the documentation structure for a given endpoint by parsing the sidebar navigation

    Args:
        arguments: Dictionary with 'endpoint' parameter (e.g., 'trl', 'transformers', etc.)

    Returns:
        Tuple of (structured_navigation_with_glimpses, success)
    """
    endpoint = arguments.get("endpoint", "")
    if not endpoint:
        return "Error: No endpoint provided", False

    # Auth is required to fetch the markdown sources.
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        return "Error: HF_TOKEN environment variable not set", False

    try:
        return await explore_hf_docs(hf_token, endpoint.lstrip("/")), True

    except httpx.HTTPStatusError as e:
        return (
            f"HTTP error: {e.response.status_code} - {e.response.text[:200]}",
            False,
        )
    except httpx.RequestError as e:
        return f"Request error: {str(e)}", False
    except ValueError as e:
        return f"Error: {str(e)}", False
    except Exception as e:
        return f"Unexpected error: {str(e)}", False


async def _fetch_openapi_spec() -> dict[str, Any]:
    """Fetch the HF OpenAPI spec, caching it in-process after the first call."""
    global _openapi_spec_cache

    # Fast path: serve the previously fetched spec.
    if _openapi_spec_cache is not None:
        return _openapi_spec_cache

    spec_url = "https://huggingface.co/.well-known/openapi.json"
    async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
        response = await client.get(spec_url)
        response.raise_for_status()
        # Body is already read; parse and memoize before returning.
        _openapi_spec_cache = response.json()

    return _openapi_spec_cache


def _extract_all_tags(spec: dict[str, Any]) -> list[str]:
    """Extract all unique tags from the OpenAPI spec"""
    tags = set()

    # Get tags from the tags section
    for tag_obj in spec.get("tags", []):
        if "name" in tag_obj:
            tags.add(tag_obj["name"])

    # Also get tags from paths (in case some aren't in the tags section)
    for path, path_item in spec.get("paths", {}).items():
        for method, operation in path_item.items():
            if method in ["get", "post", "put", "delete", "patch", "head", "options"]:
                for tag in operation.get("tags", []):
                    tags.add(tag)

    return sorted(list(tags))


def _search_openapi_by_tag(spec: dict[str, Any], tag: str) -> list[dict[str, Any]]:
    """Search for API endpoints with a specific tag"""
    results = []
    paths = spec.get("paths", {})
    servers = spec.get("servers", [])
    base_url = (
        servers[0].get("url", "https://huggingface.co")
        if servers
        else "https://huggingface.co"
    )

    for path, path_item in paths.items():
        for method, operation in path_item.items():
            if method not in [
                "get",
                "post",
                "put",
                "delete",
                "patch",
                "head",
                "options",
            ]:
                continue

            operation_tags = operation.get("tags", [])
            if tag in operation_tags:
                # Extract parameters
                parameters = operation.get("parameters", [])
                request_body = operation.get("requestBody", {})
                responses = operation.get("responses", {})

                results.append(
                    {
                        "path": path,
                        "method": method.upper(),
                        "operationId": operation.get("operationId", ""),
                        "summary": operation.get("summary", ""),
                        "description": operation.get("description", ""),
                        "parameters": parameters,
                        "request_body": request_body,
                        "responses": responses,
                        "base_url": base_url,
                    }
                )

    return results


def _generate_curl_example(endpoint: dict[str, Any]) -> str:
    """Generate a curl command example for an endpoint"""
    method = endpoint["method"]
    path = endpoint["path"]
    base_url = endpoint["base_url"]

    # Build the full URL with example path parameters
    full_path = path
    for param in endpoint.get("parameters", []):
        if param.get("in") == "path" and param.get("required"):
            param_name = param["name"]
            example = param.get(
                "example", param.get("schema", {}).get("example", f"<{param_name}>")
            )
            full_path = full_path.replace(f"{{{param_name}}}", str(example))

    curl = f"curl -X {method} \\\n  '{base_url}{full_path}'"

    # Add query parameters if any
    query_params = [p for p in endpoint.get("parameters", []) if p.get("in") == "query"]
    if query_params and query_params[0].get("required"):
        param = query_params[0]
        example = param.get("example", param.get("schema", {}).get("example", "value"))
        curl += f"?{param['name']}={example}"

    # Add headers
    curl += " \\\n  -H 'Authorization: Bearer $HF_TOKEN'"

    # Add request body if applicable
    if method in ["POST", "PUT", "PATCH"] and endpoint.get("request_body"):
        content = endpoint["request_body"].get("content", {})
        if "application/json" in content:
            curl += " \\\n  -H 'Content-Type: application/json'"
            schema = content["application/json"].get("schema", {})
            example = schema.get("example", "{}")
            if isinstance(example, dict):
                import json

                example = json.dumps(example, indent=2)
            curl += f" \\\n  -d '{example}'"

    return curl


def _format_parameters(parameters: list[dict[str, Any]]) -> str:
    """Format parameter information from OpenAPI spec"""
    if not parameters:
        return ""

    # Group parameters by type
    path_params = [p for p in parameters if p.get("in") == "path"]
    query_params = [p for p in parameters if p.get("in") == "query"]
    header_params = [p for p in parameters if p.get("in") == "header"]

    output = []

    if path_params:
        output.append("**Path Parameters:**")
        for param in path_params:
            name = param.get("name", "")
            required = " (required)" if param.get("required") else " (optional)"
            description = param.get("description", "")
            param_type = param.get("schema", {}).get("type", "string")
            example = param.get("example") or param.get("schema", {}).get("example", "")

            output.append(f"- `{name}` ({param_type}){required}: {description}")
            if example:
                output.append(f"  Example: `{example}`")

    if query_params:
        if output:
            output.append("")
        output.append("**Query Parameters:**")
        for param in query_params:
            name = param.get("name", "")
            required = " (required)" if param.get("required") else " (optional)"
            description = param.get("description", "")
            param_type = param.get("schema", {}).get("type", "string")
            example = param.get("example") or param.get("schema", {}).get("example", "")

            output.append(f"- `{name}` ({param_type}){required}: {description}")
            if example:
                output.append(f"  Example: `{example}`")

    if header_params:
        if output:
            output.append("")
        output.append("**Header Parameters:**")
        for param in header_params:
            name = param.get("name", "")
            required = " (required)" if param.get("required") else " (optional)"
            description = param.get("description", "")

            output.append(f"- `{name}`{required}: {description}")

    return "\n".join(output)


def _format_response_info(responses: dict[str, Any]) -> str:
    """Format response information from OpenAPI spec"""
    if not responses:
        return "No response information available"

    output = []
    for status_code, response_obj in list(responses.items())[
        :3
    ]:  # Show first 3 status codes
        desc = response_obj.get("description", "")
        output.append(f"- **{status_code}**: {desc}")

        content = response_obj.get("content", {})
        if "application/json" in content:
            schema = content["application/json"].get("schema", {})
            if "type" in schema:
                output.append(f"  Returns: {schema.get('type', 'object')}")

    return "\n".join(output)


def _format_openapi_results(results: list[dict[str, Any]], tag: str) -> str:
    """Format OpenAPI search results as markdown with curl examples"""
    if not results:
        return f"No API endpoints found with tag '{tag}'"

    output = f"# API Endpoints for tag: `{tag}`\n\n"
    output += f"Found {len(results)} endpoint(s)\n\n"
    output += "---\n\n"

    for i, endpoint in enumerate(results, 1):
        output += f"## {i}. {endpoint['method']} {endpoint['path']}\n\n"

        if endpoint["summary"]:
            output += f"**Summary:** {endpoint['summary']}\n\n"

        if endpoint["description"]:
            desc = endpoint["description"][:300]
            if len(endpoint["description"]) > 300:
                desc += "..."
            output += f"**Description:** {desc}\n\n"

        # Parameters
        params_info = _format_parameters(endpoint.get("parameters", []))
        if params_info:
            output += params_info + "\n\n"

        # Curl example
        output += "**Usage:**\n```bash\n"
        output += _generate_curl_example(endpoint)
        output += "\n```\n\n"

        # Response info
        output += "**Returns:**\n"
        output += _format_response_info(endpoint["responses"])
        output += "\n\n"

        output += "---\n\n"

    return output


async def search_openapi_handler(arguments: dict[str, Any]) -> tuple[str, bool]:
    """
    Search the HuggingFace OpenAPI specification by tag

    Args:
        arguments: Dictionary with 'tag' parameter

    Returns:
        Tuple of (search_results, success)
    """
    tag = arguments.get("tag", "")
    if not tag:
        return "Error: No tag provided", False

    try:
        # Spec is cached in-process after the first fetch.
        spec = await _fetch_openapi_spec()
        matches = _search_openapi_by_tag(spec, tag)
        return _format_openapi_results(matches, tag), True

    except httpx.HTTPStatusError as e:
        return f"HTTP error fetching OpenAPI spec: {e.response.status_code}", False
    except httpx.RequestError as e:
        return f"Request error: {str(e)}", False
    except Exception as e:
        return f"Error searching OpenAPI spec: {str(e)}", False


async def hf_docs_fetch_handler(arguments: dict[str, Any]) -> tuple[str, bool]:
    """
    Fetch full documentation content from a specific HF docs page

    Args:
        arguments: Dictionary with 'url' parameter (full URL to the doc page)

    Returns:
        Tuple of (full_markdown_content, success)
    """
    url = arguments.get("url", "")
    if not url:
        return "Error: No URL provided", False

    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        return (
            "Error: HF_TOKEN environment variable not set",
            False,
        )

    # The markdown source lives at <page-url>.md.
    if not url.endswith(".md"):
        url = f"{url}.md"

    try:
        async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
            response = await client.get(
                url, headers={"Authorization": f"Bearer {hf_token}"}
            )
            response.raise_for_status()

        return f"Documentation from: {url}\n\n{response.text}", True

    except httpx.HTTPStatusError as e:
        return (
            f"HTTP error fetching {url}: {e.response.status_code} - {e.response.text[:200]}",
            False,
        )
    except httpx.RequestError as e:
        return f"Request error fetching {url}: {str(e)}", False
    except Exception as e:
        return f"Error fetching documentation: {str(e)}", False


# Tool specifications for documentation search

# Static JSON-schema tool spec consumed by the agent's tool-calling layer.
# Pairs with explore_hf_docs_handler; the "endpoint" enum below is hardcoded.
# NOTE(review): the enum appears to mirror the sections under
# https://huggingface.co/docs — confirm and keep in sync when sections change.
EXPLORE_HF_DOCS_TOOL_SPEC = {
    "name": "explore_hf_docs",
    "description": (
        "Explore the Hugging Face documentation at a glance. "
        "Select an endpoint from the available options and get a list of all documentation pages "
        "with their titles, URLs, and a 300-character glimpse of each page. "
        "Use this to discover what documentation is available before fetching specific pages."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "endpoint": {
                "type": "string",
                "enum": [
                    "hub",
                    "transformers",
                    "diffusers",
                    "datasets",
                    "gradio",
                    "trackio",
                    "smolagents",
                    "huggingface_hub",
                    "huggingface.js",
                    "transformers.js",
                    "inference-providers",
                    "inference-endpoints",
                    "peft",
                    "accelerate",
                    "optimum",
                    "optimum-habana",
                    "optimum-neuron",
                    "optimum-intel",
                    "optimum-executorch",
                    "optimum-tpu",
                    "tokenizers",
                    "llm-course",
                    "robotics-course",
                    "mcp-course",
                    "smol-course",
                    "agents-course",
                    "deep-rl-course",
                    "computer-vision-course",
                    "evaluate",
                    "tasks",
                    "dataset-viewer",
                    "trl",
                    "simulate",
                    "sagemaker",
                    "timm",
                    "safetensors",
                    "tgi",
                    "setfit",
                    "audio-course",
                    "lerobot",
                    "autotrain",
                    "tei",
                    "bitsandbytes",
                    "cookbook",
                    "sentence_transformers",
                    "ml-games-course",
                    "diffusion-course",
                    "ml-for-3d-course",
                    "chat-ui",
                    "leaderboards",
                    "lighteval",
                    "argilla",
                    "distilabel",
                    "microsoft-azure",
                    "kernels",
                    "google-cloud",
                ],
                "description": (
                    "The documentation endpoint to explore. Each endpoint corresponds to a major section of the Hugging Face documentation:\n\n"
                    "β€’ hub β€” Find answers to questions about models/datasets/spaces, auth, versioning, metadata.\n"
                    "β€’ transformers β€” Core model library: architectures, configs, tokenizers, training & inference APIs.\n"
                    "β€’ diffusers β€” Diffusion pipelines, schedulers, fine-tuning, training, and deployment patterns.\n"
                    "β€’ datasets β€” Dataset loading, streaming, processing, Arrow format, Hub integration.\n"
                    "β€’ gradio β€” UI components and demos for interacting with ML models.\n"
                    "β€’ trackio β€” Experiment tracking, metrics logging, and run comparison.\n"
                    "β€’ smolagents β€” Lightweight agent abstractions and tool-using patterns.\n"
                    "β€’ huggingface_hub β€” Python client for Hub operations (auth, upload/download, repo management).\n"
                    "β€’ huggingface.js β€” JS/TS client for Hub APIs in browser and Node.\n"
                    "β€’ transformers.js β€” Run Transformer models in browser/Node via WebGPU/WASM.\n"
                    "β€’ inference-providers β€” Unified interface for third-party inference backends.\n"
                    "β€’ inference-endpoints β€” Managed, scalable model deployments on HF infrastructure.\n"
                    "β€’ peft β€” Parameter-efficient fine-tuning methods (LoRA, adapters, etc.).\n"
                    "β€’ accelerate β€” Hardware-agnostic, distributed and mixed-precision training orchestration.\n"
                    "β€’ optimum β€” Hardware-aware optimization and model export tooling.\n"
                    "β€’ optimum-habana β€” Training and inference on Habana Gaudi accelerators.\n"
                    "β€’ optimum-neuron β€” Optimization workflows for AWS Inferentia/Trainium.\n"
                    "β€’ optimum-intel β€” Intel CPU/GPU optimizations (OpenVINO, IPEX).\n"
                    "β€’ optimum-executorch β€” Exporting models to ExecuTorch for edge/mobile.\n"
                    "β€’ optimum-tpu β€” TPU-specific training and optimization paths.\n"
                    "β€’ tokenizers β€” Fast tokenizer internals, training, and low-level APIs.\n"
                    "β€’ llm-course β€” End-to-end LLM concepts, training, and deployment.\n"
                    "β€’ robotics-course β€” Learning-based robotics foundations.\n"
                    "β€’ mcp-course β€” Model Context Protocol concepts and usage.\n"
                    "β€’ smol-course β€” Small-model and efficiency-focused workflows.\n"
                    "β€’ agents-course β€” Tool-using, planning, and multi-step agent design.\n"
                    "β€’ deep-rl-course β€” Deep reinforcement learning foundations.\n"
                    "β€’ computer-vision-course β€” Vision models, datasets, and pipelines.\n"
                    "β€’ evaluate β€” Metrics, evaluation workflows, and training-loop integration.\n"
                    "β€’ tasks β€” Canonical task definitions and model categorization.\n"
                    "β€’ dataset-viewer β€” Dataset preview, streaming views, and viewer internals.\n"
                    "β€’ trl β€” RLHF, DPO, PPO, and SFT utilities for LLMs.\n"
                    "β€’ simulate β€” Experimental simulation tools and workflows.\n"
                    "β€’ sagemaker β€” Deploying Hugging Face models on AWS SageMaker.\n"
                    "β€’ timm β€” Image model zoo and utilities via HF integrations.\n"
                    "β€’ safetensors β€” Safe, fast tensor serialization format.\n"
                    "β€’ tgi β€” High-throughput text generation server for LLMs.\n"
                    "β€’ setfit β€” Few-shot text classification via sentence embeddings.\n"
                    "β€’ audio-course β€” Speech and audio models, datasets, and tasks.\n"
                    "β€’ lerobot β€” Robotics datasets, policies, and learning workflows.\n"
                    "β€’ autotrain β€” No/low-code model training on Hugging Face.\n"
                    "β€’ tei β€” Optimized inference server for embedding workloads.\n"
                    "β€’ bitsandbytes β€” Quantization and memory-efficient optimizers.\n"
                    "β€’ cookbook β€” Practical, task-oriented recipes across the ecosystem.\n"
                    "β€’ sentence_transformers β€” Embedding models, training recipes, similarity/search workflows.\n"
                    "β€’ ml-games-course β€” Game-based ML and reinforcement learning experiments.\n"
                    "β€’ diffusion-course β€” Diffusion model theory and hands-on practice.\n"
                    "β€’ ml-for-3d-course β€” 3D representations, models, and learning techniques.\n"
                    "β€’ chat-ui β€” Reference chat interfaces for LLM deployment.\n"
                    "β€’ leaderboards β€” Evaluation leaderboards and submission mechanics.\n"
                    "β€’ lighteval β€” Lightweight, reproducible LLM evaluation framework.\n"
                    "β€’ argilla β€” Data annotation, feedback, and human-in-the-loop workflows.\n"
                    "β€’ distilabel β€” Synthetic data generation and distillation pipelines.\n"
                    "β€’ microsoft-azure β€” Azure deployment and integration guides.\n"
                    "β€’ kernels β€” Lightweight execution environments and notebook-style workflows.\n"
                    "β€’ google-cloud β€” GCP deployment and serving workflows.\n"
                ),
            },
        },
        "required": ["endpoint"],
    },
}

# Static JSON-schema tool spec consumed by the agent's tool-calling layer.
# Pairs with hf_docs_fetch_handler, which appends ".md" to the URL if absent.
HF_DOCS_FETCH_TOOL_SPEC = {
    "name": "fetch_hf_docs",
    "description": (
        "Fetch the full content of a specific HF documentation page. "
        "Provide the full URL to the doc page (e.g., from explore_hf_docs results). "
        "Returns the complete markdown content of that page. "
        "Use explore_hf_docs first to discover available pages."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "url": {
                "type": "string",
                "description": (
                    "The full URL to the documentation page. "
                    "Example: 'https://huggingface.co/docs/trl/dpo_trainer' "
                    "The .md extension will be added automatically if not present."
                ),
            },
        },
        "required": ["url"],
    },
}


async def _get_api_search_tool_spec() -> dict[str, Any]:
    """
    Dynamically generate the OpenAPI tool spec with tag enum populated at runtime
    This must be called async to fetch the OpenAPI spec and extract tags
    """
    # The enum of valid tags comes from the live (cached) OpenAPI spec.
    available_tags = _extract_all_tags(await _fetch_openapi_spec())

    return {
        "name": "search_hf_api_endpoints",
        "description": (
            "Search the HuggingFace OpenAPI specification by tag to find related API endpoints. "
            "Returns all endpoints with the specified tag including curl examples showing how to use them. "
            "Each result includes the endpoint path, summary, usage example with curl, and response information."
        ),
        "parameters": {
            "type": "object",
            "properties": {
                "tag": {
                    "type": "string",
                    "enum": available_tags,
                    "description": (
                        "The API tag to search for. Each tag groups related API endpoints. "
                    ),
                },
            },
            "required": ["tag"],
        },
    }