akseljoonas HF Staff committed on
Commit
d90bfa3
·
1 Parent(s): 551d5e0

pre simple impl

Browse files
Files changed (1) hide show
  1. agent/tools/docs_tools.py +76 -4
agent/tools/docs_tools.py CHANGED
@@ -26,6 +26,10 @@ _CACHE_LOCK = asyncio.Lock()
26
  DEFAULT_MAX_RESULTS = 20
27
  MAX_RESULTS_CAP = 50
28
 
 
 
 
 
29
  # High-level endpoints that bundle related documentation sections
30
  COMPOSITE_ENDPOINTS: dict[str, list[str]] = {
31
  "optimum": [
@@ -57,6 +61,51 @@ def _expand_endpoint(endpoint: str) -> list[str]:
57
  return COMPOSITE_ENDPOINTS.get(endpoint, [endpoint])
58
 
59
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  async def _fetch_html_page(hf_token: str, endpoint: str) -> str:
61
  """Fetch the HTML page for a given endpoint"""
62
  base_url = "https://huggingface.co/docs"
@@ -378,14 +427,37 @@ async def explore_hf_docs_handler(arguments: dict[str, Any]) -> tuple[str, bool]
378
  if not endpoint:
379
  return "Error: No endpoint provided", False
380
 
381
- # Get HF token from environment
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
382
  hf_token = os.environ.get("HF_TOKEN")
383
 
384
  if not hf_token:
385
  return "Error: HF_TOKEN environment variable not set", False
386
 
387
- endpoint = endpoint.lstrip("/")
388
-
389
  try:
390
  try:
391
  max_results_int = int(max_results) if max_results is not None else None
@@ -822,7 +894,7 @@ EXPLORE_HF_DOCS_TOOL_SPEC = {
822
  "• transformers — Core model library: architectures, configs, tokenizers, training & inference APIs.\n"
823
  "• diffusers — Diffusion pipelines, schedulers, fine-tuning, training, and deployment patterns.\n"
824
  "• datasets — Dataset loading, streaming, processing, Arrow format, Hub integration.\n"
825
- "• gradio — UI components and demos for interacting with ML models.\n"
826
  "• trackio — Experiment tracking, metrics logging, and run comparison.\n"
827
  "• smolagents — Lightweight agent abstractions and tool-using patterns.\n"
828
  "• huggingface_hub — Python client for Hub operations (auth, upload/download, repo management).\n"
 
26
  DEFAULT_MAX_RESULTS = 20
27
  MAX_RESULTS_CAP = 50
28
 
29
+ # Gradio documentation endpoints (hosted separately from HF docs)
30
+ GRADIO_LLMS_TXT_URL = "https://gradio.app/llms.txt"
31
+ GRADIO_EMBEDDING_SEARCH_URL = "https://playground-worker.pages.dev/api/prompt"
32
+
33
  # High-level endpoints that bundle related documentation sections
34
  COMPOSITE_ENDPOINTS: dict[str, list[str]] = {
35
  "optimum": [
 
61
  return COMPOSITE_ENDPOINTS.get(endpoint, [endpoint])
62
 
63
 
64
+ # ---------------------------------------------------------------------------
65
+ # Gradio documentation helpers (uses gradio.app instead of HF docs)
66
+ # ---------------------------------------------------------------------------
67
+
68
+
69
async def _fetch_gradio_full_docs() -> str:
    """Download the complete Gradio documentation dump (llms.txt).

    Returns:
        The response body as text.

    Raises:
        httpx.HTTPStatusError: If the server answers with an error status.
        httpx.RequestError: If the request itself fails (connection, timeout).
    """
    http_client = httpx.AsyncClient(timeout=30.0, follow_redirects=True)
    async with http_client:
        llms_txt = await http_client.get(GRADIO_LLMS_TXT_URL)
        # Surface 4xx/5xx as exceptions so the caller can report the status.
        llms_txt.raise_for_status()
        return llms_txt.text
75
+
76
+
77
async def _search_gradio_docs(query: str) -> str:
    """Run an embedding search over Gradio's documentation via its HTTP API.

    Args:
        query: Free-text search query sent as ``prompt_to_embed``.

    Returns:
        The text found under the ``"SYS_PROMPT"`` key of the JSON response.
        ``"No results found"`` is returned when the response is not a JSON
        object, or when that key is missing, not a string, or blank — so the
        declared ``str`` return type always holds.

    Raises:
        httpx.HTTPStatusError: If the server answers with an error status.
        httpx.RequestError: If the request itself fails (connection, timeout).
    """
    fallback = "No results found"

    async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
        response = await client.post(
            GRADIO_EMBEDDING_SEARCH_URL,
            headers={
                "Content-Type": "application/json",
                # NOTE(review): presumably the endpoint checks the Origin
                # header — confirm before changing this value.
                "Origin": "https://gradio-docs-mcp.up.railway.app",
            },
            json={
                "prompt_to_embed": query,
                "SYSTEM_PROMPT": "$INSERT_GUIDES_DOCS_DEMOS",
                "FALLBACK_PROMPT": fallback,
            },
        )
        response.raise_for_status()
        payload = response.json()

    # The body is external data: never hand a non-string to the caller, which
    # concatenates the result onto a header string.
    if not isinstance(payload, dict):
        return fallback
    content = payload.get("SYS_PROMPT")
    if not isinstance(content, str) or not content.strip():
        return fallback
    return content
98
+
99
+
100
+ def _format_gradio_results(content: str, query: str | None = None) -> str:
101
+ """Format Gradio documentation results"""
102
+ header = "# Gradio Documentation\n\n"
103
+ if query:
104
+ header += f"Search query: '{query}'\n\n"
105
+ header += "Source: https://gradio.app/docs\n\n---\n\n"
106
+ return header + content
107
+
108
+
109
  async def _fetch_html_page(hf_token: str, endpoint: str) -> str:
110
  """Fetch the HTML page for a given endpoint"""
111
  base_url = "https://huggingface.co/docs"
 
427
  if not endpoint:
428
  return "Error: No endpoint provided", False
429
 
430
+ endpoint = endpoint.lstrip("/")
431
+
432
+ # Special handling for Gradio docs (hosted at gradio.app, not HF docs)
433
+ if endpoint.lower() == "gradio":
434
+ try:
435
+ clean_query = (
436
+ query.strip() if isinstance(query, str) and query.strip() else None
437
+ )
438
+ if clean_query:
439
+ # Use embedding search for specific queries
440
+ content = await _search_gradio_docs(clean_query)
441
+ else:
442
+ # Fetch full docs when no query provided
443
+ content = await _fetch_gradio_full_docs()
444
+ return _format_gradio_results(content, query=clean_query), True
445
+ except httpx.HTTPStatusError as e:
446
+ return (
447
+ f"HTTP error fetching Gradio docs: {e.response.status_code}",
448
+ False,
449
+ )
450
+ except httpx.RequestError as e:
451
+ return f"Request error fetching Gradio docs: {str(e)}", False
452
+ except Exception as e:
453
+ return f"Error fetching Gradio docs: {str(e)}", False
454
+
455
+ # Standard HF docs flow for all other endpoints
456
  hf_token = os.environ.get("HF_TOKEN")
457
 
458
  if not hf_token:
459
  return "Error: HF_TOKEN environment variable not set", False
460
 
 
 
461
  try:
462
  try:
463
  max_results_int = int(max_results) if max_results is not None else None
 
894
  "• transformers — Core model library: architectures, configs, tokenizers, training & inference APIs.\n"
895
  "• diffusers — Diffusion pipelines, schedulers, fine-tuning, training, and deployment patterns.\n"
896
  "• datasets — Dataset loading, streaming, processing, Arrow format, Hub integration.\n"
897
+ "• gradio — UI components and demos for ML models. Uses Gradio's native API: without query returns full docs (llms.txt), with query uses embedding search for precise results.\n"
898
  "• trackio — Experiment tracking, metrics logging, and run comparison.\n"
899
  "• smolagents — Lightweight agent abstractions and tool-using patterns.\n"
900
  "• huggingface_hub — Python client for Hub operations (auth, upload/download, repo management).\n"