Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
Commit Β·
914fa31
1
Parent(s): 9689843
pre simple impl
Browse files- agent/tools/docs_tools.py +76 -4
agent/tools/docs_tools.py
CHANGED
|
@@ -26,6 +26,10 @@ _CACHE_LOCK = asyncio.Lock()
|
|
| 26 |
DEFAULT_MAX_RESULTS = 20
|
| 27 |
MAX_RESULTS_CAP = 50
|
| 28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
# High-level endpoints that bundle related documentation sections
|
| 30 |
COMPOSITE_ENDPOINTS: dict[str, list[str]] = {
|
| 31 |
"optimum": [
|
|
@@ -57,6 +61,51 @@ def _expand_endpoint(endpoint: str) -> list[str]:
|
|
| 57 |
return COMPOSITE_ENDPOINTS.get(endpoint, [endpoint])
|
| 58 |
|
| 59 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
async def _fetch_html_page(hf_token: str, endpoint: str) -> str:
|
| 61 |
"""Fetch the HTML page for a given endpoint"""
|
| 62 |
base_url = "https://huggingface.co/docs"
|
|
@@ -378,14 +427,37 @@ async def explore_hf_docs_handler(arguments: dict[str, Any]) -> tuple[str, bool]
|
|
| 378 |
if not endpoint:
|
| 379 |
return "Error: No endpoint provided", False
|
| 380 |
|
| 381 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 382 |
hf_token = os.environ.get("HF_TOKEN")
|
| 383 |
|
| 384 |
if not hf_token:
|
| 385 |
return "Error: HF_TOKEN environment variable not set", False
|
| 386 |
|
| 387 |
-
endpoint = endpoint.lstrip("/")
|
| 388 |
-
|
| 389 |
try:
|
| 390 |
try:
|
| 391 |
max_results_int = int(max_results) if max_results is not None else None
|
|
@@ -822,7 +894,7 @@ EXPLORE_HF_DOCS_TOOL_SPEC = {
|
|
| 822 |
"β’ transformers β Core model library: architectures, configs, tokenizers, training & inference APIs.\n"
|
| 823 |
"β’ diffusers β Diffusion pipelines, schedulers, fine-tuning, training, and deployment patterns.\n"
|
| 824 |
"β’ datasets β Dataset loading, streaming, processing, Arrow format, Hub integration.\n"
|
| 825 |
-
"β’ gradio β UI components and demos for
|
| 826 |
"β’ trackio β Experiment tracking, metrics logging, and run comparison.\n"
|
| 827 |
"β’ smolagents β Lightweight agent abstractions and tool-using patterns.\n"
|
| 828 |
"β’ huggingface_hub β Python client for Hub operations (auth, upload/download, repo management).\n"
|
|
|
|
| 26 |
DEFAULT_MAX_RESULTS = 20
|
| 27 |
MAX_RESULTS_CAP = 50
|
| 28 |
|
| 29 |
+
# Gradio documentation endpoints (hosted separately from HF docs)
|
| 30 |
+
GRADIO_LLMS_TXT_URL = "https://gradio.app/llms.txt"
|
| 31 |
+
GRADIO_EMBEDDING_SEARCH_URL = "https://playground-worker.pages.dev/api/prompt"
|
| 32 |
+
|
| 33 |
# High-level endpoints that bundle related documentation sections
|
| 34 |
COMPOSITE_ENDPOINTS: dict[str, list[str]] = {
|
| 35 |
"optimum": [
|
|
|
|
| 61 |
return COMPOSITE_ENDPOINTS.get(endpoint, [endpoint])
|
| 62 |
|
| 63 |
|
| 64 |
+
# ---------------------------------------------------------------------------
|
| 65 |
+
# Gradio documentation helpers (uses gradio.app instead of HF docs)
|
| 66 |
+
# ---------------------------------------------------------------------------
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
async def _fetch_gradio_full_docs() -> str:
    """Download Gradio's complete documentation bundle (llms.txt).

    Returns:
        The raw text body of ``GRADIO_LLMS_TXT_URL``.

    Raises:
        httpx.HTTPStatusError: if the server replies with a 4xx/5xx status.
        httpx.RequestError: on connection or timeout failures.
    """
    # 30s timeout; gradio.app may redirect, so follow redirects.
    async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as http:
        resp = await http.get(GRADIO_LLMS_TXT_URL)
        resp.raise_for_status()
        return resp.text
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
async def _search_gradio_docs(query: str) -> str:
    """Run an embedding search against Gradio's hosted documentation API.

    Args:
        query: Free-text search string to embed and match against the docs.

    Returns:
        The most relevant documentation content for *query*, or the API's
        fallback string when nothing matches.

    Raises:
        httpx.HTTPStatusError: if the API replies with a 4xx/5xx status.
        httpx.RequestError: on connection or timeout failures.
    """
    # The worker endpoint validates the Origin header, so it must be sent.
    request_headers = {
        "Content-Type": "application/json",
        "Origin": "https://gradio-docs-mcp.up.railway.app",
    }
    # Payload keys follow the playground-worker API contract; the
    # $INSERT_GUIDES_DOCS_DEMOS placeholder tells it which corpus to search.
    payload = {
        "prompt_to_embed": query,
        "SYSTEM_PROMPT": "$INSERT_GUIDES_DOCS_DEMOS",
        "FALLBACK_PROMPT": "No results found",
    }
    async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as http:
        resp = await http.post(
            GRADIO_EMBEDDING_SEARCH_URL,
            headers=request_headers,
            json=payload,
        )
        resp.raise_for_status()
        body = resp.json()
        # The matched content comes back under SYS_PROMPT.
        return body.get("SYS_PROMPT", "No results found")
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def _format_gradio_results(content: str, query: str | None = None) -> str:
|
| 101 |
+
"""Format Gradio documentation results"""
|
| 102 |
+
header = "# Gradio Documentation\n\n"
|
| 103 |
+
if query:
|
| 104 |
+
header += f"Search query: '{query}'\n\n"
|
| 105 |
+
header += "Source: https://gradio.app/docs\n\n---\n\n"
|
| 106 |
+
return header + content
|
| 107 |
+
|
| 108 |
+
|
| 109 |
async def _fetch_html_page(hf_token: str, endpoint: str) -> str:
|
| 110 |
"""Fetch the HTML page for a given endpoint"""
|
| 111 |
base_url = "https://huggingface.co/docs"
|
|
|
|
| 427 |
if not endpoint:
|
| 428 |
return "Error: No endpoint provided", False
|
| 429 |
|
| 430 |
+
endpoint = endpoint.lstrip("/")
|
| 431 |
+
|
| 432 |
+
# Special handling for Gradio docs (hosted at gradio.app, not HF docs)
|
| 433 |
+
if endpoint.lower() == "gradio":
|
| 434 |
+
try:
|
| 435 |
+
clean_query = (
|
| 436 |
+
query.strip() if isinstance(query, str) and query.strip() else None
|
| 437 |
+
)
|
| 438 |
+
if clean_query:
|
| 439 |
+
# Use embedding search for specific queries
|
| 440 |
+
content = await _search_gradio_docs(clean_query)
|
| 441 |
+
else:
|
| 442 |
+
# Fetch full docs when no query provided
|
| 443 |
+
content = await _fetch_gradio_full_docs()
|
| 444 |
+
return _format_gradio_results(content, query=clean_query), True
|
| 445 |
+
except httpx.HTTPStatusError as e:
|
| 446 |
+
return (
|
| 447 |
+
f"HTTP error fetching Gradio docs: {e.response.status_code}",
|
| 448 |
+
False,
|
| 449 |
+
)
|
| 450 |
+
except httpx.RequestError as e:
|
| 451 |
+
return f"Request error fetching Gradio docs: {str(e)}", False
|
| 452 |
+
except Exception as e:
|
| 453 |
+
return f"Error fetching Gradio docs: {str(e)}", False
|
| 454 |
+
|
| 455 |
+
# Standard HF docs flow for all other endpoints
|
| 456 |
hf_token = os.environ.get("HF_TOKEN")
|
| 457 |
|
| 458 |
if not hf_token:
|
| 459 |
return "Error: HF_TOKEN environment variable not set", False
|
| 460 |
|
|
|
|
|
|
|
| 461 |
try:
|
| 462 |
try:
|
| 463 |
max_results_int = int(max_results) if max_results is not None else None
|
|
|
|
| 894 |
"• transformers — Core model library: architectures, configs, tokenizers, training & inference APIs.\n"
|
| 895 |
"• diffusers — Diffusion pipelines, schedulers, fine-tuning, training, and deployment patterns.\n"
|
| 896 |
"• datasets — Dataset loading, streaming, processing, Arrow format, Hub integration.\n"
|
| 897 |
+
"• gradio — UI components and demos for ML models. Uses Gradio's native API: without query returns full docs (llms.txt), with query uses embedding search for precise results.\n"
|
| 898 |
"• trackio — Experiment tracking, metrics logging, and run comparison.\n"
|
| 899 |
"• smolagents — Lightweight agent abstractions and tool-using patterns.\n"
|
| 900 |
"• huggingface_hub — Python client for Hub operations (auth, upload/download, repo management).\n"
|