Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
Merge pull request #5 from huggingface/explore-tool
Browse files- agent/config.py +0 -2
- agent/config_claude_mcp.json +0 -11
- agent/config_mcp_example copy.json +0 -21
- agent/context_manager/manager.py +10 -3
- agent/core/session.py +2 -2
- agent/core/tools.py +13 -4
- agent/main.py +1 -1
- agent/prompts/search_docs_system_prompt.yaml +38 -0
- agent/tools/__init__.py +9 -1
- agent/tools/_search_agent_tools.py +747 -0
- agent/tools/search_docs_tool.py +237 -0
- configs/_subagent_config_search_agent.json +12 -0
- agent/config_mcp_example.json → configs/main_agent_config.json +0 -2
- pyproject.toml +1 -0
- run_search_agent.py +142 -0
- uv.lock +204 -0
agent/config.py
CHANGED
|
@@ -8,7 +8,6 @@ from fastmcp.mcp_config import (
|
|
| 8 |
RemoteMCPServer,
|
| 9 |
StdioMCPServer,
|
| 10 |
)
|
| 11 |
-
from litellm import Tool
|
| 12 |
from pydantic import BaseModel
|
| 13 |
|
| 14 |
# These two are the canonical server config types for MCP servers.
|
|
@@ -19,7 +18,6 @@ class Config(BaseModel):
|
|
| 19 |
"""Configuration manager"""
|
| 20 |
|
| 21 |
model_name: str
|
| 22 |
-
tools: list[Tool] = []
|
| 23 |
mcpServers: dict[str, MCPServerConfig] = {}
|
| 24 |
|
| 25 |
|
|
|
|
| 8 |
RemoteMCPServer,
|
| 9 |
StdioMCPServer,
|
| 10 |
)
|
|
|
|
| 11 |
from pydantic import BaseModel
|
| 12 |
|
| 13 |
# These two are the canonical server config types for MCP servers.
|
|
|
|
| 18 |
"""Configuration manager"""
|
| 19 |
|
| 20 |
model_name: str
|
|
|
|
| 21 |
mcpServers: dict[str, MCPServerConfig] = {}
|
| 22 |
|
| 23 |
|
agent/config_claude_mcp.json
DELETED
|
@@ -1,11 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"mcpServers": {
|
| 3 |
-
"huggingface": {
|
| 4 |
-
"type": "http",
|
| 5 |
-
"url": "https://huggingface.co/mcp",
|
| 6 |
-
"headers": {
|
| 7 |
-
"Authorization": "Bearer ${HF_TOKEN}"
|
| 8 |
-
}
|
| 9 |
-
}
|
| 10 |
-
}
|
| 11 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
agent/config_mcp_example copy.json
DELETED
|
@@ -1,21 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"model_name": "anthropic/claude-sonnet-4-5-20250929",
|
| 3 |
-
"tools": [],
|
| 4 |
-
"system_prompt_path": "",
|
| 5 |
-
"mcpServers": {
|
| 6 |
-
"hf-mcp-server": {
|
| 7 |
-
"transport": "http",
|
| 8 |
-
"url": "https://huggingface.co/mcp?login",
|
| 9 |
-
"headers": {
|
| 10 |
-
"Authorization": "Bearer ${HF_TOKEN}"
|
| 11 |
-
}
|
| 12 |
-
},
|
| 13 |
-
"playwright": {
|
| 14 |
-
"transport": "stdio",
|
| 15 |
-
"command": "npx",
|
| 16 |
-
"args": [
|
| 17 |
-
"@playwright/mcp@latest"
|
| 18 |
-
]
|
| 19 |
-
}
|
| 20 |
-
}
|
| 21 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
agent/context_manager/manager.py
CHANGED
|
@@ -19,17 +19,24 @@ class ContextManager:
|
|
| 19 |
compact_size: float = 0.1,
|
| 20 |
untouched_messages: int = 5,
|
| 21 |
tool_specs: list[dict[str, Any]] | None = None,
|
|
|
|
| 22 |
):
|
| 23 |
-
self.system_prompt = self._load_system_prompt(
|
|
|
|
|
|
|
| 24 |
self.max_context = max_context
|
| 25 |
self.compact_size = int(max_context * compact_size)
|
| 26 |
self.context_length = len(self.system_prompt) // 4
|
| 27 |
self.untouched_messages = untouched_messages
|
| 28 |
self.items: list[Message] = [Message(role="system", content=self.system_prompt)]
|
| 29 |
|
| 30 |
-
def _load_system_prompt(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
"""Load and render the system prompt from YAML file with Jinja2"""
|
| 32 |
-
prompt_file = Path(__file__).parent.parent / "prompts" / "
|
| 33 |
|
| 34 |
with open(prompt_file, "r") as f:
|
| 35 |
prompt_data = yaml.safe_load(f)
|
|
|
|
| 19 |
compact_size: float = 0.1,
|
| 20 |
untouched_messages: int = 5,
|
| 21 |
tool_specs: list[dict[str, Any]] | None = None,
|
| 22 |
+
prompt_file_suffix: str = "system_prompt.yaml",
|
| 23 |
):
|
| 24 |
+
self.system_prompt = self._load_system_prompt(
|
| 25 |
+
tool_specs or [], prompt_file_suffix="system_prompt.yaml"
|
| 26 |
+
)
|
| 27 |
self.max_context = max_context
|
| 28 |
self.compact_size = int(max_context * compact_size)
|
| 29 |
self.context_length = len(self.system_prompt) // 4
|
| 30 |
self.untouched_messages = untouched_messages
|
| 31 |
self.items: list[Message] = [Message(role="system", content=self.system_prompt)]
|
| 32 |
|
| 33 |
+
def _load_system_prompt(
|
| 34 |
+
self,
|
| 35 |
+
tool_specs: list[dict[str, Any]],
|
| 36 |
+
prompt_file_suffix: str = "system_prompt.yaml",
|
| 37 |
+
):
|
| 38 |
"""Load and render the system prompt from YAML file with Jinja2"""
|
| 39 |
+
prompt_file = Path(__file__).parent.parent / "prompts" / f"{prompt_file_suffix}"
|
| 40 |
|
| 41 |
with open(prompt_file, "r") as f:
|
| 42 |
prompt_data = yaml.safe_load(f)
|
agent/core/session.py
CHANGED
|
@@ -36,10 +36,11 @@ class Session:
|
|
| 36 |
event_queue: asyncio.Queue,
|
| 37 |
config: Config | None = None,
|
| 38 |
tool_router=None,
|
|
|
|
| 39 |
):
|
| 40 |
self.tool_router = tool_router
|
| 41 |
tool_specs = tool_router.get_tool_specs_for_llm() if tool_router else []
|
| 42 |
-
self.context_manager = ContextManager(
|
| 43 |
max_context=get_max_tokens(config.model_name),
|
| 44 |
compact_size=0.1,
|
| 45 |
untouched_messages=5,
|
|
@@ -49,7 +50,6 @@ class Session:
|
|
| 49 |
self.session_id = str(uuid.uuid4())
|
| 50 |
self.config = config or Config(
|
| 51 |
model_name="anthropic/claude-sonnet-4-5-20250929",
|
| 52 |
-
tools=[],
|
| 53 |
)
|
| 54 |
self.is_running = True
|
| 55 |
self.current_task: asyncio.Task | None = None
|
|
|
|
| 36 |
event_queue: asyncio.Queue,
|
| 37 |
config: Config | None = None,
|
| 38 |
tool_router=None,
|
| 39 |
+
context_manager: ContextManager | None = None,
|
| 40 |
):
|
| 41 |
self.tool_router = tool_router
|
| 42 |
tool_specs = tool_router.get_tool_specs_for_llm() if tool_router else []
|
| 43 |
+
self.context_manager = context_manager or ContextManager(
|
| 44 |
max_context=get_max_tokens(config.model_name),
|
| 45 |
compact_size=0.1,
|
| 46 |
untouched_messages=5,
|
|
|
|
| 50 |
self.session_id = str(uuid.uuid4())
|
| 51 |
self.config = config or Config(
|
| 52 |
model_name="anthropic/claude-sonnet-4-5-20250929",
|
|
|
|
| 53 |
)
|
| 54 |
self.is_running = True
|
| 55 |
self.current_task: asyncio.Task | None = None
|
agent/core/tools.py
CHANGED
|
@@ -14,6 +14,7 @@ from mcp.types import EmbeddedResource, ImageContent, TextContent
|
|
| 14 |
|
| 15 |
from agent.config import MCPServerConfig
|
| 16 |
from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, hf_jobs_handler
|
|
|
|
| 17 |
from agent.tools.plan_tool import PLAN_TOOL_SPEC, plan_tool_handler
|
| 18 |
|
| 19 |
# Suppress aiohttp deprecation warning
|
|
@@ -21,7 +22,7 @@ warnings.filterwarnings(
|
|
| 21 |
"ignore", category=DeprecationWarning, module="aiohttp.connector"
|
| 22 |
)
|
| 23 |
|
| 24 |
-
NOT_ALLOWED_TOOL_NAMES = ["hf_jobs"]
|
| 25 |
|
| 26 |
|
| 27 |
def convert_mcp_content_to_string(content: list) -> str:
|
|
@@ -187,7 +188,9 @@ class ToolRouter:
|
|
| 187 |
|
| 188 |
def create_builtin_tools() -> list[ToolSpec]:
|
| 189 |
"""Create built-in tool specifications"""
|
| 190 |
-
print(
|
|
|
|
|
|
|
| 191 |
return [
|
| 192 |
ToolSpec(
|
| 193 |
name=HF_JOBS_TOOL_SPEC["name"],
|
|
@@ -196,9 +199,15 @@ def create_builtin_tools() -> list[ToolSpec]:
|
|
| 196 |
handler=hf_jobs_handler,
|
| 197 |
),
|
| 198 |
ToolSpec(
|
| 199 |
-
name=
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
description=PLAN_TOOL_SPEC["description"],
|
| 201 |
parameters=PLAN_TOOL_SPEC["parameters"],
|
| 202 |
handler=plan_tool_handler,
|
| 203 |
-
)
|
| 204 |
]
|
|
|
|
| 14 |
|
| 15 |
from agent.config import MCPServerConfig
|
| 16 |
from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, hf_jobs_handler
|
| 17 |
+
from agent.tools.search_docs_tool import SEARCH_DOCS_TOOL_SPEC, search_docs_handler
|
| 18 |
from agent.tools.plan_tool import PLAN_TOOL_SPEC, plan_tool_handler
|
| 19 |
|
| 20 |
# Suppress aiohttp deprecation warning
|
|
|
|
| 22 |
"ignore", category=DeprecationWarning, module="aiohttp.connector"
|
| 23 |
)
|
| 24 |
|
| 25 |
+
NOT_ALLOWED_TOOL_NAMES = ["hf_jobs", "hf_doc_search", "hf_doc_fetch"]
|
| 26 |
|
| 27 |
|
| 28 |
def convert_mcp_content_to_string(content: list) -> str:
|
|
|
|
| 188 |
|
| 189 |
def create_builtin_tools() -> list[ToolSpec]:
|
| 190 |
"""Create built-in tool specifications"""
|
| 191 |
+
print(
|
| 192 |
+
f"Creating built-in tools: {HF_JOBS_TOOL_SPEC['name']}, {SEARCH_DOCS_TOOL_SPEC['name']}, {PLAN_TOOL_SPEC['name']}"
|
| 193 |
+
)
|
| 194 |
return [
|
| 195 |
ToolSpec(
|
| 196 |
name=HF_JOBS_TOOL_SPEC["name"],
|
|
|
|
| 199 |
handler=hf_jobs_handler,
|
| 200 |
),
|
| 201 |
ToolSpec(
|
| 202 |
+
name=SEARCH_DOCS_TOOL_SPEC["name"],
|
| 203 |
+
description=SEARCH_DOCS_TOOL_SPEC["description"],
|
| 204 |
+
parameters=SEARCH_DOCS_TOOL_SPEC["parameters"],
|
| 205 |
+
handler=search_docs_handler,
|
| 206 |
+
),
|
| 207 |
+
ToolSpec(
|
| 208 |
+
ame=PLAN_TOOL_SPEC["name"],
|
| 209 |
description=PLAN_TOOL_SPEC["description"],
|
| 210 |
parameters=PLAN_TOOL_SPEC["parameters"],
|
| 211 |
handler=plan_tool_handler,
|
| 212 |
+
)
|
| 213 |
]
|
agent/main.py
CHANGED
|
@@ -222,7 +222,7 @@ async def main():
|
|
| 222 |
ready_event = asyncio.Event()
|
| 223 |
|
| 224 |
# Start agent loop in background
|
| 225 |
-
config_path = Path(__file__).parent / "
|
| 226 |
config = load_config(config_path)
|
| 227 |
|
| 228 |
# Create tool router
|
|
|
|
| 222 |
ready_event = asyncio.Event()
|
| 223 |
|
| 224 |
# Start agent loop in background
|
| 225 |
+
config_path = Path(__file__).parent.parent / "configs" / "main_agent_config.json"
|
| 226 |
config = load_config(config_path)
|
| 227 |
|
| 228 |
# Create tool router
|
agent/prompts/search_docs_system_prompt.yaml
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
search_docs_system_prompt: |
|
| 2 |
+
You are a specialized documentation search agent. Your task is to comprehensively search and synthesize information from Hugging Face documentation.
|
| 3 |
+
|
| 4 |
+
# Search Strategy
|
| 5 |
+
|
| 6 |
+
You must search thoroughly before synthesizing results. Follow this approach:
|
| 7 |
+
|
| 8 |
+
1. **Query Analysis**: Identify the core concepts and intent of the query
|
| 9 |
+
2. **Initial Search**: Start with a broad search capturing the main topic
|
| 10 |
+
3. **Iterative Refinement**: Run multiple searches to go deeper into topics. You will see parsed HTML pages, also look into links on the html pages for best information - first-pass results often miss key details
|
| 11 |
+
4. **You must get to the end truth**: You must get to the bottom of the truth for this search query. You CAN NOT say that somebody should look up documentation. You must look it up yourself and give the best answer you can.
|
| 12 |
+
|
| 13 |
+
## Query Formulation Best Practices
|
| 14 |
+
|
| 15 |
+
- Add relevant synonyms and related technical terms
|
| 16 |
+
- Remove filler words, focus on searchable concepts
|
| 17 |
+
- Break complex questions into focused sub-queries
|
| 18 |
+
- Include domain-specific terminology when applicable
|
| 19 |
+
- Try both specific terms and general related terms
|
| 20 |
+
|
| 21 |
+
# Response Guidelines
|
| 22 |
+
|
| 23 |
+
After gathering results, synthesize them following these principles:
|
| 24 |
+
|
| 25 |
+
1. **Analyze Relevance**: Evaluate which results directly answer the query
|
| 26 |
+
2. **Synthesize**: Combine information from multiple sources when applicable
|
| 27 |
+
3. **Prioritize**: Present information in order of relevance
|
| 28 |
+
4. **Cite Sources**: Reference which documents you're drawing from especially include relevant code samples and links to the code samples.
|
| 29 |
+
5. **Acknowledge Gaps**: If documents don't fully answer the query, explicitly state this
|
| 30 |
+
6. **Handle Conflicts**: If sources contradict, note this and explain your reasoning
|
| 31 |
+
7. **Be Concise**: Provide a clear, direct answer without unnecessary elaboration
|
| 32 |
+
|
| 33 |
+
# Constraints
|
| 34 |
+
|
| 35 |
+
- Only provide information found in the documentation
|
| 36 |
+
- Do not make assumptions beyond what the sources state
|
| 37 |
+
- If information is not found, say so clearly rather than guessing
|
| 38 |
+
- Focus on answering the query directly
|
agent/tools/__init__.py
CHANGED
|
@@ -3,6 +3,14 @@ Hugging Face tools for the agent
|
|
| 3 |
"""
|
| 4 |
|
| 5 |
from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, HfJobsTool, hf_jobs_handler
|
|
|
|
| 6 |
from agent.tools.types import ToolResult
|
| 7 |
|
| 8 |
-
__all__ = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
"""
|
| 4 |
|
| 5 |
from agent.tools.jobs_tool import HF_JOBS_TOOL_SPEC, HfJobsTool, hf_jobs_handler
|
| 6 |
+
from agent.tools.search_docs_tool import SEARCH_DOCS_TOOL_SPEC, search_docs_handler
|
| 7 |
from agent.tools.types import ToolResult
|
| 8 |
|
| 9 |
+
__all__ = [
|
| 10 |
+
"ToolResult",
|
| 11 |
+
"HF_JOBS_TOOL_SPEC",
|
| 12 |
+
"hf_jobs_handler",
|
| 13 |
+
"HfJobsTool",
|
| 14 |
+
"SEARCH_DOCS_TOOL_SPEC",
|
| 15 |
+
"search_docs_handler",
|
| 16 |
+
]
|
agent/tools/_search_agent_tools.py
ADDED
|
@@ -0,0 +1,747 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Tools available to the search sub-agent
|
| 3 |
+
These tools are used by the search sub-agent spawned by search_docs_tool
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import asyncio
|
| 7 |
+
import os
|
| 8 |
+
import time
|
| 9 |
+
from typing import Any
|
| 10 |
+
|
| 11 |
+
import httpx
|
| 12 |
+
from bs4 import BeautifulSoup
|
| 13 |
+
|
| 14 |
+
# Cache for OpenAPI spec to avoid repeated fetches
|
| 15 |
+
_openapi_spec_cache: dict[str, Any] | None = None
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
async def _fetch_html_page(hf_token: str, endpoint: str) -> str:
|
| 19 |
+
"""Fetch the HTML page for a given endpoint"""
|
| 20 |
+
base_url = "https://huggingface.co/docs"
|
| 21 |
+
url = f"{base_url}/{endpoint}"
|
| 22 |
+
headers = {"Authorization": f"Bearer {hf_token}"}
|
| 23 |
+
|
| 24 |
+
fetch_start = time.perf_counter()
|
| 25 |
+
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
|
| 26 |
+
response = await client.get(url, headers=headers)
|
| 27 |
+
response.raise_for_status()
|
| 28 |
+
|
| 29 |
+
fetch_time = time.perf_counter() - fetch_start
|
| 30 |
+
print(f"[DEBUG] _fetch_html_page: Fetched in {fetch_time:.2f}s")
|
| 31 |
+
|
| 32 |
+
return response.text
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def _parse_sidebar_navigation(html_content: str) -> list[dict[str, str]]:
|
| 36 |
+
"""Parse the sidebar navigation and extract all links"""
|
| 37 |
+
parse_start = time.perf_counter()
|
| 38 |
+
|
| 39 |
+
soup = BeautifulSoup(html_content, "html.parser")
|
| 40 |
+
sidebar = soup.find("nav", class_=lambda x: x and "flex-auto" in x)
|
| 41 |
+
|
| 42 |
+
if not sidebar:
|
| 43 |
+
raise ValueError("Could not find navigation sidebar")
|
| 44 |
+
|
| 45 |
+
links = sidebar.find_all("a", href=True)
|
| 46 |
+
nav_data = []
|
| 47 |
+
|
| 48 |
+
for link in links:
|
| 49 |
+
title = link.get_text(strip=True)
|
| 50 |
+
href = link["href"]
|
| 51 |
+
|
| 52 |
+
# Make URL absolute
|
| 53 |
+
page_url = f"https://huggingface.co{href}" if href.startswith("/") else href
|
| 54 |
+
nav_data.append({"title": title, "url": page_url})
|
| 55 |
+
|
| 56 |
+
parse_time = time.perf_counter() - parse_start
|
| 57 |
+
print(
|
| 58 |
+
f"[DEBUG] _parse_sidebar_navigation: Parsed in {parse_time:.2f}s, found {len(nav_data)} links"
|
| 59 |
+
)
|
| 60 |
+
|
| 61 |
+
return nav_data
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
async def _fetch_single_glimpse(
|
| 65 |
+
client: httpx.AsyncClient, hf_token: str, item: dict[str, str]
|
| 66 |
+
) -> dict[str, str]:
|
| 67 |
+
"""Fetch a glimpse (first 300 chars) for a single page"""
|
| 68 |
+
md_url = f"{item['url']}.md"
|
| 69 |
+
headers = {"Authorization": f"Bearer {hf_token}"}
|
| 70 |
+
|
| 71 |
+
try:
|
| 72 |
+
response = await client.get(md_url, headers=headers)
|
| 73 |
+
response.raise_for_status()
|
| 74 |
+
|
| 75 |
+
content = response.text
|
| 76 |
+
glimpse = content[:300].strip()
|
| 77 |
+
if len(content) > 300:
|
| 78 |
+
glimpse += "..."
|
| 79 |
+
|
| 80 |
+
return {
|
| 81 |
+
"title": item["title"],
|
| 82 |
+
"url": item["url"],
|
| 83 |
+
"md_url": md_url,
|
| 84 |
+
"glimpse": glimpse,
|
| 85 |
+
}
|
| 86 |
+
except Exception as e:
|
| 87 |
+
return {
|
| 88 |
+
"title": item["title"],
|
| 89 |
+
"url": item["url"],
|
| 90 |
+
"md_url": md_url,
|
| 91 |
+
"glimpse": f"[Could not fetch glimpse: {str(e)[:50]}]",
|
| 92 |
+
}
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
async def _fetch_all_glimpses(
|
| 96 |
+
hf_token: str, nav_data: list[dict[str, str]]
|
| 97 |
+
) -> list[dict[str, str]]:
|
| 98 |
+
"""Fetch glimpses for all pages in parallel"""
|
| 99 |
+
glimpse_start = time.perf_counter()
|
| 100 |
+
|
| 101 |
+
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
|
| 102 |
+
result_items = await asyncio.gather(
|
| 103 |
+
*[_fetch_single_glimpse(client, hf_token, item) for item in nav_data]
|
| 104 |
+
)
|
| 105 |
+
|
| 106 |
+
glimpse_time = time.perf_counter() - glimpse_start
|
| 107 |
+
print(
|
| 108 |
+
f"[DEBUG] _fetch_all_glimpses: Fetched {len(result_items)} glimpses in {glimpse_time:.2f}s"
|
| 109 |
+
)
|
| 110 |
+
|
| 111 |
+
return list(result_items)
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
def _format_exploration_results(
|
| 115 |
+
endpoint: str, result_items: list[dict[str, str]]
|
| 116 |
+
) -> str:
|
| 117 |
+
"""Format the exploration results as a readable string"""
|
| 118 |
+
base_url = "https://huggingface.co/docs"
|
| 119 |
+
url = f"{base_url}/{endpoint}"
|
| 120 |
+
result = f"Documentation structure for: {url}\n\n"
|
| 121 |
+
result += f"Found {len(result_items)} pages:\n\n"
|
| 122 |
+
|
| 123 |
+
for i, item in enumerate(result_items, 1):
|
| 124 |
+
result += f"{i}. **{item['title']}**\n"
|
| 125 |
+
result += f" URL: {item['url']}\n"
|
| 126 |
+
result += f" Glimpse: {item['glimpse']}\n\n"
|
| 127 |
+
|
| 128 |
+
return result
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
async def explore_hf_docs(hf_token: str, endpoint: str) -> str:
|
| 132 |
+
"""Main function to explore documentation structure"""
|
| 133 |
+
start_time = time.perf_counter()
|
| 134 |
+
print(f"[DEBUG] explore_hf_docs: Starting for endpoint '{endpoint}'")
|
| 135 |
+
|
| 136 |
+
# Fetch HTML page
|
| 137 |
+
html_content = await _fetch_html_page(hf_token, endpoint)
|
| 138 |
+
|
| 139 |
+
# Parse navigation
|
| 140 |
+
nav_data = _parse_sidebar_navigation(html_content)
|
| 141 |
+
|
| 142 |
+
if not nav_data:
|
| 143 |
+
raise ValueError(f"No navigation links found for endpoint '{endpoint}'")
|
| 144 |
+
|
| 145 |
+
# Fetch all glimpses in parallel
|
| 146 |
+
result_items = await _fetch_all_glimpses(hf_token, nav_data)
|
| 147 |
+
|
| 148 |
+
# Format results
|
| 149 |
+
result = _format_exploration_results(endpoint, result_items)
|
| 150 |
+
|
| 151 |
+
total_time = time.perf_counter() - start_time
|
| 152 |
+
print(f"[DEBUG] explore_hf_docs: Total time {total_time:.2f}s")
|
| 153 |
+
|
| 154 |
+
return result
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
async def explore_hf_docs_handler(arguments: dict[str, Any]) -> tuple[str, bool]:
|
| 158 |
+
"""
|
| 159 |
+
Explore the documentation structure for a given endpoint by parsing the sidebar navigation
|
| 160 |
+
|
| 161 |
+
Args:
|
| 162 |
+
arguments: Dictionary with 'endpoint' parameter (e.g., 'trl', 'transformers', etc.)
|
| 163 |
+
|
| 164 |
+
Returns:
|
| 165 |
+
Tuple of (structured_navigation_with_glimpses, success)
|
| 166 |
+
"""
|
| 167 |
+
endpoint = arguments.get("endpoint", "")
|
| 168 |
+
|
| 169 |
+
if not endpoint:
|
| 170 |
+
return "Error: No endpoint provided", False
|
| 171 |
+
|
| 172 |
+
# Get HF token from environment
|
| 173 |
+
hf_token = os.environ.get("HF_TOKEN")
|
| 174 |
+
|
| 175 |
+
if not hf_token:
|
| 176 |
+
return "Error: HF_TOKEN environment variable not set", False
|
| 177 |
+
|
| 178 |
+
endpoint = endpoint.lstrip("/")
|
| 179 |
+
|
| 180 |
+
try:
|
| 181 |
+
result = await explore_hf_docs(hf_token, endpoint)
|
| 182 |
+
return result, True
|
| 183 |
+
|
| 184 |
+
except httpx.HTTPStatusError as e:
|
| 185 |
+
return (
|
| 186 |
+
f"HTTP error: {e.response.status_code} - {e.response.text[:200]}",
|
| 187 |
+
False,
|
| 188 |
+
)
|
| 189 |
+
except httpx.RequestError as e:
|
| 190 |
+
return f"Request error: {str(e)}", False
|
| 191 |
+
except ValueError as e:
|
| 192 |
+
return f"Error: {str(e)}", False
|
| 193 |
+
except Exception as e:
|
| 194 |
+
return f"Unexpected error: {str(e)}", False
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
async def _fetch_openapi_spec() -> dict[str, Any]:
|
| 198 |
+
"""Fetch and cache the HuggingFace OpenAPI specification"""
|
| 199 |
+
global _openapi_spec_cache
|
| 200 |
+
|
| 201 |
+
if _openapi_spec_cache is not None:
|
| 202 |
+
print("[DEBUG] _fetch_openapi_spec: Using cached spec")
|
| 203 |
+
return _openapi_spec_cache
|
| 204 |
+
|
| 205 |
+
start_time = time.perf_counter()
|
| 206 |
+
print("[DEBUG] _fetch_openapi_spec: Fetching from API")
|
| 207 |
+
|
| 208 |
+
url = "https://huggingface.co/.well-known/openapi.json"
|
| 209 |
+
|
| 210 |
+
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
|
| 211 |
+
response = await client.get(url)
|
| 212 |
+
response.raise_for_status()
|
| 213 |
+
|
| 214 |
+
spec = response.json()
|
| 215 |
+
_openapi_spec_cache = spec
|
| 216 |
+
|
| 217 |
+
fetch_time = time.perf_counter() - start_time
|
| 218 |
+
print(f"[DEBUG] _fetch_openapi_spec: Fetched and cached in {fetch_time:.2f}s")
|
| 219 |
+
|
| 220 |
+
return spec
|
| 221 |
+
|
| 222 |
+
|
| 223 |
+
def _extract_all_tags(spec: dict[str, Any]) -> list[str]:
|
| 224 |
+
"""Extract all unique tags from the OpenAPI spec"""
|
| 225 |
+
tags = set()
|
| 226 |
+
|
| 227 |
+
# Get tags from the tags section
|
| 228 |
+
for tag_obj in spec.get("tags", []):
|
| 229 |
+
if "name" in tag_obj:
|
| 230 |
+
tags.add(tag_obj["name"])
|
| 231 |
+
|
| 232 |
+
# Also get tags from paths (in case some aren't in the tags section)
|
| 233 |
+
for path, path_item in spec.get("paths", {}).items():
|
| 234 |
+
for method, operation in path_item.items():
|
| 235 |
+
if method in ["get", "post", "put", "delete", "patch", "head", "options"]:
|
| 236 |
+
for tag in operation.get("tags", []):
|
| 237 |
+
tags.add(tag)
|
| 238 |
+
|
| 239 |
+
return sorted(list(tags))
|
| 240 |
+
|
| 241 |
+
|
| 242 |
+
def _search_openapi_by_tag(spec: dict[str, Any], tag: str) -> list[dict[str, Any]]:
|
| 243 |
+
"""Search for API endpoints with a specific tag"""
|
| 244 |
+
results = []
|
| 245 |
+
paths = spec.get("paths", {})
|
| 246 |
+
servers = spec.get("servers", [])
|
| 247 |
+
base_url = (
|
| 248 |
+
servers[0].get("url", "https://huggingface.co")
|
| 249 |
+
if servers
|
| 250 |
+
else "https://huggingface.co"
|
| 251 |
+
)
|
| 252 |
+
|
| 253 |
+
for path, path_item in paths.items():
|
| 254 |
+
for method, operation in path_item.items():
|
| 255 |
+
if method not in [
|
| 256 |
+
"get",
|
| 257 |
+
"post",
|
| 258 |
+
"put",
|
| 259 |
+
"delete",
|
| 260 |
+
"patch",
|
| 261 |
+
"head",
|
| 262 |
+
"options",
|
| 263 |
+
]:
|
| 264 |
+
continue
|
| 265 |
+
|
| 266 |
+
operation_tags = operation.get("tags", [])
|
| 267 |
+
if tag in operation_tags:
|
| 268 |
+
# Extract parameters
|
| 269 |
+
parameters = operation.get("parameters", [])
|
| 270 |
+
request_body = operation.get("requestBody", {})
|
| 271 |
+
responses = operation.get("responses", {})
|
| 272 |
+
|
| 273 |
+
results.append(
|
| 274 |
+
{
|
| 275 |
+
"path": path,
|
| 276 |
+
"method": method.upper(),
|
| 277 |
+
"operationId": operation.get("operationId", ""),
|
| 278 |
+
"summary": operation.get("summary", ""),
|
| 279 |
+
"description": operation.get("description", ""),
|
| 280 |
+
"parameters": parameters,
|
| 281 |
+
"request_body": request_body,
|
| 282 |
+
"responses": responses,
|
| 283 |
+
"base_url": base_url,
|
| 284 |
+
}
|
| 285 |
+
)
|
| 286 |
+
|
| 287 |
+
return results
|
| 288 |
+
|
| 289 |
+
|
| 290 |
+
def _generate_curl_example(endpoint: dict[str, Any]) -> str:
|
| 291 |
+
"""Generate a curl command example for an endpoint"""
|
| 292 |
+
method = endpoint["method"]
|
| 293 |
+
path = endpoint["path"]
|
| 294 |
+
base_url = endpoint["base_url"]
|
| 295 |
+
|
| 296 |
+
# Build the full URL with example path parameters
|
| 297 |
+
full_path = path
|
| 298 |
+
for param in endpoint.get("parameters", []):
|
| 299 |
+
if param.get("in") == "path" and param.get("required"):
|
| 300 |
+
param_name = param["name"]
|
| 301 |
+
example = param.get(
|
| 302 |
+
"example", param.get("schema", {}).get("example", f"<{param_name}>")
|
| 303 |
+
)
|
| 304 |
+
full_path = full_path.replace(f"{{{param_name}}}", str(example))
|
| 305 |
+
|
| 306 |
+
curl = f"curl -X {method} \\\n '{base_url}{full_path}'"
|
| 307 |
+
|
| 308 |
+
# Add query parameters if any
|
| 309 |
+
query_params = [p for p in endpoint.get("parameters", []) if p.get("in") == "query"]
|
| 310 |
+
if query_params and query_params[0].get("required"):
|
| 311 |
+
param = query_params[0]
|
| 312 |
+
example = param.get("example", param.get("schema", {}).get("example", "value"))
|
| 313 |
+
curl += f"?{param['name']}={example}"
|
| 314 |
+
|
| 315 |
+
# Add headers
|
| 316 |
+
curl += " \\\n -H 'Authorization: Bearer $HF_TOKEN'"
|
| 317 |
+
|
| 318 |
+
# Add request body if applicable
|
| 319 |
+
if method in ["POST", "PUT", "PATCH"] and endpoint.get("request_body"):
|
| 320 |
+
content = endpoint["request_body"].get("content", {})
|
| 321 |
+
if "application/json" in content:
|
| 322 |
+
curl += " \\\n -H 'Content-Type: application/json'"
|
| 323 |
+
schema = content["application/json"].get("schema", {})
|
| 324 |
+
example = schema.get("example", "{}")
|
| 325 |
+
if isinstance(example, dict):
|
| 326 |
+
import json
|
| 327 |
+
|
| 328 |
+
example = json.dumps(example, indent=2)
|
| 329 |
+
curl += f" \\\n -d '{example}'"
|
| 330 |
+
|
| 331 |
+
return curl
|
| 332 |
+
|
| 333 |
+
|
| 334 |
+
def _format_parameters(parameters: list[dict[str, Any]]) -> str:
|
| 335 |
+
"""Format parameter information from OpenAPI spec"""
|
| 336 |
+
if not parameters:
|
| 337 |
+
return ""
|
| 338 |
+
|
| 339 |
+
# Group parameters by type
|
| 340 |
+
path_params = [p for p in parameters if p.get("in") == "path"]
|
| 341 |
+
query_params = [p for p in parameters if p.get("in") == "query"]
|
| 342 |
+
header_params = [p for p in parameters if p.get("in") == "header"]
|
| 343 |
+
|
| 344 |
+
output = []
|
| 345 |
+
|
| 346 |
+
if path_params:
|
| 347 |
+
output.append("**Path Parameters:**")
|
| 348 |
+
for param in path_params:
|
| 349 |
+
name = param.get("name", "")
|
| 350 |
+
required = " (required)" if param.get("required") else " (optional)"
|
| 351 |
+
description = param.get("description", "")
|
| 352 |
+
param_type = param.get("schema", {}).get("type", "string")
|
| 353 |
+
example = param.get("example") or param.get("schema", {}).get("example", "")
|
| 354 |
+
|
| 355 |
+
output.append(f"- `{name}` ({param_type}){required}: {description}")
|
| 356 |
+
if example:
|
| 357 |
+
output.append(f" Example: `{example}`")
|
| 358 |
+
|
| 359 |
+
if query_params:
|
| 360 |
+
if output:
|
| 361 |
+
output.append("")
|
| 362 |
+
output.append("**Query Parameters:**")
|
| 363 |
+
for param in query_params:
|
| 364 |
+
name = param.get("name", "")
|
| 365 |
+
required = " (required)" if param.get("required") else " (optional)"
|
| 366 |
+
description = param.get("description", "")
|
| 367 |
+
param_type = param.get("schema", {}).get("type", "string")
|
| 368 |
+
example = param.get("example") or param.get("schema", {}).get("example", "")
|
| 369 |
+
|
| 370 |
+
output.append(f"- `{name}` ({param_type}){required}: {description}")
|
| 371 |
+
if example:
|
| 372 |
+
output.append(f" Example: `{example}`")
|
| 373 |
+
|
| 374 |
+
if header_params:
|
| 375 |
+
if output:
|
| 376 |
+
output.append("")
|
| 377 |
+
output.append("**Header Parameters:**")
|
| 378 |
+
for param in header_params:
|
| 379 |
+
name = param.get("name", "")
|
| 380 |
+
required = " (required)" if param.get("required") else " (optional)"
|
| 381 |
+
description = param.get("description", "")
|
| 382 |
+
|
| 383 |
+
output.append(f"- `{name}`{required}: {description}")
|
| 384 |
+
|
| 385 |
+
return "\n".join(output)
|
| 386 |
+
|
| 387 |
+
|
| 388 |
+
def _format_response_info(responses: dict[str, Any]) -> str:
|
| 389 |
+
"""Format response information from OpenAPI spec"""
|
| 390 |
+
if not responses:
|
| 391 |
+
return "No response information available"
|
| 392 |
+
|
| 393 |
+
output = []
|
| 394 |
+
for status_code, response_obj in list(responses.items())[
|
| 395 |
+
:3
|
| 396 |
+
]: # Show first 3 status codes
|
| 397 |
+
desc = response_obj.get("description", "")
|
| 398 |
+
output.append(f"- **{status_code}**: {desc}")
|
| 399 |
+
|
| 400 |
+
content = response_obj.get("content", {})
|
| 401 |
+
if "application/json" in content:
|
| 402 |
+
schema = content["application/json"].get("schema", {})
|
| 403 |
+
if "type" in schema:
|
| 404 |
+
output.append(f" Returns: {schema.get('type', 'object')}")
|
| 405 |
+
|
| 406 |
+
return "\n".join(output)
|
| 407 |
+
|
| 408 |
+
|
| 409 |
+
def _format_openapi_results(results: list[dict[str, Any]], tag: str) -> str:
    """Format OpenAPI search results as markdown with curl examples.

    Each endpoint section contains the method/path heading, optional
    summary and (truncated) description, parameter details, a curl usage
    example, and a response summary, separated by horizontal rules.
    """
    if not results:
        return f"No API endpoints found with tag '{tag}'"

    parts = [
        f"# API Endpoints for tag: `{tag}`\n\n",
        f"Found {len(results)} endpoint(s)\n\n",
        "---\n\n",
    ]

    for index, endpoint in enumerate(results, 1):
        parts.append(f"## {index}. {endpoint['method']} {endpoint['path']}\n\n")

        if endpoint["summary"]:
            parts.append(f"**Summary:** {endpoint['summary']}\n\n")

        if endpoint["description"]:
            # Truncate long descriptions to 300 chars with an ellipsis
            description = endpoint["description"][:300]
            if len(endpoint["description"]) > 300:
                description += "..."
            parts.append(f"**Description:** {description}\n\n")

        # Path/query/header parameter details
        params_block = _format_parameters(endpoint.get("parameters", []))
        if params_block:
            parts.append(params_block + "\n\n")

        # Ready-to-run curl invocation
        parts.append("**Usage:**\n```bash\n")
        parts.append(_generate_curl_example(endpoint))
        parts.append("\n```\n\n")

        # Response summary
        parts.append("**Returns:**\n")
        parts.append(_format_response_info(endpoint["responses"]))
        parts.append("\n\n")

        parts.append("---\n\n")

    return "".join(parts)
|
| 448 |
+
|
| 449 |
+
|
| 450 |
+
async def search_openapi_handler(arguments: dict[str, Any]) -> tuple[str, bool]:
    """
    Search the HuggingFace OpenAPI specification by tag

    Args:
        arguments: Dictionary with 'tag' parameter

    Returns:
        Tuple of (search_results, success)
    """
    started = time.perf_counter()
    tag = arguments.get("tag", "")
    print(f"[DEBUG] search_openapi: Starting for tag '{tag}'")

    if not tag:
        return "Error: No tag provided", False

    try:
        # The spec is cached by _fetch_openapi_spec after the first fetch
        spec = await _fetch_openapi_spec()
        matching = _search_openapi_by_tag(spec, tag)
        formatted = _format_openapi_results(matching, tag)
    except httpx.HTTPStatusError as e:
        return f"HTTP error fetching OpenAPI spec: {e.response.status_code}", False
    except httpx.RequestError as e:
        return f"Request error: {str(e)}", False
    except Exception as e:
        return f"Error searching OpenAPI spec: {str(e)}", False

    print(f"[DEBUG] search_openapi: Total time {time.perf_counter() - started:.2f}s")
    return formatted, True
|
| 488 |
+
|
| 489 |
+
|
| 490 |
+
async def hf_docs_fetch_handler(arguments: dict[str, Any]) -> tuple[str, bool]:
    """
    Fetch full documentation content from a specific HF docs page

    Args:
        arguments: Dictionary with 'url' parameter (full URL to the doc page)

    Returns:
        Tuple of (full_markdown_content, success)
    """
    started = time.perf_counter()
    url = arguments.get("url", "")
    print(f"[DEBUG] fetch_hf_docs: Starting for URL '{url}'")

    if not url:
        return "Error: No URL provided", False

    # Auth token is required for the request below
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        return (
            "Error: HF_TOKEN environment variable not set",
            False,
        )

    # HF doc pages expose raw markdown under a ".md" suffix.
    # NOTE(review): naive suffix check — a URL carrying a query string or
    # fragment would get ".md" appended in the wrong place; confirm callers
    # only pass bare doc-page URLs.
    if not url.endswith(".md"):
        url = f"{url}.md"

    try:
        headers = {"Authorization": f"Bearer {hf_token}"}

        fetch_started = time.perf_counter()
        async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
            response = await client.get(url, headers=headers)
            response.raise_for_status()

        fetch_time = time.perf_counter() - fetch_started
        content = response.text
        content_size_kb = len(content) / 1024

        print(
            f"[DEBUG] fetch_hf_docs: Fetched {content_size_kb:.1f}KB in {fetch_time:.2f}s"
        )

        # Prefix with the source URL so the agent can cite it
        result = f"Documentation from: {url}\n\n{content}"

        print(f"[DEBUG] fetch_hf_docs: Total time {time.perf_counter() - started:.2f}s")
        return result, True

    except httpx.HTTPStatusError as e:
        return (
            f"HTTP error fetching {url}: {e.response.status_code} - {e.response.text[:200]}",
            False,
        )
    except httpx.RequestError as e:
        return f"Request error fetching {url}: {str(e)}", False
    except Exception as e:
        return f"Error fetching documentation: {str(e)}", False
|
| 554 |
+
|
| 555 |
+
|
| 556 |
+
# Tool specifications for the search sub-agent
|
| 557 |
+
|
| 558 |
+
# Static tool spec for the explore_hf_docs tool (handled by
# explore_hf_docs_handler): lists every page under one HF documentation
# endpoint with title, URL, and a 300-character preview. The endpoint enum
# below is hard-coded and must be kept in sync with what doc endpoints exist.
EXPLORE_HF_DOCS_TOOL_SPEC = {
    "name": "explore_hf_docs",
    "description": (
        "Explore the Hugging Face documentation at a glance. "
        "Select an endpoint from the available options and get a list of all documentation pages "
        "with their titles, URLs, and a 300-character glimpse of each page. "
        "Use this to discover what documentation is available before fetching specific pages."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "endpoint": {
                "type": "string",
                "enum": [
                    "hub",
                    "transformers",
                    "diffusers",
                    "datasets",
                    "gradio",
                    "trackio",
                    "smolagents",
                    "huggingface_hub",
                    "huggingface.js",
                    "transformers.js",
                    "inference-providers",
                    "inference-endpoints",
                    "peft",
                    "accelerate",
                    "optimum",
                    "optimum-habana",
                    "optimum-neuron",
                    "optimum-intel",
                    "optimum-executorch",
                    "optimum-tpu",
                    "tokenizers",
                    "llm-course",
                    "robotics-course",
                    "mcp-course",
                    "smol-course",
                    "agents-course",
                    "deep-rl-course",
                    "computer-vision-course",
                    "evaluate",
                    "tasks",
                    "dataset-viewer",
                    "trl",
                    "simulate",
                    "sagemaker",
                    "timm",
                    "safetensors",
                    "tgi",
                    "setfit",
                    "audio-course",
                    "lerobot",
                    "autotrain",
                    "tei",
                    "bitsandbytes",
                    "cookbook",
                    "sentence_transformers",
                    "ml-games-course",
                    "diffusion-course",
                    "ml-for-3d-course",
                    "chat-ui",
                    "leaderboards",
                    "lighteval",
                    "argilla",
                    "distilabel",
                    "microsoft-azure",
                    "kernels",
                    "google-cloud",
                ],
                # One bullet per enum entry — keep the two lists in sync.
                "description": (
                    "The documentation endpoint to explore. Each endpoint corresponds to a major section of the Hugging Face documentation:\n\n"
                    "• hub — Find answers to questions about models/datasets/spaces, auth, versioning, metadata.\n"
                    "• transformers — Core model library: architectures, configs, tokenizers, training & inference APIs.\n"
                    "• diffusers — Diffusion pipelines, schedulers, fine-tuning, training, and deployment patterns.\n"
                    "• datasets — Dataset loading, streaming, processing, Arrow format, Hub integration.\n"
                    "• gradio — UI components and demos for interacting with ML models.\n"
                    "• trackio — Experiment tracking, metrics logging, and run comparison.\n"
                    "• smolagents — Lightweight agent abstractions and tool-using patterns.\n"
                    "• huggingface_hub — Python client for Hub operations (auth, upload/download, repo management).\n"
                    "• huggingface.js — JS/TS client for Hub APIs in browser and Node.\n"
                    "• transformers.js — Run Transformer models in browser/Node via WebGPU/WASM.\n"
                    "• inference-providers — Unified interface for third-party inference backends.\n"
                    "• inference-endpoints — Managed, scalable model deployments on HF infrastructure.\n"
                    "• peft — Parameter-efficient fine-tuning methods (LoRA, adapters, etc.).\n"
                    "• accelerate — Hardware-agnostic, distributed and mixed-precision training orchestration.\n"
                    "• optimum — Hardware-aware optimization and model export tooling.\n"
                    "• optimum-habana — Training and inference on Habana Gaudi accelerators.\n"
                    "• optimum-neuron — Optimization workflows for AWS Inferentia/Trainium.\n"
                    "• optimum-intel — Intel CPU/GPU optimizations (OpenVINO, IPEX).\n"
                    "• optimum-executorch — Exporting models to ExecuTorch for edge/mobile.\n"
                    "• optimum-tpu — TPU-specific training and optimization paths.\n"
                    "• tokenizers — Fast tokenizer internals, training, and low-level APIs.\n"
                    "• llm-course — End-to-end LLM concepts, training, and deployment.\n"
                    "• robotics-course — Learning-based robotics foundations.\n"
                    "• mcp-course — Model Context Protocol concepts and usage.\n"
                    "• smol-course — Small-model and efficiency-focused workflows.\n"
                    "• agents-course — Tool-using, planning, and multi-step agent design.\n"
                    "• deep-rl-course — Deep reinforcement learning foundations.\n"
                    "• computer-vision-course — Vision models, datasets, and pipelines.\n"
                    "• evaluate — Metrics, evaluation workflows, and training-loop integration.\n"
                    "• tasks — Canonical task definitions and model categorization.\n"
                    "• dataset-viewer — Dataset preview, streaming views, and viewer internals.\n"
                    "• trl — RLHF, DPO, PPO, and SFT utilities for LLMs.\n"
                    "• simulate — Experimental simulation tools and workflows.\n"
                    "• sagemaker — Deploying Hugging Face models on AWS SageMaker.\n"
                    "• timm — Image model zoo and utilities via HF integrations.\n"
                    "• safetensors — Safe, fast tensor serialization format.\n"
                    "• tgi — High-throughput text generation server for LLMs.\n"
                    "• setfit — Few-shot text classification via sentence embeddings.\n"
                    "• audio-course — Speech and audio models, datasets, and tasks.\n"
                    "• lerobot — Robotics datasets, policies, and learning workflows.\n"
                    "• autotrain — No/low-code model training on Hugging Face.\n"
                    "• tei — Optimized inference server for embedding workloads.\n"
                    "• bitsandbytes — Quantization and memory-efficient optimizers.\n"
                    "• cookbook — Practical, task-oriented recipes across the ecosystem.\n"
                    "• sentence_transformers — Embedding models, training recipes, similarity/search workflows.\n"
                    "• ml-games-course — Game-based ML and reinforcement learning experiments.\n"
                    "• diffusion-course — Diffusion model theory and hands-on practice.\n"
                    "• ml-for-3d-course — 3D representations, models, and learning techniques.\n"
                    "• chat-ui — Reference chat interfaces for LLM deployment.\n"
                    "• leaderboards — Evaluation leaderboards and submission mechanics.\n"
                    "• lighteval — Lightweight, reproducible LLM evaluation framework.\n"
                    "• argilla — Data annotation, feedback, and human-in-the-loop workflows.\n"
                    "• distilabel — Synthetic data generation and distillation pipelines.\n"
                    "• microsoft-azure — Azure deployment and integration guides.\n"
                    "• kernels — Lightweight execution environments and notebook-style workflows.\n"
                    "• google-cloud — GCP deployment and serving workflows.\n"
                ),
            },
        },
        "required": ["endpoint"],
    },
}
|
| 693 |
+
|
| 694 |
+
# Static tool spec for the fetch_hf_docs tool (handled by
# hf_docs_fetch_handler): fetches the raw markdown of a single doc page.
HF_DOCS_FETCH_TOOL_SPEC = {
    "name": "fetch_hf_docs",
    "description": (
        "Fetch the full content of a specific HF documentation page. "
        "Provide the full URL to the doc page (e.g., from explore_hf_docs results). "
        "Returns the complete markdown content of that page. "
        "Use explore_hf_docs first to discover available pages."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "url": {
                "type": "string",
                "description": (
                    "The full URL to the documentation page. "
                    "Example: 'https://huggingface.co/docs/trl/dpo_trainer' "
                    "The .md extension will be added automatically if not present."
                ),
            },
        },
        "required": ["url"],
    },
}
|
| 717 |
+
|
| 718 |
+
|
| 719 |
+
async def _get_api_search_tool_spec() -> dict[str, Any]:
    """
    Dynamically generate the OpenAPI tool spec with tag enum populated at runtime

    This must be called async to fetch the OpenAPI spec and extract tags
    """
    # Network fetch (cached by _fetch_openapi_spec after the first call),
    # then collect every tag so the LLM sees a closed enum of valid values.
    spec = await _fetch_openapi_spec()
    tags = _extract_all_tags(spec)

    # Same shape as the static *_TOOL_SPEC constants above; handled by
    # search_openapi_handler.
    return {
        "name": "search_hf_api_endpoints",
        "description": (
            "Search the HuggingFace OpenAPI specification by tag to find related API endpoints. "
            "Returns all endpoints with the specified tag including curl examples showing how to use them. "
            "Each result includes the endpoint path, summary, usage example with curl, and response information."
        ),
        "parameters": {
            "type": "object",
            "properties": {
                "tag": {
                    "type": "string",
                    "enum": tags,
                    "description": (
                        "The API tag to search for. Each tag groups related API endpoints. "
                    ),
                },
            },
            "required": ["tag"],
        },
    }
|
agent/tools/search_docs_tool.py
ADDED
|
@@ -0,0 +1,237 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Search documentation tool that spawns a sub-agent
|
| 3 |
+
The sub-agent has its own agent loop and set of specialized search tools
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import asyncio
|
| 7 |
+
from typing import Any
|
| 8 |
+
|
| 9 |
+
from litellm.utils import get_max_tokens
|
| 10 |
+
|
| 11 |
+
from agent.core.session import Session
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
async def create_search_tool_router(github_mcp_config: dict[str, Any] | None = None):
    """
    Create a ToolRouter instance for the search sub-agent

    Async because the OpenAPI tool needs to fetch and parse its spec at
    initialization time.

    Args:
        github_mcp_config: Optional GitHub MCP server configuration. When
            provided, a read-only whitelist of GitHub MCP tools is exposed
            in addition to the built-in documentation tools.

    Returns:
        A fully initialized SearchDocsToolRouter.
    """
    # Import at runtime to avoid circular dependency
    from fastmcp import Client

    from agent.core.tools import ToolRouter, ToolSpec

    # Read-only GitHub MCP tools the sub-agent is allowed to use
    ALLOWED_GITHUB_TOOLS = {
        "list_pull_requests",
        "list_issues",
        "search_code",
        "search_issues",
        "search_repositories",
        "search_users",
        "get_pull_request_status",
        "get_pull_request_reviews",
        "get_pull_request",
        "get_issue",
        "get_file_contents",
    }

    class SearchDocsToolRouter(ToolRouter):
        """Specialized ToolRouter for the search sub-agent"""

        def __init__(self, github_mcp_config: dict[str, Any] | None = None):
            self.tools: dict[str, Any] = {}
            self.mcp_servers: dict[str, dict[str, Any]] = {}
            self._mcp_initialized = False

            # Initialize MCP client with GitHub server if provided
            if github_mcp_config:
                self.mcp_client = Client({"mcpServers": github_mcp_config})
            else:
                self.mcp_client = None

        async def initialize_tools(self):
            """Register the built-in documentation search tools."""
            tools = await make_search_agent_tools()
            for tool in tools:
                self.register_tool(tool)

        async def register_mcp_tools(self) -> None:
            """Register only allowed GitHub MCP tools"""
            if self.mcp_client is None:
                return

            tools = await self.mcp_client.list_tools()
            for tool in tools:
                # Only register whitelisted GitHub tools; handler=None —
                # presumably dispatch goes through the MCP client (confirm
                # in ToolRouter).
                if tool.name in ALLOWED_GITHUB_TOOLS:
                    print(f"Registering GitHub MCP Tool: {tool.name}")
                    self.register_tool(
                        ToolSpec(
                            name=tool.name,
                            description=tool.description,
                            parameters=tool.inputSchema,
                            handler=None,
                        )
                    )

    router = SearchDocsToolRouter(github_mcp_config)
    await router.initialize_tools()
    return router
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
async def search_docs_handler(arguments: dict[str, Any]) -> tuple[str, bool]:
    """
    Handler that spawns a sub-agent to perform comprehensive doc search

    Args:
        arguments: dictionary with 'query' parameter

    Returns:
        Tuple of (search_results, success)
    """
    query = arguments.get("query", "")

    if not query:
        return "Error: No search query provided", False

    try:
        # Import at runtime to avoid circular dependency
        from pathlib import Path

        from agent.config import load_config
        from agent.context_manager.manager import ContextManager
        from agent.core.agent_loop import Handlers

        # Create a queue for events from the sub-agent
        sub_event_queue = asyncio.Queue()

        # Load the search agent's own config file with GitHub MCP server
        search_agent_config_path = (
            Path(__file__).parent.parent.parent / "configs" / "_subagent_config_search_agent.json"
        )
        search_agent_config = load_config(search_agent_config_path)

        # Extract GitHub MCP config from search agent config
        github_mcp_config = None
        if (
            search_agent_config.mcpServers
            and "github" in search_agent_config.mcpServers
        ):
            github_server = search_agent_config.mcpServers["github"]
            github_mcp_config = {"github": github_server.model_dump()}

        # Create specialized tool router for search with GitHub MCP config
        search_tool_router = await create_search_tool_router(github_mcp_config)

        async with search_tool_router:
            # The sub-agent's system prompt is loaded from the yaml file in
            # the prompts folder via ContextManager's prompt_file_suffix
            # parameter. Deliberate design — do not "simplify" this away.
            # NOTE: MCP tools are registered during __aenter__, so we must
            # retrieve tool specs AFTER entering the context.
            sub_session = Session(
                event_queue=sub_event_queue,
                config=search_agent_config,
                tool_router=search_tool_router,
                context_manager=ContextManager(
                    tool_specs=search_tool_router.get_tool_specs_for_llm(),
                    max_context=get_max_tokens(search_agent_config.model_name),
                    compact_size=0.1,
                    untouched_messages=5,
                    prompt_file_suffix="search_docs_system_prompt.yaml",
                ),
            )

            # Run the sub-agent
            result = await Handlers.run_agent(
                session=sub_session, text=query, max_iterations=30
            )

            # Return the final result or compiled events
            if result:
                return f"Search Results:\n\n{result}", True
            else:
                return "Search completed but no results were generated", False
    except Exception as e:
        # Broad catch: tool handlers report failures as (message, False)
        # instead of raising into the main agent loop.
        return f"Error in search_docs tool: {str(e)}", False
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
# Tool specification to be used by the main agent
|
| 165 |
+
# Tool spec exposed to the MAIN agent (handled by search_docs_handler).
# Invoking it spawns the documentation search sub-agent defined above.
SEARCH_DOCS_TOOL_SPEC = {
    "name": "search_docs",
    "description": (
        "Intelligently search HF documentation for libraries, repositories, and best practices with an agent that has access to: explore_hf_docs, fetch_hf_docs, search_hf_api_endpoints. "
        "The agent acts like your personal search assistant. "
        "Using the search agent is necessary to give the best quality answer to the user's question. Most questions require a search to get the best information on code examples.\n\n"
        "WHEN TO USE THIS TOOL:\n"
        " - When searching for high-level concepts like 'how to do GRPO training on a model?' or 'best way to do inference on a trained model?'\n"
        " - When you need to get code examples for intricate ML code patterns like training loops, inference pipelines, data processing, etc.\n\n"
        "USAGE GUIDELINES:\n"
        " 1. Launch multiple agents concurrently for better performance.\n"
        " 2. Be specific in your query - include exact terminology, expected file locations, or code patterns.\n"
        " 3. Use the query as if you were talking to another engineer. Bad: logger impl Good: where is the logger implemented, we're trying to find out how to log to files.\n"
        " 4. Make sure to formulate the query in such a way that the agent knows when it's done or has found the result."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": (
                    "The search query describing to the agent what it should do. Be "
                    "specific and include technical terms, file types, or expected "
                    "code patterns to help the agent find relevant code. Formulate "
                    "the query in a way that makes it clear to the agent when it "
                    "has found the right thing."
                ),
            },
        },
        "required": ["query"],
    },
}
|
| 197 |
+
|
| 198 |
+
|
| 199 |
+
async def make_search_agent_tools():
    """
    Create a list of tools for the search agent

    Async because OpenAPI tool spec needs to be populated at runtime
    """
    # Import at runtime to avoid circular dependency
    from agent.core.tools import ToolSpec
    from agent.tools._search_agent_tools import (
        EXPLORE_HF_DOCS_TOOL_SPEC,
        HF_DOCS_FETCH_TOOL_SPEC,
        _get_api_search_tool_spec,
        explore_hf_docs_handler,
        hf_docs_fetch_handler,
        search_openapi_handler,
    )

    # The OpenAPI tool spec gets its tag enum filled in at runtime
    openapi_spec = await _get_api_search_tool_spec()

    # Pair every tool spec with its handler, then build the ToolSpec list
    spec_handler_pairs = [
        (EXPLORE_HF_DOCS_TOOL_SPEC, explore_hf_docs_handler),
        (HF_DOCS_FETCH_TOOL_SPEC, hf_docs_fetch_handler),
        (openapi_spec, search_openapi_handler),
    ]
    return [
        ToolSpec(
            name=spec["name"],
            description=spec["description"],
            parameters=spec["parameters"],
            handler=handler,
        )
        for spec, handler in spec_handler_pairs
    ]
|
configs/_subagent_config_search_agent.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_name": "anthropic/claude-haiku-4-5",
|
| 3 |
+
"mcpServers": {
|
| 4 |
+
"github": {
|
| 5 |
+
"transport": "http",
|
| 6 |
+
"url": "https://api.githubcopilot.com/mcp/",
|
| 7 |
+
"headers": {
|
| 8 |
+
"Authorization": "Bearer ${GITHUB_TOKEN}"
|
| 9 |
+
}
|
| 10 |
+
}
|
| 11 |
+
}
|
| 12 |
+
}
|
agent/config_mcp_example.json → configs/main_agent_config.json
RENAMED
|
@@ -1,7 +1,5 @@
|
|
| 1 |
{
|
| 2 |
"model_name": "anthropic/claude-sonnet-4-5-20250929",
|
| 3 |
-
"tools": [],
|
| 4 |
-
"system_prompt_path": "",
|
| 5 |
"mcpServers": {
|
| 6 |
"hf-mcp-server": {
|
| 7 |
"transport": "http",
|
|
|
|
| 1 |
{
|
| 2 |
"model_name": "anthropic/claude-sonnet-4-5-20250929",
|
|
|
|
|
|
|
| 3 |
"mcpServers": {
|
| 4 |
"hf-mcp-server": {
|
| 5 |
"transport": "http",
|
pyproject.toml
CHANGED
|
@@ -20,4 +20,5 @@ dependencies = [
|
|
| 20 |
"transformers>=2.3.0",
|
| 21 |
"torch>=2.9.1",
|
| 22 |
"pytest>=9.0.2",
|
|
|
|
| 23 |
]
|
|
|
|
| 20 |
"transformers>=2.3.0",
|
| 21 |
"torch>=2.9.1",
|
| 22 |
"pytest>=9.0.2",
|
| 23 |
+
"trafilatura>=2.0.0",
|
| 24 |
]
|
run_search_agent.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Standalone test script for the search sub-agent
|
| 3 |
+
Run with: uv run python run_search_agent.py
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import asyncio
|
| 7 |
+
|
| 8 |
+
from litellm.utils import get_max_tokens
|
| 9 |
+
|
| 10 |
+
from agent.config import Config
|
| 11 |
+
from agent.context_manager.manager import ContextManager
|
| 12 |
+
from agent.core.agent_loop import Handlers
|
| 13 |
+
from agent.core.session import Session
|
| 14 |
+
from agent.tools.search_docs_tool import create_search_tool_router
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
async def test_search_agent(query: str):
    """Test the search sub-agent with a query.

    Builds the search tool router and a session, runs the sub-agent on
    *query*, and prints assistant/tool events as they arrive.
    """
    print(f"Testing search agent with query: {query}\n")
    print("=" * 60)

    # Create event queue for the sub-agent
    sub_event_queue = asyncio.Queue()

    # Create search tool router (no GitHub MCP config in this standalone test)
    search_tool_router = await create_search_tool_router()

    # Create config
    sub_config = Config(
        model_name="anthropic/claude-haiku-4-5",
    )

    # Create session with custom system prompt
    sub_session = Session(
        event_queue=sub_event_queue,
        config=sub_config,
        tool_router=search_tool_router,
        context_manager=ContextManager(
            tool_specs=search_tool_router.get_tool_specs_for_llm(),
            max_context=get_max_tokens(sub_config.model_name),
            compact_size=0.1,
            untouched_messages=5,
            prompt_file_suffix="search_docs_system_prompt.yaml",
        ),
    )

    # Event listener to show what the sub-agent is doing
    async def event_monitor():
        while True:
            try:
                event = await asyncio.wait_for(sub_event_queue.get(), timeout=1.0)

                if event.event_type == "assistant_message":
                    content = event.data.get("content", "") if event.data else ""
                    if content:
                        print(f"\n🤖 Sub-agent: {content}\n")

                elif event.event_type == "tool_call":
                    tool_name = event.data.get("tool", "") if event.data else ""
                    arguments = event.data.get("arguments", {}) if event.data else {}
                    print(f"🔧 Tool call: {tool_name}")
                    print(f"   Args: {arguments}")

                elif event.event_type == "tool_output":
                    output = event.data.get("output", "") if event.data else ""
                    success = event.data.get("success", False) if event.data else False
                    status = "✅" if success else "❌"

                    print(f"{status} Tool output: {output}\n")

                elif event.event_type == "turn_complete":
                    print("✅ Sub-agent turn complete")
                    break

            except asyncio.TimeoutError:
                # No event yet — keep polling
                continue
            except Exception as e:
                print(f"⚠️  Event error: {e}")
                break

    # Run the sub-agent and event monitor concurrently
    async with search_tool_router:
        monitor_task = asyncio.create_task(event_monitor())

        try:
            result = await Handlers.run_agent(
                session=sub_session, text=query, max_iterations=30
            )
        finally:
            # Give the monitor a moment to drain remaining events; if it
            # never sees "turn_complete", cancel it instead of letting the
            # TimeoutError crash a test run that already produced a result.
            try:
                await asyncio.wait_for(monitor_task, timeout=5.0)
            except asyncio.TimeoutError:
                monitor_task.cancel()

    print("\n" + "=" * 60)
    print("FINAL RESULT:")
    print("=" * 60)
    if result:
        print(result)
    else:
        print("No result returned")
    print("=" * 60)
| 101 |
+
|
| 102 |
+
|
| 103 |
+
async def main():
    """Drive the search sub-agent through each example query in sequence.

    Prints a banner per test, runs the query through ``test_search_agent``,
    reports any failure with a traceback, and pauses for operator input
    between consecutive tests.
    """
    print("🧪 Search Sub-Agent Test\n")

    # Example queries to test
    test_queries = [
        # "Explore the TRL documentation structure and find information about DPO trainer",
        # "is there a way to get the logs from a served huggingface space",
        # "How do I train GLM4.7 with a GRPO training loop with trl with llm judge as a reward model for training on hle?"
        "can i stream logs through the api for a served huggingface space",
    ]

    total = len(test_queries)
    for index, query in enumerate(test_queries, start=1):
        banner = "=" * 60
        print(f"\n{banner}")
        print(f"TEST {index}/{total}")
        print(f"{banner}\n")

        try:
            await test_search_agent(query)
        except Exception as exc:
            # A failed query should not abort the remaining tests.
            print(f"\n❌ Test failed: {exc}")
            import traceback

            traceback.print_exc()

        # Pause so the operator can inspect output before the next run.
        if index < total:
            print("\n\nPress Enter to continue to next test...")
            input()
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
if __name__ == "__main__":
    # Script entry point: run the async test harness and keep Ctrl-C quiet.
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # Operator cancelled mid-run; exit without a stack trace.
        print("\n\n⚠️ Test interrupted")
    except Exception as e:
        # Surface any unexpected failure with its full traceback.
        print(f"\n❌ Error: {e}")
        import traceback

        traceback.print_exc()
|
uv.lock
CHANGED
|
@@ -230,6 +230,15 @@ wheels = [
|
|
| 230 |
{ url = "https://files.pythonhosted.org/packages/f8/aa/5082412d1ee302e9e7d80b6949bc4d2a8fa1149aaab610c5fc24709605d6/authlib-1.6.5-py2.py3-none-any.whl", hash = "sha256:3e0e0507807f842b02175507bdee8957a1d5707fd4afb17c32fb43fee90b6e3a", size = 243608, upload-time = "2025-10-02T13:36:07.637Z" },
|
| 231 |
]
|
| 232 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 233 |
[[package]]
|
| 234 |
name = "beartype"
|
| 235 |
version = "0.22.6"
|
|
@@ -433,6 +442,20 @@ wheels = [
|
|
| 433 |
{ url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
|
| 434 |
]
|
| 435 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 436 |
[[package]]
|
| 437 |
name = "cryptography"
|
| 438 |
version = "46.0.3"
|
|
@@ -529,6 +552,21 @@ wheels = [
|
|
| 529 |
{ url = "https://files.pythonhosted.org/packages/3b/5e/6f8d874366788ad5d549e9ba258037d974dda6e004843be1bda794571701/datasets-4.4.1-py3-none-any.whl", hash = "sha256:c1163de5211e42546079ab355cc0250c7e6db16eb209ac5ac6252f801f596c44", size = 511591, upload-time = "2025-11-05T16:00:36.365Z" },
|
| 530 |
]
|
| 531 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 532 |
[[package]]
|
| 533 |
name = "debugpy"
|
| 534 |
version = "1.8.17"
|
|
@@ -898,6 +936,7 @@ dependencies = [
|
|
| 898 |
{ name = "requests" },
|
| 899 |
{ name = "tenacity" },
|
| 900 |
{ name = "torch" },
|
|
|
|
| 901 |
{ name = "transformers" },
|
| 902 |
]
|
| 903 |
|
|
@@ -917,6 +956,7 @@ requires-dist = [
|
|
| 917 |
{ name = "requests", specifier = ">=2.32.5" },
|
| 918 |
{ name = "tenacity", specifier = ">=8.0.0" },
|
| 919 |
{ name = "torch", specifier = ">=2.9.1" },
|
|
|
|
| 920 |
{ name = "transformers", specifier = ">=2.3.0" },
|
| 921 |
]
|
| 922 |
|
|
@@ -949,6 +989,22 @@ wheels = [
|
|
| 949 |
{ url = "https://files.pythonhosted.org/packages/cb/44/870d44b30e1dcfb6a65932e3e1506c103a8a5aea9103c337e7a53180322c/hf_xet-1.2.0-cp37-abi3-win_amd64.whl", hash = "sha256:e6584a52253f72c9f52f9e549d5895ca7a471608495c4ecaa6cc73dba2b24d69", size = 2905735, upload-time = "2025-10-24T19:04:35.928Z" },
|
| 950 |
]
|
| 951 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 952 |
[[package]]
|
| 953 |
name = "httpcore"
|
| 954 |
version = "1.0.9"
|
|
@@ -1391,6 +1447,18 @@ wheels = [
|
|
| 1391 |
{ url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437, upload-time = "2025-09-08T01:34:57.871Z" },
|
| 1392 |
]
|
| 1393 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1394 |
[[package]]
|
| 1395 |
name = "keyring"
|
| 1396 |
version = "25.7.0"
|
|
@@ -1497,6 +1565,103 @@ all = [
|
|
| 1497 |
{ name = "opentelemetry-instrumentation-weaviate" },
|
| 1498 |
]
|
| 1499 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1500 |
[[package]]
|
| 1501 |
name = "markdown-it-py"
|
| 1502 |
version = "4.0.0"
|
|
@@ -3699,6 +3864,15 @@ wheels = [
|
|
| 3699 |
{ url = "https://files.pythonhosted.org/packages/af/df/c7891ef9d2712ad774777271d39fdef63941ffba0a9d59b7ad1fd2765e57/tiktoken-0.12.0-cp314-cp314t-win_amd64.whl", hash = "sha256:f61c0aea5565ac82e2ec50a05e02a6c44734e91b51c10510b084ea1b8e633a71", size = 920667, upload-time = "2025-10-06T20:22:34.444Z" },
|
| 3700 |
]
|
| 3701 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3702 |
[[package]]
|
| 3703 |
name = "tokenizers"
|
| 3704 |
version = "0.22.1"
|
|
@@ -3788,6 +3962,24 @@ wheels = [
|
|
| 3788 |
{ url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540, upload-time = "2024-11-24T20:12:19.698Z" },
|
| 3789 |
]
|
| 3790 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3791 |
[[package]]
|
| 3792 |
name = "transformers"
|
| 3793 |
version = "2.3.0"
|
|
@@ -3861,6 +4053,18 @@ wheels = [
|
|
| 3861 |
{ url = "https://files.pythonhosted.org/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8", size = 347839, upload-time = "2025-03-23T13:54:41.845Z" },
|
| 3862 |
]
|
| 3863 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3864 |
[[package]]
|
| 3865 |
name = "uc-micro-py"
|
| 3866 |
version = "1.0.3"
|
|
|
|
| 230 |
{ url = "https://files.pythonhosted.org/packages/f8/aa/5082412d1ee302e9e7d80b6949bc4d2a8fa1149aaab610c5fc24709605d6/authlib-1.6.5-py2.py3-none-any.whl", hash = "sha256:3e0e0507807f842b02175507bdee8957a1d5707fd4afb17c32fb43fee90b6e3a", size = 243608, upload-time = "2025-10-02T13:36:07.637Z" },
|
| 231 |
]
|
| 232 |
|
| 233 |
+
[[package]]
|
| 234 |
+
name = "babel"
|
| 235 |
+
version = "2.17.0"
|
| 236 |
+
source = { registry = "https://pypi.org/simple" }
|
| 237 |
+
sdist = { url = "https://files.pythonhosted.org/packages/7d/6b/d52e42361e1aa00709585ecc30b3f9684b3ab62530771402248b1b1d6240/babel-2.17.0.tar.gz", hash = "sha256:0c54cffb19f690cdcc52a3b50bcbf71e07a808d1c80d549f2459b9d2cf0afb9d", size = 9951852, upload-time = "2025-02-01T15:17:41.026Z" }
|
| 238 |
+
wheels = [
|
| 239 |
+
{ url = "https://files.pythonhosted.org/packages/b7/b8/3fe70c75fe32afc4bb507f75563d39bc5642255d1d94f1f23604725780bf/babel-2.17.0-py3-none-any.whl", hash = "sha256:4d0b53093fdfb4b21c92b5213dba5a1b23885afa8383709427046b21c366e5f2", size = 10182537, upload-time = "2025-02-01T15:17:37.39Z" },
|
| 240 |
+
]
|
| 241 |
+
|
| 242 |
[[package]]
|
| 243 |
name = "beartype"
|
| 244 |
version = "0.22.6"
|
|
|
|
| 442 |
{ url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
|
| 443 |
]
|
| 444 |
|
| 445 |
+
[[package]]
|
| 446 |
+
name = "courlan"
|
| 447 |
+
version = "1.3.2"
|
| 448 |
+
source = { registry = "https://pypi.org/simple" }
|
| 449 |
+
dependencies = [
|
| 450 |
+
{ name = "babel" },
|
| 451 |
+
{ name = "tld" },
|
| 452 |
+
{ name = "urllib3" },
|
| 453 |
+
]
|
| 454 |
+
sdist = { url = "https://files.pythonhosted.org/packages/6f/54/6d6ceeff4bed42e7a10d6064d35ee43a810e7b3e8beb4abeae8cff4713ae/courlan-1.3.2.tar.gz", hash = "sha256:0b66f4db3a9c39a6e22dd247c72cfaa57d68ea660e94bb2c84ec7db8712af190", size = 206382, upload-time = "2024-10-29T16:40:20.994Z" }
|
| 455 |
+
wheels = [
|
| 456 |
+
{ url = "https://files.pythonhosted.org/packages/8e/ca/6a667ccbe649856dcd3458bab80b016681b274399d6211187c6ab969fc50/courlan-1.3.2-py3-none-any.whl", hash = "sha256:d0dab52cf5b5b1000ee2839fbc2837e93b2514d3cb5bb61ae158a55b7a04c6be", size = 33848, upload-time = "2024-10-29T16:40:18.325Z" },
|
| 457 |
+
]
|
| 458 |
+
|
| 459 |
[[package]]
|
| 460 |
name = "cryptography"
|
| 461 |
version = "46.0.3"
|
|
|
|
| 552 |
{ url = "https://files.pythonhosted.org/packages/3b/5e/6f8d874366788ad5d549e9ba258037d974dda6e004843be1bda794571701/datasets-4.4.1-py3-none-any.whl", hash = "sha256:c1163de5211e42546079ab355cc0250c7e6db16eb209ac5ac6252f801f596c44", size = 511591, upload-time = "2025-11-05T16:00:36.365Z" },
|
| 553 |
]
|
| 554 |
|
| 555 |
+
[[package]]
|
| 556 |
+
name = "dateparser"
|
| 557 |
+
version = "1.2.2"
|
| 558 |
+
source = { registry = "https://pypi.org/simple" }
|
| 559 |
+
dependencies = [
|
| 560 |
+
{ name = "python-dateutil" },
|
| 561 |
+
{ name = "pytz" },
|
| 562 |
+
{ name = "regex" },
|
| 563 |
+
{ name = "tzlocal" },
|
| 564 |
+
]
|
| 565 |
+
sdist = { url = "https://files.pythonhosted.org/packages/a9/30/064144f0df1749e7bb5faaa7f52b007d7c2d08ec08fed8411aba87207f68/dateparser-1.2.2.tar.gz", hash = "sha256:986316f17cb8cdc23ea8ce563027c5ef12fc725b6fb1d137c14ca08777c5ecf7", size = 329840, upload-time = "2025-06-26T09:29:23.211Z" }
|
| 566 |
+
wheels = [
|
| 567 |
+
{ url = "https://files.pythonhosted.org/packages/87/22/f020c047ae1346613db9322638186468238bcfa8849b4668a22b97faad65/dateparser-1.2.2-py3-none-any.whl", hash = "sha256:5a5d7211a09013499867547023a2a0c91d5a27d15dd4dbcea676ea9fe66f2482", size = 315453, upload-time = "2025-06-26T09:29:21.412Z" },
|
| 568 |
+
]
|
| 569 |
+
|
| 570 |
[[package]]
|
| 571 |
name = "debugpy"
|
| 572 |
version = "1.8.17"
|
|
|
|
| 936 |
{ name = "requests" },
|
| 937 |
{ name = "tenacity" },
|
| 938 |
{ name = "torch" },
|
| 939 |
+
{ name = "trafilatura" },
|
| 940 |
{ name = "transformers" },
|
| 941 |
]
|
| 942 |
|
|
|
|
| 956 |
{ name = "requests", specifier = ">=2.32.5" },
|
| 957 |
{ name = "tenacity", specifier = ">=8.0.0" },
|
| 958 |
{ name = "torch", specifier = ">=2.9.1" },
|
| 959 |
+
{ name = "trafilatura", specifier = ">=2.0.0" },
|
| 960 |
{ name = "transformers", specifier = ">=2.3.0" },
|
| 961 |
]
|
| 962 |
|
|
|
|
| 989 |
{ url = "https://files.pythonhosted.org/packages/cb/44/870d44b30e1dcfb6a65932e3e1506c103a8a5aea9103c337e7a53180322c/hf_xet-1.2.0-cp37-abi3-win_amd64.whl", hash = "sha256:e6584a52253f72c9f52f9e549d5895ca7a471608495c4ecaa6cc73dba2b24d69", size = 2905735, upload-time = "2025-10-24T19:04:35.928Z" },
|
| 990 |
]
|
| 991 |
|
| 992 |
+
[[package]]
|
| 993 |
+
name = "htmldate"
|
| 994 |
+
version = "1.9.4"
|
| 995 |
+
source = { registry = "https://pypi.org/simple" }
|
| 996 |
+
dependencies = [
|
| 997 |
+
{ name = "charset-normalizer" },
|
| 998 |
+
{ name = "dateparser" },
|
| 999 |
+
{ name = "lxml" },
|
| 1000 |
+
{ name = "python-dateutil" },
|
| 1001 |
+
{ name = "urllib3" },
|
| 1002 |
+
]
|
| 1003 |
+
sdist = { url = "https://files.pythonhosted.org/packages/9d/10/ead9dabc999f353c3aa5d0dc0835b1e355215a5ecb489a7f4ef2ddad5e33/htmldate-1.9.4.tar.gz", hash = "sha256:1129063e02dd0354b74264de71e950c0c3fcee191178321418ccad2074cc8ed0", size = 44690, upload-time = "2025-11-04T17:46:44.983Z" }
|
| 1004 |
+
wheels = [
|
| 1005 |
+
{ url = "https://files.pythonhosted.org/packages/a1/bd/adfcdaaad5805c0c5156aeefd64c1e868c05e9c1cd6fd21751f168cd88c7/htmldate-1.9.4-py3-none-any.whl", hash = "sha256:1b94bcc4e08232a5b692159903acf95548b6a7492dddca5bb123d89d6325921c", size = 31558, upload-time = "2025-11-04T17:46:43.258Z" },
|
| 1006 |
+
]
|
| 1007 |
+
|
| 1008 |
[[package]]
|
| 1009 |
name = "httpcore"
|
| 1010 |
version = "1.0.9"
|
|
|
|
| 1447 |
{ url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437, upload-time = "2025-09-08T01:34:57.871Z" },
|
| 1448 |
]
|
| 1449 |
|
| 1450 |
+
[[package]]
|
| 1451 |
+
name = "justext"
|
| 1452 |
+
version = "3.0.2"
|
| 1453 |
+
source = { registry = "https://pypi.org/simple" }
|
| 1454 |
+
dependencies = [
|
| 1455 |
+
{ name = "lxml", extra = ["html-clean"] },
|
| 1456 |
+
]
|
| 1457 |
+
sdist = { url = "https://files.pythonhosted.org/packages/49/f3/45890c1b314f0d04e19c1c83d534e611513150939a7cf039664d9ab1e649/justext-3.0.2.tar.gz", hash = "sha256:13496a450c44c4cd5b5a75a5efcd9996066d2a189794ea99a49949685a0beb05", size = 828521, upload-time = "2025-02-25T20:21:49.934Z" }
|
| 1458 |
+
wheels = [
|
| 1459 |
+
{ url = "https://files.pythonhosted.org/packages/f2/ac/52f4e86d1924a7fc05af3aeb34488570eccc39b4af90530dd6acecdf16b5/justext-3.0.2-py2.py3-none-any.whl", hash = "sha256:62b1c562b15c3c6265e121cc070874243a443bfd53060e869393f09d6b6cc9a7", size = 837940, upload-time = "2025-02-25T20:21:44.179Z" },
|
| 1460 |
+
]
|
| 1461 |
+
|
| 1462 |
[[package]]
|
| 1463 |
name = "keyring"
|
| 1464 |
version = "25.7.0"
|
|
|
|
| 1565 |
{ name = "opentelemetry-instrumentation-weaviate" },
|
| 1566 |
]
|
| 1567 |
|
| 1568 |
+
[[package]]
|
| 1569 |
+
name = "lxml"
|
| 1570 |
+
version = "6.0.2"
|
| 1571 |
+
source = { registry = "https://pypi.org/simple" }
|
| 1572 |
+
sdist = { url = "https://files.pythonhosted.org/packages/aa/88/262177de60548e5a2bfc46ad28232c9e9cbde697bd94132aeb80364675cb/lxml-6.0.2.tar.gz", hash = "sha256:cd79f3367bd74b317dda655dc8fcfa304d9eb6e4fb06b7168c5cf27f96e0cd62", size = 4073426, upload-time = "2025-09-22T04:04:59.287Z" }
|
| 1573 |
+
wheels = [
|
| 1574 |
+
{ url = "https://files.pythonhosted.org/packages/f3/c8/8ff2bc6b920c84355146cd1ab7d181bc543b89241cfb1ebee824a7c81457/lxml-6.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:a59f5448ba2ceccd06995c95ea59a7674a10de0810f2ce90c9006f3cbc044456", size = 8661887, upload-time = "2025-09-22T04:01:17.265Z" },
|
| 1575 |
+
{ url = "https://files.pythonhosted.org/packages/37/6f/9aae1008083bb501ef63284220ce81638332f9ccbfa53765b2b7502203cf/lxml-6.0.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:e8113639f3296706fbac34a30813929e29247718e88173ad849f57ca59754924", size = 4667818, upload-time = "2025-09-22T04:01:19.688Z" },
|
| 1576 |
+
{ url = "https://files.pythonhosted.org/packages/f1/ca/31fb37f99f37f1536c133476674c10b577e409c0a624384147653e38baf2/lxml-6.0.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a8bef9b9825fa8bc816a6e641bb67219489229ebc648be422af695f6e7a4fa7f", size = 4950807, upload-time = "2025-09-22T04:01:21.487Z" },
|
| 1577 |
+
{ url = "https://files.pythonhosted.org/packages/da/87/f6cb9442e4bada8aab5ae7e1046264f62fdbeaa6e3f6211b93f4c0dd97f1/lxml-6.0.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:65ea18d710fd14e0186c2f973dc60bb52039a275f82d3c44a0e42b43440ea534", size = 5109179, upload-time = "2025-09-22T04:01:23.32Z" },
|
| 1578 |
+
{ url = "https://files.pythonhosted.org/packages/c8/20/a7760713e65888db79bbae4f6146a6ae5c04e4a204a3c48896c408cd6ed2/lxml-6.0.2-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c371aa98126a0d4c739ca93ceffa0fd7a5d732e3ac66a46e74339acd4d334564", size = 5023044, upload-time = "2025-09-22T04:01:25.118Z" },
|
| 1579 |
+
{ url = "https://files.pythonhosted.org/packages/a2/b0/7e64e0460fcb36471899f75831509098f3fd7cd02a3833ac517433cb4f8f/lxml-6.0.2-cp312-cp312-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:700efd30c0fa1a3581d80a748157397559396090a51d306ea59a70020223d16f", size = 5359685, upload-time = "2025-09-22T04:01:27.398Z" },
|
| 1580 |
+
{ url = "https://files.pythonhosted.org/packages/b9/e1/e5df362e9ca4e2f48ed6411bd4b3a0ae737cc842e96877f5bf9428055ab4/lxml-6.0.2-cp312-cp312-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c33e66d44fe60e72397b487ee92e01da0d09ba2d66df8eae42d77b6d06e5eba0", size = 5654127, upload-time = "2025-09-22T04:01:29.629Z" },
|
| 1581 |
+
{ url = "https://files.pythonhosted.org/packages/c6/d1/232b3309a02d60f11e71857778bfcd4acbdb86c07db8260caf7d008b08f8/lxml-6.0.2-cp312-cp312-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:90a345bbeaf9d0587a3aaffb7006aa39ccb6ff0e96a57286c0cb2fd1520ea192", size = 5253958, upload-time = "2025-09-22T04:01:31.535Z" },
|
| 1582 |
+
{ url = "https://files.pythonhosted.org/packages/35/35/d955a070994725c4f7d80583a96cab9c107c57a125b20bb5f708fe941011/lxml-6.0.2-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:064fdadaf7a21af3ed1dcaa106b854077fbeada827c18f72aec9346847cd65d0", size = 4711541, upload-time = "2025-09-22T04:01:33.801Z" },
|
| 1583 |
+
{ url = "https://files.pythonhosted.org/packages/1e/be/667d17363b38a78c4bd63cfd4b4632029fd68d2c2dc81f25ce9eb5224dd5/lxml-6.0.2-cp312-cp312-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fbc74f42c3525ac4ffa4b89cbdd00057b6196bcefe8bce794abd42d33a018092", size = 5267426, upload-time = "2025-09-22T04:01:35.639Z" },
|
| 1584 |
+
{ url = "https://files.pythonhosted.org/packages/ea/47/62c70aa4a1c26569bc958c9ca86af2bb4e1f614e8c04fb2989833874f7ae/lxml-6.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6ddff43f702905a4e32bc24f3f2e2edfe0f8fde3277d481bffb709a4cced7a1f", size = 5064917, upload-time = "2025-09-22T04:01:37.448Z" },
|
| 1585 |
+
{ url = "https://files.pythonhosted.org/packages/bd/55/6ceddaca353ebd0f1908ef712c597f8570cc9c58130dbb89903198e441fd/lxml-6.0.2-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:6da5185951d72e6f5352166e3da7b0dc27aa70bd1090b0eb3f7f7212b53f1bb8", size = 4788795, upload-time = "2025-09-22T04:01:39.165Z" },
|
| 1586 |
+
{ url = "https://files.pythonhosted.org/packages/cf/e8/fd63e15da5e3fd4c2146f8bbb3c14e94ab850589beab88e547b2dbce22e1/lxml-6.0.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:57a86e1ebb4020a38d295c04fc79603c7899e0df71588043eb218722dabc087f", size = 5676759, upload-time = "2025-09-22T04:01:41.506Z" },
|
| 1587 |
+
{ url = "https://files.pythonhosted.org/packages/76/47/b3ec58dc5c374697f5ba37412cd2728f427d056315d124dd4b61da381877/lxml-6.0.2-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:2047d8234fe735ab77802ce5f2297e410ff40f5238aec569ad7c8e163d7b19a6", size = 5255666, upload-time = "2025-09-22T04:01:43.363Z" },
|
| 1588 |
+
{ url = "https://files.pythonhosted.org/packages/19/93/03ba725df4c3d72afd9596eef4a37a837ce8e4806010569bedfcd2cb68fd/lxml-6.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6f91fd2b2ea15a6800c8e24418c0775a1694eefc011392da73bc6cef2623b322", size = 5277989, upload-time = "2025-09-22T04:01:45.215Z" },
|
| 1589 |
+
{ url = "https://files.pythonhosted.org/packages/c6/80/c06de80bfce881d0ad738576f243911fccf992687ae09fd80b734712b39c/lxml-6.0.2-cp312-cp312-win32.whl", hash = "sha256:3ae2ce7d6fedfb3414a2b6c5e20b249c4c607f72cb8d2bb7cc9c6ec7c6f4e849", size = 3611456, upload-time = "2025-09-22T04:01:48.243Z" },
|
| 1590 |
+
{ url = "https://files.pythonhosted.org/packages/f7/d7/0cdfb6c3e30893463fb3d1e52bc5f5f99684a03c29a0b6b605cfae879cd5/lxml-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:72c87e5ee4e58a8354fb9c7c84cbf95a1c8236c127a5d1b7683f04bed8361e1f", size = 4011793, upload-time = "2025-09-22T04:01:50.042Z" },
|
| 1591 |
+
{ url = "https://files.pythonhosted.org/packages/ea/7b/93c73c67db235931527301ed3785f849c78991e2e34f3fd9a6663ffda4c5/lxml-6.0.2-cp312-cp312-win_arm64.whl", hash = "sha256:61cb10eeb95570153e0c0e554f58df92ecf5109f75eacad4a95baa709e26c3d6", size = 3672836, upload-time = "2025-09-22T04:01:52.145Z" },
|
| 1592 |
+
{ url = "https://files.pythonhosted.org/packages/53/fd/4e8f0540608977aea078bf6d79f128e0e2c2bba8af1acf775c30baa70460/lxml-6.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:9b33d21594afab46f37ae58dfadd06636f154923c4e8a4d754b0127554eb2e77", size = 8648494, upload-time = "2025-09-22T04:01:54.242Z" },
|
| 1593 |
+
{ url = "https://files.pythonhosted.org/packages/5d/f4/2a94a3d3dfd6c6b433501b8d470a1960a20ecce93245cf2db1706adf6c19/lxml-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:6c8963287d7a4c5c9a432ff487c52e9c5618667179c18a204bdedb27310f022f", size = 4661146, upload-time = "2025-09-22T04:01:56.282Z" },
|
| 1594 |
+
{ url = "https://files.pythonhosted.org/packages/25/2e/4efa677fa6b322013035d38016f6ae859d06cac67437ca7dc708a6af7028/lxml-6.0.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1941354d92699fb5ffe6ed7b32f9649e43c2feb4b97205f75866f7d21aa91452", size = 4946932, upload-time = "2025-09-22T04:01:58.989Z" },
|
| 1595 |
+
{ url = "https://files.pythonhosted.org/packages/ce/0f/526e78a6d38d109fdbaa5049c62e1d32fdd70c75fb61c4eadf3045d3d124/lxml-6.0.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bb2f6ca0ae2d983ded09357b84af659c954722bbf04dea98030064996d156048", size = 5100060, upload-time = "2025-09-22T04:02:00.812Z" },
|
| 1596 |
+
{ url = "https://files.pythonhosted.org/packages/81/76/99de58d81fa702cc0ea7edae4f4640416c2062813a00ff24bd70ac1d9c9b/lxml-6.0.2-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eb2a12d704f180a902d7fa778c6d71f36ceb7b0d317f34cdc76a5d05aa1dd1df", size = 5019000, upload-time = "2025-09-22T04:02:02.671Z" },
|
| 1597 |
+
{ url = "https://files.pythonhosted.org/packages/b5/35/9e57d25482bc9a9882cb0037fdb9cc18f4b79d85df94fa9d2a89562f1d25/lxml-6.0.2-cp313-cp313-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:6ec0e3f745021bfed19c456647f0298d60a24c9ff86d9d051f52b509663feeb1", size = 5348496, upload-time = "2025-09-22T04:02:04.904Z" },
|
| 1598 |
+
{ url = "https://files.pythonhosted.org/packages/a6/8e/cb99bd0b83ccc3e8f0f528e9aa1f7a9965dfec08c617070c5db8d63a87ce/lxml-6.0.2-cp313-cp313-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:846ae9a12d54e368933b9759052d6206a9e8b250291109c48e350c1f1f49d916", size = 5643779, upload-time = "2025-09-22T04:02:06.689Z" },
|
| 1599 |
+
{ url = "https://files.pythonhosted.org/packages/d0/34/9e591954939276bb679b73773836c6684c22e56d05980e31d52a9a8deb18/lxml-6.0.2-cp313-cp313-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ef9266d2aa545d7374938fb5c484531ef5a2ec7f2d573e62f8ce722c735685fd", size = 5244072, upload-time = "2025-09-22T04:02:08.587Z" },
|
| 1600 |
+
{ url = "https://files.pythonhosted.org/packages/8d/27/b29ff065f9aaca443ee377aff699714fcbffb371b4fce5ac4ca759e436d5/lxml-6.0.2-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:4077b7c79f31755df33b795dc12119cb557a0106bfdab0d2c2d97bd3cf3dffa6", size = 4718675, upload-time = "2025-09-22T04:02:10.783Z" },
|
| 1601 |
+
{ url = "https://files.pythonhosted.org/packages/2b/9f/f756f9c2cd27caa1a6ef8c32ae47aadea697f5c2c6d07b0dae133c244fbe/lxml-6.0.2-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a7c5d5e5f1081955358533be077166ee97ed2571d6a66bdba6ec2f609a715d1a", size = 5255171, upload-time = "2025-09-22T04:02:12.631Z" },
|
| 1602 |
+
{ url = "https://files.pythonhosted.org/packages/61/46/bb85ea42d2cb1bd8395484fd72f38e3389611aa496ac7772da9205bbda0e/lxml-6.0.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:8f8d0cbd0674ee89863a523e6994ac25fd5be9c8486acfc3e5ccea679bad2679", size = 5057175, upload-time = "2025-09-22T04:02:14.718Z" },
|
| 1603 |
+
{ url = "https://files.pythonhosted.org/packages/95/0c/443fc476dcc8e41577f0af70458c50fe299a97bb6b7505bb1ae09aa7f9ac/lxml-6.0.2-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:2cbcbf6d6e924c28f04a43f3b6f6e272312a090f269eff68a2982e13e5d57659", size = 4785688, upload-time = "2025-09-22T04:02:16.957Z" },
|
| 1604 |
+
{ url = "https://files.pythonhosted.org/packages/48/78/6ef0b359d45bb9697bc5a626e1992fa5d27aa3f8004b137b2314793b50a0/lxml-6.0.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:dfb874cfa53340009af6bdd7e54ebc0d21012a60a4e65d927c2e477112e63484", size = 5660655, upload-time = "2025-09-22T04:02:18.815Z" },
|
| 1605 |
+
{ url = "https://files.pythonhosted.org/packages/ff/ea/e1d33808f386bc1339d08c0dcada6e4712d4ed8e93fcad5f057070b7988a/lxml-6.0.2-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:fb8dae0b6b8b7f9e96c26fdd8121522ce5de9bb5538010870bd538683d30e9a2", size = 5247695, upload-time = "2025-09-22T04:02:20.593Z" },
|
| 1606 |
+
{ url = "https://files.pythonhosted.org/packages/4f/47/eba75dfd8183673725255247a603b4ad606f4ae657b60c6c145b381697da/lxml-6.0.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:358d9adae670b63e95bc59747c72f4dc97c9ec58881d4627fe0120da0f90d314", size = 5269841, upload-time = "2025-09-22T04:02:22.489Z" },
|
| 1607 |
+
{ url = "https://files.pythonhosted.org/packages/76/04/5c5e2b8577bc936e219becb2e98cdb1aca14a4921a12995b9d0c523502ae/lxml-6.0.2-cp313-cp313-win32.whl", hash = "sha256:e8cd2415f372e7e5a789d743d133ae474290a90b9023197fd78f32e2dc6873e2", size = 3610700, upload-time = "2025-09-22T04:02:24.465Z" },
|
| 1608 |
+
{ url = "https://files.pythonhosted.org/packages/fe/0a/4643ccc6bb8b143e9f9640aa54e38255f9d3b45feb2cbe7ae2ca47e8782e/lxml-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:b30d46379644fbfc3ab81f8f82ae4de55179414651f110a1514f0b1f8f6cb2d7", size = 4010347, upload-time = "2025-09-22T04:02:26.286Z" },
|
| 1609 |
+
{ url = "https://files.pythonhosted.org/packages/31/ef/dcf1d29c3f530577f61e5fe2f1bd72929acf779953668a8a47a479ae6f26/lxml-6.0.2-cp313-cp313-win_arm64.whl", hash = "sha256:13dcecc9946dca97b11b7c40d29fba63b55ab4170d3c0cf8c0c164343b9bfdcf", size = 3671248, upload-time = "2025-09-22T04:02:27.918Z" },
|
| 1610 |
+
{ url = "https://files.pythonhosted.org/packages/03/15/d4a377b385ab693ce97b472fe0c77c2b16ec79590e688b3ccc71fba19884/lxml-6.0.2-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:b0c732aa23de8f8aec23f4b580d1e52905ef468afb4abeafd3fec77042abb6fe", size = 8659801, upload-time = "2025-09-22T04:02:30.113Z" },
|
| 1611 |
+
{ url = "https://files.pythonhosted.org/packages/c8/e8/c128e37589463668794d503afaeb003987373c5f94d667124ffd8078bbd9/lxml-6.0.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:4468e3b83e10e0317a89a33d28f7aeba1caa4d1a6fd457d115dd4ffe90c5931d", size = 4659403, upload-time = "2025-09-22T04:02:32.119Z" },
|
| 1612 |
+
{ url = "https://files.pythonhosted.org/packages/00/ce/74903904339decdf7da7847bb5741fc98a5451b42fc419a86c0c13d26fe2/lxml-6.0.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:abd44571493973bad4598a3be7e1d807ed45aa2adaf7ab92ab7c62609569b17d", size = 4966974, upload-time = "2025-09-22T04:02:34.155Z" },
|
| 1613 |
+
{ url = "https://files.pythonhosted.org/packages/1f/d3/131dec79ce61c5567fecf82515bd9bc36395df42501b50f7f7f3bd065df0/lxml-6.0.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:370cd78d5855cfbffd57c422851f7d3864e6ae72d0da615fca4dad8c45d375a5", size = 5102953, upload-time = "2025-09-22T04:02:36.054Z" },
|
| 1614 |
+
{ url = "https://files.pythonhosted.org/packages/3a/ea/a43ba9bb750d4ffdd885f2cd333572f5bb900cd2408b67fdda07e85978a0/lxml-6.0.2-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:901e3b4219fa04ef766885fb40fa516a71662a4c61b80c94d25336b4934b71c0", size = 5055054, upload-time = "2025-09-22T04:02:38.154Z" },
|
| 1615 |
+
{ url = "https://files.pythonhosted.org/packages/60/23/6885b451636ae286c34628f70a7ed1fcc759f8d9ad382d132e1c8d3d9bfd/lxml-6.0.2-cp314-cp314-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:a4bf42d2e4cf52c28cc1812d62426b9503cdb0c87a6de81442626aa7d69707ba", size = 5352421, upload-time = "2025-09-22T04:02:40.413Z" },
|
| 1616 |
+
{ url = "https://files.pythonhosted.org/packages/48/5b/fc2ddfc94ddbe3eebb8e9af6e3fd65e2feba4967f6a4e9683875c394c2d8/lxml-6.0.2-cp314-cp314-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b2c7fdaa4d7c3d886a42534adec7cfac73860b89b4e5298752f60aa5984641a0", size = 5673684, upload-time = "2025-09-22T04:02:42.288Z" },
|
| 1617 |
+
{ url = "https://files.pythonhosted.org/packages/29/9c/47293c58cc91769130fbf85531280e8cc7868f7fbb6d92f4670071b9cb3e/lxml-6.0.2-cp314-cp314-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:98a5e1660dc7de2200b00d53fa00bcd3c35a3608c305d45a7bbcaf29fa16e83d", size = 5252463, upload-time = "2025-09-22T04:02:44.165Z" },
|
| 1618 |
+
{ url = "https://files.pythonhosted.org/packages/9b/da/ba6eceb830c762b48e711ded880d7e3e89fc6c7323e587c36540b6b23c6b/lxml-6.0.2-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:dc051506c30b609238d79eda75ee9cab3e520570ec8219844a72a46020901e37", size = 4698437, upload-time = "2025-09-22T04:02:46.524Z" },
|
| 1619 |
+
{ url = "https://files.pythonhosted.org/packages/a5/24/7be3f82cb7990b89118d944b619e53c656c97dc89c28cfb143fdb7cd6f4d/lxml-6.0.2-cp314-cp314-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8799481bbdd212470d17513a54d568f44416db01250f49449647b5ab5b5dccb9", size = 5269890, upload-time = "2025-09-22T04:02:48.812Z" },
|
| 1620 |
+
{ url = "https://files.pythonhosted.org/packages/1b/bd/dcfb9ea1e16c665efd7538fc5d5c34071276ce9220e234217682e7d2c4a5/lxml-6.0.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:9261bb77c2dab42f3ecd9103951aeca2c40277701eb7e912c545c1b16e0e4917", size = 5097185, upload-time = "2025-09-22T04:02:50.746Z" },
|
| 1621 |
+
{ url = "https://files.pythonhosted.org/packages/21/04/a60b0ff9314736316f28316b694bccbbabe100f8483ad83852d77fc7468e/lxml-6.0.2-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:65ac4a01aba353cfa6d5725b95d7aed6356ddc0a3cd734de00124d285b04b64f", size = 4745895, upload-time = "2025-09-22T04:02:52.968Z" },
|
| 1622 |
+
{ url = "https://files.pythonhosted.org/packages/d6/bd/7d54bd1846e5a310d9c715921c5faa71cf5c0853372adf78aee70c8d7aa2/lxml-6.0.2-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:b22a07cbb82fea98f8a2fd814f3d1811ff9ed76d0fc6abc84eb21527596e7cc8", size = 5695246, upload-time = "2025-09-22T04:02:54.798Z" },
|
| 1623 |
+
{ url = "https://files.pythonhosted.org/packages/fd/32/5643d6ab947bc371da21323acb2a6e603cedbe71cb4c99c8254289ab6f4e/lxml-6.0.2-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:d759cdd7f3e055d6bc8d9bec3ad905227b2e4c785dc16c372eb5b5e83123f48a", size = 5260797, upload-time = "2025-09-22T04:02:57.058Z" },
|
| 1624 |
+
{ url = "https://files.pythonhosted.org/packages/33/da/34c1ec4cff1eea7d0b4cd44af8411806ed943141804ac9c5d565302afb78/lxml-6.0.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:945da35a48d193d27c188037a05fec5492937f66fb1958c24fc761fb9d40d43c", size = 5277404, upload-time = "2025-09-22T04:02:58.966Z" },
|
| 1625 |
+
{ url = "https://files.pythonhosted.org/packages/82/57/4eca3e31e54dc89e2c3507e1cd411074a17565fa5ffc437c4ae0a00d439e/lxml-6.0.2-cp314-cp314-win32.whl", hash = "sha256:be3aaa60da67e6153eb15715cc2e19091af5dc75faef8b8a585aea372507384b", size = 3670072, upload-time = "2025-09-22T04:03:38.05Z" },
|
| 1626 |
+
{ url = "https://files.pythonhosted.org/packages/e3/e0/c96cf13eccd20c9421ba910304dae0f619724dcf1702864fd59dd386404d/lxml-6.0.2-cp314-cp314-win_amd64.whl", hash = "sha256:fa25afbadead523f7001caf0c2382afd272c315a033a7b06336da2637d92d6ed", size = 4080617, upload-time = "2025-09-22T04:03:39.835Z" },
|
| 1627 |
+
{ url = "https://files.pythonhosted.org/packages/d5/5d/b3f03e22b3d38d6f188ef044900a9b29b2fe0aebb94625ce9fe244011d34/lxml-6.0.2-cp314-cp314-win_arm64.whl", hash = "sha256:063eccf89df5b24e361b123e257e437f9e9878f425ee9aae3144c77faf6da6d8", size = 3754930, upload-time = "2025-09-22T04:03:41.565Z" },
|
| 1628 |
+
{ url = "https://files.pythonhosted.org/packages/5e/5c/42c2c4c03554580708fc738d13414801f340c04c3eff90d8d2d227145275/lxml-6.0.2-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:6162a86d86893d63084faaf4ff937b3daea233e3682fb4474db07395794fa80d", size = 8910380, upload-time = "2025-09-22T04:03:01.645Z" },
|
| 1629 |
+
{ url = "https://files.pythonhosted.org/packages/bf/4f/12df843e3e10d18d468a7557058f8d3733e8b6e12401f30b1ef29360740f/lxml-6.0.2-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:414aaa94e974e23a3e92e7ca5b97d10c0cf37b6481f50911032c69eeb3991bba", size = 4775632, upload-time = "2025-09-22T04:03:03.814Z" },
|
| 1630 |
+
{ url = "https://files.pythonhosted.org/packages/e4/0c/9dc31e6c2d0d418483cbcb469d1f5a582a1cd00a1f4081953d44051f3c50/lxml-6.0.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:48461bd21625458dd01e14e2c38dd0aea69addc3c4f960c30d9f59d7f93be601", size = 4975171, upload-time = "2025-09-22T04:03:05.651Z" },
|
| 1631 |
+
{ url = "https://files.pythonhosted.org/packages/e7/2b/9b870c6ca24c841bdd887504808f0417aa9d8d564114689266f19ddf29c8/lxml-6.0.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:25fcc59afc57d527cfc78a58f40ab4c9b8fd096a9a3f964d2781ffb6eb33f4ed", size = 5110109, upload-time = "2025-09-22T04:03:07.452Z" },
|
| 1632 |
+
{ url = "https://files.pythonhosted.org/packages/bf/0c/4f5f2a4dd319a178912751564471355d9019e220c20d7db3fb8307ed8582/lxml-6.0.2-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5179c60288204e6ddde3f774a93350177e08876eaf3ab78aa3a3649d43eb7d37", size = 5041061, upload-time = "2025-09-22T04:03:09.297Z" },
|
| 1633 |
+
{ url = "https://files.pythonhosted.org/packages/12/64/554eed290365267671fe001a20d72d14f468ae4e6acef1e179b039436967/lxml-6.0.2-cp314-cp314t-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:967aab75434de148ec80597b75062d8123cadf2943fb4281f385141e18b21338", size = 5306233, upload-time = "2025-09-22T04:03:11.651Z" },
|
| 1634 |
+
{ url = "https://files.pythonhosted.org/packages/7a/31/1d748aa275e71802ad9722df32a7a35034246b42c0ecdd8235412c3396ef/lxml-6.0.2-cp314-cp314t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d100fcc8930d697c6561156c6810ab4a508fb264c8b6779e6e61e2ed5e7558f9", size = 5604739, upload-time = "2025-09-22T04:03:13.592Z" },
|
| 1635 |
+
{ url = "https://files.pythonhosted.org/packages/8f/41/2c11916bcac09ed561adccacceaedd2bf0e0b25b297ea92aab99fd03d0fa/lxml-6.0.2-cp314-cp314t-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2ca59e7e13e5981175b8b3e4ab84d7da57993eeff53c07764dcebda0d0e64ecd", size = 5225119, upload-time = "2025-09-22T04:03:15.408Z" },
|
| 1636 |
+
{ url = "https://files.pythonhosted.org/packages/99/05/4e5c2873d8f17aa018e6afde417c80cc5d0c33be4854cce3ef5670c49367/lxml-6.0.2-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:957448ac63a42e2e49531b9d6c0fa449a1970dbc32467aaad46f11545be9af1d", size = 4633665, upload-time = "2025-09-22T04:03:17.262Z" },
|
| 1637 |
+
{ url = "https://files.pythonhosted.org/packages/0f/c9/dcc2da1bebd6275cdc723b515f93edf548b82f36a5458cca3578bc899332/lxml-6.0.2-cp314-cp314t-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b7fc49c37f1786284b12af63152fe1d0990722497e2d5817acfe7a877522f9a9", size = 5234997, upload-time = "2025-09-22T04:03:19.14Z" },
|
| 1638 |
+
{ url = "https://files.pythonhosted.org/packages/9c/e2/5172e4e7468afca64a37b81dba152fc5d90e30f9c83c7c3213d6a02a5ce4/lxml-6.0.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e19e0643cc936a22e837f79d01a550678da8377d7d801a14487c10c34ee49c7e", size = 5090957, upload-time = "2025-09-22T04:03:21.436Z" },
|
| 1639 |
+
{ url = "https://files.pythonhosted.org/packages/a5/b3/15461fd3e5cd4ddcb7938b87fc20b14ab113b92312fc97afe65cd7c85de1/lxml-6.0.2-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:1db01e5cf14345628e0cbe71067204db658e2fb8e51e7f33631f5f4735fefd8d", size = 4764372, upload-time = "2025-09-22T04:03:23.27Z" },
|
| 1640 |
+
{ url = "https://files.pythonhosted.org/packages/05/33/f310b987c8bf9e61c4dd8e8035c416bd3230098f5e3cfa69fc4232de7059/lxml-6.0.2-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:875c6b5ab39ad5291588aed6925fac99d0097af0dd62f33c7b43736043d4a2ec", size = 5634653, upload-time = "2025-09-22T04:03:25.767Z" },
|
| 1641 |
+
{ url = "https://files.pythonhosted.org/packages/70/ff/51c80e75e0bc9382158133bdcf4e339b5886c6ee2418b5199b3f1a61ed6d/lxml-6.0.2-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:cdcbed9ad19da81c480dfd6dd161886db6096083c9938ead313d94b30aadf272", size = 5233795, upload-time = "2025-09-22T04:03:27.62Z" },
|
| 1642 |
+
{ url = "https://files.pythonhosted.org/packages/56/4d/4856e897df0d588789dd844dbed9d91782c4ef0b327f96ce53c807e13128/lxml-6.0.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:80dadc234ebc532e09be1975ff538d154a7fa61ea5031c03d25178855544728f", size = 5257023, upload-time = "2025-09-22T04:03:30.056Z" },
|
| 1643 |
+
{ url = "https://files.pythonhosted.org/packages/0f/85/86766dfebfa87bea0ab78e9ff7a4b4b45225df4b4d3b8cc3c03c5cd68464/lxml-6.0.2-cp314-cp314t-win32.whl", hash = "sha256:da08e7bb297b04e893d91087df19638dc7a6bb858a954b0cc2b9f5053c922312", size = 3911420, upload-time = "2025-09-22T04:03:32.198Z" },
|
| 1644 |
+
{ url = "https://files.pythonhosted.org/packages/fe/1a/b248b355834c8e32614650b8008c69ffeb0ceb149c793961dd8c0b991bb3/lxml-6.0.2-cp314-cp314t-win_amd64.whl", hash = "sha256:252a22982dca42f6155125ac76d3432e548a7625d56f5a273ee78a5057216eca", size = 4406837, upload-time = "2025-09-22T04:03:34.027Z" },
|
| 1645 |
+
{ url = "https://files.pythonhosted.org/packages/92/aa/df863bcc39c5e0946263454aba394de8a9084dbaff8ad143846b0d844739/lxml-6.0.2-cp314-cp314t-win_arm64.whl", hash = "sha256:bb4c1847b303835d89d785a18801a883436cdfd5dc3d62947f9c49e24f0f5a2c", size = 3822205, upload-time = "2025-09-22T04:03:36.249Z" },
|
| 1646 |
+
]
|
| 1647 |
+
|
| 1648 |
+
[package.optional-dependencies]
|
| 1649 |
+
html-clean = [
|
| 1650 |
+
{ name = "lxml-html-clean" },
|
| 1651 |
+
]
|
| 1652 |
+
|
| 1653 |
+
[[package]]
|
| 1654 |
+
name = "lxml-html-clean"
|
| 1655 |
+
version = "0.4.3"
|
| 1656 |
+
source = { registry = "https://pypi.org/simple" }
|
| 1657 |
+
dependencies = [
|
| 1658 |
+
{ name = "lxml" },
|
| 1659 |
+
]
|
| 1660 |
+
sdist = { url = "https://files.pythonhosted.org/packages/d9/cb/c9c5bb2a9c47292e236a808dd233a03531f53b626f36259dcd32b49c76da/lxml_html_clean-0.4.3.tar.gz", hash = "sha256:c9df91925b00f836c807beab127aac82575110eacff54d0a75187914f1bd9d8c", size = 21498, upload-time = "2025-10-02T20:49:24.895Z" }
|
| 1661 |
+
wheels = [
|
| 1662 |
+
{ url = "https://files.pythonhosted.org/packages/10/4a/63a9540e3ca73709f4200564a737d63a4c8c9c4dd032bab8535f507c190a/lxml_html_clean-0.4.3-py3-none-any.whl", hash = "sha256:63fd7b0b9c3a2e4176611c2ca5d61c4c07ffca2de76c14059a81a2825833731e", size = 14177, upload-time = "2025-10-02T20:49:23.749Z" },
|
| 1663 |
+
]
|
| 1664 |
+
|
| 1665 |
[[package]]
|
| 1666 |
name = "markdown-it-py"
|
| 1667 |
version = "4.0.0"
|
|
|
|
| 3864 |
{ url = "https://files.pythonhosted.org/packages/af/df/c7891ef9d2712ad774777271d39fdef63941ffba0a9d59b7ad1fd2765e57/tiktoken-0.12.0-cp314-cp314t-win_amd64.whl", hash = "sha256:f61c0aea5565ac82e2ec50a05e02a6c44734e91b51c10510b084ea1b8e633a71", size = 920667, upload-time = "2025-10-06T20:22:34.444Z" },
|
| 3865 |
]
|
| 3866 |
|
| 3867 |
+
[[package]]
|
| 3868 |
+
name = "tld"
|
| 3869 |
+
version = "0.13.1"
|
| 3870 |
+
source = { registry = "https://pypi.org/simple" }
|
| 3871 |
+
sdist = { url = "https://files.pythonhosted.org/packages/df/a1/5723b07a70c1841a80afc9ac572fdf53488306848d844cd70519391b0d26/tld-0.13.1.tar.gz", hash = "sha256:75ec00936cbcf564f67361c41713363440b6c4ef0f0c1592b5b0fbe72c17a350", size = 462000, upload-time = "2025-05-21T22:18:29.341Z" }
|
| 3872 |
+
wheels = [
|
| 3873 |
+
{ url = "https://files.pythonhosted.org/packages/dc/70/b2f38360c3fc4bc9b5e8ef429e1fde63749144ac583c2dbdf7e21e27a9ad/tld-0.13.1-py2.py3-none-any.whl", hash = "sha256:a2d35109433ac83486ddf87e3c4539ab2c5c2478230e5d9c060a18af4b03aa7c", size = 274718, upload-time = "2025-05-21T22:18:25.811Z" },
|
| 3874 |
+
]
|
| 3875 |
+
|
| 3876 |
[[package]]
|
| 3877 |
name = "tokenizers"
|
| 3878 |
version = "0.22.1"
|
|
|
|
| 3962 |
{ url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540, upload-time = "2024-11-24T20:12:19.698Z" },
|
| 3963 |
]
|
| 3964 |
|
| 3965 |
+
[[package]]
|
| 3966 |
+
name = "trafilatura"
|
| 3967 |
+
version = "2.0.0"
|
| 3968 |
+
source = { registry = "https://pypi.org/simple" }
|
| 3969 |
+
dependencies = [
|
| 3970 |
+
{ name = "certifi" },
|
| 3971 |
+
{ name = "charset-normalizer" },
|
| 3972 |
+
{ name = "courlan" },
|
| 3973 |
+
{ name = "htmldate" },
|
| 3974 |
+
{ name = "justext" },
|
| 3975 |
+
{ name = "lxml" },
|
| 3976 |
+
{ name = "urllib3" },
|
| 3977 |
+
]
|
| 3978 |
+
sdist = { url = "https://files.pythonhosted.org/packages/06/25/e3ebeefdebfdfae8c4a4396f5a6ea51fc6fa0831d63ce338e5090a8003dc/trafilatura-2.0.0.tar.gz", hash = "sha256:ceb7094a6ecc97e72fea73c7dba36714c5c5b577b6470e4520dca893706d6247", size = 253404, upload-time = "2024-12-03T15:23:24.16Z" }
|
| 3979 |
+
wheels = [
|
| 3980 |
+
{ url = "https://files.pythonhosted.org/packages/8a/b6/097367f180b6383a3581ca1b86fcae284e52075fa941d1232df35293363c/trafilatura-2.0.0-py3-none-any.whl", hash = "sha256:77eb5d1e993747f6f20938e1de2d840020719735690c840b9a1024803a4cd51d", size = 132557, upload-time = "2024-12-03T15:23:21.41Z" },
|
| 3981 |
+
]
|
| 3982 |
+
|
| 3983 |
[[package]]
|
| 3984 |
name = "transformers"
|
| 3985 |
version = "2.3.0"
|
|
|
|
| 4053 |
{ url = "https://files.pythonhosted.org/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8", size = 347839, upload-time = "2025-03-23T13:54:41.845Z" },
|
| 4054 |
]
|
| 4055 |
|
| 4056 |
+
[[package]]
|
| 4057 |
+
name = "tzlocal"
|
| 4058 |
+
version = "5.3.1"
|
| 4059 |
+
source = { registry = "https://pypi.org/simple" }
|
| 4060 |
+
dependencies = [
|
| 4061 |
+
{ name = "tzdata", marker = "sys_platform == 'win32'" },
|
| 4062 |
+
]
|
| 4063 |
+
sdist = { url = "https://files.pythonhosted.org/packages/8b/2e/c14812d3d4d9cd1773c6be938f89e5735a1f11a9f184ac3639b93cef35d5/tzlocal-5.3.1.tar.gz", hash = "sha256:cceffc7edecefea1f595541dbd6e990cb1ea3d19bf01b2809f362a03dd7921fd", size = 30761, upload-time = "2025-03-05T21:17:41.549Z" }
|
| 4064 |
+
wheels = [
|
| 4065 |
+
{ url = "https://files.pythonhosted.org/packages/c2/14/e2a54fabd4f08cd7af1c07030603c3356b74da07f7cc056e600436edfa17/tzlocal-5.3.1-py3-none-any.whl", hash = "sha256:eb1a66c3ef5847adf7a834f1be0800581b683b5608e74f86ecbcef8ab91bb85d", size = 18026, upload-time = "2025-03-05T21:17:39.857Z" },
|
| 4066 |
+
]
|
| 4067 |
+
|
| 4068 |
[[package]]
|
| 4069 |
name = "uc-micro-py"
|
| 4070 |
version = "1.0.3"
|