| import logging |
| from typing import Any |
|
|
| import httpx |
| from playwright.async_api import async_playwright |
|
|
| from services.config import settings |
|
|
# Module-wide logger; it is looked up under the name "uvicorn" rather than
# this module's own name.
# NOTE(review): presumably chosen so these messages share the server's log
# handlers and formatting — confirm against the logging configuration.
logger = logging.getLogger("uvicorn")
|
|
|
|
class BrowserTool:
    """
    Tools for live web search and direct URL extraction.

    Both public coroutines return plain text. Failures are reported as a
    human-readable message in the returned string instead of being raised,
    so callers can forward the result unchanged.
    """

    # Upper bound on the length of any text handed back to the caller.
    MAX_OUTPUT_CHARS = 12000
    # Upper bound on each per-result snippet in formatted search output.
    MAX_SNIPPET_CHARS = 900
    # Topics forwarded to Tavily unchanged; anything else becomes "general".
    TAVILY_TOPICS = frozenset({"general", "news", "finance"})
    TAVILY_SEARCH_URL = "https://api.tavily.com/search"

    def __init__(self) -> None:
        """Read the Tavily API key from the application settings."""
        self.tavily_api_key = settings.TAVILY_API_KEY

    async def search_and_extract(self, url: str) -> str:
        """
        Navigates to a URL and returns the page text content.

        The page is opened in headless Chromium. The result has the form
        ``"Title: <title>\\nURL: <url>\\n\\n<body text>"`` and is truncated to
        ``MAX_OUTPUT_CHARS`` characters.

        Args:
            url: Address to open in the browser.

        Returns:
            The extracted text, or ``"Error accessing <url>: <reason>"`` when
            the browser cannot be started or the page cannot be loaded.
        """
        logger.info("BrowserTool: Navigating to %s", url)
        # NOTE(review): ``url`` is handed to the browser as-is. If it can come
        # from untrusted input, consider restricting it to http/https.
        try:
            async with async_playwright() as playwright:
                browser = await playwright.chromium.launch(headless=True)
                try:
                    page = await browser.new_page()
                    await page.goto(url, wait_until="networkidle", timeout=30000)
                    title = await page.title()
                    content = await page.inner_text("body")
                finally:
                    # Close the browser on success and on failure alike.
                    await browser.close()
        except Exception as exc:
            # Start-up failures (driver, launch, new page) are reported the
            # same way as navigation failures instead of escaping the tool.
            logger.error("BrowserTool extract error for %s: %s", url, exc)
            return f"Error accessing {url}: {exc}"

        combined = f"Title: {title}\nURL: {url}\n\n{content}".strip()
        return combined[: self.MAX_OUTPUT_CHARS]

    async def web_search(self, query: str, topic: str = "general", max_results: int = 5) -> str:
        """
        Searches the public web with Tavily and returns LLM-friendly results.

        Args:
            query: Search text. A blank query is rejected without a request.
            topic: One of ``TAVILY_TOPICS``; other values fall back to
                ``"general"``.
            max_results: Requested number of results, clamped to 1..10.

        Returns:
            The formatted results, or a message explaining why the search
            could not be performed (missing API key, blank query, HTTP error,
            transport error, or an unparsable response).
        """
        if not self.tavily_api_key:
            return (
                "Web search is unavailable: TAVILY_API_KEY is not configured. "
                "Add it to the backend environment to enable internet search."
            )

        if not (query and query.strip()):
            # Skip the network round trip for a request that cannot succeed.
            return "Tavily search failed: query is empty."

        payload = {
            "query": query,
            "topic": topic if topic in self.TAVILY_TOPICS else "general",
            "search_depth": "advanced",
            "max_results": max(1, min(max_results, 10)),
            "include_answer": "advanced",
            "include_raw_content": False,
            "include_images": False,
        }

        headers = {
            "Authorization": f"Bearer {self.tavily_api_key}",
            "Content-Type": "application/json",
        }

        try:
            async with httpx.AsyncClient(timeout=45.0) as client:
                response = await client.post(
                    self.TAVILY_SEARCH_URL,
                    headers=headers,
                    json=payload,
                )
                response.raise_for_status()
            # Decoded inside the try so a malformed body is reported as a
            # failed search rather than raised to the caller.
            data = response.json()
        except httpx.HTTPStatusError as exc:
            detail = exc.response.text[:500]
            logger.error("Tavily HTTP error: %s", detail)
            return f"Tavily search failed with status {exc.response.status_code}: {detail}"
        except ValueError as exc:
            # json.JSONDecodeError is a ValueError subclass.
            logger.error("Tavily returned an invalid JSON body: %s", exc)
            return f"Tavily search failed: invalid JSON response ({exc})"
        except Exception as exc:
            logger.error("Tavily request error: %s", exc)
            return f"Tavily search failed: {exc}"

        if not isinstance(data, dict):
            logger.error("Tavily returned an unexpected payload type: %s", type(data).__name__)
            return "Tavily search failed: unexpected response format."

        return self._format_tavily_results(query, data)

    def _format_tavily_results(self, query: str, data: dict[str, Any]) -> str:
        """
        Render a decoded Tavily response as plain text.

        The output starts with the query, followed by the optional answer and
        a numbered list of sources (title, URL, score, snippet). Entries under
        ``"results"`` that are not mappings are skipped. The result is capped
        at ``MAX_OUTPUT_CHARS`` characters.

        Args:
            query: The search text the response belongs to.
            data: Decoded response body; the keys ``"answer"`` and
                ``"results"`` are read, both optional.

        Returns:
            The formatted, length-limited text.
        """
        answer = data.get("answer")
        raw_results = data.get("results")
        if isinstance(raw_results, (list, tuple)):
            # Drop malformed entries rather than failing the whole search.
            results = [entry for entry in raw_results if isinstance(entry, dict)]
        else:
            results = []

        lines = [f"Search query: {query}"]
        if answer:
            lines.extend(["", "Answer:", str(answer).strip()])

        if not results:
            lines.extend(["", "No search results returned."])
            return "\n".join(lines)[: self.MAX_OUTPUT_CHARS]

        lines.extend(["", "Sources:"])
        for index, result in enumerate(results, start=1):
            title = result.get("title") or "Untitled"
            url = result.get("url") or ""
            # str() guards against a non-string "content" value.
            snippet = str(result.get("content") or "").strip()
            score = result.get("score")

            lines.append(f"{index}. {title}")
            if url:
                lines.append(f" URL: {url}")
            if score is not None:
                lines.append(f" Score: {score}")
            if snippet:
                lines.append(f" Snippet: {snippet[: self.MAX_SNIPPET_CHARS]}")

        return "\n".join(lines)[: self.MAX_OUTPUT_CHARS]
|
|