| import os |
| import requests |
| from typing import Dict, Any, Optional, List |
| from pydantic import Field |
| from .search_base import SearchBase |
| from .tool import Tool, Toolkit |
| from evoagentx.core.logging import logger |
| import dotenv |
|
|
| dotenv.load_dotenv() |
|
|
class SearchSerpAPI(SearchBase):
    """
    SerpAPI search tool that provides access to multiple search engines including
    Google, Bing, Baidu, Yahoo, and DuckDuckGo through a unified interface.
    """

    api_key: Optional[str] = Field(default=None, description="SerpAPI authentication key")
    default_engine: Optional[str] = Field(default="google", description="Default search engine")
    default_location: Optional[str] = Field(default=None, description="Default geographic location")
    default_language: Optional[str] = Field(default="en", description="Default interface language")
    default_country: Optional[str] = Field(default="us", description="Default country code")
    enable_content_scraping: Optional[bool] = Field(default=True, description="Enable full content scraping")

    def __init__(
        self,
        name: str = "SearchSerpAPI",
        num_search_pages: Optional[int] = 5,
        max_content_words: Optional[int] = None,
        api_key: Optional[str] = None,
        default_engine: Optional[str] = "google",
        default_location: Optional[str] = None,
        default_language: Optional[str] = "en",
        default_country: Optional[str] = "us",
        enable_content_scraping: Optional[bool] = True,
        **kwargs
    ):
        """
        Initialize the SerpAPI Search tool.

        Args:
            name (str): Name of the tool
            num_search_pages (int): Number of search results to retrieve
            max_content_words (int): Maximum number of words to include in content
            api_key (str): SerpAPI authentication key (can also use SERPAPI_KEY env var)
            default_engine (str): Default search engine (google, bing, baidu, yahoo, duckduckgo)
            default_location (str): Default geographic location for searches
            default_language (str): Default interface language
            default_country (str): Default country code
            enable_content_scraping (bool): Whether to scrape full page content
            **kwargs: Additional keyword arguments for parent class initialization
        """
        super().__init__(
            name=name,
            num_search_pages=num_search_pages,
            max_content_words=max_content_words,
            api_key=api_key,
            default_engine=default_engine,
            default_location=default_location,
            default_language=default_language,
            default_country=default_country,
            enable_content_scraping=enable_content_scraping,
            **kwargs
        )

        # Fall back to the environment so the key never has to live in code.
        self.api_key = api_key or os.getenv('SERPAPI_KEY', '')
        self.base_url = "https://serpapi.com/search.json"

        if not self.api_key:
            logger.warning("SerpAPI key not found. Set SERPAPI_KEY environment variable or pass api_key parameter.")

    def _build_serpapi_params(self, query: str, engine: str = None, location: str = None,
                              language: str = None, country: str = None, search_type: str = None,
                              num_results: int = None) -> Dict[str, Any]:
        """
        Build SerpAPI request parameters.

        Args:
            query (str): Search query
            engine (str): Search engine to use (falls back to self.default_engine)
            location (str): Geographic location
            language (str): Interface language
            country (str): Country code
            search_type (str): Type of search (web, images, news, shopping, maps)
            num_results (int): Number of results to retrieve

        Returns:
            Dict[str, Any]: SerpAPI request parameters
        """
        params = {
            "q": query,
            "api_key": self.api_key,
            # BUG FIX: the engine argument was previously accepted but never added to
            # the request, so every search silently ran on SerpAPI's default engine
            # regardless of the caller's choice.
            "engine": engine or self.default_engine,
            "num": num_results or self.num_search_pages,
        }

        # Optional localization parameters; only sent when a value is available.
        if location or self.default_location:
            params["location"] = location or self.default_location

        if language or self.default_language:
            params["hl"] = language or self.default_language

        if country or self.default_country:
            params["gl"] = country or self.default_country

        # Map friendly search-type names onto Google's `tbm` vertical codes.
        # NOTE(review): `tbm` values are Google-specific — confirm behavior when a
        # non-Google engine is combined with a search_type.
        if search_type and search_type != "web":
            search_type_map = {
                "images": "isch",
                "news": "nws",
                "shopping": "shop",
                "maps": "lcl"
            }
            if search_type in search_type_map:
                params["tbm"] = search_type_map[search_type]

        return params

    def _execute_serpapi_search(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """
        Execute search using direct HTTP requests to SerpAPI.

        Args:
            params (Dict[str, Any]): Search parameters

        Returns:
            Dict[str, Any]: SerpAPI response data

        Raises:
            Exception: For transport failures, malformed responses, or API-reported errors
        """
        try:
            response = requests.get(self.base_url, params=params, timeout=30)
            response.raise_for_status()
            data = response.json()
        except requests.exceptions.RequestException as e:
            raise Exception(f"SerpAPI request failed: {str(e)}")
        except ValueError as e:
            # response.json() raises ValueError on a non-JSON body.
            raise Exception(f"SerpAPI search failed: {str(e)}")

        # Raised outside the try block so the API error message is not
        # double-wrapped (previously: "SerpAPI search failed: SerpAPI error: ...").
        if "error" in data:
            raise Exception(f"SerpAPI error: {data['error']}")

        return data

    def _process_serpapi_results(self, serpapi_data: Dict[str, Any], max_content_words: int = None) -> Dict[str, Any]:
        """
        Process SerpAPI results into structured format with processed results + raw data.

        Args:
            serpapi_data (Dict[str, Any]): Raw SerpAPI response
            max_content_words (int): Maximum words per result content

        Returns:
            Dict[str, Any]: Structured response with processed results and raw data
        """
        processed_results = []

        # Knowledge graph entries (when present) are surfaced first (priority 1).
        if knowledge_graph := serpapi_data.get("knowledge_graph", {}):
            if description := knowledge_graph.get("description"):
                title = knowledge_graph.get("title", "Unknown")
                content = f"**{title}**"

                if kg_type := knowledge_graph.get("type"):
                    content += f" ({kg_type})"
                content += f"\n\n{description}"

                # Summarize up to 5 list-valued attributes, 3 values each.
                if kg_list := knowledge_graph.get("list", {}):
                    content += "\n\n**Key Information:**"
                    for key, value in list(kg_list.items())[:5]:
                        if isinstance(value, list) and value:
                            formatted_key = key.replace('_', ' ').title()
                            formatted_value = ', '.join(str(v) for v in value[:3])
                            content += f"\n• {formatted_key}: {formatted_value}"

                processed_results.append({
                    "title": f"Knowledge: {title}",
                    "content": self._truncate_content(content, max_content_words or 200),
                    "url": knowledge_graph.get("source", {}).get("link", ""),
                    "type": "knowledge_graph",
                    "priority": 1
                })

        # Organic web results (priority 2), optionally enriched with scraped content.
        for item in serpapi_data.get("organic_results", []):
            url = item.get("link", "")
            title = item.get("title", "No Title")
            snippet = item.get("snippet", "")
            position = item.get("position", 0)

            result = {
                "title": title,
                "content": self._truncate_content(snippet, max_content_words or 400),
                "url": url,
                "type": "organic",
                "priority": 2,
                "position": position
            }

            # Scraping is best-effort: failures are logged at debug level and the
            # snippet-only result is kept.
            if self.enable_content_scraping and url and url.startswith(('http://', 'https://')):
                try:
                    scraped_title, scraped_content = self._scrape_page(url)
                    if scraped_content and scraped_content.strip():
                        # Prefer the page's own title when the scrape yields one.
                        if scraped_title and scraped_title.strip():
                            result["title"] = scraped_title

                        result["site_content"] = self._truncate_content(scraped_content, max_content_words or 400)
                    else:
                        result["site_content"] = None
                except Exception as e:
                    logger.debug(f"Content scraping failed for {url}: {str(e)}")
                    result["site_content"] = None
            else:
                result["site_content"] = None

            # Drop results that carry no usable text at all.
            if snippet or result.get("site_content"):
                processed_results.append(result)

        # Pass through a trimmed copy of selected secondary sections.
        raw_data = {}
        raw_sections = [
            "local_results", "news_results", "shopping_results",
            "related_questions", "recipes_results", "images_results"
        ]

        for section in raw_sections:
            if section in serpapi_data and serpapi_data[section]:
                if section == "local_results":
                    # local_results is a dict wrapping a "places" list.
                    places = serpapi_data[section].get("places", [])[:3]
                    if places:
                        raw_data[section] = {"places": places}
                else:
                    # ROBUSTNESS FIX: only slice list-shaped sections; a dict-shaped
                    # section previously raised TypeError and failed the whole search.
                    section_data = serpapi_data[section]
                    if isinstance(section_data, list):
                        raw_data[section] = section_data[:3]
                    else:
                        raw_data[section] = section_data

        # Extract lightweight query metadata when SerpAPI provides it.
        search_metadata = {}
        if search_meta := serpapi_data.get("search_metadata", {}):
            search_metadata = {
                "query": search_meta.get("query", ""),
                "location": search_meta.get("location", ""),
                "total_results": search_meta.get("total_results", ""),
                "search_time": search_meta.get("total_time_taken", "")
            }

        # Knowledge graph first, then organic results in rank order.
        processed_results.sort(key=lambda x: (x.get("priority", 999), x.get("position", 0)))

        return {
            "results": processed_results,
            "raw_data": raw_data if raw_data else None,
            "search_metadata": search_metadata if search_metadata else None,
            "error": None
        }

    def _handle_api_errors(self, error: Exception) -> str:
        """
        Handle SerpAPI specific errors with appropriate messages.

        Args:
            error (Exception): The exception that occurred

        Returns:
            str: User-friendly error message
        """
        error_str = str(error).lower()

        if "api key" in error_str or "unauthorized" in error_str:
            return "Invalid or missing SerpAPI key. Please set SERPAPI_KEY environment variable."
        elif "rate limit" in error_str or "too many requests" in error_str:
            return "SerpAPI rate limit exceeded. Please try again later."
        elif "quota" in error_str or "credit" in error_str:
            return "SerpAPI quota exceeded. Please check your plan limits."
        elif "timeout" in error_str:
            return "SerpAPI request timeout. Please try again."
        else:
            return f"SerpAPI error: {str(error)}"

    def search(self, query: str, num_search_pages: int = None, max_content_words: int = None,
               engine: str = None, location: str = None, language: str = None,
               country: str = None, search_type: str = None) -> Dict[str, Any]:
        """
        Search using SerpAPI with comprehensive parameter support.

        Args:
            query (str): The search query
            num_search_pages (int): Number of search results to retrieve
            max_content_words (int): Maximum number of words to include in content
            engine (str): Search engine (google, bing, baidu, yahoo, duckduckgo)
            location (str): Geographic location for localized results
            language (str): Interface language (e.g., 'en', 'es', 'fr')
            country (str): Country code for country-specific results (e.g., 'us', 'uk')
            search_type (str): Type of search (web, images, news, shopping, maps)

        Returns:
            Dict[str, Any]: Contains search results and optional error message
        """
        # Per-call arguments override instance defaults.
        num_search_pages = num_search_pages or self.num_search_pages
        max_content_words = max_content_words or self.max_content_words

        if not self.api_key:
            error_msg = (
                "SerpAPI key is required. Please set SERPAPI_KEY environment variable "
                "or pass api_key parameter. Get your key from: https://serpapi.com/"
            )
            logger.error(error_msg)
            return {"results": [], "raw_data": None, "search_metadata": None, "error": error_msg}

        try:
            search_engine = engine or self.default_engine
            logger.info(f"Searching {search_engine} via SerpAPI: {query}, "
                        f"num_results={num_search_pages}, max_content_words={max_content_words}")

            params = self._build_serpapi_params(
                query=query,
                engine=search_engine,
                location=location,
                language=language,
                country=country,
                search_type=search_type,
                num_results=num_search_pages
            )

            serpapi_data = self._execute_serpapi_search(params)

            response_data = self._process_serpapi_results(serpapi_data, max_content_words)

            logger.info(f"Successfully retrieved {len(response_data['results'])} processed results")
            return response_data

        except Exception as e:
            # All failures are normalized into the same response shape as success.
            error_msg = self._handle_api_errors(e)
            logger.error(f"SerpAPI search failed: {error_msg}")
            return {"results": [], "raw_data": None, "search_metadata": None, "error": error_msg}
|
|
|
|
class SerpAPITool(Tool):
    """Tool wrapper exposing SearchSerpAPI.search() through the Tool interface."""

    name: str = "serpapi_search"
    description: str = "Search multiple search engines using SerpAPI with comprehensive result processing and content scraping"
    inputs: Dict[str, Dict[str, str]] = {
        "query": {
            "type": "string",
            "description": "The search query to execute"
        },
        "num_search_pages": {
            "type": "integer",
            "description": "Number of search results to retrieve. Default: 5"
        },
        "max_content_words": {
            "type": "integer",
            "description": "Maximum number of words to include in content per result. None means no limit. Default: None"
        },
        "engine": {
            "type": "string",
            "description": "Search engine to use: google, bing, baidu, yahoo, duckduckgo. Default: google"
        },
        "location": {
            "type": "string",
            "description": "Geographic location for localized results (e.g., 'New York, NY', 'London, UK')"
        },
        "language": {
            "type": "string",
            "description": "Interface language code (e.g., 'en', 'es', 'fr', 'de'). Default: en"
        },
        "country": {
            "type": "string",
            "description": "Country code for country-specific results (e.g., 'us', 'uk', 'ca'). Default: us"
        },
        "search_type": {
            "type": "string",
            "description": "Type of search: web, images, news, shopping, maps. Default: web"
        }
    }
    required: Optional[List[str]] = ["query"]

    def __init__(self, search_serpapi: SearchSerpAPI = None):
        super().__init__()
        # Backend instance that actually performs searches; injected by the toolkit.
        self.search_serpapi = search_serpapi

    def __call__(self, query: str, num_search_pages: int = None, max_content_words: int = None,
                 engine: str = None, location: str = None, language: str = None,
                 country: str = None, search_type: str = None) -> Dict[str, Any]:
        """
        Execute SerpAPI search using the SearchSerpAPI instance.

        Args:
            query (str): The search query to execute
            num_search_pages (int): Number of search results to retrieve
            max_content_words (int): Maximum words per result content
            engine (str): Search engine (google, bing, baidu, yahoo, duckduckgo)
            location (str): Geographic location for localized results
            language (str): Interface language code
            country (str): Country code for country-specific results
            search_type (str): Type of search (web, images, news, shopping, maps)

        Returns:
            Dict[str, Any]: Search results dict with "results", "raw_data",
            "search_metadata", and "error" keys.

        Raises:
            RuntimeError: If no SearchSerpAPI instance was injected.
        """
        if not self.search_serpapi:
            raise RuntimeError("SerpAPI search instance not initialized")

        try:
            return self.search_serpapi.search(
                query=query,
                num_search_pages=num_search_pages,
                max_content_words=max_content_words,
                engine=engine,
                location=location,
                language=language,
                country=country,
                search_type=search_type
            )
        except Exception as e:
            # CONSISTENCY FIX: previously this path returned only {"results", "error"},
            # while every other return path carries the full four-key shape.
            return {
                "results": [],
                "raw_data": None,
                "search_metadata": None,
                "error": f"Error executing SerpAPI search: {str(e)}"
            }
|
|
|
|
class SerpAPIToolkit(Toolkit):
    """Toolkit bundling a shared SearchSerpAPI backend with its Tool wrapper."""

    def __init__(
        self,
        name: str = "SerpAPIToolkit",
        api_key: Optional[str] = None,
        num_search_pages: Optional[int] = 5,
        max_content_words: Optional[int] = None,
        default_engine: Optional[str] = "google",
        default_location: Optional[str] = None,
        default_language: Optional[str] = "en",
        default_country: Optional[str] = "us",
        enable_content_scraping: Optional[bool] = True,
        **kwargs
    ):
        """
        Initialize SerpAPI Toolkit.

        Args:
            name (str): Name of the toolkit
            api_key (str): SerpAPI authentication key
            num_search_pages (int): Default number of search results to retrieve
            max_content_words (int): Default maximum words per result content
            default_engine (str): Default search engine
            default_location (str): Default geographic location
            default_language (str): Default interface language
            default_country (str): Default country code
            enable_content_scraping (bool): Whether to enable content scraping
            **kwargs: Additional keyword arguments
        """
        # Single backend instance shared by every tool in this toolkit.
        serpapi_backend = SearchSerpAPI(
            name="SearchSerpAPI",
            api_key=api_key,
            num_search_pages=num_search_pages,
            max_content_words=max_content_words,
            default_engine=default_engine,
            default_location=default_location,
            default_language=default_language,
            default_country=default_country,
            enable_content_scraping=enable_content_scraping,
            **kwargs
        )

        super().__init__(
            name=name,
            tools=[SerpAPITool(search_serpapi=serpapi_backend)]
        )

        # Keep a direct handle so callers can reach the backend after construction.
        self.search_serpapi = serpapi_backend
|
|
|
|
|
|