| |
|
|
| from .search_base import SearchBase |
| from .tool import Tool,Toolkit |
| from ddgs import DDGS |
| from typing import Dict, Any, List, Optional |
| import pandas as pd |
class SearchDDGS(SearchBase):
    """
    Search tool backed by DDGS (Dux Distributed Global Search).

    DDGS aggregates results from multiple search engines; the backend can be
    DuckDuckGo, Google, Bing, Brave, Yahoo, and others.
    """

    def __init__(
        self,
        name: str = "SearchDDGS",
        num_search_pages: Optional[int] = 5,
        max_content_words: Optional[int] = None,
        backend: str = "auto",
        region: str = "us-en",
        **kwargs
    ):
        """
        Set up the DDGS search tool.

        Args:
            name (str): Name of the tool
            num_search_pages (int): Default number of search results to retrieve
            max_content_words (int): Default cap on words kept per result
            backend (str): Default search backend, e.g. "auto", "duckduckgo",
                "google", "bing", "brave", "yahoo"
            region (str): Default search region (e.g., "us-en", "uk-en", "ru-ru")
            **kwargs: Forwarded unchanged to the parent class initializer
        """
        super().__init__(
            name=name,
            num_search_pages=num_search_pages,
            max_content_words=max_content_words,
            **kwargs
        )
        self.backend = backend
        self.region = region

    def search(self, query: str, num_search_pages: int = None, max_content_words: int = None, backend: str = None, region: str = None) -> Dict[str, Any]:
        """
        Run a DDGS text search and collect page content for each hit.

        For every hit with an http(s) URL the page is scraped through
        ``self._scrape_page``; when scraping fails or yields nothing, the
        snippet in the hit's ``body`` field is used instead. Hits that end up
        with no content are dropped.

        Args:
            query (str): The search query.
            num_search_pages (int): Number of results to request (falsy -> instance default)
            max_content_words (int): Word cap handed to ``self._truncate_content``
                (falsy -> instance default)
            backend (str): Search backend (falsy -> instance default)
            region (str): Search region (falsy -> instance default)

        Returns:
            Dict[str, Any]: ``{"results": [...], "error": <message or None>}``;
            each result carries ``title``, ``content`` and ``url``.
        """
        # Any falsy per-call value (None, 0, "") defers to the instance default.
        page_limit = num_search_pages or self.num_search_pages
        word_limit = max_content_words or self.max_content_words
        engine = backend or self.backend
        locale = region or self.region

        try:
            with DDGS() as client:
                hits = list(
                    client.text(query, max_results=page_limit, backend=engine, region=locale)
                )

            if not hits:
                return {"results": [], "error": "No search results found."}

            collected = []
            for hit in hits:
                try:
                    title = hit.get('title', 'No Title')
                    # The URL may be reported under any of these keys.
                    url = hit.get('href', '') or hit.get('link', '') or hit.get('url', '')

                    # Start from the search snippet; upgrade to scraped text if possible.
                    snippet = hit.get('body', '')
                    content = snippet
                    if url and url.startswith(('http://', 'https://')):
                        try:
                            page_title, page_text = self._scrape_page(url)
                            if page_text:
                                title = page_title or title
                                content = page_text
                        except Exception:
                            # Scraping is best-effort: keep the snippet.
                            content = snippet

                    if not content:
                        continue

                    collected.append({
                        "title": title,
                        "content": self._truncate_content(content, word_limit),
                        "url": url,
                    })
                except Exception:
                    # One malformed hit must not abort the whole search.
                    continue

            return {"results": collected, "error": None}

        except Exception as e:
            return {"results": [], "error": str(e)}
| |
|
|
class DDGSSearchTool(Tool):
    """Tool wrapper that exposes a ``SearchDDGS`` instance as ``ddgs_search``."""

    name: str = "ddgs_search"
    description: str = "Search using DDGS (Dux Distributed Global Search) which aggregates results from multiple search engines including DuckDuckGo, Google, Bing, and others"
    # Schema of the arguments accepted by __call__.
    inputs: Dict[str, Dict[str, str]] = {
        "query": {
            "type": "string",
            "description": "The search query to execute",
        },
        "num_search_pages": {
            "type": "integer",
            "description": "Number of search results to retrieve. Default: 5",
        },
        "max_content_words": {
            "type": "integer",
            "description": "Maximum number of words to include in content per result. None means no limit. Default: None",
        },
        "backend": {
            "type": "string",
            "description": "Search backend to use. Options: 'auto', 'duckduckgo', 'google', 'bing', 'brave', 'yahoo'. Default: 'auto'",
        },
        "region": {
            "type": "string",
            "description": "Search region (e.g., 'us-en', 'uk-en', 'ru-ru'). Default: 'us-en'",
        },
    }
    required: Optional[List[str]] = ["query"]

    def __init__(self, search_ddgs: SearchDDGS = None):
        """Store the ``SearchDDGS`` instance that performs the actual search."""
        super().__init__()
        self.search_ddgs = search_ddgs

    def __call__(self, query: str, num_search_pages: int = None, max_content_words: int = None, backend: str = None, region: str = None) -> Dict[str, Any]:
        """
        Delegate the search to the wrapped ``SearchDDGS`` instance.

        Raises:
            RuntimeError: If no search instance was supplied at construction.

        Returns:
            Dict[str, Any]: Whatever ``SearchDDGS.search`` returns, or an
            empty result list plus an error message if that call raises.
        """
        if not self.search_ddgs:
            raise RuntimeError("DDGS search instance not initialized")

        try:
            outcome = self.search_ddgs.search(
                query, num_search_pages, max_content_words, backend, region
            )
        except Exception as exc:
            return {"results": [], "error": f"Error executing DDGS search: {exc}"}
        return outcome
|
|
|
|
class DDGSSearchToolkit(Toolkit):
    """Toolkit bundling a single ``DDGSSearchTool`` around one shared ``SearchDDGS``."""

    def __init__(
        self,
        name: str = "DDGSSearchToolkit",
        num_search_pages: Optional[int] = 5,
        max_content_words: Optional[int] = None,
        backend: str = "auto",
        region: str = "us-en",
        **kwargs
    ):
        """
        Build the underlying search engine and register its tool.

        Args:
            name (str): Name of the toolkit
            num_search_pages (int): Default number of search results to retrieve
            max_content_words (int): Default cap on words kept per result
            backend (str): Default search backend
            region (str): Default search region
            **kwargs: Forwarded to the ``SearchDDGS`` constructor
        """
        # The engine is always named "DDGSSearch", independent of the toolkit name.
        engine = SearchDDGS(
            name="DDGSSearch",
            num_search_pages=num_search_pages,
            max_content_words=max_content_words,
            backend=backend,
            region=region,
            **kwargs
        )

        super().__init__(name=name, tools=[DDGSSearchTool(search_ddgs=engine)])

        # Set after the parent initializer has run; gives callers direct access to the engine.
        self.search_ddgs = engine
| |
|
|
class PERTSearchTool(Tool):
    """
    Look up the strongest regulators of a target gene in a pre-computed
    gene regulatory network (GRN) table loaded from CSV.
    """

    name: str = "pert_search"
    description: str = "Search gene regulatory network and return the gene-gene pair"
    inputs: Dict[str, Dict[str, str]] = {
        "source_gene_name": {
            "type": "string",
            "description": "name of perturbed gene"
        },
        "target_gene_name": {
            "type": "string",
            "description": "name of targeted gene"
        },
        "cell_line": {
            "type": "string",
            "description": "Name of selected cell line"
        },
    }
    required: Optional[List[str]] = ["source_gene_name", "target_gene_name", "cell_line"]

    def __init__(self, sourcekey='k562', toplist=20, grn_path: Optional[str] = None):
        """
        Load the GRN table used for lookups.

        Args:
            sourcekey: Cell-line key; used in the default file name and in the
                text returned by ``__call__``.
            toplist: Number of top-scoring rows to report per query.
            grn_path: Path to the GRN CSV. When omitted, the original
                hard-coded location ``.../pertqa/{sourcekey}_processed_grn.csv``
                is used, so existing callers are unaffected.

        Raises:
            Whatever ``pandas.read_csv`` raises if the file cannot be read.
        """
        super().__init__()
        self.toplist = toplist
        self.sourcekey = sourcekey
        if grn_path is None:
            # Legacy default: machine-specific absolute path kept for backward
            # compatibility; pass ``grn_path`` to run anywhere else.
            grn_path = f"/gpfs/radev/home/tl688/pitl688/selfevolve/EvoAgentX/examples/pertqa/{sourcekey}_processed_grn.csv"
        # First CSV column becomes the index; its labels are reported as regulator names.
        self.filelist = pd.read_csv(grn_path, index_col=0)

    def __call__(self, source_gene_name: str, target_gene_name: str, cell_line: str) -> Dict[str, Any]:
        """
        Return the ``toplist`` highest-scoring entries of the GRN column for
        ``target_gene_name``.

        Args:
            source_gene_name: Perturbed gene; only echoed to stdout, not used
                in the lookup.
            target_gene_name: Column of the GRN table to query.
            cell_line: Requested cell line; only echoed to stdout. The table
                loaded for ``self.sourcekey`` is always the one queried.

        Returns:
            Dict[str, Any]: On success ``{"results": <text table>, "error": None}``;
            on failure ``{"results": [], "error": <message>}``.
        """
        # NOTE: the two print calls are kept from the original; they echo the
        # query and the answer to stdout on every call.
        print(source_gene_name, target_gene_name, cell_line)
        try:
            if target_gene_name not in self.filelist.columns:
                # Clear message instead of a bare KeyError repr.
                return {
                    "results": [],
                    "error": (
                        "Error executing Perturbation searching: "
                        f"gene '{target_gene_name}' not found in the "
                        f"{self.sourcekey} gene regulatory network"
                    ),
                }

            top_scores = (
                self.filelist.loc[:, target_gene_name]
                .sort_values(ascending=False)
                .iloc[:self.toplist]
            )
            rows = [
                f'''{regulator} {target_gene_name} {score}\n'''
                for regulator, score in zip(top_scores.index, top_scores.values)
            ]
            searchinfo = (
                f'''The detected gene list and gene regulatory strength in cell line {self.sourcekey} is: '''
                '''RegulatorGeneName TargetGeneName Score\n'''
                + "".join(rows)
            )
            print(searchinfo)
            # "error": None mirrors the success payload of the DDGS search tool.
            return {"results": searchinfo, "error": None}
        except Exception as e:
            return {"results": [], "error": f"Error executing Perturbation searching: {str(e)}"}
| |
class PertToolkit(Toolkit):
    """Toolkit exposing a single ``PERTSearchTool``."""

    def __init__(
        self,
        name: str = "PertToolkit",
        sourcekey = "k562",
        toplist = 20,
        **kwargs
    ):
        """
        Create the perturbation search tool and register it.

        Args:
            name (str): Name of the toolkit
            sourcekey: Cell-line key passed to ``PERTSearchTool``
            toplist: Number of top entries passed to ``PERTSearchTool``
            **kwargs: Accepted but not used by this initializer
        """
        pert_tool = PERTSearchTool(sourcekey=sourcekey, toplist=toplist)
        super().__init__(name=name, tools=[pert_tool])
| |
|
|