| |
| |
| |
| |
| |
| |
| |
|
|
| import asyncio |
| import os |
| from typing import Dict, Any, Optional, List |
| from dotenv import load_dotenv |
|
|
| from .tool import Tool, Toolkit |
| from ..core.module import BaseModule |
| from ..core.logging import logger |
|
|
| |
| load_dotenv() |
|
|
|
|
class BrowserUseBase(BaseModule):
    """
    Base class for Browser Use interactions.

    Resolves the browser-use package at construction time, builds the chat
    model client matching the requested model name, and exposes async and
    sync entry points that run a browser-use ``Agent`` for a task.
    """

    def __init__(self, model: str = "gpt-4o-mini", api_key: Optional[str] = None,
                 browser_type: str = "chromium", headless: bool = True, **kwargs):
        """
        Initialize the BrowserUse base.

        Args:
            model: LLM model to use (gpt-4o-mini, claude-3-5-sonnet, etc.)
            api_key: API key for the LLM. When omitted, OpenAI-style models
                fall back to the OPENAI_API_KEY environment variable (read
                when the LLM is built, not at import time); for Anthropic
                models no key is passed and the client is left to resolve
                its own credentials.
            browser_type: Browser type (chromium, firefox, webkit)
            headless: Whether to run browser in headless mode
            **kwargs: Forwarded to the parent class initializer.

        Raises:
            ImportError: If neither browser_use nor browser_use_py310x can
                be imported.
        """
        super().__init__(**kwargs)

        # Prefer the main distribution; fall back to the Python 3.10 backport.
        try:
            from browser_use import Agent
            from browser_use.llm import ChatOpenAI, ChatAnthropic
        except ImportError:
            try:
                from browser_use_py310x import Agent
                from browser_use_py310x.llm import ChatOpenAI, ChatAnthropic
            except ImportError as e:
                logger.error("browser-use package not installed. For Python 3.11+: pip install browser-use, For Python 3.10: pip install browser-use-py310x")
                # Chain the cause so the original import failure stays visible.
                raise ImportError(f"browser-use package required: {e}") from e

        self.Agent = Agent
        self.ChatOpenAI = ChatOpenAI
        self.ChatAnthropic = ChatAnthropic

        self.model = model
        self.api_key = api_key
        self.browser_type = browser_type
        self.headless = headless

        self.llm = self._setup_llm()

        # Keyword arguments handed to the Agent on every execute_task call.
        self.browser_config = {
            "browser_type": browser_type,
            "headless": headless
        }

    def _setup_llm(self):
        """
        Build the chat model client that matches ``self.model``.

        Names containing "gpt" or "openai" use ChatOpenAI; names containing
        "claude" or "anthropic" use ChatAnthropic; anything else logs a
        warning and is treated as an OpenAI model.

        Returns:
            The constructed chat model client.

        Raises:
            Exception: Whatever the client constructor raises, after logging.
        """
        model_id = self.model.lower()
        is_openai = "gpt" in model_id or "openai" in model_id
        is_anthropic = not is_openai and ("claude" in model_id or "anthropic" in model_id)

        llm_kwargs = {"model": self.model}
        try:
            if is_anthropic:
                # Only an explicitly supplied key is forwarded: the OpenAI
                # environment key must never be sent to Anthropic.
                if self.api_key:
                    llm_kwargs["api_key"] = self.api_key
                return self.ChatAnthropic(**llm_kwargs)

            if not is_openai:
                logger.warning(f"Unknown model {self.model}, defaulting to OpenAI")

            # Resolve the environment fallback lazily so keys loaded or
            # changed after import are honoured.
            openai_key = self.api_key or os.getenv("OPENAI_API_KEY")
            if openai_key:
                llm_kwargs["api_key"] = openai_key
            return self.ChatOpenAI(**llm_kwargs)

        except Exception as e:
            logger.error(f"Failed to setup LLM: {e}")
            raise

    async def execute_task(self, task: str) -> Dict[str, Any]:
        """
        Execute a browser task using the Browser Use agent.

        Args:
            task: The task description for the browser agent

        Returns:
            ``{"success": True, "result": <agent result>}`` on success, or
            ``{"success": False, "error": <message>}`` if anything raised.
        """
        try:
            # NOTE(review): assumes Agent accepts browser_type/headless as
            # keyword arguments - confirm against the installed browser-use.
            agent = self.Agent(
                task=task,
                llm=self.llm,
                **self.browser_config
            )

            logger.info(f"Executing browser task: {task}")
            result = await agent.run()

            return {
                "success": True,
                "result": result
            }

        except Exception as e:
            # Failures are reported in the result dict rather than raised.
            logger.error(f"Browser task failed: {e}")
            return {
                "success": False,
                "error": str(e)
            }

    def execute_task_sync(self, task: str) -> Dict[str, Any]:
        """
        Synchronous wrapper for execute_task.

        Works both from plain synchronous code and from a thread that
        already has a running event loop; in the latter case the calling
        thread blocks until the task finishes.

        Args:
            task: The task description for the browser agent

        Returns:
            Dictionary containing task results
        """
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No loop is running in this thread, so asyncio.run is safe.
            return asyncio.run(self.execute_task(task))

        # A loop is already running here. asyncio.run and run_until_complete
        # both refuse to nest inside it, so drive the coroutine on a fresh
        # loop in a worker thread and wait for its result.
        from concurrent.futures import ThreadPoolExecutor

        with ThreadPoolExecutor(max_workers=1) as pool:
            future = pool.submit(lambda: asyncio.run(self.execute_task(task)))
            return future.result()
|
|
|
|
class BrowserUseTool(Tool):
    """Tool for executing browser automation tasks using natural language."""

    name: str = "browser_use"
    description: str = "Execute web browser automation tasks using natural language instructions"
    inputs: Dict[str, Dict[str, str]] = {
        "task": {
            "type": "string",
            "description": "Natural language description of the browser task to execute"
        }
    }
    required: Optional[List[str]] = ["task"]

    def __init__(self, browser_base: Optional[BrowserUseBase] = None):
        """
        Create the tool.

        Args:
            browser_base: Backend whose execute_task_sync runs the task.
                May be omitted, in which case calls report a failure
                result until one is assigned.
        """
        super().__init__()
        self.browser_base = browser_base

    def __call__(self, task: str) -> Dict[str, Any]:
        """
        Execute a browser automation task.

        Args:
            task: Natural language task description

        Returns:
            Dictionary with task execution results. Invalid input or a
            missing backend yields ``{"success": False, "error": ...}``
            instead of raising.
        """
        # Treat None / non-string input the same as a blank description
        # rather than failing on .strip().
        if not isinstance(task, str) or not task.strip():
            return {
                "success": False,
                "error": "Task description cannot be empty"
            }

        # browser_base defaults to None, so guard before dereferencing it.
        if self.browser_base is None:
            return {
                "success": False,
                "error": "Browser backend is not configured"
            }

        return self.browser_base.execute_task_sync(task)
|
|
|
|
class BrowserUseToolkit(Toolkit):
    """Toolkit exposing one BrowserUseTool backed by a single BrowserUseBase."""

    def __init__(self, name: str = "BrowserUseToolkit", model: str = "gpt-4o-mini",
                 api_key: str = None, browser_type: str = "chromium",
                 headless: bool = True):
        """
        Build the backend, wrap it in a tool, and register the tool.

        Args:
            name: Toolkit name
            model: LLM model to use
            api_key: API key for the LLM; forwarded unchanged to the backend
            browser_type: Browser type (chromium, firefox, webkit)
            headless: Whether to run browser in headless mode
        """
        # The backend is created before the parent initializer runs because
        # the tool list handed to it already needs the backend.
        backend_options = {
            "model": model,
            "api_key": api_key,
            "browser_type": browser_type,
            "headless": headless,
        }
        shared_base = BrowserUseBase(**backend_options)

        super().__init__(name=name, tools=[BrowserUseTool(browser_base=shared_base)])

        # Keep a handle on the backend the registered tool uses.
        self.browser_base = shared_base
|
|