Spaces:
Sleeping
Sleeping
| """ | |
| BrowserSession — async Playwright wrapper for the Tripplanner browser-automation layer. | |
| AI subagents (claude-haiku with tool_use) call the exposed methods to navigate | |
| delta.com, ihg.com, and resy.com. A persistent Chromium profile keeps users | |
| logged in between runs. | |
| Usage: | |
| async with BrowserSession(user_data_dir="/tmp/tripplanner-profile") as browser: | |
| await browser.navigate("https://www.delta.com") | |
| text = await browser.get_page_text() | |
| """ | |
| from __future__ import annotations | |
| import base64 | |
| import os | |
| from pathlib import Path | |
| from typing import Any | |
| from playwright.async_api import async_playwright, BrowserContext, Page, TimeoutError as PWTimeoutError | |
| class BrowserSession: | |
| """Async context manager wrapping a Chromium browser. | |
| On Render (or any headless environment), set BROWSER_USER_DATA_DIR to a | |
| persistent disk path and upload sessions.json via /api/session/upload. | |
| Locally, run scripts/session_setup.py once to log in interactively. | |
| """ | |
| def __init__( | |
| self, | |
| user_data_dir: str, | |
| headless: bool | None = None, | |
| viewport: dict[str, int] | None = None, | |
| slow_mo: float = 0, | |
| ) -> None: | |
| self._user_data_dir = user_data_dir | |
| # Auto-detect headless: if no DISPLAY env var, force headless (i.e. on Render) | |
| if headless is None: | |
| self._headless = not bool(os.getenv("DISPLAY", "")) or os.getenv("RENDER") == "true" | |
| else: | |
| self._headless = headless | |
| self._viewport = viewport or {"width": 1280, "height": 900} | |
| self._slow_mo = slow_mo | |
| self._pw = None | |
| self._context: BrowserContext | None = None | |
| self._page: Page | None = None | |
| def _session_file(self) -> Path | None: | |
| """Return path to sessions.json if available. | |
| Priority: | |
| 1. BROWSER_SESSION_JSON env var (base64-encoded) — for HF Spaces / ephemeral envs | |
| 2. sessions.json on disk (uploaded via /api/session/upload) | |
| """ | |
| # 1. Decode from env var (HF Spaces stores secrets as env vars) | |
| b64 = os.getenv("BROWSER_SESSION_JSON", "") | |
| if b64: | |
| out = Path(self._user_data_dir) / "sessions.json" | |
| out.parent.mkdir(parents=True, exist_ok=True) | |
| out.write_bytes(base64.b64decode(b64)) | |
| return out | |
| # 2. File on disk (Render persistent disk or local upload) | |
| p = Path(self._user_data_dir) / "sessions.json" | |
| return p if p.exists() else None | |
| # ------------------------------------------------------------------ | |
| # Async context manager | |
| # ------------------------------------------------------------------ | |
| async def __aenter__(self) -> "BrowserSession": | |
| self._pw = await async_playwright().start() | |
| session_file = self._session_file() | |
| if session_file: | |
| # Render / deployed: use a regular (non-persistent) context with saved storage state. | |
| browser = await self._pw.chromium.launch( | |
| headless=self._headless, | |
| slow_mo=self._slow_mo, | |
| ) | |
| self._context = await browser.new_context( | |
| storage_state=str(session_file), | |
| viewport=self._viewport, | |
| ) | |
| else: | |
| # Local: use a persistent profile so the user stays logged in. | |
| self._context = await self._pw.chromium.launch_persistent_context( | |
| self._user_data_dir, | |
| headless=self._headless, | |
| viewport=self._viewport, | |
| slow_mo=self._slow_mo, | |
| ) | |
| if self._context.pages: | |
| self._page = self._context.pages[0] | |
| else: | |
| self._page = await self._context.new_page() | |
| return self | |
| async def __aexit__(self, *_: Any) -> None: | |
| if self._context: | |
| await self._context.close() | |
| if self._pw: | |
| await self._pw.stop() | |
| # ------------------------------------------------------------------ | |
| # Browser action methods | |
| # ------------------------------------------------------------------ | |
| async def navigate(self, url: str) -> str: | |
| """Navigate to a URL and return the page title.""" | |
| try: | |
| await self._page.goto(url, wait_until="domcontentloaded") | |
| title = await self._page.title() | |
| return title or "(no title)" | |
| except Exception as exc: | |
| return f"Error: {exc}" | |
| async def click(self, selector: str | None = None, text: str | None = None) -> str: | |
| """Click an element by CSS selector OR by visible text (not both). | |
| Returns a confirmation string or an error message. | |
| """ | |
| try: | |
| if selector and text: | |
| return "Error: provide selector OR text, not both." | |
| if selector: | |
| await self._page.click(selector) | |
| return f"Clicked selector: {selector}" | |
| if text: | |
| await self._page.get_by_text(text, exact=False).first.click() | |
| return f"Clicked element with text: {text!r}" | |
| return "Error: provide selector or text." | |
| except Exception as exc: | |
| return f"Error: {exc}" | |
| async def fill(self, selector: str, value: str) -> str: | |
| """Clear and fill an input field, then return confirmation.""" | |
| try: | |
| await self._page.fill(selector, value) | |
| return f"Filled {selector!r} with value." | |
| except Exception as exc: | |
| return f"Error: {exc}" | |
| async def get_text(self, selector: str, limit: int = 20) -> list[str]: | |
| """Return the inner text of all elements matching *selector*, up to *limit* items.""" | |
| try: | |
| texts = await self._page.locator(selector).all_inner_texts() | |
| return texts[:limit] | |
| except Exception as exc: | |
| return [f"Error: {exc}"] | |
| async def get_page_text(self) -> str: | |
| """Return visible page text (body), truncated to 8000 characters.""" | |
| try: | |
| text = await self._page.inner_text("body") | |
| return text[:8000] | |
| except Exception as exc: | |
| return f"Error: {exc}" | |
| async def wait_for(self, selector: str, timeout: int = 10000) -> bool: | |
| """Wait until *selector* appears in the DOM. Returns True on success, False on timeout.""" | |
| try: | |
| await self._page.wait_for_selector(selector, timeout=timeout) | |
| return True | |
| except PWTimeoutError: | |
| return False | |
| except Exception: | |
| return False | |
| async def select_option(self, selector: str, value: str) -> str: | |
| """Select an <option> by value in a <select> element.""" | |
| try: | |
| await self._page.select_option(selector, value=value) | |
| return f"Selected option {value!r} in {selector!r}." | |
| except Exception as exc: | |
| return f"Error: {exc}" | |
| async def press_key(self, key: str) -> str: | |
| """Send a keyboard key to the focused element (e.g. 'Enter', 'Tab').""" | |
| try: | |
| await self._page.keyboard.press(key) | |
| return f"Pressed key: {key}" | |
| except Exception as exc: | |
| return f"Error: {exc}" | |
| async def scroll_down(self) -> str: | |
| """Scroll the page down by ~800 px.""" | |
| try: | |
| await self._page.mouse.wheel(0, 800) | |
| return "Scrolled down." | |
| except Exception as exc: | |
| return f"Error: {exc}" | |
| async def screenshot_base64(self) -> str: | |
| """Take a full-page screenshot and return it as a base64-encoded PNG string.""" | |
| try: | |
| png_bytes = await self._page.screenshot(full_page=True) | |
| return base64.b64encode(png_bytes).decode() | |
| except Exception as exc: | |
| return f"Error: {exc}" | |
| # ------------------------------------------------------------------ | |
| # Anthropic tool definitions | |
| # ------------------------------------------------------------------ | |
| def tool_definitions(cls) -> list[dict]: | |
| """Return Anthropic-compatible tool definitions for all browser methods. | |
| Pass the result directly to ``client.messages.create(tools=...)``. | |
| """ | |
| return [ | |
| { | |
| "name": "navigate", | |
| "description": "Navigate the browser to a URL and return the page title.", | |
| "input_schema": { | |
| "type": "object", | |
| "properties": { | |
| "url": { | |
| "type": "string", | |
| "description": "The full URL to navigate to (e.g. 'https://www.delta.com').", | |
| } | |
| }, | |
| "required": ["url"], | |
| }, | |
| }, | |
| { | |
| "name": "click", | |
| "description": ( | |
| "Click an element on the page. Provide EITHER 'selector' (CSS) " | |
| "OR 'text' (visible text match), not both." | |
| ), | |
| "input_schema": { | |
| "type": "object", | |
| "properties": { | |
| "selector": { | |
| "type": "string", | |
| "description": "CSS selector of the element to click.", | |
| }, | |
| "text": { | |
| "type": "string", | |
| "description": "Visible text of the element to click.", | |
| }, | |
| }, | |
| "required": [], | |
| }, | |
| }, | |
| { | |
| "name": "fill", | |
| "description": "Clear and type a value into an input or textarea element.", | |
| "input_schema": { | |
| "type": "object", | |
| "properties": { | |
| "selector": { | |
| "type": "string", | |
| "description": "CSS selector of the input element.", | |
| }, | |
| "value": { | |
| "type": "string", | |
| "description": "The text to enter into the field.", | |
| }, | |
| }, | |
| "required": ["selector", "value"], | |
| }, | |
| }, | |
| { | |
| "name": "get_text", | |
| "description": ( | |
| "Return the inner text of all elements matching a CSS selector, " | |
| "up to 'limit' results." | |
| ), | |
| "input_schema": { | |
| "type": "object", | |
| "properties": { | |
| "selector": { | |
| "type": "string", | |
| "description": "CSS selector to match elements.", | |
| }, | |
| "limit": { | |
| "type": "integer", | |
| "description": "Maximum number of text strings to return (default 20).", | |
| }, | |
| }, | |
| "required": ["selector"], | |
| }, | |
| }, | |
| { | |
| "name": "get_page_text", | |
| "description": "Return the full visible text of the current page, truncated to 8000 characters.", | |
| "input_schema": { | |
| "type": "object", | |
| "properties": {}, | |
| "required": [], | |
| }, | |
| }, | |
| { | |
| "name": "wait_for", | |
| "description": ( | |
| "Wait for a CSS selector to appear in the DOM. " | |
| "Returns true on success, false on timeout." | |
| ), | |
| "input_schema": { | |
| "type": "object", | |
| "properties": { | |
| "selector": { | |
| "type": "string", | |
| "description": "CSS selector to wait for.", | |
| }, | |
| "timeout": { | |
| "type": "integer", | |
| "description": "Maximum wait time in milliseconds (default 10000).", | |
| }, | |
| }, | |
| "required": ["selector"], | |
| }, | |
| }, | |
| { | |
| "name": "select_option", | |
| "description": "Select an option by value in a <select> element.", | |
| "input_schema": { | |
| "type": "object", | |
| "properties": { | |
| "selector": { | |
| "type": "string", | |
| "description": "CSS selector of the <select> element.", | |
| }, | |
| "value": { | |
| "type": "string", | |
| "description": "The option value to select.", | |
| }, | |
| }, | |
| "required": ["selector", "value"], | |
| }, | |
| }, | |
| { | |
| "name": "press_key", | |
| "description": "Send a keyboard key press to the focused element (e.g. 'Enter', 'Tab', 'Escape').", | |
| "input_schema": { | |
| "type": "object", | |
| "properties": { | |
| "key": { | |
| "type": "string", | |
| "description": "The key to press (Playwright key name, e.g. 'Enter', 'Tab').", | |
| } | |
| }, | |
| "required": ["key"], | |
| }, | |
| }, | |
| { | |
| "name": "scroll_down", | |
| "description": "Scroll the current page down by approximately 800 pixels.", | |
| "input_schema": { | |
| "type": "object", | |
| "properties": {}, | |
| "required": [], | |
| }, | |
| }, | |
| { | |
| "name": "screenshot_base64", | |
| "description": "Take a full-page screenshot and return it as a base64-encoded PNG string (for debugging).", | |
| "input_schema": { | |
| "type": "object", | |
| "properties": {}, | |
| "required": [], | |
| }, | |
| }, | |
| ] | |
| # ------------------------------------------------------------------ | |
| # Tool dispatcher | |
| # ------------------------------------------------------------------ | |
| async def dispatch_tool(self, name: str, input: dict) -> str: | |
| """Dispatch a tool call by name to the matching method. | |
| Returns the result as a string (serialised if necessary). | |
| Unknown names and bad inputs are caught and returned as error strings. | |
| """ | |
| _dispatch: dict[str, Any] = { | |
| "navigate": self.navigate, | |
| "click": self.click, | |
| "fill": self.fill, | |
| "get_text": self.get_text, | |
| "get_page_text": self.get_page_text, | |
| "wait_for": self.wait_for, | |
| "select_option": self.select_option, | |
| "press_key": self.press_key, | |
| "scroll_down": self.scroll_down, | |
| "screenshot_base64": self.screenshot_base64, | |
| } | |
| method = _dispatch.get(name) | |
| if method is None: | |
| return f"Error: unknown tool '{name}'." | |
| try: | |
| result = await method(**input) | |
| # Coerce non-string results (bool, list) to strings for uniform AI consumption. | |
| if isinstance(result, list): | |
| return "\n".join(str(item) for item in result) | |
| return str(result) | |
| except TypeError as exc: | |
| return f"Error: bad arguments for '{name}': {exc}" | |
| except Exception as exc: | |
| return f"Error: {exc}" | |
| # Aliases used by subagents | |
| async def execute_tool(self, name: str, input: dict) -> str: | |
| return await self.dispatch_tool(name, input) | |
| def tools(self) -> list[dict]: | |
| return BrowserSession.tool_definitions() | |
| def get_tool_definitions(self) -> list[dict]: | |
| return BrowserSession.tool_definitions() | |
| def gemini_tools(self): | |
| """Return google-genai Tool objects for all browser methods.""" | |
| from google.genai import types as gtypes | |
| return [gtypes.Tool(function_declarations=[ | |
| gtypes.FunctionDeclaration( | |
| name=t["name"], | |
| description=t["description"], | |
| parameters=t["input_schema"], | |
| ) | |
| for t in BrowserSession.tool_definitions() | |
| ])] | |