# Bas95's picture
# Deploy Tripplanner backend
# 649703e verified
"""
BrowserSession — async Playwright wrapper for the Tripplanner browser-automation layer.
AI subagents (claude-haiku with tool_use) call the exposed methods to navigate
delta.com, ihg.com, and resy.com. A persistent Chromium profile keeps users
logged in between runs.
Usage:
async with BrowserSession(user_data_dir="/tmp/tripplanner-profile") as browser:
await browser.navigate("https://www.delta.com")
text = await browser.get_page_text()
"""
from __future__ import annotations
import base64
import os
from pathlib import Path
from typing import Any
from playwright.async_api import async_playwright, BrowserContext, Page, TimeoutError as PWTimeoutError
class BrowserSession:
    """Async context manager wrapping a Chromium browser.

    On Render (or any headless environment), set BROWSER_USER_DATA_DIR to a
    persistent disk path and upload sessions.json via /api/session/upload.
    Locally, run scripts/session_setup.py once to log in interactively.

    Every browser action method returns its outcome as a value (a string, a
    list of strings, or a bool) and reports failures as ``"Error: ..."`` text
    instead of raising, so results can be handed straight back to the model
    that requested the tool call.
    """

    # Names that ``dispatch_tool`` is allowed to route to. Kept in the same
    # order as ``tool_definitions()``; anything not listed here is rejected.
    _TOOL_NAMES = (
        "navigate",
        "click",
        "fill",
        "get_text",
        "get_page_text",
        "wait_for",
        "select_option",
        "press_key",
        "scroll_down",
        "screenshot_base64",
    )

    # Maximum number of characters returned by ``get_page_text``.
    _PAGE_TEXT_LIMIT = 8000

    def __init__(
        self,
        user_data_dir: str,
        headless: bool | None = None,
        viewport: dict[str, int] | None = None,
        slow_mo: float = 0,
    ) -> None:
        """Store launch settings; no browser is started until ``__aenter__``.

        Args:
            user_data_dir: Directory holding the persistent Chromium profile
                and, when present, ``sessions.json``.
            headless: Force headless on/off. ``None`` auto-detects (see below).
            viewport: Viewport size; defaults to 1280x900.
            slow_mo: Passed through to Playwright's ``slow_mo`` launch option.
        """
        self._user_data_dir = user_data_dir
        if headless is None:
            # Auto-detect: run headless when no DISPLAY is set or when the
            # RENDER env var is "true".
            # NOTE(review): DISPLAY is an X11 convention, so this presumably
            # also selects headless on macOS/Windows — confirm that is intended.
            self._headless = not bool(os.getenv("DISPLAY", "")) or os.getenv("RENDER") == "true"
        else:
            self._headless = headless
        self._viewport = viewport or {"width": 1280, "height": 900}
        self._slow_mo = slow_mo
        self._pw: Any = None
        # Only set in the storage-state branch of __aenter__, where the browser
        # is launched separately from its context and must be closed by us.
        self._browser: Any = None
        self._context: BrowserContext | None = None
        self._page: Page | None = None

    def _session_file(self) -> Path | None:
        """Return path to sessions.json if available.

        Priority:
        1. BROWSER_SESSION_JSON env var (base64-encoded) — for HF Spaces / ephemeral envs.
           The decoded bytes are written to ``<user_data_dir>/sessions.json``.
        2. sessions.json on disk (uploaded via /api/session/upload).

        Returns ``None`` when neither source exists. Invalid base64 in the env
        var propagates the decoder's exception.
        """
        target = Path(self._user_data_dir) / "sessions.json"

        # 1. Decode from env var (HF Spaces stores secrets as env vars).
        encoded = os.getenv("BROWSER_SESSION_JSON", "")
        if encoded:
            target.parent.mkdir(parents=True, exist_ok=True)
            target.write_bytes(base64.b64decode(encoded))
            return target

        # 2. File on disk (Render persistent disk or local upload).
        return target if target.exists() else None

    def _require_page(self) -> Page:
        """Return the active page, or raise if the session was never started."""
        page = self._page
        if page is None:
            raise RuntimeError(
                "BrowserSession is not started; use it as 'async with BrowserSession(...)'."
            )
        return page

    # ------------------------------------------------------------------
    # Async context manager
    # ------------------------------------------------------------------
    async def __aenter__(self) -> "BrowserSession":
        """Start Playwright, open a browser context, and select a page.

        If anything fails after the Playwright driver has started, everything
        opened so far is torn down before the exception propagates; otherwise
        the driver process would be leaked, because ``__aexit__`` is not called
        when ``__aenter__`` raises.
        """
        self._pw = await async_playwright().start()
        try:
            session_file = self._session_file()
            if session_file:
                # Render / deployed: use a regular (non-persistent) context with saved storage state.
                self._browser = await self._pw.chromium.launch(
                    headless=self._headless,
                    slow_mo=self._slow_mo,
                )
                self._context = await self._browser.new_context(
                    storage_state=str(session_file),
                    viewport=self._viewport,
                )
            else:
                # Local: use a persistent profile so the user stays logged in.
                self._context = await self._pw.chromium.launch_persistent_context(
                    self._user_data_dir,
                    headless=self._headless,
                    viewport=self._viewport,
                    slow_mo=self._slow_mo,
                )
            # Reuse the context's first page if it already has one.
            existing_pages = self._context.pages
            self._page = existing_pages[0] if existing_pages else await self._context.new_page()
        except BaseException:
            await self._shutdown()
            raise
        return self

    async def __aexit__(self, *_: Any) -> None:
        """Close the context (and browser, if any) and stop Playwright."""
        await self._shutdown()

    async def _shutdown(self) -> None:
        """Release context, browser and driver, continuing past individual failures.

        Handles are cleared first so the session reports "not started" afterwards
        and a second call is a no-op. The first exception raised while closing
        still propagates once the remaining resources have been released.
        """
        context, browser, pw = self._context, self._browser, self._pw
        self._page = None
        self._context = None
        self._browser = None
        self._pw = None
        try:
            if context:
                await context.close()
        finally:
            try:
                if browser:
                    await browser.close()
            finally:
                if pw:
                    await pw.stop()

    # ------------------------------------------------------------------
    # Browser action methods
    # ------------------------------------------------------------------
    async def navigate(self, url: str) -> str:
        """Navigate to a URL and return the page title.

        Returns "(no title)" for an empty title, or "Error: ..." on failure.
        """
        # NOTE(review): the URL is not restricted to http(s); a model-supplied
        # file:// URL would be loaded as-is. Consider validating the scheme.
        try:
            page = self._require_page()
            await page.goto(url, wait_until="domcontentloaded")
            title = await page.title()
            return title or "(no title)"
        except Exception as exc:
            return f"Error: {exc}"

    async def click(self, selector: str | None = None, text: str | None = None) -> str:
        """Click an element by CSS selector OR by visible text (not both).

        Text matching is non-exact and clicks the first match.
        Returns a confirmation string or an error message.
        """
        if selector and text:
            return "Error: provide selector OR text, not both."
        if not selector and not text:
            return "Error: provide selector or text."
        try:
            page = self._require_page()
            if selector:
                await page.click(selector)
                return f"Clicked selector: {selector}"
            await page.get_by_text(text, exact=False).first.click()
            return f"Clicked element with text: {text!r}"
        except Exception as exc:
            return f"Error: {exc}"

    async def fill(self, selector: str, value: str) -> str:
        """Clear and fill an input field, then return confirmation.

        The value is deliberately not echoed back in the confirmation.
        """
        try:
            await self._require_page().fill(selector, value)
            return f"Filled {selector!r} with value."
        except Exception as exc:
            return f"Error: {exc}"

    async def get_text(self, selector: str, limit: int = 20) -> list[str]:
        """Return the inner text of all elements matching *selector*, up to *limit* items.

        A negative *limit* is treated as 0. On failure a single-element list
        containing the error message is returned.
        """
        try:
            texts = await self._require_page().locator(selector).all_inner_texts()
            # Clamp so a negative limit cannot slice items off the end instead.
            return texts[: max(limit, 0)]
        except Exception as exc:
            return [f"Error: {exc}"]

    async def get_page_text(self) -> str:
        """Return visible page text (body), truncated to 8000 characters."""
        try:
            text = await self._require_page().inner_text("body")
            return text[: self._PAGE_TEXT_LIMIT]
        except Exception as exc:
            return f"Error: {exc}"

    async def wait_for(self, selector: str, timeout: int = 10000) -> bool:
        """Wait until *selector* appears in the DOM.

        *timeout* is in milliseconds. Returns True on success and False on
        timeout or on any other failure (including an unstarted session).
        """
        try:
            await self._require_page().wait_for_selector(selector, timeout=timeout)
            return True
        except Exception:
            # Playwright's timeout error and every other failure both map to False.
            return False

    async def select_option(self, selector: str, value: str) -> str:
        """Select an <option> by value in a <select> element."""
        try:
            await self._require_page().select_option(selector, value=value)
            return f"Selected option {value!r} in {selector!r}."
        except Exception as exc:
            return f"Error: {exc}"

    async def press_key(self, key: str) -> str:
        """Send a keyboard key to the focused element (e.g. 'Enter', 'Tab')."""
        try:
            await self._require_page().keyboard.press(key)
            return f"Pressed key: {key}"
        except Exception as exc:
            return f"Error: {exc}"

    async def scroll_down(self) -> str:
        """Scroll the page down by ~800 px using a mouse-wheel event."""
        try:
            await self._require_page().mouse.wheel(0, 800)
            return "Scrolled down."
        except Exception as exc:
            return f"Error: {exc}"

    async def screenshot_base64(self) -> str:
        """Take a full-page screenshot and return it as a base64-encoded PNG string."""
        try:
            png_bytes = await self._require_page().screenshot(full_page=True)
            return base64.b64encode(png_bytes).decode()
        except Exception as exc:
            return f"Error: {exc}"

    # ------------------------------------------------------------------
    # Anthropic tool definitions
    # ------------------------------------------------------------------
    @staticmethod
    def _tool_spec(
        name: str,
        description: str,
        properties: dict[str, dict] | None = None,
        required: tuple[str, ...] = (),
    ) -> dict:
        """Build one tool definition with an object-typed ``input_schema``."""
        return {
            "name": name,
            "description": description,
            "input_schema": {
                "type": "object",
                "properties": dict(properties or {}),
                "required": list(required),
            },
        }

    @classmethod
    def tool_definitions(cls) -> list[dict]:
        """Return Anthropic-compatible tool definitions for all browser methods.

        Pass the result directly to ``client.messages.create(tools=...)``.
        A fresh list is built on every call, so callers may mutate it freely.
        """

        def string(description: str) -> dict:
            return {"type": "string", "description": description}

        def integer(description: str) -> dict:
            return {"type": "integer", "description": description}

        spec = cls._tool_spec
        return [
            spec(
                "navigate",
                "Navigate the browser to a URL and return the page title.",
                {"url": string("The full URL to navigate to (e.g. 'https://www.delta.com').")},
                ("url",),
            ),
            spec(
                "click",
                (
                    "Click an element on the page. Provide EITHER 'selector' (CSS) "
                    "OR 'text' (visible text match), not both."
                ),
                {
                    "selector": string("CSS selector of the element to click."),
                    "text": string("Visible text of the element to click."),
                },
            ),
            spec(
                "fill",
                "Clear and type a value into an input or textarea element.",
                {
                    "selector": string("CSS selector of the input element."),
                    "value": string("The text to enter into the field."),
                },
                ("selector", "value"),
            ),
            spec(
                "get_text",
                (
                    "Return the inner text of all elements matching a CSS selector, "
                    "up to 'limit' results."
                ),
                {
                    "selector": string("CSS selector to match elements."),
                    "limit": integer("Maximum number of text strings to return (default 20)."),
                },
                ("selector",),
            ),
            spec(
                "get_page_text",
                "Return the full visible text of the current page, truncated to 8000 characters.",
            ),
            spec(
                "wait_for",
                (
                    "Wait for a CSS selector to appear in the DOM. "
                    "Returns true on success, false on timeout."
                ),
                {
                    "selector": string("CSS selector to wait for."),
                    "timeout": integer("Maximum wait time in milliseconds (default 10000)."),
                },
                ("selector",),
            ),
            spec(
                "select_option",
                "Select an option by value in a <select> element.",
                {
                    "selector": string("CSS selector of the <select> element."),
                    "value": string("The option value to select."),
                },
                ("selector", "value"),
            ),
            spec(
                "press_key",
                "Send a keyboard key press to the focused element (e.g. 'Enter', 'Tab', 'Escape').",
                {"key": string("The key to press (Playwright key name, e.g. 'Enter', 'Tab').")},
                ("key",),
            ),
            spec(
                "scroll_down",
                "Scroll the current page down by approximately 800 pixels.",
            ),
            spec(
                "screenshot_base64",
                "Take a full-page screenshot and return it as a base64-encoded PNG string (for debugging).",
            ),
        ]

    # ------------------------------------------------------------------
    # Tool dispatcher
    # ------------------------------------------------------------------
    async def dispatch_tool(self, name: str, input: dict) -> str:
        """Dispatch a tool call by name to the matching method.

        Args:
            name: One of the tool names from ``tool_definitions()``.
            input: Keyword arguments for the tool; ``None`` is treated as ``{}``
                so parameterless tools can be called without an argument dict.

        Returns the result as a string (lists are joined with newlines, other
        values go through ``str``). Unknown names and bad inputs are caught and
        returned as error strings.
        """
        if name not in self._TOOL_NAMES:
            return f"Error: unknown tool '{name}'."
        method = getattr(self, name)
        try:
            result = await method(**(input or {}))
        except TypeError as exc:
            return f"Error: bad arguments for '{name}': {exc}"
        except Exception as exc:
            return f"Error: {exc}"
        # Coerce non-string results (bool, list) to strings for uniform AI consumption.
        if isinstance(result, list):
            return "\n".join(str(item) for item in result)
        return str(result)

    # Aliases used by subagents
    async def execute_tool(self, name: str, input: dict) -> str:
        """Alias for ``dispatch_tool``."""
        return await self.dispatch_tool(name, input)

    @property
    def tools(self) -> list[dict]:
        """Tool definitions, exposed as a property."""
        return BrowserSession.tool_definitions()

    def get_tool_definitions(self) -> list[dict]:
        """Tool definitions, exposed as a method."""
        return BrowserSession.tool_definitions()

    @property
    def gemini_tools(self):
        """Return google-genai Tool objects for all browser methods.

        ``google.genai`` is imported lazily so it is only required when this
        property is actually used.
        """
        from google.genai import types as gtypes

        declarations = []
        for spec in BrowserSession.tool_definitions():
            fields = {"name": spec["name"], "description": spec["description"]}
            schema = spec["input_schema"]
            # NOTE(review): parameterless tools omit `parameters`. The Gemini
            # function-calling API has been reported to reject an OBJECT schema
            # whose `properties` is empty — confirm against the current API.
            if schema.get("properties"):
                fields["parameters"] = schema
            declarations.append(gtypes.FunctionDeclaration(**fields))
        return [gtypes.Tool(function_declarations=declarations)]