Sauten committed on
Commit
25e6cf1
·
verified ·
1 Parent(s): 6051317

Create toolVisitWebpage.py

Browse files
Files changed (1) hide show
  1. toolVisitWebpage.py +44 -0
toolVisitWebpage.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import tool
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+
5
+ MAX_WEBPAGE_SIZE = 3000 # max characters to return from scraped content
6
+
7
@tool
def visit_webpage(url: str) -> dict:
    """
    Visits a webpage and extracts clean text from it.

    Args:
        url: The URL of the page to visit (e.g., https://en.wikipedia.org/wiki/OpenAI)

    Returns:
        dict: A dictionary containing:
            - "text": Page text truncated to MAX_WEBPAGE_SIZE characters ("" on failure)
            - "url": The original URL
            - "status": HTTP status or error info
    """
    print(f" Tool:visit_webpage visiting {url}...")

    try:
        response = requests.get(url, timeout=10)
        # Turn 4xx/5xx responses into exceptions so they are reported through
        # the error branch below instead of being scraped as page content.
        response.raise_for_status()

        # Parse the raw bytes rather than `response.text`: when the server
        # sends no charset, Requests decodes text/* bodies as ISO-8859-1,
        # which garbles UTF-8 pages. Given bytes, BeautifulSoup detects the
        # encoding itself (including <meta charset>). An explicit charset in
        # the Content-Type header is still honored via `from_encoding`.
        content_type = response.headers.get("Content-Type", "")
        header_charset = (
            response.encoding if "charset=" in content_type.lower() else None
        )
        soup = BeautifulSoup(
            response.content, "html.parser", from_encoding=header_charset
        )

        text = soup.get_text(separator="\n", strip=True)
        # Cap the payload so a large page cannot flood the caller.
        short_text = text[:MAX_WEBPAGE_SIZE]

        print(f"✅ Extracted {len(short_text)} characters from {url}")
        return {
            "text": short_text,
            "url": url,
            "status": f"Success ({response.status_code})",
        }

    except Exception as e:
        # Deliberate best-effort boundary: any failure (network, HTTP status,
        # parsing) is reported in the result dict rather than raised.
        print(f"🚨 Error in visit_webpage: {e}")
        return {
            "text": "",
            "url": url,
            "status": f"Error: {e}",
        }