TharaKavin committed on
Commit
e008495
·
verified ·
1 Parent(s): e475b3e

Update scraper.py

Browse files
Files changed (1) hide show
  1. scraper.py +19 -16
scraper.py CHANGED
@@ -1,17 +1,20 @@
1
- from scrapling.fetchers import Fetcher
2
-
3
def scrape_url(url: str) -> str:
    """Fetch *url* and return the text nodes under ``<body>`` as one string.

    Each text node matched by the ``body *::text`` selector is stripped of
    surrounding whitespace; empty results are dropped and the remainder is
    joined with single spaces. If any step raises an ``Exception``, the error
    is printed and an empty string is returned instead.
    """
    try:
        response = Fetcher.get(url)
        fragments = response.css("body *::text").getall()
        # Strip once per fragment, then keep only the non-empty pieces.
        stripped = (fragment.strip() for fragment in fragments)
        return " ".join(piece for piece in stripped if piece)
    except Exception as e:
        print("SCRAPING ERROR:", e)
    return ""
 
1
+ from scrapling.fetchers import Fetcher
2
+
3
def scrape_url(url: str) -> str:
    """Fetch *url* and return the text found under ``<body>`` as one string.

    Text nodes are selected with the ``body *::text`` CSS selector, stripped
    of surrounding whitespace, filtered of empty/``None`` entries, and joined
    with single spaces.

    Args:
        url: Address of the page to fetch.

    Returns:
        The joined text, or ``""`` if fetching or extraction raised an
        ``Exception`` (the error is printed rather than re-raised).
    """
    try:
        page = Fetcher.get(url)

        # Run the selector once; both extraction paths reuse the same result.
        nodes = page.css("body *::text")

        try:
            # Preferred path: the selection result exposes getall().
            texts = nodes.getall()
        except Exception:
            # Fallback, presumably for library versions whose selection result
            # lacks getall() -- TODO confirm which versions need this.
            # `except Exception` (not a bare `except:`) so that
            # KeyboardInterrupt / SystemExit are not swallowed here.
            texts = [t.get() for t in nodes]

        # `t and ...` guards against None entries coming from the fallback.
        cleaned = [t.strip() for t in texts if t and t.strip()]

        return " ".join(cleaned)

    except Exception as e:
        print("SCRAPING ERROR:", e)
        return ""