File size: 312 Bytes
7fafb5b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17

import logging

import requests
import trafilatura

def extract_text(url, *, max_chars=4000, timeout=10):
    """Download *url* and return the main text content of the page.

    The page is fetched with ``requests`` and its HTML is passed to
    ``trafilatura.extract``. This is a best-effort helper: any failure is
    logged and reported to the caller as ``None`` rather than raised.

    Args:
        url: Address of the page to download.
        max_chars: Maximum number of characters of extracted text to return.
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        The extracted text truncated to ``max_chars`` characters, or ``None``
        when the request fails, the server answers with an HTTP error status,
        or no text could be extracted.
    """
    logger = logging.getLogger(__name__)

    try:
        response = requests.get(url, timeout=timeout)
        # Without this check the body of a 4xx/5xx error page would be
        # extracted and returned as if it were the requested content.
        response.raise_for_status()
    except requests.RequestException as exc:
        logger.warning("Failed to download %s: %s", url, exc)
        return None

    try:
        text = trafilatura.extract(response.text)
    except Exception:
        # Keep the best-effort contract: a parser failure on malformed
        # markup must not crash the caller, but it should leave a trace.
        logger.exception("Failed to extract text from %s", url)
        return None

    return text[:max_chars] if text else None