File size: 896 Bytes
1f725d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
from langchain_community.document_loaders import WebBaseLoader
from utils.asyncHandler import asyncHandler
from src.Web.models.web_model import State
from src.Web.utils.main_utils import is_youtube_video
from langchain_community.document_loaders import YoutubeLoader
import logging

@asyncHandler
async def load_web_content(state:State)->State:
    """Load the text content behind ``state['url']``.

    URLs for which ``is_youtube_video`` returns a truthy value are loaded
    with ``YoutubeLoader``; every other URL is loaded with ``WebBaseLoader``.
    Only the first document returned by the loader is used.

    Args:
        state: Mapping that must contain the key ``'url'``.

    Returns:
        A dict with the single key ``'page_content'`` holding the loaded text.

    Raises:
        ValueError: If the loader returned no documents for the URL.
        OverflowError: If the content has more than 2000
            whitespace-separated words.
    """
    logging.info("Entered in the load_web_content node")

    # Upper bound on whitespace-separated words accepted for the LLM.
    max_words = 2000

    url = state['url']
    if is_youtube_video(url):
        loader = YoutubeLoader.from_youtube_url(
            url, add_video_info=False
        )
    else:
        loader = WebBaseLoader(url)

    # NOTE(review): load() looks like a synchronous (blocking) call inside a
    # coroutine — confirm whether it should be offloaded to a worker thread.
    docs = loader.load()

    # Guard against an empty result instead of failing with a bare IndexError.
    if not docs:
        raise ValueError(f"No content could be loaded from {url}")
    page_content = docs[0].page_content

    # Split once; the count is needed for both the check and the message.
    word_count = len(page_content.split())
    if word_count > max_words:
        raise OverflowError(f"The content is too large for llm {word_count}")

    logging.info("Exited from the load_web_content node")

    return {"page_content": page_content}