Multi-Rag / src /Web /nodes /webBasedLoader_node.py
VashuTheGreat's picture
Clean commit without images
1f725d8
import asyncio
import logging

from langchain_community.document_loaders import WebBaseLoader
from langchain_community.document_loaders import YoutubeLoader

from src.Web.models.web_model import State
from src.Web.utils.main_utils import is_youtube_video
from utils.asyncHandler import asyncHandler
# Upper bound on the number of whitespace-separated words accepted from a page.
_MAX_CONTENT_WORDS = 2000


@asyncHandler
async def load_web_content(state: State) -> State:
    """Load the text content behind ``state['url']``.

    URLs for which ``is_youtube_video`` returns true are read with
    ``YoutubeLoader``; every other URL is read with ``WebBaseLoader``.
    Only the first document returned by the loader is used.

    Args:
        state: Mapping that provides the target address under ``'url'``.

    Returns:
        A dict holding the loaded text under ``'page_content'``.

    Raises:
        IndexError: If the loader returned no documents.
        OverflowError: If the text contains more than
            ``_MAX_CONTENT_WORDS`` whitespace-separated words.
    """
    logging.info("Entered in the load_web_content node")
    url = state['url']

    if is_youtube_video(url):
        loader = YoutubeLoader.from_youtube_url(url, add_video_info=False)
    else:
        loader = WebBaseLoader(url)

    # loader.load() is a synchronous call; run it in the default executor so
    # this coroutine does not stall the event loop while the fetch is running.
    docs = await asyncio.get_running_loop().run_in_executor(None, loader.load)

    # Fail with an explicit message instead of a bare "list index out of range".
    if not docs:
        raise IndexError(f"No content could be loaded from {url}")

    page_content = docs[0].page_content

    # Count words once; the same figure is used for the check and the message.
    word_count = len(page_content.split())
    if word_count > _MAX_CONTENT_WORDS:
        raise OverflowError(f"The content is too large for llm {word_count}")

    logging.info("Exited from the load_web_content node")
    return {"page_content": page_content}