{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "3d9158b7",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "c:\\Users\\vansh\\Projects\\AIAgents\\.venv\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n"
     ]
    }
   ],
   "source": [
    "import logging\n",
    "\n",
    "from langchain_aws import ChatBedrockConverse\n",
    "\n",
    "LLM_MODEL_ID = \"us.meta.llama3-3-70b-instruct-v1:0\"\n",
    "LLM_REGION = \"us-east-1\"\n",
    "\n",
    "# The root logger defaults to WARNING, so a bare logging.info() call is dropped\n",
    "# without producing any output. Install a root handler (a no-op if one already\n",
    "# exists) and enable INFO only on this notebook's own logger, so third-party\n",
    "# libraries keep their default verbosity.\n",
    "logging.basicConfig()\n",
    "logger = logging.getLogger(__name__)\n",
    "logger.setLevel(logging.INFO)\n",
    "\n",
    "# Chat model client for the configured Bedrock model and region.\n",
    "llm = ChatBedrockConverse(\n",
    "    model_id=LLM_MODEL_ID,\n",
    "    region_name=LLM_REGION,\n",
    ")\n",
    "logger.info(\"LLM initialized with model_id=%s, region_name=%s\", LLM_MODEL_ID, LLM_REGION)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "cad5e68d",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "WARNING:langchain_community.utils.user_agent:USER_AGENT environment variable not set, consider setting it to identify your requests.\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "\n",
    "# langchain_community logs a warning while this import runs when USER_AGENT is\n",
    "# unset. Define it first, without overriding a value that is already present,\n",
    "# so outgoing requests are identifiable.\n",
    "os.environ.setdefault(\"USER_AGENT\", \"langchain-document-loaders-notebook\")\n",
    "\n",
    "from langchain_community.document_loaders import WebBaseLoader"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "2a289acb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Point the loader at the WebBaseLoader integration docs page.\n",
    "loader = WebBaseLoader(\n",
    "    \"https://docs.langchain.com/oss/python/integrations/document_loaders/web_base\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "d1fcbd5f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[Document(metadata={'source': 'https://docs.langchain.com/oss/python/integrations/document_loaders/web_base', 'title': 'WebBaseLoader integration - Docs by LangChain', 'description': 'Integrate with the WebBaseLoader document loader using LangChain Python.', 'language': 'en'}, page_content='WebBaseLoader integration - Docs by LangChainSkip to main contentDocs by LangChain home pageOpen sourceSearch...⌘KAsk AIGitHubTry LangSmithTry LangSmithSearch...NavigationWebBaseLoader integrationDeep AgentsLangChainLangGraphIntegrationsLearnReferenceContributePythonLangChain integrationsAll providersPopular ProvidersOpenAIAnthropicGoogleAWSHugging FaceMicrosoftOllamaGroqNVIDIAIntegrations by componentChat modelsTools and toolkitsMiddlewareRetrieversText splittersEmbedding modelsVector storesDocument loadersKey-value storesOn this pageOverviewIntegration detailsLoader featuresSetupCredentialsInstallationInitializationInitialization with multiple pagesLoadLoad multiple urls concurrentlyLoading a xml file, or using a different BeautifulSoup parserLazy loadAsyncUsing proxiesAPI referenceWebBaseLoader integrationCopy pageIntegrate with the WebBaseLoader document loader using LangChain Python.Copy pageThis covers how to use WebBaseLoader to load all text from HTML webpages into a document format that we can use downstream. 
For more custom logic for loading webpages look at some child class examples such as IMSDbLoader, AZLyricsLoader, and CollegeConfidentialLoader.\\nIf you don’t want to worry about website crawling, bypassing JS-blocking sites, and data cleaning, consider using FireCrawlLoader or the faster option SpiderLoader.\\n\\u200bOverview\\n\\u200bIntegration details\\n\\nTODO: Fill in table features.\\nTODO: Remove JS support link if not relevant, otherwise ensure link is correct.\\nTODO: Make sure API reference links are correct.\\n\\nClassPackageLocalSerializableJS supportWebBaseLoaderlangchain-community✅❌❌\\n\\u200bLoader features\\nSourceDocument Lazy LoadingNative Async SupportWebBaseLoader✅✅\\n\\u200bSetup\\n\\u200bCredentials\\nWebBaseLoader does not require any credentials.\\n\\u200bInstallation\\nTo use the WebBaseLoader you first need to install the langchain-community python package.\\nCopypip install -qU langchain-community beautifulsoup4\\n\\n\\u200bInitialization\\nNow we can instantiate our model object and load documents:\\nCopyfrom langchain_community.document_loaders import WebBaseLoader\\n\\nloader = WebBaseLoader(\"https://www.example.com/\")\\n\\nTo bypass SSL verification errors during fetching, you can set the “verify” option:\\nloader.requests_kwargs = {\\'verify\\':False}\\n\\u200bInitialization with multiple pages\\nYou can also pass in a list of pages to load from.\\nCopyloader_multiple_pages = WebBaseLoader(\\n    [\"https://www.example.com/\", \"https://google.com\"]\\n)\\n\\n\\u200bLoad\\nCopydocs = loader.load()\\n\\ndocs[0]\\n\\nCopyDocument(metadata={\\'source\\': \\'https://www.example.com/\\', \\'title\\': \\'Example Domain\\', \\'language\\': \\'No language found.\\'}, page_content=\\'\\\\n\\\\n\\\\nExample Domain\\\\n\\\\n\\\\n\\\\n\\\\n\\\\n\\\\n\\\\nExample Domain\\\\nThis domain is for use in illustrative examples in documents. 
You may use this\\\\n    domain in literature without prior coordination or asking for permission.\\\\nMore information...\\\\n\\\\n\\\\n\\\\n\\')\\n\\nCopyprint(docs[0].metadata)\\n\\nCopy{\\'source\\': \\'https://www.example.com/\\', \\'title\\': \\'Example Domain\\', \\'language\\': \\'No language found.\\'}\\n\\n\\u200bLoad multiple urls concurrently\\nYou can speed up the scraping process by scraping and parsing multiple urls concurrently.\\nThere are reasonable limits to concurrent requests, defaulting to 2 per second.  If you aren’t concerned about being a good citizen, or you control the server you are scraping and don’t care about load, you can change the requests_per_second parameter to increase the max concurrent requests.  Note, while this will speed up the scraping process, but may cause the server to block you.  Be careful!\\nCopypip install -qU  nest_asyncio\\n\\n# fixes a bug with asyncio and jupyter\\nimport nest_asyncio\\n\\nnest_asyncio.apply()\\n\\nCopyloader = WebBaseLoader([\"https://www.example.com/\", \"https://google.com\"])\\nloader.requests_per_second = 1\\ndocs = loader.aload()\\ndocs\\n\\nCopyFetching pages: 100%|###########################################################################| 2/2 [00:00<00:00,  8.28it/s]\\n\\nCopy[Document(metadata={\\'source\\': \\'https://www.example.com/\\', \\'title\\': \\'Example Domain\\', \\'language\\': \\'No language found.\\'}, page_content=\\'\\\\n\\\\n\\\\nExample Domain\\\\n\\\\n\\\\n\\\\n\\\\n\\\\n\\\\n\\\\nExample Domain\\\\nThis domain is for use in illustrative examples in documents. You may use this\\\\n    domain in literature without prior coordination or asking for permission.\\\\nMore information...\\\\n\\\\n\\\\n\\\\n\\'),\\n Document(metadata={\\'source\\': \\'https://google.com\\', \\'title\\': \\'Google\\', \\'description\\': \"Search the world\\'s information, including webpages, images, videos and more. 
Google has many special features to help you find exactly what you\\'re looking for.\", \\'language\\': \\'en\\'}, page_content=\\'GoogleSearch Images Maps Play YouTube News Gmail Drive More »Web History | Settings | Sign in\\\\xa0Advanced search5 ways Gemini can help during the HolidaysAdvertisingBusiness SolutionsAbout Google© 2024 - Privacy - Terms  \\')]\\n\\n\\u200bLoading a xml file, or using a different BeautifulSoup parser\\nYou can also look at SitemapLoader for an example of how to load a sitemap file, which is an example of using this feature.\\nCopyloader = WebBaseLoader(\\n    \"https://www.govinfo.gov/content/pkg/CFR-2018-title10-vol3/xml/CFR-2018-title10-vol3-sec431-86.xml\"\\n)\\nloader.default_parser = \"xml\"\\ndocs = loader.load()\\ndocs\\n\\nCopy[Document(metadata={\\'source\\': \\'https://www.govinfo.gov/content/pkg/CFR-2018-title10-vol3/xml/CFR-2018-title10-vol3-sec431-86.xml\\'}, page_content=\\'\\\\n\\\\n10\\\\nEnergy\\\\n3\\\\n2018-01-01\\\\n2018-01-01\\\\nfalse\\\\nUniform test method for the measurement of energy efficiency of commercial packaged boilers.\\\\nÂ§ 431.86\\\\nSection Â§ 431.86\\\\n\\\\nEnergy\\\\nDEPARTMENT OF ENERGY\\\\nENERGY CONSERVATION\\\\nENERGY EFFICIENCY PROGRAM FOR CERTAIN COMMERCIAL AND INDUSTRIAL EQUIPMENT\\\\nCommercial Packaged Boilers\\\\nTest Procedures\\\\n\\\\n\\\\n\\\\n\\\\n§\\\\u2009431.86\\\\nUniform test method for the measurement of energy efficiency of commercial packaged boilers.\\\\n(a) Scope. This section provides test procedures, pursuant to the Energy Policy and Conservation Act (EPCA), as amended, which must be followed for measuring the combustion efficiency and/or thermal efficiency of a gas- or oil-fired commercial packaged boiler.\\\\n(b) Testing and Calculations. 
Determine the thermal efficiency or combustion efficiency of commercial packaged boilers by conducting the appropriate test procedure(s) indicated in Table 1 of this section.\\\\n\\\\nTable 1—Test Requirements for Commercial Packaged Boiler Equipment Classes\\\\n\\\\nEquipment category\\\\nSubcategory\\\\nCertified rated inputBtu/h\\\\n\\\\nStandards efficiency metric(§\\\\u2009431.87)\\\\n\\\\nTest procedure(corresponding to\\\\nstandards efficiency\\\\nmetric required\\\\nby §\\\\u2009431.87)\\\\n\\\\n\\\\n\\\\nHot Water\\\\nGas-fired\\\\n≥300,000 and ≤2,500,000\\\\nThermal Efficiency\\\\nAppendix A, Section 2.\\\\n\\\\n\\\\nHot Water\\\\nGas-fired\\\\n>2,500,000\\\\nCombustion Efficiency\\\\nAppendix A, Section 3.\\\\n\\\\n\\\\nHot Water\\\\nOil-fired\\\\n≥300,000 and ≤2,500,000\\\\nThermal Efficiency\\\\nAppendix A, Section 2.\\\\n\\\\n\\\\nHot Water\\\\nOil-fired\\\\n>2,500,000\\\\nCombustion Efficiency\\\\nAppendix A, Section 3.\\\\n\\\\n\\\\nSteam\\\\nGas-fired (all*)\\\\n≥300,000 and ≤2,500,000\\\\nThermal Efficiency\\\\nAppendix A, Section 2.\\\\n\\\\n\\\\nSteam\\\\nGas-fired (all*)\\\\n>2,500,000 and ≤5,000,000\\\\nThermal Efficiency\\\\nAppendix A, Section 2.\\\\n\\\\n\\\\n\\\\u2003\\\\n\\\\n>5,000,000\\\\nThermal Efficiency\\\\nAppendix A, Section 2.OR\\\\nAppendix A, Section 3 with Section 2.4.3.2.\\\\n\\\\n\\\\n\\\\nSteam\\\\nOil-fired\\\\n≥300,000 and ≤2,500,000\\\\nThermal Efficiency\\\\nAppendix A, Section 2.\\\\n\\\\n\\\\nSteam\\\\nOil-fired\\\\n>2,500,000 and ≤5,000,000\\\\nThermal Efficiency\\\\nAppendix A, Section 2.\\\\n\\\\n\\\\n\\\\u2003\\\\n\\\\n>5,000,000\\\\nThermal Efficiency\\\\nAppendix A, Section 2.OR\\\\nAppendix A, Section 3. with Section 2.4.3.2.\\\\n\\\\n\\\\n\\\\n*\\\\u2009Equipment classes for commercial packaged boilers as of July 22, 2009 (74 FR 36355) distinguish between gas-fired natural draft and all other gas-fired (except natural draft).\\\\n\\\\n(c) Field Tests. 
The field test provisions of appendix A may be used only to test a unit of commercial packaged boiler with rated input greater than 5,000,000 Btu/h.\\\\n[81 FR 89305, Dec. 9, 2016]\\\\n\\\\n\\\\nEnergy Efficiency Standards\\\\n\\\\n\\')]\\n\\n\\u200bLazy load\\nYou can use lazy loading to only load one page at a time in order to minimize memory requirements.\\nCopypages = []\\nfor doc in loader.lazy_load():\\n    pages.append(doc)\\n\\nprint(pages[0].page_content[:100])\\nprint(pages[0].metadata)\\n\\nCopy10\\nEnergy\\n3\\n2018-01-01\\n2018-01-01\\nfalse\\nUniform test method for the measurement of energy efficien\\n{\\'source\\': \\'https://www.govinfo.gov/content/pkg/CFR-2018-title10-vol3/xml/CFR-2018-title10-vol3-sec431-86.xml\\'}\\n\\n\\u200bAsync\\nCopypages = []\\nasync for doc in loader.alazy_load():\\n    pages.append(doc)\\n\\nprint(pages[0].page_content[:100])\\nprint(pages[0].metadata)\\n\\nCopyFetching pages: 100%|###########################################################################| 1/1 [00:00<00:00, 10.51it/s]\\n\\nCopy10\\nEnergy\\n3\\n2018-01-01\\n2018-01-01\\nfalse\\nUniform test method for the measurement of energy efficien\\n{\\'source\\': \\'https://www.govinfo.gov/content/pkg/CFR-2018-title10-vol3/xml/CFR-2018-title10-vol3-sec431-86.xml\\'}\\n\\n\\u200bUsing proxies\\nSometimes you might need to use proxies to get around IP blocks. 
You can pass in a dictionary of proxies to the loader (and requests underneath) to use them.\\nCopyloader = WebBaseLoader(\\n        \"https://www.walmart.com/search?q=parrots\",\\n        proxies={\\n        \"http\": \"http://{username}:{password}:@proxy.service.com:6666/\",\\n        \"https\": \"https://{username}:{password}:@proxy.service.com:6666/\",\\n    },\\n)\\ndocs = loader.load()\\n\\n\\n\\u200bAPI reference\\nFor detailed documentation of all WebBaseLoader features and configurations head to the API reference: python.langchain.com/api_reference/community/document_loaders/langchain_community.document_loaders.web_base.WebBaseLoader.html\\n\\nEdit this page on GitHub or file an issue.Connect these docs to Claude, VSCode, and more via MCP for real-time answers.Was this page helpful?YesNo⌘IDocs by LangChain home pagegithubxlinkedinyoutubeResourcesForumChangelogLangChain AcademyTrust CenterCompanyHomeAboutCareersBloggithubxlinkedinyoutube\\n')]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the configured page into a list of Document objects and display it.\n",
    "docs = loader.load()\n",
    "docs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "0ee58d3a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'WebBaseLoader integration - Docs by LangChainSkip to main contentDocs by LangChain home pageOpen sourceSearch...⌘KAsk AIGitHubTry LangSmithTry LangSmithSearch...NavigationWebBaseLoader integrationDeep AgentsLangChainLangGraphIntegrationsLearnReferenceContributePythonLangChain integrationsAll providersPopular ProvidersOpenAIAnthropicGoogleAWSHugging FaceMicrosoftOllamaGroqNVIDIAIntegrations by componentChat modelsTools and toolkitsMiddlewareRetrieversText splittersEmbedding modelsVector storesDocument loadersKey-value storesOn this pageOverviewIntegration detailsLoader featuresSetupCredentialsInstallationInitializationInitialization with multiple pagesLoadLoad multiple urls concurrentlyLoading a xml file, or using a different BeautifulSoup parserLazy loadAsyncUsing proxiesAPI referenceWebBaseLoader integrationCopy pageIntegrate with the WebBaseLoader document loader using LangChain Python.Copy pageThis covers how to use WebBaseLoader to load all text from HTML webpages into a document format that we can use downstream. 
For more custom logic for loading webpages look at some child class examples such as IMSDbLoader, AZLyricsLoader, and CollegeConfidentialLoader.\\nIf you don’t want to worry about website crawling, bypassing JS-blocking sites, and data cleaning, consider using FireCrawlLoader or the faster option SpiderLoader.\\n\\u200bOverview\\n\\u200bIntegration details\\n\\nTODO: Fill in table features.\\nTODO: Remove JS support link if not relevant, otherwise ensure link is correct.\\nTODO: Make sure API reference links are correct.\\n\\nClassPackageLocalSerializableJS supportWebBaseLoaderlangchain-community✅❌❌\\n\\u200bLoader features\\nSourceDocument Lazy LoadingNative Async SupportWebBaseLoader✅✅\\n\\u200bSetup\\n\\u200bCredentials\\nWebBaseLoader does not require any credentials.\\n\\u200bInstallation\\nTo use the WebBaseLoader you first need to install the langchain-community python package.\\nCopypip install -qU langchain-community beautifulsoup4\\n\\n\\u200bInitialization\\nNow we can instantiate our model object and load documents:\\nCopyfrom langchain_community.document_loaders import WebBaseLoader\\n\\nloader = WebBaseLoader(\"https://www.example.com/\")\\n\\nTo bypass SSL verification errors during fetching, you can set the “verify” option:\\nloader.requests_kwargs = {\\'verify\\':False}\\n\\u200bInitialization with multiple pages\\nYou can also pass in a list of pages to load from.\\nCopyloader_multiple_pages = WebBaseLoader(\\n    [\"https://www.example.com/\", \"https://google.com\"]\\n)\\n\\n\\u200bLoad\\nCopydocs = loader.load()\\n\\ndocs[0]\\n\\nCopyDocument(metadata={\\'source\\': \\'https://www.example.com/\\', \\'title\\': \\'Example Domain\\', \\'language\\': \\'No language found.\\'}, page_content=\\'\\\\n\\\\n\\\\nExample Domain\\\\n\\\\n\\\\n\\\\n\\\\n\\\\n\\\\n\\\\nExample Domain\\\\nThis domain is for use in illustrative examples in documents. 
You may use this\\\\n    domain in literature without prior coordination or asking for permission.\\\\nMore information...\\\\n\\\\n\\\\n\\\\n\\')\\n\\nCopyprint(docs[0].metadata)\\n\\nCopy{\\'source\\': \\'https://www.example.com/\\', \\'title\\': \\'Example Domain\\', \\'language\\': \\'No language found.\\'}\\n\\n\\u200bLoad multiple urls concurrently\\nYou can speed up the scraping process by scraping and parsing multiple urls concurrently.\\nThere are reasonable limits to concurrent requests, defaulting to 2 per second.  If you aren’t concerned about being a good citizen, or you control the server you are scraping and don’t care about load, you can change the requests_per_second parameter to increase the max concurrent requests.  Note, while this will speed up the scraping process, but may cause the server to block you.  Be careful!\\nCopypip install -qU  nest_asyncio\\n\\n# fixes a bug with asyncio and jupyter\\nimport nest_asyncio\\n\\nnest_asyncio.apply()\\n\\nCopyloader = WebBaseLoader([\"https://www.example.com/\", \"https://google.com\"])\\nloader.requests_per_second = 1\\ndocs = loader.aload()\\ndocs\\n\\nCopyFetching pages: 100%|###########################################################################| 2/2 [00:00<00:00,  8.28it/s]\\n\\nCopy[Document(metadata={\\'source\\': \\'https://www.example.com/\\', \\'title\\': \\'Example Domain\\', \\'language\\': \\'No language found.\\'}, page_content=\\'\\\\n\\\\n\\\\nExample Domain\\\\n\\\\n\\\\n\\\\n\\\\n\\\\n\\\\n\\\\nExample Domain\\\\nThis domain is for use in illustrative examples in documents. You may use this\\\\n    domain in literature without prior coordination or asking for permission.\\\\nMore information...\\\\n\\\\n\\\\n\\\\n\\'),\\n Document(metadata={\\'source\\': \\'https://google.com\\', \\'title\\': \\'Google\\', \\'description\\': \"Search the world\\'s information, including webpages, images, videos and more. 
Google has many special features to help you find exactly what you\\'re looking for.\", \\'language\\': \\'en\\'}, page_content=\\'GoogleSearch Images Maps Play YouTube News Gmail Drive More »Web History | Settings | Sign in\\\\xa0Advanced search5 ways Gemini can help during the HolidaysAdvertisingBusiness SolutionsAbout Google© 2024 - Privacy - Terms  \\')]\\n\\n\\u200bLoading a xml file, or using a different BeautifulSoup parser\\nYou can also look at SitemapLoader for an example of how to load a sitemap file, which is an example of using this feature.\\nCopyloader = WebBaseLoader(\\n    \"https://www.govinfo.gov/content/pkg/CFR-2018-title10-vol3/xml/CFR-2018-title10-vol3-sec431-86.xml\"\\n)\\nloader.default_parser = \"xml\"\\ndocs = loader.load()\\ndocs\\n\\nCopy[Document(metadata={\\'source\\': \\'https://www.govinfo.gov/content/pkg/CFR-2018-title10-vol3/xml/CFR-2018-title10-vol3-sec431-86.xml\\'}, page_content=\\'\\\\n\\\\n10\\\\nEnergy\\\\n3\\\\n2018-01-01\\\\n2018-01-01\\\\nfalse\\\\nUniform test method for the measurement of energy efficiency of commercial packaged boilers.\\\\nÂ§ 431.86\\\\nSection Â§ 431.86\\\\n\\\\nEnergy\\\\nDEPARTMENT OF ENERGY\\\\nENERGY CONSERVATION\\\\nENERGY EFFICIENCY PROGRAM FOR CERTAIN COMMERCIAL AND INDUSTRIAL EQUIPMENT\\\\nCommercial Packaged Boilers\\\\nTest Procedures\\\\n\\\\n\\\\n\\\\n\\\\n§\\\\u2009431.86\\\\nUniform test method for the measurement of energy efficiency of commercial packaged boilers.\\\\n(a) Scope. This section provides test procedures, pursuant to the Energy Policy and Conservation Act (EPCA), as amended, which must be followed for measuring the combustion efficiency and/or thermal efficiency of a gas- or oil-fired commercial packaged boiler.\\\\n(b) Testing and Calculations. 
Determine the thermal efficiency or combustion efficiency of commercial packaged boilers by conducting the appropriate test procedure(s) indicated in Table 1 of this section.\\\\n\\\\nTable 1—Test Requirements for Commercial Packaged Boiler Equipment Classes\\\\n\\\\nEquipment category\\\\nSubcategory\\\\nCertified rated inputBtu/h\\\\n\\\\nStandards efficiency metric(§\\\\u2009431.87)\\\\n\\\\nTest procedure(corresponding to\\\\nstandards efficiency\\\\nmetric required\\\\nby §\\\\u2009431.87)\\\\n\\\\n\\\\n\\\\nHot Water\\\\nGas-fired\\\\n≥300,000 and ≤2,500,000\\\\nThermal Efficiency\\\\nAppendix A, Section 2.\\\\n\\\\n\\\\nHot Water\\\\nGas-fired\\\\n>2,500,000\\\\nCombustion Efficiency\\\\nAppendix A, Section 3.\\\\n\\\\n\\\\nHot Water\\\\nOil-fired\\\\n≥300,000 and ≤2,500,000\\\\nThermal Efficiency\\\\nAppendix A, Section 2.\\\\n\\\\n\\\\nHot Water\\\\nOil-fired\\\\n>2,500,000\\\\nCombustion Efficiency\\\\nAppendix A, Section 3.\\\\n\\\\n\\\\nSteam\\\\nGas-fired (all*)\\\\n≥300,000 and ≤2,500,000\\\\nThermal Efficiency\\\\nAppendix A, Section 2.\\\\n\\\\n\\\\nSteam\\\\nGas-fired (all*)\\\\n>2,500,000 and ≤5,000,000\\\\nThermal Efficiency\\\\nAppendix A, Section 2.\\\\n\\\\n\\\\n\\\\u2003\\\\n\\\\n>5,000,000\\\\nThermal Efficiency\\\\nAppendix A, Section 2.OR\\\\nAppendix A, Section 3 with Section 2.4.3.2.\\\\n\\\\n\\\\n\\\\nSteam\\\\nOil-fired\\\\n≥300,000 and ≤2,500,000\\\\nThermal Efficiency\\\\nAppendix A, Section 2.\\\\n\\\\n\\\\nSteam\\\\nOil-fired\\\\n>2,500,000 and ≤5,000,000\\\\nThermal Efficiency\\\\nAppendix A, Section 2.\\\\n\\\\n\\\\n\\\\u2003\\\\n\\\\n>5,000,000\\\\nThermal Efficiency\\\\nAppendix A, Section 2.OR\\\\nAppendix A, Section 3. with Section 2.4.3.2.\\\\n\\\\n\\\\n\\\\n*\\\\u2009Equipment classes for commercial packaged boilers as of July 22, 2009 (74 FR 36355) distinguish between gas-fired natural draft and all other gas-fired (except natural draft).\\\\n\\\\n(c) Field Tests. 
The field test provisions of appendix A may be used only to test a unit of commercial packaged boiler with rated input greater than 5,000,000 Btu/h.\\\\n[81 FR 89305, Dec. 9, 2016]\\\\n\\\\n\\\\nEnergy Efficiency Standards\\\\n\\\\n\\')]\\n\\n\\u200bLazy load\\nYou can use lazy loading to only load one page at a time in order to minimize memory requirements.\\nCopypages = []\\nfor doc in loader.lazy_load():\\n    pages.append(doc)\\n\\nprint(pages[0].page_content[:100])\\nprint(pages[0].metadata)\\n\\nCopy10\\nEnergy\\n3\\n2018-01-01\\n2018-01-01\\nfalse\\nUniform test method for the measurement of energy efficien\\n{\\'source\\': \\'https://www.govinfo.gov/content/pkg/CFR-2018-title10-vol3/xml/CFR-2018-title10-vol3-sec431-86.xml\\'}\\n\\n\\u200bAsync\\nCopypages = []\\nasync for doc in loader.alazy_load():\\n    pages.append(doc)\\n\\nprint(pages[0].page_content[:100])\\nprint(pages[0].metadata)\\n\\nCopyFetching pages: 100%|###########################################################################| 1/1 [00:00<00:00, 10.51it/s]\\n\\nCopy10\\nEnergy\\n3\\n2018-01-01\\n2018-01-01\\nfalse\\nUniform test method for the measurement of energy efficien\\n{\\'source\\': \\'https://www.govinfo.gov/content/pkg/CFR-2018-title10-vol3/xml/CFR-2018-title10-vol3-sec431-86.xml\\'}\\n\\n\\u200bUsing proxies\\nSometimes you might need to use proxies to get around IP blocks. 
You can pass in a dictionary of proxies to the loader (and requests underneath) to use them.\\nCopyloader = WebBaseLoader(\\n        \"https://www.walmart.com/search?q=parrots\",\\n        proxies={\\n        \"http\": \"http://{username}:{password}:@proxy.service.com:6666/\",\\n        \"https\": \"https://{username}:{password}:@proxy.service.com:6666/\",\\n    },\\n)\\ndocs = loader.load()\\n\\n\\n\\u200bAPI reference\\nFor detailed documentation of all WebBaseLoader features and configurations head to the API reference: python.langchain.com/api_reference/community/document_loaders/langchain_community.document_loaders.web_base.WebBaseLoader.html\\n\\nEdit this page on GitHub or file an issue.Connect these docs to Claude, VSCode, and more via MCP for real-time answers.Was this page helpful?YesNo⌘IDocs by LangChain home pagegithubxlinkedinyoutubeResourcesForumChangelogLangChain AcademyTrust CenterCompanyHomeAboutCareersBloggithubxlinkedinyoutube\\n'"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Text extracted for the first loaded document.\n",
    "first_doc = docs[0]\n",
    "first_doc.page_content"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "775a77b9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(docs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "47202522",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "992"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Rough size check: number of whitespace-separated tokens in the first document.\n",
    "page_words = docs[0].page_content.split()\n",
    "len(page_words)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "46f0d682",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_community.document_loaders import YoutubeLoader"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "c0cb3d1f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Transcript-only loader: with add_video_info=False, load() skips the extra\n",
    "# video-metadata lookup (title, description, ...).\n",
    "youtube_url = \"https://www.youtube.com/watch?v=QsYGlZkevEg\"\n",
    "loader = YoutubeLoader.from_youtube_url(youtube_url, add_video_info=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "496ebb6d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[Document(metadata={'source': 'QsYGlZkevEg'}, page_content='LADIES AND GENTLEMEN, PEDRO PASCAL! [ CHEERS AND APPLAUSE ] >> THANK YOU, THANK YOU. THANK YOU VERY MUCH. I\\'M SO EXCITED TO BE HERE. THANK YOU. I SPENT THE LAST YEAR SHOOTING A SHOW CALLED \"THE LAST OF US\" ON HBO. FOR SOME HBO SHOES, YOU GET TO SHOOT IN A FIVE STAR ITALIAN RESORT SURROUNDED BY BEAUTIFUL PEOPLE, BUT I SAID, NO, THAT\\'S TOO EASY. I WANT TO SHOOT IN A FREEZING CANADIAN FOREST WHILE BEING CHASED AROUND BY A GUY WHOSE HEAD LOOKS LIKE A GENITAL WART. IT IS AN HONOR BEING A PART OF THESE HUGE FRANCHISEs LIKE \"GAME OF THRONES\" AND \"STAR WARS,\" BUT I\\'M STILL GETTING USED TO PEOPLE RECOGNIZING ME. THE OTHER DAY, A GUY STOPPED ME ON THE STREET AND SAYS, MY SON LOVES \"THE MANDALORIAN\" AND THE NEXT THING I KNOW, I\\'M FACE TIMING WITH A 6-YEAR-OLD WHO HAS NO IDEA WHO I AM BECAUSE MY CHARACTER WEARS A MASK THE ENTIRE SHOW. THE GUY IS LIKE, DO THE MANDO VOICE, BUT IT\\'S LIKE A BEDROOM VOICE. WITHOUT THE MASK, IT JUST SOUNDS PORNY. PEOPLE WALKING BY ON THE STREET SEE ME WHISPERING TO A 6-YEAR-OLD KID. I CAN BRING YOU IN WARM, OR I CAN BRING YOU IN COLD. EVEN THOUGH I CAME TO THE U.S. WHEN I WAS LITTLE, I WAS BORN IN CHILE, AND I HAVE 34 FIRST COUSINS WHO ARE STILL THERE. THEY\\'RE VERY PROUD OF ME. I KNOW THEY\\'RE PROUD BECAUSE THEY GIVE MY PHONE NUMBER TO EVERY PERSON THEY MEET, WHICH MEANS EVERY DAY, SOMEONE IN SANTIAGO WILL TEXT ME STUFF LIKE, CAN YOU COME TO MY WEDDING, OR CAN YOU SING MY PRIEST HAPPY BIRTHDAY, OR IS BABY YODA MEAN IN REAL LIFE. SO I HAVE TO BE LIKE NO, NO, AND HIS NAME IS GROGU. BUT MY COUSINS WEREN\\'T ALWAYS SO PROUD. EARLY IN MY CAREER, I PLAYED SMALL PARTS IN EVERY CRIME SHOW. I EVEN PLAYED TWO DIFFERENT CHARACTERS ON \"LAW AND ORDER.\" TITO CABASSA WHO LOOKED LIKE THIS. AND ONE YEAR LATER, I PLAYED REGGIE LUCKMAN WHO LOOKS LIKE THIS. AND THAT, MY FRIENDS, IS CALLED RANGE. BUT IT IS AMAZING TO BE HERE, LIKE I SAID. 
I WAS BORN IN CHILE, AND NINE MONTHS LATER, MY PARENTS FLED AND BROUGHT ME AND MY SISTER TO THE U.S. THEY WERE SO BRAVE, AND WITHOUT THEM, I WOULDN\\'T BE HERE IN THIS WONDERFUL COUNTRY, AND I CERTAINLY WOULDN\\'T BE STANDING HERE WITH YOU ALL TONIGHT. SO TO ALL MY FAMILY WATCHING IN CHILE, I WANT TO SAY [ SPEAKING NON-ENGLISH ] WHICH MEANS, I LOVE YOU, I MISS YOU, AND STOP GIVING OUT MY PHONE NUMBER. WE\\'VE GOT AN AMAZING SHOW FOR YOU TONIGHT. COLDPLAY IS HERE, SO STICK')]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the video's documents; the resulting list is shown as the cell output.\n",
    "youtube_docs = loader.load()\n",
    "youtube_docs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "3c4920b7",
   "metadata": {},
   "outputs": [
    {
     "ename": "HTTPError",
     "evalue": "HTTP Error 400: Bad Request",
     "output_type": "error",
     "traceback": [
      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
      "\u001b[31mHTTPError\u001b[39m                                 Traceback (most recent call last)",
      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[8]\u001b[39m\u001b[32m, line 4\u001b[39m\n\u001b[32m      1\u001b[39m loader = YoutubeLoader.from_youtube_url(\n\u001b[32m      2\u001b[39m     \u001b[33m\"\u001b[39m\u001b[33mhttps://www.youtube.com/watch?v=QsYGlZkevEg\u001b[39m\u001b[33m\"\u001b[39m, add_video_info=\u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[32m      3\u001b[39m )\n\u001b[32m----> \u001b[39m\u001b[32m4\u001b[39m \u001b[43mloader\u001b[49m\u001b[43m.\u001b[49m\u001b[43mload\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\vansh\\Projects\\AIAgents\\.venv\\Lib\\site-packages\\langchain_community\\document_loaders\\youtube.py:258\u001b[39m, in \u001b[36mYoutubeLoader.load\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m    250\u001b[39m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m(\n\u001b[32m    251\u001b[39m         \u001b[33m'\u001b[39m\u001b[33mCould not import \u001b[39m\u001b[33m\"\u001b[39m\u001b[33myoutube_transcript_api\u001b[39m\u001b[33m\"\u001b[39m\u001b[33m Python package. \u001b[39m\u001b[33m'\u001b[39m\n\u001b[32m    252\u001b[39m         \u001b[33m\"\u001b[39m\u001b[33mPlease install it with `pip install youtube-transcript-api`.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m    253\u001b[39m     )\n\u001b[32m    255\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m.add_video_info:\n\u001b[32m    256\u001b[39m     \u001b[38;5;66;03m# Get more video meta info\u001b[39;00m\n\u001b[32m    257\u001b[39m     \u001b[38;5;66;03m# Such as title, description, thumbnail url, publish_date\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m258\u001b[39m     video_info = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_get_video_info\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m    259\u001b[39m     \u001b[38;5;28mself\u001b[39m._metadata.update(video_info)\n\u001b[32m    261\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n",
      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\vansh\\Projects\\AIAgents\\.venv\\Lib\\site-packages\\langchain_community\\document_loaders\\youtube.py:336\u001b[39m, in \u001b[36mYoutubeLoader._get_video_info\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m    330\u001b[39m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m(\n\u001b[32m    331\u001b[39m         \u001b[33m'\u001b[39m\u001b[33mCould not import \u001b[39m\u001b[33m\"\u001b[39m\u001b[33mpytube\u001b[39m\u001b[33m\"\u001b[39m\u001b[33m Python package. \u001b[39m\u001b[33m'\u001b[39m\n\u001b[32m    332\u001b[39m         \u001b[33m\"\u001b[39m\u001b[33mPlease install it with `pip install pytube`.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m    333\u001b[39m     )\n\u001b[32m    334\u001b[39m yt = YouTube(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mhttps://www.youtube.com/watch?v=\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m.video_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m)\n\u001b[32m    335\u001b[39m video_info = {\n\u001b[32m--> \u001b[39m\u001b[32m336\u001b[39m     \u001b[33m\"\u001b[39m\u001b[33mtitle\u001b[39m\u001b[33m\"\u001b[39m: \u001b[43myt\u001b[49m\u001b[43m.\u001b[49m\u001b[43mtitle\u001b[49m \u001b[38;5;129;01mor\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33mUnknown\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m    337\u001b[39m     \u001b[33m\"\u001b[39m\u001b[33mdescription\u001b[39m\u001b[33m\"\u001b[39m: yt.description \u001b[38;5;129;01mor\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33mUnknown\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m    338\u001b[39m     \u001b[33m\"\u001b[39m\u001b[33mview_count\u001b[39m\u001b[33m\"\u001b[39m: yt.views \u001b[38;5;129;01mor\u001b[39;00m \u001b[32m0\u001b[39m,\n\u001b[32m    339\u001b[39m     \u001b[33m\"\u001b[39m\u001b[33mthumbnail_url\u001b[39m\u001b[33m\"\u001b[39m: yt.thumbnail_url \u001b[38;5;129;01mor\u001b[39;00m 
\u001b[33m\"\u001b[39m\u001b[33mUnknown\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m    340\u001b[39m     \u001b[33m\"\u001b[39m\u001b[33mpublish_date\u001b[39m\u001b[33m\"\u001b[39m: yt.publish_date.strftime(\u001b[33m\"\u001b[39m\u001b[33m%\u001b[39m\u001b[33mY-\u001b[39m\u001b[33m%\u001b[39m\u001b[33mm-\u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[33m \u001b[39m\u001b[33m%\u001b[39m\u001b[33mH:\u001b[39m\u001b[33m%\u001b[39m\u001b[33mM:\u001b[39m\u001b[33m%\u001b[39m\u001b[33mS\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m    341\u001b[39m     \u001b[38;5;28;01mif\u001b[39;00m yt.publish_date\n\u001b[32m    342\u001b[39m     \u001b[38;5;28;01melse\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33mUnknown\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m    343\u001b[39m     \u001b[33m\"\u001b[39m\u001b[33mlength\u001b[39m\u001b[33m\"\u001b[39m: yt.length \u001b[38;5;129;01mor\u001b[39;00m \u001b[32m0\u001b[39m,\n\u001b[32m    344\u001b[39m     \u001b[33m\"\u001b[39m\u001b[33mauthor\u001b[39m\u001b[33m\"\u001b[39m: yt.author \u001b[38;5;129;01mor\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33mUnknown\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m    345\u001b[39m }\n\u001b[32m    346\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m video_info\n",
      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\vansh\\Projects\\AIAgents\\.venv\\Lib\\site-packages\\pytube\\__main__.py:341\u001b[39m, in \u001b[36mYouTube.title\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m    338\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._title\n\u001b[32m    340\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m341\u001b[39m     \u001b[38;5;28mself\u001b[39m._title = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mvid_info\u001b[49m[\u001b[33m'\u001b[39m\u001b[33mvideoDetails\u001b[39m\u001b[33m'\u001b[39m][\u001b[33m'\u001b[39m\u001b[33mtitle\u001b[39m\u001b[33m'\u001b[39m]\n\u001b[32m    342\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\u001b[32m    343\u001b[39m     \u001b[38;5;66;03m# Check_availability will raise the correct exception in most cases\u001b[39;00m\n\u001b[32m    344\u001b[39m     \u001b[38;5;66;03m#  if it doesn't, ask for a report.\u001b[39;00m\n\u001b[32m    345\u001b[39m     \u001b[38;5;28mself\u001b[39m.check_availability()\n",
      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\vansh\\Projects\\AIAgents\\.venv\\Lib\\site-packages\\pytube\\__main__.py:246\u001b[39m, in \u001b[36mYouTube.vid_info\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m    242\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._vid_info\n\u001b[32m    244\u001b[39m innertube = InnerTube(use_oauth=\u001b[38;5;28mself\u001b[39m.use_oauth, allow_cache=\u001b[38;5;28mself\u001b[39m.allow_oauth_cache)\n\u001b[32m--> \u001b[39m\u001b[32m246\u001b[39m innertube_response = \u001b[43minnertube\u001b[49m\u001b[43m.\u001b[49m\u001b[43mplayer\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mvideo_id\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m    247\u001b[39m \u001b[38;5;28mself\u001b[39m._vid_info = innertube_response\n\u001b[32m    248\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._vid_info\n",
      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\vansh\\Projects\\AIAgents\\.venv\\Lib\\site-packages\\pytube\\innertube.py:448\u001b[39m, in \u001b[36mInnerTube.player\u001b[39m\u001b[34m(self, video_id)\u001b[39m\n\u001b[32m    444\u001b[39m query = {\n\u001b[32m    445\u001b[39m     \u001b[33m'\u001b[39m\u001b[33mvideoId\u001b[39m\u001b[33m'\u001b[39m: video_id,\n\u001b[32m    446\u001b[39m }\n\u001b[32m    447\u001b[39m query.update(\u001b[38;5;28mself\u001b[39m.base_params)\n\u001b[32m--> \u001b[39m\u001b[32m448\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_call_api\u001b[49m\u001b[43m(\u001b[49m\u001b[43mendpoint\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mquery\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mbase_data\u001b[49m\u001b[43m)\u001b[49m\n",
      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\vansh\\Projects\\AIAgents\\.venv\\Lib\\site-packages\\pytube\\innertube.py:390\u001b[39m, in \u001b[36mInnerTube._call_api\u001b[39m\u001b[34m(self, endpoint, query, data)\u001b[39m\n\u001b[32m    386\u001b[39m         headers[\u001b[33m'\u001b[39m\u001b[33mAuthorization\u001b[39m\u001b[33m'\u001b[39m] = \u001b[33mf\u001b[39m\u001b[33m'\u001b[39m\u001b[33mBearer \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m.access_token\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\n\u001b[32m    388\u001b[39m headers.update(\u001b[38;5;28mself\u001b[39m.header)\n\u001b[32m--> \u001b[39m\u001b[32m390\u001b[39m response = \u001b[43mrequest\u001b[49m\u001b[43m.\u001b[49m\u001b[43m_execute_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m    391\u001b[39m \u001b[43m    \u001b[49m\u001b[43mendpoint_url\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    392\u001b[39m \u001b[43m    \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mPOST\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m    393\u001b[39m \u001b[43m    \u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m=\u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    394\u001b[39m \u001b[43m    \u001b[49m\u001b[43mdata\u001b[49m\u001b[43m=\u001b[49m\u001b[43mdata\u001b[49m\n\u001b[32m    395\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m    396\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m json.loads(response.read())\n",
      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\vansh\\Projects\\AIAgents\\.venv\\Lib\\site-packages\\pytube\\request.py:37\u001b[39m, in \u001b[36m_execute_request\u001b[39m\u001b[34m(url, method, headers, data, timeout)\u001b[39m\n\u001b[32m     35\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m     36\u001b[39m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[33m\"\u001b[39m\u001b[33mInvalid URL\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m---> \u001b[39m\u001b[32m37\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43murlopen\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m)\u001b[49m\n",
      "\u001b[36mFile \u001b[39m\u001b[32m~\\AppData\\Roaming\\uv\\python\\cpython-3.12.12-windows-x86_64-none\\Lib\\urllib\\request.py:215\u001b[39m, in \u001b[36murlopen\u001b[39m\u001b[34m(url, data, timeout, cafile, capath, cadefault, context)\u001b[39m\n\u001b[32m    213\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m    214\u001b[39m     opener = _opener\n\u001b[32m--> \u001b[39m\u001b[32m215\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mopener\u001b[49m\u001b[43m.\u001b[49m\u001b[43mopen\u001b[49m\u001b[43m(\u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m)\u001b[49m\n",
      "\u001b[36mFile \u001b[39m\u001b[32m~\\AppData\\Roaming\\uv\\python\\cpython-3.12.12-windows-x86_64-none\\Lib\\urllib\\request.py:521\u001b[39m, in \u001b[36mOpenerDirector.open\u001b[39m\u001b[34m(self, fullurl, data, timeout)\u001b[39m\n\u001b[32m    519\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m processor \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m.process_response.get(protocol, []):\n\u001b[32m    520\u001b[39m     meth = \u001b[38;5;28mgetattr\u001b[39m(processor, meth_name)\n\u001b[32m--> \u001b[39m\u001b[32m521\u001b[39m     response = \u001b[43mmeth\u001b[49m\u001b[43m(\u001b[49m\u001b[43mreq\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mresponse\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m    523\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m response\n",
      "\u001b[36mFile \u001b[39m\u001b[32m~\\AppData\\Roaming\\uv\\python\\cpython-3.12.12-windows-x86_64-none\\Lib\\urllib\\request.py:630\u001b[39m, in \u001b[36mHTTPErrorProcessor.http_response\u001b[39m\u001b[34m(self, request, response)\u001b[39m\n\u001b[32m    627\u001b[39m \u001b[38;5;66;03m# According to RFC 2616, \"2xx\" code indicates that the client's\u001b[39;00m\n\u001b[32m    628\u001b[39m \u001b[38;5;66;03m# request was successfully received, understood, and accepted.\u001b[39;00m\n\u001b[32m    629\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[32m200\u001b[39m <= code < \u001b[32m300\u001b[39m):\n\u001b[32m--> \u001b[39m\u001b[32m630\u001b[39m     response = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mparent\u001b[49m\u001b[43m.\u001b[49m\u001b[43merror\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m    631\u001b[39m \u001b[43m        \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mhttp\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mresponse\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcode\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmsg\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhdrs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m    633\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m response\n",
      "\u001b[36mFile \u001b[39m\u001b[32m~\\AppData\\Roaming\\uv\\python\\cpython-3.12.12-windows-x86_64-none\\Lib\\urllib\\request.py:559\u001b[39m, in \u001b[36mOpenerDirector.error\u001b[39m\u001b[34m(self, proto, *args)\u001b[39m\n\u001b[32m    557\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m http_err:\n\u001b[32m    558\u001b[39m     args = (\u001b[38;5;28mdict\u001b[39m, \u001b[33m'\u001b[39m\u001b[33mdefault\u001b[39m\u001b[33m'\u001b[39m, \u001b[33m'\u001b[39m\u001b[33mhttp_error_default\u001b[39m\u001b[33m'\u001b[39m) + orig_args\n\u001b[32m--> \u001b[39m\u001b[32m559\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_call_chain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\n",
      "\u001b[36mFile \u001b[39m\u001b[32m~\\AppData\\Roaming\\uv\\python\\cpython-3.12.12-windows-x86_64-none\\Lib\\urllib\\request.py:492\u001b[39m, in \u001b[36mOpenerDirector._call_chain\u001b[39m\u001b[34m(self, chain, kind, meth_name, *args)\u001b[39m\n\u001b[32m    490\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m handler \u001b[38;5;129;01min\u001b[39;00m handlers:\n\u001b[32m    491\u001b[39m     func = \u001b[38;5;28mgetattr\u001b[39m(handler, meth_name)\n\u001b[32m--> \u001b[39m\u001b[32m492\u001b[39m     result = \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m    493\u001b[39m     \u001b[38;5;28;01mif\u001b[39;00m result \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m    494\u001b[39m         \u001b[38;5;28;01mreturn\u001b[39;00m result\n",
      "\u001b[36mFile \u001b[39m\u001b[32m~\\AppData\\Roaming\\uv\\python\\cpython-3.12.12-windows-x86_64-none\\Lib\\urllib\\request.py:639\u001b[39m, in \u001b[36mHTTPDefaultErrorHandler.http_error_default\u001b[39m\u001b[34m(self, req, fp, code, msg, hdrs)\u001b[39m\n\u001b[32m    638\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mhttp_error_default\u001b[39m(\u001b[38;5;28mself\u001b[39m, req, fp, code, msg, hdrs):\n\u001b[32m--> \u001b[39m\u001b[32m639\u001b[39m     \u001b[38;5;28;01mraise\u001b[39;00m HTTPError(req.full_url, code, msg, hdrs, fp)\n",
      "\u001b[31mHTTPError\u001b[39m: HTTP Error 400: Bad Request"
     ]
    }
   ],
   "source": [
    "# Load the transcript of a YouTube video as LangChain documents.\n",
    "#\n",
    "# add_video_info is deliberately False: when it is True, YoutubeLoader.load()\n",
    "# calls pytube (YouTube(...).title etc.) to fetch title/author metadata, and\n",
    "# that pytube request fails with `HTTPError: HTTP Error 400: Bad Request`,\n",
    "# which aborts the whole load before any transcript is fetched.\n",
    "# With it disabled only the transcript is requested, so the cell can run\n",
    "# top-to-bottom on a fresh kernel.\n",
    "loader = YoutubeLoader.from_youtube_url(\n",
    "    \"https://www.youtube.com/watch?v=QsYGlZkevEg\", add_video_info=False\n",
    ")\n",
    "loader.load()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7c635f42",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}