{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3d9158b7",
   "metadata": {},
   "outputs": [],
   "source": [
    "import logging\n",
    "\n",
    "from langchain_aws import ChatBedrockConverse\n",
    "\n",
    "# Model ID and AWS region handed to ChatBedrockConverse below.\n",
    "LLM_MODEL_ID = \"us.meta.llama3-3-70b-instruct-v1:0\"\n",
    "LLM_REGION = \"us-east-1\"\n",
    "\n",
    "# The root logger defaults to WARNING, so a bare logging.info(...) is silently\n",
    "# dropped. Make sure a handler exists (basicConfig is a no-op if one already\n",
    "# does) and raise only this notebook's logger to INFO, leaving third-party\n",
    "# loggers at their default level.\n",
    "logging.basicConfig()\n",
    "logger = logging.getLogger(__name__)\n",
    "logger.setLevel(logging.INFO)\n",
    "\n",
    "llm = ChatBedrockConverse(\n",
    "    model_id=LLM_MODEL_ID,\n",
    "    region_name=LLM_REGION,\n",
    ")\n",
    "# Lazy %-style arguments: the message is only formatted if the record is emitted.\n",
    "logger.info(\"LLM initialized with model_id=%s, region_name=%s\", LLM_MODEL_ID, LLM_REGION)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cad5e68d",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Importing the langchain_community loaders logs a warning when USER_AGENT is\n",
    "# unset. setdefault keeps any value already exported in the environment.\n",
    "os.environ.setdefault(\"USER_AGENT\", \"langchain-document-loaders-notebook/0.1\")\n",
    "\n",
    "from langchain_community.document_loaders import WebBaseLoader"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "2a289acb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Page to scrape with WebBaseLoader.\n",
    "WEB_PAGE_URL = \"https://docs.langchain.com/oss/python/integrations/document_loaders/web_base\"\n",
    "loader = WebBaseLoader(WEB_PAGE_URL)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "d1fcbd5f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[Document(metadata={'source': 'https://docs.langchain.com/oss/python/integrations/document_loaders/web_base', 'title': 'WebBaseLoader integration - Docs by LangChain', 'description': 'Integrate with the WebBaseLoader document loader using LangChain Python.', 'language': 'en'}, page_content='WebBaseLoader integration - Docs by LangChainSkip to main contentDocs by LangChain home pageOpen sourceSearch...⌘KAsk AIGitHubTry LangSmithTry 
LangSmithSearch...NavigationWebBaseLoader integrationDeep AgentsLangChainLangGraphIntegrationsLearnReferenceContributePythonLangChain integrationsAll providersPopular ProvidersOpenAIAnthropicGoogleAWSHugging FaceMicrosoftOllamaGroqNVIDIAIntegrations by componentChat modelsTools and toolkitsMiddlewareRetrieversText splittersEmbedding modelsVector storesDocument loadersKey-value storesOn this pageOverviewIntegration detailsLoader featuresSetupCredentialsInstallationInitializationInitialization with multiple pagesLoadLoad multiple urls concurrentlyLoading a xml file, or using a different BeautifulSoup parserLazy loadAsyncUsing proxiesAPI referenceWebBaseLoader integrationCopy pageIntegrate with the WebBaseLoader document loader using LangChain Python.Copy pageThis covers how to use WebBaseLoader to load all text from HTML webpages into a document format that we can use downstream. For more custom logic for loading webpages look at some child class examples such as IMSDbLoader, AZLyricsLoader, and CollegeConfidentialLoader.\\nIf you don’t want to worry about website crawling, bypassing JS-blocking sites, and data cleaning, consider using FireCrawlLoader or the faster option SpiderLoader.\\n\\u200bOverview\\n\\u200bIntegration details\\n\\nTODO: Fill in table features.\\nTODO: Remove JS support link if not relevant, otherwise ensure link is correct.\\nTODO: Make sure API reference links are correct.\\n\\nClassPackageLocalSerializableJS supportWebBaseLoaderlangchain-community✅❌❌\\n\\u200bLoader features\\nSourceDocument Lazy LoadingNative Async SupportWebBaseLoader✅✅\\n\\u200bSetup\\n\\u200bCredentials\\nWebBaseLoader does not require any credentials.\\n\\u200bInstallation\\nTo use the WebBaseLoader you first need to install the langchain-community python package.\\nCopypip install -qU langchain-community beautifulsoup4\\n\\n\\u200bInitialization\\nNow we can instantiate our model object and load documents:\\nCopyfrom langchain_community.document_loaders import 
WebBaseLoader\\n\\nloader = WebBaseLoader(\"https://www.example.com/\")\\n\\nTo bypass SSL verification errors during fetching, you can set the “verify” option:\\nloader.requests_kwargs = {\\'verify\\':False}\\n\\u200bInitialization with multiple pages\\nYou can also pass in a list of pages to load from.\\nCopyloader_multiple_pages = WebBaseLoader(\\n [\"https://www.example.com/\", \"https://google.com\"]\\n)\\n\\n\\u200bLoad\\nCopydocs = loader.load()\\n\\ndocs[0]\\n\\nCopyDocument(metadata={\\'source\\': \\'https://www.example.com/\\', \\'title\\': \\'Example Domain\\', \\'language\\': \\'No language found.\\'}, page_content=\\'\\\\n\\\\n\\\\nExample Domain\\\\n\\\\n\\\\n\\\\n\\\\n\\\\n\\\\n\\\\nExample Domain\\\\nThis domain is for use in illustrative examples in documents. You may use this\\\\n domain in literature without prior coordination or asking for permission.\\\\nMore information...\\\\n\\\\n\\\\n\\\\n\\')\\n\\nCopyprint(docs[0].metadata)\\n\\nCopy{\\'source\\': \\'https://www.example.com/\\', \\'title\\': \\'Example Domain\\', \\'language\\': \\'No language found.\\'}\\n\\n\\u200bLoad multiple urls concurrently\\nYou can speed up the scraping process by scraping and parsing multiple urls concurrently.\\nThere are reasonable limits to concurrent requests, defaulting to 2 per second. If you aren’t concerned about being a good citizen, or you control the server you are scraping and don’t care about load, you can change the requests_per_second parameter to increase the max concurrent requests. Note, while this will speed up the scraping process, but may cause the server to block you. 
Be careful!\\nCopypip install -qU nest_asyncio\\n\\n# fixes a bug with asyncio and jupyter\\nimport nest_asyncio\\n\\nnest_asyncio.apply()\\n\\nCopyloader = WebBaseLoader([\"https://www.example.com/\", \"https://google.com\"])\\nloader.requests_per_second = 1\\ndocs = loader.aload()\\ndocs\\n\\nCopyFetching pages: 100%|###########################################################################| 2/2 [00:00<00:00, 8.28it/s]\\n\\nCopy[Document(metadata={\\'source\\': \\'https://www.example.com/\\', \\'title\\': \\'Example Domain\\', \\'language\\': \\'No language found.\\'}, page_content=\\'\\\\n\\\\n\\\\nExample Domain\\\\n\\\\n\\\\n\\\\n\\\\n\\\\n\\\\n\\\\nExample Domain\\\\nThis domain is for use in illustrative examples in documents. You may use this\\\\n domain in literature without prior coordination or asking for permission.\\\\nMore information...\\\\n\\\\n\\\\n\\\\n\\'),\\n Document(metadata={\\'source\\': \\'https://google.com\\', \\'title\\': \\'Google\\', \\'description\\': \"Search the world\\'s information, including webpages, images, videos and more. 
Google has many special features to help you find exactly what you\\'re looking for.\", \\'language\\': \\'en\\'}, page_content=\\'GoogleSearch Images Maps Play YouTube News Gmail Drive More »Web History | Settings | Sign in\\\\xa0Advanced search5 ways Gemini can help during the HolidaysAdvertisingBusiness SolutionsAbout Google© 2024 - Privacy - Terms \\')]\\n\\n\\u200bLoading a xml file, or using a different BeautifulSoup parser\\nYou can also look at SitemapLoader for an example of how to load a sitemap file, which is an example of using this feature.\\nCopyloader = WebBaseLoader(\\n \"https://www.govinfo.gov/content/pkg/CFR-2018-title10-vol3/xml/CFR-2018-title10-vol3-sec431-86.xml\"\\n)\\nloader.default_parser = \"xml\"\\ndocs = loader.load()\\ndocs\\n\\nCopy[Document(metadata={\\'source\\': \\'https://www.govinfo.gov/content/pkg/CFR-2018-title10-vol3/xml/CFR-2018-title10-vol3-sec431-86.xml\\'}, page_content=\\'\\\\n\\\\n10\\\\nEnergy\\\\n3\\\\n2018-01-01\\\\n2018-01-01\\\\nfalse\\\\nUniform test method for the measurement of energy efficiency of commercial packaged boilers.\\\\n§ 431.86\\\\nSection § 431.86\\\\n\\\\nEnergy\\\\nDEPARTMENT OF ENERGY\\\\nENERGY CONSERVATION\\\\nENERGY EFFICIENCY PROGRAM FOR CERTAIN COMMERCIAL AND INDUSTRIAL EQUIPMENT\\\\nCommercial Packaged Boilers\\\\nTest Procedures\\\\n\\\\n\\\\n\\\\n\\\\n§\\\\u2009431.86\\\\nUniform test method for the measurement of energy efficiency of commercial packaged boilers.\\\\n(a) Scope. This section provides test procedures, pursuant to the Energy Policy and Conservation Act (EPCA), as amended, which must be followed for measuring the combustion efficiency and/or thermal efficiency of a gas- or oil-fired commercial packaged boiler.\\\\n(b) Testing and Calculations. 
Determine the thermal efficiency or combustion efficiency of commercial packaged boilers by conducting the appropriate test procedure(s) indicated in Table 1 of this section.\\\\n\\\\nTable 1—Test Requirements for Commercial Packaged Boiler Equipment Classes\\\\n\\\\nEquipment category\\\\nSubcategory\\\\nCertified rated inputBtu/h\\\\n\\\\nStandards efficiency metric(§\\\\u2009431.87)\\\\n\\\\nTest procedure(corresponding to\\\\nstandards efficiency\\\\nmetric required\\\\nby §\\\\u2009431.87)\\\\n\\\\n\\\\n\\\\nHot Water\\\\nGas-fired\\\\n≥300,000 and ≤2,500,000\\\\nThermal Efficiency\\\\nAppendix A, Section 2.\\\\n\\\\n\\\\nHot Water\\\\nGas-fired\\\\n>2,500,000\\\\nCombustion Efficiency\\\\nAppendix A, Section 3.\\\\n\\\\n\\\\nHot Water\\\\nOil-fired\\\\n≥300,000 and ≤2,500,000\\\\nThermal Efficiency\\\\nAppendix A, Section 2.\\\\n\\\\n\\\\nHot Water\\\\nOil-fired\\\\n>2,500,000\\\\nCombustion Efficiency\\\\nAppendix A, Section 3.\\\\n\\\\n\\\\nSteam\\\\nGas-fired (all*)\\\\n≥300,000 and ≤2,500,000\\\\nThermal Efficiency\\\\nAppendix A, Section 2.\\\\n\\\\n\\\\nSteam\\\\nGas-fired (all*)\\\\n>2,500,000 and ≤5,000,000\\\\nThermal Efficiency\\\\nAppendix A, Section 2.\\\\n\\\\n\\\\n\\\\u2003\\\\n\\\\n>5,000,000\\\\nThermal Efficiency\\\\nAppendix A, Section 2.OR\\\\nAppendix A, Section 3 with Section 2.4.3.2.\\\\n\\\\n\\\\n\\\\nSteam\\\\nOil-fired\\\\n≥300,000 and ≤2,500,000\\\\nThermal Efficiency\\\\nAppendix A, Section 2.\\\\n\\\\n\\\\nSteam\\\\nOil-fired\\\\n>2,500,000 and ≤5,000,000\\\\nThermal Efficiency\\\\nAppendix A, Section 2.\\\\n\\\\n\\\\n\\\\u2003\\\\n\\\\n>5,000,000\\\\nThermal Efficiency\\\\nAppendix A, Section 2.OR\\\\nAppendix A, Section 3. with Section 2.4.3.2.\\\\n\\\\n\\\\n\\\\n*\\\\u2009Equipment classes for commercial packaged boilers as of July 22, 2009 (74 FR 36355) distinguish between gas-fired natural draft and all other gas-fired (except natural draft).\\\\n\\\\n(c) Field Tests. 
The field test provisions of appendix A may be used only to test a unit of commercial packaged boiler with rated input greater than 5,000,000 Btu/h.\\\\n[81 FR 89305, Dec. 9, 2016]\\\\n\\\\n\\\\nEnergy Efficiency Standards\\\\n\\\\n\\')]\\n\\n\\u200bLazy load\\nYou can use lazy loading to only load one page at a time in order to minimize memory requirements.\\nCopypages = []\\nfor doc in loader.lazy_load():\\n pages.append(doc)\\n\\nprint(pages[0].page_content[:100])\\nprint(pages[0].metadata)\\n\\nCopy10\\nEnergy\\n3\\n2018-01-01\\n2018-01-01\\nfalse\\nUniform test method for the measurement of energy efficien\\n{\\'source\\': \\'https://www.govinfo.gov/content/pkg/CFR-2018-title10-vol3/xml/CFR-2018-title10-vol3-sec431-86.xml\\'}\\n\\n\\u200bAsync\\nCopypages = []\\nasync for doc in loader.alazy_load():\\n pages.append(doc)\\n\\nprint(pages[0].page_content[:100])\\nprint(pages[0].metadata)\\n\\nCopyFetching pages: 100%|###########################################################################| 1/1 [00:00<00:00, 10.51it/s]\\n\\nCopy10\\nEnergy\\n3\\n2018-01-01\\n2018-01-01\\nfalse\\nUniform test method for the measurement of energy efficien\\n{\\'source\\': \\'https://www.govinfo.gov/content/pkg/CFR-2018-title10-vol3/xml/CFR-2018-title10-vol3-sec431-86.xml\\'}\\n\\n\\u200bUsing proxies\\nSometimes you might need to use proxies to get around IP blocks. 
You can pass in a dictionary of proxies to the loader (and requests underneath) to use them.\\nCopyloader = WebBaseLoader(\\n \"https://www.walmart.com/search?q=parrots\",\\n proxies={\\n \"http\": \"http://{username}:{password}:@proxy.service.com:6666/\",\\n \"https\": \"https://{username}:{password}:@proxy.service.com:6666/\",\\n },\\n)\\ndocs = loader.load()\\n\\n\\n\\u200bAPI reference\\nFor detailed documentation of all WebBaseLoader features and configurations head to the API reference: python.langchain.com/api_reference/community/document_loaders/langchain_community.document_loaders.web_base.WebBaseLoader.html\\n\\nEdit this page on GitHub or file an issue.Connect these docs to Claude, VSCode, and more via MCP for real-time answers.Was this page helpful?YesNo⌘IDocs by LangChain home pagegithubxlinkedinyoutubeResourcesForumChangelogLangChain AcademyTrust CenterCompanyHomeAboutCareersBloggithubxlinkedinyoutube\\n')]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fetch the page and keep the resulting list of Document objects in `docs`;\n",
    "# the bare name on the last line displays it as the cell result.\n",
    "docs = loader.load()\n",
    "docs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "0ee58d3a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'WebBaseLoader integration - Docs by LangChainSkip to main contentDocs by LangChain home pageOpen sourceSearch...⌘KAsk AIGitHubTry LangSmithTry LangSmithSearch...NavigationWebBaseLoader integrationDeep AgentsLangChainLangGraphIntegrationsLearnReferenceContributePythonLangChain integrationsAll providersPopular ProvidersOpenAIAnthropicGoogleAWSHugging FaceMicrosoftOllamaGroqNVIDIAIntegrations by componentChat modelsTools and toolkitsMiddlewareRetrieversText splittersEmbedding modelsVector storesDocument loadersKey-value storesOn this pageOverviewIntegration detailsLoader featuresSetupCredentialsInstallationInitializationInitialization with multiple pagesLoadLoad multiple urls concurrentlyLoading a xml file, or using a different BeautifulSoup parserLazy loadAsyncUsing proxiesAPI referenceWebBaseLoader 
integrationCopy pageIntegrate with the WebBaseLoader document loader using LangChain Python.Copy pageThis covers how to use WebBaseLoader to load all text from HTML webpages into a document format that we can use downstream. For more custom logic for loading webpages look at some child class examples such as IMSDbLoader, AZLyricsLoader, and CollegeConfidentialLoader.\\nIf you don’t want to worry about website crawling, bypassing JS-blocking sites, and data cleaning, consider using FireCrawlLoader or the faster option SpiderLoader.\\n\\u200bOverview\\n\\u200bIntegration details\\n\\nTODO: Fill in table features.\\nTODO: Remove JS support link if not relevant, otherwise ensure link is correct.\\nTODO: Make sure API reference links are correct.\\n\\nClassPackageLocalSerializableJS supportWebBaseLoaderlangchain-community✅❌❌\\n\\u200bLoader features\\nSourceDocument Lazy LoadingNative Async SupportWebBaseLoader✅✅\\n\\u200bSetup\\n\\u200bCredentials\\nWebBaseLoader does not require any credentials.\\n\\u200bInstallation\\nTo use the WebBaseLoader you first need to install the langchain-community python package.\\nCopypip install -qU langchain-community beautifulsoup4\\n\\n\\u200bInitialization\\nNow we can instantiate our model object and load documents:\\nCopyfrom langchain_community.document_loaders import WebBaseLoader\\n\\nloader = WebBaseLoader(\"https://www.example.com/\")\\n\\nTo bypass SSL verification errors during fetching, you can set the “verify” option:\\nloader.requests_kwargs = {\\'verify\\':False}\\n\\u200bInitialization with multiple pages\\nYou can also pass in a list of pages to load from.\\nCopyloader_multiple_pages = WebBaseLoader(\\n [\"https://www.example.com/\", \"https://google.com\"]\\n)\\n\\n\\u200bLoad\\nCopydocs = loader.load()\\n\\ndocs[0]\\n\\nCopyDocument(metadata={\\'source\\': \\'https://www.example.com/\\', \\'title\\': \\'Example Domain\\', \\'language\\': \\'No language found.\\'}, page_content=\\'\\\\n\\\\n\\\\nExample 
Domain\\\\n\\\\n\\\\n\\\\n\\\\n\\\\n\\\\n\\\\nExample Domain\\\\nThis domain is for use in illustrative examples in documents. You may use this\\\\n domain in literature without prior coordination or asking for permission.\\\\nMore information...\\\\n\\\\n\\\\n\\\\n\\')\\n\\nCopyprint(docs[0].metadata)\\n\\nCopy{\\'source\\': \\'https://www.example.com/\\', \\'title\\': \\'Example Domain\\', \\'language\\': \\'No language found.\\'}\\n\\n\\u200bLoad multiple urls concurrently\\nYou can speed up the scraping process by scraping and parsing multiple urls concurrently.\\nThere are reasonable limits to concurrent requests, defaulting to 2 per second. If you aren’t concerned about being a good citizen, or you control the server you are scraping and don’t care about load, you can change the requests_per_second parameter to increase the max concurrent requests. Note, while this will speed up the scraping process, but may cause the server to block you. Be careful!\\nCopypip install -qU nest_asyncio\\n\\n# fixes a bug with asyncio and jupyter\\nimport nest_asyncio\\n\\nnest_asyncio.apply()\\n\\nCopyloader = WebBaseLoader([\"https://www.example.com/\", \"https://google.com\"])\\nloader.requests_per_second = 1\\ndocs = loader.aload()\\ndocs\\n\\nCopyFetching pages: 100%|###########################################################################| 2/2 [00:00<00:00, 8.28it/s]\\n\\nCopy[Document(metadata={\\'source\\': \\'https://www.example.com/\\', \\'title\\': \\'Example Domain\\', \\'language\\': \\'No language found.\\'}, page_content=\\'\\\\n\\\\n\\\\nExample Domain\\\\n\\\\n\\\\n\\\\n\\\\n\\\\n\\\\n\\\\nExample Domain\\\\nThis domain is for use in illustrative examples in documents. 
You may use this\\\\n domain in literature without prior coordination or asking for permission.\\\\nMore information...\\\\n\\\\n\\\\n\\\\n\\'),\\n Document(metadata={\\'source\\': \\'https://google.com\\', \\'title\\': \\'Google\\', \\'description\\': \"Search the world\\'s information, including webpages, images, videos and more. Google has many special features to help you find exactly what you\\'re looking for.\", \\'language\\': \\'en\\'}, page_content=\\'GoogleSearch Images Maps Play YouTube News Gmail Drive More »Web History | Settings | Sign in\\\\xa0Advanced search5 ways Gemini can help during the HolidaysAdvertisingBusiness SolutionsAbout Google© 2024 - Privacy - Terms \\')]\\n\\n\\u200bLoading a xml file, or using a different BeautifulSoup parser\\nYou can also look at SitemapLoader for an example of how to load a sitemap file, which is an example of using this feature.\\nCopyloader = WebBaseLoader(\\n \"https://www.govinfo.gov/content/pkg/CFR-2018-title10-vol3/xml/CFR-2018-title10-vol3-sec431-86.xml\"\\n)\\nloader.default_parser = \"xml\"\\ndocs = loader.load()\\ndocs\\n\\nCopy[Document(metadata={\\'source\\': \\'https://www.govinfo.gov/content/pkg/CFR-2018-title10-vol3/xml/CFR-2018-title10-vol3-sec431-86.xml\\'}, page_content=\\'\\\\n\\\\n10\\\\nEnergy\\\\n3\\\\n2018-01-01\\\\n2018-01-01\\\\nfalse\\\\nUniform test method for the measurement of energy efficiency of commercial packaged boilers.\\\\n§ 431.86\\\\nSection § 431.86\\\\n\\\\nEnergy\\\\nDEPARTMENT OF ENERGY\\\\nENERGY CONSERVATION\\\\nENERGY EFFICIENCY PROGRAM FOR CERTAIN COMMERCIAL AND INDUSTRIAL EQUIPMENT\\\\nCommercial Packaged Boilers\\\\nTest Procedures\\\\n\\\\n\\\\n\\\\n\\\\n§\\\\u2009431.86\\\\nUniform test method for the measurement of energy efficiency of commercial packaged boilers.\\\\n(a) Scope. 
This section provides test procedures, pursuant to the Energy Policy and Conservation Act (EPCA), as amended, which must be followed for measuring the combustion efficiency and/or thermal efficiency of a gas- or oil-fired commercial packaged boiler.\\\\n(b) Testing and Calculations. Determine the thermal efficiency or combustion efficiency of commercial packaged boilers by conducting the appropriate test procedure(s) indicated in Table 1 of this section.\\\\n\\\\nTable 1—Test Requirements for Commercial Packaged Boiler Equipment Classes\\\\n\\\\nEquipment category\\\\nSubcategory\\\\nCertified rated inputBtu/h\\\\n\\\\nStandards efficiency metric(§\\\\u2009431.87)\\\\n\\\\nTest procedure(corresponding to\\\\nstandards efficiency\\\\nmetric required\\\\nby §\\\\u2009431.87)\\\\n\\\\n\\\\n\\\\nHot Water\\\\nGas-fired\\\\n≥300,000 and ≤2,500,000\\\\nThermal Efficiency\\\\nAppendix A, Section 2.\\\\n\\\\n\\\\nHot Water\\\\nGas-fired\\\\n>2,500,000\\\\nCombustion Efficiency\\\\nAppendix A, Section 3.\\\\n\\\\n\\\\nHot Water\\\\nOil-fired\\\\n≥300,000 and ≤2,500,000\\\\nThermal Efficiency\\\\nAppendix A, Section 2.\\\\n\\\\n\\\\nHot Water\\\\nOil-fired\\\\n>2,500,000\\\\nCombustion Efficiency\\\\nAppendix A, Section 3.\\\\n\\\\n\\\\nSteam\\\\nGas-fired (all*)\\\\n≥300,000 and ≤2,500,000\\\\nThermal Efficiency\\\\nAppendix A, Section 2.\\\\n\\\\n\\\\nSteam\\\\nGas-fired (all*)\\\\n>2,500,000 and ≤5,000,000\\\\nThermal Efficiency\\\\nAppendix A, Section 2.\\\\n\\\\n\\\\n\\\\u2003\\\\n\\\\n>5,000,000\\\\nThermal Efficiency\\\\nAppendix A, Section 2.OR\\\\nAppendix A, Section 3 with Section 2.4.3.2.\\\\n\\\\n\\\\n\\\\nSteam\\\\nOil-fired\\\\n≥300,000 and ≤2,500,000\\\\nThermal Efficiency\\\\nAppendix A, Section 2.\\\\n\\\\n\\\\nSteam\\\\nOil-fired\\\\n>2,500,000 and ≤5,000,000\\\\nThermal Efficiency\\\\nAppendix A, Section 2.\\\\n\\\\n\\\\n\\\\u2003\\\\n\\\\n>5,000,000\\\\nThermal Efficiency\\\\nAppendix A, Section 2.OR\\\\nAppendix A, Section 3. 
with Section 2.4.3.2.\\\\n\\\\n\\\\n\\\\n*\\\\u2009Equipment classes for commercial packaged boilers as of July 22, 2009 (74 FR 36355) distinguish between gas-fired natural draft and all other gas-fired (except natural draft).\\\\n\\\\n(c) Field Tests. The field test provisions of appendix A may be used only to test a unit of commercial packaged boiler with rated input greater than 5,000,000 Btu/h.\\\\n[81 FR 89305, Dec. 9, 2016]\\\\n\\\\n\\\\nEnergy Efficiency Standards\\\\n\\\\n\\')]\\n\\n\\u200bLazy load\\nYou can use lazy loading to only load one page at a time in order to minimize memory requirements.\\nCopypages = []\\nfor doc in loader.lazy_load():\\n pages.append(doc)\\n\\nprint(pages[0].page_content[:100])\\nprint(pages[0].metadata)\\n\\nCopy10\\nEnergy\\n3\\n2018-01-01\\n2018-01-01\\nfalse\\nUniform test method for the measurement of energy efficien\\n{\\'source\\': \\'https://www.govinfo.gov/content/pkg/CFR-2018-title10-vol3/xml/CFR-2018-title10-vol3-sec431-86.xml\\'}\\n\\n\\u200bAsync\\nCopypages = []\\nasync for doc in loader.alazy_load():\\n pages.append(doc)\\n\\nprint(pages[0].page_content[:100])\\nprint(pages[0].metadata)\\n\\nCopyFetching pages: 100%|###########################################################################| 1/1 [00:00<00:00, 10.51it/s]\\n\\nCopy10\\nEnergy\\n3\\n2018-01-01\\n2018-01-01\\nfalse\\nUniform test method for the measurement of energy efficien\\n{\\'source\\': \\'https://www.govinfo.gov/content/pkg/CFR-2018-title10-vol3/xml/CFR-2018-title10-vol3-sec431-86.xml\\'}\\n\\n\\u200bUsing proxies\\nSometimes you might need to use proxies to get around IP blocks. 
You can pass in a dictionary of proxies to the loader (and requests underneath) to use them.\\nCopyloader = WebBaseLoader(\\n \"https://www.walmart.com/search?q=parrots\",\\n proxies={\\n \"http\": \"http://{username}:{password}:@proxy.service.com:6666/\",\\n \"https\": \"https://{username}:{password}:@proxy.service.com:6666/\",\\n },\\n)\\ndocs = loader.load()\\n\\n\\n\\u200bAPI reference\\nFor detailed documentation of all WebBaseLoader features and configurations head to the API reference: python.langchain.com/api_reference/community/document_loaders/langchain_community.document_loaders.web_base.WebBaseLoader.html\\n\\nEdit this page on GitHub or file an issue.Connect these docs to Claude, VSCode, and more via MCP for real-time answers.Was this page helpful?YesNo⌘IDocs by LangChain home pagegithubxlinkedinyoutubeResourcesForumChangelogLangChain AcademyTrust CenterCompanyHomeAboutCareersBloggithubxlinkedinyoutube\\n'"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "docs[0].page_content"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "775a77b9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(docs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "47202522",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "992"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(docs[0].page_content.split())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "46f0d682",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_community.document_loaders import YoutubeLoader"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "c0cb3d1f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Video whose transcript is loaded below.\n",
    "YOUTUBE_URL = \"https://www.youtube.com/watch?v=QsYGlZkevEg\"\n",
    "\n",
    "# Bind the YouTube loader to its own name so the WebBaseLoader held in `loader`\n",
    "# is not overwritten; re-running the web cells then still loads the web page.\n",
    "youtube_loader = YoutubeLoader.from_youtube_url(YOUTUBE_URL, add_video_info=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "496ebb6d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[Document(metadata={'source': 'QsYGlZkevEg'}, page_content='LADIES AND GENTLEMEN, PEDRO PASCAL! [ CHEERS AND APPLAUSE ] >> THANK YOU, THANK YOU. THANK YOU VERY MUCH. I\\'M SO EXCITED TO BE HERE. THANK YOU. I SPENT THE LAST YEAR SHOOTING A SHOW CALLED \"THE LAST OF US\" ON HBO. FOR SOME HBO SHOES, YOU GET TO SHOOT IN A FIVE STAR ITALIAN RESORT SURROUNDED BY BEAUTIFUL PEOPLE, BUT I SAID, NO, THAT\\'S TOO EASY. I WANT TO SHOOT IN A FREEZING CANADIAN FOREST WHILE BEING CHASED AROUND BY A GUY WHOSE HEAD LOOKS LIKE A GENITAL WART. IT IS AN HONOR BEING A PART OF THESE HUGE FRANCHISEs LIKE \"GAME OF THRONES\" AND \"STAR WARS,\" BUT I\\'M STILL GETTING USED TO PEOPLE RECOGNIZING ME. THE OTHER DAY, A GUY STOPPED ME ON THE STREET AND SAYS, MY SON LOVES \"THE MANDALORIAN\" AND THE NEXT THING I KNOW, I\\'M FACE TIMING WITH A 6-YEAR-OLD WHO HAS NO IDEA WHO I AM BECAUSE MY CHARACTER WEARS A MASK THE ENTIRE SHOW. THE GUY IS LIKE, DO THE MANDO VOICE, BUT IT\\'S LIKE A BEDROOM VOICE. WITHOUT THE MASK, IT JUST SOUNDS PORNY. PEOPLE WALKING BY ON THE STREET SEE ME WHISPERING TO A 6-YEAR-OLD KID. I CAN BRING YOU IN WARM, OR I CAN BRING YOU IN COLD. EVEN THOUGH I CAME TO THE U.S. WHEN I WAS LITTLE, I WAS BORN IN CHILE, AND I HAVE 34 FIRST COUSINS WHO ARE STILL THERE. THEY\\'RE VERY PROUD OF ME. I KNOW THEY\\'RE PROUD BECAUSE THEY GIVE MY PHONE NUMBER TO EVERY PERSON THEY MEET, WHICH MEANS EVERY DAY, SOMEONE IN SANTIAGO WILL TEXT ME STUFF LIKE, CAN YOU COME TO MY WEDDING, OR CAN YOU SING MY PRIEST HAPPY BIRTHDAY, OR IS BABY YODA MEAN IN REAL LIFE. SO I HAVE TO BE LIKE NO, NO, AND HIS NAME IS GROGU. BUT MY COUSINS WEREN\\'T ALWAYS SO PROUD. EARLY IN MY CAREER, I PLAYED SMALL PARTS IN EVERY CRIME SHOW. I EVEN PLAYED TWO DIFFERENT CHARACTERS ON \"LAW AND ORDER.\" TITO CABASSA WHO LOOKED LIKE THIS. AND ONE YEAR LATER, I PLAYED REGGIE LUCKMAN WHO LOOKS LIKE THIS. 
AND THAT, MY FRIENDS, IS CALLED RANGE. BUT IT IS AMAZING TO BE HERE, LIKE I SAID. I WAS BORN IN CHILE, AND NINE MONTHS LATER, MY PARENTS FLED AND BROUGHT ME AND MY SISTER TO THE U.S. THEY WERE SO BRAVE, AND WITHOUT THEM, I WOULDN\\'T BE HERE IN THIS WONDERFUL COUNTRY, AND I CERTAINLY WOULDN\\'T BE STANDING HERE WITH YOU ALL TONIGHT. SO TO ALL MY FAMILY WATCHING IN CHILE, I WANT TO SAY [ SPEAKING NON-ENGLISH ] WHICH MEANS, I LOVE YOU, I MISS YOU, AND STOP GIVING OUT MY PHONE NUMBER. WE\\'VE GOT AN AMAZING SHOW FOR YOU TONIGHT. COLDPLAY IS HERE, SO STICK')]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "youtube_loader.load()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "3c4920b7",
   "metadata": {},
   "outputs": [
    {
     "ename": "HTTPError",
     "evalue": "HTTP Error 400: Bad Request",
     "output_type": "error",
     "traceback": [
      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
      "\u001b[31mHTTPError\u001b[39m Traceback (most recent call last)",
      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[8]\u001b[39m\u001b[32m, line 4\u001b[39m\n\u001b[32m 1\u001b[39m loader = YoutubeLoader.from_youtube_url(\n\u001b[32m 2\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mhttps://www.youtube.com/watch?v=QsYGlZkevEg\u001b[39m\u001b[33m\"\u001b[39m, add_video_info=\u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[32m 3\u001b[39m )\n\u001b[32m----> \u001b[39m\u001b[32m4\u001b[39m \u001b[43mloader\u001b[49m\u001b[43m.\u001b[49m\u001b[43mload\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\vansh\\Projects\\AIAgents\\.venv\\Lib\\site-packages\\langchain_community\\document_loaders\\youtube.py:258\u001b[39m, in \u001b[36mYoutubeLoader.load\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 250\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m(\n\u001b[32m 251\u001b[39m \u001b[33m'\u001b[39m\u001b[33mCould not import 
\u001b[39m\u001b[33m\"\u001b[39m\u001b[33myoutube_transcript_api\u001b[39m\u001b[33m\"\u001b[39m\u001b[33m Python package. \u001b[39m\u001b[33m'\u001b[39m\n\u001b[32m 252\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mPlease install it with `pip install youtube-transcript-api`.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 253\u001b[39m )\n\u001b[32m 255\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m.add_video_info:\n\u001b[32m 256\u001b[39m \u001b[38;5;66;03m# Get more video meta info\u001b[39;00m\n\u001b[32m 257\u001b[39m \u001b[38;5;66;03m# Such as title, description, thumbnail url, publish_date\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m258\u001b[39m video_info = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_get_video_info\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 259\u001b[39m \u001b[38;5;28mself\u001b[39m._metadata.update(video_info)\n\u001b[32m 261\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n", "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\vansh\\Projects\\AIAgents\\.venv\\Lib\\site-packages\\langchain_community\\document_loaders\\youtube.py:336\u001b[39m, in \u001b[36mYoutubeLoader._get_video_info\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 330\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m(\n\u001b[32m 331\u001b[39m \u001b[33m'\u001b[39m\u001b[33mCould not import \u001b[39m\u001b[33m\"\u001b[39m\u001b[33mpytube\u001b[39m\u001b[33m\"\u001b[39m\u001b[33m Python package. 
\u001b[39m\u001b[33m'\u001b[39m\n\u001b[32m 332\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mPlease install it with `pip install pytube`.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 333\u001b[39m )\n\u001b[32m 334\u001b[39m yt = YouTube(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mhttps://www.youtube.com/watch?v=\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m.video_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m)\n\u001b[32m 335\u001b[39m video_info = {\n\u001b[32m--> \u001b[39m\u001b[32m336\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mtitle\u001b[39m\u001b[33m\"\u001b[39m: \u001b[43myt\u001b[49m\u001b[43m.\u001b[49m\u001b[43mtitle\u001b[49m \u001b[38;5;129;01mor\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33mUnknown\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m 337\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mdescription\u001b[39m\u001b[33m\"\u001b[39m: yt.description \u001b[38;5;129;01mor\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33mUnknown\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m 338\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mview_count\u001b[39m\u001b[33m\"\u001b[39m: yt.views \u001b[38;5;129;01mor\u001b[39;00m \u001b[32m0\u001b[39m,\n\u001b[32m 339\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mthumbnail_url\u001b[39m\u001b[33m\"\u001b[39m: yt.thumbnail_url \u001b[38;5;129;01mor\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33mUnknown\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m 340\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mpublish_date\u001b[39m\u001b[33m\"\u001b[39m: yt.publish_date.strftime(\u001b[33m\"\u001b[39m\u001b[33m%\u001b[39m\u001b[33mY-\u001b[39m\u001b[33m%\u001b[39m\u001b[33mm-\u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[33m \u001b[39m\u001b[33m%\u001b[39m\u001b[33mH:\u001b[39m\u001b[33m%\u001b[39m\u001b[33mM:\u001b[39m\u001b[33m%\u001b[39m\u001b[33mS\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 341\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m yt.publish_date\n\u001b[32m 342\u001b[39m 
\u001b[38;5;28;01melse\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33mUnknown\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m 343\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mlength\u001b[39m\u001b[33m\"\u001b[39m: yt.length \u001b[38;5;129;01mor\u001b[39;00m \u001b[32m0\u001b[39m,\n\u001b[32m 344\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mauthor\u001b[39m\u001b[33m\"\u001b[39m: yt.author \u001b[38;5;129;01mor\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33mUnknown\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m 345\u001b[39m }\n\u001b[32m 346\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m video_info\n", "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\vansh\\Projects\\AIAgents\\.venv\\Lib\\site-packages\\pytube\\__main__.py:341\u001b[39m, in \u001b[36mYouTube.title\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 338\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._title\n\u001b[32m 340\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m341\u001b[39m \u001b[38;5;28mself\u001b[39m._title = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mvid_info\u001b[49m[\u001b[33m'\u001b[39m\u001b[33mvideoDetails\u001b[39m\u001b[33m'\u001b[39m][\u001b[33m'\u001b[39m\u001b[33mtitle\u001b[39m\u001b[33m'\u001b[39m]\n\u001b[32m 342\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\u001b[32m 343\u001b[39m \u001b[38;5;66;03m# Check_availability will raise the correct exception in most cases\u001b[39;00m\n\u001b[32m 344\u001b[39m \u001b[38;5;66;03m# if it doesn't, ask for a report.\u001b[39;00m\n\u001b[32m 345\u001b[39m \u001b[38;5;28mself\u001b[39m.check_availability()\n", "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\vansh\\Projects\\AIAgents\\.venv\\Lib\\site-packages\\pytube\\__main__.py:246\u001b[39m, in \u001b[36mYouTube.vid_info\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m 242\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._vid_info\n\u001b[32m 
244\u001b[39m innertube = InnerTube(use_oauth=\u001b[38;5;28mself\u001b[39m.use_oauth, allow_cache=\u001b[38;5;28mself\u001b[39m.allow_oauth_cache)\n\u001b[32m--> \u001b[39m\u001b[32m246\u001b[39m innertube_response = \u001b[43minnertube\u001b[49m\u001b[43m.\u001b[49m\u001b[43mplayer\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mvideo_id\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 247\u001b[39m \u001b[38;5;28mself\u001b[39m._vid_info = innertube_response\n\u001b[32m 248\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._vid_info\n", "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\vansh\\Projects\\AIAgents\\.venv\\Lib\\site-packages\\pytube\\innertube.py:448\u001b[39m, in \u001b[36mInnerTube.player\u001b[39m\u001b[34m(self, video_id)\u001b[39m\n\u001b[32m 444\u001b[39m query = {\n\u001b[32m 445\u001b[39m \u001b[33m'\u001b[39m\u001b[33mvideoId\u001b[39m\u001b[33m'\u001b[39m: video_id,\n\u001b[32m 446\u001b[39m }\n\u001b[32m 447\u001b[39m query.update(\u001b[38;5;28mself\u001b[39m.base_params)\n\u001b[32m--> \u001b[39m\u001b[32m448\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_call_api\u001b[49m\u001b[43m(\u001b[49m\u001b[43mendpoint\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mquery\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mbase_data\u001b[49m\u001b[43m)\u001b[49m\n", "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\vansh\\Projects\\AIAgents\\.venv\\Lib\\site-packages\\pytube\\innertube.py:390\u001b[39m, in \u001b[36mInnerTube._call_api\u001b[39m\u001b[34m(self, endpoint, query, data)\u001b[39m\n\u001b[32m 386\u001b[39m headers[\u001b[33m'\u001b[39m\u001b[33mAuthorization\u001b[39m\u001b[33m'\u001b[39m] = \u001b[33mf\u001b[39m\u001b[33m'\u001b[39m\u001b[33mBearer 
\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m.access_token\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\n\u001b[32m 388\u001b[39m headers.update(\u001b[38;5;28mself\u001b[39m.header)\n\u001b[32m--> \u001b[39m\u001b[32m390\u001b[39m response = \u001b[43mrequest\u001b[49m\u001b[43m.\u001b[49m\u001b[43m_execute_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 391\u001b[39m \u001b[43m \u001b[49m\u001b[43mendpoint_url\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 392\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mPOST\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 393\u001b[39m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m=\u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 394\u001b[39m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[43m=\u001b[49m\u001b[43mdata\u001b[49m\n\u001b[32m 395\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 396\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m json.loads(response.read())\n", "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\vansh\\Projects\\AIAgents\\.venv\\Lib\\site-packages\\pytube\\request.py:37\u001b[39m, in \u001b[36m_execute_request\u001b[39m\u001b[34m(url, method, headers, data, timeout)\u001b[39m\n\u001b[32m 35\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 36\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[33m\"\u001b[39m\u001b[33mInvalid URL\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m---> \u001b[39m\u001b[32m37\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43murlopen\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m)\u001b[49m\n", "\u001b[36mFile \u001b[39m\u001b[32m~\\AppData\\Roaming\\uv\\python\\cpython-3.12.12-windows-x86_64-none\\Lib\\urllib\\request.py:215\u001b[39m, in 
\u001b[36murlopen\u001b[39m\u001b[34m(url, data, timeout, cafile, capath, cadefault, context)\u001b[39m\n\u001b[32m 213\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 214\u001b[39m opener = _opener\n\u001b[32m--> \u001b[39m\u001b[32m215\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mopener\u001b[49m\u001b[43m.\u001b[49m\u001b[43mopen\u001b[49m\u001b[43m(\u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m)\u001b[49m\n", "\u001b[36mFile \u001b[39m\u001b[32m~\\AppData\\Roaming\\uv\\python\\cpython-3.12.12-windows-x86_64-none\\Lib\\urllib\\request.py:521\u001b[39m, in \u001b[36mOpenerDirector.open\u001b[39m\u001b[34m(self, fullurl, data, timeout)\u001b[39m\n\u001b[32m 519\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m processor \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m.process_response.get(protocol, []):\n\u001b[32m 520\u001b[39m meth = \u001b[38;5;28mgetattr\u001b[39m(processor, meth_name)\n\u001b[32m--> \u001b[39m\u001b[32m521\u001b[39m response = \u001b[43mmeth\u001b[49m\u001b[43m(\u001b[49m\u001b[43mreq\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mresponse\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 523\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m response\n", "\u001b[36mFile \u001b[39m\u001b[32m~\\AppData\\Roaming\\uv\\python\\cpython-3.12.12-windows-x86_64-none\\Lib\\urllib\\request.py:630\u001b[39m, in \u001b[36mHTTPErrorProcessor.http_response\u001b[39m\u001b[34m(self, request, response)\u001b[39m\n\u001b[32m 627\u001b[39m \u001b[38;5;66;03m# According to RFC 2616, \"2xx\" code indicates that the client's\u001b[39;00m\n\u001b[32m 628\u001b[39m \u001b[38;5;66;03m# request was successfully received, understood, and accepted.\u001b[39;00m\n\u001b[32m 629\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[32m200\u001b[39m <= code < 
\u001b[32m300\u001b[39m):\n\u001b[32m--> \u001b[39m\u001b[32m630\u001b[39m response = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mparent\u001b[49m\u001b[43m.\u001b[49m\u001b[43merror\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 631\u001b[39m \u001b[43m \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mhttp\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mresponse\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcode\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmsg\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhdrs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 633\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m response\n", "\u001b[36mFile \u001b[39m\u001b[32m~\\AppData\\Roaming\\uv\\python\\cpython-3.12.12-windows-x86_64-none\\Lib\\urllib\\request.py:559\u001b[39m, in \u001b[36mOpenerDirector.error\u001b[39m\u001b[34m(self, proto, *args)\u001b[39m\n\u001b[32m 557\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m http_err:\n\u001b[32m 558\u001b[39m args = (\u001b[38;5;28mdict\u001b[39m, \u001b[33m'\u001b[39m\u001b[33mdefault\u001b[39m\u001b[33m'\u001b[39m, \u001b[33m'\u001b[39m\u001b[33mhttp_error_default\u001b[39m\u001b[33m'\u001b[39m) + orig_args\n\u001b[32m--> \u001b[39m\u001b[32m559\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_call_chain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\n", "\u001b[36mFile \u001b[39m\u001b[32m~\\AppData\\Roaming\\uv\\python\\cpython-3.12.12-windows-x86_64-none\\Lib\\urllib\\request.py:492\u001b[39m, in \u001b[36mOpenerDirector._call_chain\u001b[39m\u001b[34m(self, chain, kind, meth_name, *args)\u001b[39m\n\u001b[32m 490\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m handler \u001b[38;5;129;01min\u001b[39;00m handlers:\n\u001b[32m 491\u001b[39m func = 
\u001b[38;5;28mgetattr\u001b[39m(handler, meth_name)\n\u001b[32m--> \u001b[39m\u001b[32m492\u001b[39m result = \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 493\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m result \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 494\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m result\n", "\u001b[36mFile \u001b[39m\u001b[32m~\\AppData\\Roaming\\uv\\python\\cpython-3.12.12-windows-x86_64-none\\Lib\\urllib\\request.py:639\u001b[39m, in \u001b[36mHTTPDefaultErrorHandler.http_error_default\u001b[39m\u001b[34m(self, req, fp, code, msg, hdrs)\u001b[39m\n\u001b[32m 638\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mhttp_error_default\u001b[39m(\u001b[38;5;28mself\u001b[39m, req, fp, code, msg, hdrs):\n\u001b[32m--> \u001b[39m\u001b[32m639\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m HTTPError(req.full_url, code, msg, hdrs, fp)\n", "\u001b[31mHTTPError\u001b[39m: HTTP Error 400: Bad Request" ] } ], "source": [ "from urllib.error import HTTPError\n", "\n", "VIDEO_URL = \"https://www.youtube.com/watch?v=QsYGlZkevEg\"\n", "\n", "# add_video_info=True makes YoutubeLoader fetch title/author/etc. through pytube.\n", "# That request can be rejected by YouTube (HTTP Error 400: Bad Request), so fall\n", "# back to a transcript-only load instead of leaving the cell in a failed state.\n", "try:\n", "    loader = YoutubeLoader.from_youtube_url(VIDEO_URL, add_video_info=True)\n", "    youtube_docs = loader.load()\n", "except HTTPError as err:\n", "    print(f\"Video metadata unavailable ({err}); loading transcript only.\")\n", "    loader = YoutubeLoader.from_youtube_url(VIDEO_URL, add_video_info=False)\n", "    youtube_docs = loader.load()\n", "youtube_docs" ] }, { "cell_type": "code", "execution_count": null, "id": "7c635f42", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.12" } }, "nbformat": 4, "nbformat_minor": 5 }