Final_Assignment_Template

Sleeping

App Files Files Community

hanshan1988 committed on Feb 27

Commit

a654024

1 Parent(s): 9df9926

changed tool to use youtube transcript api

Browse files

Files changed (2) hide show

agent.py +4 -3
tools.py +41 -22

agent.py CHANGED Viewed

@@ -85,12 +85,13 @@ def assistant(state: AgentState, llm) -> Dict[str, Any]:
             Returns:
                 A single string containing the content of the Wikipedia page.
-        youtube_transcript(url: str) -> str:
             Fetch the transcript of a youtube video.
             Args:
                 url: input youtube url.
             Returns:
-                A single string containing the transcript of the youtube videos.
         python_repl_tool(code: str) -> str:
             Execute Python code and return the output.
@@ -206,7 +207,7 @@ class BasicAgent:
         response = await agent_graph.ainvoke(
             {"messages": messages},
             config={
-                "recursion_limit": 10,
                 "callbacks": [langfuse_handler],
             }
         )

             Returns:
                 A single string containing the content of the Wikipedia page.
+        youtube_transcript(url: str) -> list[dict]:
             Fetch the transcript of a youtube video.
             Args:
                 url: input youtube url.
             Returns:
+                A list of dictionaries containing the transcript of the youtube videos.
+                Each dictionary has 'text', 'start', and 'duration' keys.
         python_repl_tool(code: str) -> str:
             Execute Python code and return the output.
         response = await agent_graph.ainvoke(
             {"messages": messages},
             config={
+                "recursion_limit": 8,
                 "callbacks": [langfuse_handler],
             }
         )

tools.py CHANGED Viewed

@@ -7,11 +7,30 @@ from langchain_community.utilities import WikipediaAPIWrapper
 from langchain_community.tools import WikipediaQueryRun, DuckDuckGoSearchRun, DuckDuckGoSearchResults
 from langchain_community.document_loaders import YoutubeLoader, WebBaseLoader
 from langchain_experimental.utilities import PythonREPL
 # Initialize Python REPL
 python_repl = PythonREPL()
 @tool
 def duckduckgo_search_results(query: str) -> list[dict]:
     """Perform a DuckDuckGo search for the given query and return the results.
@@ -79,27 +98,27 @@ def get_wiki_full(query: str) -> str:
     return content.get_text()[:32_000]  # Limit to 8k tokens to avoid excessive length
-@tool
-def youtube_transcript(url: str) -> str:
-    """Retrieve transcript from Youtube based url.
-    Args:
-        url: input youtube url.
-    Returns:
-        A single string containing the transcript of the youtube videos.
-    """
-    max_attempts = 5  # Set a maximum number of attempts
-    attempts = 0
-    loader = YoutubeLoader.from_youtube_url(url, add_video_info=True)
-    while attempts < max_attempts:
-        try:
-            docs  = loader.load()
-            return docs[0].page_content
-        except Exception as e:
-            attempts += 1
-            print(f"Attempt {attempts} failed: {e}")
-            # Optionally add a delay before retrying
-            time.sleep(1) # Import the time module
-    return "Failed to retrieve transcript after multiple attempts."
 @tool
 def python_repl_tool(code: str) -> str:

 from langchain_community.tools import WikipediaQueryRun, DuckDuckGoSearchRun, DuckDuckGoSearchResults
 from langchain_community.document_loaders import YoutubeLoader, WebBaseLoader
 from langchain_experimental.utilities import PythonREPL
+from youtube_transcript_api import YouTubeTranscriptApi
 # Initialize Python REPL
 python_repl = PythonREPL()
+# Initialise the YouTube transcript API client
+youtube_loader = YouTubeTranscriptApi()
+@tool
+def youtube_transcript(url: str) -> list[dict]:
+    """Retrieve transcript from Youtube based url.
+    Args:
+        url: input youtube url.
+    Returns:
+        A list of dictionaries containing the transcript of the youtube videos.
+        Each dictionary has 'text', 'start', and 'duration' keys.
+    """
+    try:
+        video_id = url.split("watch?v=")[-1]
+        transcript = youtube_loader.fetch(video_id).to_raw_data()
+        return transcript
+    except Exception as e:
+        return f"Error retrieving transcript: {str(e)}"
 @tool
 def duckduckgo_search_results(query: str) -> list[dict]:
     """Perform a DuckDuckGo search for the given query and return the results.
     return content.get_text()[:32_000]  # Limit to 8k tokens to avoid excessive length
+# @tool
+# def youtube_transcript(url: str) -> str:
+#     """Retrieve transcript from Youtube based url.
+#     Args:
+#         url: input youtube url.
+#     Returns:
+#         A single string containing the transcript of the youtube videos.
+#     """
+#     max_attempts = 5  # Set a maximum number of attempts
+#     attempts = 0
+#     loader = YoutubeLoader.from_youtube_url(url, add_video_info=True)
+#     while attempts < max_attempts:
+#         try:
+#             docs  = loader.load()
+#             return docs[0].page_content
+#         except Exception as e:
+#             attempts += 1
+#             print(f"Attempt {attempts} failed: {e}")
+#             # Optionally add a delay before retrying
+#             time.sleep(1) # Import the time module
+#     return "Failed to retrieve transcript after multiple attempts."
 @tool
 def python_repl_tool(code: str) -> str: