hanshan1988 committed on
Commit
a654024
·
1 Parent(s): 9df9926

changed tool to use youtube transcript api

Browse files
Files changed (2) hide show
  1. agent.py +4 -3
  2. tools.py +41 -22
agent.py CHANGED
@@ -85,12 +85,13 @@ def assistant(state: AgentState, llm) -> Dict[str, Any]:
85
  Returns:
86
  A single string containing the content of the Wikipedia page.
87
 
88
- youtube_transcript(url: str) -> str:
89
  Fetch the transcript of a youtube video.
90
  Args:
91
  url: input youtube url.
92
  Returns:
93
- A single string containing the transcript of the youtube videos.
 
94
 
95
  python_repl_tool(code: str) -> str:
96
  Execute Python code and return the output.
@@ -206,7 +207,7 @@ class BasicAgent:
206
  response = await agent_graph.ainvoke(
207
  {"messages": messages},
208
  config={
209
- "recursion_limit": 10,
210
  "callbacks": [langfuse_handler],
211
  }
212
  )
 
85
  Returns:
86
  A single string containing the content of the Wikipedia page.
87
 
88
+ youtube_transcript(url: str) -> list[dict]:
89
  Fetch the transcript of a youtube video.
90
  Args:
91
  url: input youtube url.
92
  Returns:
93
+ A list of dictionaries containing the transcript of the youtube videos.
94
+ Each dictionary has 'text', 'start', and 'duration' keys.
95
 
96
  python_repl_tool(code: str) -> str:
97
  Execute Python code and return the output.
 
207
  response = await agent_graph.ainvoke(
208
  {"messages": messages},
209
  config={
210
+ "recursion_limit": 8,
211
  "callbacks": [langfuse_handler],
212
  }
213
  )
tools.py CHANGED
@@ -7,11 +7,30 @@ from langchain_community.utilities import WikipediaAPIWrapper
7
  from langchain_community.tools import WikipediaQueryRun, DuckDuckGoSearchRun, DuckDuckGoSearchResults
8
  from langchain_community.document_loaders import YoutubeLoader, WebBaseLoader
9
  from langchain_experimental.utilities import PythonREPL
10
-
11
 
12
  # Initialize Python REPL
13
  python_repl = PythonREPL()
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  @tool
16
  def duckduckgo_search_results(query: str) -> list[dict]:
17
  """Perform a DuckDuckGo search for the given query and return the results.
@@ -79,27 +98,27 @@ def get_wiki_full(query: str) -> str:
79
 
80
  return content.get_text()[:32_000] # Limit to 8k tokens to avoid excessive length
81
 
82
- @tool
83
- def youtube_transcript(url: str) -> str:
84
- """Retrieve transcript from Youtube based url.
85
- Args:
86
- url: input youtube url.
87
- Returns:
88
- A single string containing the transcript of the youtube videos.
89
- """
90
- max_attempts = 5 # Set a maximum number of attempts
91
- attempts = 0
92
- loader = YoutubeLoader.from_youtube_url(url, add_video_info=True)
93
- while attempts < max_attempts:
94
- try:
95
- docs = loader.load()
96
- return docs[0].page_content
97
- except Exception as e:
98
- attempts += 1
99
- print(f"Attempt {attempts} failed: {e}")
100
- # Optionally add a delay before retrying
101
- time.sleep(1) # Import the time module
102
- return "Failed to retrieve transcript after multiple attempts."
103
 
104
  @tool
105
  def python_repl_tool(code: str) -> str:
 
7
  from langchain_community.tools import WikipediaQueryRun, DuckDuckGoSearchRun, DuckDuckGoSearchResults
8
  from langchain_community.document_loaders import YoutubeLoader, WebBaseLoader
9
  from langchain_experimental.utilities import PythonREPL
10
+ from youtube_transcript_api import YouTubeTranscriptApi
11
 
12
  # Initialize Python REPL
13
  python_repl = PythonREPL()
14
 
15
+ # Initialise Youtube
16
+ youtube_loader = YouTubeTranscriptApi()
17
+
18
def _extract_video_id(url: str) -> str:
    """Return the YouTube video ID embedded in *url*.

    Recognised forms: any URL carrying a ``v=`` query parameter
    (``.../watch?v=ID&t=30s``), short links (``youtu.be/ID``), and the
    ``/embed/ID``, ``/shorts/ID``, ``/live/ID`` and ``/v/ID`` paths on
    youtube.com hosts. Anything else (for example a bare video ID) falls back
    to the text after ``watch?v=``, i.e. the whole stripped input.
    """
    # Imported locally so this block does not rely on the module's import list.
    from urllib.parse import parse_qs, urlparse

    candidate = url.strip()
    # urlparse only recognises the host when a scheme (or a leading "//") is
    # present, so scheme-less input such as "youtu.be/ID" gets a "//" prefix.
    parsed = urlparse(candidate if "://" in candidate else f"//{candidate}")

    # Using the parsed query drops trailing parameters such as "&t=30s" or
    # "&list=..." that a plain split on "watch?v=" would keep in the ID.
    ids_from_query = parse_qs(parsed.query).get("v")
    if ids_from_query:
        return ids_from_query[0]

    host = (parsed.hostname or "").lower()
    segments = [part for part in parsed.path.split("/") if part]
    if host == "youtu.be" and segments:
        return segments[0]

    is_youtube_host = host == "youtube.com" or host.endswith(
        (".youtube.com", "youtube-nocookie.com")
    )
    if (
        is_youtube_host
        and len(segments) >= 2
        and segments[0] in {"embed", "shorts", "live", "v"}
    ):
        return segments[1]

    # Unrecognised shape: keep the previous behaviour so bare IDs still work.
    return candidate.split("watch?v=")[-1]


@tool
def youtube_transcript(url: str) -> list[dict]:
    """Retrieve transcript from Youtube based url.
    Args:
        url: input youtube url.
    Returns:
        A list of dictionaries containing the transcript of the youtube videos.
        Each dictionary has 'text', 'start', and 'duration' keys.
        If the transcript cannot be retrieved, a string starting with
        'Error retrieving transcript:' is returned instead.
    """
    try:
        video_id = _extract_video_id(url)
        return youtube_loader.fetch(video_id).to_raw_data()
    except Exception as e:
        # Tool boundary: report the failure as text so the calling agent can
        # read it and continue, rather than aborting the whole graph run.
        return f"Error retrieving transcript: {str(e)}"
33
+
34
  @tool
35
  def duckduckgo_search_results(query: str) -> list[dict]:
36
  """Perform a DuckDuckGo search for the given query and return the results.
 
98
 
99
  return content.get_text()[:32_000] # Limit to 8k tokens to avoid excessive length
100
 
101
+ # @tool
102
+ # def youtube_transcript(url: str) -> str:
103
+ # """Retrieve transcript from Youtube based url.
104
+ # Args:
105
+ # url: input youtube url.
106
+ # Returns:
107
+ # A single string containing the transcript of the youtube videos.
108
+ # """
109
+ # max_attempts = 5 # Set a maximum number of attempts
110
+ # attempts = 0
111
+ # loader = YoutubeLoader.from_youtube_url(url, add_video_info=True)
112
+ # while attempts < max_attempts:
113
+ # try:
114
+ # docs = loader.load()
115
+ # return docs[0].page_content
116
+ # except Exception as e:
117
+ # attempts += 1
118
+ # print(f"Attempt {attempts} failed: {e}")
119
+ # # Optionally add a delay before retrying
120
+ # time.sleep(1) # Import the time module
121
+ # return "Failed to retrieve transcript after multiple attempts."
122
 
123
  @tool
124
  def python_repl_tool(code: str) -> str: