neuralgeekroot committed on
Commit
3c5437c
·
1 Parent(s): f4a9ff2

updated README.md file

Browse files
Files changed (4) hide show
  1. .DS_Store +0 -0
  2. BlogAI-dummy-v2.py +356 -0
  3. BlogGeneration.zip +0 -0
  4. README.md +0 -1
.DS_Store ADDED
Binary file (6.15 kB). View file
 
BlogAI-dummy-v2.py ADDED
@@ -0,0 +1,356 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+ from langchain_groq import ChatGroq
4
+ from langgraph.graph import StateGraph, START, END
5
+ from langgraph.prebuilt import ToolNode, tools_condition
6
+ from langchain_core.prompts import PromptTemplate
7
+ import streamlit as st
8
+ from typing import List, TypedDict, Annotated, Literal
9
+ from pydantic import BaseModel, Field
10
+ from langgraph.constants import Send
11
+ import operator
12
+ from langchain_core.messages import SystemMessage, HumanMessage
13
+ from langsmith import traceable
14
+ from openai import OpenAI
15
+ from langchain.agents import AgentExecutor, create_react_agent
16
+ from langchain_community.tools import ArxivQueryRun, TavilySearchResults, YouTubeSearchTool
17
+ from langchain_community.utilities import ArxivAPIWrapper
18
+ from langchain import hub
19
+ from langchain.schema import Document
20
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
21
+ from IPython.display import Image, display
22
+
23
# Load environment variables from a local .env file (if one exists).
load_dotenv()

# Credentials and settings this script relies on. The original code assigned
# os.environ[key] = os.getenv(key) unconditionally, which raises TypeError as
# soon as one key is unset (os.environ only accepts str values). Re-export only
# the keys that are present; a missing key is left for the client that needs it
# to report.
for _env_key in (
    'GROQ_API_KEY',
    'LANGCHAIN_API_KEY',
    'OPENAI_API_KEY',
    'LANGCHAIN_PROJECT_NAME',
    'TAVILY_API_KEY',
):
    _env_value = os.getenv(_env_key)
    if _env_value is not None:
        os.environ[_env_key] = _env_value

# Tracing flag is always switched on, independent of the .env contents.
os.environ['LANGSMITH_TRACING_V2'] = 'true'
31
+
32
# Chat model shared by the router, the graph nodes and the ReAct agent.
llm = ChatGroq(model='gemma2-9b-it')
# OpenAI client; generate_image_with_dalle() calls client.images.generate on it.
client = OpenAI()

# Tavily web-search tool, built by hand and capped at one result per query.
tavily_tool = TavilySearchResults(max_results=1)

# Tools offered to the agent: arXiv lookup, YouTube search and Tavily search.
tools = [ArxivQueryRun(api_wrapper=ArxivAPIWrapper()), YouTubeSearchTool(), tavily_tool]

# ReAct prompt pulled from the hub.
prompt = hub.pull("hwchase17/react")

# ReAct agent plus the executor that actually runs it (verbose logging on).
agent = create_react_agent(llm, tools, prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
53
+
54
class Route(BaseModel):
    """Structured routing decision requested from the LLM."""

    # One of the three supported input kinds; None when no value is supplied.
    step: Literal["Arxiv", "Youtube", "Text"] = Field(
        default=None,
        description="The next step in the routing process",
    )


# LLM bound to the Route schema; router_node reads ``.step`` from its result.
router = llm.with_structured_output(Route)
59
+
60
+ # Define the BlogState
61
class BlogState(TypedDict):
    """State dictionary shared by the nodes of the blog-generation graph."""

    # Dicts with "content" and "url" keys, written by the tool nodes.
    search_results: List[dict]
    # "Arxiv", "Youtube" or "Text"; written by router_node.
    input_type: str
    # Raw text the user typed into the app.
    input_data: str
    # summarize_results writes a single joined string here, not a list.
    summary: str
    # Outline entries produced by orchestrator_node.
    outline: List[str]
    # operator.add is attached as the merge function for this key, so a node
    # should return only the items it wants added.
    completed_sections: Annotated[List[str], operator.add]
    image_urls: List[str]
    fallback_links: List[str]
    review_content: str
    # Decision written by review_node ("send_seo_optimization" or
    # "revise_section_content") and read by the conditional edge after "review".
    # It has to be declared here for the node's update to be a known state key.
    step: str
    seo_optimized_content: str
    final_blog: str
73
+
74
+ # Router Node
75
@traceable
def router_node(state: BlogState):
    """Ask the LLM whether the user's input is an arXiv link, YouTube link or text.

    Args:
        state: Graph state; only ``state["input_data"]`` is read.

    Returns:
        A partial state update ``{"input_type": <"Arxiv" | "Youtube" | "Text">}``.
    """
    st.write('Deciding the router node...')
    routing_rules = SystemMessage(
        content="""Route the input data to Arxiv, Youtube, or Text node based on the user's request.
        - If the input is an arXiv link (e.g., https://arxiv.org/abs/2106.15928) or arXiv ID (e.g., 2106.15928), route to 'Arxiv'.
        - If the input is a YouTube link (e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ), route to 'Youtube'.
        - If the input is plain text (e.g., 'Latest advancements in AI and machine learning'), route to 'Text'.
        """
    )
    decision = router.invoke([routing_rules, HumanMessage(content=state["input_data"])])

    # Route.step defaults to None. route_decision sends anything that is not
    # 'Arxiv'/'Youtube' to text_tool, but text_tool_node only runs its search
    # when input_type == 'Text', so an empty decision must become 'Text'.
    step = getattr(decision, "step", None) or "Text"

    st.write(f"LLM routing the input data to {step}")
    return {"input_type": step}
92
+
93
def route_decision(state):
    """Return the name of the tool node that matches ``state['input_type']``.

    'Arxiv' maps to 'arxiv_tool', 'Youtube' to 'youtube_tool', and every other
    value to 'text_tool'.
    """
    st.write('Routing to the specific node...')
    node_by_type = {'Arxiv': 'arxiv_tool', 'Youtube': 'youtube_tool'}
    return node_by_type.get(state['input_type'], 'text_tool')
101
+
102
+ # Tool Nodes (Replaced with AgentExecutor)
103
@traceable
def arxiv_tool_node(state: BlogState):
    """Run the agent on the user's input when it was classified as 'Arxiv'.

    Returns the state with ``search_results`` replaced by a single entry holding
    the agent's output and the original input as its URL; for any other input
    type the state is returned unchanged.
    """
    if state['input_type'] != 'Arxiv':
        return state

    st.write("Fetching data from arXiv using agent...")
    agent_reply = agent_executor.invoke({"input": state['input_data']})
    entry = {"content": agent_reply['output'], "url": state['input_data']}
    return {**state, 'search_results': [entry]}
110
+
111
@traceable
def youtube_tool_node(state: BlogState):
    """Run the agent on the user's input when it was classified as 'Youtube'.

    Returns the state with ``search_results`` replaced by a single entry holding
    the agent's output and the original input as its URL; for any other input
    type the state is returned unchanged.
    """
    if state['input_type'] != 'Youtube':
        return state

    st.write("Fetching data from YouTube using agent...")
    agent_reply = agent_executor.invoke({"input": state['input_data']})
    entry = {"content": agent_reply['output'], "url": state['input_data']}
    return {**state, 'search_results': [entry]}
118
+
119
@traceable
def text_tool_node(state: BlogState):
    """Run the agent on the user's input when it was classified as 'Text'.

    Returns the state with ``search_results`` replaced by a single entry holding
    the agent's output and a fixed placeholder URL; for any other input type the
    state is returned unchanged.
    """
    if state['input_type'] != 'Text':
        return state

    st.write("Searching web for the data using agent...")
    agent_reply = agent_executor.invoke({"input": state['input_data']})
    entry = {"content": agent_reply['output'], "url": "https://example.com"}
    return {**state, 'search_results': [entry]}
126
+
127
@traceable  # LangSmith debugging
def summarize_results(state: BlogState):
    """Condense the collected search results into one summary string.

    Every result dict with non-empty "content" becomes a Document, the documents
    are split (chunk_size=1000, chunk_overlap=100) and the first three chunks
    are joined with newlines. Without usable results a fixed placeholder
    sentence is stored instead.

    Returns:
        The incoming state with ``summary`` set.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

    # Anything that is not a list is treated as "no results".
    raw_results = state.get("search_results", [])
    if not isinstance(raw_results, list):
        raw_results = []

    documents = []
    for item in raw_results:
        usable = item and isinstance(item, dict) and item.get("content")
        if not usable:
            continue
        documents.append(
            Document(
                page_content=item.get("content", ""),
                metadata={"source": item.get("url", "")},
            )
        )

    if documents:
        chunks = splitter.split_documents(documents)
        # Only the first three chunks make it into the summary.
        summary = "\n".join(chunk.page_content for chunk in chunks[:3])
    else:
        summary = "No relevant information available."

    return {**state, 'summary': summary}
150
+
151
+ # Orchestrator Node
152
@traceable
def orchestrator_node(state: BlogState):
    """Ask the LLM for a content outline based on the summary.

    Args:
        state: Graph state; ``state['summary']`` is sent to the model.

    Returns:
        The incoming state with ``outline`` set. For a string reply this is the
        list of its non-blank lines; a non-string reply is stored as received.
    """
    st.write("Creating blog outline...")
    sys_msg = SystemMessage(content="Provide an interesting and informative content outline for the given summary.")
    human_msg = HumanMessage(content=f"Here is the blog topic: {state['summary']}")
    reply = llm.invoke([sys_msg, human_msg])

    content = reply.content
    if isinstance(content, str):
        # Drop blank lines: assign_writers creates one section_writer task per
        # outline entry, so every empty line would cost a pointless LLM call
        # and add an empty section.
        outline = [line for line in content.split("\n") if line.strip()]
    else:
        outline = content
    return {**state, 'outline': outline}
160
+
161
+ # Assign Writers Node
162
@traceable
def assign_writers(state: BlogState):
    """Create one 'section_writer' Send per outline entry.

    Returns:
        A list of ``Send('section_writer', {'section': entry})`` objects, or an
        empty list when the state has no outline.
    """
    st.write("Assigning writers to sections...")
    outline = state.get('outline')
    if outline:
        return [Send('section_writer', {'section': entry}) for entry in outline]

    st.write("No outline found to assign writers.")
    return []
169
+
170
+ # Section Writer Node
171
@traceable
def section_writer_node(state: BlogState):
    """Write one blog section with the LLM.

    Args:
        state: Payload carrying the outline entry under ``state['section']``
            (assign_writers sends ``{'section': entry}``).

    Returns:
        ``{"completed_sections": [<generated text>]}``, i.e. only the newly
        written section.
    """
    st.write("Generating content for the section...")
    section = state['section']
    reply = llm.invoke([
        SystemMessage(content="Write a detailed blog section based on the provided name and description."),
        HumanMessage(content=f"Section Name: {section}, Description: {section}")
    ])
    # completed_sections is declared with operator.add as its merge function,
    # so return just the new item. Appending to the list taken from the
    # incoming state and returning all of it would mutate shared state and
    # re-add any sections that were already stored.
    return {"completed_sections": [reply.content]}
181
+
182
+ # Function to generate an image using DALL·E
183
def generate_image_with_dalle(prompt: str):
    """Request a single 1024x1024 HD image from the "dall-e-3" model.

    Args:
        prompt: Text prompt passed to the image endpoint.

    Returns:
        The URL of the first generated image, or None when the request raised;
        in that case the error is shown through ``st.error``.
    """
    request = {
        "model": "dall-e-3",
        "prompt": prompt,
        "size": "1024x1024",
        "quality": "hd",
        "n": 1,
    }
    try:
        response = client.images.generate(**request)
        return response.data[0].url
    except Exception as e:
        # Best effort: the caller switches to a fallback link on None.
        st.error(f"Failed to generate image: {e}")
        return None
197
+
198
+ # Function to provide a fallback link for image search
199
def get_fallback_image_link(topic: str):
    """Build a Google Images search URL for ``topic``.

    Args:
        topic: Free text to search for. It is URL-encoded, so characters such as
            ``&``, ``#``, newlines or parentheses cannot break the query string
            (or a Markdown link the URL is embedded in).

    Returns:
        ``https://www.google.com/search?q=<encoded topic>&tbm=isch``.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import quote_plus

    # quote_plus turns spaces into '+', as the previous replace() did, and
    # additionally percent-encodes every reserved character.
    search_query = quote_plus(topic)
    return f"https://www.google.com/search?q={search_query}&tbm=isch"
203
+
204
+ # Image Generator Node
205
@traceable
def image_generator_node(state: BlogState):
    """Generate one image for the first completed section, or a fallback link.

    Args:
        state: Graph state; reads ``completed_sections``, ``image_urls`` and
            ``fallback_links``.

    Returns:
        A partial update with ``image_urls`` and ``fallback_links`` only. The
        rest of the state is deliberately not echoed back: ``completed_sections``
        is declared with operator.add as its merge function, so returning it
        again would add the existing sections a second time.
    """
    st.write("Generating an image for the section...")

    # Work on copies so the lists held by the incoming state are not mutated.
    image_urls = list(state.get('image_urls', []))
    fallback_links = list(state.get('fallback_links', []))

    completed_sections = state.get("completed_sections", [])
    if not completed_sections:
        st.write("No completed sections found to generate an image.")
        return {"image_urls": image_urls, "fallback_links": fallback_links}

    # Only the first section is illustrated.
    section = completed_sections[0]
    prompt = f"Generate an image for the blog section: {section} with no text. More of a representation and informative image"

    # TODO: the original author's note says to replace DALL-E with an
    # open-source image model.
    image_url = generate_image_with_dalle(prompt)
    if image_url:
        image_urls.append(image_url)
    else:
        # Image generation failed: offer an image-search link instead.
        fallback_links.append(get_fallback_image_link(section))

    return {"image_urls": image_urls, "fallback_links": fallback_links}
227
+
228
+ # Review Node
229
@traceable
def review_node(state: BlogState):
    """Ask the LLM whether the written sections need another revision.

    Returns:
        ``{"step": "send_seo_optimization"}`` or
        ``{"step": "revise_section_content"}``. Any reply that is not exactly
        one of these two strings (after strip/lower) counts as
        "send_seo_optimization", as does an empty ``completed_sections`` list.
    """
    st.write("Reviewing the section...")
    sections = state.get("completed_sections", [])
    if not sections:
        st.write("No completed sections found to review.")
        return {"step": "send_seo_optimization"}

    review_prompt = PromptTemplate.from_template(
        "Check if the section can be improved: {completed_sections}. "
        "If no, return 'send_seo_optimization'. "
        "If yes, return 'revise_section_content'."
    )
    reply = (review_prompt | llm).invoke({'completed_sections': sections})

    verdict = reply.content.strip().lower()
    allowed = ("send_seo_optimization", "revise_section_content")
    # Unrecognised answers fall through to the SEO step.
    return {"step": verdict if verdict in allowed else "send_seo_optimization"}
250
+
251
+ # SEO Optimization Node
252
@traceable
def seo_optimization_node(state: BlogState):
    """Have the LLM rewrite the completed sections for search ranking.

    Args:
        state: Graph state; reads ``completed_sections``.

    Returns:
        A partial update ``{"seo_optimized_content": <text>}``. With no
        completed sections the current value (or "") is written back unchanged.
    """
    st.write("Performing SEO optimization...")
    completed_sections = state.get("completed_sections", [])
    if not completed_sections:
        st.write("No completed sections found for SEO optimization.")
        return {"seo_optimized_content": state.get("seo_optimized_content", "")}

    result = llm.invoke(f"Optimize the blog for search ranking: {completed_sections}")
    # Return only the key this node owns. Echoing the whole state would pass
    # completed_sections through its operator.add merge function again and
    # duplicate every section.
    return {"seo_optimized_content": result.content}
262
+
263
+ # Publish Node
264
@traceable
def publish_node(state: BlogState):
    """Render images/fallback links in the app and set the final blog text.

    Args:
        state: Graph state; reads ``seo_optimized_content``, ``image_urls`` and
            ``fallback_links``.

    Returns:
        A partial update ``{"final_blog": <seo_optimized_content or "">}``.
    """
    st.write("Finalizing and publishing the blog...")
    final_blog = state.get('seo_optimized_content', '')

    # Show every generated image.
    image_urls = state.get('image_urls')
    if image_urls:
        st.write("AI-Generated Images")
        for image_url in image_urls:
            st.image(image_url, caption="AI-Generated Image")

    # Show search links for sections whose image could not be generated.
    fallback_links = state.get('fallback_links')
    if fallback_links:
        st.write("Fallback Image Search Links")
        for link in fallback_links:
            st.markdown(f"[Search for related images on Google]({link})")

    # Return only final_blog. Echoing the whole state would pass
    # completed_sections through its operator.add merge function again and
    # duplicate every section in the result.
    return {"final_blog": final_blog}
282
+
283
+ # Streamlit App
284
def _build_workflow():
    """Assemble and compile the blog-generation graph."""
    builder = StateGraph(BlogState)

    # Nodes
    builder.add_node("router", router_node)
    builder.add_node("arxiv_tool", arxiv_tool_node)
    builder.add_node("youtube_tool", youtube_tool_node)
    builder.add_node("text_tool", text_tool_node)
    builder.add_node("orchestrator", orchestrator_node)
    builder.add_node("section_writer", section_writer_node)
    builder.add_node("image_generator", image_generator_node)
    builder.add_node("review", review_node)
    builder.add_node("seo_optimization", seo_optimization_node)
    builder.add_node("publish", publish_node)
    builder.add_node('summarize_results', summarize_results)

    # Edges: route the input to one tool node, then summarize and outline.
    builder.add_edge(START, "router")
    builder.add_conditional_edges(
        "router",
        route_decision,
        {
            "arxiv_tool": "arxiv_tool",
            "youtube_tool": "youtube_tool",
            "text_tool": "text_tool",
        },
    )
    builder.add_edge("arxiv_tool", "summarize_results")
    builder.add_edge("youtube_tool", "summarize_results")
    builder.add_edge('text_tool', 'summarize_results')
    builder.add_edge('summarize_results', 'orchestrator')

    # assign_writers fans out one section_writer task per outline entry.
    builder.add_conditional_edges("orchestrator", assign_writers, ["section_writer"])
    builder.add_edge("section_writer", "image_generator")
    builder.add_edge("image_generator", "review")

    # NOTE(review): the revise branch re-enters section_writer with the full
    # graph state, while section_writer_node reads state['section'], which only
    # exists in the payload sent by assign_writers. Confirm the intended
    # revise flow before relying on this branch.
    builder.add_conditional_edges(
        "review",
        lambda state: "seo_optimization" if state.get("step") == "send_seo_optimization" else "section_writer",
    )
    builder.add_edge("seo_optimization", "publish")
    builder.add_edge("publish", END)

    return builder.compile()


# Streamlit App
def main():
    """Streamlit entry point: read the user's input and run the workflow."""
    st.title("Blog Generation Workflow")

    # Input options
    input_data = st.text_input("Enter YouTube, Arxiv URL, or your desired Query")

    if not st.button("Run Workflow"):
        return

    # Nothing to route or search for: stop before spending any LLM calls.
    if not input_data or not input_data.strip():
        st.warning("Please enter a YouTube link, an arXiv link or a query first.")
        return

    # Initialize the state
    initial_state = {
        "search_results": [],  # Filled by the tool node chosen by the router
        "input_type": "",  # Will be set by the router_node
        "input_data": input_data,
        "summary": [],  # Overwritten by summarize_results before it is read
        "outline": [],
        "completed_sections": [],
        "image_urls": [],
        "fallback_links": [],
        "review_content": "",
        "seo_optimized_content": "",
        "final_blog": "",
    }

    # Build, compile and run the workflow
    workflow = _build_workflow()
    result = workflow.invoke(initial_state)

    # Display the final result
    st.subheader("Final Blog Output")
    st.write(result['final_blog'])


if __name__ == "__main__":
    main()
BlogGeneration.zip ADDED
Binary file (3.5 kB). View file
 
README.md CHANGED
@@ -17,4 +17,3 @@ This project automates blog generation using an LLM-powered workflow.
17
  ## 🛠 Setup
18
  1. Clone repo
19
  2. Install dependencies: `pip install -r requirements.txt`
20
- # BlogGeneration
 
17
  ## 🛠 Setup
18
  1. Clone repo
19
  2. Install dependencies: `pip install -r requirements.txt`