NaderAfshar commited on
Commit
6cdd178
·
1 Parent(s): 2cd0d42

Commiting updated files before refining

Browse files
Files changed (5) hide show
  1. app.py +416 -0
  2. data/fake_application_form.pdf +0 -0
  3. data/fake_resume.pdf +0 -0
  4. helper.py +29 -0
  5. requirements.txt +23 -0
app.py ADDED
@@ -0,0 +1,416 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # coding: utf-8
3
+
4
+ # # Lesson 6: Use your voice
5
+
6
+ # **Lesson objective**: Get voice feedback
7
+ #
8
 + # So far we've set up a moderately complex workflow with a human feedback loop. Let's run it through the visualizer to see what it looks like.
9
+
10
+ # <div style="background-color:#fff1d7; padding:15px;"> <b> Note</b>: Make sure to run the notebook cell by cell. Please try to avoid running all cells at once.</div>
11
+
12
+ # In[1]:
13
+
14
+
15
+ # Warning control
16
+ import warnings
17
+ warnings.filterwarnings('ignore')
18
+
19
+
20
+ import os, json
21
+ from llama_parse import LlamaParse
22
+ from llama_index.llms.openai import OpenAI
23
+ from llama_index.embeddings.openai import OpenAIEmbedding
24
+ from llama_index.core import (
25
+ VectorStoreIndex,
26
+ StorageContext,
27
+ load_index_from_storage
28
+ )
29
+ from llama_index.core.workflow import (
30
+ StartEvent,
31
+ StopEvent,
32
+ Workflow,
33
+ step,
34
+ Event,
35
+ Context,
36
+ InputRequiredEvent,
37
+ HumanResponseEvent
38
+ )
39
+ from llama_index.utils.workflow import draw_all_possible_flows
40
+ import whisper
41
+ import gradio as gr
42
+ import asyncio
43
+ from queue import Queue
44
+ from dotenv import load_dotenv
45
+
46
+ # Load environment variables
47
+ load_dotenv()
48
+ COHERE_API_KEY = os.getenv("COHERE_API_KEY")
49
+ llama_cloud_api_key = os.getenv("LLAMA_CLOUD_API_KEY")
50
+ LLAMA_CLOUD_BASE_URL = os.getenv("LLAMA_CLOUD_BASE_URL")
51
+
52
+
53
class ParseFormEvent(Event):
    """Carries the application-form path from set_up to the form-parsing step."""
    # path to the application form PDF
    application_form: str

class QueryEvent(Event):
    """One per form field: a question for the resume query engine.

    NOTE: a `field` attribute is also attached when the event is sent
    (see generate_questions), in addition to the declared `query`.
    """
    query: str

class ResponseEvent(Event):
    """The query engine's answer for a single form field.

    NOTE: a `field` attribute is also attached when the event is sent
    (see ask_question), in addition to the declared `response`.
    """
    response: str

class FeedbackEvent(Event):
    """Human feedback that loops back into question generation."""
    feedback: str

class GenerateQuestionsEvent(Event):
    """Signals that the form fields have been parsed and questions can be generated."""
    pass
67
+
68
class RAGWorkflow(Workflow):
    """Event-driven RAG workflow that fills in a job application form from a resume.

    Pipeline: set_up (index the resume) -> parse_form (extract form fields) ->
    generate_questions (one QueryEvent per field) -> ask_question (RAG answer) ->
    fill_in_application (collect answers, ask a human for feedback) ->
    get_feedback (loop back on feedback, or stop when the human is satisfied).
    """

    # directory used to persist/reload the vector index between runs
    storage_dir = "./storage"
    llm: OpenAI
    # NOTE(review): as_query_engine() returns a query engine, not a
    # VectorStoreIndex — this annotation looks wrong; confirm before relying on it.
    query_engine: VectorStoreIndex

    @step
    async def set_up(self, ctx: Context, ev: StartEvent) -> ParseFormEvent:
        """Validate inputs, build (or reload) the resume index, and create the query engine.

        Expects `ev.resume_file` and `ev.application_form` on the StartEvent;
        raises ValueError if either is missing.
        """
        if not ev.resume_file:
            raise ValueError("No resume file provided")

        if not ev.application_form:
            raise ValueError("No application form provided")

        # give ourselves an LLM to work with
        self.llm = OpenAI(model="gpt-4o-mini")

        # ingest our data and set up the query engine
        if os.path.exists(self.storage_dir):
            # we've already ingested our documents: reload the persisted index
            storage_context = StorageContext.from_defaults(persist_dir=self.storage_dir)
            index = load_index_from_storage(storage_context)
        else:
            # we need to parse and load our documents
            documents = LlamaParse(
                api_key=llama_cloud_api_key,
                base_url=os.getenv("LLAMA_CLOUD_BASE_URL"),
                result_type="markdown",
                content_guideline_instruction="This is a resume, gather related facts together and format it as bullet points with headers"
            ).load_data(ev.resume_file)
            # embed and index the documents
            index = VectorStoreIndex.from_documents(
                documents,
                embed_model=OpenAIEmbedding(model_name="text-embedding-3-small")
            )
            # persist so subsequent runs skip parsing/embedding
            index.storage_context.persist(persist_dir=self.storage_dir)

        # either way, create a query engine
        self.query_engine = index.as_query_engine(llm=self.llm, similarity_top_k=5)

        # let's pass our application form to a new step where we parse it
        return ParseFormEvent(application_form=ev.application_form)

    # we've separated the form parsing from the question generation
    @step
    async def parse_form(self, ctx: Context, ev: ParseFormEvent) -> GenerateQuestionsEvent:
        """Parse the application form and store its field names in workflow context.

        Stores the list under the "fields_to_fill" context key. May raise
        json.JSONDecodeError / KeyError if the LLM does not return the
        requested `{ fields: [...] }` JSON shape.
        """
        parser = LlamaParse(
            api_key=llama_cloud_api_key,
            base_url=os.getenv("LLAMA_CLOUD_BASE_URL"),
            result_type="markdown",
            content_guideline_instruction="This is a job application form. Create a list of all the fields that need to be filled in.",
            formatting_instruction="Return a bulleted list of the fields ONLY."
        )

        # get the LLM to convert the parsed form into JSON
        result = parser.load_data(ev.application_form)[0]
        raw_json = self.llm.complete(
            f"This is a parsed form. Convert it into a JSON object containing only the list of fields to be filled in, in the form {{ fields: [...] }}. <form>{result.text}</form>. Return JSON ONLY, no markdown.")
        fields = json.loads(raw_json.text)["fields"]

        await ctx.set("fields_to_fill", fields)

        return GenerateQuestionsEvent()

    # this step can get triggered either by GenerateQuestionsEvent or a FeedbackEvent
    @step
    async def generate_questions(self, ctx: Context, ev: GenerateQuestionsEvent | FeedbackEvent) -> QueryEvent:
        """Emit one QueryEvent per form field (folding in prior human feedback, if any).

        NOTE(review): events are dispatched via ctx.send_event and the method
        itself returns None despite the QueryEvent annotation — the annotation
        documents the emitted event type, not the return value.
        """
        # get the list of fields to fill in
        fields = await ctx.get("fields_to_fill")

        # generate one query for each of the fields, and fire them off
        for field in fields:
            question = f"How would you answer this question about the candidate? <field>{field}</field>"

            # FeedbackEvent has a `feedback` attr; GenerateQuestionsEvent does not
            if hasattr(ev,"feedback"):
                question += f"""
\nWe previously got feedback about how we answered the questions.
It might not be relevant to this particular field, but here it is:
<feedback>{ev.feedback}</feedback>
"""

            ctx.send_event(QueryEvent(
                field=field,
                query=question
            ))

        # store the number of fields so we know how many to wait for later
        await ctx.set("total_fields", len(fields))
        return

    @step
    async def ask_question(self, ctx: Context, ev: QueryEvent) -> ResponseEvent:
        """Answer a single field's question against the resume index."""
        print(f"Asking question: {ev.query}")

        response = self.query_engine.query(f"This is a question about the specific resume we have in our database: {ev.query}")

        print(f"Answer was: {str(response)}")

        return ResponseEvent(field=ev.field, response=response.response)

    # we now emit an InputRequiredEvent
    @step
    async def fill_in_application(self, ctx: Context, ev: ResponseEvent) -> InputRequiredEvent:
        """Collect all field answers, draft the filled form, and request human review.

        Returns None until all `total_fields` ResponseEvents have arrived;
        then stores the draft under "filled_form" and emits InputRequiredEvent.
        """
        # get the total number of fields to wait for
        total_fields = await ctx.get("total_fields")

        responses = ctx.collect_events(ev, [ResponseEvent] * total_fields)
        if responses is None:
            return None # do nothing if there's nothing to do yet

        # we've got all the responses!
        responseList = "\n".join("Field: " + r.field + "\n" + "Response: " + r.response for r in responses)

        result = self.llm.complete(f"""
You are given a list of fields in an application form and responses to
questions about those fields from a resume. Combine the two into a list of
fields and succinct, factual answers to fill in those fields.

<responses>
{responseList}
</responses>
""")

        # save the result for later
        await ctx.set("filled_form", str(result))

        # Let's get a human in the loop
        return InputRequiredEvent(
            prefix="How does this look? Give me any feedback you have on any of the answers.",
            result=result
        )

    # Accept the feedback.
    @step
    async def get_feedback(self, ctx: Context, ev: HumanResponseEvent) -> FeedbackEvent | StopEvent:
        """Classify human feedback: StopEvent when satisfied, else loop via FeedbackEvent."""
        result = self.llm.complete(f"""
You have received some human feedback on the form-filling task you've done.
Does everything look good, or is there more work to be done?
<feedback>
{ev.response}
</feedback>
If everything is fine, respond with just the word 'OKAY'.
If there's any other feedback, respond with just the word 'FEEDBACK'.
""")

        verdict = result.text.strip()

        print(f"LLM says the verdict was {verdict}")
        if (verdict == "OKAY"):
            return StopEvent(result=await ctx.get("filled_form"))
        else:
            return FeedbackEvent(feedback=ev.response)
222
+
223
+
224
+ # In[4]:
225
+
226
+
227
# Render every possible path through the workflow as an interactive HTML page.
# NOTE(review): assumes the "workflows/" directory already exists — confirm,
# or draw_all_possible_flows may fail when writing the file.
WORKFLOW_FILE = "workflows/RAG-EventDriven.html"
draw_all_possible_flows(RAGWorkflow, filename=WORKFLOW_FILE)


# In[5]:


from IPython.display import display, HTML, DisplayHandle
from helper import extract_html_content

# Embed the generated visualization inline (notebook-style display;
# only meaningful when running under IPython/Jupyter).
html_content = extract_html_content(WORKFLOW_FILE)
display(HTML(html_content), metadata=dict(isolated=True))
239
+
240
+
241
+ # Cool! You can see the path all the way to the end and the feedback loop is clear.
242
+
243
+ # <div style="background-color:#fff6ff; padding:13px; border-width:3px; border-color:#efe6ef; border-style:solid; border-radius:6px">
244
+ # <p> 💻 &nbsp; <b>To access <code>fake_application_form.pdf</code>, <code>fake_resume.pdf</code>, <code>requirements.txt</code> and <code>helper.py</code> files:</b> 1) click on the <em>"File"</em> option on the top menu of the notebook and then 2) click on <em>"Open"</em>. The form and resume are inside the data folder.
245
+ #
246
+ # <p> ⬇ &nbsp; <b>Download Notebooks:</b> 1) click on the <em>"File"</em> option on the top menu of the notebook and then 2) click on <em>"Download as"</em> and select <em>"Notebook (.ipynb)"</em>.</p>
247
+ #
248
+ # <p> 📒 &nbsp; For more help, please see the <em>"Appendix – Tips and Help"</em> Lesson.</p>
249
+ #
250
+ # </div>
251
+
252
+ # <p style="background-color:#f7fff8; padding:15px; border-width:3px; border-color:#e0f0e0; border-style:solid; border-radius:6px"> 🚨
253
+ # &nbsp; <b>Different Run Results:</b> The output generated by AI chat models can vary with each execution due to their dynamic, probabilistic nature. Don't be surprised if your results differ from those shown in the video.</p>
254
+
255
+ # ## Getting voice feedback
256
+
257
+ # Now, just for fun, you'll do one more thing: change the feedback from text feedback to actual words spoken out loud. To do this we'll use a different model from OpenAI called Whisper. LlamaIndex has a built-in way to transcribe audio files into text using Whisper.
258
+ #
259
+ # Here's a function that takes a file and uses Whisper to return just the text:
260
+
261
+ # In[6]:
262
+
263
+
264
def transcribe_speech(filepath):
    """Transcribe an audio file to text using OpenAI's Whisper (base model).

    Args:
        filepath: Path to the recorded audio file, or None if nothing was captured.

    Returns:
        The transcribed text, or "" (after a Gradio warning) when filepath is None.
    """
    if filepath is None:
        gr.Warning("No audio found, please retry.")
        return ""

    # Lazily load the Whisper model once and cache it on the function object:
    # the original reloaded the model from disk on every call, which is slow.
    model = getattr(transcribe_speech, "_model", None)
    if model is None:
        model = whisper.load_model("base")
        transcribe_speech._model = model

    document = model.transcribe(filepath)

    return document['text']
273
+
274
+
275
+ # But before we can use it, you need to capture some audio from your microphone. That involves some extra steps!
276
+ #
277
+ # First, create a callback function that saves data to a global variable.
278
+
279
+ # In[15]:
280
+
281
+
282
def store_transcription(output):
    """Save the latest transcription into the module-level global and echo it back.

    Returning the input unchanged lets this be chained inside the Gradio
    `fn=` callback while still displaying the transcription in the UI.
    """
    global transcription_value
    transcription_value = output
    return output
286
+
287
+
288
+ # Now use Gradio, which has special widgets that can render inside a notebook, to create an interface
289
+ # for capturing audio from a microphone. When the audio is captured, it calls `transcribe_speech` on the recorded data,
290
+ # and calls `store_transcription` on that.
291
+
292
+
293
# Gradio interface: record from the microphone (saved to a temp file path),
# transcribe it with Whisper, then stash the text via store_transcription.
mic_transcribe = gr.Interface(
    fn=lambda x: store_transcription(transcribe_speech(x)),
    inputs=gr.Audio(sources=["microphone"],
                    type="filepath"),
    outputs=gr.Textbox(label="Transcription"))
298
+
299
+
300
+ # In Gradio, define a visual interface containing this microphone input and output, and then launch it:
301
+
302
+ # Make sure to wait for the gradio interface to load. A popup window will appear and ask you to allow the use of your
303
+ # microphone. To record audio, make sure to click on record -> stop -> submit. Make sure the audio is captured
304
+ # before clicking on 'submit'.
305
+
306
+
307
# Wrap the microphone interface in a tabbed Gradio app and launch it on
# port 8000. prevent_thread_lock=True returns control to the interpreter
# immediately instead of blocking on the server loop.
test_interface = gr.Blocks()
with test_interface:
    gr.TabbedInterface(
        [mic_transcribe],
        ["Transcribe Microphone"]
    )

test_interface.launch(
    share=False,
    server_port=8000,
    prevent_thread_lock=True
)
319
+
320
+
321
+ # You can now print out the transcription, which is stored in that global variable you created earlier:
322
+
323
+ # In[ ]:
324
+
325
+
326
# Show the transcription captured by the Gradio interface above.
# Guard against NameError: the global only exists after store_transcription
# has run, i.e. after audio was actually recorded and submitted.
try:
    print(transcription_value)
except NameError:
    print("No transcription captured yet - record and submit audio first.")
327
+
328
+
329
 + # The Gradio server is still holding port 8000, which would conflict if you run Gradio again, so it's a good idea to shut down the running Gradio interface.
330
+
331
+
332
+
333
+ test_interface.close()
334
+
335
+
336
+ # <div style="background-color:#fff1d7; padding:15px;"> <b> Note</b>: Make sure to run the previous cell to close the Gradio interface before running the next cell.</div>
337
+
338
+ # Now create an entirely new class, a Transcription Handler.
339
class TranscriptionHandler:
    """Collects one spoken transcription via a temporary Gradio microphone app.

    Instead of a module-level global, each recording is pushed onto an
    internal queue; get_transcription() launches the UI, polls the queue,
    and closes the UI once the first transcription arrives.
    """

    # we create a queue to hold transcription values
    def __init__(self):
        self.transcription_queue = Queue()
        self.interface = None

    # every time we record something we put it in the queue
    def store_transcription(self, output):
        self.transcription_queue.put(output)
        return output

    # This is the same interface and transcription logic as before
    # except it stores the result in a queue instead of a global
    def create_interface(self):
        mic_transcribe = gr.Interface(
            fn=lambda x: self.store_transcription(transcribe_speech(x)),
            inputs=gr.Audio(sources=["microphone"], type="filepath"),
            outputs=gr.Textbox(label="Transcription")
        )
        self.interface = gr.Blocks()
        with self.interface:
            gr.TabbedInterface(
                [mic_transcribe],
                ["Transcribe Microphone"]
            )
        return self.interface

    # we launch the transcription interface and wait for one result
    async def get_transcription(self):
        """Launch the mic UI, await the first transcription, close the UI, return it.

        NOTE(review): port 8000 is hard-coded — launching while another
        interface is still open on that port will fail; confirm callers
        close any previous interface first.
        """
        self.interface = self.create_interface()
        self.interface.launch(
            share=False,
            server_port=8000,
            prevent_thread_lock=True
        )

        # we poll every 1.5 seconds waiting for something to end up in the queue;
        # asyncio.sleep keeps the event loop free while we wait
        while True:
            if not self.transcription_queue.empty():
                result = self.transcription_queue.get()
                if self.interface is not None:
                    self.interface.close()
                return result
            await asyncio.sleep(1.5)
384
+
385
+
386
+ # Now you have a transcription handler, you can use it instead of the keyboard input interface when you're getting human input when you run your workflows:
387
+
388
+
389
+
390
async def main():
    """Run the RAG form-filling workflow, answering feedback requests by voice.

    Streams workflow events; whenever the workflow emits InputRequiredEvent,
    spins up a microphone interface, waits for a transcription, and feeds it
    back as a HumanResponseEvent. Prints the final filled form.
    """
    w = RAGWorkflow(timeout=600, verbose=False)

    handler = w.run(
        resume_file="./data/fake_resume.pdf",
        application_form="./data/fake_application_form.pdf"
    )

    async for event in handler.stream_events():
        if isinstance(event, InputRequiredEvent):
            # Get transcription from the user's microphone instead of the keyboard
            transcription_handler = TranscriptionHandler()
            response = await transcription_handler.get_transcription()

            handler.ctx.send_event(
                HumanResponseEvent(
                    response=response
                )
            )

    # awaiting the handler yields the workflow's StopEvent result
    response = await handler
    print("Agent complete! Here's your final result:")
    print(str(response))

if __name__ == "__main__":
    asyncio.run(main())
416
+
data/fake_application_form.pdf ADDED
Binary file (59.1 kB). View file
 
data/fake_resume.pdf ADDED
Binary file (133 kB). View file
 
helper.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Add your utilities or helper functions to this file.
2
+
3
+ import os
4
+ from dotenv import load_dotenv, find_dotenv
5
+
6
+ # these expect to find a .env file at the directory above the lesson.
7
+ # the format for that file is (without the comment)
8
+ #API_KEYNAME=AStringThatIsTheLongAPIKeyFromSomeService
9
def load_env():
    """Load environment variables from the nearest .env file found by find_dotenv()."""
    _ = load_dotenv(find_dotenv())
11
+
12
def get_openai_api_key():
    """Ensure the .env file is loaded, then return OPENAI_API_KEY (None if unset)."""
    load_env()
    return os.getenv("OPENAI_API_KEY")
16
+
17
def get_llama_cloud_api_key():
    """Ensure the .env file is loaded, then return LLAMA_CLOUD_API_KEY (None if unset)."""
    load_env()
    return os.getenv("LLAMA_CLOUD_API_KEY")
21
+
22
def extract_html_content(filename):
    """Read an HTML file and wrap its contents in a fixed-size container div.

    Args:
        filename: Path to the HTML file to embed.

    Returns:
        The file contents wrapped in a 100%-wide, 800px-tall div, suitable
        for inline notebook display.

    Raises:
        Exception: If the file cannot be read. The original error is chained
        (``from e``) so the root cause stays in the traceback — the bare
        re-raise in the original discarded it.
    """
    try:
        # Explicit encoding avoids platform-dependent default-codec surprises.
        with open(filename, 'r', encoding='utf-8') as file:
            html_content = file.read()
        html_content = f""" <div style="width: 100%; height: 800px; overflow: hidden;"> {html_content} </div>"""
        return html_content
    except Exception as e:
        raise Exception(f"Error reading file: {str(e)}") from e
requirements.txt ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
 + torch
2
+ gradio ==5.20.1
3
+ gradio_client ==1.7.2
4
+ llama-cloud-services ==0.6.5
5
+ llama-index ==0.12.23
6
+ llama-index-agent-openai ==0.4.6
7
+ llama-index-cli ==0.4.1
8
+ llama-index-core ==0.12.23.post2
9
+ llama-index-embeddings-huggingface ==0.5.2
10
+ llama-index-embeddings-openai ==0.3.1
11
+ llama-index-indices-managed-llama-cloud ==0.6.8
12
+ llama-index-llms-cohere ==0.4.0
13
+ llama-index-llms-openai ==0.3.25
14
+ llama-index-multi-modal-llms-openai ==0.4.3
15
+ llama-index-program-openai ==0.3.1
16
+ llama-index-question-gen-openai ==0.3.0
17
+ llama-index-readers-file ==0.4.6
18
+ llama-index-readers-llama-parse ==0.4.0
19
+ llama-index-utils-workflow ==0.3.0
20
+ openai-whisper ==20240930
21
+ pydantic ==2.10.6
22
+ pydantic_core ==2.27.2
23
 + python-dotenv