NaderAfshar commited on
Commit ·
6cdd178
1
Parent(s): 2cd0d42
Commiting updated files before refining
Browse files- app.py +416 -0
- data/fake_application_form.pdf +0 -0
- data/fake_resume.pdf +0 -0
- helper.py +29 -0
- requirements.txt +23 -0
app.py
ADDED
|
@@ -0,0 +1,416 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
# coding: utf-8
|
| 3 |
+
|
| 4 |
+
# # Lesson 6: Use your voice
|
| 5 |
+
|
| 6 |
+
# **Lesson objective**: Get voice feedback
|
| 7 |
+
#
|
| 8 |
+
# So far we've set up a moderately complex workflows with a human feedback loop. Let's run it through the visualizer to see what it looks like.
|
| 9 |
+
|
| 10 |
+
# <div style="background-color:#fff1d7; padding:15px;"> <b> Note</b>: Make sure to run the notebook cell by cell. Please try to avoid running all cells at once.</div>
|
| 11 |
+
|
| 12 |
+
# In[1]:
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
# Warning control
|
| 16 |
+
import warnings
|
| 17 |
+
warnings.filterwarnings('ignore')
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
import os, json
|
| 21 |
+
from llama_parse import LlamaParse
|
| 22 |
+
from llama_index.llms.openai import OpenAI
|
| 23 |
+
from llama_index.embeddings.openai import OpenAIEmbedding
|
| 24 |
+
from llama_index.core import (
|
| 25 |
+
VectorStoreIndex,
|
| 26 |
+
StorageContext,
|
| 27 |
+
load_index_from_storage
|
| 28 |
+
)
|
| 29 |
+
from llama_index.core.workflow import (
|
| 30 |
+
StartEvent,
|
| 31 |
+
StopEvent,
|
| 32 |
+
Workflow,
|
| 33 |
+
step,
|
| 34 |
+
Event,
|
| 35 |
+
Context,
|
| 36 |
+
InputRequiredEvent,
|
| 37 |
+
HumanResponseEvent
|
| 38 |
+
)
|
| 39 |
+
from llama_index.utils.workflow import draw_all_possible_flows
|
| 40 |
+
import whisper
|
| 41 |
+
import gradio as gr
|
| 42 |
+
import asyncio
|
| 43 |
+
from queue import Queue
|
| 44 |
+
from dotenv import load_dotenv
|
| 45 |
+
|
| 46 |
+
# Load environment variables from a local .env file, if one is present.
load_dotenv()
# NOTE(review): COHERE_API_KEY is read here but never used anywhere in this file — confirm it is needed.
COHERE_API_KEY = os.getenv("COHERE_API_KEY")
# Credentials for LlamaParse / LlamaCloud, used by the workflow's parsing steps.
llama_cloud_api_key = os.getenv("LLAMA_CLOUD_API_KEY")
# NOTE(review): this module-level value is shadowed below — the LlamaParse call
# sites re-read os.getenv("LLAMA_CLOUD_BASE_URL") directly instead of using it.
LLAMA_CLOUD_BASE_URL = os.getenv("LLAMA_CLOUD_BASE_URL")
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
class ParseFormEvent(Event):
    """Carries the path of the application form on to the parsing step."""
    application_form: str


class QueryEvent(Event):
    """One question to ask the resume index about a single form field."""
    # `field` was previously undeclared even though every construction site
    # (see generate_questions) passes it — declare it so the schema is explicit.
    field: str
    query: str


class ResponseEvent(Event):
    """The answer produced for a single form field."""
    # Echoes QueryEvent.field so answers can be matched back to their fields
    # when fill_in_application collects the responses.
    field: str
    response: str


class FeedbackEvent(Event):
    """Human feedback that routes the workflow back to question generation."""
    feedback: str


class GenerateQuestionsEvent(Event):
    """Signal that form parsing finished and per-field questions can be generated."""
    pass
|
| 67 |
+
|
| 68 |
+
class RAGWorkflow(Workflow):
    """Event-driven workflow that fills a job-application form from a resume.

    Steps: ingest/index the resume -> parse the form into a field list ->
    generate one query per field -> answer each query against the resume
    index -> assemble the filled form -> ask a human for feedback, looping
    back to question generation until the feedback says everything is OK.
    """

    # Directory where the vector index is persisted between runs.
    storage_dir = "./storage"
    llm: OpenAI
    query_engine: VectorStoreIndex

    @step
    async def set_up(self, ctx: Context, ev: StartEvent) -> ParseFormEvent:
        """Validate inputs, build (or reload) the resume index, and hand the
        application form to the parsing step.

        Expects `resume_file` and `application_form` path attributes on the
        StartEvent; raises ValueError when either is missing/falsy.
        """

        if not ev.resume_file:
            raise ValueError("No resume file provided")

        if not ev.application_form:
            raise ValueError("No application form provided")

        # give ourselves an LLM to work with
        self.llm = OpenAI(model="gpt-4o-mini")

        # ingest our data and set up the query engine
        if os.path.exists(self.storage_dir):
            # we've already ingested our documents
            storage_context = StorageContext.from_defaults(persist_dir=self.storage_dir)
            index = load_index_from_storage(storage_context)
        else:
            # we need to parse and load our documents
            documents = LlamaParse(
                api_key=llama_cloud_api_key,
                base_url=os.getenv("LLAMA_CLOUD_BASE_URL"),
                result_type="markdown",
                content_guideline_instruction="This is a resume, gather related facts together and format it as bullet points with headers"
            ).load_data(ev.resume_file)
            # embed and index the documents
            index = VectorStoreIndex.from_documents(
                documents,
                embed_model=OpenAIEmbedding(model_name="text-embedding-3-small")
            )
            # persist so subsequent runs skip the (paid) parse + embed work
            index.storage_context.persist(persist_dir=self.storage_dir)

        # either way, create a query engine
        self.query_engine = index.as_query_engine(llm=self.llm, similarity_top_k=5)

        # let's pass our application form to a new step where we parse it
        return ParseFormEvent(application_form=ev.application_form)

    # we've separated the form parsing from the question generation
    @step
    async def parse_form(self, ctx: Context, ev: ParseFormEvent) -> GenerateQuestionsEvent:
        """Parse the application form into a JSON list of fields to fill.

        Stores the field list in the workflow context under "fields_to_fill".
        NOTE(review): json.loads on raw LLM output will raise JSONDecodeError
        if the model ignores the "JSON ONLY" instruction — confirm acceptable.
        """
        parser = LlamaParse(
            api_key=llama_cloud_api_key,
            base_url=os.getenv("LLAMA_CLOUD_BASE_URL"),
            result_type="markdown",
            content_guideline_instruction="This is a job application form. Create a list of all the fields that need to be filled in.",
            formatting_instruction="Return a bulleted list of the fields ONLY."
        )

        # get the LLM to convert the parsed form into JSON
        result = parser.load_data(ev.application_form)[0]
        raw_json = self.llm.complete(
            f"This is a parsed form. Convert it into a JSON object containing only the list of fields to be filled in, in the form {{ fields: [...] }}. <form>{result.text}</form>. Return JSON ONLY, no markdown.")
        fields = json.loads(raw_json.text)["fields"]

        await ctx.set("fields_to_fill", fields)

        return GenerateQuestionsEvent()

    # this step can get triggered either by GenerateQuestionsEvent or a FeedbackEvent
    @step
    async def generate_questions(self, ctx: Context, ev: GenerateQuestionsEvent | FeedbackEvent) -> QueryEvent:
        """Fire one QueryEvent per form field (fan-out).

        On a FeedbackEvent re-entry, the human feedback is appended to every
        question so the answers can be revised.
        """

        # get the list of fields to fill in
        fields = await ctx.get("fields_to_fill")

        # generate one query for each of the fields, and fire them off
        for field in fields:
            question = f"How would you answer this question about the candidate? <field>{field}</field>"

            # only FeedbackEvent carries a `feedback` attribute, hence hasattr
            if hasattr(ev,"feedback"):
                question += f"""
                    \nWe previously got feedback about how we answered the questions.
                    It might not be relevant to this particular field, but here it is:
                    <feedback>{ev.feedback}</feedback>
                """

            ctx.send_event(QueryEvent(
                field=field,
                query=question
            ))

        # store the number of fields so we know how many to wait for later
        await ctx.set("total_fields", len(fields))
        return

    @step
    async def ask_question(self, ctx: Context, ev: QueryEvent) -> ResponseEvent:
        """Answer a single field's question against the resume index."""
        print(f"Asking question: {ev.query}")

        response = self.query_engine.query(f"This is a question about the specific resume we have in our database: {ev.query}")

        print(f"Answer was: {str(response)}")

        return ResponseEvent(field=ev.field, response=response.response)

    # we now emit an InputRequiredEvent
    @step
    async def fill_in_application(self, ctx: Context, ev: ResponseEvent) -> InputRequiredEvent:
        """Collect all per-field answers (fan-in), assemble the filled form,
        and pause for human review via an InputRequiredEvent.

        Returns None until every expected ResponseEvent has arrived.
        """
        # get the total number of fields to wait for
        total_fields = await ctx.get("total_fields")

        responses = ctx.collect_events(ev, [ResponseEvent] * total_fields)
        if responses is None:
            return None # do nothing if there's nothing to do yet

        # we've got all the responses!
        responseList = "\n".join("Field: " + r.field + "\n" + "Response: " + r.response for r in responses)

        result = self.llm.complete(f"""
            You are given a list of fields in an application form and responses to
            questions about those fields from a resume. Combine the two into a list of
            fields and succinct, factual answers to fill in those fields.

            <responses>
            {responseList}
            </responses>
        """)

        # save the result for later
        await ctx.set("filled_form", str(result))

        # Let's get a human in the loop
        return InputRequiredEvent(
            prefix="How does this look? Give me any feedback you have on any of the answers.",
            result=result
        )

    # Accept the feedback.
    @step
    async def get_feedback(self, ctx: Context, ev: HumanResponseEvent) -> FeedbackEvent | StopEvent:
        """Classify the human feedback: stop the workflow on 'OKAY', otherwise
        loop back to question generation with the feedback attached."""

        result = self.llm.complete(f"""
            You have received some human feedback on the form-filling task you've done.
            Does everything look good, or is there more work to be done?
            <feedback>
            {ev.response}
            </feedback>
            If everything is fine, respond with just the word 'OKAY'.
            If there's any other feedback, respond with just the word 'FEEDBACK'.
        """)

        verdict = result.text.strip()

        print(f"LLM says the verdict was {verdict}")
        # anything other than an exact 'OKAY' is treated as more feedback
        if (verdict == "OKAY"):
            return StopEvent(result=await ctx.get("filled_form"))
        else:
            return FeedbackEvent(feedback=ev.response)
|
| 222 |
+
|
| 223 |
+
|
| 224 |
+
# In[4]:
|
| 225 |
+
|
| 226 |
+
|
| 227 |
+
# Render the workflow's possible event flows to an HTML file for inspection.
# NOTE(review): this assumes the "workflows/" directory already exists — confirm.
WORKFLOW_FILE = "workflows/RAG-EventDriven.html"
draw_all_possible_flows(RAGWorkflow, filename=WORKFLOW_FILE)


from IPython.display import display, HTML, DisplayHandle
from helper import extract_html_content

# Embed the generated visualization inline (only renders inside a notebook).
html_content = extract_html_content(WORKFLOW_FILE)
display(HTML(html_content), metadata=dict(isolated=True))
|
| 239 |
+
|
| 240 |
+
|
| 241 |
+
# Cool! You can see the path all the way to the end and the feedback loop is clear.
|
| 242 |
+
|
| 243 |
+
# <div style="background-color:#fff6ff; padding:13px; border-width:3px; border-color:#efe6ef; border-style:solid; border-radius:6px">
|
| 244 |
+
# <p> 💻 <b>To access <code>fake_application_form.pdf</code>, <code>fake_resume.pdf</code>, <code>requirements.txt</code> and <code>helper.py</code> files:</b> 1) click on the <em>"File"</em> option on the top menu of the notebook and then 2) click on <em>"Open"</em>. The form and resume are inside the data folder.
|
| 245 |
+
#
|
| 246 |
+
# <p> ⬇ <b>Download Notebooks:</b> 1) click on the <em>"File"</em> option on the top menu of the notebook and then 2) click on <em>"Download as"</em> and select <em>"Notebook (.ipynb)"</em>.</p>
|
| 247 |
+
#
|
| 248 |
+
# <p> 📒 For more help, please see the <em>"Appendix – Tips and Help"</em> Lesson.</p>
|
| 249 |
+
#
|
| 250 |
+
# </div>
|
| 251 |
+
|
| 252 |
+
# <p style="background-color:#f7fff8; padding:15px; border-width:3px; border-color:#e0f0e0; border-style:solid; border-radius:6px"> 🚨
|
| 253 |
+
# <b>Different Run Results:</b> The output generated by AI chat models can vary with each execution due to their dynamic, probabilistic nature. Don't be surprised if your results differ from those shown in the video.</p>
|
| 254 |
+
|
| 255 |
+
# ## Getting voice feedback
|
| 256 |
+
|
| 257 |
+
# Now, just for fun, you'll do one more thing: change the feedback from text feedback to actual words spoken out loud. To do this we'll use a different model from OpenAI called Whisper. LlamaIndex has a built-in way to transcribe audio files into text using Whisper.
|
| 258 |
+
#
|
| 259 |
+
# Here's a function that takes a file and uses Whisper to return just the text:
|
| 260 |
+
|
| 261 |
+
# In[6]:
|
| 262 |
+
|
| 263 |
+
|
| 264 |
+
def transcribe_speech(filepath):
    """Transcribe the audio file at *filepath* to text with Whisper.

    Args:
        filepath: Path to a recorded audio file, or None when Gradio captured
            nothing.

    Returns:
        The transcribed text, or "" (after showing a Gradio warning) when no
        audio was provided.
    """
    if filepath is None:
        gr.Warning("No audio found, please retry.")
        return ""

    # Cache the model on the function object: the original reloaded the
    # "base" Whisper model from disk on every call, which is slow and wasteful.
    whisper_model = getattr(transcribe_speech, "_model", None)
    if whisper_model is None:
        whisper_model = whisper.load_model("base")
        transcribe_speech._model = whisper_model

    document = whisper_model.transcribe(filepath)

    return document['text']
|
| 273 |
+
|
| 274 |
+
|
| 275 |
+
# But before we can use it, you need to capture some audio from your microphone. That involves some extra steps!
|
| 276 |
+
#
|
| 277 |
+
# First, create a callback function that saves data to a global variable.
|
| 278 |
+
|
| 279 |
+
# In[15]:
|
| 280 |
+
|
| 281 |
+
|
| 282 |
+
def store_transcription(output):
    """Record the latest transcription at module level and echo it back.

    The value is kept in the module-global ``transcription_value`` so later
    notebook cells can read it; returning the input unchanged lets this be
    chained inside a Gradio callback.
    """
    globals()["transcription_value"] = output
    return output
|
| 286 |
+
|
| 287 |
+
|
| 288 |
+
# Now use Gradio, which has special widgets that can render inside a notebook, to create an interface
|
| 289 |
+
# for capturing audio from a microphone. When the audio is captured, it calls `transcribe_speech` on the recorded data,
|
| 290 |
+
# and calls `store_transcription` on that.
|
| 291 |
+
|
| 292 |
+
|
| 293 |
+
# Microphone-capture interface: record audio, transcribe it with Whisper,
# and stash the text in the module-global via store_transcription.
mic_transcribe = gr.Interface(
    fn=lambda x: store_transcription(transcribe_speech(x)),
    inputs=gr.Audio(sources=["microphone"],
                    type="filepath"),
    outputs=gr.Textbox(label="Transcription"))


# Wrap the microphone interface in a tabbed Blocks layout and launch it.
# prevent_thread_lock=True returns immediately so the notebook keeps running.
test_interface = gr.Blocks()
with test_interface:
    gr.TabbedInterface(
        [mic_transcribe],
        ["Transcribe Microphone"]
    )

test_interface.launch(
    share=False,
    server_port=8000,
    prevent_thread_lock=True
)


# NOTE(review): transcription_value is only created once a recording has been
# submitted through the interface above; running this cell before recording
# raises NameError — confirm the intended notebook flow.
print(transcription_value)


# Shut the server down so port 8000 is free for the next launch.
test_interface.close()
|
| 334 |
+
|
| 335 |
+
|
| 336 |
+
# <div style="background-color:#fff1d7; padding:15px;"> <b> Note</b>: Make sure to run the previous cell to close the Gradio interface before running the next cell.</div>
|
| 337 |
+
|
| 338 |
+
# Now create an entirely new class, a Transcription Handler.
|
| 339 |
+
class TranscriptionHandler:
    """Capture one spoken utterance via a throwaway Gradio interface.

    Each get_transcription() call launches a microphone UI, waits for a
    transcription to land in an internal queue, closes the UI, and returns
    the text — a voice replacement for keyboard input in the workflow loop.
    """

    # we create a queue to hold transcription values
    def __init__(self):
        self.transcription_queue = Queue()
        self.interface = None

    # every time we record something we put it in the queue
    def store_transcription(self, output):
        """Push one transcription into the queue; return it so Gradio can display it."""
        self.transcription_queue.put(output)
        return output

    # This is the same interface and transcription logic as before
    # except it stores the result in a queue instead of a global
    def create_interface(self):
        """Build (and remember) the tabbed microphone-capture Blocks UI."""
        mic_transcribe = gr.Interface(
            fn=lambda x: self.store_transcription(transcribe_speech(x)),
            inputs=gr.Audio(sources=["microphone"], type="filepath"),
            outputs=gr.Textbox(label="Transcription")
        )
        self.interface = gr.Blocks()
        with self.interface:
            gr.TabbedInterface(
                [mic_transcribe],
                ["Transcribe Microphone"]
            )
        return self.interface

    # we launch the transcription interface
    async def get_transcription(self):
        """Launch the UI, await the first transcription, close the UI, return the text.

        NOTE(review): uses the same fixed server_port=8000 as the earlier demo
        interface — launching will fail if that one is still running; confirm.
        """
        self.interface = self.create_interface()
        self.interface.launch(
            share=False,
            server_port=8000,
            prevent_thread_lock=True
        )

        # we poll every 1.5 seconds waiting for something to end up in the queue
        while True:
            if not self.transcription_queue.empty():
                result = self.transcription_queue.get()
                if self.interface is not None:
                    self.interface.close()
                return result
            # yield to the event loop so the workflow keeps streaming events
            await asyncio.sleep(1.5)
|
| 384 |
+
|
| 385 |
+
|
| 386 |
+
# Now you have a transcription handler, you can use it instead of the keyboard input interface when you're getting human input when you run your workflows:
|
| 387 |
+
|
| 388 |
+
|
| 389 |
+
|
| 390 |
+
async def main():
    """Run the form-filling workflow end to end, using spoken (transcribed)
    feedback for each human-in-the-loop pause."""
    # 10-minute timeout leaves room for parsing, querying, and recording audio.
    w = RAGWorkflow(timeout=600, verbose=False)

    handler = w.run(
        resume_file="./data/fake_resume.pdf",
        application_form="./data/fake_application_form.pdf"
    )

    # Each InputRequiredEvent pauses the workflow until we answer it with
    # a HumanResponseEvent — here the answer is a voice transcription.
    async for event in handler.stream_events():
        if isinstance(event, InputRequiredEvent):
            # Get transcription
            transcription_handler = TranscriptionHandler()
            response = await transcription_handler.get_transcription()

            handler.ctx.send_event(
                HumanResponseEvent(
                    response=response
                )
            )

    # Await the workflow's final StopEvent result (the filled-in form).
    response = await handler
    print("Agent complete! Here's your final result:")
    print(str(response))

if __name__ == "__main__":
    asyncio.run(main())
|
| 416 |
+
|
data/fake_application_form.pdf
ADDED
|
Binary file (59.1 kB). View file
|
|
|
data/fake_resume.pdf
ADDED
|
Binary file (133 kB). View file
|
|
|
helper.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Add your utilities or helper functions to this file.
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
from dotenv import load_dotenv, find_dotenv
|
| 5 |
+
|
| 6 |
+
# these expect to find a .env file at the directory above the lesson.
|
| 7 |
+
# the format for that file is (without the comment)
|
| 8 |
+
#API_KEYNAME=AStringThatIsTheLongAPIKeyFromSomeService
|
| 9 |
+
def load_env():
    """Load environment variables from the nearest .env file on the search path."""
    # find_dotenv() walks up parent directories; load_dotenv is a no-op if none found.
    _ = load_dotenv(find_dotenv())
|
| 11 |
+
|
| 12 |
+
def get_openai_api_key():
    """Return the OpenAI API key from the environment (loading .env first).

    Returns None when OPENAI_API_KEY is not set.
    """
    load_env()
    return os.getenv("OPENAI_API_KEY")
|
| 16 |
+
|
| 17 |
+
def get_llama_cloud_api_key():
    """Return the LlamaCloud API key from the environment (loading .env first).

    Returns None when LLAMA_CLOUD_API_KEY is not set.
    """
    load_env()
    return os.getenv("LLAMA_CLOUD_API_KEY")
|
| 21 |
+
|
| 22 |
+
def extract_html_content(filename):
    """Read an HTML file and wrap its contents in a fixed-size container div.

    Args:
        filename: Path of the HTML file to read.

    Returns:
        The file contents wrapped in a styled ``<div>`` suitable for inline
        notebook display.

    Raises:
        RuntimeError: If the file cannot be read or decoded; the original
            error is chained as the cause.
    """
    try:
        # Explicit encoding so the read does not depend on the platform default.
        with open(filename, 'r', encoding='utf-8') as file:
            html_content = file.read()
        return f""" <div style="width: 100%; height: 800px; overflow: hidden;"> {html_content} </div>"""
    except (OSError, UnicodeDecodeError) as e:
        # The original raised a bare Exception and discarded the traceback;
        # RuntimeError (still an Exception subclass, so existing callers that
        # catch Exception keep working) with `from e` preserves the cause.
        raise RuntimeError(f"Error reading file: {str(e)}") from e
|
requirements.txt
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
torch
|
| 2 |
+
gradio ==5.20.1
|
| 3 |
+
gradio_client ==1.7.2
|
| 4 |
+
llama-cloud-services ==0.6.5
|
| 5 |
+
llama-index ==0.12.23
|
| 6 |
+
llama-index-agent-openai ==0.4.6
|
| 7 |
+
llama-index-cli ==0.4.1
|
| 8 |
+
llama-index-core ==0.12.23.post2
|
| 9 |
+
llama-index-embeddings-huggingface ==0.5.2
|
| 10 |
+
llama-index-embeddings-openai ==0.3.1
|
| 11 |
+
llama-index-indices-managed-llama-cloud ==0.6.8
|
| 12 |
+
llama-index-llms-cohere ==0.4.0
|
| 13 |
+
llama-index-llms-openai ==0.3.25
|
| 14 |
+
llama-index-multi-modal-llms-openai ==0.4.3
|
| 15 |
+
llama-index-program-openai ==0.3.1
|
| 16 |
+
llama-index-question-gen-openai ==0.3.0
|
| 17 |
+
llama-index-readers-file ==0.4.6
|
| 18 |
+
llama-index-readers-llama-parse ==0.4.0
|
| 19 |
+
llama-index-utils-workflow ==0.3.0
|
| 20 |
+
openai-whisper ==20240930
|
| 21 |
+
pydantic ==2.10.6
|
| 22 |
+
pydantic_core ==2.27.2
|
| 23 |
+
python-dotenv
|