Spaces:

Emerging-Tech
/

document

Sleeping

App Files Community

Nikhil0987 committed on Feb 26, 2024

Commit

ca6013c

verified ·

1 Parent(s): c33fb07

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -44

app.py CHANGED Viewed

@@ -10,84 +10,73 @@ import streamlit as st
 from dotenv import load_dotenv
 load_dotenv()
 PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
 PINECONE_ENV = os.getenv('PINECONE_ENV')
 OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
 os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
 @st.cache_resource
 def embedding_db():
-    # we use the openAI embedding model
     embeddings = OpenAIEmbeddings()
-    # Initialize Pinecone: Correct Indentation
     pc = pinecone.init(
-        api_key=PINECONE_API_KEY,
-        environment=PINECONE_ENV
     )
 def doc_preprocessing():
     loader = DirectoryLoader(
         'data/',
-        glob='**/*.pdf',     # only the PDFs
         show_progress=True
     )
     docs = loader.load()
     text_splitter = CharacterTextSplitter(
-        chunk_size=1000,
         chunk_overlap=0
     )
     docs_split = text_splitter.split_documents(docs)
     return docs_split
-#     docs_split = doc_preprocessing()
-#     # Check if index exists, create if needed
-#     if 'langchain-demo-indexes' not in pc.list_indexes().names():
-#         pc.create_index(
-#            name='langchain-demo-indexes',
-#            dimension=1536, # Adjust dimension if needed
-#            metric='euclidean',
-#            spec=ServerlessSpec(cloud='aws', region='us-west-2')
-#         )
-#     doc_db = Pinecone.from_documents(
-#         docs_split,
-#         embeddings,
-#         index_name='langchain-demo-indexes',
-#         client=pc  # Pass the Pinecone object
-#     )
-#     return doc_db
-# llm = ChatOpenAI()
-# doc_db = embedding_db()
 def retrieval_answer(query):
-    chat_model = ChatOpenAI()  # Create the LLM instance
     qa = RetrievalQA.from_chain_type(
-        llm=chat_model,   # Pass the chat_model instance
         chain_type='stuff',
         retriever=doc_db.as_retriever(),
-    )
-    query = query
     result = qa.run(query)
     return result
 def main():
     st.title("Question and Answering App powered by LLM and Pinecone")
     text_input = st.text_input("Ask your query...")
     if st.button("Ask Query"):
-        if len(text_input)>0:
             st.info("Your Query: " + text_input)
-            answer = retrieval_answer(text_input)
             st.success(answer)
-if __name__ == "__main__":
-    main()

 from dotenv import load_dotenv
 load_dotenv()
 PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
 PINECONE_ENV = os.getenv('PINECONE_ENV')
 OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
 os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
 @st.cache_resource
 def embedding_db():
     embeddings = OpenAIEmbeddings()
     pc = pinecone.init(
+        api_key=PINECONE_API_KEY,
+        environment=PINECONE_ENV
     )
+    # Check if index exists, create if needed
+    if 'langchain-demo-indexes' not in pc.list_indexes().names():
+        pc.create_index(
+            name='langchain-demo-indexes',
+            dimension=1536, # Adjust dimension if needed
+            metric='euclidean'
+        )
+    docs_split = doc_preprocessing()  # Make sure this function is defined
+    doc_db = Pinecone.from_documents(
+        docs_split,
+        embeddings,
+        index_name='langchain-demo-indexes',
+        client=pc
+    )
+    return doc_db
 def doc_preprocessing():
     loader = DirectoryLoader(
         'data/',
+        glob='**/*.pdf',
         show_progress=True
     )
     docs = loader.load()
     text_splitter = CharacterTextSplitter(
+        chunk_size=1000,
         chunk_overlap=0
     )
     docs_split = text_splitter.split_documents(docs)
     return docs_split
 def retrieval_answer(query):
+    chat_model = ChatOpenAI()
     qa = RetrievalQA.from_chain_type(
+        llm=chat_model,
         chain_type='stuff',
         retriever=doc_db.as_retriever(),
+    )
     result = qa.run(query)
     return result
 def main():
     st.title("Question and Answering App powered by LLM and Pinecone")
     text_input = st.text_input("Ask your query...")
     if st.button("Ask Query"):
+        if len(text_input) > 0:
             st.info("Your Query: " + text_input)
+            # Potential loading message
+            with st.spinner("Processing your query..."):
+                doc_db = embedding_db()  # Create the embedding database
+                answer = retrieval_answer(text_input)
             st.success(answer)
+if __name__ == "__main__":
+    main()